# CNN with Pairwise encoding for recommendation

The goal of this notebook is to study whether the algorithm proposed in *CosRec: 2D Convolutional Neural Networks for Sequential Recommendation* (Yan et al.) can be efficiently applied to sequential recommendation.

In [None]:
from keras.layers import Dense, Activation, Input, MaxPooling2D, Embedding, Conv2D, Reshape, Permute, Dropout, RepeatVector, Concatenate, BatchNormalization, AveragePooling2D
from keras.models import load_model, Model, Sequential
from keras.utils import Sequence
from keras import backend as K
import tensorflow as tf

import matplotlib.pyplot as plt
import csv
import math
import numpy as np

In [None]:
from google.colab import drive # Needed to read the CSV files stored on Google Drive
# Mount the Drive under /content/drive; the paths used later in this notebook start there.
drive.mount('/content/drive')

In [None]:
# Number of values in the dictionary + the empty value
d = 14370 + 1 # 1 additional category: an <EOS>
# Input size (length of the input sequence)
Tx = 64
# Output size (length of the output sequence)
Ty = 16
# Embedding dimension
n_e = 64
# Mini-batch size
m = 256

# Kernel sizes of the Conv2D layers (n_k_i belongs to the i-th convolution)
n_k_1 = 2
n_k_2 = 3
n_k_3 = 1
n_k_4 = 3

# Number of filters of the Conv2D layers (n_f_i belongs to the i-th convolution).
# A filter count has to be an integer; the products below are exact for
# n_e = 64, but `1.75 * n_e` is a float, hence the explicit int() conversion.
n_f_1 = int(1.75 * n_e)
n_f_2 = int(1.5 * n_e)
n_f_3 = int(1.25 * n_e)
n_f_4 = n_e

n_ap_1 = (4, 2) # the number of rows is reduced by the average poolings
n_ap_2 = (8, 1) # the number of rows is reduced by the average poolings

# Parameters of the FC network
n_fc = 128


In [None]:
# def CNNmodel():
#   inpt = Input(shape=(Tx,))
#   # Embedding
#   embedding = Embedding(input_dim=d , output_dim=n_e, input_length=Tx)
#   # First layer, we embedd the input sequence to get a (Tx , n_e) picture
#   x = embedding(inpt)

#   x = Reshape(target_shape=(Tx, 1, n_e))(x)

#   x = Concatenate(axis = 2)([x] * Tx)

# # x = RepeatVector(Tx)(x) # We are then supposed to get a (Tx, Tx, n_e) tensor
# # In this new form, when x is viewed as a Tx x Tx square of depth n_e, each column of the square represents one and the same product
#   # xprime = K.transpose(x)
#   # xprime = K.reverse(xprime, axes = 1) # 90-degree clockwise rotation, done as a transposition followed by a mirroring
#   # all that remains now is to concatenate x and xprime along the depth (3rd dimension)

#   # xprime = Permute((1,2), input_shape=(Tx, Tx, n_e))(x)
#   xprime = K.permute_dimensions(x, (0, 2, 1, 3))

#   x = Concatenate(axis = 3)([x, xprime]) # in theory this is exactly what we want here, and x has shape (T_x, T_x, 2 * n_e)
# # Moreover, when x is looked at from the front with 2*n_e as the depth, the column determines the first n_e components (the first product, the one closest to the viewer) and the row determines the following n_e

#   print(tf.shape(x))
#     # A Conv2D network
#   x = Conv2D(filters=n_f_1 , kernel_size= n_k_1, padding = "valid", strides = n_k_1, activation="relu")(x)

#   x = Conv2D(filters=n_f_2 , kernel_size= n_k_2, padding = "same", activation="relu")(x)

#   x = BatchNormalization(axis = (1, 2))(x)
#   x = AveragePooling2D(pool_size=n_ap_1, padding = "valid", strides = n_ap_1)(x)
#   x = Conv2D(filters=n_f_3 , kernel_size= n_k_3, padding = "same", activation="relu")(x)

#   x = Conv2D(filters=n_f_4 , kernel_size= n_k_4, padding = "same", activation="relu")(x)

#   x = BatchNormalization(axis = (1, 2))(x)
#   x = AveragePooling2D(pool_size=n_ap_2, padding = "valid", strides = n_ap_2)(x)


# # If my calculations are right, we should arrive at (1, Ty, n_e) here
#   x = Reshape(target_shape=(Ty, n_e))(x)
#   # A FC network
#   x = Dense(n_fc, activation="relu")(x)
#   x = Dropout(0.2)(x)
#   x = Dense(d , activation="softmax")(x)

#   model = Model(inputs = inpt , outputs = x)
#   return model

In [None]:
# model = CNNmodel()
# model.compile(loss="sparse_categorical_crossentropy",optimizer="adam", metrics="categorical_accuracy")
# NOTE: with the two lines above commented out, `model` is not created in this
# cell; it must already be bound (e.g. by the `load_model` cell of this
# notebook) before the summary can be printed.
model.summary()

In [None]:
# model.save("PairwiseEncodingCNN_save/")

In [None]:
# Base directory on the mounted Drive; DataGenerator prepends it to the CSV paths it is given.
folder = "/content/drive/MyDrive/PSC Recommandation séquentielle/Données/DataTables/"

class DataGenerator(Sequence):
  """Keras ``Sequence`` streaming mini-batches from two parallel CSV files.

  Row ``k`` of the X file and row ``k`` of the Y file form one sample. Both
  files are read sequentially, ``m`` rows (module-level mini-batch size) per
  batch, and are rewound together once either of them is exhausted.

  The files are located at ``folder + X_path`` and ``folder + Y_path`` where
  ``folder`` is the module-level base directory.

  NOTE: batches are produced in file order; the ``idx`` argument of
  ``__getitem__`` is ignored, so the generator must be consumed sequentially
  (no shuffling, no parallel workers).
  """

  def __init__(self , nb_lines, X_path, Y_path):
    """Open both CSV files.

    Args:
      nb_lines: number of samples per epoch; only used by ``__len__``.
      X_path: path of the input CSV file, relative to ``folder``.
      Y_path: path of the target CSV file, relative to ``folder``.
    """
    # Lets the Keras base class set up its own state where it defines any.
    super().__init__()
    self.X_path = X_path
    self.Y_path = Y_path
    self.nb_lines = nb_lines
    # The file objects are kept so that they can be closed explicitly.
    self._X_file = None
    self._Y_file = None
    self._open_readers()

  def _open_readers(self):
    """(Re)open both files and position both readers on the first row."""
    self.close()
    # newline="" is the mode recommended by the csv module documentation.
    self._X_file = open(folder + self.X_path , "r", newline="")
    self._Y_file = open(folder + self.Y_path , "r", newline="")
    self.X_reader = csv.reader(self._X_file)
    self.Y_reader = csv.reader(self._Y_file)

  def close(self):
    """Close the underlying files; calling it several times is harmless."""
    for handle in (self._X_file, self._Y_file):
      if handle is not None:
        handle.close()
    self._X_file = None
    self._Y_file = None

  def __len__(self):
    """Return the number of batches per epoch: ``ceil(nb_lines / m)``."""
    return math.ceil(self.nb_lines/m)

  def __getitem__(self, idx):
    """Return the next batch as a pair of numpy arrays ``(X, Y)``.

    Every batch holds exactly ``m`` samples. Each X row is converted to a
    list of ints and each Y row to a list of one-element int lists.

    Args:
      idx: ignored, the batch returned is always the next one in file order.
    """
    X1 = []
    Y = []
    for _ in range(m):
      x_row, y_row = self.getNextSample()
      X1.append([int(value) for value in x_row])
      Y.append([[int(value)] for value in y_row])
    return np.array(X1) , np.array(Y)

  def getNextSample(self):
    """Return the next ``(x_row, y_row)`` pair, rewinding at end of file.

    Both files are rewound together as soon as either one is exhausted so
    that the rows stay aligned.

    Raises:
      ValueError: if a file yields no row even right after being reopened.
    """
    x = next(self.X_reader , None)
    y = next(self.Y_reader , None)
    if x is None or y is None:
      self._open_readers()
      x = next(self.X_reader , None)
      y = next(self.Y_reader , None)
      if x is None or y is None:
        raise ValueError("no sample could be read from " + repr(self.X_path)
                         + " / " + repr(self.Y_path))
    return x , y

In [None]:
# Load the previously saved model from the Drive folder and bind it to `model`.
model = load_model("/content/drive/MyDrive/PSC Recommandation séquentielle/Modèles/CNN/CNN Pairwise Encoding/PairwiseEncodingCNN_save/")

In [None]:
# train_gen = DataGenerator(3705954
#                           , "./s2_X_train.csv"
#                           ,"./s2_Y_train.csv")
# model.fit(train_gen , epochs=1, verbose = 1)

# model.save("PairwiseEncodingCNN_save/")