In [2]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from random import sample

import tensorflow as tf

import keras
from keras.models import Model
from keras.layers import Input, Reshape, Dot, Concatenate, Dense, Dropout, Lambda, Activation, Add
from keras.layers.embeddings import Embedding
from tensorflow.keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from keras.regularizers import l2

In [3]:
print(tf.__version__)

2.6.0


In [None]:
pip from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
PATH = '/content/drive/MyDrive/Colab Notebooks/one-m-capstone/data/'

In [None]:
ratings = pd.read_csv(PATH+'processed_ratings.csv').drop(columns=['Unnamed: 0'])
new_ratings = pd.read_csv(PATH+'new_ratings.csv')

In [None]:
ratings.head()

Unnamed: 0,user_id,movie_id,rating,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [None]:
new_ratings.head()

Unnamed: 0,user_id,movie_id,rating
0,7000,2858,4
1,7000,260,4
2,7000,1196,4
3,7000,16,4
4,7000,24,4


In [None]:
ratings = ratings[['user_id','movie_id','rating']]
new_ratings = new_ratings[['user_id','movie_id','rating']]

In [None]:
df = ratings
#df = pd.concat([ratings,new_ratings],ignore_index=True)

In [None]:
user_enc = LabelEncoder()
df['user'] = user_enc.fit_transform(df['user_id'].values)
n_users = df['user'].nunique()

item_enc = LabelEncoder()
df['movie'] = item_enc.fit_transform(df['movie_id'].values)
n_movies = df['movie'].nunique()

df['rating'] = df['rating'].values.astype(np.float32)  # make it more workable with keras

X = df[['user', 'movie']].values
y = df["rating"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train_array = [X_train[:, 0], X_train[:, 1]]
X_test_array = [X_test[:, 0], X_test[:, 1]]

class EmbeddingLayer:
    def __init__(self, n_items, n_factors, name=None):
        self.n_items = n_items
        self.n_factors = n_factors
        self.name = name
    
    def __call__(self, x):
      if self.name:
        x = Embedding(self.n_items, self.n_factors, embeddings_initializer='he_normal',
                    embeddings_regularizer=l2(1e-6), name=self.name)(x)
      else:
        x = Embedding(self.n_items, self.n_factors, embeddings_initializer='he_normal',
                    embeddings_regularizer=l2(1e-6))(x)

      x = Reshape((self.n_factors,))(x)
      return x
      
f_dim = 10 # this is the n-dimension of our embedding layers it is a hyper param

def Recommender(n_users, n_movies, n_factors):
    
    user = Input(shape=(1,), name='user_input')
    u = Embedding(n_users, n_factors, embeddings_initializer='he_normal',
                  embeddings_regularizer=l2(1e-6))(user)
    u = Reshape((n_factors,))(u)
    
    movie = Input(shape=(1,), name='movie_input')
    m = Embedding(n_movies, n_factors, embeddings_initializer='he_normal',
                  embeddings_regularizer=l2(1e-6))(movie)
    m = Reshape((n_factors,))(m)
    
    x = Dot(axes=1)([u, m])    
    model = Model(inputs=[user, movie], outputs=x)
    opt = Adam(lr=0.001)
    model.compile(loss='mean_squared_error', optimizer=opt)    

    return model

model = Recommender(n_users, n_movies, f_dim)
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
user_input (InputLayer)         [(None, 1)]          0                                            
__________________________________________________________________________________________________
movie_input (InputLayer)        [(None, 1)]          0                                            
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, 1, 10)        60410       user_input[0][0]                 
__________________________________________________________________________________________________
embedding_3 (Embedding)         (None, 1, 10)        38830       movie_input[0][0]                
____________________________________________________________________________________________

  "The `lr` argument is deprecated, use `learning_rate` instead.")


In [None]:
early_stop = EarlyStopping(patience=2)
history = model.fit(x=X_train_array, y=y_train,
    batch_size=128,
    epochs=5,
    verbose=1,
    validation_data=(X_test_array,y_test),
    callbacks=[early_stop]
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
new_ratings['movie_id'].values

array([2858,  260, 1196,   16,   24,   13,   25, 1300,    1, 1097, 1210,
         55,    4,   18,   26,   29, 3114, 2628, 2571,   30,   35,   10,
        193,  171,  313])

In [None]:
model.get_layer('user_input').input

<KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'user_input')>

In [None]:
model_2 = Model(inputs=model.get_layer('user_input').input, outputs=model.get_layer('embedding_2').output) 
usrc = new_ratings['movie_id'].values
user_movie_embeddings = model_2.predict(x = usrc) # usrc = array of movie id liked by user
ser_movie_embeddings = user_movie_embeddings.reshape(len(usrc),10) 
user_movie_bias = np.array([5 for temp in range(len(usrc))])
user_embedding, residuals, rank, s = np.linalg.lstsq(user_movie_embeddings,user_movie_bias, rcond=-1) # Get embedding for new user
user_embedding = user_embedding.reshape(1, 10) # User embedding based on choices of user
user_embedding

LinAlgError: ignored

In [None]:
tf.keras.models.save_model(model, '/content/drive/MyDrive/Colab Notebooks/one-m-capstone/model.keras')

In [None]:
import pickle

In [None]:
pickle.dump(user_enc, open('/content/drive/MyDrive/Colab Notebooks/one-m-capstone/user_enc.p', 'wb'))
pickle.dump(item_enc, open('/content/drive/MyDrive/Colab Notebooks/one-m-capstone/item_enc.p', 'wb'))

In [None]:
def Recommender(n_users, n_movies, n_factors):
    
    user = Input(shape=(1,), name='user_input')
    u = Embedding(n_users, n_factors, embeddings_initializer='he_normal',
                  embeddings_regularizer=l2(1e-6))(user)
    u = Flatten()(u)
    
    movie = Input(shape=(1,), name='movie_input')
    m = Embedding(n_movies, n_factors, embeddings_initializer='he_normal',
                  embeddings_regularizer=l2(1e-6))(movie)
    m = Flatten(())(m)
    
    x = Concatenate()([u_embedding, m_embedding])
    model = Model(inputs=[user, movie], outputs=x)
    opt = Adam(lr=0.001)
    model.compile(loss='mean_squared_error', optimizer=opt)    

    return model

model = Recommender(n_users, n_movies, f_dim)
model.summary()

NameError: ignored