In [48]:

import torch
import numpy as np
import pickle
import pandas as pd
from sklearn.preprocessing import OneHotEncoder


# import the datasets
user_movies=pd.read_csv("user_movies.csv")
rating_history_norm=pd.read_csv("rating_history_norm.csv",index_col=0)
with open('one_hot_encoder_sec.pkl', 'rb') as f:
    active_ohe = pickle.load(f)


# Factorisation machines class
class FactorizationMachine(torch.nn.Module):
    def __init__(self, n, k, bias=False):
        super(FactorizationMachine, self).__init__()
        self.n = n
        self.k = k
        self.linear = torch.nn.Linear(self.n, 1, bias)
        self.V = torch.nn.Parameter(torch.randn(n, k))  
    def forward(self, x_batch):
        part_1 = torch.sum(torch.mm(x_batch, self.V).pow(2))
        part_2 = torch.sum(torch.mm(x_batch.pow(2), self.V.pow(2)))
        inter_term = 0.5 * (part_1 - part_2)
        var_strength = self.linear(x_batch)
        return var_strength + inter_term
    

# initialize the model
model=FactorizationMachine(n=29721, k=20)
model.load_state_dict(torch.load('model_cola.pt'))


# define the recommendation function
def recommend_movie(USER_ID,NUMBER_RECO):

    last_movie_seen=user_movies[user_movies.userId==USER_ID].iloc[-1]['imdbId']
    user_movies.loc[:,"last_seen"]=last_movie_seen

    #form user_can_rate dataframe
    user_rated = user_movies[user_movies['userId']==USER_ID]
    user_can_rate = user_movies[ ~user_movies.imdbId.isin (user_rated["imdbId"])]
    user_can_rate.loc[:,"userId"]=USER_ID
    user_can_rate= user_can_rate.drop_duplicates().reset_index(drop=True)

    # form the features datafram
    cat_cols__ = user_can_rate.drop(columns=['userId', 'imdbId', 'last_seen'])
    agg_history__ = user_can_rate[['userId']].merge(rating_history_norm, left_on='userId', right_index=True) 
    active_groups__ = active_ohe.transform(user_can_rate[['userId','imdbId','last_seen']]) 
    features = np.hstack((active_groups__, agg_history__.iloc[:,1:], cat_cols__))

    # predcit using the model
    features_tensor = torch.tensor(features, dtype=torch.float32)
    with torch.no_grad():
        model.eval()
        y = model(features_tensor)

    # form a sorted list of the top n movies
    ratingss=y.numpy().round(2).reshape(-1,1)
    movies=user_can_rate['imdbId'].values.reshape(-1,1)
    result = np.concatenate((ratingss, movies), axis=1)

    return result[np.argsort(result[:, 0][::-1])][:NUMBER_RECO,1].astype(int)


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [50]:
recommend_movie(359,10)

array([ 386588,  422618, 1082588, 1714208, 1055366,  166110,   32553,
        259446,   97981,  454824])

In [6]:
# array([ 758752,   55257,   91064,   97493, 1545660])

In [54]:
import numpy as np
import pickle
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

# import the datasets
user_movies = pd.read_csv("user_movies.csv")
rating_history_norm = pd.read_csv("rating_history_norm.csv", index_col=0)
with open('one_hot_encoder_sec.pkl', 'rb') as f:
    active_ohe = pickle.load(f)


class FactorizationMachine:
    def __init__(self, n, k):
        self.n = n
        self.k = k
        self.linear_weights = np.random.randn(n, 1)
        self.V = np.random.randn(n, k)  # Factor matrix

    def forward(self, x_batch):
        part_1 = np.sum(np.dot(x_batch, self.V) ** 2, axis=1, keepdims=True)
        part_2 = np.sum(np.dot(x_batch ** 2, self.V ** 2), axis=1, keepdims=True)
        inter_term = 0.5 * (part_1 - part_2)
        var_strength = np.dot(x_batch, self.linear_weights.T)
        return var_strength + inter_term
        return var_strength 

    def load_state_dict(self, state_dict):
        self.linear_weights = np.array(state_dict['linear_weights'])
        self.V = np.array(state_dict['V'])


# Load the model parameters from a file
def load_model(filepath):
    with open(filepath, 'rb') as f:
        state_dict = pickle.load(f)
    model = FactorizationMachine(n=29721, k=20)
    model.load_state_dict(state_dict)
    return model

model = load_model('model_cola_pickle_numpy.pkl')


# define the recommendation function
def recommend_movie(USER_ID, NUMBER_RECO):
    last_movie_seen = user_movies[user_movies.userId == USER_ID].iloc[-1]['imdbId']
    user_movies.loc[:, "last_seen"] = last_movie_seen

    # Form user_can_rate dataframe
    user_rated = user_movies[user_movies['userId'] == USER_ID]
    user_can_rate = user_movies[~user_movies.imdbId.isin(user_rated["imdbId"])]
    user_can_rate.loc[:, "userId"] = USER_ID
    user_can_rate = user_can_rate.drop_duplicates().reset_index(drop=True)

    # Form the features dataframe
    cat_cols__ = user_can_rate.drop(columns=['userId', 'imdbId', 'last_seen'])
    agg_history__ = user_can_rate[['userId']].merge(rating_history_norm, left_on='userId', right_index=True)
    active_groups__ = active_ohe.transform(user_can_rate[['userId', 'imdbId', 'last_seen']])
    features = np.hstack((active_groups__, agg_history__.iloc[:, 1:], cat_cols__))

    # Predict using the model
    features_np = np.array(features, dtype=np.float32)
    y = model.forward(features_np)

    # Form a sorted list of the top n movies
    ratings = y.round(2).reshape(-1, 1)
    movies = user_can_rate['imdbId'].values.reshape(-1, 1)
    result = np.concatenate((ratings, movies), axis=1)

    return result[np.argsort(result[:, 0])[::-1]][:NUMBER_RECO, 1].astype(int)
    # return result




https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [55]:
# Example usage
recommend_movie(359,10)

array([ 424755, 1677561,  100339,   93871, 1067106,  239655, 2375574,
         95875,   83806,  104740])

In [None]:
array([ 424755,  109402, 1517252,  825283,   95875])

In [None]:
array([  91064,  758752,   55257, 1545660,   97493])

In [None]:
# # save the model parameters as a pickle file and a NumPy file for deployment

# import torch

# class FactorizationMachine(torch.nn.Module):
#     def __init__(self, n, k, bias=False):
#         super(FactorizationMachine, self).__init__()
#         self.n = n
#         self.k = k
#         self.linear = torch.nn.Linear(self.n, 1, bias)
#         self.V = torch.nn.Parameter(torch.randn(n, k))  
#     def forward(self, x_batch):
#         part_1 = torch.sum(torch.mm(x_batch, self.V).pow(2), dim=1, keepdim=True)
#         part_2 = torch.sum(torch.mm(x_batch.pow(2), self.V.pow(2)), dim=1, keepdim=True)
#         inter_term = 0.5 * (part_1 - part_2)
#         var_strength = self.linear(x_batch)
#         return var_strength + inter_term


# model=FactorizationMachine(n=29721, k=20)
# model.load_state_dict(torch.load('model_cola.pt'))


# state_dict = {
#     'linear_weights': model.linear.weight.tolist(),
#     'V': model.V.detach().numpy()
# }

# with open('model_cola_pickle_numpy.pkl', 'wb') as f:
#     pickle.dump(state_dict, f)


In [None]:
import numpy as np
import pickle
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

# import the datasets
user_movies = pd.read_csv("user_movies.csv")
rating_history_norm = pd.read_csv("rating_history_norm.csv", index_col=0)
with open('one_hot_encoder_sec.pkl', 'rb') as f:
    active_ohe = pickle.load(f)


class FactorizationMachine:
    def __init__(self, n, k):
        self.n = n
        self.k = k
        self.linear_weights = np.random.randn(n, 1)
        self.V = np.random.randn(n, k)  # Factor matrix

    def forward(self, x_batch):
        part_1 = np.sum(np.dot(x_batch, self.V) ** 2, axis=1, keepdims=True)
        part_2 = np.sum(np.dot(x_batch ** 2, self.V ** 2), axis=1, keepdims=True)
        inter_term = 0.5 * (part_1 - part_2)
        var_strength = np.dot(x_batch, self.linear_weights.T)
        return var_strength + inter_term

    def load_state_dict(self, state_dict):
        self.linear_weights = np.array(state_dict['linear_weights'])
        self.V = np.array(state_dict['V'])


# Load the model parameters from a file
def load_model(filepath):
    with open(filepath, 'rb') as f:
        state_dict = pickle.load(f)
    model = FactorizationMachine(n=29721, k=20)
    model.load_state_dict(state_dict)
    return model

model = load_model('model_cola_pickle_numpy.pkl')


# define the recommendation function
USER_ID = 8

NUMBER_RECO = 10


last_movie_seen = user_movies[user_movies.userId == USER_ID].iloc[-1]['imdbId']
user_movies.loc[:, "last_seen"] = last_movie_seen

# Form user_can_rate dataframe
user_rated = user_movies[user_movies['userId'] == USER_ID]
user_can_rate = user_movies[~user_movies.imdbId.isin(user_rated["imdbId"])]
user_can_rate.loc[:, "userId"] = USER_ID
user_can_rate = user_can_rate.drop_duplicates().reset_index(drop=True)

# Form the features dataframe
cat_cols__ = user_can_rate.drop(columns=['userId', 'imdbId', 'last_seen'])
agg_history__ = user_can_rate[['userId']].merge(rating_history_norm, left_on='userId', right_index=True)
active_groups__ = active_ohe.transform(user_can_rate[['userId', 'imdbId', 'last_seen']])
features = np.hstack((active_groups__, agg_history__.iloc[:, 1:], cat_cols__))

# Predict using the model
features_np = np.array(features, dtype=np.float32)
y = model.forward(features_np)

# Form a sorted list of the top n movies
ratings = y.round(2).reshape(-1, 1)
movies = user_can_rate['imdbId'].values.reshape(-1, 1)
result = np.concatenate((ratings, movies), axis=1)

result =  result[np.argsort(result[:, 0])[::-1]][:NUMBER_RECO, 1].astype(int)
# return result




In [18]:
USER_ID = 8

NUMBER_RECO = 10


last_movie_seen = user_movies[user_movies.userId == USER_ID].iloc[-1]['imdbId']
user_movies.loc[:, "last_seen"] = last_movie_seen

# Form user_can_rate dataframe
user_rated = user_movies[user_movies['userId'] == USER_ID]
user_can_rate = user_movies[~user_movies.imdbId.isin(user_rated["imdbId"])]
user_can_rate.loc[:, "userId"] = USER_ID
user_can_rate = user_can_rate.drop_duplicates().reset_index(drop=True)

# Form the features dataframe
cat_cols__ = user_can_rate.drop(columns=['userId', 'imdbId', 'last_seen'])
agg_history__ = user_can_rate[['userId']].merge(rating_history_norm, left_on='userId', right_index=True)
active_groups__ = active_ohe.transform(user_can_rate[['userId', 'imdbId', 'last_seen']])
features = np.hstack((active_groups__, agg_history__.iloc[:, 1:], cat_cols__))

# Predict using the model
features_np = np.array(features, dtype=np.float32)
# y = model.forward(features_np)

# # Form a sorted list of the top n movies
# ratings = y.round(2).reshape(-1, 1)
# movies = user_can_rate['imdbId'].values.reshape(-1, 1)
# result = np.concatenate((ratings, movies), axis=1)

# result =  result[np.argsort(result[:, 0])[::-1]][:NUMBER_RECO, 1].astype(int)

In [20]:
features_np


array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [16]:
model.V

array([[-0.23161049,  0.16279529, -0.32845557, ...,  0.06948352,
        -0.56501   ,  0.40879342],
       [ 0.02308583, -0.09877924, -0.05107058, ...,  0.07289457,
        -0.00310018,  0.02678443],
       [-0.12866688,  0.3054938 ,  0.1358318 , ...,  0.08835667,
         0.12933207, -0.69142413],
       ...,
       [-0.17398903,  0.21960019, -0.13789454, ..., -0.34476689,
        -0.05934772, -0.32462913],
       [ 0.08348624, -0.25757474,  0.18738982, ...,  0.0300645 ,
         0.14736415,  0.319353  ],
       [-0.11154244, -0.15386644, -0.37823576, ..., -0.20922843,
        -0.16290176, -0.03436306]], dtype=float32)

In [56]:


x_batch = features_np

part_1 = np.sum(np.dot(x_batch, model.V) ** 2, axis=1, keepdims=True)
part_2 = np.sum(np.dot(x_batch ** 2, model.V ** 2), axis=1, keepdims=True)
inter_term = 0.5 * (part_1 - part_2)
var_strength = np.dot(x_batch, model.linear_weights.T)
y= var_strength + inter_term

ratings = y.round(2).reshape(-1, 1)
movies = user_can_rate['imdbId'].values.reshape(-1, 1)
result = np.concatenate((ratings, movies), axis=1)

result =  result[np.argsort(result[:, 0])[::-1]][:NUMBER_RECO, 1].astype(int)
result  

array([ 424755, 1517252,  385586,  104740,  130414, 1677561,  420901,
       1067106,  108941, 2375574])

In [42]:
np.sum(np.dot(x_batch ** 2, model.V ** 2), axis=1).shape   

(9677,)

In [29]:
x_batch.shape

(9677, 29721)

In [27]:
model.V.shape

torch.Size([29721, 20])

In [39]:
model.linear_weights.T.shape

(29721, 1)

In [47]:
np.array([1,2,3,4,5])

array([1, 2, 3, 4, 5])