In [1]:
import pandas as pd
import numpy as np
import random
import math
from sklearn.linear_model import LogisticRegression, Ridge
from sklearn.metrics.pairwise import cosine_similarity
import time

from functions import train_test

In [2]:
# Load the ratings table from CSV and cast its column labels to integers.
# NOTE(review): presumably one row per user and one column per item — confirm
# against the data file.
rating_df = pd.read_csv('data/user_rating_pt.csv')
rating_df.columns = rating_df.columns.astype(int)

In [3]:
# Cut-off used to binarise the ratings: values >= rating_treshold become 1,
# lower values become 0.
rating_treshold = 3.5

# In-place and order-dependent: the second mask is evaluated on the frame that
# the first assignment has already modified. Because the threshold is above 1,
# re-running this cell without reloading rating_df turns the 1s back into 0s.
rating_df[rating_df < rating_treshold] = 0
rating_df[rating_df >= rating_treshold] = 1
# Replace the column labels with positional indices 0..n_columns-1.
rating_df.columns = range(len(rating_df.columns))

In [4]:
# NumPy copy of the binarised frame; later cells index it as rating_matrix[u, i].
rating_matrix = np.array(rating_df)

In [5]:
# Collect the (row, column) index pair of every positive entry.
# np.argwhere scans the matrix in row-major order, which is the order a nested
# `for u ... for i ...` loop would produce, and .tolist() converts the indices
# to plain Python ints so each element is an ordinary (int, int) tuple.
known = [tuple(pair) for pair in np.argwhere(rating_matrix > 0).tolist()]

# train_test comes from the project-local `functions` module.
# NOTE(review): presumably a random 70/30 split of the pairs — confirm, and
# seed it if so, otherwise the metrics below are not reproducible.
training, testing = train_test(known, test_size=0.3)

In [6]:
# Dense 0/1 matrices with the same shape as rating_matrix, one per split.
train_matrix = np.zeros(rating_matrix.shape)
test_matrix = np.zeros(rating_matrix.shape)

# Mark each (row, column) pair of a split with a 1 in that split's matrix.
for split_pairs, split_matrix in ((training, train_matrix), (testing, test_matrix)):
    for u, i in split_pairs:
        split_matrix[u, i] = 1

In [7]:
def get_metrics(test_matrix, user_rec):
    """Compute and print precision, recall and F1 pooled over all users.

    Parameters
    ----------
    test_matrix : np.ndarray
        2-D array indexed as ``test_matrix[user][item]``; an entry equal to 1
        marks a held-out positive.
    user_rec : np.ndarray
        2-D array whose row ``u`` holds the item indices recommended to the
        user in row ``u`` of ``test_matrix``.

    Returns
    -------
    tuple of float
        ``(precision, recall, F1_score)``. Any ratio whose denominator is zero
        is reported as 0.0 instead of raising ``ZeroDivisionError``.
    """
    true_positive = 0
    false_positive = 0
    false_negative = 0

    # Every recommended item is either a hit or a false positive.
    for u in range(user_rec.shape[0]):
        for i in user_rec[u]:
            if test_matrix[u][i] == 1:
                true_positive += 1
            else:
                false_positive += 1

    # Held-out positives missing from the user's list are false negatives.
    # Only the positive columns are inspected, not every cell of the matrix.
    for u in range(test_matrix.shape[0]):
        positives = np.flatnonzero(np.asarray(test_matrix[u]) == 1)
        if positives.size:
            missed = ~np.isin(positives, user_rec[u])
            false_negative += int(np.count_nonzero(missed))

    recommended = true_positive + false_positive
    relevant = true_positive + false_negative

    # Guard each ratio: with no recommendations, no positives or no hits the
    # corresponding denominator is zero.
    precision = true_positive/recommended if recommended else 0.0
    recall = true_positive/relevant if relevant else 0.0
    if precision + recall:
        F1_score = 2*(precision*recall)/(precision + recall)
    else:
        F1_score = 0.0

    print("Precision :", precision)
    print("Recall :", recall)
    print("F1 Score :", F1_score)

    return precision, recall, F1_score

In [8]:
def get_recommendations(pred, k=5):
    """Return, for every row of ``pred``, the column indices of its k largest scores.

    Parameters
    ----------
    pred : np.ndarray
        2-D score array, one row per user and one column per item.
    k : int, default 5
        Number of items to keep per row. Values larger than the number of
        columns are clamped to it; ``k == 0`` yields empty rows.

    Returns
    -------
    np.ndarray
        Integer array of shape ``(pred.shape[0], min(k, pred.shape[1]))``.
        The indices in a row are the top-k set but are not sorted by score.

    Raises
    ------
    ValueError
        If ``k`` is negative.
    """
    if k < 0:
        raise ValueError("k must be non-negative")

    pred = np.asarray(pred)
    n_users, n_items = pred.shape
    k = min(k, n_items)

    # `[-0:]` would select the whole row, so the empty case is handled explicitly.
    if k == 0:
        return np.empty((n_users, 0), dtype=np.intp)

    # Partition each row so that its last k positions hold the k largest scores.
    # The copy lets the full-width index array be freed.
    return np.argpartition(pred, -k, axis=1)[:, -k:].copy()

In [9]:
def LREC(dataset, reg=1.0, user_list=None):
    """Score items for the requested users with one ridge regression per user.

    For each requested user a ``Ridge`` model is fitted that predicts that
    user's +1/-1 column from the columns of all users; the learned weights are
    then combined with ``dataset`` to score every item.

    Parameters
    ----------
    dataset : np.ndarray
        2-D array with one row per user and one column per item; entries equal
        to 1 are treated as liked items.
        NOTE(review): presumably strictly 0/1, since ``X*2 - 1`` is meant to
        map it to -1/+1 — confirm.
    reg : float, default 1.0
        Passed to ``Ridge`` as ``alpha``.
    user_list : iterable of int, optional
        Row indices to score. Defaults to every row of ``dataset``.

    Returns
    -------
    np.ndarray
        Array of shape ``(len(user_list), dataset.shape[1])``. Items the user
        already likes in ``dataset`` are set to ``pred.min() - 1`` so they rank
        below every other item.
    """
    n_users = dataset.shape[0]

    # `is None`, not `== None`: comparing an ndarray with `==` gives an array,
    # whose truth value is ambiguous and raises inside `if`.
    if user_list is None:
        user_list = range(n_users)

    # Only a real range covering every row takes the full-run branch below;
    # the isinstance check keeps ndarray inputs from being compared with `==`.
    full_run = isinstance(user_list, range) and user_list == range(n_users)
    if not full_run:
        user_list = list(user_list)

    # Nothing to score: return an empty result instead of failing in np.dot/min.
    if len(user_list) == 0:
        return np.zeros((0, dataset.shape[1]))

    X = dataset.T
    Y = X*2 - 1

    W = []
    for index in user_list:
        target = Y.T[index]
        # A user whose targets are all the same value gets all-zero weights
        # instead of a fitted model.
        if 1 in target and -1 in target:
            rdg = Ridge(alpha=reg).fit(X, target)
            W.append(rdg.coef_)
        else:
            W.append(np.zeros(n_users))
    W = np.array(W)

    # NOTE(review): the full run multiplies by W.T while a subset run uses W.
    # The notebook's own check for user 45 shows both giving the same list,
    # presumably because W is symmetric here — confirm before unifying them.
    if full_run:
        pred = np.dot(W.T, dataset[user_list])
    else:
        pred = np.dot(W, dataset)

    # Push already-liked items below every other score so they are never
    # recommended again.
    train_replace = pred.min() - 1
    for index, u in enumerate(user_list):
        pred[index, dataset[u] == 1] = train_replace

    return pred

In [10]:
# Score every user on the training split, keep 15 recommendations per user,
# and evaluate them against the held-out test split.
pred_ = LREC(train_matrix, reg=250)
user_rec = get_recommendations(pred_, k=15)

# Last expression of the cell: get_metrics prints the three metrics and the
# returned tuple is shown as the cell output.
get_metrics(test_matrix, user_rec)


Precision : 0.2500546448087432
Recall : 0.12357547934107481
F1 Score : 0.1654075546719682


(0.2500546448087432, 0.12357547934107481, 0.1654075546719682)

In [11]:
# Display the recommendation array (one row of item indices per user).
user_rec

array([[2257,   46,  659, ...,  938,  921,  913],
       [7022, 8287, 3633, ..., 5901, 6134, 4791],
       [ 989,  913,  835, ..., 1390,  914,  901],
       ...,
       [5363,  314, 4900, ..., 3609, 2224,  254],
       [ 257,  472,  507, ...,  277,  322,  508],
       [ 906,  224, 3868, ..., 7355, 3633, 3609]])

In [12]:
# Time a run restricted to a single user (row 45) instead of the full matrix.
t = time.time()
pred_1 = LREC(train_matrix, reg=250, user_list=[45])
user_rec1 = get_recommendations(pred_1, k=15)
# Elapsed wall-clock seconds, shown as the cell output.
time.time() - t

0.13360095024108887

In [13]:
# Recommendations from the single-user run (one row, for user 45).
user_rec1

array([[472, 436, 297,  43, 510, 217, 287, 134, 507, 418, 307, 314, 506,
        123, 508]])

In [14]:
# Same user taken from the full run, for comparison with user_rec1.
user_rec[45]

array([472, 436, 297,  43, 510, 217, 287, 134, 507, 418, 307, 314, 506,
       123, 508])