# Recommendation System

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from operator import itemgetter

In [2]:
data = pd.read_csv('./ml-latest-small/ratings.csv')

In [3]:
data.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


In [4]:
n_users = data['userId'].unique().shape[0]
n_movies = data['movieId'].unique().shape[0]
print ('Number of users = ' + str(n_users) + ' | Number of movies = ' + str(n_movies) ) 

Number of users = 671 | Number of movies = 9066


We must create the utility matrix(x_label = userId, y_label= movieId, values = rating )

In [5]:
n_users = data['userId'].unique().shape[0]
n_items = data['movieId'].unique().shape[0]

In [6]:
dat = data.pivot(index='userId', columns='movieId', values='rating').fillna(0)

In [7]:
utility_matrix = dat.loc[:, (dat != 0).any(axis=0)]

In [8]:
utility_matrix

movieId,1,2,3,4,5,6,7,8,9,10,...,161084,161155,161594,161830,161918,161944,162376,162542,162672,163949
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Collaborative Filtering (CF) User & User

When the utility matrix is created, similary between users & users could be measure. Cosine similarity is 
one of the metrics which do it.

In [9]:
def similarity_users(a,b):
    
    utility_matrix_users = utility_matrix.T
    
    us = list(utility_matrix_users)
    
    if a in us and b in us:
        
        one = np.array(utility_matrix_users[a].values).reshape(1, -1)
        two = np.array(utility_matrix_users[b].values).reshape(1, -1)
    
        sim = cosine_similarity(one,two)
    
        return(sim[0][0])
    
    else:
        
        return('users not found')

    

In [10]:
def find_user(a):
    
    utility_matrix_users = utility_matrix.T
    
    us = list(utility_matrix_users)
    
    tot = []
    
    for i in us:
        
        if us!= a:
            
            one = np.array(utility_matrix_users[a].values).reshape(1, -1)
            two = np.array(utility_matrix_users[i].values).reshape(1, -1)
    
            sim = cosine_similarity(one,two)
            
            tot.append([i,sim[0][0]])
            
    tot_ord = sorted(tot, key=itemgetter(1))
    
    return(tot_ord[0][0])
            

In [11]:
def predict_movies_user(a):
    
    b = find_user(a)
    
    utility_matrix_users = utility_matrix.T
    
   
    # Movies watched by user a
    
    w = utility_matrix_users[a]
    
    w = w[w > 0.0]
    
    watched = w.index
    
    
    movi = utility_matrix_users[b]
    
    movi2 = movi.sort_values(ascending=False).index
    
    
    if movi.idxmax() not in watched:
        
        return(movi.idxmax())
    
    else:
        
        for i in movi2:
            
            if i not in watched:
                
                return(i)
            
            break

To preform a prediction only change (a) attribute of  predict_movies_user function.

In [12]:
predict_movies_user(1)

17

## Prediction ratings movies Global baseline approach

In [13]:
# mean_rating_movies

inter= []

for i in utility_matrix.values:
    
    inter2 = []
    
    for j in i:
        
        if float(j) > 0 :
            
            inter2.append(j)
    
    inter.append(np.mean(inter2))
                          
mean_rating_movies =  np.mean(inter)             

In [14]:
# mean_rating_user

inter= []

for i in utility_matrix.T.values:
    
    inter2 = []
    
    for j in i:
        
        if float(j) > 0 :
            
            inter2.append(j)
    
    inter.append(np.mean(inter2))
                          
mean_rating_user =  np.mean(inter)  


In [15]:
def golbal(user,movie):
    
    utility_matrix_users = utility_matrix.T
        
        
    # mean movie
    
    w = utility_matrix[movie]
    
    w = w[w > 0.0]
    
    mean_movie = w.mean()
    
    
    # mean user
    
    
    w = utility_matrix_users[user]
    
    w = w[w > 0.0]
    
    mean_user = w.mean()
    
    
    ranting_movie_user = mean_movie + (mean_rating_user - mean_user) + (mean_rating_movies - mean_movie)
    
    
    return(ranting_movie_user)    

To preform a predcition, only change (user, movie) attributes of golbal function.

In [16]:
golbal(3,3)

3.38101369473239