# Recommandation filtrage collaboratif

In [163]:
import os
import re
import pandas as pd
from sklearn.decomposition import NMF
from scipy.sparse import dok_matrix
import numpy as np
import pandas as pd
from scipy.spatial.distance import sqeuclidean, cosine

In [50]:
# take as input two lists of ratings

def MSE_err(truth,pred):
    """
    computes MSE from real-pred difference
    """
    return np.mean((truth-pred)**2)

def MAE_err(truth,pred):
    """
    computes MAE from real-pred difference
    """
    return np.mean(abs(np.array(truth-pred)))
        

In [51]:
#path = '/Vrac/PLDAC_addic7ed/ratings/small_ratings'
#path = "/Users/constancescherer/Desktop/ratings/ratings_60"
path = "ratings_60"

In [52]:
d_user = dict() #{username : {serie: note, serie : note}}

for user in sorted(os.listdir(path)):
    username = re.sub(".txt", "", user)
    d_user[username] = dict()
    with open(path+"/"+user) as file: 
        lignes = file.readlines()
        for ligne in lignes :
            serie, rating = ligne.split(" ")
            rating = rating.rstrip("\n")
            rating = float(rating)
            
            d_user[username][serie] = rating

In [53]:
liste_series = set()
for username, d_s in d_user.items() :
    for serie, rating in d_s.items() :
        liste_series.add(serie)
liste_series = list(liste_series)
len(liste_series)

1357

In [54]:
data = []
for username, d_s in d_user.items() :
    for serie, rating in d_s.items() :
        data.append( (username, serie, rating) )

In [55]:
data[0]

('ianlouisiana', 'father-figure', 1.0)

In [56]:
# We first remap users and item to ids between (0,len(user)) and (0,len(item))
u_dic = {} #{username : user id}
i_dic = {} #{item title : item id}
        
all_data = [] #[(user id, item id, rating)]
    
d_username_id = dict()
d_itemname_id = dict()
for uid,iid,rating in data:  # iterating on all data
    
    uk = u_dic.setdefault(uid,len(u_dic))
    ik = i_dic.setdefault(iid,len(i_dic))
    all_data.append((uk,ik,float(rating)))
    d_username_id[uid] = uk
    d_itemname_id[iid] = ik

num_user = len(u_dic)
num_item = len(i_dic)

print(str(num_user)+" users and "+str(num_item)+" items.")

60 users and 1357 items.


In [57]:
# (1) Create sparse matrix from all ratings
Full = dok_matrix((num_user, num_item), dtype=np.float32)

for uid,iid,rating in all_data:
    Full[uid,iid] = float(rating)
    
    
# (2) Factorizing matrix

model = NMF(n_components=25, init='random', random_state=0, max_iter=350)
U = model.fit_transform(Full) #users
I = model.components_      #items

I = I.transpose()
I.shape


(1357, 25)

In [58]:
Full

<60x1357 sparse matrix of type '<class 'numpy.float32'>'
	with 3673 stored elements in Dictionary Of Keys format>

In [59]:
Full.shape

(60, 1357)

In [60]:
len(all_data)

3673

In [61]:
# We take 10% of the train set as test data
train_mat = dok_matrix((num_user, num_item), dtype=np.float32)
test = []
train = []
    
for i,(uid,iid,rating) in enumerate(all_data):
    if i%10 == 0: #one out of 10 is for test
        test.append((uid,iid,rating))
    else:
        train.append((uid,iid,rating))
        train_mat[uid,iid] = rating
    
print("Number of train examples: ", train_mat.nnz)
print("Number of test examples: ", len(test))


Number of train examples:  3305
Number of test examples:  368


In [62]:
train_mat.shape

(60, 1357)

In [66]:
from sklearn.decomposition import NMF, TruncatedSVD


print("----------------------NMF---------------------------")



## NMF model
model = NMF(n_components=16, solver='cd' ,random_state=0, max_iter=100,alpha=5,l1_ratio=0.5)

#get submatrices
U_nmf = model.fit_transform(train_mat)
I_nmf = model.components_.transpose()

print("Shapes :")
print(U_nmf.shape) ## see the shapes of your submatrices
print(I_nmf.shape)

## to complete
def pred_func_nmf(uid,iid):
    
    Uu = U_nmf[uid]
    Ii = I_nmf[iid]
    
    return  np.dot(Uu, Ii)


## Getting the truth values
truth_tr = np.array([rating for (uid,iid),rating in train_mat.items()])
truth_te = np.array([rating for uid,iid,rating in test])

prediction_tr = np.array([pred_func_nmf(u, i) for (u,i),rating in train_mat.items()])
prediction_te = np.array([pred_func_nmf(u, i) for u,i,rating in test])


print("Training Error:")
print("MSE:",  MSE_err(prediction_tr,truth_tr))
print("MAE:",  MAE_err(prediction_tr,truth_tr))
    
print("Test Error:")
print("MSE:",  MSE_err(prediction_te,truth_te))
print("MAE:",  MAE_err(prediction_te,truth_te))

----------------------NMF---------------------------
Shapes :
(60, 16)
(1357, 16)
Training Error:
MSE: 25.42516863586805
MAE: 4.271334194527465
Test Error:
MSE: 51.88690779991371
MAE: 6.750066716516392


In [67]:
print("----------------------SVD---------------------------")

## SVD Model

model = TruncatedSVD(n_components=150)

#get submatrices
U_svd = model.fit_transform(train_mat)
I_svd = model.components_.transpose()


def pred_func_svd(uid,iid):
    
    Uu = U_svd[uid]
    Ii = I_svd[iid]
    
    return np.dot(Uu, Ii)  

    
prediction_tr = np.array([pred_func_svd(u, i) for (u,i),rating in train_mat.items()])
prediction_te = np.array([pred_func_svd(u, i) for u,i,rating in test])


print("Training Error:")
print("MSE:",  MSE_err(prediction_tr,truth_tr))
print("MAE:",  MAE_err(prediction_tr,truth_tr))
    
print("Test Error:")
print("MSE:",  MSE_err(prediction_te,truth_te))
print("MAE:",  MAE_err(prediction_te,truth_te))

----------------------SVD---------------------------
Training Error:
MSE: 1.1096638e-11
MAE: 2.4564868e-06
Test Error:
MSE: 60.90488546635258
MAE: 7.388586571976479


In [68]:
print("----------------------MEAN ONLY---------------------------")


# compute mean training ratings (~3.5)
mean = np.mean([rating for (uid,iid),rating in train_mat.items()])


def pred_func_mean(uid,iid):
    
    
    return mean

print("mean rating is ", mean)


prediction_tr = np.array([pred_func_mean(u, i) for (u,i),rating in train_mat.items()])
prediction_te = np.array([pred_func_mean(u, i) for u,i,rating in test])

print("Training Error:")
print("MSE:",  MSE_err(prediction_tr,truth_tr))
print("MAE:",  MAE_err(prediction_tr,truth_tr))
    
print("Test Error:")
print("MSE:",  MSE_err(prediction_te,truth_te))
print("MAE:",  MAE_err(prediction_te,truth_te))

----------------------MEAN ONLY---------------------------
mean rating is  7.369138
Training Error:
MSE: 6.1753864
MAE: 1.9692961
Test Error:
MSE: 6.314052361355291
MAE: 1.973389278287473


In [69]:
from sklearn.decomposition import NMF, TruncatedSVD


# (1) compute mean of training ratings
mean = mean




# (2) mean normalize training matrix
tmn = dok_matrix((num_user, num_item), dtype=np.float32)

for (uid,iid), rating in train_mat.items():
    tmn[uid,iid] = rating - mean

# (3) factorize matrix
model_norm = TruncatedSVD(n_components=150)

#get submatrices
U_msvd = model.fit_transform(tmn)
I_msvd = model.components_.transpose()

def pred_func_msvd(uid,iid): 
    
    Uu = U_msvd[uid]
    Ii = I_msvd[iid]
    
    return np.dot(Uu, Ii) + mean


prediction_tr = np.array([pred_func_msvd(u, i) for (u,i),rating in train_mat.items()])
prediction_te = np.array([pred_func_msvd(u, i) for u,i,rating in test])


print("Training Error:")
print("MSE:",  MSE_err(prediction_tr,truth_tr))
print("MAE:",  MAE_err(prediction_tr,truth_tr))
    
print("Test Error:")
print("MSE:",  MSE_err(prediction_te,truth_te))
print("MAE:",  MAE_err(prediction_te,truth_te))
    

Training Error:
MSE: 1.1855937e-12
MAE: 6.6548e-07
Test Error:
MSE: 6.314052483242787
MAE: 1.9733892795832262


In [70]:
from sklearn.decomposition import NMF, TruncatedSVD


def group_by_user(tuple_list):
    r_dic = {}
    for uid,iid,rating in tuple_list:
        list_rev = r_dic.get(uid,[])
        list_rev.append(rating)
    
        r_dic[uid] =list_rev
    return r_dic


def group_by_item(tuple_list):
    r_dic = {}
    for uid,iid,rating in tuple_list:
        list_rev = r_dic.get(iid,[])
        list_rev.append(rating)
    
        r_dic[iid] =list_rev
    return r_dic





# (1) compute means of training set
mean = mean

# user and item deviation to mean
u_means = {u:(np.mean(ratings - mean)) for u,ratings in group_by_user(train).items()}
i_means = {i:(np.mean(ratings) - mean) for i,ratings in group_by_item(train).items()}




# (2) normalize training matrix
tmn_k = dok_matrix((num_user, num_item), dtype=np.float32)

for (uid,iid), rating in train_mat.items():
    tmn_k[uid,iid] = rating - mean - u_means.get(uid,0) - i_means.get(iid,0)
    
# (3) factorize matrix
model_kor = TruncatedSVD(n_components=150)


U_ksvd = model.fit_transform(tmn_k)
I_ksvd = model.components_.transpose()


def pred_func_ksvd(uid,iid):
    Uu = U_ksvd[uid]
    Ii = I_ksvd[iid]
    Bu = u_means.get(uid,0)
    Bi = i_means.get(iid, 0)
    
    return np.dot(Uu, Ii) + mean + Bu + Bi


prediction_tr = np.array([pred_func_ksvd(u, i) for (u,i),rating in train_mat.items()])
prediction_te = np.array([pred_func_ksvd(u, i) for u,i,rating in test])


print("Training Error:")
print("MSE:",  MSE_err(prediction_tr,truth_tr))
print("MAE:",  MAE_err(prediction_tr,truth_tr))
    
print("Test Error:")
print("MSE:",  MSE_err(prediction_te,truth_te))
print("MAE:",  MAE_err(prediction_te,truth_te))

Training Error:
MSE: 5.903622099668938e-13
MAE: 5.307904076570574e-07
Test Error:
MSE: 5.756102910616171
MAE: 1.860425479002328


In [71]:
# The dcg@k is the sum of the relevance, penalized gradually
def dcg_at_k(r, k):
    """Score is discounted cumulative gain (dcg)
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        k: Number of results to consider
        
    """
    r = np.asfarray(r)[:k]
    if r.size:
        return np.sum(r / np.log2(np.arange(2, r.size + 2)))
        
    return 0.

# test values
# r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
# dcg_at_k(r, 1) => 3.0
# dcg_at_k(r, 2) => 4.2618595071429155
    

# And it's normalized version
def ndcg_at_k(r, k):
    """
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        k: Number of results to consider
    """
    dcg_max =  dcg_at_k(sorted(r)[::-1], k)
    if not dcg_max:
        return 0.
    return dcg_at_k(r, k) / dcg_max

# test values
# r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
# ndcg_at_k(r, 1) => 1.0
# ndcg_at_k(r, 4) => 0.794285
    
r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]    
ndcg_at_k(r, 4)   

0.7942854176010882

In [72]:
from random import shuffle

#1) Group (uid,iid,rating) per uid
def group_by_user(tuple_list):
    r_dic = {}
    for uid,iid,rating in tuple_list:
        list_rev = r_dic.get(uid,[])
        list_rev.append((uid,iid,rating))
    
        r_dic[uid] =list_rev
    return r_dic #returns {uid:[(uid,iid,rating),...],...}



userg_train = group_by_user(train)  #returns {uid:[(uid,iid,rating),...],...}
userg_test = group_by_user(test)


# Function to compute a random shuffle ndcg
def random_ndcg(uid_group_tuples,k=10):
    mean_ndcg = 0
    num_users = 0
    
    #for each test set
    for _,list_rating in uid_group_tuples.items():

        #shuffle real ratings.
        real_ratings = [rating for uid,iid,rating in list_rating]
        shuffle(real_ratings)
        pred_objects = real_ratings

        mean_ndcg += ndcg_at_k(pred_objects,k)
        num_users += 1

    return  mean_ndcg/num_users



#Function to compute ndcg on test set
def mean_ndcg_UI(U,I,pred_function,uid_group_tuples,k=10):
    mean_ndcg = 0
    num_users = 0
    
    #for each test set
    for _,list_rating in uid_group_tuples.items():
        
        #2)compute predictions
        pred_ratings = [pred_function(uid, iid) for uid, iid, rating in list_rating ]
        
        #3)to sort real ratings
        real_ratings = [rating for uid,iid,rating in list_rating]
        pred_objects = [ real_ratings[rid] for rid in np.argsort(pred_ratings)[::-1]]
        
        #4)and compute ndcg
        mean_ndcg += ndcg_at_k(pred_objects,k)
        num_users += 1

    return  mean_ndcg/num_users
    
    

print("train") 
print("-"*10)
print("mean == random", random_ndcg(userg_train))
print("ndcg nmf", mean_ndcg_UI(U_nmf,I_nmf,pred_func_nmf,userg_train)) 
print("ndcg svd", mean_ndcg_UI(U_svd,I_svd,pred_func_svd,userg_train))
print("ndcg svd + mean", mean_ndcg_UI(U_msvd,I_msvd,pred_func_nmf,userg_train))
print("ndcg svd koren", mean_ndcg_UI(U_ksvd,I_ksvd,pred_func_ksvd,userg_train))

print(" ")

print("test")    
print("-"*10) 
print("mean == random", random_ndcg(userg_test))
print("ndcg nmf", mean_ndcg_UI(U_nmf,I_nmf,pred_func_nmf,userg_test)) 
print("ndcg svd", mean_ndcg_UI(U_svd,I_svd,pred_func_svd,userg_test))
print("ndcg svd + mean", mean_ndcg_UI(U_msvd,I_msvd,pred_func_nmf,userg_test))
print("ndcg svd koren", mean_ndcg_UI(U_ksvd,I_ksvd,pred_func_ksvd,userg_test))


train
----------
mean == random 0.7501640499066579
ndcg nmf 0.8852155036620905
ndcg svd 1.0
ndcg svd + mean 0.8852155036620905
ndcg svd koren 1.0
 
test
----------
mean == random 0.9310830872377991
ndcg nmf 0.9457446096440696
ndcg svd 0.9379596860894555
ndcg svd + mean 0.9457446096440696
ndcg svd koren 0.952903111651975


In [73]:
from collections import Counter

counts = Counter(iid for _,iid,_ in train)

#the popularity predictor
def pop_pred(uid,iid):
    return counts[iid]

#Random ndcg, return a shuffled lists of all possible ratings
def random_ndcg_full(k=10,default=0):
    mean_ndcg = 0
    num_users = 0
    
    for _,list_rating in userg_test.items():

        #all possible ratings
        real_ratings = [rating for uid,iid,rating in list_rating] + [default]*(num_item - len(list_rating) - len(userg_train[uid]))
        shuffle(real_ratings)
        pred_objects = real_ratings
        
        mean_ndcg += ndcg_at_k(pred_objects,k)
        num_users += 1

    return  mean_ndcg/len(userg_test)


def mean_ndcg_UI_FULL(U,I,pred_function,k=10,default=0):
    mean_ndcg = 0
    
    test_users = set(uid for uid,_,_ in test)
    
    for user in test_users:
        u_train_set = set(iid for _,iid,_ in userg_train[uid])
        u_test_dic =  {iid:rating for uid,iid,rating in userg_test[uid]}
        
        pred_ratings = []
        real_ratings = []
        
        
        for item in range(num_item):
            
            if item in u_train_set:
                continue
            else:
                p_rating = pred_function(user,item)
            
            pred_ratings.append(p_rating)
            real_ratings.append(u_test_dic.get(item,default))
            
        #pred_objects = [] ##### complete here
        pred_objects = [ real_ratings[rid] for rid in np.argsort(pred_ratings)[::-1]]
        
        mean_ndcg += ndcg_at_k(pred_objects,k)

    return  mean_ndcg/len(test_users)

print("mean == random", random_ndcg_full())
print("ndcg pop", mean_ndcg_UI_FULL(U_nmf,I_nmf,pop_pred)) 
print("ndcg nmf", mean_ndcg_UI_FULL(U_nmf,I_nmf,pred_func_nmf)) 
print("ndcg svd", mean_ndcg_UI_FULL(U_svd,I_svd,pred_func_svd))
print("ndcg svd + mean", mean_ndcg_UI_FULL(U_msvd,I_msvd,pred_func_nmf))
print("ndcg svd koren", mean_ndcg_UI_FULL(U_msvd,I_msvd,pred_func_ksvd))

mean == random 0.006194394928816213
ndcg pop 0.033854063241665906
ndcg nmf 0.01205926922355665
ndcg svd 0.014423844767087805
ndcg svd + mean 0.01205926922355665
ndcg svd koren 0.004585390866403519


In [74]:
def create_sparse_mat(data) : 
    # We first remap users and item to ids between (0,len(user)) and (0,len(item))
    u_dic = {}
    i_dic = {}

    all_data = []

    d_username_id = dict()
    d_itemname_id = dict()
    for uid,iid,rating in data:  # iterating on all data

        uk = u_dic.setdefault(uid,len(u_dic))
        ik = i_dic.setdefault(iid,len(i_dic))
        all_data.append((uk,ik,float(rating)))
        d_username_id[uid] = uk
        d_itemname_id[iid] = ik

    num_user = len(u_dic)
    num_item = len(i_dic)
    
    # (1) Create sparse matrix from all ratings
    Full = dok_matrix((num_user, num_item), dtype=np.float32)

    for uid,iid,rating in all_data:
        Full[uid,iid] = float(rating)
        
    return d_username_id, d_itemname_id, Full

In [139]:
reversed_u_dic =  dict([[v,k] for k,v in u_dic.items()]) #{user id : username}
reversed_i_dic = dict([[v,k] for k,v in i_dic.items()]) #{item id: item title}

In [76]:
def plus_proche_voisin(username, d_username_id, Full) :
    user_id = d_username_id[username]
    f = Full[user_id].todense()
    d_min = 1000
    i_min = -1
    for i in range(0, Full.shape[0]) :
        if i != user_id :
            fi = Full[i].todense()
            d = cosine(f, fi)
            if d < d_min :
                i_min = i
                d_min = d
    return i_min

In [140]:
ppv = plus_proche_voisin('WeAreLive', d_username_id, Full)
reversed_u_dic[ppv]

'Johnny-the-Film-Sentinel-2187'

In [78]:
d_user['WeAreLive']

{'arrow': 6.0,
 'avengers-assemble': 10.0,
 'batgirl-year-one': 8.0,
 'batman': 10.0,
 'batman-beyond': 9.0,
 'batman-the-animated-series': 10.0,
 'batman-the-brave-and-the-bold': 10.0,
 'ben-10': 10.0,
 'ben-10-alien-force': 10.0,
 'ben-10-omniverse': 10.0,
 'ben-10-ultimate-alien': 8.0,
 'beware-the-batman': 9.0,
 'clone': 10.0,
 'dinosaur-king': 6.0,
 'duck-dodgers': 7.0,
 'evolution': 4.0,
 'fantastic-four': 7.0,
 'generator-rex': 10.0,
 'gotham': 3.0,
 'green-lantern-the-animated-series': 10.0,
 'hot-wheels-battle-force-5': 10.0,
 'hulk-and-the-agents-of-smash': 6.0,
 'iron-man': 5.0,
 'iron-man-armored-adventures': 5.0,
 'justice': 10.0,
 'justice-league': 10.0,
 'legends': 8.0,
 'legion-of-super-heroes': 9.0,
 'mr-bean': 10.0,
 'regular-show': 10.0,
 'silver-surfer': 1.0,
 'smallville': 10.0,
 'star-wars-rebels': 6.0,
 'star-wars-the-clone-wars': 10.0,
 'static-shock': 10.0,
 'steven-universe': 7.0,
 'supergirl': 4.0,
 'sym-bionic-titan': 10.0,
 'teen-titans': 10.0,
 'teen-titan

In [50]:
d_user['Johnny-the-Film-Sentinel-2187']

{'american-horror-story': 8.0,
 'archer': 10.0,
 'avatar-the-last-airbender': 9.0,
 'batman': 10.0,
 'batman-beyond': 9.0,
 'batman-the-animated-series': 10.0,
 'better-call-saul': 10.0,
 'beware-the-batman': 7.0,
 'black-mirror': 10.0,
 'boardwalk-empire': 10.0,
 'bojack-horseman': 10.0,
 'breaking-bad': 10.0,
 'brotherhood': 10.0,
 'clone': 9.0,
 'cowboy-bebop': 10.0,
 'daredevil': 10.0,
 'doctor-who': 10.0,
 'empire': 10.0,
 'fargo': 10.0,
 'farscape': 9.0,
 'fawlty-towers': 10.0,
 'game-of-thrones': 10.0,
 'gravity': 10.0,
 'gravity-falls': 10.0,
 'justice': 10.0,
 'justice-league': 10.0,
 'last-week-tonight-with-john-oliver': 9.0,
 'lupin-the-3rd': 9.0,
 'rick-and-morty': 10.0,
 'samurai-jack': 9.0,
 'seinfeld': 10.0,
 'sherlock': 10.0,
 'smallville': 9.0,
 'star-trek-continues': 9.0,
 'star-wars-rebels': 9.0,
 'star-wars-the-clone-wars': 9.0,
 'steven-universe': 10.0,
 'teen-titans-go-': 2.0,
 'the-batman': 4.0,
 'the-man-in-the-high-castle': 9.0,
 'the-muppet-show': 10.0,
 'the-

Deux utilisateurs qui ont regardé et noté beaucoup de séries de superhéros.

In [106]:
def series_pas_en_commun(username1, username2, d_user, d_itemname_id) :
    """id series que user2 a vu et pas user1"""
    series_u1 = d_user[username1].keys()
    series_u2 = d_user[username2].keys()
    
    s = set()
    #for s1 in series_u1 :
     #   if s1 not in series_u2 and d_user[username1][s1] >= 7:
            
      #      s.add(d_itemname_id[s1])
            
    for s2 in series_u2:
        if s2 not in series_u1 and d_user[username2][s2] >= 7:
            s.add(d_itemname_id[s2])
            
    return list(s)

In [141]:
pascomm = series_pas_en_commun("Johnny-the-Film-Sentinel-2187", "WeAreLive", d_user, d_itemname_id)

In [142]:
for ID in pascomm:
    print(reversed_i_dic[ID])

vixen
generator-rex
batgirl-year-one
sym-bionic-titan
ultimate-spider-man
ben-10-ultimate-alien
legion-of-super-heroes
regular-show
teen-titans
duck-dodgers
ben-10-alien-force
the-avengers
ben-10-omniverse
we-bare-bears
ben-10
transformers-prime
hot-wheels-battle-force-5
fantastic-four
the-spectacular-spider-man
the-flash
wolverine-and-the-x-men
wolverine
the-garfield-show
batman-the-brave-and-the-bold
legends
mr-bean
static-shock
avengers-assemble
green-lantern-the-animated-series


In [147]:
def recommandation(username1, data, d_user, nb_pred) :
    d_username_id, d_itemname_id, Full = create_sparse_mat(data)
    
    ppv = plus_proche_voisin(username1, d_username_id, Full)
    username2 = ""
    for username, ID in d_username_id.items() :
        if ID == ppv :
            username2 = username
            break
    if username2 == "" :
        print("problème avec le nom du plus proche voisin")
        return

    series_non_vues = series_pas_en_commun(username1, username2, d_user, d_itemname_id)

    predictions =dict()
    for i in series_non_vues:
        predictions[i] = pred_func_ksvd(d_username_id[username1], i)  #{id serie: note predite}
    sorted_rec = [(k, predictions[k]) for k in sorted(predictions, key=predictions.get, reverse=True)]
    res = [reversed_i_dic[sorted_rec[i][0]] for i in range(nb_pred)]
    
    return res, sorted_rec

In [167]:
top3_reco, p = recommandation('Johnny-the-Film-Sentinel-2187', data, d_user, 3)

In [168]:
top3_reco

['generator-rex', 'sym-bionic-titan', 'ultimate-spider-man']

Pas mal du tout !

In [150]:
user2 = d_user['WeAreLive']

In [171]:
s = [reversed_i_dic[p[i][0]] for i in range(len(p))]

In [172]:
notes_predites_user1 = [p[i][1] for i in range(len(p))]

In [177]:
notes_ppv = [user2[reversed_i_dic[p[i][0]]] for i in range(len(p))]

In [181]:
l = [('Serie', s), ('Note predite pour Johnny', notes_predites_user1), ('Note donnee par son plus proche voisin', notes_ppv)]

In [182]:
df = pd.DataFrame.from_items(l)

In [183]:
df

Unnamed: 0,Serie,Note predite pour Johnny,Note donnee par son plus proche voisin
0,generator-rex,11.90359,10.0
1,sym-bionic-titan,11.90359,10.0
2,ultimate-spider-man,11.90359,10.0
3,regular-show,11.90359,10.0
4,teen-titans,11.90359,10.0
5,ben-10-alien-force,11.90359,10.0
6,ben-10-omniverse,11.90359,10.0
7,ben-10,11.90359,10.0
8,hot-wheels-battle-force-5,11.90359,10.0
9,batman-the-brave-and-the-bold,11.90359,10.0


Remarque : peut-être faut-il ramener les notes prédites > 10 à 10 ?