In [1]:
# import library 
import pandas as pd 
import numpy as np

## Load Dataset ##

In [2]:
r_cols = ['user_id', 'movie_id', 'rating', 'unix_timestamp']
ratings = pd.read_csv('ml-100k/u.data', sep='\t', names=r_cols, encoding='latin-1')

In [3]:
ratings

Unnamed: 0,user_id,movie_id,rating,unix_timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596
...,...,...,...,...
99995,880,476,3,880175444
99996,716,204,5,879795543
99997,276,1090,1,874795795
99998,13,225,2,882399156


In [4]:
n_users = ratings.user_id.unique().shape[0]
n_items = ratings.movie_id.unique().shape[0]

In [5]:
print("The number of user:", n_users)
print("The number of n_items:", n_items)

# shape[0] means --> row count
# shape[1] means --> column count

The number of user: 943
The number of n_items: 1682


## Create pivot table for user and movie basedon ratings ##

In [6]:
datama = ratings.pivot_table(index='user_id', columns='movie_id', values='rating')

In [7]:
datama

movie_id,1,2,3,4,5,6,7,8,9,10,...,1673,1674,1675,1676,1677,1678,1679,1680,1681,1682
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,3.0,4.0,3.0,3.0,5.0,4.0,1.0,5.0,3.0,...,,,,,,,,,,
2,4.0,,,,,,,,,2.0,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,3.0,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,,,,,,,,,5.0,,...,,,,,,,,,,
940,,,,2.0,,,4.0,5.0,3.0,,...,,,,,,,,,,
941,5.0,,,,,,4.0,,,,...,,,,,,,,,,
942,,,,,,,,,,,...,,,,,,,,,,


In [8]:
# Replace zero value for none values
data_matrix = datama.replace(np.nan, 0)

In [9]:
data_matrix

movie_id,1,2,3,4,5,6,7,8,9,10,...,1673,1674,1675,1676,1677,1678,1679,1680,1681,1682
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,3.0,4.0,3.0,3.0,5.0,4.0,1.0,5.0,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
940,0.0,0.0,0.0,2.0,0.0,0.0,4.0,5.0,3.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
941,5.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
942,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Find Cosine Similartity for user and item ##

In [10]:
from sklearn.metrics.pairwise import pairwise_distances
user_similarity = pairwise_distances(data_matrix, metric='cosine')
item_similarity = pairwise_distances(data_matrix.T, metric='cosine')

In [11]:
user_similarity

array([[1.33226763e-15, 8.33069016e-01, 9.52540457e-01, ...,
        8.51383057e-01, 8.20492117e-01, 6.01825261e-01],
       [8.33069016e-01, 0.00000000e+00, 8.89408675e-01, ...,
        8.38515222e-01, 8.27732187e-01, 8.94202122e-01],
       [9.52540457e-01, 8.89408675e-01, 0.00000000e+00, ...,
        8.98757435e-01, 8.66583851e-01, 9.73444131e-01],
       ...,
       [8.51383057e-01, 8.38515222e-01, 8.98757435e-01, ...,
        0.00000000e+00, 8.98358201e-01, 9.04880419e-01],
       [8.20492117e-01, 8.27732187e-01, 8.66583851e-01, ...,
        8.98358201e-01, 0.00000000e+00, 8.17535338e-01],
       [6.01825261e-01, 8.94202122e-01, 9.73444131e-01, ...,
        9.04880419e-01, 8.17535338e-01, 0.00000000e+00]])

## Using formula for user and item we are calculation the score ##

In [14]:
# def predict(ratings, similarity, type='user'):
#     if type == 'user':
#         mean_user_rating = ratings.mean(axis=1)
#         #We use np.newaxis so that mean_user_rating has same format as ratings
#         ratings_diff = (ratings - mean_user_rating[:, np.newaxis])
#         pred = mean_user_rating[:, np.newaxis] + similarity.dot(ratings_diff) / np.array([np.abs(similarity).sum(axis=1)]).T
#     elif type == 'item':
#         pred = ratings.dot(similarity) / np.array([np.abs(similarity).sum(axis=1)])
#     return pred  



def predict(ratings, similarity, type='user'):
    if type == 'user':
        mean_user_rating = ratings.mean(axis=1).to_numpy()
        ratings_diff = ratings.to_numpy() - mean_user_rating[:, np.newaxis]
        pred = mean_user_rating[:, np.newaxis] + similarity.dot(ratings_diff) / np.array([np.abs(similarity).sum(axis=1)]).T
    elif type == 'item':
        pred = ratings.to_numpy().dot(similarity) / np.array([np.abs(similarity).sum(axis=1)])
    return pred 

In [15]:
# Prediction Table
user_prediction = predict(data_matrix, user_similarity, type='user')
item_prediction = predict(data_matrix, item_similarity, type='item')

In [16]:
user_prediction

array([[ 2.06532606,  0.73430275,  0.62992381, ...,  0.39359041,
         0.39304874,  0.3927712 ],
       [ 1.76308836,  0.38404019,  0.19617889, ..., -0.08837789,
        -0.0869183 , -0.08671183],
       [ 1.79590398,  0.32904733,  0.15882885, ..., -0.13699223,
        -0.13496852, -0.13476488],
       ...,
       [ 1.59151513,  0.27526889,  0.10219534, ..., -0.16735162,
        -0.16657451, -0.16641377],
       [ 1.81036267,  0.40479877,  0.27545013, ..., -0.00907358,
        -0.00846587, -0.00804858],
       [ 1.8384313 ,  0.47964837,  0.38496292, ...,  0.14686675,
         0.14629808,  0.14641455]])

## user based filtering, first we have to find similarity between the input user and others ##

In [17]:
# Step 1: select input user
input_user = 34

In [18]:
# Step 2: convert the user_similarity table into DataFrame
user_sim_table = pd.DataFrame(user_similarity)

In [19]:
user_sim_table

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,933,934,935,936,937,938,939,940,941,942
0,1.332268e-15,0.833069,0.952540,0.935642,0.621525,0.569761,0.559633,0.680928,0.921862,0.623456,...,0.630473,0.880518,0.725124,0.810295,0.802674,0.881905,0.685928,0.851383,0.820492,0.601825
1,8.330690e-01,0.000000,0.889409,0.821879,0.927021,0.754157,0.892672,0.896656,0.838952,0.840138,...,0.843014,0.692058,0.641211,0.575954,0.680111,0.771417,0.773210,0.838515,0.827732,0.894202
2,9.525405e-01,0.889409,0.000000,0.655849,0.978755,0.927585,0.933863,0.916940,0.938960,0.934849,...,0.968125,0.957247,0.836171,0.930962,0.875755,0.973729,0.838110,0.898757,0.866584,0.973444
3,9.356422e-01,0.821879,0.655849,0.000000,0.968196,0.931956,0.908770,0.811940,0.898716,0.939141,...,0.947893,0.963216,0.866885,0.806529,0.853942,0.969862,0.803142,0.847959,0.829914,0.941248
4,6.215248e-01,0.927021,0.978755,0.968196,0.000000,0.762714,0.626400,0.751070,0.943153,0.798573,...,0.661206,0.919420,0.905076,0.920221,0.851393,0.928541,0.760045,0.860405,0.847503,0.686059
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
938,8.819047e-01,0.771417,0.973729,0.969862,0.928541,0.888148,0.892973,0.904102,0.960148,0.928540,...,0.933961,0.568846,0.741979,0.773551,0.567334,0.000000,0.912313,0.819971,0.956736,0.855750
939,6.859280e-01,0.773210,0.838110,0.803142,0.760045,0.647551,0.670075,0.753117,0.879505,0.657039,...,0.672847,0.892976,0.812464,0.818683,0.824842,0.912313,0.000000,0.854848,0.738624,0.758972
940,8.513831e-01,0.838515,0.898757,0.847959,0.860405,0.855554,0.940007,0.853855,0.856755,0.909695,...,0.953048,0.796699,0.711682,0.765789,0.686600,0.819971,0.854848,0.000000,0.898358,0.904880
941,8.204921e-01,0.827732,0.866584,0.829914,0.847503,0.682672,0.717997,0.824678,0.907503,0.787670,...,0.773560,0.926487,0.910412,0.870446,0.900615,0.956736,0.738624,0.898358,0.000000,0.817535


In [20]:
# Step 3: Find similarity user for 34 using cosing table
similar_input_user= user_sim_table[input_user].sort_values(ascending=True).head(5).index

In [21]:
similar_input_user

Index([34, 450, 852, 811, 509], dtype='int64')

In [22]:
# Step 4: Convert it into list
similar_user_input=list(similar_input_user)

In [23]:
# Using similar_user_input, can selet movie id from ratings table

similar_user_movieid_list=[]
for sim_user in similar_user_input:
    sim=list(ratings[ratings['user_id']==sim_user]['movie_id'])
    similar_user_movieid_list.append(sim)

In [24]:
len(similar_user_movieid_list)

5

In [25]:
# Step 6: Convert all the list as single
import itertools
similar_user_movieid_single_list = list(itertools.chain.from_iterable(similar_user_movieid_list))

In [26]:
# Step 7: Unique movieid from the list
Unique_movieid_similar_user=set(similar_user_movieid_single_list)

In [27]:
len(Unique_movieid_similar_user)

590

In [28]:
# Step 8: Input user watched movie_list
input_user_watched_movieid=list(ratings[ratings['user_id']==input_user]['movie_id'].values)

In [29]:
input_user_watched_movieid

[312,
 242,
 690,
 310,
 259,
 299,
 245,
 332,
 329,
 286,
 1024,
 324,
 294,
 292,
 990,
 289,
 898,
 899,
 288,
 991]

In [30]:
# Step 9: Create a list which shold have recommendation movieid to the input user

recom = []
for per_id in Unique_movieid_similar_user:
    if(per_id in input_user_watched_movieid):
        pass
    else:
        recom.append(per_id)

In [31]:
len(recom)

570

In [None]:
sorted(recom)

In [None]:
# Cross Checking
sorted(Unique_movieid_similar_user)

In [None]:
# Cross Checking
sorted(input_user_watched_movieid)

In [36]:
# Checking the common movie list
list(set(Unique_movieid_similar_user) &set(input_user_watched_movieid))

[1024,
 898,
 259,
 899,
 286,
 288,
 289,
 292,
 294,
 299,
 690,
 310,
 312,
 324,
 329,
 332,
 990,
 991,
 242,
 245]

In [37]:
user_pred=pd.DataFrame(user_prediction)

In [38]:
user_pred

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1672,1673,1674,1675,1676,1677,1678,1679,1680,1681
0,2.065326,0.734303,0.629924,1.010669,0.640686,0.476150,1.784569,1.163032,1.513350,0.704478,...,0.394041,0.394434,0.393981,0.392972,0.393344,0.392272,0.394909,0.393590,0.393049,0.392771
1,1.763088,0.384040,0.196179,0.731538,0.225643,0.003892,1.493597,0.876153,1.108467,0.261991,...,-0.086942,-0.085491,-0.087137,-0.088158,-0.087298,-0.089288,-0.087468,-0.088378,-0.086918,-0.086712
2,1.795904,0.329047,0.158829,0.684154,0.173277,-0.035621,1.488230,0.835769,1.135426,0.236383,...,-0.134795,-0.133537,-0.135543,-0.136438,-0.135041,-0.137611,-0.136374,-0.136992,-0.134969,-0.134765
3,1.729951,0.293913,0.127741,0.644932,0.142143,-0.062261,1.437010,0.796249,1.096663,0.211789,...,-0.161413,-0.160220,-0.161542,-0.162586,-0.161634,-0.163877,-0.162283,-0.163080,-0.161442,-0.161248
4,1.796651,0.454474,0.354422,0.763130,0.359539,0.195987,1.547370,0.908904,1.292027,0.437954,...,0.101762,0.102405,0.101923,0.100839,0.101711,0.099951,0.102515,0.101233,0.101075,0.101201
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
938,1.676950,0.346339,0.177518,0.689906,0.199740,0.003297,1.429565,0.830905,1.070986,0.262183,...,-0.092434,-0.091197,-0.092851,-0.093801,-0.092953,-0.094539,-0.092217,-0.093378,-0.092686,-0.092423
939,1.822346,0.419125,0.286430,0.715605,0.294442,0.106633,1.514591,0.853050,1.195304,0.359260,...,0.014060,0.014688,0.014123,0.013060,0.013669,0.011978,0.014065,0.013021,0.013639,0.013796
940,1.591515,0.275269,0.102195,0.624383,0.133762,-0.069553,1.320734,0.765529,1.035088,0.192697,...,-0.166179,-0.164981,-0.166278,-0.167392,-0.166679,-0.168486,-0.166217,-0.167352,-0.166575,-0.166414
941,1.810363,0.404799,0.275450,0.726616,0.281316,0.087068,1.550310,0.850057,1.205745,0.342987,...,-0.008362,-0.007757,-0.008225,-0.009218,-0.008232,-0.010138,-0.008009,-0.009074,-0.008466,-0.008049


In [39]:
user_pred_Trans=user_pred.T

In [40]:
user_pred_Trans

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,933,934,935,936,937,938,939,940,941,942
0,2.065326,1.763088,1.795904,1.729951,1.796651,1.996889,2.466055,1.693486,1.681165,2.021688,...,1.924844,1.619096,1.891812,1.696368,1.717658,1.676950,1.822346,1.591515,1.810363,1.838431
1,0.734303,0.384040,0.329047,0.293913,0.454474,0.651172,1.099186,0.296285,0.279528,0.650911,...,0.552456,0.311680,0.550513,0.322611,0.431368,0.346339,0.419125,0.275269,0.404799,0.479648
2,0.629924,0.196179,0.158829,0.127741,0.354422,0.528276,1.016489,0.195398,0.121887,0.530669,...,0.451194,0.140613,0.354913,0.144344,0.247851,0.177518,0.286430,0.102195,0.275450,0.384963
3,1.010669,0.731538,0.684154,0.644932,0.763130,0.921054,1.362861,0.612075,0.616094,0.916215,...,0.834176,0.661829,0.893214,0.662304,0.783208,0.689906,0.715605,0.624383,0.726616,0.780521
4,0.640686,0.225643,0.173277,0.142143,0.359539,0.537388,1.006458,0.198638,0.136368,0.531720,...,0.455212,0.166992,0.398759,0.167841,0.283796,0.199740,0.294442,0.133762,0.281316,0.388442
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1677,0.392272,-0.089288,-0.137611,-0.163877,0.099951,0.259848,0.764340,-0.072866,-0.163505,0.263527,...,0.185602,-0.128949,0.095681,-0.143177,-0.009425,-0.094539,0.011978,-0.168486,-0.010138,0.145539
1678,0.394909,-0.087468,-0.136374,-0.162283,0.102515,0.262375,0.766967,-0.070435,-0.161425,0.266037,...,0.188059,-0.126624,0.097792,-0.141018,-0.007085,-0.092217,0.014065,-0.166217,-0.008009,0.148194
1679,0.393590,-0.088378,-0.136992,-0.163080,0.101233,0.261111,0.765654,-0.071651,-0.162465,0.264782,...,0.186831,-0.127786,0.096736,-0.142097,-0.008255,-0.093378,0.013021,-0.167352,-0.009074,0.146867
1680,0.393049,-0.086918,-0.134969,-0.161442,0.101075,0.261061,0.764983,-0.071744,-0.161396,0.264589,...,0.186850,-0.127045,0.097826,-0.140977,-0.007621,-0.092686,0.013639,-0.166575,-0.008466,0.146298


In [41]:
gg=user_pred_Trans[34]

In [42]:
gg

0       1.740469
1       0.283366
2       0.114987
3       0.645096
4       0.127844
          ...   
1677   -0.183977
1678   -0.182367
1679   -0.183172
1680   -0.181400
1681   -0.181306
Name: 34, Length: 1682, dtype: float64

In [43]:
g = pd.DataFrame(gg)

In [44]:
g

Unnamed: 0,34
0,1.740469
1,0.283366
2,0.114987
3,0.645096
4,0.127844
...,...
1677,-0.183977
1678,-0.182367
1679,-0.183172
1680,-0.181400


In [45]:
s = g.T

In [46]:
s[187].values >=0.4

array([ True])

In [47]:
# From recomd list select highest reated flim which would like by the user. Based on user prediction 
highest_Rated = []
input_user_pre = pd.DataFrame(user_pred_Trans[input_user])
input_user_pred = input_user_pre.T
for re in recom:
    value=input_user_pred[re].values
    if(value>=1):
        highest_Rated.append(re)
        

In [48]:
len(highest_Rated)

27

In [54]:
# Now we give movieid respective movie list
i_cols = ['movie_id', 'movie_title', 'release date', 'video release date', 'IMDb URL', 
          'unknown','Action', 'Adventure', 'Animation', 'Children\'s', 'Comedy', 'Crime',
          'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical', 'Mystery',
          'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western'
          ]
items = pd.read_csv('ml-100k/u.item', sep='|', names = i_cols, encoding='latin-1')

In [56]:
# Creating Movie List based on recom movieid
movie_title=[]
for movieid in highest_Rated:
    mov=items[items['movie_id']==movieid]['movie_title'].values
    movie_title.append(mov)

In [57]:
movie_title

[array(['Seven (Se7en) (1995)'], dtype=object),
 array(['I.Q. (1994)'], dtype=object),
 array(['Santa Clause, The (1994)'], dtype=object),
 array(['Free Willy (1993)'], dtype=object),
 array(['Sleepless in Seattle (1993)'], dtype=object),
 array(['Aladdin (1992)'], dtype=object),
 array(['Dances with Wolves (1990)'], dtype=object),
 array(['Snow White and the Seven Dwarfs (1937)'], dtype=object),
 array(['Spitfire Grill, The (1996)'], dtype=object),
 array(['Private Benjamin (1980)'], dtype=object),
 array(['Empire Strikes Back, The (1980)'], dtype=object),
 array(['Princess Bride, The (1987)'], dtype=object),
 array(['Apocalypse Now (1979)'], dtype=object),
 array(['GoodFellas (1990)'], dtype=object),
 array(['Henry V (1989)'], dtype=object),
 array(['Sting, The (1973)'], dtype=object),
 array(['Unforgiven (1992)'], dtype=object),
 array(['Field of Dreams (1989)'], dtype=object),
 array(['Breaking the Waves (1996)'], dtype=object),
 array(['Men in Black (1997)'], dtype=object),
 array

In [58]:
# Converting into pure list
movie_title_list = []
for m in movie_title:
    print(m)
    mv=list(m)
    movie_title_list.append(mv)

['Seven (Se7en) (1995)']
['I.Q. (1994)']
['Santa Clause, The (1994)']
['Free Willy (1993)']
['Sleepless in Seattle (1993)']
['Aladdin (1992)']
['Dances with Wolves (1990)']
['Snow White and the Seven Dwarfs (1937)']
['Spitfire Grill, The (1996)']
['Private Benjamin (1980)']
['Empire Strikes Back, The (1980)']
['Princess Bride, The (1987)']
['Apocalypse Now (1979)']
['GoodFellas (1990)']
['Henry V (1989)']
['Sting, The (1973)']
['Unforgiven (1992)']
['Field of Dreams (1989)']
['Breaking the Waves (1996)']
['Men in Black (1997)']
['Chasing Amy (1997)']
['Sense and Sensibility (1995)']
["Marvin's Room (1996)"]
['In & Out (1997)']
['Client, The (1994)']
['Aladdin and the King of Thieves (1996)']
['Some Like It Hot (1959)']


In [59]:
# Converting into whole list
import itertools
Final_Recommend_movie=list(itertools.chain.from_iterable(movie_title_list))

In [60]:
Final_Recommend_movie

['Seven (Se7en) (1995)',
 'I.Q. (1994)',
 'Santa Clause, The (1994)',
 'Free Willy (1993)',
 'Sleepless in Seattle (1993)',
 'Aladdin (1992)',
 'Dances with Wolves (1990)',
 'Snow White and the Seven Dwarfs (1937)',
 'Spitfire Grill, The (1996)',
 'Private Benjamin (1980)',
 'Empire Strikes Back, The (1980)',
 'Princess Bride, The (1987)',
 'Apocalypse Now (1979)',
 'GoodFellas (1990)',
 'Henry V (1989)',
 'Sting, The (1973)',
 'Unforgiven (1992)',
 'Field of Dreams (1989)',
 'Breaking the Waves (1996)',
 'Men in Black (1997)',
 'Chasing Amy (1997)',
 'Sense and Sensibility (1995)',
 "Marvin's Room (1996)",
 'In & Out (1997)',
 'Client, The (1994)',
 'Aladdin and the King of Thieves (1996)',
 'Some Like It Hot (1959)']

In [61]:
# Checking the common movie list
list(set(recom)&set(input_user_watched_movieid))
[]

[]

In [62]:
def userbased(input_user,user_similarity,user_predictions,similar_user_count,thres):
    #Convert the user_sim table into DataFrame
    user_sim_table=pd.DataFrame(user_similarity)
    #Find similarity user for 78 using cosine table
    similar_input_user= user_sim_table[input_user].sort_values(ascending=True).head(similar_user_count).index
    #Convert in to list
    similar_user_input=list(similar_input_user) 
    #Using similar_user_input,can select movie id from ratings table
    similar_user_movieid_list=[]
    for sim_user in similar_user_input:
        sim=list(ratings[ratings['user_id']==sim_user]['movie_id'])
        similar_user_movieid_list.append(sim)
    #Converting as a whole list
    import itertools
    similar_user_movieid_single_list=list(itertools.chain.from_iterable(similar_user_movieid_list))
    #Unique movieid from the list
    Unique_movieid_similar_user=set(similar_user_movieid_single_list)
    #Input user watched movie_list
    input_user_watched_movieid=list(ratings[ratings['user_id']==input_user]['movie_id'].values)
    #Create a list which should have recom movieid to the input user
    recom=[]
    for per_id in Unique_movieid_similar_user:
        if(per_id in input_user_watched_movieid):
            pass
        else:
            recom.append(per_id)
    #From recommendation list selecting only hightest rated(predicted) value
    highest_Rated=[]
    user_pred=pd.DataFrame(user_prediction)
    user_pred_Trans=user_pred.T
    input_user_pre=pd.DataFrame(user_pred_Trans[input_user])
    input_user_pred=input_user_pre.T
    for re in recom:
        value=input_user_pred[re].values
        if(value>=thres):
            highest_Rated.append(re)
    i_cols = ['movie id', 'movie title' ,'release date','video release date', 'IMDb URL', 'unknown', 'Action', 'Adventure',
    'Animation', 'Children\'s', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy',
    'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']
    items = pd.read_csv('ml-100k/u.item', sep='|', names=i_cols,encoding='latin-1')
    #Creating Movie List based on recom movieid
    movie_title=[]
    for movieid in highest_Rated:
        mov=items[items['movie id']==movieid]['movie title'].values
        movie_title.append(mov)
    #Converting into pure list
    movie_title_list=[]
    for m in movie_title:
        print(m)
        mv=list(m)
        movie_title_list.append(mv)
    #Converting into whole list
    import itertools
    Final_Recommend_movie=list(itertools.chain.from_iterable(movie_title_list))
    print("The common Movie in Recom & User:",list(set(recom)&set(input_user_watched_movieid)))
    return Final_Recommend_movie
    

In [63]:
#def userbased(input_user,user_similarity,user_predictions,similar_user_count,similar_user_movieid_count,thres):
Recommended_movie=userbased(67,user_similarity,user_pred,5,1.5)

['Professional, The (1994)']
['Dances with Wolves (1990)']
['Snow White and the Seven Dwarfs (1937)']
['Princess Bride, The (1987)']
['Apocalypse Now (1979)']
['Men in Black (1997)']
["Marvin's Room (1996)"]
The common Movie in Recom & User: []


In [64]:
len(Recommended_movie)

7