# Movie Recommendation Project

In [2]:
import pandas as pd
import numpy as np
import turicreate

In [3]:
rating_df = pd.read_csv('../Data/train-PDA2019.csv')
test_df = pd.read_csv('../Data/test-PDA2019.csv')
movie_info_df = pd.read_csv('../Data/content-PDA2019.csv')

In [4]:
rating_df

Unnamed: 0,userID,itemID,rating,timeStamp
0,5,648,5,978297876
1,5,1394,5,978298237
2,5,3534,5,978297149
3,5,104,4,978298558
4,5,2735,5,978297919
...,...,...,...,...
470706,12073,3334,3,956718837
470707,12073,1089,3,956718831
470708,12073,3204,1,956718940
470709,12073,1949,4,956709397


In [5]:
train_data = turicreate.SFrame(rating_df)
test_data = turicreate.SFrame(test_df)

In [6]:
popularity_model = turicreate.popularity_recommender.create(train_data,
                                                            user_id='userID',
                                                            item_id='itemID',
                                                            target='rating')

In [7]:
popularity_recomm = popularity_model.recommend(users=[1,2,3,4,5], k=10)
popularity_recomm.print_rows(num_rows=25)

+--------+--------+-------------------+------+
| userID | itemID |       score       | rank |
+--------+--------+-------------------+------+
|   1    |  3172  |        5.0        |  1   |
|   1    |  2931  | 4.666666666666667 |  2   |
|   1    |  3245  | 4.666666666666667 |  3   |
|   1    |  318   | 4.563188593648737 |  4   |
|   1    |  858   | 4.555626598465473 |  5   |
|   1    |  3338  | 4.526315789473684 |  6   |
|   1    |  1148  | 4.500777604976672 |  7   |
|   1    |  729   |        4.5        |  8   |
|   1    |  3216  |        4.5        |  9   |
|   1    |  2358  |        4.5        |  10  |
|   2    |  3172  |        5.0        |  1   |
|   2    |  2931  | 4.666666666666667 |  2   |
|   2    |  3245  | 4.666666666666667 |  3   |
|   2    |  318   | 4.563188593648737 |  4   |
|   2    |  858   | 4.555626598465473 |  5   |
|   2    |  3338  | 4.526315789473684 |  6   |
|   2    |  1148  | 4.500777604976672 |  7   |
|   2    |  729   |        4.5        |  8   |
|   2    |  3

In [8]:
item_sim_model = turicreate.item_similarity_recommender.create(train_data,
                                                              user_id='userID',
                                                              item_id='itemID',
                                                              target = 'rating',
                                                              similarity_type='cosine',
                                                              training_method = 'sparse',
                                                              threshold = 0.1)

In [53]:
item_sim_recomm = item_sim_model.recommend(users=test_data['userID'], k=10)
item_sim_recomm.print_rows(num_rows=10)

+--------+--------+--------------------+------+
| userID | itemID |       score        | rank |
+--------+--------+--------------------+------+
|   1    |  1097  | 1.5480595827102661 |  1   |
|   1    |  260   | 1.5404484272003174 |  2   |
|   1    |  318   | 1.379409670829773  |  3   |
|   1    |  593   | 1.306387980779012  |  4   |
|   1    |  1270  | 1.301592747370402  |  5   |
|   1    |  1196  | 1.2825698852539062 |  6   |
|   1    |  1198  | 1.268221656481425  |  7   |
|   1    |  2716  | 1.2248622179031372 |  8   |
|   1    |  1197  | 1.2190967003504436 |  9   |
|   1    |  2987  | 1.1923166910807292 |  10  |
+--------+--------+--------------------+------+
[19920 rows x 4 columns]



In [54]:
result_sf = item_sim_recomm['userID','itemID']

In [55]:
result_df = result_sf.to_dataframe()

In [56]:
result_df.head(10)

Unnamed: 0,userID,itemID
0,1,1097
1,1,260
2,1,318
3,1,593
4,1,1270
5,1,1196
6,1,1198
7,1,2716
8,1,1197
9,1,2987


In [57]:
result_group = result_df.groupby('userID')['itemID'].apply(list).reset_index(name='recommended_itemIDs')

In [58]:
result_group

Unnamed: 0,userID,recommended_itemIDs
0,1,"[1097, 260, 318, 593, 1270, 1196, 1198, 2716, ..."
1,3,"[1270, 318, 608, 593, 1097, 858, 1198, 1580, 2..."
2,11,"[2470, 592, 2080, 2100, 914, 3448, 1035, 2405,..."
3,29,"[593, 2028, 377, 457, 1196, 1393, 608, 589, 16..."
4,31,"[2355, 2987, 2762, 2959, 2858, 3175, 2699, 239..."
...,...,...
1987,12047,"[377, 1196, 1240, 1198, 780, 589, 1200, 592, 2..."
1988,12051,"[1240, 1200, 1196, 260, 2916, 1580, 1097, 3527..."
1989,12061,"[1270, 2797, 1580, 1196, 1198, 1291, 1240, 589..."
1990,12063,"[919, 1136, 1394, 1270, 1079, 2797, 1278, 3363..."


In [59]:
result_group['recommended_itemIDs'][0]

[1097, 260, 318, 593, 1270, 1196, 1198, 2716, 1197, 2987]

In [60]:
for elem in result_group['recommended_itemIDs']:
    elem = "".join(str(elem))[1:-1].replace(',',' ')
    print(elem)

1097  260  318  593  1270  1196  1198  2716  1197  2987
1270  318  608  593  1097  858  1198  1580  260  919
2470  592  2080  2100  914  3448  1035  2405  2087  1022
593  2028  377  457  1196  1393  608  589  1617  2762
2355  2987  2762  2959  2858  3175  2699  2396  2716  2706
2916  1200  260  1214  1240  589  541  3527  1580  1356
1196  260  592  1198  1097  1270  1240  1200  1580  2916
1307  3448  2100  2797  1968  3253  1270  1393  3526  1240
919  1617  858  608  593  1136  1270  1196  1198  924
1617  260  1136  1270  1240  1307  592  2797  2716  1097
1240  1291  592  1196  260  1200  780  589  2640  1214
1270  2797  2716  1968  1197  1307  608  1097  1291  318
1307  2396  1097  1270  1197  919  1094  608  1674  318
3471  919  924  913  260  750  2366  858  3035  2640
2858  1784  2762  2599  608  1617  318  2716  593  1197
1270  2716  2797  539  3448  1968  1307  1079  1580  1777
1347  1969  1976  1982  1975  1327  1994  1321  2455  1983
1196  1270  260  1214  1580  1240  2916  924

1270  1197  2716  608  1968  260  1136  1580  2791  1198
919  923  912  1247  608  858  908  3504  1617  1252
1291  1270  260  1580  1196  592  1198  1240  2797  608
1270  1394  1097  1079  3448  1196  1968  1197  2797  2716
608  1136  3481  2997  2858  1079  1270  2791  1197  1307
260  1374  1240  3527  1200  1375  2105  1270  1376  1356
1196  2028  1617  589  260  2916  1240  318  1291  2762
3481  2858  2997  3751  1617  3893  3897  2762  3623  3911
1270  1196  1136  1097  260  1214  592  2716  1197  2194
2858  318  593  2762  608  3176  2997  1358  1393  2028
1240  1270  1214  589  1198  1200  1097  2916  3527  592
2858  1617  2396  2762  3176  2599  608  318  593  2959
260  1198  608  593  1196  1270  1097  1136  919  858
608  1198  592  457  858  2916  593  260  1200  377
919  608  1247  1394  1307  858  1198  260  2797  593
1270  1079  1196  1394  1136  1097  1198  1197  3448  1307
1240  260  2916  589  2640  1291  1580  1198  592  1200
3481  2997  3253  1580  3255  1517  1732  1

In [61]:
i = 0
for elem in result_group['recommended_itemIDs']:
    result_group['recommended_itemIDs'][i] = str("".join(str(elem))[1:-1].replace(',',' '))
    i += 1
print(result_group)

      userID                                recommended_itemIDs
0          1  1097  260  318  593  1270  1196  1198  2716  1...
1          3  1270  318  608  593  1097  858  1198  1580  26...
2         11  2470  592  2080  2100  914  3448  1035  2405  ...
3         29  593  2028  377  457  1196  1393  608  589  161...
4         31  2355  2987  2762  2959  2858  3175  2699  2396...
...      ...                                                ...
1987   12047  377  1196  1240  1198  780  589  1200  592  27...
1988   12051  1240  1200  1196  260  2916  1580  1097  3527 ...
1989   12061  1270  2797  1580  1196  1198  1291  1240  589 ...
1990   12063  919  1136  1394  1270  1079  2797  1278  3363 ...
1991   12073  608  923  1247  1252  750  1219  111  858  260...

[1992 rows x 2 columns]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [62]:
result_group

Unnamed: 0,userID,recommended_itemIDs
0,1,1097 260 318 593 1270 1196 1198 2716 1...
1,3,1270 318 608 593 1097 858 1198 1580 26...
2,11,2470 592 2080 2100 914 3448 1035 2405 ...
3,29,593 2028 377 457 1196 1393 608 589 161...
4,31,2355 2987 2762 2959 2858 3175 2699 2396...
...,...,...
1987,12047,377 1196 1240 1198 780 589 1200 592 27...
1988,12051,1240 1200 1196 260 2916 1580 1097 3527 ...
1989,12061,1270 2797 1580 1196 1198 1291 1240 589 ...
1990,12063,919 1136 1394 1270 1079 2797 1278 3363 ...


In [63]:
result_group.to_csv('newSubmition2.csv',sep=',', index=False)