In [1]:
from surprise import SVD
from surprise import Dataset
from surprise import accuracy
from surprise.model_selection import train_test_split
from surprise import Reader
from surprise.dataset import DatasetAutoFolds
import pandas as pd

In [2]:
df = pd.read_csv('data/ratings.csv')
df

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352


In [3]:
df.to_csv('data/ratings_nohead.csv',index=False,header=None)

In [4]:
df['rating'].unique()

array([4. , 5. , 3. , 2. , 1. , 4.5, 3.5, 2.5, 0.5, 1.5])

In [5]:
reader = Reader(line_format='user item rating timestamp',sep=',',rating_scale=(0.5,5))
data_folds = DatasetAutoFolds(ratings_file='data/ratings_nohead.csv',reader=reader)
model = SVD(n_factors=50 , random_state=3333)
train_data = data_folds.build_full_trainset()
model.fit(train_data)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x1f59e08e860>

In [31]:
model.predict('1','50')

Prediction(uid='1', iid='50', r_ui=None, est=5, details={'was_impossible': False})

In [9]:
movies = pd.read_csv('data/movies.csv')
movies

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
9739,193585,Flint (2017),Drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


In [13]:
total_movies = movies['movieId'].tolist()
seen_movies = df[df['userId']==1]['movieId'].tolist()

In [14]:
print(total_movies)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 36, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 52, 53, 54, 55, 57, 58, 60, 61, 62, 63, 64, 65, 66, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 85, 86, 87, 88, 89, 92, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 110, 111, 112, 113, 116, 117, 118, 119, 121, 122, 123, 125, 126, 128, 129, 132, 135, 137, 140, 141, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 183, 184, 185, 186, 187, 188, 189, 190, 191, 193, 194, 195, 196, 198, 199, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 222, 223, 224, 225, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 246, 247, 248, 249, 250, 251, 252, 253, 254, 2

In [15]:
print(seen_movies)

[1, 3, 6, 47, 50, 70, 101, 110, 151, 157, 163, 216, 223, 231, 235, 260, 296, 316, 333, 349, 356, 362, 367, 423, 441, 457, 480, 500, 527, 543, 552, 553, 590, 592, 593, 596, 608, 648, 661, 673, 733, 736, 780, 804, 919, 923, 940, 943, 954, 1009, 1023, 1024, 1025, 1029, 1030, 1031, 1032, 1042, 1049, 1060, 1073, 1080, 1089, 1090, 1092, 1097, 1127, 1136, 1196, 1197, 1198, 1206, 1208, 1210, 1213, 1214, 1219, 1220, 1222, 1224, 1226, 1240, 1256, 1258, 1265, 1270, 1275, 1278, 1282, 1291, 1298, 1348, 1377, 1396, 1408, 1445, 1473, 1500, 1517, 1552, 1573, 1580, 1587, 1617, 1620, 1625, 1644, 1676, 1732, 1777, 1793, 1804, 1805, 1920, 1927, 1954, 1967, 2000, 2005, 2012, 2018, 2028, 2033, 2046, 2048, 2054, 2058, 2078, 2090, 2093, 2094, 2096, 2099, 2105, 2115, 2116, 2137, 2139, 2141, 2143, 2161, 2174, 2193, 2253, 2268, 2273, 2291, 2329, 2338, 2353, 2366, 2387, 2389, 2395, 2406, 2414, 2427, 2450, 2459, 2470, 2478, 2492, 2502, 2528, 2529, 2542, 2571, 2580, 2596, 2616, 2617, 2628, 2640, 2641, 2644, 2648, 2

In [16]:
unseen_movies = set(total_movies) - set(seen_movies)
unseen_movies

{2,
 4,
 5,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 32799,
 34,
 131098,
 36,
 131104,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 48,
 49,
 65585,
 52,
 53,
 54,
 55,
 65588,
 57,
 58,
 98361,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 65596,
 68,
 69,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 83,
 85,
 86,
 87,
 88,
 89,
 163925,
 92,
 93,
 94,
 95,
 96,
 97,
 32862,
 99,
 100,
 102,
 103,
 104,
 105,
 106,
 107,
 108,
 32875,
 111,
 112,
 113,
 65642,
 65651,
 116,
 117,
 118,
 119,
 121,
 122,
 123,
 32892,
 125,
 126,
 128,
 129,
 32898,
 132,
 135,
 137,
 32906,
 140,
 141,
 163981,
 144,
 145,
 146,
 147,
 148,
 149,
 150,
 152,
 153,
 154,
 155,
 156,
 158,
 159,
 160,
 161,
 162,
 164,
 165,
 166,
 131237,
 168,
 169,
 170,
 171,
 172,
 173,
 174,
 175,
 176,
 177,
 178,
 179,
 180,
 181,
 183,
 184,
 185,
 186,
 187,
 188,
 189,
 190,
 191,
 98491,
 193,
 194,
 195,
 196,
 98

In [25]:
rating_result = []

for mid in unseen_movies:
    result = model.predict('42',str(mid))
    data = {}
    data['id'] = result.iid
    data['rating'] = result.est
    data['title'] = movies[movies['movieId']==int(result.iid)]['title']
    rating_result.append(data)
    print(f'mid : {mid}  , score : {result.est}')

rating_result

mid : 2  , score : 3.6084324202111953
mid : 4  , score : 3.2324175221355413
mid : 5  , score : 3.3468814904515396
mid : 7  , score : 3.26710710379682
mid : 8  , score : 3.0046966144384375
mid : 9  , score : 3.1796259205354
mid : 10  , score : 3.9072350910970908
mid : 11  , score : 4.067417953290122
mid : 12  , score : 3.0941418235925418
mid : 13  , score : 3.628373000523643
mid : 14  , score : 3.817045303097175
mid : 15  , score : 3.217298120312344
mid : 16  , score : 4.497192647743204
mid : 17  , score : 3.653962388213
mid : 18  , score : 3.751039610770619
mid : 19  , score : 2.9262710548690314
mid : 20  , score : 3.151625562508841
mid : 21  , score : 3.6340752687134303
mid : 22  , score : 3.7361824930088408
mid : 23  , score : 3.5013772944208315
mid : 24  , score : 3.4213508146367904
mid : 25  , score : 3.7035363628129985
mid : 26  , score : 3.6918996959761485
mid : 27  , score : 3.139656389603318
mid : 28  , score : 4.002563598352709
mid : 29  , score : 4.05771596991143
mid : 30  , 

[{'id': '2',
  'rating': 3.6084324202111953,
  'title': 1    Jumanji (1995)
  Name: title, dtype: object},
 {'id': '4',
  'rating': 3.2324175221355413,
  'title': 3    Waiting to Exhale (1995)
  Name: title, dtype: object},
 {'id': '5',
  'rating': 3.3468814904515396,
  'title': 4    Father of the Bride Part II (1995)
  Name: title, dtype: object},
 {'id': '7',
  'rating': 3.26710710379682,
  'title': 6    Sabrina (1995)
  Name: title, dtype: object},
 {'id': '8',
  'rating': 3.0046966144384375,
  'title': 7    Tom and Huck (1995)
  Name: title, dtype: object},
 {'id': '9',
  'rating': 3.1796259205354,
  'title': 8    Sudden Death (1995)
  Name: title, dtype: object},
 {'id': '10',
  'rating': 3.9072350910970908,
  'title': 9    GoldenEye (1995)
  Name: title, dtype: object},
 {'id': '11',
  'rating': 4.067417953290122,
  'title': 10    American President, The (1995)
  Name: title, dtype: object},
 {'id': '12',
  'rating': 3.0941418235925418,
  'title': 11    Dracula: Dead and Loving I

In [26]:
my_ratings_df = pd.DataFrame(rating_result)
my_ratings_df

Unnamed: 0,id,rating,title
0,2,3.608432,"1 Jumanji (1995) Name: title, dtype: object"
1,4,3.232418,"3 Waiting to Exhale (1995) Name: title, dty..."
2,5,3.346881,4 Father of the Bride Part II (1995) Name: ...
3,7,3.267107,"6 Sabrina (1995) Name: title, dtype: object"
4,8,3.004697,"7 Tom and Huck (1995) Name: title, dtype: o..."
...,...,...,...
9505,163809,3.519365,9387 Over the Garden Wall (2013) Name: titl...
9506,32743,3.429452,"5862 Ringu 0: Bâsudei (2000) Name: title, d..."
9507,98279,4.131796,"8040 Fantastic Fear of Everything, A (2012)..."
9508,65514,3.752917,"6947 Ip Man (2008) Name: title, dtype: object"


In [27]:
recomm_result = my_ratings_df.sort_values('rating',ascending=False)
recomm_result

Unnamed: 0,id,rating,title
7808,54503,4.697979,"6537 Superbad (2007) Name: title, dtype: ob..."
1007,1276,4.677981,"975 Cool Hand Luke (1967) Name: title, dtyp..."
744,912,4.664093,"694 Casablanca (1942) Name: title, dtype: o..."
637,778,4.661097,"613 Trainspotting (1996) Name: title, dtype..."
926,1172,4.651240,878 Cinema Paradiso (Nuovo cinema Paradiso)...
...,...,...,...
2101,2701,2.286143,"2029 Wild Wild West (1999) Name: title, dty..."
278,312,2.284588,271 Stuart Saves His Family (1995) Name: ti...
2803,3593,2.268111,"2683 Battlefield Earth (2000) Name: title, ..."
6028,8666,2.112105,"5270 Catwoman (2004) Name: title, dtype: ob..."
