#### Library Import

In [2]:
from sklearn.decomposition import TruncatedSVD
from scipy.sparse.linalg import svds

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

#### Data Import 

In [3]:
ratings = pd.read_csv('./datasets/ratings_small.csv')

ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [4]:
movies = pd.read_csv('./datasets/movies_small.csv')

movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [5]:
# ratings 와 movies merge

df = pd.merge(ratings, movies , on='movieId', how='inner')
df.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,1,4.0,964982703,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,5,1,4.0,847434962,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,7,1,4.5,1106635946,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
3,15,1,2.5,1510577970,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
4,17,1,4.5,1305696483,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy


In [11]:
matrix = df.pivot_table(index='userId', columns='title', values='rating').fillna(0)
matrix.head()

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
matrix.describe()

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
count,610.0,610.0,610.0,610.0,610.0,610.0,610.0,610.0,610.0,610.0,...,610.0,610.0,610.0,610.0,610.0,610.0,610.0,610.0,610.0,610.0
mean,0.006557,0.006557,0.011475,0.008197,0.013115,0.002459,0.088525,0.004918,0.252459,0.037705,...,0.002459,0.053279,0.018033,0.009836,0.004918,0.139344,0.109016,0.016393,0.133607,0.001639
std,0.161955,0.161955,0.200245,0.202444,0.235917,0.060733,0.560348,0.121466,0.968825,0.353571,...,0.060733,0.470856,0.26042,0.171638,0.121466,0.736293,0.570916,0.184969,0.661963,0.040489
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,4.0,4.0,3.5,5.0,5.0,1.5,5.0,3.0,5.0,4.0,...,1.5,5.0,4.5,3.0,3.0,5.0,4.0,2.5,5.0,1.0


In [45]:
movie_user_rating = matrix.values.T
movie_user_rating

array([[0. , 0. , 0. , ..., 0. , 0. , 4. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       ...,
       [0. , 0. , 0. , ..., 0. , 0. , 1.5],
       [4. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ]])

In [46]:
movie_user_rating.shape

(9719, 610)

#### sklearn.decomposition.TruncatedSVD
* class sklearn.decomposition.TruncatedSVD(n_components=2, *, algorithm='randomized', n_iter=5, n_oversamples=10, power_iteration_normalizer='auto', random_state=None, tol=0.0)

In [47]:
SVD = TruncatedSVD(n_components=12, n_iter=7, random_state=42)
svd_matrix = SVD.fit_transform(movie_user_rating)
svd_matrix.shape

(9719, 12)

In [48]:
item_similarity = np.corrcoef(svd_matrix)

In [49]:
item_similarity.shape

(9719, 9719)

In [50]:
sns.heatmap(item_similarity)

MemoryError: Unable to allocate 2.82 GiB for an array with shape (94458961, 4) and data type float64

MemoryError: Unable to allocate 721. MiB for an array with shape (94458961,) and data type float64

<Figure size 640x480 with 2 Axes>

In [51]:
movie_title = matrix.columns
movie_title_list = list(movie_title)

In [52]:
coffey_hands = movie_title_list.index("Guardians of the Galaxy (2014)")
coffey_hands

3667

In [53]:
corr_coffey_hands = item_similarity[coffey_hands]


list(movie_title[corr_coffey_hands >=0.9])[:10]

['Adjustment Bureau, The (2011)',
 'Amazing Spider-Man, The (2012)',
 'Ant-Man (2015)',
 'Avatar (2009)',
 'Avengers, The (2012)',
 'Avengers: Age of Ultron (2015)',
 'Big Hero 6 (2014)',
 'Brave (2012)',
 'Captain America: Civil War (2016)',
 'Captain America: The First Avenger (2011)']

In [44]:
corr_coffey_hands > 0.9

array([False, False, False, ...,  True,  True,  True])

#### 특정 사용자에게 추천해주기

In [54]:
matrix

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
607,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
608,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,4.5,3.5,0.0,0.0,0.0
609,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [56]:
matrix_1 = matrix.to_numpy()

In [58]:
matrix_1_mean = np.mean(matrix_1 , axis=1)

In [60]:
matrix_norm = matrix_1 - matrix_1_mean.reshape(-1,1)

In [61]:
matrix_norm

array([[-0.10422883, -0.10422883, -0.10422883, ..., -0.10422883,
         3.89577117, -0.10422883],
       [-0.01178105, -0.01178105, -0.01178105, ..., -0.01178105,
        -0.01178105, -0.01178105],
       [-0.00977467, -0.00977467, -0.00977467, ..., -0.00977467,
        -0.00977467, -0.00977467],
       ...,
       [-0.26798024, -0.26798024, -0.26798024, ..., -0.26798024,
        -0.26798024, -0.26798024],
       [-0.01244984, -0.01244984, -0.01244984, ..., -0.01244984,
        -0.01244984, -0.01244984],
       [ 3.5058648 , -0.4941352 , -0.4941352 , ...,  1.0058648 ,
        -0.4941352 , -0.4941352 ]])

In [62]:
matrix_norm.shape

(610, 9719)

In [63]:
pd.DataFrame(matrix_norm , columns=matrix.columns).head()

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
0,-0.104229,-0.104229,-0.104229,-0.104229,-0.104229,-0.104229,-0.104229,-0.104229,-0.104229,-0.104229,...,-0.104229,-0.104229,-0.104229,-0.104229,-0.104229,-0.104229,-0.104229,-0.104229,3.895771,-0.104229
1,-0.011781,-0.011781,-0.011781,-0.011781,-0.011781,-0.011781,-0.011781,-0.011781,-0.011781,-0.011781,...,-0.011781,-0.011781,-0.011781,-0.011781,-0.011781,-0.011781,-0.011781,-0.011781,-0.011781,-0.011781
2,-0.009775,-0.009775,-0.009775,-0.009775,-0.009775,-0.009775,-0.009775,-0.009775,-0.009775,-0.009775,...,-0.009775,-0.009775,-0.009775,-0.009775,-0.009775,-0.009775,-0.009775,-0.009775,-0.009775,-0.009775
3,-0.07902,-0.07902,-0.07902,-0.07902,-0.07902,-0.07902,-0.07902,-0.07902,-0.07902,-0.07902,...,-0.07902,-0.07902,-0.07902,-0.07902,-0.07902,-0.07902,-0.07902,-0.07902,-0.07902,-0.07902
4,-0.016463,-0.016463,-0.016463,-0.016463,-0.016463,-0.016463,-0.016463,-0.016463,-0.016463,-0.016463,...,-0.016463,-0.016463,-0.016463,-0.016463,-0.016463,-0.016463,-0.016463,-0.016463,-0.016463,-0.016463


In [64]:
U , sigma, Vt = svds(matrix_norm, k =12)

In [65]:
U.shape

(610, 12)

In [66]:
sigma.shape

(12,)

In [67]:
Vt.shape

(12, 9719)

In [69]:
sigma = np.diag(sigma)

In [72]:
svd_user_predict_rating = np.dot(np.dot(U, sigma), Vt) + matrix_1_mean.reshape(-1,1)

In [74]:
df_svd_user_predict_rating = pd.DataFrame(svd_user_predict_rating , columns=matrix.columns)

In [75]:
df_svd_user_predict_rating.shape

(610, 9719)

In [76]:
df_svd_user_predict_rating

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
0,-0.069158,0.012687,0.024235,0.005430,-0.001508,-0.000583,0.748544,-0.089214,-0.319342,0.312102,...,0.031285,0.065909,-0.025746,-0.021112,-0.002118,0.400769,-0.146417,-0.039785,0.908187,0.013443
1,-0.000801,0.007793,0.007109,0.001960,0.000735,0.003828,0.027385,-0.009824,0.252211,0.038861,...,0.021619,-0.064877,-0.026271,-0.034558,0.010922,-0.065936,-0.074423,-0.007743,0.105182,0.002614
2,0.011917,0.010399,0.011050,0.010187,0.009527,0.010003,0.046613,0.000021,-0.031331,0.013926,...,0.006728,0.030970,0.020342,0.021190,0.010258,0.023288,0.002176,0.011007,0.024866,0.010835
3,-0.039344,-0.007341,-0.000581,-0.003111,0.020031,-0.015621,0.100013,-0.027061,0.308163,0.025091,...,-0.025476,-0.091988,-0.075162,-0.062375,-0.016141,0.269965,-0.172481,-0.051313,0.221385,-0.008455
4,0.004148,-0.003865,-0.003467,-0.003539,0.002042,-0.006096,-0.055977,0.006741,0.053579,-0.020054,...,-0.015477,0.002786,0.004457,0.000332,-0.003253,-0.088469,-0.053899,0.001716,-0.061439,-0.003275
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
605,-0.117764,0.121284,0.090429,0.063393,0.130046,0.116174,-0.832561,0.337311,1.489700,-0.160237,...,0.011835,-0.357654,-0.246455,-0.136559,0.041526,2.923684,0.801226,-0.388038,-0.604918,0.021907
606,-0.003030,0.028473,0.031327,0.023096,0.026716,0.021293,0.265882,-0.023028,-0.334411,-0.054914,...,-0.003565,0.133447,0.064010,0.052350,0.011132,0.157651,0.206955,0.070172,0.307729,0.027622
607,0.060937,0.013712,0.003544,-0.017817,-0.046183,-0.026144,0.476469,-0.067136,0.069544,0.313355,...,-0.001111,0.620591,0.192911,0.112722,-0.061207,1.555006,1.919196,0.264351,0.711569,-0.020953
608,0.011031,0.007713,0.007291,0.005044,0.005528,0.004657,-0.024085,0.005901,-0.042899,0.008317,...,0.006980,0.001869,0.011508,0.003964,0.006798,-0.043521,-0.032859,0.003197,-0.008222,0.005633
