<a href="https://colab.research.google.com/github/epsilon-deltta/movie-recommendation/blob/master/item_based_Collaborative_filtering.ipynb" target="_parent">
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# If you are running in Colab, uncomment the commands below to download the data files.
#import os
#os.mkdir('data')
#!curl raw.githubusercontent.com/epsilon-deltta/movie-recommendation/master/data/movies.csv -o ./data/movies.csv
#!curl raw.githubusercontent.com/epsilon-deltta/movie-recommendation/master/data/ratings.csv -o ./data/ratings.csv

In [1]:
import pandas as pd
import numpy as np
# Load the two input tables from CSV files under ./data (path is relative to
# the working directory). Later cells use the movieId/title columns of
# `movies` and the userId/movieId/rating columns of `ratings`.
movies = pd.read_csv('./data/movies.csv')
ratings = pd.read_csv('./data/ratings.csv')

### user-movie : rating matrix

In [2]:
# Keep only the columns that are needed.
ratings = ratings[['userId', 'movieId', 'rating']]
# Reshape into a userId x movieId rating matrix with pivot_table.
# NOTE(review): this movieId-keyed `ratings_matrix` is rebuilt with title
# columns in the following cell before anything else reads it.
ratings_matrix = ratings.pivot_table('rating', index='userId', columns='movieId')

In [4]:
# Join with `movies` to obtain the title column.
rating_movies = pd.merge(ratings, movies, on='movieId')
# Pivot with columns='title' so that each column is a movie title.
# NOTE(review): pivot_table aggregates with the mean by default, so distinct
# movieIds that share one title would be merged into a single averaged
# column -- confirm this is acceptable for the data set.
ratings_matrix = rating_movies.pivot_table('rating', index='userId', columns='title')
# Replace every NaN (no rating given) with 0.
ratings_matrix = ratings_matrix.fillna(0)

### movie-movie : similarity matrix (based on items)

In [6]:
# Transpose into an item x user matrix (one row per movie title).
ratings_matrix_T = ratings_matrix.transpose()

# Compute the cosine similarity between every pair of movies.
from sklearn.metrics.pairwise import cosine_similarity

item_sim = cosine_similarity(ratings_matrix_T, ratings_matrix_T)

# Wrap the NumPy array returned by cosine_similarity() in a DataFrame whose
# rows and columns are both labelled with the movie titles.
item_sim_df = pd.DataFrame(data=item_sim, index=ratings_matrix.columns,
                          columns=ratings_matrix.columns)

In [7]:
# Show the 5 movies most similar to Inception, excluding Inception itself.
# The movie is removed by label instead of skipping position 0 of the sorted
# result: ties at the maximum similarity (or floating-point round-off on the
# diagonal) do not guarantee that the movie sorts first against itself.
item_sim_df["Inception (2010)"].drop("Inception (2010)").sort_values(ascending=False)[:5]

title
Dark Knight, The (2008)          0.727263
Inglourious Basterds (2009)      0.646103
Shutter Island (2010)            0.617736
Dark Knight Rises, The (2012)    0.617504
Fight Club (1999)                0.615417
Name: Inception (2010), dtype: float64

### user-movie : predicted rating matrix (predicted scores only, not the actual ratings)

In [10]:
# Predict ratings as the similarity-weighted average of each user's ratings:
# dot product of the rating (row) vectors with the similarity (column) vectors.
def predict_rating(ratings_arr, item_sim_arr):
    """Predict every user-item rating from all item-item similarities.

    For user ``u`` and item ``j`` the prediction is::

        sum_i ratings_arr[u, i] * item_sim_arr[i, j] / sum_i |item_sim_arr[i, j]|

    Parameters
    ----------
    ratings_arr : array-like, shape (n_users, n_items)
        User-item rating matrix. A 1-D input is treated as a single user.
    item_sim_arr : array-like, shape (n_items, n_items)
        Item-item similarity matrix.

    Returns
    -------
    numpy.ndarray, shape (n_users, n_items)
        Predicted ratings. Items whose similarity column sums to zero get a
        prediction of 0 instead of NaN.
    """
    ratings_arr = np.atleast_2d(np.asarray(ratings_arr, dtype=float))
    item_sim_arr = np.asarray(item_sim_arr, dtype=float)

    weighted_sum = ratings_arr.dot(item_sim_arr)
    # The weights used for item j are column j of the similarity matrix, so the
    # normaliser must be the column-wise sum (axis=0). For a symmetric matrix
    # this equals the row-wise sum, but it differs for asymmetric similarities.
    sim_totals = np.abs(item_sim_arr).sum(axis=0)

    ratings_pred = np.zeros_like(weighted_sum)
    # Skip the division where the normaliser is zero to avoid NaN/inf entries.
    np.divide(weighted_sum, sim_totals, out=ratings_pred, where=sim_totals != 0)
    return ratings_pred

# Predict a rating for every (user, movie) pair from the full similarity matrix.
ratings_pred = predict_rating(ratings_matrix.values , item_sim_df.values)

# Convert to a DataFrame that reuses the user index and title columns of
# `ratings_matrix`.
ratings_pred_matrix = pd.DataFrame(data=ratings_pred, index= ratings_matrix.index,
                                   columns = ratings_matrix.columns)

In [32]:
# Display the predicted rating matrix (rows: userId, columns: movie title).
ratings_pred_matrix

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...All the Marbles (1981),...And Justice for All (1979),00 Schneider - Jagd auf Nihil Baxter (1994),1-900 (06) (1994),10 (1979),10 Cent Pistol (2015),10 Cloverfield Lane (2016),10 Items or Less (2006),10 Things I Hate About You (1999),10 Years (2011),"10,000 BC (2008)",100 Girls (2000),100 Streets (2016),101 Dalmatians (1996),101 Dalmatians (One Hundred and One Dalmatians) (1961),101 Dalmatians II: Patch's London Adventure (2003),101 Reykjavik (101 Reykjavík) (2000),102 Dalmatians (2000),10th & Wolf (2006),"10th Kingdom, The (2000)","10th Victim, The (La decima vittima) (1965)","11'09""01 - September 11 (2002)",11:14 (2003),"11th Hour, The (2007)",12 Angry Men (1957),12 Angry Men (1997),12 Chairs (1971),12 Chairs (1976),12 Rounds (2009),12 Years a Slave (2013),127 Hours (2010),13 Assassins (Jûsan-nin no shikaku) (2010),13 Ghosts (1960),13 Going on 30 (2004),13 Hours (2016),13 Sins (2014),13 Tzameti (2005),13th (2016),"13th Warrior, The (1999)",1408 (2007),1492: Conquest of Paradise (1992),15 Minutes (2001),16 Blocks (2006),17 Again (2009),1776 (1972),18 Again! 
(1988),187 (One Eight Seven) (1997),1900 (Novecento) (1976),1941 (1979),1969 (1988),1984 (Nineteen Eighty-Four) (1984),2 Days in New York (2012),2 Days in Paris (2007),2 Days in the Valley (1996),"2 Fast 2 Furious (Fast and the Furious 2, The) (2003)",2 Guns (2013),20 Dates (1998),20 Feet from Stardom (Twenty Feet from Stardom) (2013),20 Million Miles to Earth (1957),"20,000 Days on Earth (2014)","20,000 Leagues Under the Sea (1916)","20,000 Leagues Under the Sea (1954)",200 Cigarettes (1999),2001 Maniacs (2005),2001: A Space Odyssey (1968),2010: The Year We Make Contact (1984),2012 (2009),2046 (2004),2048: Nowhere to Run (2017),21 (2008),21 Grams (2003),21 Jump Street (2012),21 Up (1977),21 and Over (2013),22 Jump Street (2014),24 Hour Party People (2002),25th Hour (2002),27 Dresses (2008),28 Days (2000),28 Days Later (2002),28 Up (1985),28 Weeks Later (2007),3 Days to Kill (2014),3 Extremes (Three... Extremes) (Saam gaang yi) (2004),3 Idiots (2009),3 Ninjas (1992),3 Ninjas Kick Back (1994),3 Ninjas Knuckle Up (1995),3 Ninjas: High Noon On Mega Mountain (1998),3 Women (Three Women) (1977),...,X-Men: First Class (2011),X-Men: The Last Stand (2006),X2: X-Men United (2003),Yankee Doodle Dandy (1942),"Yards, The (2000)",Year One (2009),"Year of Living Dangerously, The (1982)",Year of the Dragon (1985),Year of the Horse (1997),"Yearling, The (1946)","Yellow Sea, The (a.k.a. 
The Murderer) (Hwanghae) (2010)",Yellow Submarine (1968),Yentl (1983),Yes Man (2008),"Yes Men Fix the World, The (2009)","Yes Men, The (2003)","Yesterday, Today and Tomorrow (Ieri, oggi, domani) (1963)",Yi Yi (2000),Yogi Bear (2010),Yojimbo (1961),Yongary: Monster from the Deep (1967),You Again (2010),You Are the Apple of My Eye (2011),You Can Count on Me (2000),You Can't Take It with You (1938),You Don't Mess with the Zohan (2008),You Got Served (2004),You Only Live Once (1937),You Only Live Twice (1967),You Will Meet a Tall Dark Stranger (2010),You'll Never Get Rich (1941),You're Next (2011),You've Got Mail (1998),"You, Me and Dupree (2006)",Young Adult (2011),Young Doctors in Love (1982),Young Einstein (1988),Young Frankenstein (1974),Young Guns (1988),Young Guns II (1990),Young People Fucking (a.k.a. YPF) (2007),"Young Poisoner's Handbook, The (1995)",Young Sherlock Holmes (1985),"Young Victoria, The (2009)","Young and the Damned, The (Olvidados, Los) (1950)",Youngblood (1986),Your Friends and Neighbors (1998),Your Highness (2011),Your Name. (2016),Your Sister's Sister (2011),"Yours, Mine and Ours (1968)","Yours, Mine and Ours (2005)",Youth (2015),Youth Without Youth (2007),Youth in Revolt (2009),Youth of the Beast (Yaju no seishun) (1963),Z (1969),Zabriskie Point (1970),Zack and Miri Make a Porno (2008),Zapped! (1982),Zathura (2005),Zatoichi and the Chest of Gold (Zatôichi senryô-kubi) (Zatôichi 6) (1964),Zazie dans le métro (1960),Zebraman (2004),"Zed & Two Noughts, A (1985)",Zeitgeist: Addendum (2008),Zeitgeist: Moving Forward (2011),Zeitgeist: The Movie (2007),Zelary (2003),Zelig (1983),Zero Dark Thirty (2012),Zero Effect (1998),"Zero Theorem, The (2013)",Zero de conduite (Zero for Conduct) (Zéro de conduite: Jeunes diables au collège) (1933),Zeus and Roxanne (1997),Zipper (2015),Zodiac (2007),Zombeavers (2014),Zombie (a.k.a. Zombie 2: The Dead Are Among Us) (Zombi 2) (1979),Zombie Strippers! 
(2008),Zombieland (2009),Zone 39 (1997),"Zone, The (La Zona) (2007)",Zookeeper (2011),Zoolander (2001),Zoolander 2 (2016),Zoom (2006),Zoom (2015),Zootopia (2016),Zulu (1964),Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 
195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1
1,0.070345,0.577855,0.321696,0.227055,0.206958,0.194615,0.249883,0.102542,0.157084,0.178197,0.119402,0.185026,0.269199,0.521031,0.141683,0.116623,0.135441,0.224885,0.226528,0.113608,0.185277,0.303638,0.113608,0.255040,0.260446,0.326968,0.305769,0.155031,0.348717,0.186870,0.119402,0.099756,0.206331,0.348717,0.267407,0.237128,0.050947,0.050947,0.200747,0.156893,0.125078,0.100407,0.333792,0.215463,0.106881,0.070345,0.050947,0.099394,0.285560,0.183271,0.112690,0.175026,0.165877,0.162368,0.472124,0.121196,0.165027,0.141475,0.142333,0.156421,0.282148,0.113608,0.298505,0.239259,0.176018,0.184231,1.083590,0.207461,0.119402,0.269199,0.272504,0.286952,0.334048,0.136689,0.255738,0.291535,0.150791,0.184206,0.112690,0.161327,0.186044,0.148961,0.170634,0.058736,0.118460,0.174618,0.169133,0.212279,0.298094,0.219370,0.170634,0.205592,0.071035,0.151191,0.059292,0.190253,0.356982,0.214605,0.392063,0.385782,...,0.180674,0.199359,0.234399,0.129098,0.184084,0.140739,0.193533,0.112690,0.198532,0.102542,0.112690,0.215927,0.102542,0.169190,0.171466,0.172604,0.099462,0.223486,0.118666,0.182973,0.113608,0.033424,0.177818,0.159331,0.206301,0.162802,0.141684,0.102542,0.206150,0.113608,0.102542,0.115453,0.234571,0.219538,0.150915,0.390283,0.125295,0.290504,0.246188,0.208563,0.156726,0.182054,0.333610,0.000000,0.207020,0.159945,0.197882,0.122468,0.055679,0.156421,0.415176,0.040466,0.143475,0.094357,0.112087,0.070345,0.132181,0.050947,0.155392,0.168583,0.186554,0.050947,0.040443,0.121184,0.178482,0.104488,0.104488,0.110808,0.102542,0.175859,0.179162,0.231606,0.093467,0.094357,0.112690,0.113608,0.164231,0.086360,0.277215,0.262709,0.180320,0.112690,0.111653,0.130131,0.248312,0.132009,0.285913,0.113608,0.155861,0.155927,0.113608,0.181738,0.133962,0.128574,0.006179,0.212070,0.192921,0.136024,0.292955,0.720347
2,0.018260,0.042744,0.018861,0.000000,0.000000,0.035995,0.013413,0.002314,0.032213,0.014863,0.000000,0.005220,0.093722,0.000000,0.014296,0.016398,0.043685,0.019004,0.020071,0.015640,0.028349,0.043477,0.015640,0.019634,0.016893,0.008251,0.010919,0.013711,0.000000,0.020300,0.000000,0.002726,0.022639,0.000000,0.032268,0.031130,0.040699,0.040699,0.024950,0.043495,0.039927,0.014959,0.005076,0.021766,0.027095,0.018260,0.040699,0.030828,0.016549,0.024193,0.011425,0.015421,0.018894,0.025580,0.012931,0.005894,0.012434,0.022181,0.013162,0.012655,0.012865,0.015640,0.023254,0.009488,0.025689,0.051196,0.011821,0.028587,0.000000,0.093722,0.010907,0.011337,0.006746,0.020167,0.020685,0.010202,0.030132,0.017790,0.011425,0.040218,0.017453,0.035721,0.010581,0.030541,0.035089,0.014684,0.014521,0.031878,0.019877,0.028872,0.010581,0.038753,0.058687,0.009776,0.041176,0.015385,0.024871,0.012024,0.000000,0.010271,...,0.036570,0.025070,0.025467,0.008000,0.012124,0.033330,0.005108,0.011425,0.004361,0.002314,0.011425,0.013179,0.002314,0.041617,0.021781,0.018116,0.013515,0.011266,0.018371,0.011045,0.015640,0.008925,0.022155,0.007959,0.003701,0.035772,0.017982,0.002314,0.014859,0.015640,0.002314,0.016312,0.017878,0.024894,0.039716,0.004272,0.012457,0.019223,0.014376,0.010825,0.013449,0.000000,0.010412,0.042304,0.006528,0.012465,0.004914,0.031938,0.042672,0.012655,0.030551,0.007835,0.034166,0.003525,0.022321,0.018260,0.008861,0.040699,0.029233,0.012524,0.021269,0.040699,0.030610,0.019721,0.002215,0.023352,0.023352,0.028403,0.002314,0.006791,0.033143,0.010933,0.018806,0.003525,0.011425,0.015640,0.030904,0.017290,0.019250,0.039449,0.038895,0.011425,0.035400,0.038101,0.034181,0.026764,0.000000,0.015640,0.037980,0.006859,0.015640,0.020855,0.020119,0.015745,0.049983,0.014876,0.021616,0.024528,0.017563,0.000000
3,0.011884,0.030279,0.064437,0.003762,0.003749,0.002722,0.014625,0.002085,0.005666,0.006272,0.091413,0.007483,0.018710,0.080626,0.006995,0.006766,0.006988,0.005427,0.006743,0.006923,0.005389,0.008943,0.006923,0.008559,0.009333,0.006316,0.031652,0.007376,0.009832,0.022056,0.091413,0.002548,0.008762,0.009832,0.008773,0.004379,0.001117,0.001117,0.007007,0.005163,0.006408,0.008007,0.060883,0.006845,0.008627,0.011884,0.001117,0.000471,0.013655,0.008546,0.005636,0.006163,0.008288,0.005876,0.035962,0.003268,0.006226,0.000684,0.008611,0.005500,0.016669,0.006923,0.020500,0.012294,0.008325,0.007143,0.041595,0.001334,0.091413,0.018710,0.077529,0.021332,0.017112,0.010414,0.012108,0.018136,0.009967,0.006167,0.005636,0.005169,0.005772,0.005779,0.004383,0.003476,0.006839,0.006179,0.006977,0.007489,0.013581,0.008970,0.004383,0.009032,0.000000,0.008694,0.001042,0.006238,0.004293,0.004442,0.000000,0.005754,...,0.007728,0.007331,0.008545,0.002538,0.007864,0.004793,0.007932,0.005636,0.001372,0.002085,0.005636,0.008539,0.002085,0.006532,0.002231,0.004299,0.003586,0.004686,0.008110,0.006649,0.006923,0.001029,0.023467,0.003837,0.010399,0.009077,0.004106,0.002085,0.011882,0.006923,0.002085,0.012148,0.009950,0.006938,0.001923,0.018954,0.007231,0.010931,0.010774,0.007658,0.005467,0.000000,0.013504,0.000000,0.014003,0.005418,0.006934,0.006645,0.000762,0.005500,0.026113,0.001378,0.004664,0.003444,0.005363,0.011884,0.002860,0.001117,0.006655,0.005279,0.008810,0.001117,0.000000,0.010758,0.007362,0.003726,0.003726,0.004479,0.002085,0.005546,0.006601,0.010054,0.008080,0.003444,0.005636,0.006923,0.007347,0.010048,0.009912,0.010571,0.007156,0.005636,0.003597,0.001240,0.008107,0.006664,0.006615,0.006923,0.006186,0.006225,0.006923,0.011665,0.011800,0.012225,0.000000,0.008194,0.007017,0.009229,0.010420,0.084501
4,0.049145,0.277628,0.160448,0.206892,0.309632,0.042337,0.130048,0.116442,0.099785,0.097432,0.067909,0.133207,0.129891,0.117370,0.105762,0.050937,0.072990,0.087561,0.160899,0.051269,0.104399,0.160360,0.051269,0.155419,0.144847,0.230765,0.193323,0.067295,0.109786,0.053613,0.067909,0.124971,0.086210,0.109786,0.219526,0.153906,0.059173,0.059173,0.067190,0.101450,0.081182,0.058553,0.187341,0.135379,0.056130,0.049145,0.059173,0.143536,0.135362,0.114137,0.069823,0.103078,0.091012,0.102567,0.280844,0.112704,0.093273,0.204569,0.088042,0.105645,0.182243,0.051269,0.253758,0.155948,0.087350,0.081362,0.279549,0.203643,0.067909,0.129891,0.122899,0.153040,0.255796,0.060948,0.167613,0.184651,0.079271,0.150061,0.069823,0.088958,0.146443,0.078957,0.130801,0.043481,0.059742,0.113990,0.114910,0.129951,0.148580,0.126940,0.130801,0.102078,0.043906,0.109318,0.067488,0.094764,0.175614,0.115739,0.114547,0.239236,...,0.095093,0.111507,0.128987,0.123834,0.126120,0.071547,0.189182,0.069823,0.128953,0.116442,0.069823,0.117357,0.116442,0.088014,0.105254,0.218806,0.103181,0.221135,0.054440,0.138515,0.051269,0.072239,0.047691,0.174823,0.173530,0.075531,0.066442,0.116442,0.109183,0.051269,0.116442,0.053013,0.153088,0.102819,0.067137,0.184911,0.073489,0.182937,0.140976,0.122174,0.106448,0.124537,0.153152,0.000000,0.154638,0.106571,0.310530,0.062526,0.051919,0.105645,0.232388,0.030471,0.111766,0.141501,0.062209,0.049145,0.138145,0.059173,0.081442,0.107755,0.065617,0.059173,0.031769,0.058190,0.306069,0.065454,0.065454,0.081623,0.116442,0.152796,0.090504,0.177071,0.054258,0.141501,0.069823,0.051269,0.102716,0.049932,0.110026,0.120224,0.095931,0.069823,0.067707,0.087358,0.141641,0.060310,0.118407,0.051269,0.084930,0.099847,0.051269,0.076051,0.055563,0.054137,0.008343,0.159242,0.100941,0.062253,0.146054,0.231187
5,0.007278,0.066951,0.041879,0.013880,0.024842,0.018240,0.026405,0.018673,0.021591,0.018841,0.022079,0.024403,0.042941,0.077186,0.015049,0.010008,0.014591,0.028366,0.032548,0.009689,0.024614,0.044854,0.009689,0.040566,0.034612,0.092801,0.048303,0.018548,0.000000,0.028510,0.022079,0.020275,0.022272,0.000000,0.041390,0.035876,0.006225,0.006225,0.023277,0.021220,0.016991,0.009083,0.052674,0.035033,0.010633,0.007278,0.006225,0.018858,0.025706,0.021284,0.009722,0.020253,0.018958,0.022099,0.065125,0.018019,0.016944,0.027372,0.014807,0.016786,0.032453,0.009689,0.067696,0.029184,0.020633,0.020316,0.061424,0.034701,0.022079,0.042941,0.049308,0.039075,0.044119,0.012960,0.034744,0.033477,0.018618,0.024353,0.009722,0.021377,0.029948,0.017665,0.025570,0.008132,0.013228,0.021639,0.021294,0.038083,0.035581,0.028219,0.025570,0.024180,0.004141,0.015460,0.012096,0.023522,0.047541,0.031256,0.085996,0.062773,...,0.022556,0.025308,0.031340,0.022067,0.022632,0.019800,0.030918,0.009722,0.059923,0.018673,0.009722,0.022123,0.018673,0.021297,0.033370,0.041860,0.017934,0.032465,0.010966,0.022953,0.009689,0.015220,0.025000,0.024446,0.027991,0.018479,0.026751,0.018673,0.025577,0.009689,0.018673,0.011229,0.036955,0.027648,0.015686,0.029999,0.010235,0.034775,0.030152,0.024120,0.016968,0.255148,0.028487,0.018443,0.013777,0.017554,0.038395,0.016017,0.006062,0.016786,0.052071,0.006155,0.020842,0.023381,0.012341,0.007278,0.021169,0.006225,0.017281,0.019811,0.022087,0.006225,0.006125,0.011632,0.033706,0.012572,0.012572,0.014366,0.018673,0.024122,0.020433,0.028461,0.009489,0.023381,0.009722,0.009689,0.020697,0.008171,0.031638,0.032066,0.022083,0.009722,0.014581,0.020699,0.032505,0.013389,0.066923,0.009689,0.020141,0.021915,0.009689,0.022246,0.013360,0.012378,0.000000,0.025839,0.023712,0.018012,0.028133,0.052315
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,0.246832,1.293006,0.752661,0.935924,1.032354,0.407038,0.516819,0.594590,0.489913,0.408661,0.325939,0.636279,0.710028,0.464155,0.361498,0.233790,0.369282,0.491429,0.627252,0.227092,0.537352,0.769500,0.227092,0.591011,0.606241,0.717578,1.668519,0.323476,0.277967,0.330779,0.325939,1.654876,0.462076,0.277967,0.737058,0.708321,0.514382,0.514382,0.356128,0.512233,0.415440,0.267896,0.716615,0.595303,0.287780,0.246832,0.514382,0.374292,0.583172,0.544633,0.299007,0.439053,0.413721,0.476029,0.981689,0.542038,0.406510,0.518433,0.373997,0.442805,0.643748,0.227092,1.343732,0.563361,0.448020,0.463176,0.754810,0.639824,0.325939,0.710028,0.575817,0.567580,0.709525,0.298560,0.765321,1.210942,0.408113,1.701416,0.299007,0.461595,1.025555,0.388885,0.650169,0.188979,0.306364,0.502500,0.725750,0.604465,0.585156,0.587040,0.650169,0.513433,0.262733,0.471347,0.522104,0.426650,0.699982,0.523578,0.722038,1.015069,...,0.462490,0.531676,0.584068,0.617031,0.488419,0.380496,0.695410,0.299007,0.642912,0.594590,0.299007,0.452617,0.594590,0.496554,0.515572,0.681135,2.044421,0.663818,0.242977,0.526694,0.227092,0.176660,0.307016,0.609962,0.665946,0.424764,0.423251,0.594590,1.041418,0.227092,0.594590,0.269538,0.870971,0.521854,0.366490,0.511686,0.320501,0.751025,0.545223,0.456261,0.447754,0.409140,0.502256,0.604732,0.683885,0.447678,0.782456,0.293838,0.414632,0.442805,0.782944,0.172177,0.492925,3.709511,0.317488,0.246832,0.601269,0.514382,0.394367,0.459688,0.344208,0.514382,0.188581,0.286470,0.760942,0.405037,0.405037,0.461042,0.594590,1.313797,0.427572,0.573430,0.283517,3.709511,0.299007,0.227092,0.498010,0.239524,0.430363,0.666211,0.451449,0.299007,0.470084,0.392280,0.609234,0.291520,0.590763,0.227092,0.441728,0.500997,0.227092,0.405114,0.291452,0.276143,0.055006,0.636525,0.510522,0.346652,0.550174,0.893777
607,0.052248,0.305255,0.180669,0.218834,0.179443,0.115288,0.165817,0.075548,0.104890,0.109500,0.082280,0.126868,0.202055,0.682903,0.079650,0.061592,0.086801,0.195908,0.143843,0.059516,0.118149,0.190477,0.059516,0.162208,0.170936,0.223185,0.142125,0.100998,0.082432,0.125603,0.082280,0.075442,0.152430,0.082432,0.177989,0.147608,0.038718,0.038718,0.132019,0.099603,0.095134,0.064822,0.284759,0.143186,0.065466,0.052248,0.038718,0.058066,0.150718,0.122914,0.074355,0.117314,0.105739,0.108134,0.267897,0.081993,0.114242,0.082606,0.083326,0.094162,0.190374,0.059516,0.297602,0.147158,0.117017,0.115403,0.363873,0.116326,0.082280,0.202055,0.254032,0.194215,0.220967,0.098852,0.171359,0.179075,0.100797,0.114153,0.074355,0.101796,0.128579,0.089268,0.112690,0.037924,0.072355,0.105622,0.110491,0.145557,0.194246,0.147680,0.112690,0.138532,0.061040,0.088087,0.062557,0.143822,0.250161,0.138781,0.601658,0.230027,...,0.115255,0.131742,0.158173,0.089767,0.114081,0.092646,0.144297,0.074355,0.292204,0.075548,0.074355,0.128377,0.075548,0.112749,0.057717,0.087219,0.066501,0.137474,0.063958,0.114617,0.059516,0.026793,0.124917,0.110464,0.138966,0.100248,0.122518,0.075548,0.136455,0.059516,0.075548,0.078893,0.167693,0.150962,0.088933,0.268206,0.073502,0.177261,0.165561,0.133692,0.094111,0.412456,0.172851,0.000000,0.113021,0.096441,0.156631,0.070777,0.038970,0.094162,0.339016,0.040089,0.084821,0.075235,0.060442,0.052248,0.088176,0.038718,0.096084,0.103488,0.122625,0.038718,0.021073,0.087960,0.138458,0.057902,0.057902,0.070493,0.075548,0.110557,0.110221,0.160271,0.056110,0.075235,0.074355,0.059516,0.108439,0.054939,0.201841,0.169707,0.117228,0.074355,0.060342,0.077570,0.159759,0.075347,0.133108,0.059516,0.099472,0.114118,0.059516,0.135763,0.093843,0.086643,0.003707,0.144222,0.134705,0.107674,0.156614,0.576894
608,0.324435,1.022541,0.598467,0.425468,0.349562,0.494081,0.529903,0.227746,0.480980,0.442384,0.263567,0.343872,0.682535,1.055968,0.310131,0.284681,0.425296,1.717719,0.589286,0.276586,0.567552,0.986251,0.276586,0.606227,0.612824,0.510396,0.795719,0.431900,0.699667,0.555932,0.263567,0.235132,0.764925,0.699667,0.614722,0.543974,0.322714,0.322714,0.577163,0.435614,0.403749,0.307459,0.640755,0.613086,0.337221,0.324435,0.322714,0.173522,0.588690,0.521829,0.297605,0.514832,0.491193,0.475524,0.938529,0.290775,0.707768,0.234396,0.364993,0.409793,0.804131,0.276586,0.843454,0.482260,0.598053,0.487693,0.975978,0.427793,0.263567,0.682535,0.499527,0.551741,0.604594,0.504939,0.592285,0.589923,0.456588,0.463078,0.297605,0.508584,0.563207,0.439525,0.431071,0.246164,0.381067,0.450654,0.525662,0.624414,0.822863,0.645485,0.431071,0.663281,0.289214,0.369952,0.362592,0.541818,1.142838,0.577520,0.869758,0.832514,...,0.513568,0.603518,0.687761,0.290144,0.449989,0.490706,0.351003,0.297605,0.542448,0.227746,0.297605,0.537725,0.227746,0.536361,0.468525,0.405590,0.249941,0.495206,0.287283,0.402390,0.276586,0.089160,0.562813,0.361241,0.385455,0.481971,0.708517,0.227746,0.490372,0.276586,0.227746,0.417396,0.578382,0.738982,0.522473,0.683151,0.305555,0.561192,0.523682,0.438366,0.410468,0.803712,0.497388,0.048991,0.422948,0.414865,0.361814,0.414467,0.261756,0.409793,0.710710,0.225527,0.328790,0.249445,0.287180,0.324435,0.274003,0.322714,0.467315,0.444852,0.594653,0.322714,0.116521,0.462753,0.292339,0.250429,0.250429,0.300280,0.227746,0.413894,0.491948,0.488482,0.289011,0.249445,0.297605,0.276586,0.465310,0.306721,0.772389,0.855528,0.523806,0.297605,0.261791,0.484945,0.737058,0.371122,0.480644,0.276586,0.486049,0.415432,0.276586,0.594918,0.457094,0.444436,0.038681,0.616733,0.717768,0.538586,0.527639,0.698871
609,0.004835,0.053593,0.026251,0.000000,0.002827,0.015528,0.017849,0.007791,0.013172,0.014981,0.004659,0.014972,0.033142,0.158358,0.008502,0.006908,0.012088,0.020895,0.017605,0.006575,0.015118,0.028237,0.006575,0.022483,0.018393,0.016429,0.023747,0.011184,0.000000,0.017123,0.004659,0.009048,0.018727,0.000000,0.022879,0.016996,0.003777,0.003777,0.014814,0.015195,0.013996,0.008364,0.038059,0.017723,0.008082,0.004835,0.003777,0.007976,0.017998,0.013464,0.012284,0.014240,0.011276,0.011105,0.042018,0.008374,0.014033,0.011576,0.008449,0.009475,0.020824,0.006575,0.034893,0.019384,0.013809,0.014183,0.038959,0.014502,0.004659,0.033142,0.041900,0.025025,0.027688,0.010551,0.021892,0.023432,0.012121,0.016031,0.012284,0.012763,0.017921,0.011992,0.012873,0.004785,0.008828,0.013510,0.013301,0.017812,0.023708,0.018569,0.012873,0.017592,0.006336,0.015272,0.005577,0.019254,0.038842,0.021269,0.000000,0.035642,...,0.015166,0.016773,0.020843,0.009608,0.013211,0.012015,0.017810,0.012284,0.043077,0.007791,0.012284,0.012855,0.007791,0.014062,0.005253,0.015450,0.009154,0.017396,0.007665,0.016197,0.006575,0.002621,0.016183,0.011374,0.014920,0.011782,0.016497,0.007791,0.016388,0.006575,0.007791,0.007087,0.022549,0.018668,0.011676,0.027528,0.010931,0.020633,0.019928,0.015916,0.009642,0.231668,0.017600,0.045373,0.019264,0.010176,0.024719,0.009262,0.006202,0.009475,0.027411,0.003918,0.011441,0.011482,0.007088,0.004835,0.012431,0.003777,0.011646,0.011940,0.012957,0.003777,0.003936,0.009215,0.020095,0.006862,0.006862,0.009214,0.007791,0.013814,0.014389,0.019912,0.005943,0.011482,0.012284,0.006575,0.014162,0.005479,0.024458,0.020846,0.014974,0.012284,0.008657,0.010581,0.020305,0.009906,0.027413,0.006575,0.012981,0.011108,0.006575,0.014368,0.010334,0.007742,0.000000,0.018070,0.015600,0.013108,0.018328,0.033377


### evaluate loss using MSE

In [27]:
from sklearn import metrics

In [31]:
from sklearn.metrics import mean_squared_error

# Measure prediction quality (MSE) only on the movies a user actually rated.
def get_mse(pred, actual):
    """Return the mean squared error over the nonzero entries of ``actual``.

    Parameters
    ----------
    pred : numpy.ndarray
        Predicted ratings, same shape as ``actual``.
    actual : numpy.ndarray
        Observed ratings; entries equal to zero are excluded from the score
        (the notebook fills unrated movies with 0 before calling this).

    Returns
    -------
    float
        MSE between the selected predictions and observations, as computed by
        ``sklearn.metrics.mean_squared_error``.
    """
    # Positions of the nonzero (i.e. rated) entries; zero entries are ignored.
    rated_positions = actual.nonzero()
    predicted_scores = pred[rated_positions].flatten()
    observed_scores = actual[rated_positions].flatten()
    # The arguments are passed as (pred, actual); MSE is symmetric in its inputs.
    return mean_squared_error(predicted_scores, observed_scores)

# Report the MSE of the predictions built from all neighbours
# (the printed label reads "item-based, all nearest neighbours MSE").
print('아이템 기반 모든 인접 이웃 MSE: ', get_mse(ratings_pred, ratings_matrix.values ))


아이템 기반 모든 인접 이웃 MSE:  9.895354759094706


### top-n 유사도를 가진 데이터들에 대해서만 예측 평점 계산

In [34]:
def predict_rating_topsim(ratings_arr, item_sim_arr, n=20):
    """Predict ratings using only the ``n`` most similar items per item.

    For every item ``j`` the ``n`` items with the largest similarity to ``j``
    are selected, and each user's prediction is the similarity-weighted
    average of that user's ratings on those items.

    Parameters
    ----------
    ratings_arr : array-like, shape (n_users, n_items)
        User-item rating matrix.
    item_sim_arr : array-like, shape (n_items, n_items)
        Item-item similarity matrix. Neighbours and their weights are both
        read from column ``j``; for a symmetric matrix this is identical to
        reading row ``j``.
    n : int, default 20
        Number of neighbours per item. Values larger than ``n_items`` use all
        items.

    Returns
    -------
    numpy.ndarray, shape (n_users, n_items)
        Predicted ratings. An item whose selected similarities sum to zero
        keeps a prediction of 0 instead of NaN.
    """
    ratings_arr = np.asarray(ratings_arr, dtype=float)
    item_sim_arr = np.asarray(item_sim_arr, dtype=float)

    # Prediction matrix initialised to zero, same shape as the rating matrix.
    pred = np.zeros(ratings_arr.shape)

    # Loop over the items (columns of the user-item rating matrix).
    for col in range(ratings_arr.shape[1]):
        sim_col = item_sim_arr[:, col]
        # Indices of the n largest similarities, in descending order.
        # Kept as a plain index array: wrapping it in a Python list makes
        # NumPy's interpretation version dependent (tuple index with a
        # FutureWarning on old releases, extra dimension on newer ones).
        top_n_items = np.argsort(sim_col)[:-n-1:-1]
        weights = sim_col[top_n_items]
        weight_total = np.abs(weights).sum()
        if weight_total == 0:
            # No usable neighbours: leave the prediction at 0 rather than NaN.
            continue
        # All users at once: (n_users, k) @ (k,) -> (n_users,)
        pred[:, col] = ratings_arr[:, top_n_items].dot(weights) / weight_total
    return pred


In [35]:
# Original author's note: this takes about 2 minutes to run.
# Rebinds `ratings_pred` (previously the all-neighbours prediction) to the
# top-20 prediction; the printed label reads "item-based, top-20 nearest
# neighbours MSE".
ratings_pred = predict_rating_topsim(ratings_matrix.values , item_sim_df.values, n=20)
print('아이템 기반 인접 TOP-20 이웃 MSE: ', get_mse(ratings_pred, ratings_matrix.values ))

# Rebuild the predicted rating DataFrame from the new predictions, replacing
# the `ratings_pred_matrix` created earlier.
ratings_pred_matrix = pd.DataFrame(data=ratings_pred, index= ratings_matrix.index,
                                   columns = ratings_matrix.columns)

  # This is added back by InteractiveShellApp.init_path()
  if sys.path[0] == '':


아이템 기반 인접 TOP-20 이웃 MSE:  3.6949999176225483


## 사용자에게 영화 추천을 해보자

In [36]:
# Let's recommend movies to user 9. First look at this user's own ratings:
# the row for userId 9, restricted to rated movies (rating > 0), showing the
# 10 highest-rated titles.
user_rating_id = ratings_matrix.loc[9, :]
user_rating_id[ user_rating_id > 0].sort_values(ascending=False)[:10]

title
Adaptation (2002)                                                                 5.0
Austin Powers in Goldmember (2002)                                                5.0
Lord of the Rings: The Fellowship of the Ring, The (2001)                         5.0
Lord of the Rings: The Two Towers, The (2002)                                     5.0
Producers, The (1968)                                                             5.0
Citizen Kane (1941)                                                               5.0
Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981)    5.0
Back to the Future (1985)                                                         5.0
Glengarry Glen Ross (1992)                                                        4.0
Sunset Blvd. (a.k.a. Sunset Boulevard) (1950)                                     4.0
Name: 9, dtype: float64

#### 사용자가 관람하지 않은 영화 중에서 영화를 추천해보자

In [37]:
def get_unseen_movies(ratings_matrix, userId):
    """Return the movies that a user has not rated yet.

    Parameters
    ----------
    ratings_matrix : pandas.DataFrame
        Rating matrix indexed by user id with one column per movie. A value
        greater than 0 is treated as "already seen".
    userId
        Index label of the user to look up in ``ratings_matrix``.

    Returns
    -------
    list
        Column labels of ``ratings_matrix`` whose rating for ``userId`` is not
        greater than 0, in the original column order.

    Raises
    ------
    KeyError
        If ``userId`` is not present in the index (raised by ``.loc``).
    """
    # All ratings of this user as a Series indexed by the movie columns.
    user_rating = ratings_matrix.loc[userId, :]

    # Movies with a rating above 0 were already seen. A set gives O(1)
    # membership tests; the previous list made the filter below quadratic in
    # the number of movies.
    already_seen = set(user_rating[user_rating > 0].index.tolist())

    # Keep every movie that is not in the already-seen set, preserving order.
    return [movie for movie in ratings_matrix.columns.tolist()
            if movie not in already_seen]


In [39]:
# pred_df     : predicted ratings per movie, computed earlier
# unseen_list : movies the user has not seen
# top_n       : number of top-scoring movies to return

def recomm_movie_by_userid(pred_df, userId, unseen_list, top_n=10):
    """Return the ``top_n`` unseen movies with the highest predicted rating.

    Parameters
    ----------
    pred_df : pandas.DataFrame
        Predicted ratings, indexed by user id with one column per movie.
    userId
        Index label of the user in ``pred_df``.
    unseen_list : list
        Column labels of the candidate movies.
    top_n : int, default 10
        Number of recommendations to keep.

    Returns
    -------
    pandas.Series
        Predicted ratings of the selected movies, sorted in descending order.
    """
    # Predicted scores of this user, restricted to the candidate movies.
    candidate_scores = pred_df.loc[userId, unseen_list]
    # Highest predicted rating first, then cut to the requested length.
    ranked_scores = candidate_scores.sort_values(ascending=False)
    return ranked_scores[:top_n]

In [40]:
# Collect the titles of the movies user 9 has not seen.
unseen_list = get_unseen_movies(ratings_matrix, 9)

# Recommend movies with item-based nearest-neighbour collaborative filtering.
recomm_movies = recomm_movie_by_userid(ratings_pred_matrix, 9, unseen_list, top_n=10)

# Turn the predicted scores into a single-column DataFrame ('pred_score').
recomm_movies = pd.DataFrame(data=recomm_movies.values,index=recomm_movies.index,columns=['pred_score'])
recomm_movies

Unnamed: 0_level_0,pred_score
title,Unnamed: 1_level_1
Shrek (2001),0.866202
Spider-Man (2002),0.857854
"Last Samurai, The (2003)",0.817473
Indiana Jones and the Temple of Doom (1984),0.816626
"Matrix Reloaded, The (2003)",0.80099
Harry Potter and the Sorcerer's Stone (a.k.a. Harry Potter and the Philosopher's Stone) (2001),0.765159
Gladiator (2000),0.740956
"Matrix, The (1999)",0.732693
Pirates of the Caribbean: The Curse of the Black Pearl (2003),0.689591
"Lord of the Rings: The Return of the King, The (2003)",0.676711
