In [1]:
!pip install surprise
import pandas as pd
import numpy as np

import surprise
from surprise import NormalPredictor
from surprise import Dataset
from surprise import Reader
from surprise.model_selection import cross_validate
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
import warnings; warnings.simplefilter('ignore')

Collecting surprise
  Downloading https://files.pythonhosted.org/packages/61/de/e5cba8682201fcf9c3719a6fdda95693468ed061945493dea2dd37c5618b/surprise-0.1-py2.py3-none-any.whl
Collecting scikit-surprise
[?25l  Downloading https://files.pythonhosted.org/packages/f5/da/b5700d96495fb4f092be497f02492768a3d96a3f4fa2ae7dea46d4081cfa/scikit-surprise-1.1.0.tar.gz (6.4MB)
[K     |████████████████████████████████| 6.5MB 2.3MB/s 
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.0-cp36-cp36m-linux_x86_64.whl size=1675733 sha256=1d0808382060450e334cb42d19bca26eebbf69b872631bd00a1b0a5409d73706
  Stored in directory: /root/.cache/pip/wheels/cc/fa/8c/16c93fccce688ae1bde7d979ff102f7bee980d9cfeb8641bcf
Successfully built scikit-surprise
Installing collected packages: scikit-surprise, surprise
Successfully installed scikit-surprise-1.1.0 surprise-0.1


In [2]:
# Mount Google Drive at /content/drive so the dataset files stored there can be read.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [3]:
# Base folder on the mounted Drive; the CSV reads in later cells are relative to it.
# NOTE(review): the path contains a double space ("Movie  Recommendation") — confirm it matches the real folder name.
rootdir = '/content/drive/My Drive/Colab Notebooks/Movie  Recommendation System/Movie Dataset/'

In [4]:
# Load the raw ratings table from ratings.csv in the dataset folder.
ratings = pd.read_csv(rootdir+'ratings.csv')

In [5]:
# (rows, columns) of the ratings table as loaded.
ratings.shape

(100836, 4)

In [6]:
# Attach the movie title to every rating. The join key is spelled out so that an
# accidentally shared column name can never silently change how the tables join.
movies = pd.read_csv(rootdir+'movies.csv')
ratings = (
    pd.merge(movies, ratings, on='movieId')
    .drop(columns=['genres', 'timestamp'])  # not used by the collaborative-filtering cells
)
print(ratings.shape)
ratings.head()

(100836, 4)


Unnamed: 0,movieId,title,userId,rating
0,1,Toy Story (1995),1,4.0
1,1,Toy Story (1995),5,4.0
2,1,Toy Story (1995),7,4.5
3,1,Toy Story (1995),15,2.5
4,1,Toy Story (1995),17,4.5


**Data Preprocessing**

In [7]:
# Remove exact duplicate rows from both tables. The names are rebound instead of
# mutating with inplace=True, so each statement reads as "new frame from old frame".
ratings = ratings.drop_duplicates()
movies = movies.drop_duplicates()

In [8]:
# Rows with a missing userId; an empty frame means there are none.
ratings.loc[ratings['userId'].isna()]

Unnamed: 0,movieId,title,userId,rating


In [9]:
# Rows with a missing rating; an empty frame means there are none.
ratings.loc[ratings['rating'].isna()]

Unnamed: 0,movieId,title,userId,rating


**CF based recommendation system**

Our content based engine suffers from some severe limitations.

* It is only capable of suggesting movies which are close to a certain movie. That is, it is not capable of capturing tastes and providing recommendations across genres.
* Also, the engine that we built is not really personal in that it doesn't capture the personal tastes and biases of a user. Anyone querying our engine for recommendations based on a movie will receive the same recommendations for that movie, regardless of who (s)he is.
* Therefore, in this section, we will use Collaborative Filtering to make recommendations to movie watchers. Collaborative Filtering is based on the idea that users similar to me can be used to predict how much I will like a particular product or service that those users have used/experienced but I have not.


**User based**

In [11]:
# Build the user x title rating matrix: one row per user, one column per title.
userRatings = ratings.pivot_table(index=['userId'],columns=['title'],values='rating')
print("Before: ",userRatings.shape)

# Keep only titles that have at least MIN_RATINGS_PER_TITLE ratings, then put 0
# in every (user, title) cell that has no rating.
# NOTE(review): the 0 fill makes "unrated" look like a very low rating to the
# per-user mean computed later — confirm this is the intended treatment.
MIN_RATINGS_PER_TITLE = 10
userRatings = userRatings.dropna(thresh=MIN_RATINGS_PER_TITLE, axis=1).fillna(0)
print("After: ",userRatings.shape)

Before:  (610, 9719)
After:  (610, 2269)


In [12]:
# Show the filtered, zero-filled user x title matrix.
userRatings

title,"'burbs, The (1989)",(500) Days of Summer (2009),10 Cloverfield Lane (2016),10 Things I Hate About You (1999),"10,000 BC (2008)",101 Dalmatians (1996),101 Dalmatians (One Hundred and One Dalmatians) (1961),12 Angry Men (1957),12 Years a Slave (2013),127 Hours (2010),13 Going on 30 (2004),"13th Warrior, The (1999)",1408 (2007),15 Minutes (2001),17 Again (2009),1984 (Nineteen Eighty-Four) (1984),2 Days in the Valley (1996),"2 Fast 2 Furious (Fast and the Furious 2, The) (2003)","20,000 Leagues Under the Sea (1954)",2001: A Space Odyssey (1968),2010: The Year We Make Contact (1984),2012 (2009),21 (2008),21 Grams (2003),21 Jump Street (2012),22 Jump Street (2014),25th Hour (2002),27 Dresses (2008),28 Days (2000),28 Days Later (2002),28 Weeks Later (2007),30 Days of Night (2007),300 (2007),"39 Steps, The (1935)",3:10 to Yuma (2007),40 Days and 40 Nights (2002),"40-Year-Old Virgin, The (2005)","400 Blows, The (Les quatre cents coups) (1959)",48 Hrs. (1982),50 First Dates (2004),...,"Wolf of Wall Street, The (2013)","Wolverine, The (2013)",Women on the Verge of a Nervous Breakdown (Mujeres al borde de un ataque de nervios) (1988),Wonder Boys (2000),Wonder Woman (2017),Working Girl (1988),"World Is Not Enough, The (1999)",World War Z (2013),"World's End, The (2013)",Wreck-It Ralph (2012),"Wrestler, The (2008)",Wyatt Earp (1994),"X-Files: Fight the Future, The (1998)",X-Men (2000),X-Men Origins: Wolverine (2009),X-Men: Apocalypse (2016),X-Men: Days of Future Past (2014),X-Men: First Class (2011),X-Men: The Last Stand (2006),X2: X-Men United (2003),Yellow Submarine (1968),Yes Man (2008),Yojimbo (1961),You Don't Mess with the Zohan (2008),You Only Live Twice (1967),You've Got Mail (1998),Young Frankenstein (1974),Young Guns (1988),Young Guns II (1990),Young Sherlock Holmes (1985),Zack and Miri Make a Porno (2008),Zero Dark Thirty (2012),Zero Effect (1998),Zodiac (2007),Zombieland (2009),Zoolander (2001),Zootopia (2016),eXistenZ (1999),xXx (2002),¡Three Amigos! (1986)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,4.0,0.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,4.5,3.5,0.0,4.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,3.5,3.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
607,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
608,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.5,0.0,0.0,0.0,3.0,0.0,0.0,0.0,2.5,0.0,0.0,4.0,0.0,2.5,3.5,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,4.0,3.5,4.0,0.0,0.0,0.0,0.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,4.5,3.5,0.0
609,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [13]:
def standardize(row):
    """Mean-center a vector of ratings and scale it by its value range.

    Computes ``(row - row.mean()) / (row.max() - row.min())`` element-wise.

    Parameters
    ----------
    row : pandas.Series (or any array-like with mean/max/min)
        The ratings of a single user.

    Returns
    -------
    Same type as ``row``
        The centered and range-scaled ratings. When every value in ``row`` is
        identical (range of zero) the result is all zeros rather than the
        NaN values a 0/0 division would produce.
    """
    spread = row.max() - row.min()
    if spread == 0:
        # Constant row: there is no deviation from the mean to express, and
        # dividing by a zero range would yield NaN for every entry.
        return row * 0.0
    return (row - row.mean()) / spread

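* Since each user has their own rating standard, we can't assume that user A's rating of 7 means the same as another user's 7. So we use correlation similarity: each user's ratings are mean-centered before computing cosine similarity. This shifted (centered) cosine similarity is also known as Pearson correlation.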

In [15]:
# Standardize each user's row of ratings independently.
shiftedratings = userRatings.apply(standardize, axis='columns')
shiftedratings


title,"'burbs, The (1989)",(500) Days of Summer (2009),10 Cloverfield Lane (2016),10 Things I Hate About You (1999),"10,000 BC (2008)",101 Dalmatians (1996),101 Dalmatians (One Hundred and One Dalmatians) (1961),12 Angry Men (1957),12 Years a Slave (2013),127 Hours (2010),13 Going on 30 (2004),"13th Warrior, The (1999)",1408 (2007),15 Minutes (2001),17 Again (2009),1984 (Nineteen Eighty-Four) (1984),2 Days in the Valley (1996),"2 Fast 2 Furious (Fast and the Furious 2, The) (2003)","20,000 Leagues Under the Sea (1954)",2001: A Space Odyssey (1968),2010: The Year We Make Contact (1984),2012 (2009),21 (2008),21 Grams (2003),21 Jump Street (2012),22 Jump Street (2014),25th Hour (2002),27 Dresses (2008),28 Days (2000),28 Days Later (2002),28 Weeks Later (2007),30 Days of Night (2007),300 (2007),"39 Steps, The (1935)",3:10 to Yuma (2007),40 Days and 40 Nights (2002),"40-Year-Old Virgin, The (2005)","400 Blows, The (Les quatre cents coups) (1959)",48 Hrs. (1982),50 First Dates (2004),...,"Wolf of Wall Street, The (2013)","Wolverine, The (2013)",Women on the Verge of a Nervous Breakdown (Mujeres al borde de un ataque de nervios) (1988),Wonder Boys (2000),Wonder Woman (2017),Working Girl (1988),"World Is Not Enough, The (1999)",World War Z (2013),"World's End, The (2013)",Wreck-It Ralph (2012),"Wrestler, The (2008)",Wyatt Earp (1994),"X-Files: Fight the Future, The (1998)",X-Men (2000),X-Men Origins: Wolverine (2009),X-Men: Apocalypse (2016),X-Men: Days of Future Past (2014),X-Men: First Class (2011),X-Men: The Last Stand (2006),X2: X-Men United (2003),Yellow Submarine (1968),Yes Man (2008),Yojimbo (1961),You Don't Mess with the Zohan (2008),You Only Live Twice (1967),You've Got Mail (1998),Young Frankenstein (1974),Young Guns (1988),Young Guns II (1990),Young Sherlock Holmes (1985),Zack and Miri Make a Porno (2008),Zero Dark Thirty (2012),Zero Effect (1998),Zodiac (2007),Zombieland (2009),Zoolander (2001),Zootopia (2016),eXistenZ (1999),xXx (2002),¡Three Amigos! (1986)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,0.718466,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,...,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,0.918466,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,0.918466,-0.081534,-0.081534,0.518466,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,-0.081534,0.718466
2,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,...,0.990877,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123,0.590877,-0.009123,-0.009123,-0.009123,-0.009123,-0.009123
3,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,0.096871,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,...,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,0.096871,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129,-0.003129
4,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,0.943323,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,...,-0.056677,-0.056677,-0.056677,0.743323,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677,-0.056677
5,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,...,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103,-0.014103
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,0.795637,0.595637,-0.204363,-0.204363,0.595637,-0.204363,-0.204363,0.595637,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,...,-0.204363,-0.204363,0.695637,0.495637,-0.204363,0.595637,-0.204363,-0.204363,-0.204363,-0.204363,0.395637,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,0.595637,0.495637,0.495637,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363,-0.204363
607,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,...,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,0.542970,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030,-0.057030
608,-0.208594,-0.208594,-0.208594,-0.208594,-0.208594,-0.208594,-0.208594,-0.208594,-0.208594,-0.208594,-0.208594,-0.208594,-0.208594,-0.208594,-0.208594,0.691406,-0.208594,-0.208594,-0.208594,0.391406,-0.208594,-0.208594,-0.208594,0.291406,-0.208594,-0.208594,0.591406,-0.208594,0.291406,0.491406,-0.208594,-0.208594,0.791406,-0.208594,-0.208594,-0.208594,-0.208594,-0.208594,0.391406,-0.208594,...,-0.208594,-0.208594,-0.208594,-0.208594,-0.208594,-0.208594,0.591406,-0.208594,-0.208594,-0.208594,-0.208594,0.591406,0.491406,0.591406,-0.208594,-0.208594,-0.208594,-0.208594,0.591406,0.591406,-0.208594,-0.208594,-0.208594,-0.208594,-0.208594,-0.208594,-0.208594,-0.208594,-0.208594,-0.208594,-0.208594,-0.208594,-0.208594,-0.208594,-0.208594,0.391406,-0.208594,0.691406,0.491406,-0.208594
609,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,...,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696,-0.009696


In [19]:
# Pairwise cosine similarity between the user rows; with a single argument the
# matrix is compared against itself.
cosine_sim = cosine_similarity(shiftedratings)
cosine_sim

array([[ 1.        , -0.00173309,  0.09966747, ...,  0.19357607,
         0.0869046 ,  0.02798659],
       [-0.00173309,  1.        , -0.00735733, ..., -0.00776071,
         0.02292384,  0.09352694],
       [ 0.09966747, -0.00735733,  1.        , ...,  0.00391998,
        -0.00755131,  0.0176842 ],
       ...,
       [ 0.19357607, -0.00776071,  0.00391998, ...,  1.        ,
         0.10807964,  0.15574605],
       [ 0.0869046 ,  0.02292384, -0.00755131, ...,  0.10807964,
         1.        ,  0.02178186],
       [ 0.02798659,  0.09352694,  0.0176842 , ...,  0.15574605,
         0.02178186,  1.        ]])

In [20]:
# Label both axes of the similarity matrix with the user ids it was computed from.
resultDf = pd.DataFrame(
    data=cosine_sim,
    columns=shiftedratings.index,
    index=shiftedratings.index,
)

In [21]:
# Show the user x user similarity matrix.
resultDf

userId,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,...,571,572,573,574,575,576,577,578,579,580,581,582,583,584,585,586,587,588,589,590,591,592,593,594,595,596,597,598,599,600,601,602,603,604,605,606,607,608,609,610
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1,1.000000,-0.001733,0.099667,0.153894,0.099991,0.066548,0.111214,0.107345,0.048534,-0.043047,0.101242,-0.018980,0.070437,0.083838,0.109460,0.126614,0.227620,0.135110,0.275238,0.090011,0.068833,-0.004652,0.069520,0.105194,0.084028,0.086522,0.207019,0.137863,0.108790,0.071566,0.147902,0.102842,0.115435,0.044720,0.050894,0.044557,0.129516,0.097859,0.307304,0.048167,...,0.057605,0.077610,0.184523,0.071834,0.054388,0.029792,0.292309,-0.021423,0.105683,0.176721,0.021529,0.004871,0.014348,0.052755,0.078171,0.063683,0.072081,0.160294,0.089494,0.235651,0.109042,0.101250,0.133055,0.106948,0.099682,0.034810,0.258018,-0.005714,0.224501,0.223467,0.028237,0.117828,0.187443,0.022531,0.100502,0.084468,0.233070,0.193576,0.086905,0.027987
2,-0.001733,1.000000,-0.007357,-0.024479,0.003352,-0.002074,0.006163,0.014236,-0.011436,0.061922,0.032107,-0.012505,0.036212,0.003742,0.106980,0.080259,0.089144,0.157001,-0.037538,-0.015259,0.069828,0.137196,-0.005307,0.117939,0.243811,-0.010062,-0.024106,0.029467,0.095729,0.146104,-0.014620,-0.002107,0.014205,0.031262,-0.010479,0.049587,0.020261,0.005886,-0.021767,-0.001386,...,-0.014906,0.011198,0.139219,0.027307,-0.009693,-0.009147,-0.010325,-0.007215,-0.018164,0.029308,0.131725,0.218131,0.033301,-0.019921,0.122212,0.077441,-0.026313,0.045023,0.014385,0.009170,0.024141,0.012797,0.046109,-0.010881,-0.009286,0.089792,-0.031541,0.060441,0.077426,-0.028858,0.204684,-0.005633,-0.037888,-0.020663,-0.028730,-0.017526,-0.014355,-0.007761,0.022924,0.093527
3,0.099667,-0.007357,1.000000,-0.014240,-0.000012,-0.013604,-0.016316,-0.000476,-0.007494,-0.014290,-0.011090,-0.008195,-0.007566,-0.003719,0.017082,0.047484,0.003479,0.030440,0.008855,-0.002801,-0.019633,-0.007800,-0.007733,-0.010463,0.003080,-0.006594,-0.011187,0.038381,-0.001126,-0.008507,-0.002662,0.015827,-0.001098,-0.010511,-0.006868,-0.009699,-0.006561,-0.004520,-0.007305,-0.008792,...,0.021064,-0.000976,-0.025744,-0.006847,-0.006352,0.049624,0.061718,-0.004728,-0.000877,0.040431,0.006795,-0.010895,-0.010192,-0.013055,-0.011242,-0.017759,0.015523,-0.002758,-0.000068,0.020695,-0.009923,-0.013550,-0.001783,-0.013726,-0.006085,0.029506,0.031795,-0.004869,0.058677,-0.008352,-0.004443,-0.006443,0.031262,-0.013542,0.004571,-0.006954,0.019612,0.003920,-0.007551,0.017684
4,0.153894,-0.024479,-0.014240,1.000000,0.109161,0.023696,0.073891,0.033297,-0.014463,-0.014970,0.020781,0.024418,0.059358,0.018991,0.019915,0.134726,0.107526,0.038365,0.144285,0.060402,-0.041391,-0.003303,0.065432,0.022521,0.011860,0.044018,0.091476,0.068665,0.022175,0.032518,0.113239,0.180206,0.149957,0.008413,0.029750,0.106339,0.043240,0.054206,0.223314,0.008294,...,-0.012467,0.155398,0.052255,0.046134,0.058302,-0.016855,0.134722,-0.018583,0.068058,0.068143,0.013490,0.011455,0.017750,-0.004053,0.058533,0.004102,0.099847,0.016549,0.037682,0.133756,0.095357,0.025962,0.196737,0.032027,0.076023,0.028222,0.238673,0.004780,0.119455,0.147437,0.045880,0.085006,0.306709,0.011433,0.038186,0.157163,0.085560,0.032674,0.013164,-0.003865
5,0.099991,0.003352,-0.000012,0.109161,1.000000,0.318639,0.084529,0.418207,-0.014645,0.008338,0.171943,0.043796,0.003017,0.210633,0.083886,0.057372,0.138333,0.083477,0.054910,0.067508,0.013714,0.007739,0.049660,0.069859,0.030048,0.285428,0.077082,0.093986,0.047849,0.050004,0.054785,0.219140,0.291428,0.009285,0.302439,0.006147,0.294397,0.362995,0.131058,0.318672,...,0.005977,0.102790,0.091268,0.332765,0.047874,-0.011715,0.097531,-0.009240,0.166781,0.076773,0.040507,-0.021291,0.113306,0.332050,0.008575,0.044683,0.140236,0.370543,0.217374,0.108216,-0.019391,0.347888,0.151400,0.134088,0.065773,0.061436,0.075935,-0.009516,0.036079,0.098895,0.044547,0.415714,0.091166,0.256538,0.140034,0.083842,0.129781,0.082558,0.296775,0.000837
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,0.084468,-0.017526,-0.006954,0.157163,0.083842,-0.017625,0.183220,0.070171,0.055874,0.033452,0.021197,0.045330,0.055928,0.075530,0.141399,0.173904,0.228378,0.165228,0.049718,0.040570,0.033246,0.114334,0.175131,0.057239,0.059875,0.032570,0.054396,0.178598,0.066801,0.056019,0.041930,0.054597,0.146746,0.106770,0.018286,0.073581,0.032112,0.065507,0.114583,0.031941,...,0.030732,0.188036,0.064715,0.018874,0.043565,-0.013438,0.112905,0.003309,0.076969,0.260496,0.157610,0.025390,0.003153,0.001809,0.065079,0.038980,0.137948,0.070412,0.088070,0.220379,0.078752,0.003334,0.198585,0.084347,0.092443,0.091439,0.164837,0.081653,0.145705,0.238759,0.165727,0.047362,0.255091,-0.011190,0.091707,1.000000,0.086672,0.108166,0.054757,0.039223
607,0.233070,-0.014355,0.019612,0.085560,0.129781,0.115705,0.149998,0.163924,-0.014695,-0.043406,0.261230,0.002789,0.016661,0.133102,0.129726,0.063777,0.197121,0.119982,0.218660,0.008761,0.048118,-0.018085,0.018580,0.087543,0.051261,0.161399,0.105447,0.118070,0.067735,0.107047,0.130706,0.115970,0.147718,0.092537,0.031714,0.039763,0.164946,0.130626,0.223205,0.090937,...,0.172680,0.134335,0.168507,0.163381,0.058341,-0.017731,0.244255,-0.018761,0.111802,0.157578,0.061102,0.003159,-0.019697,0.138916,-0.002584,0.090158,0.120509,0.176640,0.152474,0.239169,0.068472,0.146966,0.122709,0.231779,0.020421,0.071520,0.206313,-0.015792,0.155016,0.140350,0.049038,0.159200,0.168077,0.108132,0.076907,0.086672,1.000000,0.196427,0.161082,0.041962
608,0.193576,-0.007761,0.003920,0.032674,0.082558,0.055298,0.277161,0.144739,0.076773,-0.020090,0.111615,-0.019337,0.122316,0.082635,0.159195,0.100481,0.146716,0.188054,0.213630,0.152290,0.087798,0.123386,0.091934,0.075417,0.037428,0.082960,0.042672,0.238589,0.022120,0.042723,0.075533,0.072015,0.101878,0.214802,0.035941,0.033766,0.111548,0.085926,0.185218,0.054369,...,0.046166,0.059185,0.254890,0.103773,0.034552,0.018032,0.108000,-0.044141,0.088261,0.375527,0.138572,-0.028623,0.035976,0.086645,0.129839,0.088635,0.027435,0.110159,0.058358,0.214449,0.138864,0.155628,0.198063,0.142946,0.006976,0.146277,0.073598,0.035355,0.251818,0.215010,0.074698,0.106582,0.079397,0.083411,0.085836,0.108166,0.196427,1.000000,0.108080,0.155746
609,0.086905,0.022924,-0.007551,0.013164,0.296775,0.267619,0.088745,0.490688,-0.011737,0.007815,0.282506,-0.012835,-0.011850,0.336351,0.088585,0.045856,0.192587,0.101740,0.093447,-0.032739,0.047711,0.082817,0.013347,0.101187,0.019619,0.455678,0.058278,0.086508,0.081156,0.083763,0.078989,0.112419,0.188253,0.100870,0.242968,0.033578,0.428239,0.364753,0.065603,0.261880,...,0.033401,0.129640,0.130526,0.441996,0.060328,-0.009388,0.132358,-0.007405,0.110236,0.122952,0.066604,-0.017063,0.062669,0.449543,0.017999,0.091060,0.077131,0.491640,0.289767,0.095634,0.022395,0.441776,0.112635,0.137061,-0.009530,0.033139,0.078062,-0.007626,0.089054,0.093093,0.022059,0.393976,0.025996,0.281789,0.106382,0.054757,0.161082,0.108080,1.000000,0.021782


In [23]:
def get_similar_users(user, n=9, similarity=None):
    """Return the users most similar to ``user``, best match first.

    Parameters
    ----------
    user : hashable
        Label of the user to look up.
    n : int, default 9
        Maximum number of neighbours to return. The default matches the nine
        results this function has always returned.
    similarity : pandas.DataFrame, optional
        Square user x user similarity matrix with the same labels on both
        axes. Defaults to the module-level ``resultDf``.

    Returns
    -------
    list of (float, label) tuples, or None
        ``(similarity, other_user)`` pairs sorted by descending similarity,
        never including ``user`` itself. ``None`` when ``user`` is not in the
        matrix index.
    """
    sim = resultDf if similarity is None else similarity
    if user not in sim.index:
        return None

    # Remove the user's own entry by label. Slicing off "the first result"
    # is wrong whenever another user ties at similarity 1.0 and sorts ahead.
    scores = sim[user].drop(labels=user)

    # Pair each score with the label of the row it came from; sorted() is
    # stable, so ties keep the matrix order.
    ranked = sorted(
        zip(scores.tolist(), scores.index.tolist()),
        key=lambda pair: pair[0],
        reverse=True,
    )
    return ranked[:max(n, 0)]
# Example: look up the nearest neighbours of user 609.
get_similar_users(609)

[(0.6960567001669429, 340),
 (0.6311402543681582, 379),
 (0.5907199512187908, 130),
 (0.5802949202764973, 485),
 (0.5788502206494248, 498),
 (0.5778127338085962, 54),
 (0.5705500645474516, 126),
 (0.5553812930984423, 56),
 (0.5534646560675168, 179)]