# User-Similarity System
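We pick a target user, find the users whose ratings correlate most strongly with theirs, and recommend the movies those similar users rated highly that the target user has not rated yet.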

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the ratings table (userId, movieId, rating, timestamp).
# The file is comma-separated, which is already read_csv's default delimiter.
movie_ratings = pd.read_csv('ratings.csv')
movie_ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [3]:
# Load the movie metadata (movieId, title, genres); it is joined onto the
# ratings by movieId so that ratings can be referred to by title.
movie_id = pd.read_csv('movies.csv')
movie_id.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [4]:
# Attach title/genres to every rating by joining on the shared movieId key.
# This is an inner join (the default), so a rating whose movieId has no row in
# the metadata table would be dropped.
movies_data = movie_ratings.merge(movie_id, on='movieId')
movies_data.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,1,4.0,964982703,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,5,1,4.0,847434962,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,7,1,4.5,1106635946,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
3,15,1,2.5,1510577970,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
4,17,1,4.5,1305696483,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy


In [5]:
# Genres are not used by the user-similarity approach, so remove the column.
# Rebinding the name (instead of inplace=True) together with errors='ignore'
# keeps this cell safe to re-run: a second execution no longer raises KeyError
# because 'genres' has already been removed.
movies_data = movies_data.drop(columns='genres', errors='ignore')
movies_data.head()

Unnamed: 0,userId,movieId,rating,timestamp,title
0,1,1,4.0,964982703,Toy Story (1995)
1,5,1,4.0,847434962,Toy Story (1995)
2,7,1,4.5,1106635946,Toy Story (1995)
3,15,1,2.5,1510577970,Toy Story (1995)
4,17,1,4.5,1305696483,Toy Story (1995)


In [6]:
# Build the title x user ratings matrix: one row per movie title, one column
# per userId, NaN where that user has not rated that title.
# pivot_table aggregates with the mean by default, so multiple ratings that
# land in the same (title, userId) cell are averaged.
user_movie_grid = pd.pivot_table(
    movies_data,
    values='rating',
    index='title',
    columns='userId',
)
user_movie_grid.head()

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'71 (2014),,,,,,,,,,,...,,,,,,,,,,4.0
'Hellboy': The Seeds of Creation (2004),,,,,,,,,,,...,,,,,,,,,,
'Round Midnight (1986),,,,,,,,,,,...,,,,,,,,,,
'Salem's Lot (2004),,,,,,,,,,,...,,,,,,,,,,
'Til There Was You (1997),,,,,,,,,,,...,,,,,,,,,,


In [7]:
# Per-user rating statistics, indexed by userId:
#   'mean ratings' - average rating the user has given
#   'count'        - number of ratings the user has given
temp = movies_data.groupby('userId')['rating']
recommender_data = temp.agg(['mean', 'count']).rename(columns={'mean': 'mean ratings'})
recommender_data.head()

Unnamed: 0_level_0,mean ratings,count
userId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,4.366379,232
2,3.948276,29
3,2.435897,39
4,3.555556,216
5,3.636364,44


Let's recommend some movies to the user with id 1.

In [72]:
# The user we are generating recommendations for. Change this single value to
# target a different user.
TARGET_USER_ID = 1

# That user's column of the ratings grid: indexed by title, NaN = not rated.
user_data = user_movie_grid[TARGET_USER_ID]
user_data

title
'71 (2014)                                   NaN
'Hellboy': The Seeds of Creation (2004)      NaN
'Round Midnight (1986)                       NaN
'Salem's Lot (2004)                          NaN
'Til There Was You (1997)                    NaN
                                            ... 
eXistenZ (1999)                              NaN
xXx (2002)                                   NaN
xXx: State of the Union (2005)               NaN
¡Three Amigos! (1986)                        4.0
À nous la liberté (Freedom for Us) (1931)    NaN
Name: 1, Length: 9719, dtype: float64

In [73]:
# Correlate every user's ratings column with the target user's ratings
# (Pearson, the corrwith default). Users for whom the correlation is undefined
# come back as NaN and are discarded.
similar_to_user = user_movie_grid.corrwith(user_data).dropna()
similar_to_user.head()

  c = cov(x, y, rowvar, dtype=dtype)
  c *= np.true_divide(1, fact)


userId
1    1.000000
3    0.079819
4    0.207983
5    0.268749
6   -0.291636
dtype: float64

In [74]:
# Promote the correlation Series to a one-column frame so that further
# per-user attributes can be joined onto it.
rec_user = similar_to_user.to_frame('Similarity')
rec_user.head()

Unnamed: 0_level_0,Similarity
userId,Unnamed: 1_level_1
1,1.0
3,0.079819
4,0.207983
5,0.268749
6,-0.291636


In [75]:
# Add how many ratings each user has given, so low-activity users can be
# filtered out afterwards.
# Selecting only 'Similarity' before joining makes the cell idempotent:
# re-running it no longer fails with "columns overlap but no suffix specified"
# because 'count' was already joined by a previous run.
rec_user = rec_user[['Similarity']].join(recommender_data['count'])
rec_user.head()

Unnamed: 0_level_0,Similarity,count
userId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1.0,232
3,0.079819,39
4,0.207983,216
5,0.268749,44
6,-0.291636,314


In [76]:
# Five most similar users among those with more than 100 ratings.
# The target user (whose id is the name of the user_data Series) is removed by
# label rather than by slicing off the first sorted row: another user can tie
# at Similarity == 1.0, in which case the first row is not guaranteed to be
# the target user and a positional slice would drop a real neighbour instead.
(
    rec_user[rec_user['count'] > 100]
    .drop(index=user_data.name, errors='ignore')
    .sort_values('Similarity', ascending=False)
    .head(5)
)

Unnamed: 0_level_0,Similarity,count
userId,Unnamed: 1_level_1,Unnamed: 2_level_1
139,0.790569,194
210,0.767649,138
369,0.612098,129
351,0.6,141
596,0.563226,411


User 1's ratings correlate most strongly with these users, so we assume that user 1 would also like the movies they rated highly.

In [77]:
# Keep the single most similar user among those with more than 100 ratings.
# The target user (the name of the user_data Series) is excluded by label;
# relying on it being the first sorted row breaks if another user ties at
# Similarity == 1.0.
similar_users = (
    rec_user[rec_user['count'] > 100]
    .drop(index=user_data.name, errors='ignore')
    .sort_values('Similarity', ascending=False)
    .head(1)
)
similar_userId = np.array(similar_users.index)
similar_userId

array([139], dtype=int64)

In [81]:
# A neighbour "liked" a movie if they rated it at least this high.
LIKED_THRESHOLD = 3.5

# Titles the target user has not rated yet (NaN in their ratings column).
not_seen_by_user = user_data.isna()

# Collect, neighbour by neighbour, every title the neighbour liked that the
# target user has not rated. Boolean masks replace the element-by-element
# loop, which indexed a title-labelled Series with integers (a positional
# fallback that newer pandas deprecates) and re-extracted the neighbour's
# column on every iteration.
candidate_titles = []
for neighbour_id in similar_userId:
    # NaN >= threshold is False, so movies the neighbour has not rated are excluded.
    liked_by_neighbour = user_movie_grid[neighbour_id] >= LIKED_THRESHOLD
    wanted = (liked_by_neighbour & not_seen_by_user).to_numpy()
    candidate_titles.extend(user_data.index[wanted])

# With several neighbours the same title can qualify more than once; keep the
# first occurrence only, preserving order.
recommendation_list = list(dict.fromkeys(candidate_titles))
recommendation_list

['Casino Royale (2006)',
 'Corpse Bride (2005)',
 'Dark Knight Rises, The (2012)',
 'Dark Knight, The (2008)',
 "Dr. Horrible's Sing-Along Blog (2008)",
 'Enemy at the Gates (2001)',
 'Hobbit: An Unexpected Journey, The (2012)',
 'Hobbit: The Desolation of Smaug, The (2013)',
 'Incredibles, The (2004)',
 'Lord of the Rings: The Fellowship of the Ring, The (2001)',
 'Lord of the Rings: The Return of the King, The (2003)',
 'Lord of the Rings: The Two Towers, The (2002)',
 'Patriot, The (2000)',
 'Serenity (2005)',
 'Skyfall (2012)',
 'Space Cowboys (2000)',
 'Star Wars: Episode VII - The Force Awakens (2015)',
 'U-571 (2000)',
 'Watchmen (2009)',
 'We Were Soldiers (2002)',
 'X-Men: First Class (2011)']

I guess these are pretty good but this reco