In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn
from sklearn.decomposition import TruncatedSVD
import warnings

In [2]:
# Load the two source tables (movie catalogue and per-user ratings) from the
# working directory.
movie, rating = (pd.read_csv(csv_path) for csv_path in ('movies.csv', 'ratings.csv'))

In [3]:
# Preview the first rows of the movies table.
movie.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [4]:
# Preview the first rows of the ratings table.
rating.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [5]:
# Drop the columns the rest of the notebook never uses.  Re-binding the result
# (instead of inplace=True) together with errors='ignore' keeps this cell safe
# to re-run: the in-place version raised KeyError on a second execution
# because the column had already been removed.
movie = movie.drop(columns='genres', errors='ignore')
rating = rating.drop(columns='timestamp', errors='ignore')

In [6]:
# Attach every rating to its movie row; the default (inner) join keeps only
# movieIds present in both tables.
data = pd.merge(movie, rating, on='movieId')

In [7]:
# Preview the merged movie/rating rows.
data.head()

Unnamed: 0,movieId,title,userId,rating
0,1,Toy Story (1995),1,4.0
1,1,Toy Story (1995),5,4.0
2,1,Toy Story (1995),7,4.5
3,1,Toy Story (1995),15,2.5
4,1,Toy Story (1995),17,4.5


In [8]:
# Discard any merged row that has no title.
New_Data = data.dropna(subset=['title'], how='any')

In [9]:
# Number of ratings each title received, as a two-column frame
# (title, rating) where 'rating' holds the count.
Rating_Count = New_Data.groupby('title', as_index=False)['rating'].count()

In [10]:
# Preview the per-title rating counts.
Rating_Count.head()

Unnamed: 0,title,rating
0,'71 (2014),1
1,'Hellboy': The Seeds of Creation (2004),1
2,'Round Midnight (1986),2
3,'Salem's Lot (2004),1
4,'Til There Was You (1997),2


In [11]:
# Rename the count column in one non-mutating step.  rename() skips labels
# that are absent, so re-running this cell is a no-op instead of the KeyError
# raised by the previous copy-column-then-drop(inplace=True) approach.
Rating_Count = Rating_Count.rename(columns={'rating': 'Total_Rating_Count'})

In [12]:
# Render floats with three decimals from here on, then summarise how many
# ratings a title typically has.
pd.options.display.float_format = lambda value: '%.3f' % value
print(Rating_Count.loc[:, 'Total_Rating_Count'].describe())

count   9719.000
mean      10.375
std       22.406
min        1.000
25%        1.000
50%        3.000
75%        9.000
max      329.000
Name: Total_Rating_Count, dtype: float64


In [13]:
# Inspect the top decile of rating counts in 1% steps (0.90 ... 0.99; the
# stop value is exclusive).
upper_quantile_levels = np.arange(.9, 1, .01)
print(Rating_Count['Total_Rating_Count'].quantile(upper_quantile_levels))

0.900    27.000
0.910    30.000
0.920    33.560
0.930    38.000
0.940    42.000
0.950    47.000
0.960    55.000
0.970    64.460
0.980    83.000
0.990   114.640
Name: Total_Rating_Count, dtype: float64


In [14]:
# Annotate every rating row with the total number of ratings its title has.
Final = pd.merge(New_Data, Rating_Count, how='left', on='title')

In [15]:
# Preview the rating rows with their per-title totals attached.
Final.head()

Unnamed: 0,movieId,title,userId,rating,Total_Rating_Count
0,1,Toy Story (1995),1,4.0,215
1,1,Toy Story (1995),5,4.0,215
2,1,Toy Story (1995),7,4.5,215
3,1,Toy Story (1995),15,2.5,215
4,1,Toy Story (1995),17,4.5,215


In [16]:
# Minimum number of ratings a title needs to be kept.
Threshold = 50
# Boolean-mask selection of the rows belonging to sufficiently rated titles.
New_Final = Final.loc[Final['Total_Rating_Count'] >= Threshold]
New_Final.head()

Unnamed: 0,movieId,title,userId,rating,Total_Rating_Count
0,1,Toy Story (1995),1,4.0,215
1,1,Toy Story (1995),5,4.0,215
2,1,Toy Story (1995),7,4.5,215
3,1,Toy Story (1995),15,2.5,215
4,1,Toy Story (1995),17,4.5,215


In [17]:
# User x title matrix of ratings (pivot_table's default aggregation is the
# mean); missing user/title pairs are filled with 0.
Features = pd.pivot_table(
    New_Final,
    index='userId',
    columns='title',
    values='rating',
).fillna(0)
Features.head()

title,10 Things I Hate About You (1999),12 Angry Men (1957),2001: A Space Odyssey (1968),28 Days Later (2002),300 (2007),"40-Year-Old Virgin, The (2005)",A.I. Artificial Intelligence (2001),"Abyss, The (1989)",Ace Ventura: Pet Detective (1994),Ace Ventura: When Nature Calls (1995),...,Willy Wonka & the Chocolate Factory (1971),"Wizard of Oz, The (1939)","Wolf of Wall Street, The (2013)",X-Men (2000),X-Men: The Last Stand (2006),X2: X-Men United (2003),You've Got Mail (1998),Young Frankenstein (1974),Zombieland (2009),Zoolander (2001)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,...,5.0,5.0,0.0,5.0,0.0,0.0,0.0,5.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0
4,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [18]:
# (number of users, number of titles) in the pivoted matrix.
Features.shape

(606, 450)

In [19]:
# Flip the matrix so that each row is a title and each column a user.
X = np.transpose(Features.values)
X.shape

(450, 606)

In [20]:
# Reduce each row of X to 12 components with truncated SVD; random_state is
# pinned so repeated runs use the same seed.
SVD = TruncatedSVD(n_components=12, random_state=17)
Matrix = SVD.fit_transform(X)
Matrix.shape

(450, 12)

In [21]:
# Pairwise correlation between the rows of Matrix.  RuntimeWarnings are
# silenced only for this one computation: the previous global
# warnings.filterwarnings(...) call kept hiding every RuntimeWarning for the
# rest of the kernel session, including ones from unrelated cells.
with warnings.catch_warnings():
    warnings.simplefilter("ignore", category=RuntimeWarning)
    Correlation = np.corrcoef(Matrix)
Correlation.shape

(450, 450)

In [22]:
# Column labels of the feature matrix (the titles), kept both as a pandas
# Index and as a plain Python list for positional lookup.
Movie_Title = Features.columns
Movie_List = Movie_Title.tolist()

In [23]:
# Position of the seed movie among the titles; list.index raises ValueError
# if the title is not present in Movie_List.
seed_title = "Wizard of Oz, The (1939)"
Wizard = Movie_List.index(seed_title)
print(Wizard)

441


In [24]:
# Correlations of the seed movie with every title, then the titles scoring at
# least 0.9.  NOTE(review): the seed title itself is part of the result.
Recommend = Correlation[Wizard]
Movie_Title[Recommend >= 0.9].tolist()

['Animal House (1978)',
 'Beetlejuice (1988)',
 'Big (1988)',
 "Bug's Life, A (1998)",
 'Christmas Story, A (1983)',
 'Crocodile Dundee (1986)',
 'E.T. the Extra-Terrestrial (1982)',
 'Fantasia (1940)',
 'Goonies, The (1985)',
 'Grease (1978)',
 'Honey, I Shrunk the Kids (1989)',
 'Jaws (1975)',
 'Jungle Book, The (1967)',
 'Lady and the Tramp (1955)',
 'Little Mermaid, The (1989)',
 'Mary Poppins (1964)',
 'Romancing the Stone (1984)',
 'Sound of Music, The (1965)',
 'Splash (1984)',
 'To Kill a Mockingbird (1962)',
 'Who Framed Roger Rabbit? (1988)',
 'Wizard of Oz, The (1939)']

In [50]:
# Dump the seed movie's full row of correlation coefficients.
print(Correlation[Wizard, :])

[0.73044285 0.7513778  0.73524344 0.52948559 0.52768435 0.64948809
 0.65196762 0.71106869 0.52861515 0.61668866 0.48296858 0.63839798
 0.88125068 0.69302763 0.75027864 0.67546771 0.78019872 0.82995116
 0.44444486 0.7168575  0.53100436 0.74194728 0.55603249 0.66826941
 0.64722022 0.91444482 0.76846269 0.72982195 0.64271208 0.8911716
 0.6864339  0.64187694 0.72982156 0.70212359 0.86003567 0.81903962
 0.44220451 0.30277233 0.73896933 0.86751694 0.80582104 0.82135927
 0.50828019 0.5948156  0.54538799 0.45010052 0.8049684  0.5790126
 0.69221723 0.92856431 0.81075923 0.78502378 0.42542129 0.93286964
 0.6467682  0.68219408 0.40928976 0.51859924 0.55990364 0.63747181
 0.79962635 0.87668766 0.51049408 0.65761819 0.86713251 0.56479538
 0.55261292 0.51867583 0.38607443 0.66132203 0.48382906 0.59216981
 0.89000198 0.73470931 0.24509501 0.65404389 0.90238803 0.80223698
 0.66231197 0.86374275 0.82745927 0.56145564 0.47759364 0.52532199
 0.68799694 0.60557019 0.68152916 0.73072421 0.7006067  0.789660

In [46]:
# Titles whose correlation with the seed movie is at least 0.9.
strong_matches = Recommend >= 0.9
print(Movie_Title[strong_matches])

Index(['Animal House (1978)', 'Beetlejuice (1988)', 'Big (1988)',
       'Bug's Life, A (1998)', 'Christmas Story, A (1983)',
       'Crocodile Dundee (1986)', 'E.T. the Extra-Terrestrial (1982)',
       'Fantasia (1940)', 'Goonies, The (1985)', 'Grease (1978)',
       'Honey, I Shrunk the Kids (1989)', 'Jaws (1975)',
       'Jungle Book, The (1967)', 'Lady and the Tramp (1955)',
       'Little Mermaid, The (1989)', 'Mary Poppins (1964)',
       'Romancing the Stone (1984)', 'Sound of Music, The (1965)',
       'Splash (1984)', 'To Kill a Mockingbird (1962)',
       'Who Framed Roger Rabbit? (1988)', 'Wizard of Oz, The (1939)'],
      dtype='object', name='title')


In [49]:
# Same lookup with the cut-off relaxed to 0.8.
looser_matches = Recommend >= 0.8
print(Movie_Title[looser_matches])

Index(['Airplane! (1980)', 'Amadeus (1984)', 'Animal House (1978)',
       'Arachnophobia (1990)',
       'Austin Powers: International Man of Mystery (1997)',
       'Austin Powers: The Spy Who Shagged Me (1999)',
       'Back to the Future (1985)', 'Back to the Future Part II (1989)',
       'Back to the Future Part III (1990)', 'Batman Returns (1992)',
       'Beetlejuice (1988)', 'Being John Malkovich (1999)', 'Big (1988)',
       'Blazing Saddles (1974)', 'Blues Brothers, The (1980)',
       'Breakfast Club, The (1985)', 'Bug's Life, A (1998)',
       'Butch Cassidy and the Sundance Kid (1969)', 'Caddyshack (1980)',
       'Casablanca (1942)', 'Christmas Story, A (1983)', 'Citizen Kane (1941)',
       'Close Encounters of the Third Kind (1977)', 'Cool Hand Luke (1967)',
       'Crocodile Dundee (1986)', 'E.T. the Extra-Terrestrial (1982)',
       'Edward Scissorhands (1990)', 'Exorcist, The (1973)', 'Fantasia (1940)',
       'Ferris Bueller's Day Off (1986)', 'Few Good Men, A (199