# Recommendation using SVD

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from numpy.linalg import svd

In [3]:
mnv = pd.read_csv("/content/drive/My Drive/temp/movies_and_reviews.csv")

In [10]:
mnv.head()

Unnamed: 0,movie_title,movie_popularity,production_companies,movie_description,movie_cast,movie_duration,movie_genre,movie_director,movie_writer,movie_revenue,movie_year_of_release,reviewer_name,reviewer_rating,review_text,average_cpi,adjustment_factor,adjusted_revenue,revenue_per_minute,user_idx,item_idx
0,the birth of a nation (1915),0.00182,['Epoch Film Co.'],"Two families, abolitionist Northerners the Sto...","['Alberta Lee', 'Allan Sears', 'Alma Rubens', ...",194.0,"['Biography', 'Drama', 'History', 'War']","['D.W. Griffith', 'Nate Parker']","['D.W. Griffith', 'Frank E. Woods', 'Nate Park...",15800000.0,1915,Cineanalyst,10.0,"Before ""The Birth of a Nation,"" motion picture...",10.108333,30.970091,489327400.0,2522306.0,9869,1509
1,the birth of a nation (1915),0.00182,['Epoch Film Co.'],"Two families, abolitionist Northerners the Sto...","['Alberta Lee', 'Allan Sears', 'Alma Rubens', ...",194.0,"['Biography', 'Drama', 'History', 'War']","['D.W. Griffith', 'Nate Parker']","['D.W. Griffith', 'Frank E. Woods', 'Nate Park...",15800000.0,1915,Auburn668,10.0,D.W. Griffith's Civil War shorts were only a p...,10.108333,30.970091,489327400.0,2522306.0,4059,1509
2,the birth of a nation (1915),0.00182,['Epoch Film Co.'],"Two families, abolitionist Northerners the Sto...","['Alberta Lee', 'Allan Sears', 'Alma Rubens', ...",194.0,"['Biography', 'Drama', 'History', 'War']","['D.W. Griffith', 'Nate Parker']","['D.W. Griffith', 'Frank E. Woods', 'Nate Park...",15800000.0,1915,mozart182,9.0,This is a film which every movie buff really d...,10.108333,30.970091,489327400.0,2522306.0,255454,1509
3,the birth of a nation (1915),0.00182,['Epoch Film Co.'],"Two families, abolitionist Northerners the Sto...","['Alberta Lee', 'Allan Sears', 'Alma Rubens', ...",194.0,"['Biography', 'Drama', 'History', 'War']","['D.W. Griffith', 'Nate Parker']","['D.W. Griffith', 'Frank E. Woods', 'Nate Park...",15800000.0,1915,smithmjsjsmith,10.0,I can completely understand your frustration w...,10.108333,30.970091,489327400.0,2522306.0,321644,1509
4,the birth of a nation (1915),0.00182,['Epoch Film Co.'],"Two families, abolitionist Northerners the Sto...","['Alberta Lee', 'Allan Sears', 'Alma Rubens', ...",194.0,"['Biography', 'Drama', 'History', 'War']","['D.W. Griffith', 'Nate Parker']","['D.W. Griffith', 'Frank E. Woods', 'Nate Park...",15800000.0,1915,sharkey197,7.0,As I read these comments on this most controve...,10.108333,30.970091,489327400.0,2522306.0,314911,1509


In [4]:
# getting indexes for unique users and items
le = LabelEncoder()
mnv['user_idx'] = le.fit_transform(mnv['reviewer_name'])
mnv['item_idx'] = le.fit_transform(mnv['movie_title'])

In [5]:
data = mnv[['user_idx', 'item_idx', 'reviewer_rating']]
data = data.drop_duplicates(subset=['user_idx', 'item_idx'], keep='first')

# pivot the dataframe
pivoted_df = data.pivot(index='user_idx', columns='item_idx', values='reviewer_rating')

# remove index and column names
pivoted_df.index.name = None
pivoted_df.columns.name = None
pivoted_df = pivoted_df.dropna(axis=1, how='all')
pivoted_df = pivoted_df.fillna(0)
pivoted_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2092,2093,2094,2095,2096,2097,2098,2099,2100,2101
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
371198,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
371199,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
371200,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
371201,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
matrix = pivoted_df.values
u, s, vh = svd(matrix, full_matrices=False)

In [7]:
u.shape, s.shape, vh.shape

((371203, 2102), (2102,), (2102, 2102))

In [8]:
# cosine simlilarity for 1d arrays
def cosine_similarity(x, y):
    return np.dot(x, y)/ (np.linalg.norm(x) * np.linalg.norm(y))

def similar_movies(movie_id, vh1, n_elements=10):
    similarity = {}
    vh2 = vh1[:n_elements, :]
    for col in range(0, vh2.shape[1]):
        similarity[col] = cosine_similarity(vh2[:, movie_id], vh2[:, col])
    return similarity

In [14]:
# function to get recommendations based on the movie index
def get_recommendations(movie_name, vh, top_n):
    id = mnv[mnv['movie_title'].str.lower() == movie_name.lower()].index
    if len(id) == 0:
        return f"Movie titled '{movie_name}' not found in the dataset."
    id = id[0]

    target_movie = mnv.iloc[id]
    print("Target Movie:")
    print(f"Title: {target_movie['movie_title']}")
    print(f"Description: {target_movie['movie_description']}")
    print(f"Genre: {target_movie['movie_genre']}")
    print("-----")

    movie_predictions = similar_movies(id, vh, top_n)
    recommendations = [(movie_id, sim) for movie_id, sim in movie_predictions.items() if movie_id != id]
    recommendations = sorted(recommendations, key=lambda x: x[1], reverse=True)
    top_10_recommendations = recommendations[:10]

    # Display the top recommendations
    print("Top 10 Recommendations:")
    for movie_id, similarity_score in top_10_recommendations:
        movie_info = mnv[mnv['item_idx'] == movie_id]
        if not movie_info.empty:
            title = movie_info['movie_title'].values[0]
            genres = movie_info['movie_genre'].values[0]
            print(f"Title: {title}")
            print(f"Genres: {genres}")
            print(f"Similarity Score: {similarity_score}\n")
        else:
            print(f"Movie ID {movie_id} not found in the movies dataset.\n")

In [15]:
get_recommendations('the birth of a nation (1915)', vh, 10)

Target Movie:
Title: the birth of a nation (1915)
Description: Two families, abolitionist Northerners the Stonemans and Southern landowners the Camerons, intertwine. When Confederate colonel Ben Cameron is captured in battle, nurse Elsie Stoneman petitions for his pardon. In Reconstruction-era South Carolina, Cameron founds the Ku Klux Klan, battling Elsie's congressman father and his African-American protégé, Silas Lynch.
Genre: ['Biography', 'Drama', 'History', 'War']
-----
Top 10 Recommendations:
Title: finding dory (2016)
Genres: ['Adventure', 'Animation', 'Comedy', 'Family', 'Kids']
Similarity Score: 0.9935968597632173

Title: fantastic beasts and where to find them (2016)
Genres: ['Adventure', 'Family', 'Fantasy']
Similarity Score: 0.9601014070746123

Title: it follows (2014)
Genres: ['Horror', 'Mystery', 'Thriller']
Similarity Score: 0.9513839291432175

Title: ex machina (2014)
Genres: ['Drama', 'Mystery', 'Science Fiction', 'Thriller']
Similarity Score: 0.9373116592777028

Titl

  return np.dot(x, y)/ (np.linalg.norm(x) * np.linalg.norm(y))
