# Movie Recommendation System Project - Final Recommendation System

### Importing packages

In [1]:
import numpy as np 
import pandas as pd 

from sklearn import preprocessing
from sklearn.feature_extraction import text
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import NMF
from sklearn.metrics.pairwise import cosine_similarity

### Reading the CSV

In [2]:
# Load the modelling dataset and use the combined "title + year" label as the row index
movie_df = pd.read_csv('movie_model.csv').set_index('Title Year')

## 1. Creating the recommendation system

In [3]:
# Create TF-IDF vectors from the pre-processed plot text.
# The stop-word set is scikit-learn's English list plus a few corpus-specific words.
my_stop_words = text.ENGLISH_STOP_WORDS.union(['film', 'wa', 'ha', 'asks', 'say', 'tell', 'live'])
# TfidfVectorizer validates stop_words as 'english', a list or None, so the
# frozenset built above is converted to a list before being handed over.
vectorizer = TfidfVectorizer(stop_words=list(my_stop_words))
doc_word = vectorizer.fit_transform(movie_df['Plot modeling'])

# Create the topic model: 30 NMF components, fixed seed so the topics are reproducible
nmf_model = NMF(n_components=30, random_state=10, max_iter=1000)
doc_topic = nmf_model.fit_transform(doc_word)

# Dataframe of topic weights: one row per movie, one column per topic.
# NOTE: NMF weights are non-negative loadings, not normalised probabilities.
doc_topic_nmf = pd.DataFrame(doc_topic.round(5), index=movie_df.index)

# Pairwise similarity between movies based on the degree to which they belong to each topic
# (with a single argument, cosine_similarity compares the rows of the matrix with each other)
cosine_sim = cosine_similarity(doc_topic_nmf)



### The function

In [4]:
# Lookup Series mapping an ordered numerical position to each movie's
# "Title Year" label; the function uses it to turn a title into a row number
indices = pd.Series(movie_df.index)
indices.head(5)

# Define the function that takes a movie title with year as input and returns the top 10 recommended movies
def recommendations(title, origin=None, cosine_sim=cosine_sim, top_n=10):
    """Return the titles of the movies most similar to ``title``.

    Relies on the module-level ``indices`` Series (position -> title) and the
    ``movie_df`` dataframe, whose row order must match ``cosine_sim``.

    Args:
        title: Movie label exactly as it appears in the index
            (e.g. ``'The Notebook 2004'``).
        origin: If not ``None``, only movies whose ``'Origin/Ethnicity'``
            value equals this are returned. ``None`` disables the filter.
        cosine_sim: Square similarity matrix; row ``i`` holds the similarity
            of movie ``i`` to every movie.
        top_n: Maximum number of titles to return.

    Returns:
        A list of at most ``top_n`` titles, ordered from most to least
        similar. The queried movie itself is never included.

    Raises:
        IndexError: If ``title`` is not present in ``indices``.
    """
    # Position of the first movie whose label matches the title
    matches = indices[indices == title].index
    if len(matches) == 0:
        raise IndexError(f"title {title!r} not found in the movie index")
    idx = matches[0]

    # Candidate positions ordered by descending similarity. A stable sort keeps
    # tied movies in dataset order, so repeated calls give the same ranking.
    scores = np.asarray(cosine_sim[idx], dtype=float)
    ranked = np.argsort(-scores, kind="stable")

    titles = movie_df.index
    # Read the origin column once instead of materialising a row per candidate
    origins = None if origin is None else movie_df['Origin/Ethnicity'].to_numpy()

    recommended_movies = []
    for i in ranked:
        if len(recommended_movies) >= top_n:
            break
        # Skip the queried movie explicitly: a duplicate plot can tie with it
        # at the top of the ranking, so it is not guaranteed to be at rank 0.
        if i == idx:
            continue
        if origins is not None and origins[i] != origin:
            continue
        recommended_movies.append(titles[i])

    return recommended_movies

## 2. Testing the recommendation system

In [5]:
# Closest matches to 'The Notebook 2004' with no origin filter
recommendations('The Notebook 2004', origin=None)

['Mr. Duck Steps Out 1940',
 'Ammakkilikkoodu 2003',
 'Bwakaw 2012',
 'Vanakkam Chennai 2013',
 'Woman Hater 1948',
 'Girlfriend 2004',
 'Prelude to a Kiss 1992',
 'Road to Paradise 1930',
 'Wuthering Heights 1939',
 'Seedan 2011']

In [7]:
# Same query, restricted to movies whose 'Origin/Ethnicity' is 'American'
recommendations('The Notebook 2004', origin='American')

['Mr. Duck Steps Out 1940',
 'Prelude to a Kiss 1992',
 'Road to Paradise 1930',
 'Wuthering Heights 1939',
 'Spread 2009',
 'Love Potion No. 9 1992',
 'The Mostly Unfabulous Social Life of Ethan Green 2005',
 'Hello, My Name Is Doris 2016',
 'Winnie the Pooh: Seasons of Giving 1999',
 'Devotion 1946']

In [8]:
# Same query, restricted to movies whose 'Origin/Ethnicity' is 'Telugu'
recommendations('The Notebook 2004', origin='Telugu')

['Prema Tarangalu 1980',
 'Atharintiki Daaredi 2013',
 'Chitram Bhalare Vichitram 2016',
 'Chitram Bhalare Vichitram 1992',
 'Chitram Bhalare Vichitram 1991',
 'Avunu Valliddaru Ista Paddaru 2002',
 'Sobha 1958',
 'Ilavelpu 1956',
 'Shankardada Zindabad 2007',
 'Manam 2014']

In [9]:
# Closest 'American' matches to 'The Godfather 1972'
recommendations('The Godfather 1972', origin='American')

['The Godfather Part II 1974',
 'Addams Family Values 1993',
 'Gotti 1996',
 'Aftermath 2017',
 'Two Against the World 1932',
 'House of Cards 1969',
 'A Stranger Among Us 1992',
 'Men of Respect 1991',
 'Men of Respect 1990',
 'The Romance of Rosy Ridge 1947']

In [10]:
# Closest 'Chinese' matches to 'The Godfather 1972'
recommendations('The Godfather 1972', origin='Chinese')

['Shanghai Story 2004',
 'Jingzhe 2004',
 'To Live 1994',
 'Moon Castle: The Space Adventure 2011',
 'Sacrifice 2010',
 'Time Raiders 2016',
 'The Treatment 2001',
 'The Flying Machine 2013',
 'Tie Xi Qu: West of the Tracks 2003',
 'The House That Never Dies 2014']

In [11]:
# Closest 'American' matches to 'Bonnie and Clyde 1967'
recommendations('Bonnie and Clyde 1967', origin='American')

['Cairo 1963',
 'Taxi 2004',
 'Plunder Road 1957',
 'A Cop 1972',
 'Thunderbolt and Lightfoot 1974',
 'Pain & Gain 2013',
 'They Made Me a Killer 1946',
 'Topkapi 1964',
 'Falling Down 1993',
 'The Driver 1978']

In [12]:
# Closest 'American' matches to 'Avatar 2009'
recommendations('Avatar 2009', origin='American')

['Wizards 1977',
 'American Ninja 3: Blood Hunt 1989',
 'Swamp Thing 1982',
 'Indiana Jones and the Kingdom of the Crystal Skull 2008',
 'Daybreakers 2010',
 'Omega Doom 1997',
 'The Great Wall 2017',
 'Terminator Genisys 2015',
 'Gog 1954',
 'The Day the Earth Stood Still 2008']