In [2]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
###### helper functions. Use them when needed #######
def get_title_from_index(index):
	return df[df.index == index]["title"].values[0]

def get_index_from_title(title):
	return df[df.title == title]["index"].values[0]
##################################################

##Step 1: Read CSV File
df = pd.read_csv("movie_dataset.csv")
#print(df.head())
#print(df.columns)

##Step 2: Select Features

features = ['keywords','cast','genres','director']

##Step 3: Create a column in DF which combines all selected features
for feature in features:
    df[feature] = df[feature].fillna("")

def combine_features(row):
    try:
        return row['keywords']+" "+row['cast']+" "+row['genres']+" "+row['director']
    except:
        print("Error:",row)

    
df["combined_features"] = df.apply(combine_features,axis=1)

#print("Combined Features:", df["combined_features"].head())

##Step 4: Create count matrix from this new combined column

cv = CountVectorizer()
count_matrix = cv.fit_transform(df["combined_features"])

##Step 5: Compute the Cosine Similarity based on the count_matrix
cosine_sim = cosine_similarity(count_matrix)
movie_user_likes = input("Enter the movie you have watched ")

## Step 6: Get index of this movie from its title

movie_index = get_index_from_title(movie_user_likes)

similar_movies = list(enumerate(cosine_sim[movie_index]))

## Step 7: Get a list of similar movies in descending order of similarity score
sorted_similar_movies = sorted(similar_movies,key=lambda x:x[1], reverse = True)


## Step 8: Print titles of first 50 movies
i = 0
for movie in sorted_similar_movies:
    print(get_title_from_index(movie[0]))
    i += 1
    if i>50:
        break

Enter the movie you have watched Shutter Island
Shutter Island
The Departed
Body of Lies
Gone Girl
Just Like Heaven
The Town
Fabled
The Revenant
The Walk
The Great Gatsby
My Week with Marilyn
Jack Reacher
The Jungle Book
The Bourne Ultimatum
The General's Daughter
High Crimes
The Mothman Prophecies
The Number 23
GoodFellas
The Girl with the Dragon Tattoo
Buffalo Soldiers
The Bourne Identity
The Aviator
Now Is Good
Heaven is for Real
2:13
The Wolf of Wall Street
Winter's Tale
A Walk Among the Tombstones
The Spanish Prisoner
All the King's Men
Killing Them Softly
Room
Escape from Alcatraz
Pi
Angels & Demons
The Da Vinci Code
The Bourne Supremacy
Dark Water
The Time Traveler's Wife
Self/less
Nerve
A Dangerous Method
Gangs of New York
Zodiac
The Pink Panther
The Ring Two
The Beach
Something Borrowed
Lucky Number Slevin
Derailed
