<a href="https://colab.research.google.com/github/Zohaib-Sathio/Movie_Recommendation_Using_Python/blob/main/Movies_Recommendation_V2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#Import the libraries
import pandas as pd
import numpy as np

In [2]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

In [3]:
#Upload file from your machine
from google.colab import files
uploaded = files.upload()

Saving IMDB-Movie-Data.csv to IMDB-Movie-Data.csv


In [4]:
#load the dataset
movies = pd.read_csv('IMDB-Movie-Data.csv')
movies.head(3)

Unnamed: 0,Rank,Title,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
0,1,Guardians of the Galaxy,"Action,Adventure,Sci-Fi",A group of intergalactic criminals are forced ...,James Gunn,"Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S...",2014,121,8.1,757074,333.13,76.0
1,2,Prometheus,"Adventure,Mystery,Sci-Fi","Following clues to the origin of mankind, a te...",Ridley Scott,"Noomi Rapace, Logan Marshall-Green, Michael Fa...",2012,124,7.0,485820,126.46,65.0
2,3,Split,"Horror,Thriller",Three girls are kidnapped by a man with a diag...,M. Night Shyamalan,"James McAvoy, Anya Taylor-Joy, Haley Lu Richar...",2016,117,7.3,157606,138.12,62.0


In [5]:
#Select important columns
columns = ['Actors', 'Director', 'Genre', 'Title', 'Description']

In [6]:
#Check for null values
movies[columns].isnull().values.any()

False

In [7]:
movies[columns].head(5)

Unnamed: 0,Actors,Director,Genre,Title,Description
0,"Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S...",James Gunn,"Action,Adventure,Sci-Fi",Guardians of the Galaxy,A group of intergalactic criminals are forced ...
1,"Noomi Rapace, Logan Marshall-Green, Michael Fa...",Ridley Scott,"Adventure,Mystery,Sci-Fi",Prometheus,"Following clues to the origin of mankind, a te..."
2,"James McAvoy, Anya Taylor-Joy, Haley Lu Richar...",M. Night Shyamalan,"Horror,Thriller",Split,Three girls are kidnapped by a man with a diag...
3,"Matthew McConaughey,Reese Witherspoon, Seth Ma...",Christophe Lourdelet,"Animation,Comedy,Family",Sing,"In a city of humanoid animals, a hustling thea..."
4,"Will Smith, Jared Leto, Margot Robbie, Viola D...",David Ayer,"Action,Adventure,Fantasy",Suicide Squad,A secret government agency recruits some of th...


In [8]:
# Define function to store the useful data into a list as a String
def get_important_features(data):
  important_features = []
  for i in range(0, data.shape[0]):
    important_features.append(movies['Actors'][i] + " " + movies['Director'][i] + " " + movies['Genre'][i] + " " + movies['Title'][i]+ " " + movies['Description'][i])
  return important_features

In [9]:
movies['important_features'] = get_important_features(movies)

In [10]:
movies['important_features'][0]

'Chris Pratt, Vin Diesel, Bradley Cooper, Zoe Saldana James Gunn Action,Adventure,Sci-Fi Guardians of the Galaxy A group of intergalactic criminals are forced to work together to stop a fanatical warrior from taking control of the universe.'

In [11]:
# Get count of the words
cm = CountVectorizer().fit_transform(movies['important_features'])

In [12]:
# Determine the similarity between the movie word counts
cs = cosine_similarity(cm)

In [13]:
cs

array([[1.        , 0.25692801, 0.21021383, ..., 0.2306328 , 0.25391836,
        0.13543224],
       [0.25692801, 1.        , 0.15384615, ..., 0.10127394, 0.1672484 ,
        0.08920516],
       [0.21021383, 0.15384615, 1.        , ..., 0.10127394, 0.13937367,
        0.0594701 ],
       ...,
       [0.2306328 , 0.10127394, 0.10127394, ..., 1.        , 0.13762047,
        0.11744404],
       [0.25391836, 0.1672484 , 0.13937367, ..., 0.13762047, 1.        ,
        0.09697623],
       [0.13543224, 0.08920516, 0.0594701 , ..., 0.11744404, 0.09697623,
        1.        ]])

In [14]:
cs.shape

(1000, 1000)

In [15]:
movies[movies.Title == "Sing"]

Unnamed: 0,Rank,Title,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore,important_features
3,4,Sing,"Animation,Comedy,Family","In a city of humanoid animals, a hustling thea...",Christophe Lourdelet,"Matthew McConaughey,Reese Witherspoon, Seth Ma...",2016,108,7.2,60545,270.32,59.0,"Matthew McConaughey,Reese Witherspoon, Seth Ma..."


In [16]:
title = 'Sing'

movie_id = movies[movies.Title == title]['Rank'].values[0]

In [17]:
scores = list(enumerate(cs[movie_id]))

In [18]:
sorted_scores = sorted(scores, key = lambda x : x[1], reverse = True)

In [19]:
sorted_scores = sorted_scores[1:]

In [20]:
# print(sorted_scores)

In [21]:
#Printing top 7 recommendations
j = 0
print("Top 7 recommendations for " , title , " are: ")
for item in sorted_scores:
  movie_title = movies[movies.Rank == item[0]]['Title'].values[0]
  print(j+1, " ", movie_title, ' with' , round(item[1], 2)*100 , "% score" )
  j = j + 1
  if j > 6:
    break

Top 7 recommendations for  Sing  are: 
1   The Last Face  with 43.0 % score
2   Mommy  with 40.0 % score
3   Boyka: Undisputed IV  with 40.0 % score
4   Slumdog Millionaire  with 39.0 % score
5   Nocturnal Animals  with 37.0 % score
6   Atonement  with 37.0 % score
7   A Cure for Wellness  with 37.0 % score
