<a href="https://colab.research.google.com/github/Zohaib-Sathio/Movie_Recommendation_Using_Python/blob/main/Movies_Recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [121]:
#Import the libraries
import pandas as pd
import numpy as np

In [122]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

In [123]:
#Upload file from your machine
from google.colab import files
uploaded = files.upload()

In [124]:
#load the dataset
movies = pd.read_csv('IMDB-Movie-Data.csv')
movies.head(3)

Unnamed: 0,Rank,Title,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore
0,1,Guardians of the Galaxy,"Action,Adventure,Sci-Fi",A group of intergalactic criminals are forced ...,James Gunn,"Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S...",2014,121,8.1,757074,333.13,76.0
1,2,Prometheus,"Adventure,Mystery,Sci-Fi","Following clues to the origin of mankind, a te...",Ridley Scott,"Noomi Rapace, Logan Marshall-Green, Michael Fa...",2012,124,7.0,485820,126.46,65.0
2,3,Split,"Horror,Thriller",Three girls are kidnapped by a man with a diag...,M. Night Shyamalan,"James McAvoy, Anya Taylor-Joy, Haley Lu Richar...",2016,117,7.3,157606,138.12,62.0


In [125]:
#Select important columns
columns = ['Actors', 'Director', 'Genre', 'Title']

In [126]:
#Check for null values
movies[columns].isnull().values.any()

False

In [127]:
movies[columns].head(5)

Unnamed: 0,Actors,Director,Genre,Title
0,"Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S...",James Gunn,"Action,Adventure,Sci-Fi",Guardians of the Galaxy
1,"Noomi Rapace, Logan Marshall-Green, Michael Fa...",Ridley Scott,"Adventure,Mystery,Sci-Fi",Prometheus
2,"James McAvoy, Anya Taylor-Joy, Haley Lu Richar...",M. Night Shyamalan,"Horror,Thriller",Split
3,"Matthew McConaughey,Reese Witherspoon, Seth Ma...",Christophe Lourdelet,"Animation,Comedy,Family",Sing
4,"Will Smith, Jared Leto, Margot Robbie, Viola D...",David Ayer,"Action,Adventure,Fantasy",Suicide Squad


In [128]:
# Define function to store the useful data into a list as a String
def get_important_features(data):
  important_features = []
  for i in range(0, data.shape[0]):
    important_features.append(movies['Actors'][i] + " " + movies['Director'][i] + " " + movies['Genre'][i] + " " + movies['Title'][i])
  return important_features

In [129]:
movies['important_features'] = get_important_features(movies)

In [130]:
movies['important_features'][0]

'Chris Pratt, Vin Diesel, Bradley Cooper, Zoe Saldana James Gunn Action,Adventure,Sci-Fi Guardians of the Galaxy'

In [131]:
# Get count of the words
cm = CountVectorizer().fit_transform(movies['important_features'])

In [132]:
# Determine the similarity between the movie word counts
cs = cosine_similarity(cm)

In [133]:
cs

array([[1.        , 0.1767767 , 0.06085806, ..., 0.0571662 , 0.06537205,
        0.        ],
       [0.1767767 , 1.        , 0.        , ..., 0.        , 0.06933752,
        0.        ],
       [0.06085806, 0.        , 1.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.0571662 , 0.        , 0.        , ..., 1.        , 0.06726728,
        0.        ],
       [0.06537205, 0.06933752, 0.        , ..., 0.06726728, 1.        ,
        0.07161149],
       [0.        , 0.        , 0.        , ..., 0.        , 0.07161149,
        1.        ]])

In [134]:
cs.shape

(1000, 1000)

In [135]:
movies[movies.Title == "Sing"]

Unnamed: 0,Rank,Title,Genre,Description,Director,Actors,Year,Runtime (Minutes),Rating,Votes,Revenue (Millions),Metascore,important_features
3,4,Sing,"Animation,Comedy,Family","In a city of humanoid animals, a hustling thea...",Christophe Lourdelet,"Matthew McConaughey,Reese Witherspoon, Seth Ma...",2016,108,7.2,60545,270.32,59.0,"Matthew McConaughey,Reese Witherspoon, Seth Ma..."


In [136]:
title = 'Sing'

movie_id = movies[movies.Title == title]['Rank'].values[0]

In [137]:
scores = list(enumerate(cs[movie_id]))

In [138]:
sorted_scores = sorted(scores, key = lambda x : x[1], reverse = True)

In [139]:
sorted_scores = sorted_scores[1:]

In [140]:
# print(sorted_scores)

In [141]:
#Printing top 7 recommendations
j = 0
print("Top 7 recommendations for " , title , " are: ")
for item in sorted_scores:
  movie_title = movies[movies.Rank == item[0]]['Title'].values[0]
  print(j+1, " ", movie_title, ' with' , round(item[1], 2)*100 , "% score" )
  j = j + 1
  if j > 6:
    break

Top 7 recommendations for  Sing  are: 
1   King Cobra  with 37.0 % score
2   Safe Haven  with 28.000000000000004 % score
3   I Am Legend  with 25.0 % score
4   Bridge of Spies  with 24.0 % score
5   A Monster Calls  with 21.0 % score
6   Live by Night  with 21.0 % score
7   Prisoners  with 21.0 % score
