<a href="https://colab.research.google.com/github/animesharma3/Movie-Recommendation/blob/main/Sigmoid_Kernel_based_Recommender_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Importing necessary libraries

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import sigmoid_kernel

import pickle

## Loading Dataset

In [None]:
url = 'https://raw.githubusercontent.com/animesharma3/Movie-Recommendation/main/movies_dataset.csv'
# Number of leading rows to keep. Presumably chosen to bound the size of the
# N x N similarity matrix built later -- TODO confirm.
N_MOVIES = 5001
# Parse only the rows we need instead of loading the whole file and slicing it;
# this also gives a standalone frame rather than a slice of a larger one.
df = pd.read_csv(url, nrows=N_MOVIES)
df.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,budget,genres,id,imdb_id,original_language,original_title,overview,popularity,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count
0,0,15480,160000000,"[{'id': 28, 'name': 'Action'}, {'id': 53, 'nam...",27205,tt1375666,en,Inception,"Cobb, a skilled thief who commits corporate es...",29.108149,"[{'name': 'Legendary Pictures', 'id': 923}, {'...","[{'iso_3166_1': 'GB', 'name': 'United Kingdom'...",2010-07-14,825532800.0,148.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Your mind is the scene of the crime.,Inception,8.1,14075.0
1,1,12481,185000000,"[{'id': 18, 'name': 'Drama'}, {'id': 28, 'name...",155,tt0468569,en,The Dark Knight,Batman raises the stakes in his war on crime. ...,123.167259,"[{'name': 'DC Comics', 'id': 429}, {'name': 'L...","[{'iso_3166_1': 'GB', 'name': 'United Kingdom'...",2008-07-16,1004558000.0,152.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Why So Serious?,The Dark Knight,8.3,12269.0
2,2,14551,237000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",19995,tt0499549,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",185.070892,"[{'name': 'Ingenious Film Partners', 'id': 289...","[{'iso_3166_1': 'US', 'name': 'United States o...",2009-12-10,2787965000.0,162.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Enter the World of Pandora.,Avatar,7.2,12114.0
3,3,17818,220000000,"[{'id': 878, 'name': 'Science Fiction'}, {'id'...",24428,tt0848228,en,The Avengers,When an unexpected enemy emerges and threatens...,89.887648,"[{'name': 'Paramount Pictures', 'id': 4}, {'na...","[{'iso_3166_1': 'US', 'name': 'United States o...",2012-04-25,1519558000.0,143.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Some assembly required.,The Avengers,7.4,12000.0
4,4,26564,58000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",293660,tt1431045,en,Deadpool,Deadpool tells the origin story of former Spec...,187.860492,[{'name': 'Twentieth Century Fox Film Corporat...,"[{'iso_3166_1': 'US', 'name': 'United States o...",2016-02-09,783113000.0,108.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Witness the beginning of a happy ending,Deadpool,7.4,11444.0


## Deleting Unnecessary Columns

In [None]:
# Drop the leftover index columns ("Unnamed: ...") by name rather than by
# position, so re-running this cell cannot remove real columns.
unnamed_columns = [col for col in df.columns if str(col).startswith('Unnamed')]
df = df.drop(columns=unnamed_columns)

In [None]:
# List the remaining column labels to check the result of the drop above.
df.columns

Index(['budget', 'genres', 'id', 'imdb_id', 'original_language',
       'original_title', 'overview', 'popularity', 'production_companies',
       'production_countries', 'release_date', 'revenue', 'runtime',
       'spoken_languages', 'status', 'tagline', 'title', 'vote_average',
       'vote_count'],
      dtype='object')

## Cleaning Text Data

In [None]:
# Replace missing text with empty strings so the fields can be concatenated.
# Assign the result back instead of calling fillna(inplace=True) on a column
# selection: that chained form warns on recent pandas and does not modify
# `df` at all when Copy-on-Write is enabled.
text_columns = ['tagline', 'title', 'overview']
df[text_columns] = df[text_columns].fillna('')

In [None]:
# Build one document per movie from tagline, title and overview.
# Join with a space so the last word of one field is not fused with the first
# word of the next (e.g. "...happy ending" + "Deadpool"); na_rep keeps the
# result free of NaN even if a field is missing.
df['text'] = df['tagline'].str.cat(
    [df['title'], df['overview']],
    sep=' ',
    na_rep='',
)
# Number of missing documents (expected to be 0).
df['text'].isna().sum()

0

In [None]:
# Word-level TF-IDF over unigrams, bigrams and trigrams, with accents folded
# and the built-in English stop-word list removed. No vocabulary pruning:
# every term is kept (min_df=1, max_features=None).
tfidf = TfidfVectorizer(
    analyzer='word',
    token_pattern=r'\w{1,}',
    ngram_range=(1, 3),
    stop_words='english',
    strip_accents='unicode',
    min_df=1,
    max_features=None,
)

In [None]:
# Learn the vocabulary from the combined text and vectorise every movie
# (one row per movie, one column per n-gram).
word_matrix = tfidf.fit_transform(raw_documents=df['text'])

In [None]:
# (number of movies, vocabulary size)
word_matrix.shape

(5001, 315374)

## Model Building

In [None]:
# Pairwise sigmoid-kernel similarity between every pair of movies.
# NOTE(review): with scikit-learn's default gamma (documented as
# 1 / n_features) and coef0 = 1, every score sits just above
# tanh(1) ~= 0.7616, as the output shows; only the relative order of the
# scores is used downstream -- confirm the defaults are intended.
sig = sigmoid_kernel(X=word_matrix, Y=word_matrix)
sig

array([[0.76159549, 0.76159416, 0.76159416, ..., 0.76159416, 0.76159416,
        0.76159416],
       [0.76159416, 0.76159549, 0.76159416, ..., 0.76159416, 0.76159416,
        0.76159417],
       [0.76159416, 0.76159416, 0.76159549, ..., 0.76159416, 0.76159416,
        0.76159416],
       ...,
       [0.76159416, 0.76159416, 0.76159416, ..., 0.76159549, 0.76159416,
        0.76159416],
       [0.76159416, 0.76159416, 0.76159416, ..., 0.76159416, 0.76159549,
        0.76159416],
       [0.76159416, 0.76159417, 0.76159416, ..., 0.76159416, 0.76159416,
        0.76159549]])

In [None]:
# Square matrix: one row and one column per movie.
sig.shape

(5001, 5001)

In [None]:
# Lookup table: original title -> row label in `df`, used to find a movie's
# row in the similarity matrix.
# NOTE(review): titles are not checked for uniqueness here, so a title may map
# to more than one row -- verify against the data.
movie_indices = pd.Series(data=df.index, index=df['original_title'])

In [None]:
def recommend(title, n=10):
    """Return the ``n`` movies most similar to ``title``.

    Similarity is read from the precomputed matrix ``sig``; the query movie
    itself is never part of the result.

    Parameters
    ----------
    title : str
        Key to look up in ``movie_indices`` (built from ``original_title``).
        If several movies share the title, the first one is used.
    n : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        The ``original_title`` and ``title`` columns of the recommended
        movies, ordered from most to least similar.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``movie_indices``.
    ValueError
        If ``n`` is negative.
    """
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n}")
    try:
        idx = movie_indices[title]
    except KeyError:
        raise KeyError(f"{title!r} not found in movie_indices") from None
    # A duplicated title makes the lookup return a Series of row numbers;
    # fall back to the first match so the row selection below stays 1-D.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    scores = sig[idx]
    # Stable descending sort: tied scores keep ascending row order.
    ranked = (-scores).argsort(kind='stable')
    # Remove the query movie explicitly instead of assuming it sorts first;
    # on ties (e.g. identical or empty text) another row could take the top
    # spot and the query would leak into its own recommendations.
    indices = ranked[ranked != idx][:n]
    return df[['original_title', 'title']].iloc[indices]
# Example query: show the movies ranked most similar to 'Inception'.
recommend('Inception')

Unnamed: 0,original_title,title
4031,Cypher,Cypher
4195,Hollywoo,Hollywoo
3461,The Pink Panther,The Pink Panther
247,Mission: Impossible - Rogue Nation,Mission: Impossible - Rogue Nation
3836,What Ever Happened to Baby Jane?,What Ever Happened to Baby Jane?
4613,Frequencies,Frequencies
513,Pitch Perfect 2,Pitch Perfect 2
281,Hancock,Hancock
3743,Batman: Mask of the Phantasm,Batman: Mask of the Phantasm
1007,Once Upon a Time in America,Once Upon a Time in America


In [None]:
# Persist the similarity matrix. The context manager closes (and therefore
# flushes) the file, so the data is completely on disk before anything
# tries to read it back.
with open('sig.sav', 'wb') as file:
    pickle.dump(sig, file)

In [None]:
# Read the saved matrix back to check the round trip; the context manager
# closes the file handle once loading is done.
# NOTE: pickle.load can execute arbitrary code -- only load files you created.
with open('sig.sav', 'rb') as saved:
    sig_loaded = pickle.load(saved)
sig_loaded

array([[0.76159549, 0.76159416, 0.76159416, ..., 0.76159416, 0.76159416,
        0.76159416],
       [0.76159416, 0.76159549, 0.76159416, ..., 0.76159416, 0.76159416,
        0.76159417],
       [0.76159416, 0.76159416, 0.76159549, ..., 0.76159416, 0.76159416,
        0.76159416],
       ...,
       [0.76159416, 0.76159416, 0.76159416, ..., 0.76159549, 0.76159416,
        0.76159416],
       [0.76159416, 0.76159416, 0.76159416, ..., 0.76159416, 0.76159549,
        0.76159416],
       [0.76159416, 0.76159417, 0.76159416, ..., 0.76159416, 0.76159416,
        0.76159549]])