# **Types of Recommender System**
1. Content Based (used in this project)
2. Collabrative Filtering Based
3. Hybrid Based

# Import Dependencies

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

# Import Dataset

In [None]:
from google.colab import drive
drive.mount('/content/drive')

movies = pd.read_csv('/content/drive/MyDrive/tmdb_5000_movies.csv')
credits = pd.read_csv('/content/drive/MyDrive/tmdb_5000_credits.csv')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
movies.shape
credits.shape

(4803, 4)

In [None]:
print(movies.columns)
credits.columns

Index(['budget', 'genres', 'homepage', 'id', 'keywords', 'original_language',
       'original_title', 'overview', 'popularity', 'production_companies',
       'production_countries', 'release_date', 'revenue', 'runtime',
       'spoken_languages', 'status', 'tagline', 'title', 'vote_average',
       'vote_count'],
      dtype='object')


Index(['movie_id', 'title', 'cast', 'crew'], dtype='object')

# Merge both datasets

In [None]:
movies = (movies.merge(credits, on='title'))
movies.shape

(4809, 23)

In [None]:
movies.columns

Index(['budget', 'genres', 'homepage', 'id', 'keywords', 'original_language',
       'original_title', 'overview', 'popularity', 'production_companies',
       'production_countries', 'release_date', 'revenue', 'runtime',
       'spoken_languages', 'status', 'tagline', 'title', 'vote_average',
       'vote_count', 'movie_id', 'cast', 'crew'],
      dtype='object')

# Separate Important columns

In [None]:
movies = movies[['movie_id', 'title', 'overview', 'genres', 'keywords', 'cast', 'crew']]
print(movies.shape)

(4809, 7)


In [None]:
movies = movies.dropna()
movies.shape

(4806, 7)

In [None]:
movies.isnull().sum()

Unnamed: 0,0
movie_id,0
title,0
overview,0
genres,0
keywords,0
cast,0
crew,0


In [None]:
movies.iloc[0]['movie_id']

19995

**literal_eval()**
a function that convert string into a list

In [None]:
import ast

def convert(text):
  if isinstance(text, list): # Check if text is already a list
    return text
  l = []
  for i in ast.literal_eval(text):
    l.append(i['name'])
  return l

# Assuming 'fetch_director' is meant to extract director's name from 'crew'
def fetch_director(text):
    if isinstance(text, list):
      return text
    l = []
    for i in ast.literal_eval(text):
        if i['job'] == 'Director':
            l.append(i['name'])
    return l

# Convert genres, keywords, and cast columns
movies['genres'] = movies['genres'].apply(convert)
movies['keywords'] = movies['keywords'].apply(convert)

#Change for 'cast' column to handle lists
def convert_cast(text):
  if isinstance(text, list):
    return text
  else:
    l = []
    counter = 0
    for i in ast.literal_eval(text):
      if counter < 3:
        l.append(i['name'])
      counter += 1
    return l

movies['cast'] = movies['cast'].apply(convert_cast)

# Apply transformation to the crew column to fetch the director
movies['crew'] = movies['crew'].apply(fetch_director)

# Split the overview column
movies['overview'] = movies['overview'].apply(lambda x: x.split())

# Now print the updated DataFrame with the applied changes
movies.head(3)

Unnamed: 0,movie_id,title,overview,genres,keywords,cast,crew
0,19995,Avatar,"[In, the, 22nd, century,, a, paraplegic, Marin...","[Action, Adventure, Fantasy, Science Fiction]","[culture clash, future, space war, space colon...","[Sam Worthington, Zoe Saldana, Sigourney Weaver]",[James Cameron]
1,285,Pirates of the Caribbean: At World's End,"[Captain, Barbossa,, long, believed, to, be, d...","[Adventure, Fantasy, Action]","[ocean, drug abuse, exotic island, east india ...","[Johnny Depp, Orlando Bloom, Keira Knightley]",[Gore Verbinski]
2,206647,Spectre,"[A, cryptic, message, from, Bond’s, past, send...","[Action, Adventure, Crime]","[spy, based on novel, secret agent, sequel, mi...","[Daniel Craig, Christoph Waltz, Léa Seydoux]",[Sam Mendes]


In [None]:
def remove_spaces(L):
  l = []
  for i in L:
    l.append(i.replace(" ", ""))
  return l

In [None]:
movies['cast'] = movies['cast'].apply(remove_spaces)
movies['crew'] = movies['crew'].apply(remove_spaces)
movies['overview'] = movies['overview'].apply(remove_spaces)
movies['genres'] = movies['genres'].apply(remove_spaces)
movies['keywords'] = movies['keywords'].apply(remove_spaces)
movies.head(2)

Unnamed: 0,movie_id,title,overview,genres,keywords,cast,crew
0,19995,Avatar,"[In, the, 22nd, century,, a, paraplegic, Marin...","[Action, Adventure, Fantasy, ScienceFiction]","[cultureclash, future, spacewar, spacecolony, ...","[SamWorthington, ZoeSaldana, SigourneyWeaver]",[JamesCameron]
1,285,Pirates of the Caribbean: At World's End,"[Captain, Barbossa,, long, believed, to, be, d...","[Adventure, Fantasy, Action]","[ocean, drugabuse, exoticisland, eastindiatrad...","[JohnnyDepp, OrlandoBloom, KeiraKnightley]",[GoreVerbinski]


In [None]:
movies['tags'] = movies['overview'] + movies['genres'] + movies['keywords'] + movies['cast'] + movies['crew']
movies.head(2)

Unnamed: 0,movie_id,title,overview,genres,keywords,cast,crew,tags
0,19995,Avatar,"[In, the, 22nd, century,, a, paraplegic, Marin...","[Action, Adventure, Fantasy, ScienceFiction]","[cultureclash, future, spacewar, spacecolony, ...","[SamWorthington, ZoeSaldana, SigourneyWeaver]",[JamesCameron],"[In, the, 22nd, century,, a, paraplegic, Marin..."
1,285,Pirates of the Caribbean: At World's End,"[Captain, Barbossa,, long, believed, to, be, d...","[Adventure, Fantasy, Action]","[ocean, drugabuse, exoticisland, eastindiatrad...","[JohnnyDepp, OrlandoBloom, KeiraKnightley]",[GoreVerbinski],"[Captain, Barbossa,, long, believed, to, be, d..."


In [None]:
new_df = movies[['movie_id', 'title', 'tags']]
new_df.head(2)

Unnamed: 0,movie_id,title,tags
0,19995,Avatar,"[In, the, 22nd, century,, a, paraplegic, Marin..."
1,285,Pirates of the Caribbean: At World's End,"[Captain, Barbossa,, long, believed, to, be, d..."


In [None]:
new_df['tags'] = new_df['tags'].apply(lambda x: " ".join(x))
new_df.head(2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['tags'] = new_df['tags'].apply(lambda x: " ".join(x))


Unnamed: 0,movie_id,title,tags
0,19995,Avatar,"In the 22nd century, a paraplegic Marine is di..."
1,285,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha..."


In [None]:
new_df.iloc[0]['tags']

'In the 22nd century, a paraplegic Marine is dispatched to the moon Pandora on a unique mission, but becomes torn between following orders and protecting an alien civilization. Action Adventure Fantasy ScienceFiction cultureclash future spacewar spacecolony society spacetravel futuristic romance space alien tribe alienplanet cgi marine soldier battle loveaffair antiwar powerrelations mindandsoul 3d SamWorthington ZoeSaldana SigourneyWeaver JamesCameron'

In [None]:
new_df['tags'] = new_df['tags'].apply(lambda x: x.lower())
new_df.head(2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['tags'] = new_df['tags'].apply(lambda x: x.lower())


Unnamed: 0,movie_id,title,tags
0,19995,Avatar,"in the 22nd century, a paraplegic marine is di..."
1,285,Pirates of the Caribbean: At World's End,"captain barbossa, long believed to be dead, ha..."


In [None]:
new_df.shape

(4806, 3)

In [None]:
import nltk
from nltk.stem.porter import PorterStemmer
ps = PorterStemmer()

In [None]:
def stems(text):
  l = []
  for i in text.split():
    l.append(ps.stem(i))
  return " ".join(l)

In [None]:
new_df['tags'] = new_df['tags'].apply(stems)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['tags'] = new_df['tags'].apply(stems)


In [None]:
new_df.iloc[0]['tags']

'in the 22nd century, a parapleg marin is dispatch to the moon pandora on a uniqu mission, but becom torn between follow order and protect an alien civilization. action adventur fantasi sciencefict cultureclash futur spacewar spacecoloni societi spacetravel futurist romanc space alien tribe alienplanet cgi marin soldier battl loveaffair antiwar powerrel mindandsoul 3d samworthington zoesaldana sigourneyweav jamescameron'

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features=5000, stop_words='english')

In [None]:
vector = cv.fit_transform(new_df['tags']).toarray()
vector.shape

(4806, 5000)

In [None]:
vector

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
similarity = cosine_similarity(vector)
similarity

array([[1.        , 0.08346223, 0.0860309 , ..., 0.04499213, 0.        ,
        0.        ],
       [0.08346223, 1.        , 0.06063391, ..., 0.02378257, 0.        ,
        0.02615329],
       [0.0860309 , 0.06063391, 1.        , ..., 0.02451452, 0.        ,
        0.        ],
       ...,
       [0.04499213, 0.02378257, 0.02451452, ..., 1.        , 0.03962144,
        0.04229549],
       [0.        , 0.        , 0.        , ..., 0.03962144, 1.        ,
        0.08714204],
       [0.        , 0.02615329, 0.        , ..., 0.04229549, 0.08714204,
        1.        ]])

In [None]:
similarity.shape

(4806, 4806)

In [None]:
new_df[new_df['title'] == 'The Lego Movie'].index[0]

744

In [None]:
def recommend(movie):
    index = new_df[new_df['title'] == movie].index[0]
    distances = sorted(list(enumerate(similarity[index])),reverse=True,key = lambda x: x[1])
    for i in distances[1:6]:
        print(new_df.iloc[i[0]].title)

In [None]:
recommend('Gandhi')

Gandhi, My Father
Guiana 1838
The Wind That Shakes the Barley
Mr. Turner
A Passage to India


In [143]:
import pickle
pickle.dump(new_df, open('movies.pkl', 'wb'))
pickle.dump(similarity, open('similarity.pkl', 'wb'))

In [144]:
from setuptools import setup, find_packages

with open('requirements.txt', 'w') as f:
    for package in ['numpy', 'pandas', 'scikit-learn', 'nltk']:
        f.write(f'{package}\n')

In [145]:
try:
  setup(
    name='movie_recommender_system',
    version='0.1',
    packages=find_packages(),
    install_requires=['numpy', 'pandas', 'scikit-learn', 'nltk'],
)
except SystemExit:
    print("SystemExit occurred. Likely due to -f option. Ignoring for now.")

SystemExit occurred. Likely due to -f option. Ignoring for now.


In [160]:
# app.py
import streamlit as st
import pickle

# Load pre-trained model and data
with open('movies.pkl', 'rb') as f:
    movies_df = pickle.load(f)
with open('similarity.pkl', 'rb') as f:
    similarity = pickle.load(f)

def recommend(movie_title):
    movie_index = movies_df[movies_df['title'] == movie_title].index[0]
    distances = sorted(list(enumerate(similarity[movie_index])), reverse=True, key=lambda x: x[1])
    recommended_movies = [movies_df.iloc[i[0]]['title'] for i in distances[1:6]]
    return recommended_movies

st.title('Movie Recommender System')
movie_title = st.text_input('Enter a movie title:')
if movie_title:
    recommendations = recommend(movie_title)
    st.write('Recommendations:')
    for rec in recommendations:
        st.write(rec)




In [162]:
from pyngrok import ngrok
import streamlit as st

# Set up ngrok with your authtoken
ngrok.set_auth_token("auth_token")  # Replace with your actual ngrok authtoken

# Create a tunnel for Streamlit (running on port 8501)
public_url = ngrok.connect(8501)
print(f"Streamlit app is live at: {public_url}")

# Run the Streamlit app
!streamlit run app.py


Streamlit app is live at: NgrokTunnel: "https://d791-34-86-127-10.ngrok-free.app" -> "http://localhost:8501"

Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.86.127.10:8501[0m
[0m
[34m  Stopping...[0m
[34m  Stopping...[0m
