In [2]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import ast
import pickle

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
movies = pd.read_csv('data/tmdb_5000_movies.csv')
credits = pd.read_csv('data/tmdb_5000_credits.csv') 

movies = movies.merge(credits, on='title')
movies = movies[['movie_id','title','overview','genres','keywords','cast','crew']]
movies.dropna(inplace=True)


In [4]:
movies.head()

Unnamed: 0,movie_id,title,overview,genres,keywords,cast,crew
0,19995,Avatar,"In the 22nd century, a paraplegic Marine is di...","[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...","[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...","[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,285,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...","[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...","[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...","[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."
2,206647,Spectre,A cryptic message from Bond’s past sends him o...,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...","[{""id"": 470, ""name"": ""spy""}, {""id"": 818, ""name...","[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de..."
3,49026,The Dark Knight Rises,Following the death of District Attorney Harve...,"[{""id"": 28, ""name"": ""Action""}, {""id"": 80, ""nam...","[{""id"": 849, ""name"": ""dc comics""}, {""id"": 853,...","[{""cast_id"": 2, ""character"": ""Bruce Wayne / Ba...","[{""credit_id"": ""52fe4781c3a36847f81398c3"", ""de..."
4,49529,John Carter,"John Carter is a war-weary, former military ca...","[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...","[{""id"": 818, ""name"": ""based on novel""}, {""id"":...","[{""cast_id"": 5, ""character"": ""John Carter"", ""c...","[{""credit_id"": ""52fe479ac3a36847f813eaa3"", ""de..."


In [6]:
movies['genres'] = movies['genres'].apply(lambda x : [dict_['name'] for dict_ in ast.literal_eval(x)])
movies['keywords'] = movies['keywords'].apply(lambda x : [dict_['name'] for dict_ in ast.literal_eval(x)])
movies['cast'] = movies['cast'].apply(lambda x: [dict_['name'] for i, dict_ in enumerate(ast.literal_eval(x)) if i < 3])
movies['crew'] = movies['crew'].apply(lambda x: [dict_['name'] for dict_ in ast.literal_eval(x) if dict_['job'].lower() == 'director'])

movies['overview'] = movies['overview'].apply(lambda x:x.split())

In [5]:
movies.head()

Unnamed: 0,movie_id,title,overview,genres,keywords,cast,crew
0,19995,Avatar,"In the 22nd century, a paraplegic Marine is di...","[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...","[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...","[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,285,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...","[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...","[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...","[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."
2,206647,Spectre,A cryptic message from Bond’s past sends him o...,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...","[{""id"": 470, ""name"": ""spy""}, {""id"": 818, ""name...","[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de..."
3,49026,The Dark Knight Rises,Following the death of District Attorney Harve...,"[{""id"": 28, ""name"": ""Action""}, {""id"": 80, ""nam...","[{""id"": 849, ""name"": ""dc comics""}, {""id"": 853,...","[{""cast_id"": 2, ""character"": ""Bruce Wayne / Ba...","[{""credit_id"": ""52fe4781c3a36847f81398c3"", ""de..."
4,49529,John Carter,"John Carter is a war-weary, former military ca...","[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...","[{""id"": 818, ""name"": ""based on novel""}, {""id"":...","[{""cast_id"": 5, ""character"": ""John Carter"", ""c...","[{""credit_id"": ""52fe479ac3a36847f813eaa3"", ""de..."


In [7]:
# Remove whitespace between names
movies['cast'] = movies['cast'].apply(lambda x: [ele.replace(" ", "") for ele in x])
movies['crew'] = movies['crew'].apply(lambda x: [ele.replace(" ", "") for ele in x])
movies['genres'] = movies['genres'].apply(lambda x: [ele.replace(" ", "") for ele in x])

In [8]:
movies['tags'] = movies['overview'] + movies['genres'] + movies['keywords'] + movies['cast'] + movies['crew']
new = movies.drop(columns=['overview','genres','keywords','cast','crew'])
new['tags'] = new['tags'].apply(lambda x: " ".join(x))

In [9]:
new.head()

Unnamed: 0,movie_id,title,tags
0,19995,Avatar,"In the 22nd century, a paraplegic Marine is di..."
1,285,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha..."
2,206647,Spectre,A cryptic message from Bond’s past sends him o...
3,49026,The Dark Knight Rises,Following the death of District Attorney Harve...
4,49529,John Carter,"John Carter is a war-weary, former military ca..."


In [10]:
import re
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords
stop_words = set(stopwords.words('english'))
porter_stemmer=PorterStemmer()

def my_preprocessor(text):
    text=text.lower() 
    text=re.sub("\\W"," ",text) # remove special chars
    words = text.split()
    filtered_words = [word for word in words if word not in stop_words] # stop words
    stemmed_words=[porter_stemmer.stem(word=word) for word in filtered_words] #stemming
    return ' '.join(stemmed_words)

In [20]:
new.shape

(4806, 3)

In [15]:
cvectorizer = CountVectorizer(max_features=4500, preprocessor=my_preprocessor)
    
vector = cvectorizer.fit_transform(new['tags']).toarray()

In [19]:
vector.shape

(4806, 4500)

In [16]:
similarity = cosine_similarity(vector)

In [17]:
similarity = np.array(similarity, dtype=np.float16)

In [18]:
similarity.shape

(4806, 4806)

In [21]:
def recommend(movie):
    index = new[new['title'] == movie].index[0]
    distances = sorted(list(enumerate(similarity[index])),reverse=True,key = lambda x: x[1])
    for tup in distances[1:6]:
        print(new.iloc[tup[0]].title)

In [22]:
recommend('The Dark Knight Rises')

The Dark Knight
Batman Begins
Batman
Batman
Batman Returns


In [23]:
pickle.dump(similarity, open('similarity.pkl','wb'))

In [24]:
new[['movie_id', 'title']].to_csv("movie_info.csv", index=False)

In [30]:
from typing import List, Tuple, Dict, Any
import requests


In [31]:
class TMDBMovieDataProvider:
    def __init__(self, bearer_token: str):
        self.bearer_token = bearer_token
        self.base_url = "https://api.themoviedb.org/3"
        self.image_base_url = "http://image.tmdb.org/t/p/w500"
        self.headers = {
            "accept": "application/json",
            "Authorization": f"Bearer {bearer_token}"
        }
        self.session = requests.Session()
        self.session.headers.update(self.headers)

    def get_movie_data(self, movie_id: int) -> Dict[str, Any]:
        try:
            url = f"{self.base_url}/movie/{movie_id}?language=en-US"
            response = self.session.get(url, timeout=10)
            response.raise_for_status()  # Raise an exception for bad status codes
            return response.json()
        except requests.exceptions.RequestException as e:
            st.error(f"Error fetching movie data: {str(e)}")
            # Return a default movie data structure to prevent app crash
            return {
                'overview': 'Movie data unavailable',
                'release_date': 'Unknown',
                'vote_average': 0.0,
                'genres': [],
                'poster_path': None
            }

    def __del__(self):
        self.session.close()
data_provider = TMDBMovieDataProvider(
        bearer_token="eyJhbGciOiJIUzI1NiJ9.eyJhdWQiOiJjZWYzMWJkOTZmOTdlNDFkNmUzNGIzYjAzMjkyZmFlNSIsIm5iZiI6MTY0NTk4NDIxOS44NzEsInN1YiI6IjYyMWJiOWRiODM5ZDkzMDAxYzVlNTU0YSIsInNjb3BlcyI6WyJhcGlfcmVhZCJdLCJ2ZXJzaW9uIjoxfQ.s-rdtw3qtLm_tXUWkMtw1HIW0WP8-KeD2opeRm-sQP8"
    )

In [32]:
data_provider.get_movie_data(19995)

NameError: name 'st' is not defined

In [33]:
import requests

url = "https://api.themoviedb.org/3/movie/19995?language=en-US"

headers = {
    "accept": "application/json",
    "Authorization": "Bearer eyJhbGciOiJIUzI1NiJ9.eyJhdWQiOiJjZWYzMWJkOTZmOTdlNDFkNmUzNGIzYjAzMjkyZmFlNSIsIm5iZiI6MTY0NTk4NDIxOS44NzEsInN1YiI6IjYyMWJiOWRiODM5ZDkzMDAxYzVlNTU0YSIsInNjb3BlcyI6WyJhcGlfcmVhZCJdLCJ2ZXJzaW9uIjoxfQ.s-rdtw3qtLm_tXUWkMtw1HIW0WP8-KeD2opeRm-sQP8"
}

response = requests.get(url, headers=headers)

print(response.text)

ConnectionError: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None))