In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd

In [2]:
df = pd.read_csv("movie_dataset.csv")

In [3]:
df.head()

Unnamed: 0,movie_id,title,cast,crew
0,19995,Avatar,"[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,285,Pirates of the Caribbean: At World's End,"[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."
2,206647,Spectre,"[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de..."
3,49026,The Dark Knight Rises,"[{""cast_id"": 2, ""character"": ""Bruce Wayne / Ba...","[{""credit_id"": ""52fe4781c3a36847f81398c3"", ""de..."
4,49529,John Carter,"[{""cast_id"": 5, ""character"": ""John Carter"", ""c...","[{""credit_id"": ""52fe479ac3a36847f813eaa3"", ""de..."


In [4]:
import pandas as pd
import ast

# Convert string representation of lists/dicts to actual lists/dicts
df['cast'] = df['cast'].apply(ast.literal_eval)
df['crew'] = df['crew'].apply(ast.literal_eval)

In [None]:
# Extract important features from 'cast' and 'crew' columns (e.g., actors, directors)
df['actors'] = df['cast'].apply(lambda x: [actor['name'] for actor in x])
df['directors'] = df['crew'].apply(lambda x: [crew_member['name'] for crew_member in x if crew_member['job'] == 'Director'])

In [None]:
# Create a new 'features' column containing a combination of relevant features
df['features'] = df['actors'] + df['directors'] 
df['features'] = df['features'].apply(lambda x: ' '.join(x))  # Convert the list of features into a string
# Now df contains a new column 'features' which combines actors and directors' names

In [5]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Feature extraction using TF-IDF
tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(df['features'])
cosine_similarities = cosine_similarity(tfidf_matrix, tfidf_matrix)


In [6]:
def get_recommendations(movie_title, cosine_similarities, df):
    movie_index = df[df['title'] == movie_title].index[0]
    similar_movies = list(enumerate(cosine_similarities[movie_index]))
    similar_movies = sorted(similar_movies, key=lambda x: x[1], reverse=True)
    recommended_movies = []
    for movie in similar_movies[1:11]: 
        recommended_movies.append(df.iloc[movie[0]]['title'])
    return recommended_movies

In [15]:
# Implementing the Model
input_movie = "Batman"
recommendations = get_recommendations(input_movie, cosine_similarities, df)
print("Recommended movies for {}: {}".format(input_movie, recommendations))

Recommended movies for Batman: ['Batman Returns', 'Corpse Bride', 'Superman IV: The Quest for Peace', 'Batman & Robin', 'Alpha and Omega: The Legend of the Saw Tooth Cave', 'Batman Forever', 'Some Like It Hot', 'I Dreamed of Africa', 'Shaft', 'Conan the Destroyer']


In [16]:
# Implementing the Model
input_movie = "Titanic"
recommendations = get_recommendations(input_movie, cosine_similarities, df)
print("Recommended movies for {}: {}".format(input_movie, recommendations))

Recommended movies for Titanic: ['Les Misérables', 'The Departed', 'Gangs of New York', 'The Bounty', 'Jason Bourne', 'Return of the Jedi', "The World's End", 'Becoming Jane', 'Terminator 2: Judgment Day', 'Star Trek']


In [28]:
# Implementing the Model
input_movie = "Men in Black"
recommendations = get_recommendations(input_movie, cosine_similarities, df)
print("Recommended movies for {}: {}".format(input_movie, recommendations))

Recommended movies for Men in Black: ['Men in Black II', 'Get Shorty', 'The Addams Family', 'Death to Smoochy', 'Big Trouble', 'The Oh in Ohio', 'Men in Black 3', "What's the Worst That Could Happen?", 'What Planet Are You From?', 'Rocky Balboa']
