<a href="https://colab.research.google.com/github/aravindh1209/delivering-personalized-movie-recommendations-with-an-ai-driven-matchmaking-system/blob/main/project_aravindh.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import ast
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from google.colab import files
# Load the dataset.
# Try the working directory first so that re-running the notebook does not
# block on an interactive upload prompt when the CSV is already there; only
# ask for an upload when the file is genuinely missing.
DATA_FILE = "tmdb_5000_credits.csv"
uploaded = {}  # stays empty when no upload was needed
try:
    df = pd.read_csv(DATA_FILE)
except FileNotFoundError:
    uploaded = files.upload()
    df = pd.read_csv(DATA_FILE)

# Function to extract 'name' fields from JSON-like strings
def extract_names(data):
    """Return the ``name`` value of every record in a serialized list of dicts.

    Parameters
    ----------
    data : str
        Text containing a Python/JSON-style list literal of dicts, e.g.
        ``"[{'id': 1, 'name': 'Ann'}]"``.

    Returns
    -------
    list
        The ``name`` values in their original order. An empty list is
        returned when ``data`` cannot be parsed or does not evaluate to a
        list/tuple. Records that are not dicts, or that have no ``name``
        key, are skipped instead of aborting the whole row.
    """
    try:
        records = ast.literal_eval(data)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # literal_eval may raise any of these on malformed input; treat all
        # of them as "nothing to extract" so one bad cell cannot break .apply().
        return []

    # A valid literal that is not a sequence of records (e.g. "5" or "'abc'")
    # carries no names.
    if not isinstance(records, (list, tuple)):
        return []

    return [rec['name'] for rec in records if isinstance(rec, dict) and 'name' in rec]

# Parse the serialized cast/crew columns into lists of person names.
df['cast_names'] = df['cast'].apply(extract_names)
df['crew_names'] = df['crew'].apply(extract_names)

# Build one whitespace-separated text document per movie: cast names first,
# then crew names.
df['features'] = df['cast_names'].apply(' '.join) + ' ' + df['crew_names'].apply(' '.join)

# Vectorize using TF-IDF.
# NOTE(review): names are fed in as plain space-separated text together with
# stop_words='english', so the vectorizer presumably scores individual name
# words (minus any that are on the English stop list) rather than whole
# people. Left unchanged so existing results stay the same; confirm whether
# that is the intended model.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['features'])

# Pairwise cosine similarity between every pair of movies.
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Reverse map: movie title -> row index.
# Series.drop_duplicates() compares the *values* (row indices, which are
# already unique), so it never removed repeated titles. Deduplicate on the
# index labels instead, keeping the first row for each title, so a title
# lookup always yields a single row index.
indices = pd.Series(df.index, index=df['title'])
indices = indices[~indices.index.duplicated(keep='first')]

# Recommendation function
def get_recommendations(title, top_n=10):
    """Return the titles of the movies most similar to ``title``.

    Similarity is read from the module-level ``cosine_sim`` matrix; titles are
    resolved to row numbers through the module-level ``indices`` Series and
    mapped back to titles through ``df['title']``.

    Parameters
    ----------
    title : str
        Title to look up in ``indices``.
    top_n : int, default 10
        Maximum number of recommendations. Values <= 0 yield an empty list.

    Returns
    -------
    list of str
        Up to ``top_n`` titles ordered from most to least similar, never
        including the queried row itself.
    str
        A "not found" message when ``title`` is not in ``indices`` (kept for
        backward compatibility; callers should check for ``str``).
    """
    if title not in indices:
        return f"Movie '{title}' not found in the dataset."

    idx = indices[title]
    # A title that occurs more than once makes the lookup return a Series;
    # use the first matching row so the rest of the function sees one index.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    # assumes the stored index value is also the row position in cosine_sim
    # (true for a default RangeIndex) -- TODO confirm if df is re-indexed
    idx = int(idx)

    # Drop the queried row explicitly instead of assuming it sorts first:
    # with tied scores (identical or empty feature rows) another movie can
    # precede it, which would make the movie recommend itself.
    ranked = sorted(
        ((pos, score) for pos, score in enumerate(cosine_sim[idx]) if pos != idx),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # Clamp so a negative top_n cannot turn into a "from the end" slice.
    movie_indices = [pos for pos, _ in ranked[:max(top_n, 0)]]
    return df['title'].iloc[movie_indices].tolist()

# Example usage
if __name__ == "__main__":
    movie_title = "Avatar"  # Change this to test other movies
    recommendations = get_recommendations(movie_title)
    if isinstance(recommendations, str):
        # get_recommendations returns a plain message string when the title
        # is unknown; print it as-is rather than enumerating its characters.
        print(recommendations)
    else:
        print(f"Top recommendations for '{movie_title}':")
        for i, rec in enumerate(recommendations, 1):
            print(f"{i}. {rec}")

Saving tmdb_5000_credits.csv to tmdb_5000_credits.csv
Top recommendations for 'Avatar':
1. Jurassic World
2. Titanic
3. The Hobbit: The Battle of the Five Armies
4. The Dark Knight Rises
5. The Hobbit: The Desolation of Smaug
6. 15 Minutes
7. Live Free or Die Hard
8. Dawn of the Planet of the Apes
9. The Adventures of Tintin
10. A.I. Artificial Intelligence
