In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

items_final = pd.read_csv('items_enriched_final.csv')

In [2]:
# Preprocess the movie DataFrame
def preprocess_movies(df):
    # Create a combined column of relevant features for recommendations
    def extract_genres(row):
        # Filter columns for genres where value is 1
        return " ".join([col for col in df.columns[2:15] if row[col] == 1])

    df['combined_features'] = (
        df['title'] + " " +
        df['OMDb_Plot'].fillna('') + " " +
        df['OMDb_Year'].astype(str) + " " +
        df.apply(extract_genres, axis=1)  # Add genres dynamically based on values
    )
    return df

In [3]:
# Vectorize the combined features using TF-IDF
def vectorize_features(movie_data):
    tfidf = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf.fit_transform(movie_data['combined_features'])
    return tfidf, tfidf_matrix

In [27]:
def recommend_movies(selected_ids, movie_data, tfidf_matrix, num_recommendations=10, filters=None):
    # Get the indices of the selected movies
    selected_indices = movie_data[movie_data['id'].isin(selected_ids)].index
    
    # Calculate the average similarity of the selected movies
    selected_tfidf = tfidf_matrix[selected_indices]
    similarity_scores = cosine_similarity(selected_tfidf, tfidf_matrix).mean(axis=0)
    
    # Get the most similar movies
    similar_indices = np.argsort(similarity_scores)[::-1]
    recommended = movie_data.iloc[similar_indices]
    
    # Apply additional filters if provided
    if filters:
        for key, condition in filters.items():
            if key in recommended.columns:
                if callable(condition):  # If the filter is a function (e.g., lambda)
                    recommended = recommended[recommended[key].apply(condition)]
                elif isinstance(condition, (list, tuple)):  # If it's a list of valid values
                    recommended = recommended[recommended[key].isin(condition)]
                else:  # If it's a single value
                    recommended = recommended[recommended[key] == condition]
            else:
                print(f"Warning: Filter key '{key}' not found in movie data columns. Skipping this filter.")
    
    # Exclude the already selected movies
    recommended = recommended[~recommended['id'].isin(selected_ids)]
    
    return recommended.head(num_recommendations)

In [28]:
# Preprocess the movie data
movies_preprocessed = preprocess_movies(items_final)

# Vectorize the movie features
tfidf, tfidf_matrix = vectorize_features(movies_preprocessed)

# Example recommendation: recommend movies similar to "Toy Story" (ID: 1) and more...
recommendation_example = recommend_movies([1, 71, 95, 102, 225], movies_preprocessed, tfidf_matrix)
recommendation_example

Unnamed: 0.1,Unnamed: 0,id,title,Action,Adventure,Animation,Childrens,Comedy,Crime,Documentary,...,Thriller,War,Western,OMDb_Year,OMDb_Runtime,OMDb_Plot,OMDb_imdbVotes,OMDb_Poster,IMDB_Score,combined_features
397,420,422,Aladdin and the King of Thieves,0,0,1,1,1,0,0,...,0,0,0,1996,81,The King of Thieves disrupts Aladdin and Jasmi...,33562,https://m.media-amazon.com/images/M/MV5BODFkMj...,6.4,Aladdin and the King of Thieves The King of T...
928,991,993,Hercules,0,1,1,1,1,0,0,...,0,0,0,1997,93,The son of Zeus and Hera is stripped of his im...,261230,https://m.media-amazon.com/images/M/MV5BMmUyZG...,7.3,Hercules The son of Zeus and Hera is stripped...
1312,1407,1409,The Swan Princess,0,0,1,1,0,0,0,...,0,0,0,1994,90,A power-hungry sorcerer curses a princess to l...,27743,https://m.media-amazon.com/images/M/MV5BMjU3NG...,6.4,The Swan Princess A power-hungry sorcerer curs...
407,430,432,Fantasia,0,0,1,1,0,0,0,...,0,0,0,1940,124,A series of eight famous pieces of classical m...,106455,https://m.media-amazon.com/images/M/MV5BMmM1NG...,7.7,Fantasia A series of eight famous pieces of c...
834,892,894,Home Alone 3,0,0,0,1,1,0,0,...,0,0,0,1997,102,"Alex Pruitt, an 8-year-old boy living in Chica...",130470,https://m.media-amazon.com/images/M/MV5BNmI0Mj...,4.6,"Home Alone 3 Alex Pruitt, an 8-year-old boy l..."
100,102,103,All Dogs Go to Heaven 2,0,0,1,1,0,0,0,...,0,0,0,1996,82,Charlie and Itchy return to Earth to find Gabr...,6831,https://m.media-amazon.com/images/M/MV5BODZmOT...,5.5,All Dogs Go to Heaven 2 Charlie and Itchy ret...
550,586,588,Beauty and the Beast,0,0,1,1,0,0,0,...,0,0,0,1991,84,A prince cursed to spend his days as a hideous...,488205,https://m.media-amazon.com/images/M/MV5BYTY3ND...,8.0,Beauty and the Beast A prince cursed to spend...
96,98,99,Snow White and the Seven Dwarfs,0,0,1,1,0,0,0,...,0,0,0,1937,83,Exiled into the dangerous forest by her wicked...,220477,https://m.media-amazon.com/images/M/MV5BMTQwMz...,7.6,Snow White and the Seven Dwarfs Exiled into t...
382,402,404,Pinocchio,0,0,1,1,0,0,0,...,0,0,0,1940,88,"A living puppet, with the help of a cricket as...",163941,https://m.media-amazon.com/images/M/MV5BZGU3MT...,7.5,"Pinocchio A living puppet, with the help of a..."
558,594,596,The Hunchback of Notre Dame,0,0,1,1,0,0,0,...,0,0,0,1996,91,A deformed bell-ringer must assert his indepen...,175388,https://m.media-amazon.com/images/M/MV5BY2QzMG...,7.0,The Hunchback of Notre Dame A deformed bell-ri...


In [29]:
# Example recommendation: recommend movies similar to "Star wars" (ID: 50) and more...
recommendation_example2 = recommend_movies([50, 82, 89, 135, 204], movies_preprocessed, tfidf_matrix)
recommendation_example2

Unnamed: 0.1,Unnamed: 0,id,title,Action,Adventure,Animation,Childrens,Comedy,Crime,Documentary,...,Thriller,War,Western,OMDb_Year,OMDb_Runtime,OMDb_Plot,OMDb_imdbVotes,OMDb_Poster,IMDB_Score,combined_features
242,251,252,The Lost World: Jurassic Park,1,1,0,0,0,0,0,...,1,0,0,1997,129,A research team is sent to the Jurassic Park S...,453410,https://m.media-amazon.com/images/M/MV5BMGI3ND...,6.6,The Lost World: Jurassic Park A research team ...
167,171,172,The Empire Strikes Back,1,1,0,0,0,0,0,...,0,1,0,1980,124,After the Empire overpowers the Rebel Alliance...,1416078,https://m.media-amazon.com/images/M/MV5BMTkxNG...,8.7,The Empire Strikes Back After the Empire overp...
175,180,181,Return of the Jedi,1,1,0,0,0,0,0,...,0,1,0,1983,9,Deleted scenes from Star Wars: Episode VI - Re...,403,https://m.media-amazon.com/images/M/MV5BOWZlMj...,8.3,Return of the Jedi Deleted scenes from Star W...
1121,1194,1196,Savage Nights,0,0,0,0,0,0,0,...,0,0,0,1992,126,"Jean is 30 years old. He lives with Laura, 17 ...",2451,https://m.media-amazon.com/images/M/MV5BYTQ1Zj...,6.9,Savage Nights Jean is 30 years old. He lives ...
605,644,646,Once Upon a Time in the West,0,0,0,0,0,0,0,...,0,0,1,1968,166,A mysterious stranger with a harmonica joins f...,362269,https://m.media-amazon.com/images/M/MV5BZjYyNG...,8.5,Once Upon a Time in the West A mysterious str...
156,160,161,Top Gun,1,0,0,0,0,0,0,...,0,0,0,1986,109,The Top Gun Naval Fighter Weapons School is wh...,518456,https://m.media-amazon.com/images/M/MV5BZmVjNz...,6.9,Top Gun The Top Gun Naval Fighter Weapons Sch...
837,895,897,Time Tracers,1,1,0,0,0,0,0,...,0,0,0,1997,101,When the discovery of a five-thousand year old...,74,https://m.media-amazon.com/images/M/MV5BMjhkMz...,4.7,Time Tracers When the discovery of a five-tho...
428,451,453,Jaws 3/D,1,0,0,0,0,0,0,...,0,0,0,1983,99,A giant thirty-five-foot shark becomes trapped...,49591,https://m.media-amazon.com/images/M/MV5BYmY3YW...,3.7,Jaws 3/D A giant thirty-five-foot shark becom...
542,576,578,Demolition Man,1,0,0,0,0,0,0,...,0,0,0,1993,115,A police officer is brought out of suspended a...,198520,https://m.media-amazon.com/images/M/MV5BNWY3ZT...,6.7,Demolition Man A police officer is brought ou...
237,246,247,Turbo: A Power Rangers Movie,1,1,0,1,0,0,0,...,0,0,0,1997,99,A space alien is threatening to bring destruct...,10075,https://m.media-amazon.com/images/M/MV5BZGFlYT...,3.7,Turbo: A Power Rangers Movie A space alien is...


In [30]:
# Function to evaluate the recommendation system
def evaluate_recommendation_system(movie_data, tfidf_matrix, test_cases):
    precision_scores = []
    coverage_scores = []
    
    for case in test_cases:
        selected_ids = case['selected_ids']
        relevant_ids = case['relevant_ids']
        
        recommendations = recommend_movies(selected_ids, movie_data, tfidf_matrix, num_recommendations=10)
        recommended_ids = recommendations['id'].tolist()
        
        # Calculate precision
        relevant_and_recommended = set(recommended_ids).intersection(relevant_ids)
        precision = len(relevant_and_recommended) / len(recommended_ids) if recommended_ids else 0
        precision_scores.append(precision)
        
        # Calculate coverage
        coverage = len(relevant_and_recommended) / len(relevant_ids) if relevant_ids else 0
        coverage_scores.append(coverage)
    
    # Return average results
    return {
        'average_precision': np.mean(precision_scores),
        'average_coverage': np.mean(coverage_scores)
    }


In [31]:
# Evaluate the system (fictitious example)
test_cases = [
    {'selected_ids': [1, 71, 95, 102, 225], 'relevant_ids': [623, 542, 247, 812, 989, 266, 1470, 624, 1639, 894, 1066, 1271, 103, 1032, 548, 1015, 596, 418, 501, 99, 705, 965, 404, 625, 1076, 1412, 432, 588, 1219, 420]},  # Test case 1
    {'selected_ids': [50, 82, 89, 135, 204], 'relevant_ids': [73, 398, 546, 222, 223, 183, 897, 963, 636, 1037, 453, 446, 541, 141, 184, 1596, 28, 1411, 1063, 161, 646, 260, 247, 271, 959, 181, 1619, 1196, 457, 80]}   # Test case 2
]

evaluation_results = evaluate_recommendation_system(movies_preprocessed, tfidf_matrix, test_cases)

In [32]:
evaluation_results

{'average_precision': 0.7, 'average_coverage': 0.23333333333333334}