# 1. Importing Libraries

In [14]:
import polars as pl
import os
import joblib

# 2. Defining File Paths & Loading the Dataset

In [12]:
# Cached TF-IDF artefacts, written next to the notebook by the feature step.
VECTORIZER_PATH = "tfidf_vectorizer.joblib"
TFIDF_MATRIX_PATH = "tfidf_matrix.joblib"

# Input dataset (CSV) read by the loading cell.
DATA_PATH = "/content/netflix_mood_recommender_test_corrected.csv"

In [18]:
# Read the whole CSV at DATA_PATH into a polars DataFrame.
df = pl.read_csv(source=DATA_PATH)

# 3. Process Features

In [19]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Build the text each row is vectorised from. This runs whether or not a cache
# exists, so `df` has the same columns either way. Nulls are replaced with ""
# first: concatenating a null in polars yields null, and TfidfVectorizer cannot
# tokenise None.
df = df.with_columns(
    (
        pl.col('mood').fill_null("") + " " + pl.col('description').fill_null("")
    ).alias('combined_text')
)

tfidf = None
tfidf_matrix = None

if os.path.exists(VECTORIZER_PATH) and os.path.exists(TFIDF_MATRIX_PATH):
    # NOTE(review): joblib.load unpickles arbitrary objects -- only load cache
    # files that this notebook wrote itself.
    tfidf = joblib.load(VECTORIZER_PATH)
    tfidf_matrix = joblib.load(TFIDF_MATRIX_PATH)

    # A cache built from a different CSV has a different number of rows; using
    # it would make the similarity scores misalign with `df`, so discard it.
    if tfidf_matrix.shape[0] != df.height:
        tfidf = None
        tfidf_matrix = None

if tfidf_matrix is None:
    tfidf = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf.fit_transform(df['combined_text'].to_list())

    # Save the vectorizer and matrix using Joblib
    joblib.dump(tfidf, VECTORIZER_PATH)
    joblib.dump(tfidf_matrix, TFIDF_MATRIX_PATH)

# 4. Function to Recommend Shows

In [25]:
import random
from sklearn.metrics.pairwise import cosine_similarity

def recommend_show(user_mood, num_recommendations, df=df, tfidf=tfidf, tfidf_matrix=tfidf_matrix):
    """Return titles whose mood label contains ``user_mood``, most similar first.

    Parameters
    ----------
    user_mood : str
        Mood text, e.g. ``"anger, surprise"``. It is used twice: as the TF-IDF
        query, and as a case-insensitive substring filter on the ``mood`` column.
    num_recommendations : int
        Maximum number of titles to return.
    df : polars.DataFrame
        Frame with ``mood`` and ``title`` columns. It must have one row per row
        of ``tfidf_matrix``, because the similarity scores are attached to it
        as a new column.
    tfidf : fitted vectorizer
        Object whose ``transform`` turns the query into a TF-IDF vector.
    tfidf_matrix : matrix
        TF-IDF matrix the query is compared against.

    The three defaults are bound when this ``def`` is executed, so re-run this
    cell after rebuilding ``df``, ``tfidf`` or ``tfidf_matrix``.

    Returns
    -------
    list or str
        List of titles ordered by descending similarity, or a
        ``"No shows found ..."`` message string when no row matches the mood.
    """
    query_vector = tfidf.transform([user_mood])
    similarity_scores = cosine_similarity(query_vector, tfidf_matrix).flatten()

    # Attach the score of every row so it can be sorted on after filtering.
    scored = df.with_columns(
        pl.Series("similarity", similarity_scores)
    )

    # Case-insensitive substring match. literal=True stops the user's text from
    # being interpreted as a regular expression (e.g. "(" or "+" in the input).
    mood_needle = user_mood.lower()
    matching = scored.filter(
        pl.col("mood").str.to_lowercase().str.contains(mood_needle, literal=True)
    )

    # Sort by similarity and keep the top recommendations.
    recommendations = matching.sort("similarity",
                                    descending=True).head(num_recommendations)

    if len(recommendations) == 0:
        return f"No shows found for the mood '{user_mood}'."

    return recommendations["title"].to_list()

# Testing

In [26]:
# Mood labels in the dataset are stored alphabetically ("anger, surprise"),
# so sort the requested moods into the same order before querying.
user_mood = ", ".join(sorted("anger, surprise".split(", ")))
num_recommendations = 10

recommended_shows = recommend_show(user_mood, num_recommendations)

if isinstance(recommended_shows, str):
    # recommend_show returns a message string instead of a list when nothing
    # matches; looping over it would print the message one character per line.
    print(recommended_shows)
else:
    print(f"Top {num_recommendations} recommended shows for mood '{user_mood}':")
    for show in recommended_shows:
        print(f"- {show}")

Top 10 recommended shows for mood 'anger, surprise':
- Motu Patlu VS Robo Kids
- Antariksha Ke Rakhwale
- Reaction
- Aussie Gold Hunters
- Welcome Mr. President
- Anjaan
- Show Me the Money
- Who Would You Take to a Deserted Island?
- Love Family
- All The Reasons To Forget


Top 10 recommended shows for mood 'anger, surprise':
- Motu Patlu VS Robo Kids
- Antariksha Ke Rakhwale
- Reaction
- Aussie Gold Hunters
- Welcome Mr. President
- Anjaan
- Show Me the Money
- Who Would You Take to a Deserted Island?
- Love Family
- All The Reasons To Forget

In [29]:
# Count how often each mood label occurs; the result is a one-column frame
# whose values are structs.
mood_tally = pl.col("mood").value_counts()
mood_counts = df.select(mood_tally)

In [32]:
# mood_counts has a single column, so every row is a 1-tuple wrapping one entry
# that is indexed by 'mood' and 'count'.
for (entry,) in mood_counts.iter_rows():
    mood, count = entry['mood'], entry['count']
    print(f"{mood}: {count}")

joy, surprise: 1122
fear, joy: 296
anger, disgust: 1985
anger, surprise: 58
fear, sadness: 628
joy, sadness: 461
disgust, joy: 904
fear, surprise: 370
disgust, fear: 977
disgust, surprise: 135
disgust, sadness: 822
anger, joy: 174
anger, fear: 621
anger, sadness: 150
sadness, surprise: 106
