# **Content Based Model**

## 1. Importing Libraries

In [None]:
import polars as pl
import os
import joblib

## 2. Loading the Dataset & Defining File Paths

In [None]:
# Location of the input CSV dataset.
DATA_PATH = "/content/new.csv"

# Cache files for the fitted TF-IDF vectorizer and the matrix it produced.
VECTORIZER_PATH = "tfidf_vectorizer.joblib"
TFIDF_MATRIX_PATH = "tfidf_matrix.joblib"

In [None]:
# Read the CSV at DATA_PATH into a Polars DataFrame; later cells use its
# `mood`, `description` and `title` columns.
df = pl.read_csv(DATA_PATH)

## 3. Process Features

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Reuse the cached vectorizer/matrix only when both files exist AND the cached
# matrix still has one row per dataset row; otherwise (re)build and re-save.
# The row-count check is a cheap staleness heuristic: it catches a dataset that
# grew or shrank, not one whose text changed while keeping the same length.
# NOTE: joblib.load unpickles arbitrary objects -- only load cache files that
# this notebook wrote itself.
tfidf = tfidf_matrix = None
if os.path.exists(VECTORIZER_PATH) and os.path.exists(TFIDF_MATRIX_PATH):
    tfidf = joblib.load(VECTORIZER_PATH)
    tfidf_matrix = joblib.load(TFIDF_MATRIX_PATH)
    if tfidf_matrix.shape[0] != df.height:
        # Stale cache: similarity scores would not line up with df's rows.
        tfidf = tfidf_matrix = None

if tfidf_matrix is None:
    # Concatenate mood and description into one text field per row. Nulls are
    # replaced with "" so a missing value does not turn the whole combined
    # string into null (which TfidfVectorizer cannot process).
    df = df.with_columns(
        (
            pl.col('mood').fill_null("") + " " + pl.col('description').fill_null("")
        ).alias('combined_text')
    )

    tfidf = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf.fit_transform(df['combined_text'].to_list())

    # Save the vectorizer and matrix using Joblib
    joblib.dump(tfidf, VECTORIZER_PATH)
    joblib.dump(tfidf_matrix, TFIDF_MATRIX_PATH)

## 4. Function to recommend shows

In [None]:
import random
from sklearn.metrics.pairwise import cosine_similarity

def recommend_show(user_mood: str, num_recommendations: int, df=df, tfidf=tfidf, tfidf_matrix=tfidf_matrix):
    """Recommend show titles for a mood, ranked by TF-IDF cosine similarity.

    The mood text is vectorized with ``tfidf`` and compared against every row
    of ``tfidf_matrix``. Only rows whose ``mood`` column contains the mood text
    (case-insensitive, plain substring match) are kept, then ordered by
    descending similarity.

    Args:
        user_mood: Mood text to search for, e.g. ``"annoyed"``.
        num_recommendations: Maximum number of titles to return.
        df: DataFrame with ``mood`` and ``title`` columns; must have one row
            per row of ``tfidf_matrix``. Defaults to the module-level ``df``
            as it was when this function was defined.
        tfidf: Fitted vectorizer used to transform ``user_mood``.
        tfidf_matrix: TF-IDF matrix of the dataset rows.

    Returns:
        A list with the ``title`` values of the best matches, or -- when no
        row's mood contains ``user_mood`` -- a message string (not a list).
        Callers must check for the string case before iterating.
    """
    query_vector = tfidf.transform([user_mood])
    similarity_scores = cosine_similarity(query_vector, tfidf_matrix).flatten()

    # Attach the scores to a local copy; the caller's DataFrame is not modified.
    scored = df.with_columns(
        pl.Series("similarity", similarity_scores)
    )

    # Case-insensitive substring filter. literal=True keeps characters such as
    # "+", "(" or "." in the user's text from being interpreted as a regex.
    matches = scored.filter(
        pl.col("mood").str.to_lowercase().str.contains(user_mood.lower(), literal=True)
    )

    # Sort by similarity and keep the top recommendations
    recommendations = matches.sort("similarity", descending=True).head(num_recommendations)

    if recommendations.height == 0:
        return f"No shows found for the mood '{user_mood}'."

    return recommendations["title"].to_list()

# **Testing**

In [None]:
user_mood = "annoyed"
num_recommendations = 20

recommended_shows = recommend_show(user_mood, num_recommendations)
if isinstance(recommended_shows, str):
    # recommend_show returns a message string (not a list) when nothing
    # matches; iterating it would print the message one character per line.
    print(recommended_shows)
else:
    print(f"Top {num_recommendations} recommended shows for mood '{user_mood}':")
    for show in recommended_shows:
        print(f"- {show}")

Top 20 recommended shows for mood 'annoyed':
- title
- mood
- description
- similarity


Top 10 recommended shows for mood 'anger, surprise':
- Motu Patlu VS Robo Kids
- Antariksha Ke Rakhwale
- Reaction
- Aussie Gold Hunters
- Welcome Mr. President
- Anjaan
- Show Me the Money
- Who Would You Take to a Deserted Island?
- Love Family
- All The Reasons To Forget

In [None]:
# Count the occurrences of each distinct `mood` value. The result has a single
# column whose entries are read by key ('mood' and 'count') in the next cell.
mood_counts = df.select(pl.col("mood").value_counts())

In [None]:
# Print one "<mood>: <count>" line per distinct mood value.
for entry, *_ in mood_counts.iter_rows():
    mood, count = entry['mood'], entry['count']
    print(f"{mood}: {count}")

lonely, surprise: 1
fear, sadness: 41
anger, annoyed: 15
excited, fear: 5
relaxed, tense: 13
relaxed, sadness: 88
disgust, sadness: 4
anger, romantic: 5
joy, romantic: 118
fear, scared: 16
annoyed, curious: 75
curious, sadness: 174
annoyed, joy: 13
annoyed, disgust: 7
anger, tense: 302
anger, scared: 10
disgust, tense: 9
fear, surprise: 3
excited, scared: 74
lonely, tense: 2
excited, romantic: 161
excited, joy: 450
anger, disgust: 4
surprise, tense: 56
excited, lonely: 2
annoyed, romantic: 47
surprised, tense: 2
curious, relaxed: 619
excited, surprise: 85
curious, romantic: 98
curious, scared: 62
annoyed, sadness: 79
anger, sadness: 138
annoyed, scared: 7
curious, excited: 444
excited, relaxed: 157
disgust, fear: 1
romantic, tense: 296
relaxed, romantic: 105
joy, scared: 7
joy, sadness: 109
annoyed, surprised: 1
sadness, tense: 460
romantic, surprise: 69
annoyed, relaxed: 77
curious, joy: 20
scared, surprise: 19
curious, surprised: 12
curious, sad: 3
anger, curious: 2
curious, lonely: 