## NetReco — Genre-Based Recommendation System

Design a recommendation system that suggests Netflix content based on user-selected genres and available watch time, reducing decision fatigue and improving user satisfaction.

In [88]:
import pandas as pd
import numpy as np

In [90]:
# Read the pre-engineered Netflix feature table from the working directory,
# then preview its first five rows.
df = pd.read_csv(filepath_or_buffer="netflix_features.csv")
df.head(n=5)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,is_movie,duration_num,movie_duration,tv_seasons,num_genres,num_countries,year_added,content_age
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,unknown,United States,2021-09-25,2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm...",1,90.0,90.0,0.0,1,1,2021.0,1.0
1,s2,TV Show,Blood & Water,unknown,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,2021-09-24,2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t...",0,2.0,0.0,2.0,3,1,2021.0,0.0
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",unknown,2021-09-24,2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...,0,1.0,0.0,1.0,3,1,2021.0,0.0
3,s4,TV Show,Jailbirds New Orleans,unknown,unknown,unknown,2021-09-24,2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo...",0,1.0,0.0,1.0,2,1,2021.0,0.0
4,s5,TV Show,Kota Factory,unknown,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,2021-09-24,2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...,0,2.0,0.0,2.0,3,1,2021.0,0.0


#### STEP 1 - CLEANING AND PREPARING GENRE

In [43]:
# Convert the comma-separated genre string into a list of clean genre names.
# The raw values use ", " as the separator, so a plain split(',') would leave a
# leading space on every genre after the first; strip each token to remove it.
# Missing values (anything the split did not turn into a list) pass through unchanged.
df['genre_list'] = df['listed_in'].str.split(',').apply(
    lambda genres: [g.strip() for g in genres] if isinstance(genres, list) else genres
)

# Sanity check: compare the raw string with the parsed list
df[['title', 'listed_in', 'genre_list']].head()

Unnamed: 0,title,listed_in,genre_list
0,Dick Johnson Is Dead,Documentaries,[Documentaries]
1,Blood & Water,"International TV Shows, TV Dramas, TV Mysteries","[International TV Shows, TV Dramas, TV Myste..."
2,Ganglands,"Crime TV Shows, International TV Shows, TV Act...","[Crime TV Shows, International TV Shows, TV ..."
3,Jailbirds New Orleans,"Docuseries, Reality TV","[Docuseries, Reality TV]"
4,Kota Factory,"International TV Shows, Romantic TV Shows, TV ...","[International TV Shows, Romantic TV Shows, ..."


In [45]:
# Preview the columns that the later filtering and scoring steps read.
df.loc[:, ['type', 'genre_list', 'movie_duration', 'tv_seasons', 'content_age']].head(n=5)


Unnamed: 0,type,genre_list,movie_duration,tv_seasons,content_age
0,Movie,[Documentaries],90.0,0.0,1.0
1,TV Show,"[International TV Shows, TV Dramas, TV Myste...",0.0,2.0,0.0
2,TV Show,"[Crime TV Shows, International TV Shows, TV ...",0.0,1.0,0.0
3,TV Show,"[Docuseries, Reality TV]",0.0,1.0,0.0
4,TV Show,"[International TV Shows, Romantic TV Shows, ...",0.0,2.0,0.0


#### STEP 2 - TIME FILTERING

In [54]:
 """
 Filters content based on the user's available time.
    - Movies that fit within available_time
    - TV Shows with low season commitment (≤ 1 season)
"""
def filter_by_time(df, available_time, max_seasons=1):
    """Return the titles a viewer can fit into the time they have available.

    A row is kept when either of these holds:
      - it is a ``Movie`` whose ``movie_duration`` is at most ``available_time``
      - it is a ``TV Show`` whose ``tv_seasons`` is at most ``max_seasons``

    Note that TV shows are not compared against ``available_time`` at all;
    they are only limited by the season cap.

    Parameters
    ----------
    df : pandas.DataFrame
        Catalogue with ``type``, ``movie_duration`` and ``tv_seasons`` columns.
    available_time : int or float
        Longest acceptable movie duration, in the same unit as
        ``movie_duration`` (minutes in this notebook).
    max_seasons : int or float, default 1
        Largest number of seasons a TV show may have and still be kept.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the matching rows, with the original index
        labels preserved, so new columns can be added without touching ``df``.
    """
    fits_as_movie = (df['type'] == 'Movie') & (df['movie_duration'] <= available_time)
    fits_as_show = (df['type'] == 'TV Show') & (df['tv_seasons'] <= max_seasons)
    return df[fits_as_movie | fits_as_show].copy()

In [59]:
# Try out Step 2 with a two-hour viewing window.
available_time = 120  # minutes
time_filtered_df = filter_by_time(df, available_time)

# Show the first ten surviving titles alongside the fields the filter looked at.
time_filtered_df.loc[:, ['title', 'type', 'movie_duration', 'tv_seasons']].head(n=10)

Unnamed: 0,title,type,movie_duration,tv_seasons
0,Dick Johnson Is Dead,Movie,90.0,0.0
2,Ganglands,TV Show,0.0,1.0
3,Jailbirds New Orleans,TV Show,0.0,1.0
5,Midnight Mass,TV Show,0.0,1.0
6,My Little Pony: A New Generation,Movie,91.0,0.0
9,The Starling,Movie,104.0,0.0
10,"Vendetta: Truth, Lies and The Mafia",TV Show,0.0,1.0
11,Bangkok Breaking,TV Show,0.0,1.0
13,Confessions of an Invisible Girl,Movie,91.0,0.0
14,Crime Stories: India Detectives,TV Show,0.0,1.0


#### STEP 3 - Genre Similarity scoring

In [81]:
def compute_genre_score(genres, user_genres):
    """Return the fraction of requested genres that a title covers.

    Matching is case-insensitive and substring-based, so the request
    ``'Drama'`` matches the catalogue genre ``'TV Dramas'``. Each requested
    genre is counted at most once, however many of the title's genres it
    matches.

    Parameters
    ----------
    genres : list of str
        Genres of a single title. Anything that is not a list (for example a
        missing value) scores 0.
    user_genres : str or iterable of str
        Genres the viewer asked for. A single string is treated as one genre
        rather than being iterated character by character.

    Returns
    -------
    float
        Matched requested genres divided by the number of requested genres,
        in the range 0.0 to 1.0. Returns 0.0 when nothing was requested.
    """
    if not isinstance(genres, list):
        return 0.0

    # A bare string would otherwise be looped over one character at a time.
    if isinstance(user_genres, str):
        user_genres = [user_genres]

    # Lower-case once up front; ignore non-string entries instead of crashing on .lower().
    wanted = [genre.lower() for genre in user_genres if isinstance(genre, str)]
    if not wanted:
        # Nothing requested: avoid dividing by zero.
        return 0.0

    available = [genre.lower() for genre in genres if isinstance(genre, str)]

    # any() stops at the first hit, so a requested genre is never counted twice.
    matched = sum(any(want in have for have in available) for want in wanted)
    return matched / len(wanted)


In [83]:
# Genres the viewer is interested in.
user_genres = ['Crime', 'Drama']

# Score every time-filtered title against those genres.
time_filtered_df['genre_score'] = time_filtered_df['genre_list'].apply(
    compute_genre_score, args=(user_genres,)
)

# Best genre matches first.
time_filtered_df.loc[:, ['title', 'genre_list', 'genre_score']].sort_values('genre_score', ascending=False).head(n=10)


Unnamed: 0,title,genre_list,genre_score
6841,Get Shorty,"[Crime TV Shows, TV Comedies, TV Dramas]",1.0
5978,Şubat,"[Crime TV Shows, International TV Shows, TV ...",1.0
1585,Manhunt: Deadly Games,"[Crime TV Shows, TV Dramas, TV Mysteries]",1.0
2603,The Victims' Game,"[Crime TV Shows, International TV Shows, TV ...",1.0
260,The Defeated,"[Crime TV Shows, International TV Shows, TV ...",1.0
5321,Marvel's The Defenders,"[Crime TV Shows, TV Action & Adventure, TV D...",1.0
2602,The Forest of Love: Deep Cut,"[Crime TV Shows, International TV Shows, TV ...",1.0
4523,Blood Pact,"[Crime TV Shows, International TV Shows, TV ...",1.0
5324,20 Minutes,"[Crime TV Shows, International TV Shows, TV ...",1.0
273,Gone for Good,"[Crime TV Shows, International TV Shows, TV ...",1.0


#### STEP 4 - Create Freshness score

In [94]:
# Freshness falls off as 1 / (1 + age): 1.0 at age 0, 0.5 at age 1, and so on.
# Negative ages are clamped to 0 first, because an age of -1 would divide by
# zero (giving inf) and anything below -1 would produce a negative score.
# Missing ages stay missing (NaN) in the result.
time_filtered_df['freshness_score'] = 1 / (1 + time_filtered_df['content_age'].clip(lower=0))

In [96]:
# Spot-check the freshness score against the age it was derived from.
time_filtered_df.loc[:, ['title', 'content_age', 'freshness_score']].head(n=5)

Unnamed: 0,title,content_age,freshness_score
0,Dick Johnson Is Dead,1.0,0.5
2,Ganglands,0.0,1.0
3,Jailbirds New Orleans,0.0,1.0
5,Midnight Mass,0.0,1.0
6,My Little Pony: A New Generation,0.0,1.0


#### STEP 5 - Final Recommendation Score

In [100]:
# Blend weights for the final ranking. They sum to 1, so final_score stays
# within 0-1 whenever both input scores are within 0-1.
GENRE_WEIGHT = 0.7
FRESHNESS_WEIGHT = 0.3

time_filtered_df['final_score'] = (
    GENRE_WEIGHT * time_filtered_df['genre_score']
    + FRESHNESS_WEIGHT * time_filtered_df['freshness_score']
)

In [102]:
# Sanity check: show the two component scores next to the blended result.
time_filtered_df.loc[:, ['title', 'genre_score', 'freshness_score', 'final_score']].head(n=5)

Unnamed: 0,title,genre_score,freshness_score,final_score
0,Dick Johnson Is Dead,0.0,0.5,0.15
2,Ganglands,0.5,1.0,0.65
3,Jailbirds New Orleans,0.0,1.0,0.3
5,Midnight Mass,0.5,1.0,0.65
6,My Little Pony: A New Generation,0.0,1.0,0.3


In [104]:
# Keep only the columns worth presenting, ranked from best to worst final score.
final_recommendations = time_filtered_df.loc[
    :, ['title', 'genre_list', 'content_age', 'final_score']
].sort_values('final_score', ascending=False)

# Top ten picks.
final_recommendations.head(n=10)


Unnamed: 0,title,genre_list,content_age,final_score
3604,Sintonia,"[Crime TV Shows, International TV Shows, TV ...",0.0,1.0
4251,In Family We Trust,"[Crime TV Shows, International TV Shows, TV ...",0.0,1.0
2602,The Forest of Love: Deep Cut,"[Crime TV Shows, International TV Shows, TV ...",0.0,1.0
2783,Freud,"[Crime TV Shows, International TV Shows, TV ...",0.0,1.0
2522,Inhuman Resources,"[Crime TV Shows, International TV Shows, TV ...",0.0,1.0
3940,Quicksand,"[Crime TV Shows, International TV Shows, TV ...",0.0,1.0
2527,Sleepless Society: Two Pillows & A Lost Soul,"[Crime TV Shows, International TV Shows, TV ...",0.0,1.0
2053,Young Wallander,"[Crime TV Shows, International TV Shows, TV ...",0.0,1.0
2792,She,"[Crime TV Shows, International TV Shows, TV ...",0.0,1.0
4632,The Good Cop,"[Crime TV Shows, TV Comedies, TV Dramas]",0.0,1.0
