In [2]:
#Installation of necessary libraries
!pip install -q requests scikit-learn

#Import required packages
import requests
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import time


In [4]:
API_KEY = 'INSERT API KEY'
BASE_URL = 'https://api.themoviedb.org/3'

In [5]:
def get_max_korean_dramas(max_pages=500):  #we are using 100 pages worth of data which is ~2000 kdramas
    all_dramas = []

    for page in range(1, max_pages + 1):
        url = f'{BASE_URL}/discover/tv?api_key={API_KEY}&language=en-US&sort_by=popularity.desc&page={page}&with_original_language=ko'
        response = requests.get(url)

        if response.status_code != 200:
            print(f"Error on page {page}, status code {response.status_code}")
            break

        data = response.json()
        if 'results' not in data or not data['results']:
            print(f"Stopped at page {page}, no more data.")
            break

        for drama in data['results']:
            all_dramas.append({
                'title': drama['name'],
                'overview': drama.get('overview', ''),
                'popularity': drama['popularity'],
                'vote_average': drama['vote_average'],
                'genre_ids': drama['genre_ids'],
                'poster_path': f"https://image.tmdb.org/t/p/w500{drama['poster_path']}" if drama.get('poster_path') else None
            })

        #Sleep for stability
        time.sleep(0.2)

    return pd.DataFrame(all_dramas)

#Fetching all dramas
df_kdrama = get_max_korean_dramas(max_pages=500)
print(f"Total dramas fetched: {len(df_kdrama)}")


Stopped at page 487, no more data.
Total dramas fetched: 9717


In [13]:
#TMDb genre ID mappings for TV shows
genre_dict = {
    18: "Drama", 10749: "Romance", 35: "Comedy",
    10765: "Sci-Fi & Fantasy", 80: "Crime", 9648: "Mystery",
    10759: "Action & Adventure", 16: "Animation", 99: "Documentary"
}

#genre ID lists converted to human-readable strings
def map_genres(genre_ids):
    return ", ".join([genre_dict.get(i, "Other") for i in genre_ids])

df_kdrama['genres'] = df_kdrama['genre_ids'].apply(map_genres)


In [14]:
#Drop rows without overview text
df_kdrama.dropna(subset=['overview'], inplace=True)

#Combine genres and plot descriptions into one feature column
df_kdrama['features'] = df_kdrama['genres'] + " " + df_kdrama['overview']

#Vectorize features using TF-IDF
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df_kdrama['features'])

#Compute cosine similarity between all dramas
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)


In [15]:
#recommendation function
def recommend(title, cosine_sim=cosine_sim):
    # Find index of the drama matching the input title (case-insensitive)
    idx = df_kdrama[df_kdrama['title'].str.lower() == title.lower()].index
    if not idx.any():
        return "Drama not found in dataset."

    idx = idx[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[1:6]
    drama_indices = [i[0] for i in sim_scores]

    return df_kdrama[['title', 'genres', 'poster_path']].iloc[drama_indices]


In [16]:
#Example usage
title_input = "Vincenzo"

print(f"Top recommendations for: {title_input}")
recommend(title_input)


Top recommendations for: Vincenzo


Unnamed: 0,title,genres,poster_path
279,Vincenzo,"Action & Adventure, Comedy, Drama",https://image.tmdb.org/t/p/w500/dvXJgEDQXhL9Ou...
5970,ONE,"Action & Adventure, Drama",
1422,When It's At Night,"Action & Adventure, Mystery, Drama, Comedy",https://image.tmdb.org/t/p/w500/cFLanX9q8DPN72...
3006,신의 구슬,Action & Adventure,
3582,The Defects,Action & Adventure,


In [17]:
#Example usage
title_input = "Twenty Five Twenty One"

print(f"Top recommendations for: {title_input}")
recommend(title_input)

Top recommendations for: Twenty Five Twenty One


Unnamed: 0,title,genres,poster_path
2067,"Yeah, That's How It Is","Drama, Other",https://image.tmdb.org/t/p/w500/6TyS0ZwWbk7rAr...
1520,My Golden Life,"Drama, Other",https://image.tmdb.org/t/p/w500/n04DUwIbWuvBrp...
7716,Can Love Be Refunded,,https://image.tmdb.org/t/p/w500/3c7HjtfNbUecAo...
3277,"Ready, Go!",Drama,
5086,Fabulous Neighbor,Drama,https://image.tmdb.org/t/p/w500/9o99VJBZa9BIM6...


In [18]:
#Example usage
title_input = "Alchemy of souls"

print(f"Top recommendations for: {title_input}")
recommend(title_input)

Top recommendations for: Alchemy of souls


Unnamed: 0,title,genres,poster_path
1214,The Haunted House,"Animation, Mystery, Sci-Fi & Fantasy, Action &...",https://image.tmdb.org/t/p/w500/vDmy9WeyfajqdL...
2507,지구용사 벡터맨,"Sci-Fi & Fantasy, Other, Action & Adventure",https://image.tmdb.org/t/p/w500/6O4J4dmpHTSpjH...
4482,돗가비,"Sci-Fi & Fantasy, Action & Adventure",
5150,녹색전차 해모수,"Sci-Fi & Fantasy, Action & Adventure",https://image.tmdb.org/t/p/w500/s1b19MSEPww1HU...
3742,Dino Powers,"Other, Animation, Sci-Fi & Fantasy, Action & A...",https://image.tmdb.org/t/p/w500/jZ74KoMBPfDnqk...


In [19]:
#Example usage
title_input = "When life gives you tangerines"

print(f"Top recommendations for: {title_input}")
recommend(title_input)

Top recommendations for: When life gives you tangerines


Unnamed: 0,title,genres,poster_path
3002,One Sunny Day,Comedy,https://image.tmdb.org/t/p/w500/6QpRfJxVs7ff3k...
8622,The Sorrow of Parting,,https://image.tmdb.org/t/p/w500/iVF2J17jqtO8Zu...
2392,To Be Continued,"Drama, Comedy, Sci-Fi & Fantasy",https://image.tmdb.org/t/p/w500/mVeiZaw8bd7om7...
4161,Anyway Thirty,"Comedy, Drama",https://image.tmdb.org/t/p/w500/1ov7oEgu0h0HqR...
2077,Blossom with Love,Other,https://image.tmdb.org/t/p/w500/t130G66O04tgs4...
