In [15]:
import pandas as pd 
import numpy as np 
import re 

In [16]:
# Read the three raw ("not-clean") CSV inputs in one pass:
#   1. the adaptation list,
#   2. the combined Korean-drama table,
#   3. the combined webtoon table.
df_adaptations, df_kdrama, df_webtoon = (
    pd.read_csv(csv_path)
    for csv_path in (
        "data/combined/not-clean/combined_adaptations.csv",
        "data/combined/not-clean/drama_combined.csv",
        "data/combined/not-clean/webtoon_combined.csv",
    )
)

In [17]:
# Drama side: keep only the columns of interest. 'Title' becomes the shared
# lowercase join key 'title'; every other column gets a "Kdrama " prefix so it
# stays distinguishable after merging.
kdrama_cols = ['Title', 'Rating', 'Watchers', 'Genres', 'Synopsis', 'Cast', 'Tags']
df_kdrama_selected = df_kdrama.loc[:, kdrama_cols].rename(
    columns={
        col: ('title' if col == 'Title' else f"Kdrama {col}")
        for col in kdrama_cols
    }
)

# Webtoon side: same treatment, with a "WE " prefix on the non-key columns.
webtoon_cols = ['Title', 'Rating', 'Subscribers', 'Genre', 'Views', 'Creators']
df_webtoon_selected = df_webtoon.loc[:, webtoon_cols].rename(
    columns={
        col: ('title' if col == 'Title' else f"WE {col}")
        for col in webtoon_cols
    }
)

In [18]:
# Attach drama and webtoon attributes to each adaptation row.
#
# Both joins are lookups: every adaptation should pick up at most one drama
# record and at most one webtoon record. A left merge silently multiplies
# left-hand rows when the right-hand frame contains the same key more than
# once, so the lookup frames are de-duplicated on 'title' first (first
# occurrence wins — NOTE(review): confirm that keeping the first duplicate is
# the desired tie-break for these datasets).
kdrama_lookup = df_kdrama_selected.drop_duplicates(subset='title', keep='first')
webtoon_lookup = df_webtoon_selected.drop_duplicates(subset='title', keep='first')

# validate="many_to_one" makes pandas raise a MergeError if the right-hand
# keys are ever non-unique, instead of quietly fanning rows out.
df_adapt_combined = (
    df_adaptations
    .merge(kdrama_lookup, on='title', how='left', validate='many_to_one')
    .merge(webtoon_lookup, on='title', how='left', validate='many_to_one')
)

# Invariant of a left join against unique keys: the row count is unchanged.
assert len(df_adapt_combined) == len(df_adaptations), (
    "left merges changed the number of adaptation rows"
)

In [19]:
# Display the merged adaptation frame (adaptation columns plus the
# "Kdrama ..." and "WE ..." columns added by the two left merges) so the
# join coverage can be inspected; unmatched titles show NaN in those columns.
df_adapt_combined

Unnamed: 0,title,year,episodes,0,Kdrama Rating,Kdrama Watchers,Kdrama Genres,Kdrama Synopsis,Kdrama Cast,Kdrama Tags,WE Rating,WE Subscribers,WE Genre,WE Views,WE Creators
0,Chicken Nugget,2024.0,10.0,,,,,,,,,,,,
1,A Superior Day,2022.0,8.0,,,,,Lee Ho Cheol is a regular firefighter. When ne...,"Cho Yu Ha, Ha Do Gwon, Han Yi Jin, Im Hwa Youn...",,,,,,
2,The Forbidden Marriage,2022.0,12.0,,,,,"Lee Heon is the king of Joseon. 7 years ago, w...","Choi Deok Moon, Kim Woo Seok, Kim Young Dae, P...",,8.76,52038.0,historical,495802.0,"Chun Ji hye, Sanchaek"
3,Gaus Electronics,2022.0,12.0,,8.5,,"Business, Comedy, Life, Romance",Stress is no stranger to the members of Market...,"Bae Hyun Sung, Baek Hyun Jin, Go Sung Hee, Heo...","Age Gap [Real Life], Heir Male Lead, Hot-tempe...",,,,,
4,See You in My 19th Life,2023.0,12.0,,8.4,,"Comedy, Drama, Fantasy, Romance",Ban Ji Eum has an extraordinary ability: she c...,"Ahn Bo Hyun, Ahn Dong Goo, Ha Yoon Kyung, Kim ...","Adapted From A Webtoon, Childhood Friends' Rel...",9.88,1423217.0,romance,96091538.0,"Lee Hey, Lee Hye"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
109,Oh Man Sang and Prejudice,,,,,,,,,,,,,,
110,Jinxed at First,2022.0,16.0,,,,,"A fishmonger at a traditional market, Gong Soo...","Cha Kwang Soo, Jeon Kwang Ryul, Ki Do Hoon, Na...",,,,,,
111,Bloodhounds,2023.0,8.0,,,,,When reserved rookie boxer Kim Geon Woo square...,"Heo Joon Ho, Jung Da Eun, Kim Sae Ron, Lee San...",,9.24,115128.0,action,2301171.0,Jeong Chan
112,A History of Losers,2025.0,8.0,,,,,,,,,,,,


In [27]:
# Columns that must all be present for a row to be kept; they are also the
# only columns carried into the cleaned frame.
required_columns = ['title', 'Kdrama Rating', 'Kdrama Genres', 'Kdrama Synopsis', 'Kdrama Tags']

# Narrow to the required columns, discard any row with a missing value in
# them, and renumber the surviving rows from 0.
df_adapt_combined_clean = (
    df_adapt_combined
    .loc[:, required_columns]
    .dropna(how='any')
    .reset_index(drop=True)
)

In [28]:
# Report how many rows existed before and after the missing-value filter.
print('Before:', df_adapt_combined.shape[0])
print('After: ', df_adapt_combined_clean.shape[0])

Before: 114
After:  28


In [25]:
# Display the cleaned frame (rows with every required column present).
# NOTE(review): the saved output for this cell lists a 'Kdrama Cast' column,
# which the current required_columns selection does not keep, and the cell
# execution counts are out of order — re-run the notebook top to bottom to
# refresh the outputs before relying on them.
df_adapt_combined_clean

Unnamed: 0,title,Kdrama Rating,Kdrama Genres,Kdrama Synopsis,Kdrama Cast,Kdrama Tags
0,Gaus Electronics,8.5,"Business, Comedy, Life, Romance",Stress is no stranger to the members of Market...,"Bae Hyun Sung, Baek Hyun Jin, Go Sung Hee, Heo...","Age Gap [Real Life], Heir Male Lead, Hot-tempe..."
1,See You in My 19th Life,8.4,"Comedy, Drama, Fantasy, Romance",Ban Ji Eum has an extraordinary ability: she c...,"Ahn Bo Hyun, Ahn Dong Goo, Ha Yoon Kyung, Kim ...","Adapted From A Webtoon, Childhood Friends' Rel..."
2,The 8 Show,7.6,"Drama, Mystery, Psychological, Thriller",Eight severely indebted people take part in a ...,"Chun Woo Hee, Lee Joo Young, Lee Yeol Eum, Par...","Adapted From A Webtoon, Death, Debt, Difficult..."
3,Divorce Attorney Shin,8.3,"Drama, Law","Meet Shin Sung Han, a divorce lawyer with a ta...","Cho Seung Woo, Han Hye Jin, Jeon Bae Soo, Jung...","Adapted From A Webtoon, Best Friends' Relation..."
4,Save Me,8.6,"Action, Drama, Mystery, Thriller",Following the failure of her father's business...,"Jo Jae Yoon, Jo Sung Ha, Ok Taec Yeon, Park Ji...","Adapted From A Webtoon, Bromance, Church, Cult..."
5,What's Wrong with Secretary Kim,8.5,"Business, Comedy, Friendship, Romance",The series revolves around the narcissistic Le...,"Kim Byung Ok, Kim Hye Ok, Lee Tae Hwan, Park M...","Adapted From A Webtoon, Boss-Employee Relation..."
6,Love Alarm,7.1,"Comedy, Drama, Romance, Youth",The cellphone app Love Alarm is created. If so...,"Go Min Si, Jung Ga Ram, Kim So Hyun, Kim Young...","Adapted From A Webtoon, Bromance, First Love, ..."
7,Yumi's Cells,8.5,"Comedy, Drama, Psychological, Romance","Controlled by a complex network of cells, each...","Ahn Bo Hyun, Joo Jong Hyuk, Kim Go Eun, Lee Yo...","Adapted From A Webtoon, Career Woman, Friendsh..."
8,D.P.,8.8,"Action, Comedy, Drama, Military",Private soldier Jun Ho is a confused youth who...,"Hong Kyung, Jo Hyun Chul, Jung Hae In, Kim Sun...","Abuse, Adapted From A Webtoon, Bromance, Deser..."
9,Navillera,9.0,"Drama, Family, Friendship, Life",A 70-year-old with a dream and a 23-year-old w...,"Hong Seung Hee, Kim Tae Hoon, Na Moon Hee, Par...","Adapted From A Webtoon, Ballet, Ballet Dancer,..."


In [26]:
# Persist the cleaned adaptation table; index=False keeps the positional row
# index out of the CSV.
df_adapt_combined_clean.to_csv(
    path_or_buf="data/combined/clean/combined_adaptation_clean.csv",
    index=False,
)