In [2]:
import os
import requests
import pandas as pd
import kaggle

In [3]:
# Authenticate the Kaggle API client.
# NOTE(review): credentials are presumably configured outside this notebook
# (none appear in the code) — confirm where they are expected to live.
kaggle.api.authenticate()

In [4]:
# Download and unzip the Friends transcript dataset into data/ through the
# kaggle client imported at the top of the notebook instead of shelling out
# to the CLI: a failed request then raises in this cell rather than being
# reduced to an exit status that is easy to overlook.
# quiet=False keeps the download progress visible in the cell output.
kaggle.api.dataset_download_files(
    "divyansh22/friends-tv-show-script",
    path="data/",
    unzip=True,
    quiet=False,
)

Downloading friends-tv-show-script.zip to data


 60%|█████▉    | 1.00M/1.67M [00:00<00:00, 3.44MB/s]




100%|██████████| 1.67M/1.67M [00:01<00:00, 1.74MB/s]


0

In [22]:
# Load the raw transcript: one list entry per line of the file,
# with the trailing newline characters still attached.
with open("data/Friends_Transcript.txt") as f:
    script = list(f)

# Define a function to extract the relevant information from each line of the script
def process_line(line):
    """Classify one raw transcript line.

    Args:
        line: A single line of the transcript; surrounding whitespace
            (including the trailing newline) is ignored.

    Returns:
        A 4-tuple ``(episode, scene, character, line)``:

        * scene header (``[Scene: ...]``) -> ``("", scene, "none", "")``
        * episode title (starts with ``THE ONE``) -> ``(episode, "", "none", "")``
          where anything from the first ``(`` onwards is dropped
        * dialogue (starts with an uppercase letter and contains ``:``)
          -> ``("", "", character, dialogue)``
        * anything else -> ``("", "", "none", line)``
    """
    line = line.strip()

    # Scene header. Remove the prefix and the closing "]" / final "." explicitly
    # instead of slicing fixed offsets, so a header without a trailing period
    # (or without a space after the colon) does not lose a real character.
    scene_prefix = "[Scene:"
    if line.startswith(scene_prefix):
        scene = line[len(scene_prefix):].strip()
        if scene.endswith("]"):
            scene = scene[:-1].rstrip()
        if scene.endswith("."):
            scene = scene[:-1]
        return ("", scene, "none", "")

    # Episode title: keep only the part before any parenthesised suffix.
    if line.startswith("THE ONE"):
        episode = line.split("(")[0].strip()
        return (episode, "", "none", "")

    # Dialogue: "<Speaker>: <text>". `line` is already stripped, so a plain
    # truthiness check is enough to guard the line[0] access.
    if line and line[0].isupper() and ":" in line:
        character, _, dialogue = line.partition(":")
        return ("", "", character, dialogue.strip())

    # Everything else (stage directions, blank lines, ...) has no speaker.
    return ("", "", "none", line)

# Classify every transcript line; each call yields one
# (episode, scene, character, line) tuple.
lines = [process_line(raw_line) for raw_line in script]

# Assemble the tuples into a DataFrame, one row per transcript line.
friends_script = pd.DataFrame(
    data=lines,
    columns=["episode", "scene", "character", "line"],
)
friends_script


Unnamed: 0,episode,scene,character,line
0,THE ONE WHERE MONICA GETS A NEW ROOMATE,,none,
1,,,Written by,Marta Kauffman & David Crane
2,,"Central Perk, Chandler, Joey, Phoebe, and Moni...",none,
3,,,Monica,There's nothing to tell! He's just some guy I ...
4,,,Joey,"C'mon, you're going out with the guy! There's ..."
...,...,...,...,...
66535,,,Monica,We got some time.
66536,,,Rachel,"Okay, should we get some coffee?"
66537,,,Chandler,Sure. Where?
66538,,,none,(They all leave the apartment. Joey helps Chan...


In [27]:
# Treat empty strings as missing values, then carry each episode title forward
# onto the rows that follow it.
# The results are assigned back rather than using inplace=True on a column
# selection: that chained in-place form does not update the parent frame under
# pandas Copy-on-Write, and fillna(method="ffill") is deprecated in favour of
# .ffill().
friends_script = friends_script.replace("", pd.NA)
friends_script["episode"] = friends_script["episode"].ffill()
friends_script


Unnamed: 0,episode,scene,character,line
0,THE ONE WHERE MONICA GETS A NEW ROOMATE,,none,
1,THE ONE WHERE MONICA GETS A NEW ROOMATE,,Written by,Marta Kauffman & David Crane
2,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",none,
3,THE ONE WHERE MONICA GETS A NEW ROOMATE,,Monica,There's nothing to tell! He's just some guy I ...
4,THE ONE WHERE MONICA GETS A NEW ROOMATE,,Joey,"C'mon, you're going out with the guy! There's ..."
...,...,...,...,...
66535,THE ONE WITH ROSS'S INAPPROPRIATE SONG,,Monica,We got some time.
66536,THE ONE WITH ROSS'S INAPPROPRIATE SONG,,Rachel,"Okay, should we get some coffee?"
66537,THE ONE WITH ROSS'S INAPPROPRIATE SONG,,Chandler,Sure. Where?
66538,THE ONE WITH ROSS'S INAPPROPRIATE SONG,,none,(They all leave the apartment. Joey helps Chan...


In [30]:
# Carry each scene description forward onto the rows that follow it.
# Assigning the result back (instead of inplace=True on a column selection)
# keeps the update effective under pandas Copy-on-Write, and .ffill()
# replaces the deprecated fillna(method="ffill").
friends_script["scene"] = friends_script["scene"].ffill()
friends_script

Unnamed: 0,episode,scene,character,line
0,THE ONE WHERE MONICA GETS A NEW ROOMATE,,none,
1,THE ONE WHERE MONICA GETS A NEW ROOMATE,,Written by,Marta Kauffman & David Crane
2,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",none,
3,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Monica,There's nothing to tell! He's just some guy I ...
4,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Joey,"C'mon, you're going out with the guy! There's ..."
...,...,...,...,...
66535,THE ONE WITH ROSS'S INAPPROPRIATE SONG,Monica and Chandler's apartment. Chandler and ...,Monica,We got some time.
66536,THE ONE WITH ROSS'S INAPPROPRIATE SONG,Monica and Chandler's apartment. Chandler and ...,Rachel,"Okay, should we get some coffee?"
66537,THE ONE WITH ROSS'S INAPPROPRIATE SONG,Monica and Chandler's apartment. Chandler and ...,Chandler,Sure. Where?
66538,THE ONE WITH ROSS'S INAPPROPRIATE SONG,Monica and Chandler's apartment. Chandler and ...,none,(They all leave the apartment. Joey helps Chan...


In [32]:
# Show the rows whose "line" value is missing.
friends_script.loc[friends_script["line"].isna()]

Unnamed: 0,episode,scene,character,line
0,THE ONE WHERE MONICA GETS A NEW ROOMATE,,none,
2,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",none,
124,THE ONE WHERE MONICA GETS A NEW ROOMATE,"The Subway, Phoebe is singing for change",none,
126,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Ross's Apartment, the guys are there assemblin...",none,
146,THE ONE WHERE MONICA GETS A NEW ROOMATE,"A Restaurant, Monica and Paul are eating",none,
...,...,...,...,...
66469,THE ONE WITH ROSS'S INAPPROPRIATE SONG,"Monica and Chandler's apartment. Joey, Chandle...",none,
66483,THE ONE WITH ROSS'S INAPPROPRIATE SONG,"Monica and Chandler's apartment. Joey, Chandle...",none,
66484,THE ONE WITH ROSS'S INAPPROPRIATE SONG,Ross's apartment. Ross enters and checks his m...,none,
66510,THE ONE WITH ROSS'S INAPPROPRIATE SONG,Ross's apartment. Ross enters and checks his m...,none,


In [37]:
# Spot-check the first three rows together with positions 123-126.
friends_script.iloc[[0, 1, 2, *range(123, 127)]]

Unnamed: 0,episode,scene,character,line
0,THE ONE WHERE MONICA GETS A NEW ROOMATE,,none,
1,THE ONE WHERE MONICA GETS A NEW ROOMATE,,Written by,Marta Kauffman & David Crane
2,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",none,
123,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",none,Commercial Break
124,THE ONE WHERE MONICA GETS A NEW ROOMATE,"The Subway, Phoebe is singing for change",none,
125,THE ONE WHERE MONICA GETS A NEW ROOMATE,"The Subway, Phoebe is singing for change",Phoebe,"(singing) Love is sweet as summer showers, lov..."
126,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Ross's Apartment, the guys are there assemblin...",none,


In [38]:
# Keep only rows that have a speaker (character is not the "none" placeholder)
# and a non-missing line, then preview the first 50 of them.
friends_script = (
    friends_script[friends_script["character"].ne("none")]
    .dropna(subset=["line"])
)
friends_script.head(50)

Unnamed: 0,episode,scene,character,line
1,THE ONE WHERE MONICA GETS A NEW ROOMATE,,Written by,Marta Kauffman & David Crane
3,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Monica,There's nothing to tell! He's just some guy I ...
4,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Joey,"C'mon, you're going out with the guy! There's ..."
5,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Chandler,"All right Joey, be nice. So does he have a hum..."
6,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Phoebe,"Wait, does he eat chalk?"
8,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Phoebe,"Just, 'cause, I don't want her to go through w..."
9,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Monica,"Okay, everybody relax. This is not even a date..."
10,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Chandler,Sounds like a date to me.
12,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Chandler,"Alright, so I'm back in high school, I'm stand..."
13,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",All,"Oh, yeah. Had that dream."
