In [5]:
import os
import requests
import pandas as pd
import kaggle
import numpy as np

In [6]:
kaggle.api.authenticate()

In [7]:
os.system('kaggle datasets download -d divyansh22/friends-tv-show-script --unzip --p "data/"')

Downloading friends-tv-show-script.zip to data



100%|██████████| 1.67M/1.67M [00:00<00:00, 14.7MB/s]


0

In [8]:
# Read in the text file
with open("data/Friends_Transcript.txt", "r") as f:
    script = f.readlines()

# Define a function to extract the relevant information from each line of the script
def process_line(line):
    # Remove newline character
    line = line.strip()
    # Check if line contains scene information
    if line.startswith("[Scene:"):
        scene = line[8:-2]
        return ("", scene, "none", "")
    # Check if line contains title information
    elif line.startswith("THE ONE"):
        episode = line.split("(")[0].strip()
        return (episode, "", "none", "")
    # Check if line contains character information
    elif line.strip() and line[0].isupper() and ":" in line:
        character, dialogue = line.split(":", 1)
        return ("", "", character, dialogue.strip())
    else:
        return ("", "", "none", line)

# Process each line of the script and store the results in a list of tuples
lines = []
for line in script:
    lines.append(process_line(line))

# Convert the list of tuples to a Pandas DataFrame
friends_script = pd.DataFrame(lines, columns=["episode", "scene", "character", "line"])
friends_script


Unnamed: 0,episode,scene,character,line
0,THE ONE WHERE MONICA GETS A NEW ROOMATE,,none,
1,,,Written by,Marta Kauffman & David Crane
2,,"Central Perk, Chandler, Joey, Phoebe, and Moni...",none,
3,,,Monica,There's nothing to tell! He's just some guy I ...
4,,,Joey,"C'mon, you're going out with the guy! There's ..."
...,...,...,...,...
66535,,,Monica,We got some time.
66536,,,Rachel,"Okay, should we get some coffee?"
66537,,,Chandler,Sure. Where?
66538,,,none,(They all leave the apartment. Joey helps Chan...


In [9]:
# use the "fillna()" method to fill missing episode names with the previous value
friends_script.replace("", pd.NA, inplace=True)
friends_script["episode"].fillna(method="ffill", inplace=True)
friends_script


Unnamed: 0,episode,scene,character,line
0,THE ONE WHERE MONICA GETS A NEW ROOMATE,,none,
1,THE ONE WHERE MONICA GETS A NEW ROOMATE,,Written by,Marta Kauffman & David Crane
2,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",none,
3,THE ONE WHERE MONICA GETS A NEW ROOMATE,,Monica,There's nothing to tell! He's just some guy I ...
4,THE ONE WHERE MONICA GETS A NEW ROOMATE,,Joey,"C'mon, you're going out with the guy! There's ..."
...,...,...,...,...
66535,THE ONE WITH ROSS'S INAPPROPRIATE SONG,,Monica,We got some time.
66536,THE ONE WITH ROSS'S INAPPROPRIATE SONG,,Rachel,"Okay, should we get some coffee?"
66537,THE ONE WITH ROSS'S INAPPROPRIATE SONG,,Chandler,Sure. Where?
66538,THE ONE WITH ROSS'S INAPPROPRIATE SONG,,none,(They all leave the apartment. Joey helps Chan...


In [10]:
friends_script["scene"].fillna(method="ffill", inplace=True)
friends_script

Unnamed: 0,episode,scene,character,line
0,THE ONE WHERE MONICA GETS A NEW ROOMATE,,none,
1,THE ONE WHERE MONICA GETS A NEW ROOMATE,,Written by,Marta Kauffman & David Crane
2,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",none,
3,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Monica,There's nothing to tell! He's just some guy I ...
4,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Joey,"C'mon, you're going out with the guy! There's ..."
...,...,...,...,...
66535,THE ONE WITH ROSS'S INAPPROPRIATE SONG,Monica and Chandler's apartment. Chandler and ...,Monica,We got some time.
66536,THE ONE WITH ROSS'S INAPPROPRIATE SONG,Monica and Chandler's apartment. Chandler and ...,Rachel,"Okay, should we get some coffee?"
66537,THE ONE WITH ROSS'S INAPPROPRIATE SONG,Monica and Chandler's apartment. Chandler and ...,Chandler,Sure. Where?
66538,THE ONE WITH ROSS'S INAPPROPRIATE SONG,Monica and Chandler's apartment. Chandler and ...,none,(They all leave the apartment. Joey helps Chan...


In [11]:
friends_script[(friends_script["line"].isna())]

Unnamed: 0,episode,scene,character,line
0,THE ONE WHERE MONICA GETS A NEW ROOMATE,,none,
2,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",none,
124,THE ONE WHERE MONICA GETS A NEW ROOMATE,"The Subway, Phoebe is singing for change",none,
126,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Ross's Apartment, the guys are there assemblin...",none,
146,THE ONE WHERE MONICA GETS A NEW ROOMATE,"A Restaurant, Monica and Paul are eating",none,
...,...,...,...,...
66469,THE ONE WITH ROSS'S INAPPROPRIATE SONG,"Monica and Chandler's apartment. Joey, Chandle...",none,
66483,THE ONE WITH ROSS'S INAPPROPRIATE SONG,"Monica and Chandler's apartment. Joey, Chandle...",none,
66484,THE ONE WITH ROSS'S INAPPROPRIATE SONG,Ross's apartment. Ross enters and checks his m...,none,
66510,THE ONE WITH ROSS'S INAPPROPRIATE SONG,Ross's apartment. Ross enters and checks his m...,none,


In [12]:
friends_script.iloc[[0,1,2,123,124,125,126]]

Unnamed: 0,episode,scene,character,line
0,THE ONE WHERE MONICA GETS A NEW ROOMATE,,none,
1,THE ONE WHERE MONICA GETS A NEW ROOMATE,,Written by,Marta Kauffman & David Crane
2,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",none,
123,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",none,Commercial Break
124,THE ONE WHERE MONICA GETS A NEW ROOMATE,"The Subway, Phoebe is singing for change",none,
125,THE ONE WHERE MONICA GETS A NEW ROOMATE,"The Subway, Phoebe is singing for change",Phoebe,"(singing) Love is sweet as summer showers, lov..."
126,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Ross's Apartment, the guys are there assemblin...",none,


In [13]:
friends_script = friends_script.loc[(friends_script['character'] != 'none') & (friends_script['line'].notna())]
friends_script.head(50)

Unnamed: 0,episode,scene,character,line
1,THE ONE WHERE MONICA GETS A NEW ROOMATE,,Written by,Marta Kauffman & David Crane
3,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Monica,There's nothing to tell! He's just some guy I ...
4,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Joey,"C'mon, you're going out with the guy! There's ..."
5,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Chandler,"All right Joey, be nice. So does he have a hum..."
6,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Phoebe,"Wait, does he eat chalk?"
8,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Phoebe,"Just, 'cause, I don't want her to go through w..."
9,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Monica,"Okay, everybody relax. This is not even a date..."
10,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Chandler,Sounds like a date to me.
12,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Chandler,"Alright, so I'm back in high school, I'm stand..."
13,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",All,"Oh, yeah. Had that dream."


In [14]:
friends_script["character"].value_counts()

Rachel                 8587
Ross                   8334
Chandler               7763
Monica                 7711
Joey                   7624
                       ... 
Marge                     1
The "Hey Guy" Guy         1
CHAN, JOEY, ROSS          1
A Disembodied Voice       1
A Tourist                 1
Name: character, Length: 905, dtype: int64

Great! now we have a table that has every line by every character on every scene of every episode. However, the first 2 columns have a lot of the same data. Lets make a list of every scene per episode and another table of episodes on every season.

In [15]:
len(friends_script["episode"].unique())

181

In [16]:
friends_script["scene"].unique()

array([<NA>, 'Central Perk, Chandler, Joey, Phoebe, and Monica are there',
       'The Subway, Phoebe is singing for change', ...,
       "Monica and Chandler's apartment. Joey, Chandler, Monica and the twins are there. Everything has been put into boxes",
       "Ross's apartment. Ross enters and checks his messages",
       "Monica and Chandler's apartment. Chandler and Monica are holding the twins. Joey and Phoebe are sitting by the window, while Ross and Rachel are standing together. The apartment is completely empty. Two men are carrying a large dresser"],
      dtype=object)

In [17]:
f_scene_info =friends_script[["episode","scene"]]
f_scene_info.drop_duplicates(inplace=True)
f_scene_info.dropna(inplace=True)
f_scene_info.reset_index(inplace=True)
f_scene_info.drop("index", axis =1, inplace=True)
f_scene_info.reset_index(inplace=True)
f_scene_info.rename(columns={"index":"scene_number"}, inplace=True)
f_scene_info

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  f_scene_info.drop_duplicates(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  f_scene_info.dropna(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  f_scene_info.drop("index", axis =1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  f_scene_info.rename(columns={"index":"sce

Unnamed: 0,scene_number,episode,scene
0,0,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni..."
1,1,THE ONE WHERE MONICA GETS A NEW ROOMATE,"The Subway, Phoebe is singing for change"
2,2,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Ross's Apartment, the guys are there assemblin..."
3,3,THE ONE WHERE MONICA GETS A NEW ROOMATE,"A Restaurant, Monica and Paul are eating"
4,4,THE ONE WHERE MONICA GETS A NEW ROOMATE,Ross's Apartment; Ross is pacing while Joey an...
...,...,...,...
2966,2966,THE ONE WITH ROSS'S INAPPROPRIATE SONG,Joey's apartment. Monica has completely destro...
2967,2967,THE ONE WITH ROSS'S INAPPROPRIATE SONG,The gate at the airport. The passengers are st...
2968,2968,THE ONE WITH ROSS'S INAPPROPRIATE SONG,"Monica and Chandler's apartment. Joey, Chandle..."
2969,2969,THE ONE WITH ROSS'S INAPPROPRIATE SONG,Ross's apartment. Ross enters and checks his m...


Now that we have a list of scenes numbered and with the episode they belong to, lets do the same but for episodes and seasons. For this we will use the list of episodes from wikipedia.

In [18]:
seasons_url="https://en.wikipedia.org/wiki/List_of_Friends_episodes"
seasons_list = pd.read_html(seasons_url)
seasons_list

[         Season     Episodes                 Originally aired                \
          Season     Episodes   Episodes.1         First aired    Last aired   
 0             1           24           24  September 22, 1994  May 18, 1995   
 1             2           24           24  September 21, 1995  May 16, 1996   
 2             3           25           25  September 19, 1996  May 15, 1997   
 3             4           24           24  September 25, 1997   May 7, 1998   
 4             5           24           24  September 24, 1998  May 20, 1999   
 5             6           25           25  September 23, 1999  May 18, 2000   
 6             7           24           24    October 12, 2000  May 17, 2001   
 7             8           24           24  September 27, 2001  May 16, 2002   
 8             9           24           24  September 26, 2002  May 15, 2003   
 9            10           18           18  September 25, 2003   May 6, 2004   
 10  The Reunion  The Reunion  The Reuni

In [19]:
len(seasons_list)

17

In [20]:
seasons_list[2]
#Since this list has all the tables in the wikipedia site for friends, lets drop the tables we are not interested in

Unnamed: 0,No.overall,No. inseason,Title,Directed by,Written by,Original air date,Prod.code,U.S. viewers(millions),Rating(18–49)
0,25,1,"""The One with Ross's New Girlfriend""",Michael Lembeck,Jeffrey Astrof & Mike Sikowitz,"September 21, 1995",457301,32.1[14],
1,26,2,"""The One with the Breast Milk""",Michael Lembeck,Adam Chase & Ira Ungerleider,"September 28, 1995",457302,29.8[14],
2,27,3,"""The One Where Heckles Dies""",Kevin S. Bright,Michael Curtis & Gregory S. Malins,"October 5, 1995",457303,30.2[14],
3,28,4,"""The One with Phoebe's Husband""",Gail Mancuso,Alexa Junge,"October 12, 1995",457305,28.1[14],
4,29,5,"""The One with Five Steaks and an Eggplant""",Ellen Gittelsohn,Chris Brown,"October 19, 1995",457304,28.3[14],
5,30,6,"""The One with the Baby on the Bus""",Gail Mancuso,Betsy Borns,"November 2, 1995",457306,30.2[14],
6,31,7,"""The One Where Ross Finds Out""",Peter Bonerz,Michael Borkow,"November 9, 1995",457307,30.5[14],
7,32,8,"""The One with the List""",Mary Kay Place,David Crane & Marta Kauffman,"November 16, 1995",457308,32.9[14],
8,33,9,"""The One with Phoebe's Dad""",Kevin S. Bright,Jeffrey Astrof & Mike Sikowitz,"December 14, 1995",457309,27.8[14],
9,34,10,"""The One with Russ""",Thomas Schlamme,Ira Ungerleider,"January 4, 1996",457311,32.2[14],


In [21]:
f_seasons = []
for i in list(range(1,12)):
    f_seasons.append(seasons_list[i])
f_seasons

[    No.overall  No. inseason  \
 0            1             1   
 1            2             2   
 2            3             3   
 3            4             4   
 4            5             5   
 5            6             6   
 6            7             7   
 7            8             8   
 8            9             9   
 9           10            10   
 10          11            11   
 11          12            12   
 12          13            13   
 13          14            14   
 14          15            15   
 15        1617          1617   
 16          18            18   
 17          19            19   
 18          20            20   
 19          21            21   
 20          22            22   
 21          23            23   
 22          24            24   
 
                                                 Title      Directed by  \
 0   "Pilot"[a]""Monica Gets a Roommate""Where It A...    James Burrows   
 1              "The One with the Sonogram at the End"  

In [22]:
f_seasons[0]

Unnamed: 0,No.overall,No. inseason,Title,Directed by,Written by,Original air date,Prod.code,U.S. viewers(millions)
0,1,1,"""Pilot""[a]""""Monica Gets a Roommate""""Where It A...",James Burrows,David Crane & Marta Kauffman,"September 22, 1994",475085,21.5[13]
1,2,2,"""The One with the Sonogram at the End""",James Burrows,David Crane & Marta Kauffman,"September 29, 1994",456652,20.2[13]
2,3,3,"""The One with the Thumb""",James Burrows,Jeffrey Astrof & Mike Sikowitz,"October 6, 1994",456651,19.5[13]
3,4,4,"""The One with George Stephanopoulos""",James Burrows,Alexa Junge,"October 13, 1994",456654,19.7[13]
4,5,5,"""The One with the East German Laundry Detergent""",Pamela Fryman,Jeff Greenstein & Jeff Strauss,"October 20, 1994",456653,18.6[13]
5,6,6,"""The One with the Butt""",Arlene Sanford,Adam Chase & Ira Ungerleider,"October 27, 1994",456655,18.2[13]
6,7,7,"""The One with the Blackout""",James Burrows,Jeffrey Astrof & Mike Sikowitz,"November 3, 1994",456656,23.5[13]
7,8,8,"""The One Where Nana Dies Twice""",James Burrows,Marta Kauffman & David Crane,"November 10, 1994",456657,21.1[13]
8,9,9,"""The One Where Underdog Gets Away""",James Burrows,Jeff Greenstein & Jeff Strauss,"November 17, 1994",456659,23.1[13]
9,10,10,"""The One with the Monkey""",Peter Bonerz,Adam Chase & Ira Ungerleider,"December 15, 1994",456661,19.9[13]


In [23]:
#for i,df in enumerate(f_seasons):
#    df["season"] = f'{i+1}'

for i, df in enumerate(f_seasons):
    df["season"] = f'{i+1}'
    print(df.columns)

Index(['No.overall', 'No. inseason', 'Title', 'Directed by', 'Written by',
       'Original air date', 'Prod.code', 'U.S. viewers(millions)', 'season'],
      dtype='object')
Index(['No.overall', 'No. inseason', 'Title', 'Directed by', 'Written by',
       'Original air date', 'Prod.code', 'U.S. viewers(millions)',
       'Rating(18–49)', 'season'],
      dtype='object')
Index(['No.overall', 'No. inseason', 'Title', 'Directed by', 'Written by',
       'Original air date', 'Prod.code', 'U.S. viewers(millions)',
       'Rating/share(18–49)', 'season'],
      dtype='object')
Index(['No.overall', 'No. inseason', 'Title', 'Directed by', 'Written by',
       'Original air date', 'Prod.code', 'U.S. viewers(millions)',
       'Rating/share(18–49)', 'season'],
      dtype='object')
Index(['No.overall', 'No. inseason', 'Title', 'Directed by', 'Written by',
       'Original air date', 'Prod.code', 'U.S. viewers(millions)',
       'Rating/share(18–49)', 'season'],
      dtype='object')
Index(['No.

In [24]:
f_seasons[10]

Unnamed: 0,No.overall,No. inseason,Title,Directed by,Written by,Original air date,Prod.code,U.S. viewers(millions),season
0,219,1,"""The One After Joey and Rachel Kiss""†",Kevin S. Bright,Andrew Reich & Ted Cohen,"September 25, 2003",176251,24.54[318],11
1,220,2,"""The One Where Ross Is Fine""",Ben Weiss,Sherry Bilsing-Graham & Ellen Plummer,"October 2, 2003",176252,22.37[319],11
2,221,3,"""The One with Ross's Tan""",Gary Halvorson,Brian Buckner,"October 9, 2003",176253,21.87[320],11
3,222,4,"""The One with the Cake""",Gary Halvorson,Robert Carlock,"October 23, 2003",176254,18.76[321],11
4,223,5,"""The One Where Rachel's Sister Babysits""†",Roger Christiansen,Dana Klein Borkow,"October 30, 2003",176255,19.37[322],11
5,224,6,"""The One with Ross's Grant""",Ben Weiss,Sebastian Jones,"November 6, 2003",176256,20.37[323],11
6,225,7,"""The One with the Home Study""",Kevin S. Bright,Mark Kunerth,"November 13, 2003",176257,20.21[324],11
7,226,8,"""The One with the Late Thanksgiving""",Gary Halvorson,Shana Goldberg-Meehan,"November 20, 2003",176259,20.66[325],11
8,227,9,"""The One with the Birth Mother""",David Schwimmer,Scott Silveri,"January 8, 2004",176258,25.48[326],11
9,228,10,"""The One Where Chandler Gets Caught""",Gary Halvorson,Doty Abrams,"January 15, 2004",176268,26.68[327],11


In [25]:
f_seasons[1].columns

Index(['No.overall', 'No. inseason', 'Title', 'Directed by', 'Written by',
       'Original air date', 'Prod.code', 'U.S. viewers(millions)',
       'Rating(18–49)', 'season'],
      dtype='object')

In [26]:
for i in range(len(f_seasons)):
    if 'season' in f_seasons[i].columns:
        print(f'yes in {i}')

yes in 0
yes in 1
yes in 2
yes in 3
yes in 4
yes in 5
yes in 6
yes in 7
yes in 8
yes in 9
yes in 10


In [27]:
f_seasons = pd.concat(f_seasons, ignore_index=True)
f_seasons

Unnamed: 0,No.overall,No. inseason,Title,Directed by,Written by,Original air date,Prod.code,U.S. viewers(millions),season,Rating(18–49),Rating/share(18–49),Special No.,U.S. viewersmillions
0,1.0,1.0,"""Pilot""[a]""""Monica Gets a Roommate""""Where It A...",James Burrows,David Crane & Marta Kauffman,"September 22, 1994",4.750850e+05,21.5[13],1,,,,
1,2.0,2.0,"""The One with the Sonogram at the End""",James Burrows,David Crane & Marta Kauffman,"September 29, 1994",4.566520e+05,20.2[13],1,,,,
2,3.0,3.0,"""The One with the Thumb""",James Burrows,Jeffrey Astrof & Mike Sikowitz,"October 6, 1994",4.566510e+05,19.5[13],1,,,,
3,4.0,4.0,"""The One with George Stephanopoulos""",James Burrows,Alexa Junge,"October 13, 1994",4.566540e+05,19.7[13],1,,,,
4,5.0,5.0,"""The One with the East German Laundry Detergent""",Pamela Fryman,Jeff Greenstein & Jeff Strauss,"October 20, 1994",4.566530e+05,18.6[13],1,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
222,231.0,13.0,"""The One Where Joey Speaks French""",Gary Halvorson,Sherry Bilsing-Graham & Ellen Plummer,"February 19, 2004",1.762610e+05,24.27[330],11,,,,
223,232.0,14.0,"""The One with Princess Consuela""",Gary Halvorson,Story by : Robert CarlockTeleplay by : Tracy R...,"February 26, 2004",1.762630e+05,22.82[331],11,,,,
224,233.0,15.0,"""The One Where Estelle Dies""",Gary Halvorson,Story by : Mark KunerthTeleplay by : David Cra...,"April 22, 2004",1.762640e+05,22.64[332],11,,,,
225,234.0,16.0,"""The One with Rachel's Going Away Party""",Gary Halvorson,Andrew Reich & Ted Cohen,"April 29, 2004",1.762650e+05,24.51[333],11,,,,


In [28]:
f_seasons["No.overall"] = f_seasons["No.overall"].fillna(-1)
f_seasons["No.overall"] = f_seasons["No.overall"].astype(int)
f_seasons["No. inseason"] = f_seasons["No. inseason"].fillna(-1)
f_seasons["No. inseason"] = f_seasons["No. inseason"].astype(int)
f_seasons

Unnamed: 0,No.overall,No. inseason,Title,Directed by,Written by,Original air date,Prod.code,U.S. viewers(millions),season,Rating(18–49),Rating/share(18–49),Special No.,U.S. viewersmillions
0,1,1,"""Pilot""[a]""""Monica Gets a Roommate""""Where It A...",James Burrows,David Crane & Marta Kauffman,"September 22, 1994",4.750850e+05,21.5[13],1,,,,
1,2,2,"""The One with the Sonogram at the End""",James Burrows,David Crane & Marta Kauffman,"September 29, 1994",4.566520e+05,20.2[13],1,,,,
2,3,3,"""The One with the Thumb""",James Burrows,Jeffrey Astrof & Mike Sikowitz,"October 6, 1994",4.566510e+05,19.5[13],1,,,,
3,4,4,"""The One with George Stephanopoulos""",James Burrows,Alexa Junge,"October 13, 1994",4.566540e+05,19.7[13],1,,,,
4,5,5,"""The One with the East German Laundry Detergent""",Pamela Fryman,Jeff Greenstein & Jeff Strauss,"October 20, 1994",4.566530e+05,18.6[13],1,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
222,231,13,"""The One Where Joey Speaks French""",Gary Halvorson,Sherry Bilsing-Graham & Ellen Plummer,"February 19, 2004",1.762610e+05,24.27[330],11,,,,
223,232,14,"""The One with Princess Consuela""",Gary Halvorson,Story by : Robert CarlockTeleplay by : Tracy R...,"February 26, 2004",1.762630e+05,22.82[331],11,,,,
224,233,15,"""The One Where Estelle Dies""",Gary Halvorson,Story by : Mark KunerthTeleplay by : David Cra...,"April 22, 2004",1.762640e+05,22.64[332],11,,,,
225,234,16,"""The One with Rachel's Going Away Party""",Gary Halvorson,Andrew Reich & Ted Cohen,"April 29, 2004",1.762650e+05,24.51[333],11,,,,


In [29]:
f_seasons[f_seasons["No. inseason"]==-1]

Unnamed: 0,No.overall,No. inseason,Title,Directed by,Written by,Original air date,Prod.code,U.S. viewers(millions),season,Rating(18–49),Rating/share(18–49),Special No.,U.S. viewersmillions
163,-1,-1,"""Friends: The Stuff You've Never Seen""",,,"February 15, 2001",,,8,,11.6/27[236],S01,22.50[235]


In [30]:
import re
f_seasons = f_seasons.applymap(lambda x: x if pd.isnull(x) else re.sub(r'\[.*?\]', '', str(x)))
f_seasons

Unnamed: 0,No.overall,No. inseason,Title,Directed by,Written by,Original air date,Prod.code,U.S. viewers(millions),season,Rating(18–49),Rating/share(18–49),Special No.,U.S. viewersmillions
0,1,1,"""Pilot""""""Monica Gets a Roommate""""Where It All ...",James Burrows,David Crane & Marta Kauffman,"September 22, 1994",475085.0,21.5,1,,,,
1,2,2,"""The One with the Sonogram at the End""",James Burrows,David Crane & Marta Kauffman,"September 29, 1994",456652.0,20.2,1,,,,
2,3,3,"""The One with the Thumb""",James Burrows,Jeffrey Astrof & Mike Sikowitz,"October 6, 1994",456651.0,19.5,1,,,,
3,4,4,"""The One with George Stephanopoulos""",James Burrows,Alexa Junge,"October 13, 1994",456654.0,19.7,1,,,,
4,5,5,"""The One with the East German Laundry Detergent""",Pamela Fryman,Jeff Greenstein & Jeff Strauss,"October 20, 1994",456653.0,18.6,1,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
222,231,13,"""The One Where Joey Speaks French""",Gary Halvorson,Sherry Bilsing-Graham & Ellen Plummer,"February 19, 2004",176261.0,24.27,11,,,,
223,232,14,"""The One with Princess Consuela""",Gary Halvorson,Story by : Robert CarlockTeleplay by : Tracy R...,"February 26, 2004",176263.0,22.82,11,,,,
224,233,15,"""The One Where Estelle Dies""",Gary Halvorson,Story by : Mark KunerthTeleplay by : David Cra...,"April 22, 2004",176264.0,22.64,11,,,,
225,234,16,"""The One with Rachel's Going Away Party""",Gary Halvorson,Andrew Reich & Ted Cohen,"April 29, 2004",176265.0,24.51,11,,,,


Now lets modify the episode names so that we can match them

In [31]:
f_seasons['Title'] = f_seasons['Title'].str.replace('"', '')
f_seasons

Unnamed: 0,No.overall,No. inseason,Title,Directed by,Written by,Original air date,Prod.code,U.S. viewers(millions),season,Rating(18–49),Rating/share(18–49),Special No.,U.S. viewersmillions
0,1,1,PilotMonica Gets a RoommateWhere It All BeganT...,James Burrows,David Crane & Marta Kauffman,"September 22, 1994",475085.0,21.5,1,,,,
1,2,2,The One with the Sonogram at the End,James Burrows,David Crane & Marta Kauffman,"September 29, 1994",456652.0,20.2,1,,,,
2,3,3,The One with the Thumb,James Burrows,Jeffrey Astrof & Mike Sikowitz,"October 6, 1994",456651.0,19.5,1,,,,
3,4,4,The One with George Stephanopoulos,James Burrows,Alexa Junge,"October 13, 1994",456654.0,19.7,1,,,,
4,5,5,The One with the East German Laundry Detergent,Pamela Fryman,Jeff Greenstein & Jeff Strauss,"October 20, 1994",456653.0,18.6,1,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
222,231,13,The One Where Joey Speaks French,Gary Halvorson,Sherry Bilsing-Graham & Ellen Plummer,"February 19, 2004",176261.0,24.27,11,,,,
223,232,14,The One with Princess Consuela,Gary Halvorson,Story by : Robert CarlockTeleplay by : Tracy R...,"February 26, 2004",176263.0,22.82,11,,,,
224,233,15,The One Where Estelle Dies,Gary Halvorson,Story by : Mark KunerthTeleplay by : David Cra...,"April 22, 2004",176264.0,22.64,11,,,,
225,234,16,The One with Rachel's Going Away Party,Gary Halvorson,Andrew Reich & Ted Cohen,"April 29, 2004",176265.0,24.51,11,,,,


In [32]:
f_seasons.loc[0, 'Title'] = 'The One Where Monica Gets A New Roommate'
f_seasons

Unnamed: 0,No.overall,No. inseason,Title,Directed by,Written by,Original air date,Prod.code,U.S. viewers(millions),season,Rating(18–49),Rating/share(18–49),Special No.,U.S. viewersmillions
0,1,1,The One Where Monica Gets A New Roommate,James Burrows,David Crane & Marta Kauffman,"September 22, 1994",475085.0,21.5,1,,,,
1,2,2,The One with the Sonogram at the End,James Burrows,David Crane & Marta Kauffman,"September 29, 1994",456652.0,20.2,1,,,,
2,3,3,The One with the Thumb,James Burrows,Jeffrey Astrof & Mike Sikowitz,"October 6, 1994",456651.0,19.5,1,,,,
3,4,4,The One with George Stephanopoulos,James Burrows,Alexa Junge,"October 13, 1994",456654.0,19.7,1,,,,
4,5,5,The One with the East German Laundry Detergent,Pamela Fryman,Jeff Greenstein & Jeff Strauss,"October 20, 1994",456653.0,18.6,1,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
222,231,13,The One Where Joey Speaks French,Gary Halvorson,Sherry Bilsing-Graham & Ellen Plummer,"February 19, 2004",176261.0,24.27,11,,,,
223,232,14,The One with Princess Consuela,Gary Halvorson,Story by : Robert CarlockTeleplay by : Tracy R...,"February 26, 2004",176263.0,22.82,11,,,,
224,233,15,The One Where Estelle Dies,Gary Halvorson,Story by : Mark KunerthTeleplay by : David Cra...,"April 22, 2004",176264.0,22.64,11,,,,
225,234,16,The One with Rachel's Going Away Party,Gary Halvorson,Andrew Reich & Ted Cohen,"April 29, 2004",176265.0,24.51,11,,,,


In [33]:
f_scene_info["episode"] = f_scene_info["episode"].str.title()
f_scene_info.head(5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  f_scene_info["episode"] = f_scene_info["episode"].str.title()


Unnamed: 0,scene_number,episode,scene
0,0,The One Where Monica Gets A New Roomate,"Central Perk, Chandler, Joey, Phoebe, and Moni..."
1,1,The One Where Monica Gets A New Roomate,"The Subway, Phoebe is singing for change"
2,2,The One Where Monica Gets A New Roomate,"Ross's Apartment, the guys are there assemblin..."
3,3,The One Where Monica Gets A New Roomate,"A Restaurant, Monica and Paul are eating"
4,4,The One Where Monica Gets A New Roomate,Ross's Apartment; Ross is pacing while Joey an...


In [34]:
f_seasons["Title"] = f_seasons["Title"].str.title()
f_seasons.head(5)

Unnamed: 0,No.overall,No. inseason,Title,Directed by,Written by,Original air date,Prod.code,U.S. viewers(millions),season,Rating(18–49),Rating/share(18–49),Special No.,U.S. viewersmillions
0,1,1,The One Where Monica Gets A New Roommate,James Burrows,David Crane & Marta Kauffman,"September 22, 1994",475085.0,21.5,1,,,,
1,2,2,The One With The Sonogram At The End,James Burrows,David Crane & Marta Kauffman,"September 29, 1994",456652.0,20.2,1,,,,
2,3,3,The One With The Thumb,James Burrows,Jeffrey Astrof & Mike Sikowitz,"October 6, 1994",456651.0,19.5,1,,,,
3,4,4,The One With George Stephanopoulos,James Burrows,Alexa Junge,"October 13, 1994",456654.0,19.7,1,,,,
4,5,5,The One With The East German Laundry Detergent,Pamela Fryman,Jeff Greenstein & Jeff Strauss,"October 20, 1994",456653.0,18.6,1,,,,


In [35]:
# Merge the data frames based on the common column "episode"
merged_df = pd.merge(f_scene_info, f_seasons[['No.overall', 'Title', 'season']], 
                     left_on='episode', right_on='Title', how='left')

# Replace the "episode" column with the "No.overall" column where it is available
merged_df['episode'] = merged_df['No.overall'].fillna(merged_df['episode'])

# Drop unnecessary columns
merged_df.drop(['No.overall', 'Title'], axis=1, inplace=True)
merged_df

# Assign the modified data frame back to f_scene_info
#f_scene_info = merged_df

Unnamed: 0,scene_number,episode,scene,season
0,0,The One Where Monica Gets A New Roomate,"Central Perk, Chandler, Joey, Phoebe, and Moni...",
1,1,The One Where Monica Gets A New Roomate,"The Subway, Phoebe is singing for change",
2,2,The One Where Monica Gets A New Roomate,"Ross's Apartment, the guys are there assemblin...",
3,3,The One Where Monica Gets A New Roomate,"A Restaurant, Monica and Paul are eating",
4,4,The One Where Monica Gets A New Roomate,Ross's Apartment; Ross is pacing while Joey an...,
...,...,...,...,...
2966,2966,201,Joey's apartment. Monica has completely destro...,10
2967,2967,201,The gate at the airport. The passengers are st...,10
2968,2968,201,"Monica and Chandler's apartment. Joey, Chandle...",10
2969,2969,201,Ross's apartment. Ross enters and checks his m...,10


In [36]:
merged_df["episode"].unique()

array(['The One Where Monica Gets A New Roomate', '2', '3', '4', '5', '6',
       '7', '8', '9', '10', '11', '12', '13', '14', '15',
       'The One With Two Parts, Part 1', 'The One With Two Parts, Part 2',
       '18', '19', '20', '21', '22', '23', '24',
       "The One With Ross' New Girlfriend", '26',
       'The One Where Mr. Heckles Dies', '29', '31',
       'The One With The Last', '33', '34', '35', '38', '39', '40', '48',
       '49', "The One Where No-One'S Ready", '51', '52', '53', '54', '55',
       '56', '57', '58', '59', '60',
       'The One Where Monica And Richard Are Friends', '62', '63',
       'The One The Morning After', '65', '66', '67', '68',
       'The One With A Chick. And A Duck', '70', '71', '72', '73', '74',
       '75', '76', '77', '78', '79',
       'The One Where Chandler Crosses A Line', '81',
       "The One Where They'Re Gonna Party!", '83',
       'The One With Phoebes Uterus', '85', 'The One With Rachels Crush',
       'The One With Joeys Dirty Day',

In [37]:
# We can see that many episodes are not matched because of differences in spelling. I will have to go through them one by one

In [38]:
merged_df['episode'] = merged_df['episode'].replace('The One Where Monica Gets A New Roomate', '1')
merged_df['episode'] = merged_df['episode'].replace('The One With Two Parts, Part 1', '16')
merged_df['episode'] = merged_df['episode'].replace('The One With Two Parts, Part 2', '17')
merged_df['episode'] = merged_df['episode'].replace("The One With Ross' New Girlfriend",'25' )
merged_df['episode'] = merged_df['episode'].replace('The One Where Mr. Heckles Dies', '27')
merged_df['episode'] = merged_df['episode'].replace('The One With The Last', '32')
merged_df['episode'] = merged_df['episode'].replace("The One Where No-One'S Ready", '50')
merged_df['episode'] = merged_df['episode'].replace('The One Where Monica And Richard Are Friends', '13')
merged_df['episode'] = merged_df['episode'].replace('The One The Morning After', '64')
merged_df['episode'] = merged_df['episode'].replace("The One With A Chick. And A Duck", '69')
merged_df['episode'] = merged_df['episode'].replace("The One Where Chandler Crosses A Line", '80')
merged_df['episode'] = merged_df['episode'].replace("The One Where They'Re Gonna Party!", '82')
merged_df['episode'] = merged_df['episode'].replace("The One With Phoebes Uterus", '84')
merged_df['episode'] = merged_df['episode'].replace("The One With Rachels Crush", '86')
merged_df['episode'] = merged_df['episode'].replace("The One With Joeys Dirty Day", '87')
merged_df['episode'] = merged_df['episode'].replace("The One With Ross'S Wedding Parts I And Ii", '96')
merged_df['episode'] = merged_df['episode'].replace("The One With Ross'S Wedding - Uncut Version", '97')
merged_df['episode'] = merged_df['episode'].replace("The One Hundredth", '100')
merged_df['episode'] = merged_df['episode'].replace("The One With All The Kips", '102')
merged_df['episode'] = merged_df['episode'].replace("The One With The Thanksgiving Flashbacks", '105')
merged_df['episode'] = merged_df['episode'].replace("The One Where Everyone Finds Out", '111')
merged_df['episode'] = merged_df['episode'].replace("The One With A Cop", '113')
merged_df['episode'] = merged_df['episode'].replace("The One With Rachel'S Inadvertant Kiss", '114')
merged_df['episode'] = merged_df['episode'].replace("The One With The Ride Along", '117')
merged_df['episode'] = merged_df['episode'].replace("The One With Rosss Denial", '124')
merged_df['episode'] = merged_df['episode'].replace("The One With Joeys Porsche", '126')
merged_df['episode'] = merged_df['episode'].replace("The One The Last Night", '127')
merged_df['episode'] = merged_df['episode'].replace("The One With Rachels Sister", '134')
merged_df['episode'] = merged_df['episode'].replace("The One Where Chandler Cant Cry", '135')
merged_df['episode'] = merged_df['episode'].replace("The One With The Unagi", '138')
merged_df['episode'] = merged_df['episode'].replace("The One With Joeys Fridge", '140')
merged_df['episode'] = merged_df['episode'].replace("The One Where Ross Meets Elizabeths Dad", '142')
merged_df['episode'] = merged_df['episode'].replace("The One Where Pauls The Man", '143')
merged_df['episode'] = merged_df['episode'].replace("The One With Monicas Thunder", '147')
merged_df['episode'] = merged_df['episode'].replace("The One With Rachels Book", '148')
merged_df['episode'] = merged_df['episode'].replace("The One With Phoebes Cookies", '149')
merged_df['episode'] = merged_df['episode'].replace("The One With Rachels Assistant", '150')
merged_df['episode'] = merged_df['episode'].replace("The One With Rosss Book", '153')
merged_df['episode'] = merged_df['episode'].replace("The One Where Chandler Doesnt Like Dogs", '154')
merged_df['episode'] = merged_df['episode'].replace("The One With All The Cheesecake", '157')
merged_df['episode'] = merged_df['episode'].replace("The One Where Theyre Up All Night", '158')
merged_df['episode'] = merged_df['episode'].replace("The One Where Rosita Dies", '159')
merged_df['episode'] = merged_df['episode'].replace("The One Where They All Turn Thirty", '160')
merged_df['episode'] = merged_df['episode'].replace("The One With Joeys New Brain", '161')
merged_df['episode'] = merged_df['episode'].replace("The One With The Truth About London", '162')
merged_df['episode'] = merged_df['episode'].replace("The One With Joeys Award", '164')
merged_df['episode'] = merged_df['episode'].replace("The One With Ross And Monicas Cousin", '165')
merged_df['episode'] = merged_df['episode'].replace("The One With Rachels Big Kiss", '166')
merged_df['episode'] = merged_df['episode'].replace("The One With Chandlers Dad", '168')
merged_df['episode'] = merged_df['episode'].replace("The One With Chandler And Monicas Wedding", '169')
merged_df['episode'] = merged_df['episode'].replace('The One After "I Do"', '171')
merged_df['episode'] = merged_df['episode'].replace("The One With Monicas Boots", '180')
merged_df['episode'] = merged_df['episode'].replace("The One With Ross' Big Step Forward", '181')
merged_df['episode'] = merged_df['episode'].replace("The One In Massapequa", '188')
merged_df['episode'] = merged_df['episode'].replace("The One With Joeys Interview", '189')


In [39]:
# now lets re- match the seasons
# Merge the data frames based on the common column "episode"
merged_df.drop(['season'], axis=1, inplace=True)
merged_df = pd.merge(merged_df, f_seasons[['No.overall', 'season']], 
                     left_on='episode', right_on='No.overall', how='left')

# Replace the "episode" column with the "No.overall" column where it is available
#merged_df['episode'] = merged_df['No.overall'].fillna(merged_df['episode'])

# Drop unnecessary columns
#merged_df.drop(['No.overall', 'Title'], axis=1, inplace=True)
merged_df

# Assign the modified data frame back to f_scene_info
#f_scene_info = merged_df


Unnamed: 0,scene_number,episode,scene,No.overall,season
0,0,1,"Central Perk, Chandler, Joey, Phoebe, and Moni...",1,1
1,1,1,"The Subway, Phoebe is singing for change",1,1
2,2,1,"Ross's Apartment, the guys are there assemblin...",1,1
3,3,1,"A Restaurant, Monica and Paul are eating",1,1
4,4,1,Ross's Apartment; Ross is pacing while Joey an...,1,1
...,...,...,...,...,...
2966,2966,201,Joey's apartment. Monica has completely destro...,201,10
2967,2967,201,The gate at the airport. The passengers are st...,201,10
2968,2968,201,"Monica and Chandler's apartment. Joey, Chandle...",201,10
2969,2969,201,Ross's apartment. Ross enters and checks his m...,201,10


In [40]:
missing_episodes = merged_df[merged_df['season'].isna()]['episode']
print(f"The following episodes are not being matched in f_seasons: {missing_episodes.unique()}")


The following episodes are not being matched in f_seasons: ['16' '17' '96' '97' '169']


In [41]:
# These episodes are episodes with two parts. In the seasons df they have both numbers together, so lets fix that:

In [42]:
f_seasons[f_seasons["No.overall"]=='9697']

Unnamed: 0,No.overall,No. inseason,Title,Directed by,Written by,Original air date,Prod.code,U.S. viewers(millions),season,Rating(18–49),Rating/share(18–49),Special No.,U.S. viewersmillions
93,9697,2324,The One With Ross'S Wedding,Kevin S. Bright,Michael BorkowStory by : Jill Condon & Amy Too...,"May 7, 1998",466623466624.0,31.61,4,,16.7/49,,


In [43]:
row_96 = f_seasons.loc[93].to_dict()
row_96


{'No.overall': '9697',
 'No. inseason': '2324',
 'Title': "The One With Ross'S Wedding",
 'Directed by': 'Kevin S. Bright',
 'Written by': 'Michael BorkowStory by\u200a: Jill Condon & Amy ToominTeleplay by\u200a: Shana Goldberg-Meehan & Scott Silveri',
 'Original air date': 'May\xa07,\xa01998',
 'Prod.code': '466623466624.0',
 'U.S. viewers(millions)': '31.61',
 'season': '4',
 'Rating(18–49)': nan,
 'Rating/share(18–49)': '16.7/49',
 'Special No.': nan,
 'U.S. viewersmillions': nan}

In [44]:
f_seasons[f_seasons["No.overall"]=='1617']

Unnamed: 0,No.overall,No. inseason,Title,Directed by,Written by,Original air date,Prod.code,U.S. viewers(millions),season,Rating(18–49),Rating/share(18–49),Special No.,U.S. viewersmillions
15,1617,1617,The One With Two Parts,Michael Lembeck,Marta Kauffman & David Crane,"February 23, 1995",456665456666.0,26.130.5,1,,,,


In [45]:
row_16 = f_seasons.loc[15].to_dict()
row_16

{'No.overall': '1617',
 'No. inseason': '1617',
 'Title': 'The One With Two Parts',
 'Directed by': 'Michael Lembeck',
 'Written by': 'Marta Kauffman & David Crane',
 'Original air date': 'February\xa023,\xa01995',
 'Prod.code': '456665456666.0',
 'U.S. viewers(millions)': '26.130.5',
 'season': '1',
 'Rating(18–49)': nan,
 'Rating/share(18–49)': nan,
 'Special No.': nan,
 'U.S. viewersmillions': nan}

In [46]:
f_seasons[f_seasons["No.overall"]=='169170']

Unnamed: 0,No.overall,No. inseason,Title,Directed by,Written by,Original air date,Prod.code,U.S. viewers(millions),season,Rating(18–49),Rating/share(18–49),Special No.,U.S. viewersmillions
162,169170,2324,The One With Monica And Chandler'S Wedding,Kevin S. Bright,Gregory S. MalinsMarta Kauffman & David Crane,"May 17, 2001",226422226423.0,30.05,7,,15.7/43,,


In [47]:
row_169 = f_seasons.loc[162].to_dict()
print(row_169)

{'No.overall': '169170', 'No. inseason': '2324', 'Title': "The One With Monica And Chandler'S Wedding", 'Directed by': 'Kevin S. Bright', 'Written by': 'Gregory S. MalinsMarta Kauffman & David Crane', 'Original air date': 'May\xa017,\xa02001', 'Prod.code': '226422226423.0', 'U.S. viewers(millions)': '30.05', 'season': '7', 'Rating(18–49)': nan, 'Rating/share(18–49)': '15.7/43', 'Special No.': nan, 'U.S. viewersmillions': nan}


In [48]:
new_rows = [{'No.overall': '96', 'No. inseason': '23', 'Title': "The One With Ross'S Wedding", 'Directed by': 'Kevin S. Bright', 'Written by': 'Michael BorkowStory by\u200a: Jill Condon & Amy ToominTeleplay by\u200a: Shana Goldberg-Meehan & Scott Silveri', 'Original air date': 'May\xa07,\xa01998', 'Prod.code': '466623', 'U.S. viewers(millions)': '31.61','season': '4', 'Rating(18–49)': np.nan, 'Rating/share(18–49)': '16.7/49', 'Special No.': np.nan, 'U.S. viewersmillions': np.nan},
            {'No.overall': '97', 'No. inseason': '24', 'Title': "The One With Ross'S Wedding", 'Directed by': 'Kevin S. Bright', 'Written by': 'Michael BorkowStory by\u200a: Jill Condon & Amy ToominTeleplay by\u200a: Shana Goldberg-Meehan & Scott Silveri', 'Original air date': 'May\xa07,\xa01998', 'Prod.code': '466624', 'U.S. viewers(millions)': '31.61','season': '4', 'Rating(18–49)': np.nan, 'Rating/share(18–49)': '16.7/49', 'Special No.': np.nan, 'U.S. viewersmillions': np.nan},
            {'No.overall': '16', 'No. inseason': '16', 'Title': 'The One With Two Parts', 'Directed by': 'Michael Lembeck', 'Written by': 'Marta Kauffman & David Crane', 'Original air date': 'February\xa023,\xa01995', 'Prod.code': '456665', 'U.S. viewers(millions)': '26.130.5', 'season': '1', 'Rating(18–49)': np.nan, 'Rating/share(18–49)': np.nan, 'Special No.': np.nan, 'U.S. viewersmillions': np.nan},
            {'No.overall': '17', 'No. inseason': '17', 'Title': 'The One With Two Parts', 'Directed by': 'Michael Lembeck', 'Written by': 'Marta Kauffman & David Crane', 'Original air date': 'February\xa023,\xa01995', 'Prod.code': '456666', 'U.S. viewers(millions)': '26.130.5', 'season': '1', 'Rating(18–49)': np.nan, 'Rating/share(18–49)': np.nan, 'Special No.': np.nan, 'U.S. viewersmillions': np.nan},
            {'No.overall': '169', 'No. inseason': '23', 'Title': "The One With Monica And Chandler'S Wedding", 'Directed by': 'Kevin S. Bright', 'Written by': 'Gregory S. MalinsMarta Kauffman & David Crane', 'Original air date': 'May\xa017,\xa02001', 'Prod.code': '226422', 'U.S. viewers(millions)': '30.05', 'season': '7', 'Rating(18–49)': np.nan, 'Rating/share(18–49)': '15.7/43', 'Special No.': np.nan, 'U.S. viewersmillions': np.nan},
            {'No.overall': '170', 'No. inseason': '24', 'Title': "The One With Monica And Chandler'S Wedding", 'Directed by': 'Kevin S. Bright', 'Written by': 'Gregory S. MalinsMarta Kauffman & David Crane', 'Original air date': 'May\xa017,\xa02001', 'Prod.code': '226423', 'U.S. viewers(millions)': '30.05', 'season': '7', 'Rating(18–49)': np.nan, 'Rating/share(18–49)': '15.7/43', 'Special No.': np.nan, 'U.S. viewersmillions': np.nan}]

In [49]:
f_seasons = f_seasons.append(new_rows, ignore_index=True)
f_seasons = f_seasons.drop(index=[93,15,162])
f_seasons.tail()

  f_seasons = f_seasons.append(new_rows, ignore_index=True)


Unnamed: 0,No.overall,No. inseason,Title,Directed by,Written by,Original air date,Prod.code,U.S. viewers(millions),season,Rating(18–49),Rating/share(18–49),Special No.,U.S. viewersmillions
228,97,24,The One With Ross'S Wedding,Kevin S. Bright,Michael BorkowStory by : Jill Condon & Amy Too...,"May 7, 1998",466624,31.61,4,,16.7/49,,
229,16,16,The One With Two Parts,Michael Lembeck,Marta Kauffman & David Crane,"February 23, 1995",456665,26.130.5,1,,,,
230,17,17,The One With Two Parts,Michael Lembeck,Marta Kauffman & David Crane,"February 23, 1995",456666,26.130.5,1,,,,
231,169,23,The One With Monica And Chandler'S Wedding,Kevin S. Bright,Gregory S. MalinsMarta Kauffman & David Crane,"May 17, 2001",226422,30.05,7,,15.7/43,,
232,170,24,The One With Monica And Chandler'S Wedding,Kevin S. Bright,Gregory S. MalinsMarta Kauffman & David Crane,"May 17, 2001",226423,30.05,7,,15.7/43,,


In [50]:
# now lets re- match the seasons
# Merge the data frames based on the common column "episode"
merged_df.drop(['season'], axis=1, inplace=True)
merged_df = pd.merge(merged_df, f_seasons[['No.overall', 'season']], 
                     left_on='episode', right_on='No.overall', how='left')

# Replace the "episode" column with the "No.overall" column where it is available
#merged_df['episode'] = merged_df['No.overall'].fillna(merged_df['episode'])

# Drop unnecessary columns
#merged_df.drop(['No.overall', 'Title'], axis=1, inplace=True)
merged_df

# Assign the modified data frame back to f_scene_info
#f_scene_info = merged_df


Unnamed: 0,scene_number,episode,scene,No.overall_x,No.overall_y,season
0,0,1,"Central Perk, Chandler, Joey, Phoebe, and Moni...",1,1,1
1,1,1,"The Subway, Phoebe is singing for change",1,1,1
2,2,1,"Ross's Apartment, the guys are there assemblin...",1,1,1
3,3,1,"A Restaurant, Monica and Paul are eating",1,1,1
4,4,1,Ross's Apartment; Ross is pacing while Joey an...,1,1,1
...,...,...,...,...,...,...
2966,2966,201,Joey's apartment. Monica has completely destro...,201,201,10
2967,2967,201,The gate at the airport. The passengers are st...,201,201,10
2968,2968,201,"Monica and Chandler's apartment. Joey, Chandle...",201,201,10
2969,2969,201,Ross's apartment. Ross enters and checks his m...,201,201,10


In [51]:
merged_df[merged_df["episode"].isna()]

Unnamed: 0,scene_number,episode,scene,No.overall_x,No.overall_y,season


In [52]:
merged_df

Unnamed: 0,scene_number,episode,scene,No.overall_x,No.overall_y,season
0,0,1,"Central Perk, Chandler, Joey, Phoebe, and Moni...",1,1,1
1,1,1,"The Subway, Phoebe is singing for change",1,1,1
2,2,1,"Ross's Apartment, the guys are there assemblin...",1,1,1
3,3,1,"A Restaurant, Monica and Paul are eating",1,1,1
4,4,1,Ross's Apartment; Ross is pacing while Joey an...,1,1,1
...,...,...,...,...,...,...
2966,2966,201,Joey's apartment. Monica has completely destro...,201,201,10
2967,2967,201,The gate at the airport. The passengers are st...,201,201,10
2968,2968,201,"Monica and Chandler's apartment. Joey, Chandle...",201,201,10
2969,2969,201,Ross's apartment. Ross enters and checks his m...,201,201,10


In [53]:
f_scene_info = merged_df

In [54]:
# I will drop no.overall and season since this information is already in our f_easons df
f_scene_info.drop(["season", "No.overall_x", "No.overall_y"], axis=1, inplace=True)

In [55]:
f_scene_info

Unnamed: 0,scene_number,episode,scene
0,0,1,"Central Perk, Chandler, Joey, Phoebe, and Moni..."
1,1,1,"The Subway, Phoebe is singing for change"
2,2,1,"Ross's Apartment, the guys are there assemblin..."
3,3,1,"A Restaurant, Monica and Paul are eating"
4,4,1,Ross's Apartment; Ross is pacing while Joey an...
...,...,...,...
2966,2966,201,Joey's apartment. Monica has completely destro...
2967,2967,201,The gate at the airport. The passengers are st...
2968,2968,201,"Monica and Chandler's apartment. Joey, Chandle..."
2969,2969,201,Ross's apartment. Ross enters and checks his m...


In [56]:
f_seasons

Unnamed: 0,No.overall,No. inseason,Title,Directed by,Written by,Original air date,Prod.code,U.S. viewers(millions),season,Rating(18–49),Rating/share(18–49),Special No.,U.S. viewersmillions
0,1,1,The One Where Monica Gets A New Roommate,James Burrows,David Crane & Marta Kauffman,"September 22, 1994",475085.0,21.5,1,,,,
1,2,2,The One With The Sonogram At The End,James Burrows,David Crane & Marta Kauffman,"September 29, 1994",456652.0,20.2,1,,,,
2,3,3,The One With The Thumb,James Burrows,Jeffrey Astrof & Mike Sikowitz,"October 6, 1994",456651.0,19.5,1,,,,
3,4,4,The One With George Stephanopoulos,James Burrows,Alexa Junge,"October 13, 1994",456654.0,19.7,1,,,,
4,5,5,The One With The East German Laundry Detergent,Pamela Fryman,Jeff Greenstein & Jeff Strauss,"October 20, 1994",456653.0,18.6,1,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
228,97,24,The One With Ross'S Wedding,Kevin S. Bright,Michael BorkowStory by : Jill Condon & Amy Too...,"May 7, 1998",466624,31.61,4,,16.7/49,,
229,16,16,The One With Two Parts,Michael Lembeck,Marta Kauffman & David Crane,"February 23, 1995",456665,26.130.5,1,,,,
230,17,17,The One With Two Parts,Michael Lembeck,Marta Kauffman & David Crane,"February 23, 1995",456666,26.130.5,1,,,,
231,169,23,The One With Monica And Chandler'S Wedding,Kevin S. Bright,Gregory S. MalinsMarta Kauffman & David Crane,"May 17, 2001",226422,30.05,7,,15.7/43,,


In [57]:
friends_script

Unnamed: 0,episode,scene,character,line
1,THE ONE WHERE MONICA GETS A NEW ROOMATE,,Written by,Marta Kauffman & David Crane
3,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Monica,There's nothing to tell! He's just some guy I ...
4,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Joey,"C'mon, you're going out with the guy! There's ..."
5,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Chandler,"All right Joey, be nice. So does he have a hum..."
6,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Phoebe,"Wait, does he eat chalk?"
...,...,...,...,...
66532,THE ONE WITH ROSS'S INAPPROPRIATE SONG,Monica and Chandler's apartment. Chandler and ...,Chandler,"Oh, it's gonna be okay."
66534,THE ONE WITH ROSS'S INAPPROPRIATE SONG,Monica and Chandler's apartment. Chandler and ...,Rachel,(crying) Do you guys have to go to the new hou...
66535,THE ONE WITH ROSS'S INAPPROPRIATE SONG,Monica and Chandler's apartment. Chandler and ...,Monica,We got some time.
66536,THE ONE WITH ROSS'S INAPPROPRIATE SONG,Monica and Chandler's apartment. Chandler and ...,Rachel,"Okay, should we get some coffee?"


In [58]:
# now we need to add the episode number to our script so that we can relate all 3 tables. The writer info is redundant so lets start by dropping thos rows:
friends_script = friends_script.dropna(subset=['scene'])
friends_script

Unnamed: 0,episode,scene,character,line
3,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Monica,There's nothing to tell! He's just some guy I ...
4,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Joey,"C'mon, you're going out with the guy! There's ..."
5,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Chandler,"All right Joey, be nice. So does he have a hum..."
6,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Phoebe,"Wait, does he eat chalk?"
8,THE ONE WHERE MONICA GETS A NEW ROOMATE,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Phoebe,"Just, 'cause, I don't want her to go through w..."
...,...,...,...,...
66532,THE ONE WITH ROSS'S INAPPROPRIATE SONG,Monica and Chandler's apartment. Chandler and ...,Chandler,"Oh, it's gonna be okay."
66534,THE ONE WITH ROSS'S INAPPROPRIATE SONG,Monica and Chandler's apartment. Chandler and ...,Rachel,(crying) Do you guys have to go to the new hou...
66535,THE ONE WITH ROSS'S INAPPROPRIATE SONG,Monica and Chandler's apartment. Chandler and ...,Monica,We got some time.
66536,THE ONE WITH ROSS'S INAPPROPRIATE SONG,Monica and Chandler's apartment. Chandler and ...,Rachel,"Okay, should we get some coffee?"


In [59]:
friends_script['episode'] = friends_script['episode'].str.title()
friends_script

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  friends_script['episode'] = friends_script['episode'].str.title()


Unnamed: 0,episode,scene,character,line
3,The One Where Monica Gets A New Roomate,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Monica,There's nothing to tell! He's just some guy I ...
4,The One Where Monica Gets A New Roomate,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Joey,"C'mon, you're going out with the guy! There's ..."
5,The One Where Monica Gets A New Roomate,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Chandler,"All right Joey, be nice. So does he have a hum..."
6,The One Where Monica Gets A New Roomate,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Phoebe,"Wait, does he eat chalk?"
8,The One Where Monica Gets A New Roomate,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Phoebe,"Just, 'cause, I don't want her to go through w..."
...,...,...,...,...
66532,The One With Ross'S Inappropriate Song,Monica and Chandler's apartment. Chandler and ...,Chandler,"Oh, it's gonna be okay."
66534,The One With Ross'S Inappropriate Song,Monica and Chandler's apartment. Chandler and ...,Rachel,(crying) Do you guys have to go to the new hou...
66535,The One With Ross'S Inappropriate Song,Monica and Chandler's apartment. Chandler and ...,Monica,We got some time.
66536,The One With Ross'S Inappropriate Song,Monica and Chandler's apartment. Chandler and ...,Rachel,"Okay, should we get some coffee?"


In [60]:
merged_df_scenes = pd.merge(friends_script, f_scene_info[['scene_number', 'episode','scene']], 
                     left_on='scene', right_on='scene', how='left')
merged_df_scenes

Unnamed: 0,episode_x,scene,character,line,scene_number,episode_y
0,The One Where Monica Gets A New Roomate,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Monica,There's nothing to tell! He's just some guy I ...,0,1
1,The One Where Monica Gets A New Roomate,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Joey,"C'mon, you're going out with the guy! There's ...",0,1
2,The One Where Monica Gets A New Roomate,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Chandler,"All right Joey, be nice. So does he have a hum...",0,1
3,The One Where Monica Gets A New Roomate,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Phoebe,"Wait, does he eat chalk?",0,1
4,The One Where Monica Gets A New Roomate,"Central Perk, Chandler, Joey, Phoebe, and Moni...",Phoebe,"Just, 'cause, I don't want her to go through w...",0,1
...,...,...,...,...,...,...
67502,The One With Ross'S Inappropriate Song,Monica and Chandler's apartment. Chandler and ...,Chandler,"Oh, it's gonna be okay.",2970,201
67503,The One With Ross'S Inappropriate Song,Monica and Chandler's apartment. Chandler and ...,Rachel,(crying) Do you guys have to go to the new hou...,2970,201
67504,The One With Ross'S Inappropriate Song,Monica and Chandler's apartment. Chandler and ...,Monica,We got some time.,2970,201
67505,The One With Ross'S Inappropriate Song,Monica and Chandler's apartment. Chandler and ...,Rachel,"Okay, should we get some coffee?",2970,201


In [61]:
merged_df_scenes[merged_df_scenes.isna().any(axis=1)]

Unnamed: 0,episode_x,scene,character,line,scene_number,episode_y


In [62]:
merged_df_scenes.drop(["episode_x", "scene", "episode_y"], axis=1, inplace=True)
merged_df_scenes

Unnamed: 0,character,line,scene_number
0,Monica,There's nothing to tell! He's just some guy I ...,0
1,Joey,"C'mon, you're going out with the guy! There's ...",0
2,Chandler,"All right Joey, be nice. So does he have a hum...",0
3,Phoebe,"Wait, does he eat chalk?",0
4,Phoebe,"Just, 'cause, I don't want her to go through w...",0
...,...,...,...
67502,Chandler,"Oh, it's gonna be okay.",2970
67503,Rachel,(crying) Do you guys have to go to the new hou...,2970
67504,Monica,We got some time.,2970
67505,Rachel,"Okay, should we get some coffee?",2970


In [63]:
friends_script = merged_df_scenes
friends_script

Unnamed: 0,character,line,scene_number
0,Monica,There's nothing to tell! He's just some guy I ...,0
1,Joey,"C'mon, you're going out with the guy! There's ...",0
2,Chandler,"All right Joey, be nice. So does he have a hum...",0
3,Phoebe,"Wait, does he eat chalk?",0
4,Phoebe,"Just, 'cause, I don't want her to go through w...",0
...,...,...,...
67502,Chandler,"Oh, it's gonna be okay.",2970
67503,Rachel,(crying) Do you guys have to go to the new hou...,2970
67504,Monica,We got some time.,2970
67505,Rachel,"Okay, should we get some coffee?",2970


In [64]:
# lets add the sentiment analyis to this df
import nltk
from nltk.corpus import stopwords
from nltk.sentiment.vader import SentimentIntensityAnalyzer
nltk.downloader.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/kike/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [69]:
sia = SentimentIntensityAnalyzer()
test = sia.polarity_scores(friends_script["line"][0])

In [72]:
test

{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}

In [73]:
# initialize the sentiment analyzer
sia = SentimentIntensityAnalyzer()

# define a function to apply the sentiment analyzer to each row
def analyze_sentiment_sia(row):
    sentiment = sia.polarity_scores(row['line'])
    return pd.Series(sentiment)

In [74]:
# apply the function to each row and create new columns for the sentiment scores
friends_script[['sia.neg', 'sia.neu', 'sia.pos', 'sia.compound']] = friends_script.apply(analyze_sentiment_sia, axis=1)
friends_script

Unnamed: 0,character,line,scene_number,sia.neg,sia.neu,sia.pos,sia.compound
0,Monica,There's nothing to tell! He's just some guy I ...,0,0.000,1.000,0.000,0.0000
1,Joey,"C'mon, you're going out with the guy! There's ...",0,0.221,0.779,0.000,-0.5696
2,Chandler,"All right Joey, be nice. So does he have a hum...",0,0.000,0.792,0.208,0.4871
3,Phoebe,"Wait, does he eat chalk?",0,0.000,1.000,0.000,0.0000
4,Phoebe,"Just, 'cause, I don't want her to go through w...",0,0.104,0.896,0.000,-0.1316
...,...,...,...,...,...,...,...
67502,Chandler,"Oh, it's gonna be okay.",2970,0.000,0.678,0.322,0.2263
67503,Rachel,(crying) Do you guys have to go to the new hou...,2970,0.000,1.000,0.000,0.0000
67504,Monica,We got some time.,2970,0.000,1.000,0.000,0.0000
67505,Rachel,"Okay, should we get some coffee?",2970,0.000,0.725,0.275,0.2263


In [66]:
from textblob import TextBlob

In [67]:
test = TextBlob(friends_script["line"][1]).sentiment

In [68]:
test[1]

0.9

In [76]:
# define a function to apply the TextBlob sentiment analysis to each row
def analyze_sentiment_tb(row):
    sentiment = TextBlob(row['line']).sentiment
    return pd.Series(sentiment)

In [77]:
# apply the function to each row and create new columns for the sentiment scores
friends_script[['tb.polarity', 'tb.subjectivity']] = friends_script.apply(analyze_sentiment_tb, axis=1)
friends_script

Unnamed: 0,character,line,scene_number,sia.neg,sia.neu,sia.pos,sia.compound,tb.polarity,tb.subjectivity
0,Monica,There's nothing to tell! He's just some guy I ...,0,0.000,1.000,0.000,0.0000,0.000000,0.000000
1,Joey,"C'mon, you're going out with the guy! There's ...",0,0.221,0.779,0.000,-0.5696,-0.625000,0.900000
2,Chandler,"All right Joey, be nice. So does he have a hum...",0,0.000,0.792,0.208,0.4871,0.442857,0.767857
3,Phoebe,"Wait, does he eat chalk?",0,0.000,1.000,0.000,0.0000,0.000000,0.000000
4,Phoebe,"Just, 'cause, I don't want her to go through w...",0,0.104,0.896,0.000,-0.1316,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...
67502,Chandler,"Oh, it's gonna be okay.",2970,0.000,0.678,0.322,0.2263,0.500000,0.500000
67503,Rachel,(crying) Do you guys have to go to the new hou...,2970,0.000,1.000,0.000,0.0000,0.074026,0.530087
67504,Monica,We got some time.,2970,0.000,1.000,0.000,0.0000,0.000000,0.000000
67505,Rachel,"Okay, should we get some coffee?",2970,0.000,0.725,0.275,0.2263,0.500000,0.500000


In [None]:
# Excellent, now lets export these as csvs 

In [84]:
# lets check if we can save it using the ';' separator
friends_script["line"].str.contains(';')[friends_script["line"].str.contains(';') == True]

1755     True
2032     True
3469     True
3595     True
4833     True
         ... 
61644    True
62354    True
65058    True
66957    True
67294    True
Name: line, Length: 230, dtype: bool

In [85]:
friends_script["line"][1755]

"Well, y'know, I had some trouble with it at first too, but the way I look at it is, I get all the good stuff: all the fun, all the talking, all the sex; and none of the responsibility. I mean, this is every guy's fantasy!"

In [89]:
# Looks like we are out of luck, lets try '~'
friends_script["line"].str.contains('~').sum()

0

In [90]:
friends_script.to_csv('data/script.csv', sep='~')

In [94]:
friends_script

Unnamed: 0,character,line,scene_number,sia.neg,sia.neu,sia.pos,sia.compound,tb.polarity,tb.subjectivity
0,Monica,There's nothing to tell! He's just some guy I ...,0,0.000,1.000,0.000,0.0000,0.000000,0.000000
1,Joey,"C'mon, you're going out with the guy! There's ...",0,0.221,0.779,0.000,-0.5696,-0.625000,0.900000
2,Chandler,"All right Joey, be nice. So does he have a hum...",0,0.000,0.792,0.208,0.4871,0.442857,0.767857
3,Phoebe,"Wait, does he eat chalk?",0,0.000,1.000,0.000,0.0000,0.000000,0.000000
4,Phoebe,"Just, 'cause, I don't want her to go through w...",0,0.104,0.896,0.000,-0.1316,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...
67502,Chandler,"Oh, it's gonna be okay.",2970,0.000,0.678,0.322,0.2263,0.500000,0.500000
67503,Rachel,(crying) Do you guys have to go to the new hou...,2970,0.000,1.000,0.000,0.0000,0.074026,0.530087
67504,Monica,We got some time.,2970,0.000,1.000,0.000,0.0000,0.000000,0.000000
67505,Rachel,"Okay, should we get some coffee?",2970,0.000,0.725,0.275,0.2263,0.500000,0.500000


In [91]:
f_seasons.to_csv('data/seasons.csv', sep='~')

In [93]:
f_seasons

Unnamed: 0,No.overall,No. inseason,Title,Directed by,Written by,Original air date,Prod.code,U.S. viewers(millions),season,Rating(18–49),Rating/share(18–49),Special No.,U.S. viewersmillions
0,1,1,The One Where Monica Gets A New Roommate,James Burrows,David Crane & Marta Kauffman,"September 22, 1994",475085.0,21.5,1,,,,
1,2,2,The One With The Sonogram At The End,James Burrows,David Crane & Marta Kauffman,"September 29, 1994",456652.0,20.2,1,,,,
2,3,3,The One With The Thumb,James Burrows,Jeffrey Astrof & Mike Sikowitz,"October 6, 1994",456651.0,19.5,1,,,,
3,4,4,The One With George Stephanopoulos,James Burrows,Alexa Junge,"October 13, 1994",456654.0,19.7,1,,,,
4,5,5,The One With The East German Laundry Detergent,Pamela Fryman,Jeff Greenstein & Jeff Strauss,"October 20, 1994",456653.0,18.6,1,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
228,97,24,The One With Ross'S Wedding,Kevin S. Bright,Michael BorkowStory by : Jill Condon & Amy Too...,"May 7, 1998",466624,31.61,4,,16.7/49,,
229,16,16,The One With Two Parts,Michael Lembeck,Marta Kauffman & David Crane,"February 23, 1995",456665,26.130.5,1,,,,
230,17,17,The One With Two Parts,Michael Lembeck,Marta Kauffman & David Crane,"February 23, 1995",456666,26.130.5,1,,,,
231,169,23,The One With Monica And Chandler'S Wedding,Kevin S. Bright,Gregory S. MalinsMarta Kauffman & David Crane,"May 17, 2001",226422,30.05,7,,15.7/43,,


In [95]:
f_scene_info.to_csv('data/scenes.csv', sep='~')

In [92]:
f_scene_info

Unnamed: 0,scene_number,episode,scene
0,0,1,"Central Perk, Chandler, Joey, Phoebe, and Moni..."
1,1,1,"The Subway, Phoebe is singing for change"
2,2,1,"Ross's Apartment, the guys are there assemblin..."
3,3,1,"A Restaurant, Monica and Paul are eating"
4,4,1,Ross's Apartment; Ross is pacing while Joey an...
...,...,...,...
2966,2966,201,Joey's apartment. Monica has completely destro...
2967,2967,201,The gate at the airport. The passengers are st...
2968,2968,201,"Monica and Chandler's apartment. Joey, Chandle..."
2969,2969,201,Ross's apartment. Ross enters and checks his m...


In [96]:
# Now lets upload this to sql

In [119]:
import sqlalchemy as alch
import pymysql
from dotenv import load_dotenv


In [120]:
load_dotenv()

True

In [122]:
password = os.getenv('SQL_PASSWORD')

In [123]:
password = os.getenv('SQL_PASSWORD')
dbName = "friends"
connectionData=f"mysql+pymysql://root:{password}@localhost/{dbName}"
engine = alch.create_engine(connectionData)
engine

Engine(mysql+pymysql://root:***@localhost/friends)

In [124]:
friends_script.to_sql('script', con=engine, if_exists='replace')


67507

In [125]:
f_scene_info.to_sql('scenes', con=engine, if_exists='replace')

2971

In [126]:
f_seasons.to_sql('seasons', con=engine, if_exists='replace')

230