In [1]:
# Import dependencies
import pandas as pd
import numpy as np

In [2]:
# Read in the Scripts CSV file
cleaned_df = pd.read_csv('../rawData/cleaned_scripts.csv')

# Drop the "Unnamed: 0" and "Unnamed: 0.1" columns.
# The `columns=` keyword is used because passing the axis positionally
# (`drop(label, 1)`) was deprecated in pandas 1.3 and is rejected by pandas 2.x.
cleaned_df = cleaned_df.drop(columns=["Unnamed: 0", "Unnamed: 0.1"])
cleaned_df.head()

Unnamed: 0,Character,Dialogue,EpisodeNo,SEID,Season,Sentiment
0,JERRY,Do you know what this is all about? Do you kno...,1,S01E01,1,0.9029
1,JERRY,"(pointing at Georges shirt) See, to me, that b...",1,S01E01,1,-0.4389
2,GEORGE,Are you through?,1,S01E01,1,0.0
3,JERRY,"You do of course try on, when you buy?",1,S01E01,1,0.0
4,GEORGE,"Yes, it was purple, I liked it, I dont actuall...",1,S01E01,1,0.6705


In [3]:
# Create the WordCount and LineCount variables.
# LineCount is easy: set it to 1 on every row, so that summing it after grouping by episode gives the number of lines.

In [4]:
# Every row is one line of dialogue, so LineCount is a column of ones
# with one entry per row of the dataframe.
n_rows = len(cleaned_df)
cleaned_df["LineCount"] = np.full(n_rows, 1)
cleaned_df.head(5)

Unnamed: 0,Character,Dialogue,EpisodeNo,SEID,Season,Sentiment,LineCount
0,JERRY,Do you know what this is all about? Do you kno...,1,S01E01,1,0.9029,1
1,JERRY,"(pointing at Georges shirt) See, to me, that b...",1,S01E01,1,-0.4389,1
2,GEORGE,Are you through?,1,S01E01,1,0.0,1
3,JERRY,"You do of course try on, when you buy?",1,S01E01,1,0.0,1
4,GEORGE,"Yes, it was purple, I liked it, I dont actuall...",1,S01E01,1,0.6705,1


In [5]:
#Create a function for counting the # of words per line of dialogue
def word_count(text: str) -> int:
    """Return the number of whitespace-separated words in ``text``.

    Parameters
    ----------
    text : str
        The line of dialogue to count words in.

    Returns
    -------
    int
        Number of tokens produced by ``text.split()``; 0 for an empty or
        whitespace-only string. Punctuation standing on its own (e.g. "-")
        counts as a word.
    """
    # str.split() with no arguments splits on any run of whitespace and
    # discards empty strings, so its length is the word count.
    return len(text.split())

In [6]:
# WordCount -- count the words in each row's Dialogue.

# Create a list that will later be added to dataframe
WordCount = []

# Row count is loop-invariant, so look it up once
total_rows = cleaned_df.shape[0]

for i, dialogue in enumerate(cleaned_df["Dialogue"]):

    # A missing Dialogue is NaN; str(NaN) is the text "nan", which would be
    # counted as one word. Give those rows a count of 0 explicitly instead of
    # relying on a catch-all except that can never fire on a string.
    if pd.isna(dialogue):
        count = 0
    else:
        count = word_count(str(dialogue))

    # Append count to WordCount list
    WordCount.append(count)

    #########################
    # Print notifications to make sure the code is running
    #########################

    if i % 2000 == 0:
        perc_complete = round(i / total_rows * 100, 0)
        print("Percent Complete: " + str(perc_complete) + "%")

# Report completion after the loop: inside it, i stops at total_rows - 1,
# so a check for i == total_rows would never be true.
print("-----------------------------------")
print("Loop Complete!")

Percent Complete: 0.0%
Percent Complete: 4.0%
Percent Complete: 7.0%
Percent Complete: 11.0%
Percent Complete: 15.0%
Percent Complete: 18.0%
Percent Complete: 22.0%
Percent Complete: 26.0%
Percent Complete: 29.0%
Percent Complete: 33.0%
Percent Complete: 37.0%
Percent Complete: 40.0%
Percent Complete: 44.0%
Percent Complete: 48.0%
Percent Complete: 51.0%
Percent Complete: 55.0%
Percent Complete: 59.0%
Percent Complete: 62.0%
Percent Complete: 66.0%
Percent Complete: 70.0%
Percent Complete: 73.0%
Percent Complete: 77.0%
Percent Complete: 81.0%
Percent Complete: 84.0%
Percent Complete: 88.0%
Percent Complete: 92.0%
Percent Complete: 95.0%
Percent Complete: 99.0%


In [7]:
# Attach the per-row word counts to the dataframe as the "WordCount" column,
# then show the last rows.
word_count_column = pd.Series(WordCount, index=cleaned_df.index)
cleaned_df["WordCount"] = word_count_column
cleaned_df.tail(5)

Unnamed: 0,Character,Dialogue,EpisodeNo,SEID,Season,Sentiment,LineCount,WordCount
54611,JERRY,Grand theft auto - don't steal any of my jokes.,23,S09E23,9,0.7669,1,10
54612,OTHER,You suck - I'm gonna cut you.,23,S09E23,9,-0.6124,1,7
54613,JERRY,"Hey, I don't come down to where you work, and ...",23,S09E23,9,0.4939,1,18
54614,OTHER,"Alright, Seinfeld, that's it. Let's go. Come on.",23,S09E23,9,0.25,1,8
54615,JERRY,"Alright, hey, you've been great! See you in th...",23,S09E23,9,0.75,1,10


In [8]:
# Weighted sentiment score: each line's Sentiment scaled by its WordCount
cleaned_df["WtdSentiment"] = cleaned_df["WordCount"] * cleaned_df["Sentiment"]
cleaned_df.tail(n=10)

Unnamed: 0,Character,Dialogue,EpisodeNo,SEID,Season,Sentiment,LineCount,WordCount,WtdSentiment
54606,OTHER,I am.,23,S09E23,9,0.0,1,2,0.0
54607,JERRY,I'll talk slower. I'm kidding - I love Cellblo...,23,S09E23,9,0.8316,1,22,18.2952
54608,OTHER,Murder one.,23,S09E23,9,-0.6908,1,2,-1.3816
54609,JERRY,"Murder one? Oooooo, watch out everybody. Bette...",23,S09E23,9,0.1926,1,26,5.0076
54610,OTHER,Grand theft auto.,23,S09E23,9,0.4588,1,3,1.3764
54611,JERRY,Grand theft auto - don't steal any of my jokes.,23,S09E23,9,0.7669,1,10,7.669
54612,OTHER,You suck - I'm gonna cut you.,23,S09E23,9,-0.6124,1,7,-4.2868
54613,JERRY,"Hey, I don't come down to where you work, and ...",23,S09E23,9,0.4939,1,18,8.8902
54614,OTHER,"Alright, Seinfeld, that's it. Let's go. Come on.",23,S09E23,9,0.25,1,8,2.0
54615,JERRY,"Alright, hey, you've been great! See you in th...",23,S09E23,9,0.75,1,10,7.5
