In [None]:
# Import dependencies
import pandas as pd
import numpy as np
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# One shared VADER analyzer instance; the scoring loop further down calls
# analyzer.polarity_scores() on every line of dialogue.
analyzer = SentimentIntensityAnalyzer()

In [None]:
# Load the raw scripts CSV into a DataFrame (path is relative to this notebook)
raw_scripts_path = '../rawData/scripts.csv'
scripts_df = pd.read_csv(raw_scripts_path)

In [None]:
# The four main characters; their names are kept as-is in the "Character" column
primaryChars = [
    "JERRY",
    "GEORGE",
    "KRAMER",
    "ELAINE",
]

In [None]:
# Secondary (recurring) characters; the cut-off used to pick them was
# 150 lines of dialogue
secondaryChars = [
    "NEWMAN",
    "MORTY",
    "HELEN",
    "FRANK",
    "SUSAN",
    "ESTELLE",
    "PUDDY",
]

In [None]:
# Add zero-filled placeholder columns for the positive, negative and compound
# sentiment scores (one entry per row of the DataFrame)
row_count = scripts_df.shape[0]
for sentiment_col in ("PosSent", "NegSent", "CompSent"):
    scripts_df[sentiment_col] = np.repeat(0, row_count)

scripts_df.head()

In [None]:
# Empty accumulators for the per-line sentiment scores; they are attached to
# the DataFrame as columns once the scoring loop has filled them
PosSent, NegSent, CompSent = [], [], []

In [None]:
# Group minor characters and score every line of dialogue with VADER.
#
# Reads:  scripts_df, primaryChars, secondaryChars, analyzer
# Writes: scripts_df["Character"] (in place) and the PosSent / NegSent /
#         CompSent lists (one entry per row, in row order).
#
# The cell rebuilds its outputs from scratch, so it is safe to re-run.

n_rows = scripts_df.shape[0]

#########################
# Re-assign character names into "OTHER" or "SECONDARY" for non-main characters
#########################

characters = scripts_df["Character"]
is_primary = characters.isin(primaryChars)
# Rows already labelled "SECONDARY" by an earlier run of this cell keep that
# label; without this, a second run would demote them to "OTHER".
is_secondary = (
    characters.isin(secondaryChars) | (characters == "SECONDARY")
) & ~is_primary

# Mask-based .loc assignment instead of scripts_df["Character"][i] = ...:
# chained indexing raises SettingWithCopyWarning and silently does nothing
# when pandas Copy-on-Write is active.
scripts_df.loc[is_secondary, "Character"] = "SECONDARY"
scripts_df.loc[~is_primary & ~is_secondary, "Character"] = "OTHER"

#########################
# Sentiment Analysis
#########################

# Start from empty lists on every run so their length always matches n_rows
PosSent = []
NegSent = []
CompSent = []

for i, dialogue in enumerate(scripts_df["Dialogue"]):

    # Cast to str first: non-string cells (numbers, missing values) would
    # otherwise fail on .lower().
    # NOTE(review): VADER treats ALL-CAPS words as an intensity cue, and
    # lower-casing discards that signal -- confirm this is intended.
    line = str(dialogue).lower()

    try:
        # Score the line once and read all three values from the same result
        scores = analyzer.polarity_scores(line)
    except Exception:
        # Best effort: a line that cannot be scored gets empty scores.
        # `Exception` (not a bare except) so the kernel can still be interrupted.
        pos = None
        neg = None
        comp = None
    else:
        pos = scores["pos"]
        neg = scores["neg"]
        comp = scores["compound"]

    PosSent.append(pos)
    NegSent.append(neg)
    CompSent.append(comp)

    #########################
    # Print notifications to make sure the code is running
    #########################

    if i % 10000 == 0:
        perc_complete = round(i / n_rows * 100, 0)
        print("Percent Complete: " + str(perc_complete) + "%")

print("-----------------------------------")
print("Loop Complete!")

In [None]:
# Attach the collected PosSent, NegSent and CompSent scores to the DataFrame
# as columns of the same name
for column_name, column_scores in (
    ("PosSent", PosSent),
    ("NegSent", NegSent),
    ("CompSent", CompSent),
):
    scripts_df[column_name] = column_scores

scripts_df.head()

In [None]:
# Persist the processed DataFrame to a new CSV so the slow scoring loop does
# not have to be repeated
cleaned_scripts_path = '../outputData/cleaned_scripts.csv'
scripts_df.to_csv(cleaned_scripts_path)