In [1]:
# Import dependencies
import pandas as pd
import numpy as np
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

In [2]:
# Read in the Scripts CSV file
scripts_df = pd.read_csv('../rawData/scripts.csv')
#scripts_df.head()

In [3]:
# Define list of main characters
primaryChars = ["JERRY", "GEORGE", "KRAMER", "ELAINE"]

In [4]:
# Setting the cut-off for our secondary characters at 150 lines of dialogue
secondaryChars = ["NEWMAN", "MORTY", "HELEN", "FRANK","SUSAN", "ESTELLE", "PUDDY"]

In [5]:
# Create blank coolumns for positive, negative and compound sentiment episodes
scripts_df["PosSent"] = np.repeat(0, scripts_df.shape[0])
scripts_df["NegSent"] = np.repeat(0, scripts_df.shape[0])
scripts_df["CompSent"] = np.repeat(0, scripts_df.shape[0])

scripts_df.head()

Unnamed: 0.1,Unnamed: 0,Character,Dialogue,EpisodeNo,SEID,Season,PosSent,NegSent,CompSent
0,0,JERRY,Do you know what this is all about? Do you kno...,1,S01E01,1,0,0,0
1,1,JERRY,"(pointing at Georges shirt) See, to me, that b...",1,S01E01,1,0,0,0
2,2,GEORGE,Are you through?,1,S01E01,1,0,0,0
3,3,JERRY,"You do of course try on, when you buy?",1,S01E01,1,0,0,0
4,4,GEORGE,"Yes, it was purple, I liked it, I dont actuall...",1,S01E01,1,0,0,0


In [6]:
# Create lists that will later be added to data frame
PosSent = []
NegSent = []
CompSent = []

In [7]:
# Loop to re-name Character column and add Sentiment Score

for i in range(scripts_df.shape[0]):

    #########################
    # Re-assign character names into "OTHER" or "SECONDARY" for non-main characters
    #########################
    
    char = scripts_df["Character"][i]

    if (char not in primaryChars) & (char not in secondaryChars):
        scripts_df["Character"][i] = "OTHER"

    elif char not in primaryChars:
        scripts_df["Character"][i] = "SECONDARY"

        
    #########################
    # Sentiment Analysis
    #########################

    # Assign the current line of text to variable 'line'
    line = str(scripts_df["Dialogue"][i]).lower()

    try:
        # Setting each line of dialogue as a string, otherwise numbers give us errors
        pos = analyzer.polarity_scores(line)["pos"]
        neg = analyzer.polarity_scores(line)["neg"]
        comp = analyzer.polarity_scores(line)["compound"]
    
    except:
        # Also using try / except to limit that
        pos = None
        neg = None
        comp = None

    PosSent.append(pos)
    NegSent.append(neg)
    CompSent.append(comp)
        
    #########################
    # Print notifications to make sure the code is running
    #########################

    if i % 10000 == 0:
        perc_complete = round(i / scripts_df.shape[0] * 100, 0)
        print("Percent Complete: " + str(perc_complete) + "%")
    
    if i == (scripts_df.shape[0] - 1):
        print("-----------------------------------")
        print("Loop Complete!")

Percent Complete: 0.0%


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


Percent Complete: 18.0%
Percent Complete: 37.0%
Percent Complete: 55.0%
Percent Complete: 73.0%
Percent Complete: 92.0%
-----------------------------------
Loop Complete!


In [8]:
#Add PosSent, NegSent, andCompSent as columns in dataframe
scripts_df["PosSent"] = PosSent
scripts_df["NegSent"] = NegSent
scripts_df["CompSent"] = CompSent

scripts_df.head()

Unnamed: 0.1,Unnamed: 0,Character,Dialogue,EpisodeNo,SEID,Season,PosSent,NegSent,CompSent
0,0,JERRY,Do you know what this is all about? Do you kno...,1,S01E01,1,0.072,0.011,0.9029
1,1,JERRY,"(pointing at Georges shirt) See, to me, that b...",1,S01E01,1,0.054,0.096,-0.4389
2,2,GEORGE,Are you through?,1,S01E01,1,0.0,0.0,0.0
3,3,JERRY,"You do of course try on, when you buy?",1,S01E01,1,0.0,0.0,0.0
4,4,GEORGE,"Yes, it was purple, I liked it, I dont actuall...",1,S01E01,1,0.355,0.0,0.6705


In [9]:
# Save scripts_df to a new CSV file, so that we don't have to do the loop again
scripts_df.to_csv('../outputData/cleaned_scripts.csv')