In [1]:
import pandas as pd
import os
from progressbar import ProgressBar
import numpy as np
import numpy as np
import re
import math
import spacy
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.feature_extraction import text
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score

# 1. Preparing Data

In [2]:
def getData(file):
    data = pd.read_csv("../data/extracted/"+file,delimiter=',')
    moviename = ' '.join(file[:-4].split('_'))
#     print(moviename)
    return data, moviename

In [3]:
def getTrueLabels(df, moviename, labelsfile):

    
    movie = labelsfile[labelsfile['Movie']==moviename]
    if movie is not None:
        movie = movie[['Speaker', 'Gender']]
        movie['Speaker'] = movie['Speaker']
        temp = list(df['Speaker'])
        gen = []
#         print(temp, movie['Character'])
        count = 0
        for speaker in temp:
            try:
                gender = movie[movie['Speaker'].str.contains(speaker)]
                gender = list(gender['Gender'])
                gen.append(gender[0])
            except:
                gen.append(np.nan)
        df['Gender'] = gen
    return df

In [4]:
def convertToModelInputFormat(data):
    ans = []
    speakers = list(set(list(data['Speaker'])))
    for speaker in speakers:
        dialogues = data[data['Speaker']==speaker]
        dialogues = list(dialogues['Dialogue'])
        temp = []
        speaker = str(speaker)
        speaker = speaker.replace(".", "")
        speaker = speaker.replace("\"", "")
        speaker = speaker.replace("\t", "")
        temp.append(speaker)
        temp.append(dialogues)
        ans.append(temp)
    return ans

In [5]:
def test_train_split(entire_data):
    data = entire_data.copy()
    train_set = data.sample(frac=0.70, random_state=0)
    test_set = data.drop(train_set.index)
    train_set_labels = train_set.pop('Gender')
    test_set_labels = test_set.pop('Gender')
    return train_set, train_set_labels, test_set, test_set_labels

In [6]:
labelsfile = pd.read_csv('../data/Pre-processing_files/polygraph_matched_scriptid_title_gender.txt', 
                     delimiter='\t', names=['Match', 'Movie', 'Code', 'Speaker', 'Gender'])
labelsfile = labelsfile[labelsfile['Match']=='Matched']
labelsfile = labelsfile[['Movie', 'Speaker','Gender']]
labelsfile2 = pd.read_csv("../data/task1/entire_data2.csv",delimiter=',')
labelsfile = labelsfile.append(labelsfile2, ignore_index=True)

In [7]:
entire_data = pd.DataFrame([['Speaker', ['d1']]], columns=['Speaker', 'Dialogues'])
files = os.listdir('../data/extracted')
files.sort()
pbar = ProgressBar()
for file in pbar(files):
    if file[-4:] != ".csv":
        continue
    else:
        data, moviename = getData(file)
        ans = convertToModelInputFormat(data)
        df = pd.DataFrame(ans, columns = ['Speaker', 'Dialogues'])
        df = getTrueLabels(df, moviename, labelsfile)
        df['Movie'] = moviename
        entire_data = entire_data.append(df, ignore_index=True)

entire_data = entire_data.drop(0)
entire_data

100% |########################################################################|


Unnamed: 0,Speaker,Dialogues,Gender,Movie
1,sharon,"[In the microwave., What's a synonym for throb...",female,10 Things I Hate About You
2,guy,"[Drink up, sister.]",male,10 Things I Hate About You
3,kat,"[Leave it, Why didn't we just read the Hardy B...",f,10 Things I Hate About You
4,bianca,"[Did you change your hair?, You might wanna th...",f,10 Things I Hate About You
5,derek,"[ Michael, my brother, peace, Kat, my lady, yo...",m,10 Things I Hate About You
...,...,...,...,...
42839,gibbons,"[Evening, Sam., Not a whole helluva lot. His f...",male,xXx
42840,trucker,"[I said, you got a problem, boy?, With what? I...",male,xXx
42841,yorgi,"[This pizda? Never seen him before., Cops. Lik...",m,xXx
42842,nerdy agent,[This is your communicator. You'll identify yo...,male,xXx


In [8]:
null_url = pd.isnull(entire_data['Gender'])
temp = entire_data[null_url]

In [9]:
temp

Unnamed: 0,Speaker,Dialogues,Gender,Movie
583,"white - march 14, 2012","[Blue Revised - April 7, 2012 Pink Revised - A...",,42
844,,[],,A Prayer Before Dawn
845,billy's face,"[plunges into water, FULL FRAME. Bubbles spira...",,A Prayer Before Dawn
846,black dildo,"[Free to all. Help get prisoners clean., Come ...",,A Prayer Before Dawn
847,lek,"[Do you have father?, He come visit you?]",,A Prayer Before Dawn
...,...,...,...,...
39676,mrs floyd,[Frank Ross's daughter. My poor child. My poor...,,True Grit
39682,mr goudy,"[Objection. Hearsay., Let us restrict it to ""k...",,True Grit
40165,octobereighthninety-nine,[written by m.night.shyamalan],,Unbreakable
40745,"july 27, 2000",[WARNER BROS. ...,,Walk to Remember


In [10]:
entire_data.loc[entire_data['Speaker'] == '', 'Speaker'] = np.nan

In [11]:
# final_data = entire_data[entire_data.apply(lambda x: x.values.tolist() not in temp.values.tolist(), axis=1)]

In [12]:
entire_data.dropna(
    axis=0,
    how='any',
    thresh=None,
    subset=None,
    inplace=True
)

# 2. Making features

In [13]:
final_data = entire_data.rename(columns={"Speaker": "Name"})

In [14]:
final_data.loc[final_data['Gender'] == '?', 'Gender'] = "male"

In [15]:
# err

In [16]:
final_data.loc[final_data['Gender'] == "m", 'Gender'] = "male"
final_data.loc[final_data['Gender'] == "f", 'Gender'] = "female"
final_data

Unnamed: 0,Name,Dialogues,Gender,Movie
1,sharon,"[In the microwave., What's a synonym for throb...",female,10 Things I Hate About You
2,guy,"[Drink up, sister.]",male,10 Things I Hate About You
3,kat,"[Leave it, Why didn't we just read the Hardy B...",female,10 Things I Hate About You
4,bianca,"[Did you change your hair?, You might wanna th...",female,10 Things I Hate About You
5,derek,"[ Michael, my brother, peace, Kat, my lady, yo...",male,10 Things I Hate About You
...,...,...,...,...
42839,gibbons,"[Evening, Sam., Not a whole helluva lot. His f...",male,xXx
42840,trucker,"[I said, you got a problem, boy?, With what? I...",male,xXx
42841,yorgi,"[This pizda? Never seen him before., Cops. Lik...",male,xXx
42842,nerdy agent,[This is your communicator. You'll identify yo...,male,xXx


In [17]:
def numberOfMF(data):
    df = data.groupby('Gender')['Name'].count()
    print(df)
    df = pd.DataFrame({'Names': [df[0], df[1]]}, index=['Female', 'Male'])
    plot = df.plot.pie(subplots=True, figsize=(11, 6), colors = ['#ffb3e6','#c2c2f0'], 
                       autopct = "%.2f%%", labeldistance=1.15)
#   wedgeprops = {'linewidth':1.5, 'edgecolor':'white'}

In [18]:
numberOfMF(final_data)

Gender
female    15427
male      26699
Name: Name, dtype: int64


In [19]:
def checkEnd(name):
    if name[-1] in "aeiou":
        return "vowel"
    return "consonant"

final_data["EndsWith"] = final_data["Name"].apply(checkEnd)
final_data

Unnamed: 0,Name,Dialogues,Gender,Movie,EndsWith
1,sharon,"[In the microwave., What's a synonym for throb...",female,10 Things I Hate About You,consonant
2,guy,"[Drink up, sister.]",male,10 Things I Hate About You,consonant
3,kat,"[Leave it, Why didn't we just read the Hardy B...",female,10 Things I Hate About You,consonant
4,bianca,"[Did you change your hair?, You might wanna th...",female,10 Things I Hate About You,vowel
5,derek,"[ Michael, my brother, peace, Kat, my lady, yo...",male,10 Things I Hate About You,consonant
...,...,...,...,...,...
42839,gibbons,"[Evening, Sam., Not a whole helluva lot. His f...",male,xXx,consonant
42840,trucker,"[I said, you got a problem, boy?, With what? I...",male,xXx,consonant
42841,yorgi,"[This pizda? Never seen him before., Cops. Lik...",male,xXx,vowel
42842,nerdy agent,[This is your communicator. You'll identify yo...,male,xXx,consonant


In [20]:
def letterClass(name):
    name_list = [x for x in name]
    vowel_counter = 0
    consonant_counter = 0
    for letter in name_list:
        if letter in ['a','e','i','o','u']:
            vowel_counter+=1
        else:
            consonant_counter+=1
    
    return vowel_counter, consonant_counter

final_data['VowelCount'] = final_data['Name'].apply(lambda x: letterClass(x)[0])
final_data['ConsonantCount'] = final_data['Name'].apply(lambda x: letterClass(x)[1])

final_data

Unnamed: 0,Name,Dialogues,Gender,Movie,EndsWith,VowelCount,ConsonantCount
1,sharon,"[In the microwave., What's a synonym for throb...",female,10 Things I Hate About You,consonant,2,4
2,guy,"[Drink up, sister.]",male,10 Things I Hate About You,consonant,1,2
3,kat,"[Leave it, Why didn't we just read the Hardy B...",female,10 Things I Hate About You,consonant,1,2
4,bianca,"[Did you change your hair?, You might wanna th...",female,10 Things I Hate About You,vowel,3,3
5,derek,"[ Michael, my brother, peace, Kat, my lady, yo...",male,10 Things I Hate About You,consonant,2,3
...,...,...,...,...,...,...,...
42839,gibbons,"[Evening, Sam., Not a whole helluva lot. His f...",male,xXx,consonant,2,5
42840,trucker,"[I said, you got a problem, boy?, With what? I...",male,xXx,consonant,2,5
42841,yorgi,"[This pizda? Never seen him before., Cops. Lik...",male,xXx,vowel,2,3
42842,nerdy agent,[This is your communicator. You'll identify yo...,male,xXx,consonant,3,8


In [21]:
def getLength(name):
    return len(name)

final_data["Length"] = final_data["Name"].apply(getLength)

In [22]:
final_data

Unnamed: 0,Name,Dialogues,Gender,Movie,EndsWith,VowelCount,ConsonantCount,Length
1,sharon,"[In the microwave., What's a synonym for throb...",female,10 Things I Hate About You,consonant,2,4,6
2,guy,"[Drink up, sister.]",male,10 Things I Hate About You,consonant,1,2,3
3,kat,"[Leave it, Why didn't we just read the Hardy B...",female,10 Things I Hate About You,consonant,1,2,3
4,bianca,"[Did you change your hair?, You might wanna th...",female,10 Things I Hate About You,vowel,3,3,6
5,derek,"[ Michael, my brother, peace, Kat, my lady, yo...",male,10 Things I Hate About You,consonant,2,3,5
...,...,...,...,...,...,...,...,...
42839,gibbons,"[Evening, Sam., Not a whole helluva lot. His f...",male,xXx,consonant,2,5,7
42840,trucker,"[I said, you got a problem, boy?, With what? I...",male,xXx,consonant,2,5,7
42841,yorgi,"[This pizda? Never seen him before., Cops. Lik...",male,xXx,vowel,2,3,5
42842,nerdy agent,[This is your communicator. You'll identify yo...,male,xXx,consonant,3,8,11


In [23]:
def checkMoreVowels(data):
    data['vcCompare']= np.where(data['ConsonantCount'] == data['VowelCount'], 'equal', np.where(
    data['ConsonantCount'] > data['VowelCount'], 'moreConsonants', 'moreVowels')) 

checkMoreVowels(final_data)
final_data

Unnamed: 0,Name,Dialogues,Gender,Movie,EndsWith,VowelCount,ConsonantCount,Length,vcCompare
1,sharon,"[In the microwave., What's a synonym for throb...",female,10 Things I Hate About You,consonant,2,4,6,moreConsonants
2,guy,"[Drink up, sister.]",male,10 Things I Hate About You,consonant,1,2,3,moreConsonants
3,kat,"[Leave it, Why didn't we just read the Hardy B...",female,10 Things I Hate About You,consonant,1,2,3,moreConsonants
4,bianca,"[Did you change your hair?, You might wanna th...",female,10 Things I Hate About You,vowel,3,3,6,equal
5,derek,"[ Michael, my brother, peace, Kat, my lady, yo...",male,10 Things I Hate About You,consonant,2,3,5,moreConsonants
...,...,...,...,...,...,...,...,...,...
42839,gibbons,"[Evening, Sam., Not a whole helluva lot. His f...",male,xXx,consonant,2,5,7,moreConsonants
42840,trucker,"[I said, you got a problem, boy?, With what? I...",male,xXx,consonant,2,5,7,moreConsonants
42841,yorgi,"[This pizda? Never seen him before., Cops. Lik...",male,xXx,vowel,2,3,5,moreConsonants
42842,nerdy agent,[This is your communicator. You'll identify yo...,male,xXx,consonant,3,8,11,moreConsonants


In [24]:
def checkVCDifference(data):
    data['vcDifference']= data['ConsonantCount'] - data['VowelCount']

checkVCDifference(final_data)


In [25]:
final_data

Unnamed: 0,Name,Dialogues,Gender,Movie,EndsWith,VowelCount,ConsonantCount,Length,vcCompare,vcDifference
1,sharon,"[In the microwave., What's a synonym for throb...",female,10 Things I Hate About You,consonant,2,4,6,moreConsonants,2
2,guy,"[Drink up, sister.]",male,10 Things I Hate About You,consonant,1,2,3,moreConsonants,1
3,kat,"[Leave it, Why didn't we just read the Hardy B...",female,10 Things I Hate About You,consonant,1,2,3,moreConsonants,1
4,bianca,"[Did you change your hair?, You might wanna th...",female,10 Things I Hate About You,vowel,3,3,6,equal,0
5,derek,"[ Michael, my brother, peace, Kat, my lady, yo...",male,10 Things I Hate About You,consonant,2,3,5,moreConsonants,1
...,...,...,...,...,...,...,...,...,...,...
42839,gibbons,"[Evening, Sam., Not a whole helluva lot. His f...",male,xXx,consonant,2,5,7,moreConsonants,3
42840,trucker,"[I said, you got a problem, boy?, With what? I...",male,xXx,consonant,2,5,7,moreConsonants,3
42841,yorgi,"[This pizda? Never seen him before., Cops. Lik...",male,xXx,vowel,2,3,5,moreConsonants,1
42842,nerdy agent,[This is your communicator. You'll identify yo...,male,xXx,consonant,3,8,11,moreConsonants,5


In [26]:
def checkVowelCount(col):
    if col > 2:
        return 'more'
    return 'less'

def checkConsonantCount(col):
    if col > 3:
        return 'more'
    return 'less'

final_data["VowelCountClass"] = final_data["VowelCount"].apply(checkVowelCount)
final_data["ConsonantCountClass"] = final_data["ConsonantCount"].apply(checkConsonantCount)

In [27]:
def checkMoreVowels(col):
    if col == 'moreConsonants':
        return 'less'
    return 'more'

final_data["MoreVowels"] = final_data["vcCompare"].apply(checkMoreVowels)
final_data

Unnamed: 0,Name,Dialogues,Gender,Movie,EndsWith,VowelCount,ConsonantCount,Length,vcCompare,vcDifference,VowelCountClass,ConsonantCountClass,MoreVowels
1,sharon,"[In the microwave., What's a synonym for throb...",female,10 Things I Hate About You,consonant,2,4,6,moreConsonants,2,less,more,less
2,guy,"[Drink up, sister.]",male,10 Things I Hate About You,consonant,1,2,3,moreConsonants,1,less,less,less
3,kat,"[Leave it, Why didn't we just read the Hardy B...",female,10 Things I Hate About You,consonant,1,2,3,moreConsonants,1,less,less,less
4,bianca,"[Did you change your hair?, You might wanna th...",female,10 Things I Hate About You,vowel,3,3,6,equal,0,more,less,more
5,derek,"[ Michael, my brother, peace, Kat, my lady, yo...",male,10 Things I Hate About You,consonant,2,3,5,moreConsonants,1,less,less,less
...,...,...,...,...,...,...,...,...,...,...,...,...,...
42839,gibbons,"[Evening, Sam., Not a whole helluva lot. His f...",male,xXx,consonant,2,5,7,moreConsonants,3,less,more,less
42840,trucker,"[I said, you got a problem, boy?, With what? I...",male,xXx,consonant,2,5,7,moreConsonants,3,less,more,less
42841,yorgi,"[This pizda? Never seen him before., Cops. Lik...",male,xXx,vowel,2,3,5,moreConsonants,1,less,less,less
42842,nerdy agent,[This is your communicator. You'll identify yo...,male,xXx,consonant,3,8,11,moreConsonants,5,more,more,less


In [28]:
def checkMoreDifference(col):
    if col > 1:
        return 'more'
    return 'less'

final_data["MoreDifferenceClass"] = final_data["vcDifference"].apply(checkMoreDifference)
final_data

Unnamed: 0,Name,Dialogues,Gender,Movie,EndsWith,VowelCount,ConsonantCount,Length,vcCompare,vcDifference,VowelCountClass,ConsonantCountClass,MoreVowels,MoreDifferenceClass
1,sharon,"[In the microwave., What's a synonym for throb...",female,10 Things I Hate About You,consonant,2,4,6,moreConsonants,2,less,more,less,more
2,guy,"[Drink up, sister.]",male,10 Things I Hate About You,consonant,1,2,3,moreConsonants,1,less,less,less,less
3,kat,"[Leave it, Why didn't we just read the Hardy B...",female,10 Things I Hate About You,consonant,1,2,3,moreConsonants,1,less,less,less,less
4,bianca,"[Did you change your hair?, You might wanna th...",female,10 Things I Hate About You,vowel,3,3,6,equal,0,more,less,more,less
5,derek,"[ Michael, my brother, peace, Kat, my lady, yo...",male,10 Things I Hate About You,consonant,2,3,5,moreConsonants,1,less,less,less,less
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42839,gibbons,"[Evening, Sam., Not a whole helluva lot. His f...",male,xXx,consonant,2,5,7,moreConsonants,3,less,more,less,more
42840,trucker,"[I said, you got a problem, boy?, With what? I...",male,xXx,consonant,2,5,7,moreConsonants,3,less,more,less,more
42841,yorgi,"[This pizda? Never seen him before., Cops. Lik...",male,xXx,vowel,2,3,5,moreConsonants,1,less,less,less,less
42842,nerdy agent,[This is your communicator. You'll identify yo...,male,xXx,consonant,3,8,11,moreConsonants,5,more,more,less,more


In [29]:
def getASCII(name):
    asciiVal = 0
    for x in name:
        asciiVal += ord(x)-97
    return asciiVal/len(name)

final_data["ASCIIval"] = final_data["Name"].apply(getASCII)

In [30]:
def makeBinary(data, cols, zero, one):
    for col in cols:
        data[col] = np.where((data[col] == zero),0,data[col])        
        data[col] = np.where((data[col] == one),1,data[col])
    return data

final_data = makeBinary(final_data, ['VowelCountClass', 'MoreVowels', 'MoreDifferenceClass'], 'less', 'more')
final_data = makeBinary(final_data, ['EndsWith'], 'consonant', 'vowel')
final_data = makeBinary(final_data, ['Gender'], 'male', 'female')

In [40]:
def dropColumns(data, collist):
    data = data.drop(columns=collist)
    return data

final_data2 = dropColumns(final_data, ['vcCompare', 'ConsonantCountClass', 'Name', 'Movie'])

In [41]:
final_data2

Unnamed: 0,Dialogues,Gender,EndsWith,VowelCount,ConsonantCount,Length,vcDifference,VowelCountClass,MoreVowels,MoreDifferenceClass,ASCIIval
1,"[In the microwave., What's a synonym for throb...",1,0,2,4,6,2,0,0,1,11.500000
2,"[Drink up, sister.]",0,0,1,2,3,1,0,0,0,16.666667
3,"[Leave it, Why didn't we just read the Hardy B...",1,0,1,2,3,1,0,0,0,9.666667
4,"[Did you change your hair?, You might wanna th...",1,1,3,3,6,0,1,1,0,4.000000
5,"[ Michael, my brother, peace, Kat, my lady, yo...",0,0,2,3,5,1,0,0,0,7.600000
...,...,...,...,...,...,...,...,...,...,...,...
42839,"[Evening, Sam., Not a whole helluva lot. His f...",0,0,2,5,7,3,0,0,1,8.714286
42840,"[I said, you got a problem, boy?, With what? I...",0,0,2,5,7,3,0,0,1,12.714286
42841,"[This pizda? Never seen him before., Cops. Lik...",0,1,2,3,5,1,0,0,0,13.800000
42842,[This is your communicator. You'll identify yo...,0,0,3,8,11,5,1,0,1,3.454545


# 2.2 Features based on Dialogue

In [43]:
from textblob import TextBlob

def getPolarityAndSubjectivity(dialogue):
    p=0
    s=0
    ld= len(dialogue)
    for d in dialogue:
        p += TextBlob(d).sentiment.polarity
        s +=  TextBlob(d).sentiment.subjectivity
    return p/ld, s/ld 

final_data2['PolarityBlob'] = final_data2['Dialogues'].apply(lambda x: getPolarityAndSubjectivity(x)[0])

In [44]:
final_data2

Unnamed: 0,Dialogues,Gender,EndsWith,VowelCount,ConsonantCount,Length,vcDifference,VowelCountClass,MoreVowels,MoreDifferenceClass,ASCIIval,PolarityBlob
1,"[In the microwave., What's a synonym for throb...",1,0,2,4,6,2,0,0,1,11.500000,0.000000
2,"[Drink up, sister.]",0,0,1,2,3,1,0,0,0,16.666667,0.000000
3,"[Leave it, Why didn't we just read the Hardy B...",1,0,1,2,3,1,0,0,0,9.666667,0.029439
4,"[Did you change your hair?, You might wanna th...",1,1,3,3,6,0,1,1,0,4.000000,0.036752
5,"[ Michael, my brother, peace, Kat, my lady, yo...",0,0,2,3,5,1,0,0,0,7.600000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
42839,"[Evening, Sam., Not a whole helluva lot. His f...",0,0,2,5,7,3,0,0,1,8.714286,0.059354
42840,"[I said, you got a problem, boy?, With what? I...",0,0,2,5,7,3,0,0,1,12.714286,0.060580
42841,"[This pizda? Never seen him before., Cops. Lik...",0,1,2,3,5,1,0,0,0,13.800000,0.055119
42842,[This is your communicator. You'll identify yo...,0,0,3,8,11,5,1,0,1,3.454545,0.000000


In [45]:
final_data2['SubjectivityBlob'] = final_data2['Dialogues'].apply(lambda x: getPolarityAndSubjectivity(x)[1])

In [46]:
def getSentiMentfromPolarity(dial, col, pol, val):
    dial[col]=''
    dial.loc[dial[pol]>val,col]=1
    dial.loc[dial[pol]==val,col]=0
    dial.loc[dial[pol]<val,col]=-1
    return dial

final_data2 = getSentiMentfromPolarity(final_data2, 'SentiBlob', 'PolarityBlob', 0)

In [48]:
final_data2.to_csv('../data/task1/features-mid.csv', index=False)

In [49]:
final_data2

Unnamed: 0,Dialogues,Gender,EndsWith,VowelCount,ConsonantCount,Length,vcDifference,VowelCountClass,MoreVowels,MoreDifferenceClass,ASCIIval,PolarityBlob,SubjectivityBlob,SentiBlob
1,"[In the microwave., What's a synonym for throb...",1,0,2,4,6,2,0,0,1,11.500000,0.000000,0.000000,0
2,"[Drink up, sister.]",0,0,1,2,3,1,0,0,0,16.666667,0.000000,0.000000,0
3,"[Leave it, Why didn't we just read the Hardy B...",1,0,1,2,3,1,0,0,0,9.666667,0.029439,0.251265,1
4,"[Did you change your hair?, You might wanna th...",1,1,3,3,6,0,1,1,0,4.000000,0.036752,0.225850,1
5,"[ Michael, my brother, peace, Kat, my lady, yo...",0,0,2,3,5,1,0,0,0,7.600000,0.000000,0.333333,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42839,"[Evening, Sam., Not a whole helluva lot. His f...",0,0,2,5,7,3,0,0,1,8.714286,0.059354,0.358953,1
42840,"[I said, you got a problem, boy?, With what? I...",0,0,2,5,7,3,0,0,1,12.714286,0.060580,0.514921,1
42841,"[This pizda? Never seen him before., Cops. Lik...",0,1,2,3,5,1,0,0,0,13.800000,0.055119,0.261689,1
42842,[This is your communicator. You'll identify yo...,0,0,3,8,11,5,1,0,1,3.454545,0.000000,0.000000,0


In [None]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

analyzer = SentimentIntensityAnalyzer()

def getPolarityVader(dialogue):
    c=0
    for d in dialogue:
        c+= analyzer.polarity_scores(dialogue)['compound']
    return c/len(dialogue)

final_data2['VADER'] = final_data2['Dialogues'].apply(getPolarityVader)

In [None]:
final_data2 = getSentiMentfromPolarity(final_data2, 'SentiVADER', 'VADER', 0.05)

In [None]:
final_data2.to_csv('../data/task1/features-mid-2.csv', index=False)