# Imports

In [22]:
import pandas as pd 
import json
import os
import numpy as np
import re
import matplotlib.pyplot as plt
from nltk.corpus import stopwords
from nltk.tokenize import RegexpTokenizer
from nltk.tokenize import punkt
import nltk
from nltk import word_tokenize
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from nltk.stem import WordNetLemmatizer 
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.metrics import recall_score, accuracy_score, confusion_matrix
import string
from nltk.probability import FreqDist
import seaborn as sns
pd.options.display.max_rows = 999
pd.options.display.max_columns = 30
import lexnlp as lnlp
import importlib
import src
# Reload `src` BEFORE the star-import: importlib.reload() only rebinds names
# inside the module object, so names already copied into this namespace by
# `from src import *` would otherwise keep pointing at the pre-reload objects.
importlib.reload(src)
from src import *
import nltk
nltk.download('vader_lexicon')  # lexicon data needed by SentimentIntensityAnalyzer
from nltk.sentiment.vader import SentimentIntensityAnalyzer
sid = SentimentIntensityAnalyzer()  # shared analyzer used by the sentiment-scoring cell
%matplotlib inline

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/aidancoco/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


# Importing Justice Words CSV

Here I used pandas to import the CSV I generated in my data cleaning notebook by filtering the oral-argument transcripts down to the words said by justices to petitioners.

In [23]:
# Load the justice-speech CSV into a DataFrame.
df = pd.read_csv(filepath_or_buffer="../data/final_justice.csv")

In [24]:
# Coerce the text column to str. Missing values are replaced with an empty
# string first; otherwise astype(str) turns NaN into the literal word "nan",
# which would then be scored and exported as if it were transcript text.
df["text"] = df["text"].fillna("").astype(str)

In [25]:
df.head(n=5)  # preview the first five rows of the DataFrame

Unnamed: 0,case,text,target,lib_or_con,majVotes
0,352us282,that instruction i take it in effect import in...,1,2.0,6
1,353us586,may i at this point ask to be declare inaudibl...,1,2.0,4
2,352us599,well of course of course you may yes now mr mr...,0,1.0,8
3,352us82,mr williams that do not mean that it be only a...,0,2.0,8
4,352us220,mr plauche you may proceed deny the united sta...,0,1.0,8


# Sentiment Analysis

Here I used the sentiment intensity analyzer from NLTK's VADER package to analyze the sentiment of the words said by the justices in each case. It scores the polarity of the text, with positive scores indicating more positive sentiment and negative scores indicating more negative sentiment.

In [26]:
# Run VADER over every row's text; each entry of `scores` is the dict that
# polarity_scores returns for that row.
df['scores'] = df['text'].apply(sid.polarity_scores)

df.head(n=5)  # confirm the new column was added

Unnamed: 0,case,text,target,lib_or_con,majVotes,scores
0,352us282,that instruction i take it in effect import in...,1,2.0,6,"{'neg': 0.076, 'neu': 0.833, 'pos': 0.091, 'co..."
1,353us586,may i at this point ask to be declare inaudibl...,1,2.0,4,"{'neg': 0.072, 'neu': 0.876, 'pos': 0.053, 'co..."
2,352us599,well of course of course you may yes now mr mr...,0,1.0,8,"{'neg': 0.042, 'neu': 0.882, 'pos': 0.075, 'co..."
3,352us82,mr williams that do not mean that it be only a...,0,2.0,8,"{'neg': 0.109, 'neu': 0.838, 'pos': 0.054, 'co..."
4,352us220,mr plauche you may proceed deny the united sta...,0,1.0,8,"{'neg': 0.042, 'neu': 0.778, 'pos': 0.179, 'co..."


I wanted a single variable I could incorporate into my analysis, so I extracted VADER's compound score into its own column.

In [27]:
# Pull the 'compound' value out of each score dict into its own column.
df['compound'] = df['scores'].map(lambda vader_scores: vader_scores['compound'])

df.head(n=5)


Unnamed: 0,case,text,target,lib_or_con,majVotes,scores,compound
0,352us282,that instruction i take it in effect import in...,1,2.0,6,"{'neg': 0.076, 'neu': 0.833, 'pos': 0.091, 'co...",0.9808
1,353us586,may i at this point ask to be declare inaudibl...,1,2.0,4,"{'neg': 0.072, 'neu': 0.876, 'pos': 0.053, 'co...",-0.9909
2,352us599,well of course of course you may yes now mr mr...,0,1.0,8,"{'neg': 0.042, 'neu': 0.882, 'pos': 0.075, 'co...",0.9904
3,352us82,mr williams that do not mean that it be only a...,0,2.0,8,"{'neg': 0.109, 'neu': 0.838, 'pos': 0.054, 'co...",-0.8481
4,352us220,mr plauche you may proceed deny the united sta...,0,1.0,8,"{'neg': 0.042, 'neu': 0.778, 'pos': 0.179, 'co...",0.9995


I then simplified this even further into a binary positive-or-negative categorization: 1 for compound scores of zero or above, 2 for negative compound scores.

In [28]:
# Binary sentiment label: 1 when the compound score is zero or above,
# 2 when it is negative.
df['comp_score'] = df['compound'].ge(0).map({True: 1, False: 2})

df.head(n=5)


Unnamed: 0,case,text,target,lib_or_con,majVotes,scores,compound,comp_score
0,352us282,that instruction i take it in effect import in...,1,2.0,6,"{'neg': 0.076, 'neu': 0.833, 'pos': 0.091, 'co...",0.9808,1
1,353us586,may i at this point ask to be declare inaudibl...,1,2.0,4,"{'neg': 0.072, 'neu': 0.876, 'pos': 0.053, 'co...",-0.9909,2
2,352us599,well of course of course you may yes now mr mr...,0,1.0,8,"{'neg': 0.042, 'neu': 0.882, 'pos': 0.075, 'co...",0.9904,1
3,352us82,mr williams that do not mean that it be only a...,0,2.0,8,"{'neg': 0.109, 'neu': 0.838, 'pos': 0.054, 'co...",-0.8481,2
4,352us220,mr plauche you may proceed deny the united sta...,0,1.0,8,"{'neg': 0.042, 'neu': 0.778, 'pos': 0.179, 'co...",0.9995,1


In [32]:
# Export the scored DataFrame so it can be incorporated into the analysis.
# index=True is pandas' default, so the row index is still written as the
# first column of the file.
df.to_csv("../data/justice_sentiment.csv", index=True)