# Sentiment Analysis of Financial Text

In [21]:
import pandas as pd
import numpy as np
from pathlib import Path
import nltk

import matplotlib.pyplot as plt
# Apply the ggplot theme to the matplotlib figures drawn in this notebook.
plt.style.use('ggplot')

# Read the sentence/sentiment CSV into a DataFrame and display it.
csv_path = Path('input/fin-sent-data.csv')
df = pd.read_csv(csv_path)
df

Unnamed: 0,Sentence,Sentiment
0,The GeoSolutions technology will leverage Bene...,positive
1,"$ESI on lows, down $1.50 to $2.50 BK a real po...",negative
2,"For the last quarter of 2010 , Componenta 's n...",positive
3,According to the Finnish-Russian Chamber of Co...,neutral
4,The Swedish buyout firm has sold its remaining...,neutral
...,...,...
5837,RISING costs have forced packaging producer Hu...,negative
5838,Nordic Walking was first used as a summer trai...,neutral
5839,"According shipping company Viking Line , the E...",neutral
5840,"In the building and home improvement trade , s...",neutral


In [22]:
# General understanding of the data distribution:
# count how many rows carry each value of the "Sentiment" column.
df["Sentiment"].value_counts()

neutral     3130
positive    1852
negative     860
Name: Sentiment, dtype: int64

# Creating a dataframe providing polarity scores for sentences

In [23]:
from nltk.sentiment import SentimentIntensityAnalyzer

# Build the VADER analyzer used for scoring below. The constructor loads the
# 'vader_lexicon' resource and raises LookupError when it is not installed,
# so fetch it on demand instead of relying on a manually uncommented
# nltk.download(...) line. This keeps "Restart & Run All" working on a
# fresh environment; when the lexicon is already present nothing is downloaded.
try:
    sia = SentimentIntensityAnalyzer()
except LookupError:
    nltk.download('vader_lexicon')
    sia = SentimentIntensityAnalyzer()

In [24]:
# Run the polarity score on the entire dataset.
# The sentences are walked by position (enumerate over the column's values)
# instead of being looked up with df['Sentence'][i]: that lookup is
# label-based, so it raises KeyError -- or silently returns a different row --
# whenever df's index is not exactly 0..n-1 (e.g. after filtering/shuffling).
df_length = len(df)
result = {}

for i, sentence in enumerate(df['Sentence']):
    # Key each score dict by the row's position in df.
    result[i] = sia.polarity_scores(sentence)

# Adding scores to a dataframe. The dict-of-dicts constructor puts one
# sentence per column, so transpose to get one row per sentence and one
# column per score.
scores_df = pd.DataFrame(result).T
scores_df

In [28]:
# Give both frames an identical positional 'id' column to join on.
scores_df['id'] = range(len(df))
df['id'] = range(len(df))

# Left-join the original columns of df onto the scores. No `on=` is passed,
# so pandas joins on the column names the two frames have in common.
scores_df = pd.merge(scores_df, df, how='left')
scores_df

Unnamed: 0,neg,neu,pos,compound,id,Sentence,Sentiment
0,0.000,0.847,0.153,0.5423,0,The GeoSolutions technology will leverage Bene...,positive
1,0.167,0.833,0.000,-0.2023,1,"$ESI on lows, down $1.50 to $2.50 BK a real po...",negative
2,0.064,0.856,0.080,0.1531,2,"For the last quarter of 2010 , Componenta 's n...",positive
3,0.000,1.000,0.000,0.0000,3,According to the Finnish-Russian Chamber of Co...,neutral
4,0.000,1.000,0.000,0.0000,4,The Swedish buyout firm has sold its remaining...,neutral
...,...,...,...,...,...,...,...
5837,0.239,0.761,0.000,-0.5267,5837,RISING costs have forced packaging producer Hu...,negative
5838,0.000,1.000,0.000,0.0000,5838,Nordic Walking was first used as a summer trai...,neutral
5839,0.000,0.870,0.130,0.2023,5839,"According shipping company Viking Line , the E...",neutral
5840,0.000,0.824,0.176,0.4588,5840,"In the building and home improvement trade , s...",neutral


# Determining words with intense effects on the polarity

In [34]:
# Keep only the rows whose Sentiment label is 'negative' (all columns retained).
is_negative = scores_df["Sentiment"].eq('negative')
neg_df = scores_df.loc[is_negative]
neg_df

Unnamed: 0,neg,neu,pos,compound,id,Sentence,Sentiment
1,0.167,0.833,0.000,-0.2023,1,"$ESI on lows, down $1.50 to $2.50 BK a real po...",negative
6,0.222,0.778,0.000,-0.2500,6,Shell's $70 Billion BG Deal Meets Shareholder ...,negative
7,0.065,0.847,0.088,0.2103,7,SSH COMMUNICATIONS SECURITY CORP STOCK EXCHANG...,negative
11,0.231,0.655,0.114,-0.4019,11,$SAP Q1 disappoints as #software licenses down...,negative
39,0.319,0.563,0.117,-0.6486,39,$AAPL afternoon selloff as usual will be bruta...,negative
...,...,...,...,...,...,...,...
5829,0.160,0.840,0.000,-0.2732,5829,"$SBUX down PM, from $DB downgrade.. PT cut fro...",negative
5831,0.064,0.936,0.000,-0.1280,5831,Finnish developer and manufacturer of mobile p...,negative
5832,0.000,0.805,0.195,0.4404,5832,Operating profit fell to EUR 38.1 mn from EUR ...,negative
5835,0.000,1.000,0.000,0.0000,5835,HSBC Says Unit to Book $585 Million Charge on ...,negative
