In [1]:
!pip install vaderSentiment

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.0/126.0 KB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: vaderSentiment
Successfully installed vaderSentiment-3.3.2


In [2]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from textblob import TextBlob
import pandas as pd

# VADER

In [3]:
def get_features_vader(vader, sentence):
  """Score one piece of text with a VADER-style analyzer.

  Args:
    vader: Object exposing ``polarity_scores(text)`` that returns a mapping
      containing the keys ``'neg'``, ``'neu'``, ``'pos'`` and ``'compound'``.
    sentence: Text handed to ``vader.polarity_scores`` unchanged.

  Returns:
    A 4-tuple ``(neg, neu, pos, compound)`` taken from that mapping, in
    exactly this order.

  Raises:
    KeyError: If the returned mapping lacks one of the four keys.
  """
  scores = vader.polarity_scores(sentence)
  # Fixed key order: callers unpack the tuple positionally into four columns.
  return tuple(scores[key] for key in ('neg', 'neu', 'pos', 'compound'))

In [4]:
# Build the analyzer once at notebook level; the scoring cell further down
# passes this same instance to get_features_vader for every row.
vader = SentimentIntensityAnalyzer()

In [16]:
# Read the report workbook and keep only the columns that the rest of the
# notebook carries forward ('content' is the text that gets scored).
df = pd.read_excel('data_financial reports.xlsx')[
  ['date', 'stock_name', 'report_type', 'report_section', 'content', 'Note']
]
# Quick look at the first rows to confirm the load and column selection.
print(df.head())

        date stock_name report_type report_section  \
0 2018-02-01       AAPL         8-K       8-K|2.02   
1 2018-02-14       AAPL         8-K       8-K|5.07   
2 2018-05-01       AAPL         8-K       8-K|2.02   
3 2018-05-07       AAPL         8-K       8-K|8.01   
4 2018-05-08       AAPL         8-K       8-K|8.01   

                                             content Note  
0  On February 1, 2018, Apple Inc. (“Apple”) issu...  NaN  
1  The Annual Meeting of Shareholders of Apple In...  NaN  
2  On May 1, 2018, Apple Inc. (“Apple”) issued a ...  NaN  
3  On April 30, 2018, the Superior Court of the S...  NaN  
4  On May 7, 2018, Apple Inc. filed a Current Rep...  NaN  


In [17]:
# Work on a copy so the loaded frame `df` stays untouched for the TextBlob pass.
df_vader = df.copy()
# Score every 'content' entry, then transpose the per-row 4-tuples into four
# column-wise sequences (zip(*...)) and assign each one to its own column.
(
  df_vader['vader_neg'],
  df_vader['vader_neu'],
  df_vader['vader_pos'],
  df_vader['vader_compound'],
) = zip(*(get_features_vader(vader, text) for text in df_vader['content']))

In [18]:
# Plain-text preview of the first rows, now including the four vader_* columns.
print(df_vader.head())

        date stock_name report_type report_section  \
0 2018-02-01       AAPL         8-K       8-K|2.02   
1 2018-02-14       AAPL         8-K       8-K|5.07   
2 2018-05-01       AAPL         8-K       8-K|2.02   
3 2018-05-07       AAPL         8-K       8-K|8.01   
4 2018-05-08       AAPL         8-K       8-K|8.01   

                                             content Note  vader_neg  \
0  On February 1, 2018, Apple Inc. (“Apple”) issu...  NaN      0.000   
1  The Annual Meeting of Shareholders of Apple In...  NaN      0.032   
2  On May 1, 2018, Apple Inc. (“Apple”) issued a ...  NaN      0.000   
3  On April 30, 2018, the Superior Court of the S...  NaN      0.013   
4  On May 7, 2018, Apple Inc. filed a Current Rep...  NaN      0.017   

   vader_neu  vader_pos  vader_compound  
0      0.963      0.037          0.5267  
1      0.885      0.083          0.7380  
2      0.963      0.037          0.5267  
3      0.889      0.098          0.8934  
4      0.961      0.022         

In [28]:
# Write the VADER-scored frame to an Excel workbook in the working directory.
# `index` is left at its pandas default, so the row index is written as the
# first column of the sheet.
df_vader.to_excel('output_VADER.xlsx')

# TextBlob

In [20]:
def get_features_textblob(sentence):
  """Score one piece of text with TextBlob.

  Args:
    sentence: Text passed unchanged to the ``TextBlob`` constructor.

  Returns:
    A 2-tuple ``(polarity, subjectivity)`` read from the blob's ``sentiment``
    attribute.
  """
  sentiment = TextBlob(sentence).sentiment
  return sentiment.polarity, sentiment.subjectivity

In [21]:
# Separate copy of the loaded frame so the TextBlob columns do not mix with
# the VADER ones.
df_textblob = df.copy()
# Each row yields a (polarity, subjectivity) pair; zip(*...) turns the pairs
# into two column-wise sequences that are assigned in one statement.
df_textblob['tb_polarity'], df_textblob['tb_subjectivity'] = zip(
  *df_textblob['content'].apply(get_features_textblob)
)

In [22]:
# Plain-text preview of the first rows, now including the two tb_* columns.
print(df_textblob.head())

        date stock_name report_type report_section  \
0 2018-02-01       AAPL         8-K       8-K|2.02   
1 2018-02-14       AAPL         8-K       8-K|5.07   
2 2018-05-01       AAPL         8-K       8-K|2.02   
3 2018-05-07       AAPL         8-K       8-K|8.01   
4 2018-05-08       AAPL         8-K       8-K|8.01   

                                             content Note  tb_polarity  \
0  On February 1, 2018, Apple Inc. (“Apple”) issu...  NaN     0.035714   
1  The Annual Meeting of Shareholders of Apple In...  NaN    -0.014286   
2  On May 1, 2018, Apple Inc. (“Apple”) issued a ...  NaN     0.000000   
3  On April 30, 2018, the Superior Court of the S...  NaN     0.268750   
4  On May 7, 2018, Apple Inc. filed a Current Rep...  NaN     0.187500   

   tb_subjectivity  
0         0.308333  
1         0.170238  
2         0.260714  
3         0.568750  
4         0.575000  


In [30]:
# Write the TextBlob-scored frame to an Excel workbook in the working directory.
# `index` is left at its pandas default, so the row index is written as the
# first column of the sheet.
df_textblob.to_excel('output_TextBlob.xlsx')

In [24]:
# s1 = '''The Company is party to various legal and regulatory proceedings. Some of these proceedings involve complex claims that are subject to substantial uncertainties and unascertainable damages. For those proceedings where a loss is determined to be only reasonably possible or probable but not estimable, the Company has disclosed the nature of the claim. Additionally, unless otherwise disclosed below with respect to these proceedings, the Company cannot provide an estimate of the possible loss or range of loss. Although the Company believes that it has strong defenses for the litigation and regulatory proceedings described below, it could, in the future, incur judgments or fines or enter into settlements of claims that could have a material adverse effect on the Company’s financial position, results of operations or cash flows. From time to time, the Company may engage in settlement discussions or mediations with respect to one or more of its outstanding litigation matters, either on its own behalf or collectively with other parties.
# The litigation accrual is an estimate and is based on management’s understanding of its litigation profile, the specifics of each case, advice of counsel to the extent appropriate and management’s best estimate of incurred loss as of the balance sheet date.
# The following table summarizes the activity related to accrued litigation:
# 2022		2021
#  	(in millions)
# Balance at beginning of period	$	983 			$	914 	
# Provision for uncovered legal matters	6 			4 	
# Provision for covered legal matters	885 			125 	
# Payments for legal matters	(418)			(60)	
# Balance at end of period	$	1,456 			$	983 	
#  '''

In [25]:
# s2 = '''The Company is party to various legal and regulatory proceedings. Some of these proceedings involve complex claims that are subject to substantial uncertainties and unascertainable damages. For those proceedings where a loss is determined to be only reasonably possible or probable but not estimable, the Company has disclosed the nature of the claim. Additionally, unless otherwise disclosed below with respect to these proceedings, the Company cannot provide an estimate of the possible loss or range of loss. Although the Company believes that it has strong defenses for the litigation and regulatory proceedings described below, it could, in the future, incur judgments or fines or enter into settlements of claims that could have a material adverse effect on the Company’s financial position, results of operations or cash flows. From time to time, the Company may engage in settlement discussions or mediations with respect to one or more of its outstanding litigation matters, either on its own behalf or collectively with other parties.
# The litigation accrual is an estimate and is based on management’s understanding of its litigation profile, the specifics of each case, advice of counsel to the extent appropriate and management’s best estimate of incurred loss as of the balance sheet date.
# The following table summarizes the activity related to accrued litigation:
# 2022		2021
#  	(in millions)
# Balance at beginning of period	$	100 			$	100 	
# Provision for uncovered legal matters	6 			4 	
# Provision for covered legal matters	300 			500 	
# Payments for legal matters	(418)			(60)	
# Balance at end of period	$	200 			$	200 	
# '''

In [26]:
# vader = SentimentIntensityAnalyzer()

In [27]:
# print(get_features_vader(vader, s1))
# print(get_features_vader(vader, s2))