In [None]:
# connect to Google Drive
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
FOLDERNAME = 'Colab Notebooks'
%cd drive/MyDrive/$FOLDERNAME

Mounted at /content/drive
/content/drive/MyDrive/Colab Notebooks


### Import packages

In [None]:
from pathlib import Path
import shutil
import os
import logging
import sys
sys.path.append('..')

In [None]:
from textblob import TextBlob
from pprint import pprint
from sklearn.metrics import classification_report

In [None]:
from transformers import AutoModelForSequenceClassification

from finbert import *
import utils as tools

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]



In [None]:
import argparse
import pandas as pd
from datetime import datetime
from sklearn.model_selection import train_test_split

### Build the FinBERT model
Reference: https://github.com/ProsusAI/finBERT/blob/master/notebooks/finbert_training.ipynb

In [None]:
# Configure the root logger: timestamped records, ERROR level and above only.
logging.basicConfig(
    level=logging.ERROR,
    datefmt='%m/%d/%Y %H:%M:%S',
    format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
)

In [None]:
# path
# Everything is resolved from the parent of the current working directory.
project_dir = Path.cwd().parent
# folder holding the data files
cl_data_path = project_dir.joinpath('Colab Notebooks')
# folder used as the model directory
cl_path = cl_data_path.joinpath('finbert-sentiment')

In [None]:
# model
# Name of the pretrained checkpoint, defined once so the weights loaded here and
# the `base_model` attribute set on the FinBert wrapper below cannot drift apart.
BASE_MODEL_NAME = 'bert-base-uncased'

# num_labels=3 matches the three-entry label_list passed to prepare_model().
bertmodel = AutoModelForSequenceClassification.from_pretrained(BASE_MODEL_NAME, cache_dir=None, num_labels=3)


# Training configuration: data is read from cl_data_path and model artifacts go to cl_path.
config = Config(   data_dir=cl_data_path,
                   bert_model=bertmodel,
                   num_train_epochs=4,
                   model_dir=cl_path,
                   max_seq_length = 48,
                   train_batch_size = 32,
                   learning_rate = 2e-5,
                   output_mode='classification',
                   warm_up_proportion=0.2,
                   local_rank=-1,
                   discriminate=True,
                   gradual_unfreeze=True)

finbert = FinBert(config)
finbert.base_model = BASE_MODEL_NAME
# These repeat the values already passed to Config above.
finbert.config.discriminate=True
finbert.config.gradual_unfreeze=True

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# label
# prepare_model() raises "Output directory (...) already exists and is not empty"
# when cl_path (the model_dir given to Config) still holds files from an earlier
# run, and the following cells then fail as well. Move the old artifacts to a
# timestamped sibling folder instead of deleting them, so earlier weights are kept.
if cl_path.is_dir() and any(cl_path.iterdir()):
  backup_path = cl_path.with_name(f'{cl_path.name}-{datetime.now():%Y%m%d-%H%M%S}')
  cl_path.rename(backup_path)

finbert.prepare_model(label_list=['positive','negative','neutral'])

ValueError: Output directory (/content/drive/MyDrive/Colab Notebooks/finbert-sentiment) already exists and is not empty.

### Split data into train, validation and test datasets

In [None]:
# Load the labelled sentences into two columns, text and label.
# A multi-character separator is interpreted as a regular expression, which only
# the Python parser supports; request it explicitly instead of relying on the
# automatic fallback that emits a warning on every run.
# NOTE(review): as a regex, the leading "." matches any character, so whatever
# character precedes "@" is dropped from the text - confirm that is intended.
data = pd.read_csv(os.path.join(cl_data_path, 'sentences_1.txt'), sep='.@', names=['text','label'],
                   encoding_errors='ignore', engine='python')

# 80/20 train/test split, then 10% of the training part is held out for validation.
train, test = train_test_split(data, test_size=0.2, random_state=0)
train, valid = train_test_split(train, test_size=0.1, random_state=0)

# Write the splits into cl_data_path (the data_dir given to Config) rather than
# whatever the working directory happens to be.
for split_name, split_frame in (('train', train), ('test', test), ('validation', valid)):
  split_frame.to_csv(os.path.join(cl_data_path, f'{split_name}.csv'), sep='\t')

  data = pd.read_csv(os.path.join(cl_data_path, 'sentences_1.txt'), sep='.@', names=['text','label'], encoding_errors='ignore')


### Train the finbert model

In [None]:
# Get the training examples
# NOTE(review): the saved output shows AttributeError "no attribute 'processor'"; presumably
# finbert.prepare_model(...) must have completed successfully first - confirm.
train_data = finbert.get_data('train')

AttributeError: 'FinBert' object has no attribute 'processor'

In [None]:
# Build the model object that is passed to finbert.train() in the next cell.
model = finbert.create_the_model()



In [None]:
# train the model
# Uses the examples returned by finbert.get_data('train') and the model from create_the_model().
# The saved output shows this taking about 1.5 hours for 4 epochs.
trained_model = finbert.train(train_examples=train_data, model=model)

Epoch:   0%|          | 0/4 [00:00<?, ?it/s]

Iteration:   0%|          | 0/109 [00:00<?, ?it/s]

Validating:   0%|          | 0/13 [00:00<?, ?it/s]

Validation losses: [0.3569155748073871]
No best model found


Epoch:  25%|██▌       | 1/4 [12:56<38:50, 777.00s/it]

Iteration:   0%|          | 0/109 [00:00<?, ?it/s]

Validating:   0%|          | 0/13 [00:00<?, ?it/s]

Validation losses: [0.3569155748073871, 0.33707844523283154]


Epoch:  50%|█████     | 2/4 [31:54<32:57, 988.86s/it]

Iteration:   0%|          | 0/109 [00:00<?, ?it/s]

Validating:   0%|          | 0/13 [00:00<?, ?it/s]

Validation losses: [0.3569155748073871, 0.33707844523283154, 0.33707844523283154]


Epoch:  75%|███████▌  | 3/4 [57:38<20:42, 1242.55s/it]

Iteration:   0%|          | 0/109 [00:00<?, ?it/s]

Validating:   0%|          | 0/13 [00:00<?, ?it/s]

Validation losses: [0.3569155748073871, 0.33707844523283154, 0.33707844523283154, 0.33707844523283154]


Epoch: 100%|██████████| 4/4 [1:26:26<00:00, 1296.56s/it]
  checkpoint = torch.load(self.config.model_dir / ('temporary' + str(best_model)))


### Read in Fed speeches and yield spread

In [None]:
# Load the Fed speeches and the yield-spread series from the working directory.
fed = pd.read_csv('FED_speech.csv')
spread = pd.read_csv('yield_spread.csv')

# Ensure 'DATE' and 'date' are of the same type
# NOTE(review): the saved output shows a warning for the fed['date'] conversion; an explicit
# `format=` would likely silence it - confirm the date layout used in FED_speech.csv.
fed['date'] = pd.to_datetime(fed['date'])
spread['DATE'] = pd.to_datetime(spread['DATE'])

# Missing observations are written as '.'; replace them with NA, carry the previous
# row's value forward, then convert the column to float.
# `.ffill()` replaces the deprecated `fillna(method='ffill')` spelling.
spread['T10Y2Y'] = spread['T10Y2Y'].replace('.', pd.NA).ffill().astype(float)

# Keep only the spread rows whose 'DATE' appears among the speech dates in fed
spread_filtered = spread[spread['DATE'].isin(fed['date'])]

# Keep only the speeches in fed whose date has a matching row in spread_filtered
fed_filtered = fed[fed['date'].isin(spread_filtered['DATE'])]

  fed['date'] = pd.to_datetime(fed['date'])
  spread['T10Y2Y'] = spread['T10Y2Y'].replace('.', pd.NA).fillna(method='ffill')


In [None]:
# merge two dataframes
# Inner-join speeches and spread on the date keys, then drop the right-hand key
# column 'DATE'.
merged_df = (
    fed_filtered
    .merge(spread_filtered, how='inner', left_on='date', right_on='DATE')
    .drop(columns='DATE')
)

# filter data
# keep only rows dated strictly before 1 October 2024
merged_df = merged_df[merged_df['date'] < datetime(2024, 10, 1)]
merged_df

Unnamed: 0,title,speaker,date,article,T10Y2Y
7,Economic Outlook,Chair Jerome H. Powell,2024-09-30,I have some brief comments on the economy and ...,0.15
8,Recent Views on Monetary Policy and the Econom...,Governor Michelle W. Bowman,2024-09-30,Good morning. I would like to thank the Georgi...,0.15
9,What Will Artificial Intelligence Mean for Ame...,Governor Lisa D. Cook,2024-09-26,I am grateful for the educational opportunitie...,0.19
10,Supporting Market Resilience and Financial Sta...,Vice Chair for Supervision Michael S. Barr,2024-09-26,"Thank you, and thank you for the opportunity t...",0.19
11,Opening Remarks,Chair Jerome H. Powell,2024-09-26,"Hello, everyone, and welcome to the 10th Annua...",0.19
...,...,...,...,...,...
335,The Digitalization of Payments and Currency: S...,Governor Lael Brainard,2020-02-05,I want to thank Darrell Duffie for inviting me...,0.22
336,"Spontaneity and Order: Transparency, Accountab...",Vice Chair for Supervision Randal K. Quarles,2020-01-17,It's a great pleasure to be with you today at ...,0.26
337,The Outlook for Housing,Governor Michelle W. Bowman,2020-01-16,Few sectors are as central to the success of o...,0.23
338,U.S. Economic Outlook and Monetary Policy,Vice Chair Richard H. Clarida,2020-01-09,Thank you for the opportunity to join you brig...,0.27


In [None]:
# speeches
# the 'article' column of the merged frame, one entry per speech
fed_speeches = merged_df['article']

### Predict sentiment using the model that was trained earlier

In [None]:
import nltk
nltk.download('punkt')
from nltk.tokenize import sent_tokenize

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [None]:
# import the model that was trained earlier
# Loaded from cl_path, the model_dir given to Config. Note this rebinds `model`,
# which previously held the result of finbert.create_the_model().
model = AutoModelForSequenceClassification.from_pretrained(
    cl_path,
    num_labels=3,
    cache_dir=None,
)

In [None]:
# results
# model_results: rounded mean score per speech; model_sentiments: its label
model_results, model_sentiments = [], []

# predict the speeches based on the model we trained earlier
i = 0  # stays 0 when there are no speeches
for i, speech in enumerate(fed_speeches, start=1):
  result = predict(speech, model)

  # mean of result.sentiment_score, rounded to two decimals
  average_score = round(result.sentiment_score.mean(), 2)
  model_results.append(average_score)

  # scores strictly inside (-0.05, 0.05) are labelled neutral
  model_sentiments.append(
      'negative' if average_score <= -0.05
      else 'positive' if average_score >= 0.05
      else 'neutral'
  )

  # progress counter: number of speeches processed so far
  print(i)

  result = pd.concat([result, batch_result], ignore_index=True)


1


  result = pd.concat([result, batch_result], ignore_index=True)


2


  result = pd.concat([result, batch_result], ignore_index=True)


3


  result = pd.concat([result, batch_result], ignore_index=True)


4


  result = pd.concat([result, batch_result], ignore_index=True)


5


  result = pd.concat([result, batch_result], ignore_index=True)


6


  result = pd.concat([result, batch_result], ignore_index=True)


7


  result = pd.concat([result, batch_result], ignore_index=True)


8


  result = pd.concat([result, batch_result], ignore_index=True)


9


  result = pd.concat([result, batch_result], ignore_index=True)


10


  result = pd.concat([result, batch_result], ignore_index=True)


11


  result = pd.concat([result, batch_result], ignore_index=True)


12


  result = pd.concat([result, batch_result], ignore_index=True)


13


  result = pd.concat([result, batch_result], ignore_index=True)


14


  result = pd.concat([result, batch_result], ignore_index=True)


15


  result = pd.concat([result, batch_result], ignore_index=True)


16


  result = pd.concat([result, batch_result], ignore_index=True)


17


  result = pd.concat([result, batch_result], ignore_index=True)


18


  result = pd.concat([result, batch_result], ignore_index=True)


19


  result = pd.concat([result, batch_result], ignore_index=True)


20


  result = pd.concat([result, batch_result], ignore_index=True)


21


  result = pd.concat([result, batch_result], ignore_index=True)


22


  result = pd.concat([result, batch_result], ignore_index=True)


23


  result = pd.concat([result, batch_result], ignore_index=True)


24


  result = pd.concat([result, batch_result], ignore_index=True)


25


  result = pd.concat([result, batch_result], ignore_index=True)


26


  result = pd.concat([result, batch_result], ignore_index=True)


27


  result = pd.concat([result, batch_result], ignore_index=True)


28


  result = pd.concat([result, batch_result], ignore_index=True)


29


  result = pd.concat([result, batch_result], ignore_index=True)


30


  result = pd.concat([result, batch_result], ignore_index=True)


31


  result = pd.concat([result, batch_result], ignore_index=True)


32


  result = pd.concat([result, batch_result], ignore_index=True)


33


  result = pd.concat([result, batch_result], ignore_index=True)


34


  result = pd.concat([result, batch_result], ignore_index=True)


35


  result = pd.concat([result, batch_result], ignore_index=True)


36


  result = pd.concat([result, batch_result], ignore_index=True)


37


  result = pd.concat([result, batch_result], ignore_index=True)


38


  result = pd.concat([result, batch_result], ignore_index=True)


39


  result = pd.concat([result, batch_result], ignore_index=True)


40


  result = pd.concat([result, batch_result], ignore_index=True)


41


  result = pd.concat([result, batch_result], ignore_index=True)


42


  result = pd.concat([result, batch_result], ignore_index=True)


43


  result = pd.concat([result, batch_result], ignore_index=True)


44


  result = pd.concat([result, batch_result], ignore_index=True)


45


  result = pd.concat([result, batch_result], ignore_index=True)


46


  result = pd.concat([result, batch_result], ignore_index=True)


47


  result = pd.concat([result, batch_result], ignore_index=True)


48


  result = pd.concat([result, batch_result], ignore_index=True)


49


  result = pd.concat([result, batch_result], ignore_index=True)


50


  result = pd.concat([result, batch_result], ignore_index=True)


51


  result = pd.concat([result, batch_result], ignore_index=True)


52


  result = pd.concat([result, batch_result], ignore_index=True)


53


  result = pd.concat([result, batch_result], ignore_index=True)


54


  result = pd.concat([result, batch_result], ignore_index=True)


55


  result = pd.concat([result, batch_result], ignore_index=True)


56


  result = pd.concat([result, batch_result], ignore_index=True)


57


  result = pd.concat([result, batch_result], ignore_index=True)


58


  result = pd.concat([result, batch_result], ignore_index=True)


59


  result = pd.concat([result, batch_result], ignore_index=True)


60


  result = pd.concat([result, batch_result], ignore_index=True)


61


  result = pd.concat([result, batch_result], ignore_index=True)


62


  result = pd.concat([result, batch_result], ignore_index=True)


63


  result = pd.concat([result, batch_result], ignore_index=True)


64


  result = pd.concat([result, batch_result], ignore_index=True)


65


  result = pd.concat([result, batch_result], ignore_index=True)


66


  result = pd.concat([result, batch_result], ignore_index=True)


67


  result = pd.concat([result, batch_result], ignore_index=True)


68


  result = pd.concat([result, batch_result], ignore_index=True)


69


  result = pd.concat([result, batch_result], ignore_index=True)


70


  result = pd.concat([result, batch_result], ignore_index=True)


71


  result = pd.concat([result, batch_result], ignore_index=True)


72


  result = pd.concat([result, batch_result], ignore_index=True)


73


  result = pd.concat([result, batch_result], ignore_index=True)


74


  result = pd.concat([result, batch_result], ignore_index=True)


75


  result = pd.concat([result, batch_result], ignore_index=True)


76


  result = pd.concat([result, batch_result], ignore_index=True)


77


  result = pd.concat([result, batch_result], ignore_index=True)


78


  result = pd.concat([result, batch_result], ignore_index=True)


79


  result = pd.concat([result, batch_result], ignore_index=True)


80


  result = pd.concat([result, batch_result], ignore_index=True)


81


  result = pd.concat([result, batch_result], ignore_index=True)


82


  result = pd.concat([result, batch_result], ignore_index=True)


83


  result = pd.concat([result, batch_result], ignore_index=True)


84


  result = pd.concat([result, batch_result], ignore_index=True)


85


  result = pd.concat([result, batch_result], ignore_index=True)


86


  result = pd.concat([result, batch_result], ignore_index=True)


87


  result = pd.concat([result, batch_result], ignore_index=True)


88


  result = pd.concat([result, batch_result], ignore_index=True)


89


  result = pd.concat([result, batch_result], ignore_index=True)


90


  result = pd.concat([result, batch_result], ignore_index=True)


91


  result = pd.concat([result, batch_result], ignore_index=True)


92


  result = pd.concat([result, batch_result], ignore_index=True)


93


  result = pd.concat([result, batch_result], ignore_index=True)


94


  result = pd.concat([result, batch_result], ignore_index=True)


95


  result = pd.concat([result, batch_result], ignore_index=True)


96


  result = pd.concat([result, batch_result], ignore_index=True)


97


  result = pd.concat([result, batch_result], ignore_index=True)


98


  result = pd.concat([result, batch_result], ignore_index=True)


99


  result = pd.concat([result, batch_result], ignore_index=True)


100


  result = pd.concat([result, batch_result], ignore_index=True)


101


  result = pd.concat([result, batch_result], ignore_index=True)


102


  result = pd.concat([result, batch_result], ignore_index=True)


103


  result = pd.concat([result, batch_result], ignore_index=True)


104


  result = pd.concat([result, batch_result], ignore_index=True)


105


  result = pd.concat([result, batch_result], ignore_index=True)


106


  result = pd.concat([result, batch_result], ignore_index=True)


107


  result = pd.concat([result, batch_result], ignore_index=True)


108


  result = pd.concat([result, batch_result], ignore_index=True)


109


  result = pd.concat([result, batch_result], ignore_index=True)


110


  result = pd.concat([result, batch_result], ignore_index=True)


111


  result = pd.concat([result, batch_result], ignore_index=True)


112


  result = pd.concat([result, batch_result], ignore_index=True)


113


  result = pd.concat([result, batch_result], ignore_index=True)


114


  result = pd.concat([result, batch_result], ignore_index=True)


115


  result = pd.concat([result, batch_result], ignore_index=True)


116


  result = pd.concat([result, batch_result], ignore_index=True)


117


  result = pd.concat([result, batch_result], ignore_index=True)


118


  result = pd.concat([result, batch_result], ignore_index=True)


119


  result = pd.concat([result, batch_result], ignore_index=True)


120


  result = pd.concat([result, batch_result], ignore_index=True)


121


  result = pd.concat([result, batch_result], ignore_index=True)


122


  result = pd.concat([result, batch_result], ignore_index=True)


123


  result = pd.concat([result, batch_result], ignore_index=True)


124


  result = pd.concat([result, batch_result], ignore_index=True)


125


  result = pd.concat([result, batch_result], ignore_index=True)


126


  result = pd.concat([result, batch_result], ignore_index=True)


127


  result = pd.concat([result, batch_result], ignore_index=True)


128


  result = pd.concat([result, batch_result], ignore_index=True)


129


  result = pd.concat([result, batch_result], ignore_index=True)


130


  result = pd.concat([result, batch_result], ignore_index=True)


131


  result = pd.concat([result, batch_result], ignore_index=True)


132


  result = pd.concat([result, batch_result], ignore_index=True)


133


  result = pd.concat([result, batch_result], ignore_index=True)


134


  result = pd.concat([result, batch_result], ignore_index=True)


135


  result = pd.concat([result, batch_result], ignore_index=True)


136


  result = pd.concat([result, batch_result], ignore_index=True)


137


  result = pd.concat([result, batch_result], ignore_index=True)


138


  result = pd.concat([result, batch_result], ignore_index=True)


139


  result = pd.concat([result, batch_result], ignore_index=True)


140


  result = pd.concat([result, batch_result], ignore_index=True)


141


  result = pd.concat([result, batch_result], ignore_index=True)


142


  result = pd.concat([result, batch_result], ignore_index=True)


143


  result = pd.concat([result, batch_result], ignore_index=True)


144


  result = pd.concat([result, batch_result], ignore_index=True)


145


  result = pd.concat([result, batch_result], ignore_index=True)


146


  result = pd.concat([result, batch_result], ignore_index=True)


147


  result = pd.concat([result, batch_result], ignore_index=True)


148


  result = pd.concat([result, batch_result], ignore_index=True)


149


  result = pd.concat([result, batch_result], ignore_index=True)


150


  result = pd.concat([result, batch_result], ignore_index=True)


151


  result = pd.concat([result, batch_result], ignore_index=True)


152


  result = pd.concat([result, batch_result], ignore_index=True)


153


  result = pd.concat([result, batch_result], ignore_index=True)


154


  result = pd.concat([result, batch_result], ignore_index=True)


155


  result = pd.concat([result, batch_result], ignore_index=True)


156


  result = pd.concat([result, batch_result], ignore_index=True)


157


  result = pd.concat([result, batch_result], ignore_index=True)


158


  result = pd.concat([result, batch_result], ignore_index=True)


159


  result = pd.concat([result, batch_result], ignore_index=True)


160


  result = pd.concat([result, batch_result], ignore_index=True)


161


  result = pd.concat([result, batch_result], ignore_index=True)


162


  result = pd.concat([result, batch_result], ignore_index=True)


163


  result = pd.concat([result, batch_result], ignore_index=True)


164


  result = pd.concat([result, batch_result], ignore_index=True)


165


  result = pd.concat([result, batch_result], ignore_index=True)


166


  result = pd.concat([result, batch_result], ignore_index=True)


167


  result = pd.concat([result, batch_result], ignore_index=True)


168


  result = pd.concat([result, batch_result], ignore_index=True)


169


  result = pd.concat([result, batch_result], ignore_index=True)


170


  result = pd.concat([result, batch_result], ignore_index=True)


171


  result = pd.concat([result, batch_result], ignore_index=True)


172


  result = pd.concat([result, batch_result], ignore_index=True)


173


  result = pd.concat([result, batch_result], ignore_index=True)


174


  result = pd.concat([result, batch_result], ignore_index=True)


175


  result = pd.concat([result, batch_result], ignore_index=True)


176


  result = pd.concat([result, batch_result], ignore_index=True)


177


  result = pd.concat([result, batch_result], ignore_index=True)


178


  result = pd.concat([result, batch_result], ignore_index=True)


179


  result = pd.concat([result, batch_result], ignore_index=True)


180


  result = pd.concat([result, batch_result], ignore_index=True)


181


  result = pd.concat([result, batch_result], ignore_index=True)


182


  result = pd.concat([result, batch_result], ignore_index=True)


183


  result = pd.concat([result, batch_result], ignore_index=True)


184


  result = pd.concat([result, batch_result], ignore_index=True)


185


  result = pd.concat([result, batch_result], ignore_index=True)


186


  result = pd.concat([result, batch_result], ignore_index=True)


187


  result = pd.concat([result, batch_result], ignore_index=True)


188


  result = pd.concat([result, batch_result], ignore_index=True)


189


  result = pd.concat([result, batch_result], ignore_index=True)


190


  result = pd.concat([result, batch_result], ignore_index=True)


191


  result = pd.concat([result, batch_result], ignore_index=True)


192


  result = pd.concat([result, batch_result], ignore_index=True)


193


  result = pd.concat([result, batch_result], ignore_index=True)


194


  result = pd.concat([result, batch_result], ignore_index=True)


195


  result = pd.concat([result, batch_result], ignore_index=True)


196


  result = pd.concat([result, batch_result], ignore_index=True)


197


  result = pd.concat([result, batch_result], ignore_index=True)


198


  result = pd.concat([result, batch_result], ignore_index=True)


199


  result = pd.concat([result, batch_result], ignore_index=True)


200


  result = pd.concat([result, batch_result], ignore_index=True)


201


  result = pd.concat([result, batch_result], ignore_index=True)


202


  result = pd.concat([result, batch_result], ignore_index=True)


203


  result = pd.concat([result, batch_result], ignore_index=True)


204


  result = pd.concat([result, batch_result], ignore_index=True)


205


  result = pd.concat([result, batch_result], ignore_index=True)


206


  result = pd.concat([result, batch_result], ignore_index=True)


207


  result = pd.concat([result, batch_result], ignore_index=True)


208


  result = pd.concat([result, batch_result], ignore_index=True)


209


  result = pd.concat([result, batch_result], ignore_index=True)


210


  result = pd.concat([result, batch_result], ignore_index=True)


211


  result = pd.concat([result, batch_result], ignore_index=True)


212


  result = pd.concat([result, batch_result], ignore_index=True)


213


  result = pd.concat([result, batch_result], ignore_index=True)


214


  result = pd.concat([result, batch_result], ignore_index=True)


215


  result = pd.concat([result, batch_result], ignore_index=True)


216


  result = pd.concat([result, batch_result], ignore_index=True)


217


  result = pd.concat([result, batch_result], ignore_index=True)


218


  result = pd.concat([result, batch_result], ignore_index=True)


219


  result = pd.concat([result, batch_result], ignore_index=True)


220


  result = pd.concat([result, batch_result], ignore_index=True)


221


  result = pd.concat([result, batch_result], ignore_index=True)


222


  result = pd.concat([result, batch_result], ignore_index=True)


223


  result = pd.concat([result, batch_result], ignore_index=True)


224


  result = pd.concat([result, batch_result], ignore_index=True)


225


  result = pd.concat([result, batch_result], ignore_index=True)


226


  result = pd.concat([result, batch_result], ignore_index=True)


227


  result = pd.concat([result, batch_result], ignore_index=True)


228


  result = pd.concat([result, batch_result], ignore_index=True)


229


  result = pd.concat([result, batch_result], ignore_index=True)


230


  result = pd.concat([result, batch_result], ignore_index=True)


231


  result = pd.concat([result, batch_result], ignore_index=True)


232


  result = pd.concat([result, batch_result], ignore_index=True)


233


  result = pd.concat([result, batch_result], ignore_index=True)


234


  result = pd.concat([result, batch_result], ignore_index=True)


235


  result = pd.concat([result, batch_result], ignore_index=True)


236


  result = pd.concat([result, batch_result], ignore_index=True)


237


  result = pd.concat([result, batch_result], ignore_index=True)


238


  result = pd.concat([result, batch_result], ignore_index=True)


239


  result = pd.concat([result, batch_result], ignore_index=True)


240


  result = pd.concat([result, batch_result], ignore_index=True)


241


  result = pd.concat([result, batch_result], ignore_index=True)


242


  result = pd.concat([result, batch_result], ignore_index=True)


243


  result = pd.concat([result, batch_result], ignore_index=True)


244


  result = pd.concat([result, batch_result], ignore_index=True)


245


  result = pd.concat([result, batch_result], ignore_index=True)


246


  result = pd.concat([result, batch_result], ignore_index=True)


247


  result = pd.concat([result, batch_result], ignore_index=True)


248


  result = pd.concat([result, batch_result], ignore_index=True)


249


  result = pd.concat([result, batch_result], ignore_index=True)


250


  result = pd.concat([result, batch_result], ignore_index=True)


251


  result = pd.concat([result, batch_result], ignore_index=True)


252


  result = pd.concat([result, batch_result], ignore_index=True)


253


  result = pd.concat([result, batch_result], ignore_index=True)


254


  result = pd.concat([result, batch_result], ignore_index=True)


255


  result = pd.concat([result, batch_result], ignore_index=True)


256


  result = pd.concat([result, batch_result], ignore_index=True)


257


  result = pd.concat([result, batch_result], ignore_index=True)


258


  result = pd.concat([result, batch_result], ignore_index=True)


259


  result = pd.concat([result, batch_result], ignore_index=True)


260


  result = pd.concat([result, batch_result], ignore_index=True)


261


  result = pd.concat([result, batch_result], ignore_index=True)


262


  result = pd.concat([result, batch_result], ignore_index=True)


263


  result = pd.concat([result, batch_result], ignore_index=True)


264


  result = pd.concat([result, batch_result], ignore_index=True)


265


  result = pd.concat([result, batch_result], ignore_index=True)


266


  result = pd.concat([result, batch_result], ignore_index=True)


267


  result = pd.concat([result, batch_result], ignore_index=True)


268


  result = pd.concat([result, batch_result], ignore_index=True)


269


  result = pd.concat([result, batch_result], ignore_index=True)


270


  result = pd.concat([result, batch_result], ignore_index=True)


271


  result = pd.concat([result, batch_result], ignore_index=True)


272


  result = pd.concat([result, batch_result], ignore_index=True)


273


  result = pd.concat([result, batch_result], ignore_index=True)


274


  result = pd.concat([result, batch_result], ignore_index=True)


275


  result = pd.concat([result, batch_result], ignore_index=True)


276


  result = pd.concat([result, batch_result], ignore_index=True)


277


  result = pd.concat([result, batch_result], ignore_index=True)


278


  result = pd.concat([result, batch_result], ignore_index=True)


279


  result = pd.concat([result, batch_result], ignore_index=True)


280


  result = pd.concat([result, batch_result], ignore_index=True)


281


  result = pd.concat([result, batch_result], ignore_index=True)


282


  result = pd.concat([result, batch_result], ignore_index=True)


283


  result = pd.concat([result, batch_result], ignore_index=True)


284


  result = pd.concat([result, batch_result], ignore_index=True)


285


  result = pd.concat([result, batch_result], ignore_index=True)


286


  result = pd.concat([result, batch_result], ignore_index=True)


287


  result = pd.concat([result, batch_result], ignore_index=True)


288


  result = pd.concat([result, batch_result], ignore_index=True)


289


  result = pd.concat([result, batch_result], ignore_index=True)


290


  result = pd.concat([result, batch_result], ignore_index=True)


291


  result = pd.concat([result, batch_result], ignore_index=True)


292


  result = pd.concat([result, batch_result], ignore_index=True)


293


  result = pd.concat([result, batch_result], ignore_index=True)


294


  result = pd.concat([result, batch_result], ignore_index=True)


295


  result = pd.concat([result, batch_result], ignore_index=True)


296


  result = pd.concat([result, batch_result], ignore_index=True)


297


  result = pd.concat([result, batch_result], ignore_index=True)


298


  result = pd.concat([result, batch_result], ignore_index=True)


299


  result = pd.concat([result, batch_result], ignore_index=True)


300


  result = pd.concat([result, batch_result], ignore_index=True)


301


  result = pd.concat([result, batch_result], ignore_index=True)


302


  result = pd.concat([result, batch_result], ignore_index=True)


303


  result = pd.concat([result, batch_result], ignore_index=True)


304


  result = pd.concat([result, batch_result], ignore_index=True)


305


  result = pd.concat([result, batch_result], ignore_index=True)


306


  result = pd.concat([result, batch_result], ignore_index=True)


307


  result = pd.concat([result, batch_result], ignore_index=True)


308


  result = pd.concat([result, batch_result], ignore_index=True)


309


  result = pd.concat([result, batch_result], ignore_index=True)


310


  result = pd.concat([result, batch_result], ignore_index=True)


311


  result = pd.concat([result, batch_result], ignore_index=True)


312


  result = pd.concat([result, batch_result], ignore_index=True)


313


  result = pd.concat([result, batch_result], ignore_index=True)


314


  result = pd.concat([result, batch_result], ignore_index=True)


315


  result = pd.concat([result, batch_result], ignore_index=True)


316


  result = pd.concat([result, batch_result], ignore_index=True)


317


  result = pd.concat([result, batch_result], ignore_index=True)


318


  result = pd.concat([result, batch_result], ignore_index=True)


319


  result = pd.concat([result, batch_result], ignore_index=True)


320


  result = pd.concat([result, batch_result], ignore_index=True)


321


  result = pd.concat([result, batch_result], ignore_index=True)


322


  result = pd.concat([result, batch_result], ignore_index=True)


323


  result = pd.concat([result, batch_result], ignore_index=True)


324


  result = pd.concat([result, batch_result], ignore_index=True)


325


  result = pd.concat([result, batch_result], ignore_index=True)


326


  result = pd.concat([result, batch_result], ignore_index=True)


327


  result = pd.concat([result, batch_result], ignore_index=True)


328


  result = pd.concat([result, batch_result], ignore_index=True)


329


  result = pd.concat([result, batch_result], ignore_index=True)


330


  result = pd.concat([result, batch_result], ignore_index=True)


331


  result = pd.concat([result, batch_result], ignore_index=True)


332


  result = pd.concat([result, batch_result], ignore_index=True)


333


### Regression Analysis (finbert)

In [None]:
import statsmodels.api as sm

In [None]:
# dependent variable: the T10Y2Y column, passed as a plain list
Y = merged_df['T10Y2Y'].tolist()
# regressors: the finbert results plus an intercept column
X = sm.add_constant(model_results)

# build the OLS model, fit it, and show the summary table
tf_est = sm.OLS(Y, X)
tf_est2 = tf_est.fit()
tf_est2.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.013
Model:,OLS,Adj. R-squared:,0.01
Method:,Least Squares,F-statistic:,4.525
Date:,"Thu, 17 Oct 2024",Prob (F-statistic):,0.0341
Time:,23:52:10,Log-Likelihood:,-355.42
No. Observations:,340,AIC:,714.8
Df Residuals:,338,BIC:,722.5
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0295,0.046,0.640,0.522,-0.061,0.120
x1,0.5451,0.256,2.127,0.034,0.041,1.049

0,1,2,3
Omnibus:,40.379,Durbin-Watson:,0.041
Prob(Omnibus):,0.0,Jarque-Bera (JB):,32.48
Skew:,0.663,Prob(JB):,8.85e-08
Kurtosis:,2.268,Cond. No.,6.92


### TextBlob model

In [None]:
from textblob import TextBlob

In [None]:
# result: one average sentence polarity per speech, in the order of fed_speeches
speech_avg_score = []

for speech in fed_speeches:
  # polarity of every sentence TextBlob splits this speech into
  polarities = [sentence.sentiment.polarity for sentence in TextBlob(speech).sentences]

  # a speech with no sentences (e.g. an empty string) would otherwise divide by
  # zero; score it 0.0, the midpoint of TextBlob's [-1.0, 1.0] polarity range,
  # so the list keeps exactly one entry per speech
  speech_avg_score.append(sum(polarities)/len(polarities) if polarities else 0.0)

In [None]:
# regression analysis: T10Y2Y on the average TextBlob polarity of each speech
Y = merged_df['T10Y2Y'].tolist()
# add_constant supplies the intercept column
X = sm.add_constant(speech_avg_score)

tf_est = sm.OLS(Y, X)
tf_est2 = tf_est.fit()
tf_est2.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.005
Model:,OLS,Adj. R-squared:,0.002
Method:,Least Squares,F-statistic:,1.814
Date:,"Tue, 22 Oct 2024",Prob (F-statistic):,0.179
Time:,14:33:47,Log-Likelihood:,-356.77
No. Observations:,340,AIC:,717.5
Df Residuals:,338,BIC:,725.2
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.2336,0.115,2.025,0.044,0.007,0.461
x1,-1.2909,0.958,-1.347,0.179,-3.176,0.594

0,1,2,3
Omnibus:,50.456,Durbin-Watson:,0.023
Prob(Omnibus):,0.0,Jarque-Bera (JB):,32.103
Skew:,0.624,Prob(JB):,1.07e-07
Kurtosis:,2.158,Cond. No.,25.8


### RoBERTa (zero-shot classification)

In [None]:
from transformers import pipeline

In [None]:
# model
classifier = pipeline('zero-shot-classification', model='roberta-large-mnli')
# label
candidate_labels = ['hawkish', 'dovish', 'neutral']
# the hawkish-minus-dovish gap must exceed this margin (in either direction)
# for a speech to be labelled hawkish or dovish; otherwise it is neutral
NEUTRAL_BAND = 0.05

# result
speech_score = []      # hawkish score minus dovish score, one per speech
speech_sentiment = []  # 'hawkish' / 'dovish' / 'neutral', one per speech
for i, speech in enumerate(fed_speeches, start=1):

  # stored as `zero_shot`, not `result`, so the `result` DataFrame accumulated
  # with pd.concat earlier in the notebook is not overwritten
  # NOTE(review): each speech is passed whole - confirm how the pipeline handles
  # texts longer than the model's input limit
  zero_shot = classifier(speech, candidate_labels)

  # the pipeline returns parallel 'labels' and 'scores' lists; pair them up
  label_scores = dict(zip(zero_shot['labels'], zero_shot['scores']))
  score = label_scores['hawkish'] - label_scores['dovish']

  speech_score.append(score)

  if score > NEUTRAL_BAND:
    speech_sentiment.append('hawkish')
  elif score < -NEUTRAL_BAND:
    speech_sentiment.append('dovish')
  else:
    speech_sentiment.append('neutral')

  # progress: number of speeches classified so far
  print(i)

config.json:   0%|          | 0.00/688 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.43G [00:00<?, ?B/s]

Some weights of the model checkpoint at roberta-large-mnli were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]



1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277


In [None]:
# regression analysis: T10Y2Y on the roberta hawkish-minus-dovish score
Y = merged_df['T10Y2Y'].tolist()
# add_constant supplies the intercept column
X = sm.add_constant(speech_score)

tf_est = sm.OLS(Y, X)
tf_est2 = tf_est.fit()
tf_est2.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.016
Model:,OLS,Adj. R-squared:,0.014
Method:,Least Squares,F-statistic:,5.658
Date:,"Wed, 23 Oct 2024",Prob (F-statistic):,0.0179
Time:,13:48:17,Log-Likelihood:,-354.86
No. Observations:,340,AIC:,713.7
Df Residuals:,338,BIC:,721.4
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.1410,0.044,3.220,0.001,0.055,0.227
x1,0.9580,0.403,2.379,0.018,0.166,1.750

0,1,2,3
Omnibus:,38.736,Durbin-Watson:,0.045
Prob(Omnibus):,0.0,Jarque-Bera (JB):,31.734
Skew:,0.657,Prob(JB):,1.29e-07
Kurtosis:,2.284,Cond. No.,10.8


In [None]:
# adding sentiment results into merged dataframe
# score_1 holds the finbert results, score_2 the roberta scores
sentiment_columns = {'score_1': model_results, 'score_2': speech_score}
for column_name, scores in sentiment_columns.items():
    merged_df[column_name] = scores

# export the dataframe to csv file (row index is not written)
merged_df.to_csv('merged_df.csv', index=False)

### Adding independent variables (macro data)

In [None]:
# read in data (the frame exported above) and parse the date column
data = pd.read_csv('merged_df.csv')
data['date'] = pd.to_datetime(data['date'])

# read in macro data: passing a list of sheet names makes read_excel parse the
# workbook once and return a dict of DataFrames keyed by sheet name, instead of
# re-opening the file for every sheet
macro_sheets = pd.read_excel('GDPC1.xlsx', sheet_name=['JOLTS', 'Inflation', 'Nonfarm', 'GDP'])
jolts = macro_sheets['JOLTS']
infla = macro_sheets['Inflation']
nonfarm = macro_sheets['Nonfarm']
gdp = macro_sheets['GDP']

In [None]:
# merge macro data into a single dataframe: the full JOLTS sheet plus the
# value column of each other sheet, placed side by side
# NOTE(review): concat(axis=1) aligns on the row index, so this assumes every
# sheet lists the same months in the same row order - confirm against the workbook
frames = [jolts, infla['Inflation'], nonfarm['PAYEMS'], gdp['GDP']]
macro = pd.concat(frames, axis='columns')
macro

Unnamed: 0,Date,JOLTS,Inflation,PAYEMS,GDP
0,2020-01-01,7170,2.5,152045,20693.238
1,2020-02-01,6974,2.3,152309,20147.697667
2,2020-03-01,5924,1.5,150898,19602.157333
3,2020-04-01,4637,0.3,130421,19056.617
4,2020-05-01,5593,0.1,133040,19554.009
5,2020-06-01,6156,0.6,137655,20051.401
6,2020-07-01,6491,1.0,139240,20548.793
7,2020-08-01,6369,1.3,140774,20623.092333
8,2020-09-01,6505,1.4,141820,20697.391667
9,2020-10-01,6838,1.2,142493,20771.691


In [None]:
# assign macro independent variables based on the corresponding dates
# macro column -> name of the column created on `data`
macro_columns = {'JOLTS': 'jolts', 'Inflation': 'infla', 'PAYEMS': 'nonfarm', 'GDP': 'gdp'}

# index the macro rows by calendar month (year + month); when a month occurs
# more than once keep its last row, which is the row a full scan would end on
macro_by_month = (
    macro.assign(month=pd.to_datetime(macro['Date']).dt.to_period('M'))
         .dropna(subset=['month'])
         .drop_duplicates(subset='month', keep='last')
         .set_index('month')
)

# look each speech's month up once instead of scanning every macro row per
# speech; speeches whose month has no macro row get NaN in all four columns
speech_month = pd.to_datetime(data['date']).dt.to_period('M')
for macro_col, data_col in macro_columns.items():
    # float dtype so the columns can hold NaN for unmatched months
    data[data_col] = speech_month.map(macro_by_month[macro_col]).astype(float)

In [None]:
# display the speech frame with the jolts / infla / nonfarm / gdp columns attached
data

Unnamed: 0,title,speaker,date,article,T10Y2Y,score_1,score_2,jolts,infla,nonfarm,gdp
0,Economic Outlook,Chair Jerome H. Powell,2024-09-30,I have some brief comments on the economy and ...,0.15,0.20,-0.127254,7443.0,2.4,159105.0,23386.248000
1,Recent Views on Monetary Policy and the Econom...,Governor Michelle W. Bowman,2024-09-30,Good morning. I would like to thank the Georgi...,0.15,-0.04,-0.042487,7443.0,2.4,159105.0,23386.248000
2,What Will Artificial Intelligence Mean for Ame...,Governor Lisa D. Cook,2024-09-26,I am grateful for the educational opportunitie...,0.19,0.14,0.091379,7443.0,2.4,159105.0,23386.248000
3,Supporting Market Resilience and Financial Sta...,Vice Chair for Supervision Michael S. Barr,2024-09-26,"Thank you, and thank you for the opportunity t...",0.19,0.24,-0.007030,7443.0,2.4,159105.0,23386.248000
4,Opening Remarks,Chair Jerome H. Powell,2024-09-26,"Hello, everyone, and welcome to the 10th Annua...",0.19,0.30,0.013738,7443.0,2.4,159105.0,23386.248000
...,...,...,...,...,...,...,...,...,...,...,...
328,The Digitalization of Payments and Currency: S...,Governor Lael Brainard,2020-02-05,I want to thank Darrell Duffie for inviting me...,0.22,0.12,0.019982,6974.0,2.3,152309.0,20147.697667
329,"Spontaneity and Order: Transparency, Accountab...",Vice Chair for Supervision Randal K. Quarles,2020-01-17,It's a great pleasure to be with you today at ...,0.26,0.09,-0.054675,7170.0,2.5,152045.0,20693.238000
330,The Outlook for Housing,Governor Michelle W. Bowman,2020-01-16,Few sectors are as central to the success of o...,0.23,0.06,0.040500,7170.0,2.5,152045.0,20693.238000
331,U.S. Economic Outlook and Monetary Policy,Vice Chair Richard H. Clarida,2020-01-09,Thank you for the opportunity to join you brig...,0.27,0.26,-0.218619,7170.0,2.5,152045.0,20693.238000


In [None]:
# regression analysis with finbert result and macro independent variables
regressors = ['score_1', 'jolts', 'infla', 'nonfarm', 'gdp']
Y = data['T10Y2Y']
# add_constant supplies the intercept column ('const' in the summary)
X = sm.add_constant(data[regressors])

tf_est = sm.OLS(Y, X).fit()
tf_est.summary()

0,1,2,3
Dep. Variable:,T10Y2Y,R-squared:,0.64
Model:,OLS,Adj. R-squared:,0.634
Method:,Least Squares,F-statistic:,116.0
Date:,"Wed, 30 Oct 2024",Prob (F-statistic):,2.88e-70
Time:,16:40:44,Log-Likelihood:,-183.88
No. Observations:,333,AIC:,379.8
Df Residuals:,327,BIC:,402.6
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,10.9187,0.615,17.760,0.000,9.709,12.128
score_1,0.2800,0.160,1.744,0.082,-0.036,0.596
jolts,0.0003,4.4e-05,6.682,0.000,0.000,0.000
infla,-0.1701,0.031,-5.432,0.000,-0.232,-0.108
nonfarm,-0.0001,9.74e-06,-10.264,0.000,-0.000,-8.08e-05
gdp,0.0001,6.37e-05,1.716,0.087,-1.6e-05,0.000

0,1,2,3
Omnibus:,8.802,Durbin-Watson:,0.095
Prob(Omnibus):,0.012,Jarque-Bera (JB):,5.601
Skew:,-0.156,Prob(JB):,0.0608
Kurtosis:,2.447,Cond. No.,4090000.0


In [None]:
# regression analysis with roberta result and macro independent variables
regressors = ['score_2', 'jolts', 'infla', 'nonfarm', 'gdp']
Y = data['T10Y2Y']
# add_constant supplies the intercept column ('const' in the summary)
X = sm.add_constant(data[regressors])

tf_est = sm.OLS(Y, X).fit()
tf_est.summary()

0,1,2,3
Dep. Variable:,T10Y2Y,R-squared:,0.639
Model:,OLS,Adj. R-squared:,0.634
Method:,Least Squares,F-statistic:,116.0
Date:,"Wed, 30 Oct 2024",Prob (F-statistic):,3.07e-70
Time:,16:43:09,Log-Likelihood:,-183.94
No. Observations:,333,AIC:,379.9
Df Residuals:,327,BIC:,402.7
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,11.0565,0.608,18.173,0.000,9.860,12.253
score_2,0.4314,0.253,1.706,0.089,-0.066,0.929
jolts,0.0003,4.41e-05,6.444,0.000,0.000,0.000
infla,-0.1645,0.031,-5.240,0.000,-0.226,-0.103
nonfarm,-0.0001,9.74e-06,-10.285,0.000,-0.000,-8.11e-05
gdp,0.0001,6.38e-05,1.728,0.085,-1.52e-05,0.000

0,1,2,3
Omnibus:,10.046,Durbin-Watson:,0.093
Prob(Omnibus):,0.007,Jarque-Bera (JB):,5.708
Skew:,-0.12,Prob(JB):,0.0576
Kurtosis:,2.405,Cond. No.,4040000.0
