# Load the FinBERT Model

In [1]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.22.1-py3-none-any.whl (4.9 MB)
[K     |████████████████████████████████| 4.9 MB 2.1 MB/s 
Collecting huggingface-hub<1.0,>=0.9.0
  Downloading huggingface_hub-0.9.1-py3-none-any.whl (120 kB)
[K     |████████████████████████████████| 120 kB 45.8 MB/s 
Collecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)
[K     |████████████████████████████████| 6.6 MB 42.8 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.9.1 tokenizers-0.12.1 transformers-4.22.1


In [2]:
from transformers import BertForSequenceClassification, BertTokenizer, pipeline

In [3]:
finbert = BertForSequenceClassification.from_pretrained('yiyanghkust/finbert-tone',num_labels=3)
tokenizer = BertTokenizer.from_pretrained('yiyanghkust/finbert-tone')

Downloading:   0%|          | 0.00/533 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/439M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

# Read our data

In [7]:
import pandas as pd

In [23]:
df_train_data = pd.read_csv('project2_training_data.txt', delimiter = '\n', header = None, names = ['sentence'])

In [24]:
df_train_labels = pd.read_csv('project2_training_data_labels.txt', delimiter = '\n', header = None, names = ['sentiment'])

In [25]:
print(len(df_train_data), len(df_train_labels))

1811 1811


In [26]:
df_train_data

Unnamed: 0,sentence
0,Merrill Lynch analyst Campbell Morgan upgraded...
1,Eriikka S+Âderstr+Âm has previously held sever...
2,The webcast may be followed online on the comp...
3,"Typical end-uses include roof structures , flo..."
4,The sale will be finalized in September or Oct...
...,...
1806,With this appointment Kaupthing Bank aims to f...
1807,Jon Risfelt has previously held operational ex...
1808,The group intends to relocate warehouse and of...
1809,"The contract includes software licences , appl..."


In [27]:
df_train_labels

Unnamed: 0,sentiment
0,positive
1,neutral
2,neutral
3,neutral
4,neutral
...,...
1806,positive
1807,neutral
1808,neutral
1809,neutral


# Predict sentiment using FinBERT

In [28]:
nlp = pipeline("text-classification", model=finbert, tokenizer=tokenizer)

In [34]:
input_sentences = df_train_data['sentence'].values.tolist()

In [35]:
input_sentences

["Merrill Lynch analyst Campbell Morgan upgraded his recommendation on PaperlinX from `` neutral '' to `` buy '' in May .",
 'Eriikka S+Âderstr+Âm has previously held several positions in finance and control at Nokia Networks including acting as the Business Group Controller and having the corporate controller position at Nokia Siemens Networks .',
 'The webcast may be followed online on the company website at www.ruukki.com/investors .',
 'Typical end-uses include roof structures , floorings , walls and ceilings , non-visible structures in vehicles , packaging and boxes , construction site structures , fencing and shelters , and formwork with a limited number of concrete pourings .',
 'The sale will be finalized in September or October , the company said .',
 'Finnish steel maker Rautaruukki Oyj ( Ruukki ) said on July 7 , 2008 that it won a 9.0 mln euro ( $ 14.1 mln ) contract to supply and install steel superstructures for Partihallsforbindelsen bridge project in Gothenburg , wester

In [42]:
results = nlp(input_sentences)

In [49]:
pred_labels = []
for i in range(df_train_data.shape[0]):
    pred_labels.append(results[i]['label'])

In [57]:
pred_labels = [x.lower() for x in pred_labels]

In [59]:
set(pred_labels)

{'negative', 'neutral', 'positive'}

# Evaluation of FinBERT

In [67]:
from sklearn.metrics import classification_report, precision_score, recall_score, f1_score

In [68]:
print(classification_report(df_train_labels['sentiment'], pred_labels))

              precision    recall  f1-score   support

    negative       0.88      0.92      0.90       242
     neutral       0.91      0.98      0.94      1113
    positive       0.95      0.74      0.83       456

    accuracy                           0.91      1811
   macro avg       0.91      0.88      0.89      1811
weighted avg       0.92      0.91      0.91      1811



In [69]:
print('Micro avg Precision:',precision_score(df_train_labels['sentiment'], pred_labels, average='micro'))
print('Micro avg Recall:',recall_score(df_train_labels['sentiment'], pred_labels, average='micro'))
print('Micro avg F1 score',f1_score(df_train_labels['sentiment'], pred_labels, average='micro'))

Micro avg Precision: 0.9133075648812811
Micro avg Recall: 0.9133075648812811
Micro avg F1 score 0.9133075648812811


# RESULT: FinBERT Micro Averaged Precision, Recall and F-measure is **0.91**.