# FinBERT Baseline


## Modules

In [1]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForSequenceClassification

from pathlib import Path
import shutil

import os
import logging
import sys
sys.path.append('..')
from textblob import TextBlob
from pprint import pprint
from sklearn.metrics import classification_report

from transformers import AutoModelForSequenceClassification

from google.colab import drive
drive.mount('/content/drive')


#!cp /content/drive/finBERT-master/finBERT-master.finbert.finbert.py .
!cp -r /content/drive/MyDrive/finBERT-master/finBERT-master/finbert .
from finbert.finbert import *
import finbert.utils as tools


%load_ext autoreload
%autoreload 2

# Project root: the parent of the directory the notebook is running from.
project_dir = Path.cwd().parent
# Never truncate cell contents when displaying DataFrames. `None` is the
# supported "no limit" value; the former `-1` sentinel is deprecated and
# emits a warning (or is rejected outright on newer pandas releases).
pd.set_option('display.max_colwidth', None)



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


  pd.set_option('max_colwidth', -1)


In [2]:
# Root logger setup: only records at ERROR level or above are emitted,
# each prefixed with a timestamp, the level name and the logger name.
logging.basicConfig(
    level=logging.ERROR,
    datefmt='%m/%d/%Y %H:%M:%S',
    format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
)

## Prepare the model

In [3]:
# Locations used throughout the notebook, all relative to `project_dir`.
lm_path = project_dir.joinpath('models', 'language_model', 'finbertTRC2')             # language-model directory
cl_path = project_dir.joinpath('models', 'classifier_model', 'finbert-sentiment')     # classifier output directory
cl_data_path = project_dir.joinpath('data', 'sentiment_data')                         # sentiment dataset directory

###  Configuring training parameters

In [4]:
# Start from an empty classifier directory so nothing from a previous run
# is left behind. A missing directory is the only expected condition here;
# any other failure (e.g. a permission problem) should surface instead of
# being silently swallowed.
try:
    shutil.rmtree(cl_path)
except FileNotFoundError:
    pass

# Sequence-classification model loaded from the "ProsusAI/finbert" checkpoint.
bertmodel = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")

# Training configuration handed to FinBert below.
# NOTE(review): the exact meaning of each option is defined by finbert's
# `Config` class, which is not visible in this notebook.
config = Config(data_dir=cl_data_path,
                bert_model=bertmodel,
                num_train_epochs=4,
                model_dir=cl_path,
                max_seq_length=48,
                train_batch_size=32,
                learning_rate=2e-5,
                output_mode='classification',
                warm_up_proportion=0.2,
                local_rank=-1,
                discriminate=True,
                gradual_unfreeze=True)

In [5]:
# Build the FinBert training wrapper from the configuration above.
finbert = FinBert(config)
# NOTE(review): presumably selects which base vocabulary/tokenizer FinBert uses — confirm in finbert.py.
finbert.base_model = 'bert-base-uncased'
# These two flags were already passed to Config(...) with the same values;
# they are set again here explicitly on the instance's config.
finbert.config.discriminate=True
finbert.config.gradual_unfreeze=True

In [6]:
# NOTE(review): the order of this list presumably fixes the class indices
# (positive=0, negative=1, neutral=2); the TextAttack dataset construction
# later in the notebook relies on exactly that mapping — confirm in finbert.py.
finbert.prepare_model(label_list=['positive','negative','neutral'])

## Fine-tune the model

In [7]:
# Get the training examples.
# The returned items are iterated later and expose `.text` and `.label`.
train_data = finbert.get_data('train')

In [8]:
# Model object that is passed to `finbert.train` in the next step.
model = finbert.create_the_model()



### Training

In [9]:
# Fine-tune on the training examples; the returned model is used for
# evaluation and as the victim model of the adversarial attacks below.
trained_model = finbert.train(train_examples = train_data, model = model)

Epoch:   0%|          | 0/4 [00:00<?, ?it/s]

Iteration:   0%|          | 0/109 [00:00<?, ?it/s]

Validating:   0%|          | 0/13 [00:00<?, ?it/s]

Validation losses: [0.36050240580852216]
No best model found


Epoch:  25%|██▌       | 1/4 [00:21<01:03, 21.22s/it]

Iteration:   0%|          | 0/109 [00:00<?, ?it/s]

Validating:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch:  50%|█████     | 2/4 [00:42<00:42, 21.50s/it]

Validation losses: [0.36050240580852216, 0.394982805618873]


Iteration:   0%|          | 0/109 [00:00<?, ?it/s]

Validating:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch:  75%|███████▌  | 3/4 [01:09<00:23, 23.80s/it]

Validation losses: [0.36050240580852216, 0.394982805618873, 0.3875008305678001]


Iteration:   0%|          | 0/109 [00:00<?, ?it/s]

Validating:   0%|          | 0/13 [00:00<?, ?it/s]

Epoch: 100%|██████████| 4/4 [01:37<00:00, 24.44s/it]

Validation losses: [0.36050240580852216, 0.394982805618873, 0.3875008305678001, 0.3869855168920297]





## Test the model

`finbert.evaluate` returns a DataFrame in which the true label and the logit values are given for each example.

In [10]:
# Held-out test examples; also reused later to build the TextAttack test dataset.
test_data = finbert.get_data('test')

In [11]:
# Evaluate the fine-tuned model; the cells below read the `labels` and
# `predictions` columns of the returned DataFrame.
results = finbert.evaluate(examples=test_data, model=trained_model)

Testing:   0%|          | 0/31 [00:00<?, ?it/s]

### Prepare the classification report

In [12]:
def report(df, cols=('label', 'prediction', 'logits')):
    """Print the loss, accuracy and classification report for evaluated examples.

    Args:
        df: DataFrame with one row per evaluated example.
        cols: Sequence whose first three entries name, in order, the column
            of true class indices, the column of predicted class indices and
            the column holding the per-class model outputs of each example.

    Returns:
        None. All results are printed.

    Note:
        The loss is a cross-entropy weighted by the notebook-level
        `finbert.class_weights`, so `finbert` must already be prepared.
    """
    label_col, pred_col, logit_col = cols[:3]

    weighted_ce = CrossEntropyLoss(weight=finbert.class_weights)
    # Stack the per-row arrays into a single ndarray before building the
    # tensor: torch.tensor() on a list of ndarrays takes an extremely slow
    # path and emits a UserWarning.
    logits = torch.tensor(np.array(list(df[logit_col])))
    targets = torch.tensor(list(df[label_col]))
    loss = weighted_ce(logits, targets)

    print("Loss:{0:.2f}".format(loss))
    print("Accuracy:{0:.2f}".format((df[label_col] == df[pred_col]).mean()))
    print("\nClassification Report:")
    print(classification_report(df[label_col], df[pred_col]))

In [13]:
# Predicted class index per example: position of the largest value in that
# row's `predictions` entry.
results['prediction'] = results['predictions'].apply(
    lambda scores: np.argmax(scores, axis=0)
)

In [14]:
# Column names differ from report()'s defaults: true labels live in
# `labels`, predicted indices in `prediction`, raw model outputs in `predictions`.
report(results,cols=['labels','prediction','predictions'])

Loss:0.41
Accuracy:0.83

Classification Report:
              precision    recall  f1-score   support

           0       0.75      0.84      0.79       267
           1       0.73      0.90      0.81       128
           2       0.91      0.82      0.86       575

    accuracy                           0.83       970
   macro avg       0.80      0.85      0.82       970
weighted avg       0.84      0.83      0.83       970



  loss = cs(torch.tensor(list(df[cols[2]])),torch.tensor(list(df[cols[0]])))


### Get predictions

With the `predict` function, given a piece of text, we split it into a list of sentences and then predict sentiment for each sentence. The output is written into a dataframe. Predictions are represented in three different columns:

1) `logit`: probabilities for each class

2) `prediction`: predicted label

3) `sentiment_score`: sentiment score calculated as: probability of positive - probability of negative

Below we analyze a paragraph taken out of [this](https://www.economist.com/finance-and-economics/2019/01/03/a-profit-warning-from-apple-jolts-markets) article from The Economist. For comparison purposes, we also put the sentiments predicted with TextBlob.
> Later that day Apple said it was revising down its earnings expectations in the fourth quarter of 2018, largely because of lower sales and signs of economic weakness in China. The news rapidly infected financial markets. Apple’s share price fell by around 7% in after-hours trading and the decline was extended to more than 10% when the market opened. The dollar fell by 3.7% against the yen in a matter of minutes after the announcement, before rapidly recovering some ground. Asian stockmarkets closed down on January 3rd and European ones opened lower. Yields on government bonds fell as investors fled to the traditional haven in a market storm.

In [15]:
# Paragraph from The Economist (Apple profit warning) used as the first prediction example.
text = "Later that day Apple said it was revising down its earnings expectations in \
the fourth quarter of 2018, largely because of lower sales and signs of economic weakness in China. \
The news rapidly infected financial markets. Apple’s share price fell by around 7% in after-hours \
trading and the decline was extended to more than 10% when the market opened. The dollar fell \
by 3.7% against the yen in a matter of minutes after the announcement, before rapidly recovering \
some ground. Asian stockmarkets closed down on January 3rd and European ones opened lower. \
Yields on government bonds fell as investors fled to the traditional haven in a market storm."

In [16]:
# Same value as the `cl_path` defined in the "Prepare the model" section;
# recomputed here so this cell does not depend on that one.
cl_path = project_dir/'models'/'classifier_model'/'finbert-sentiment'
# Load a 3-class classifier from `cl_path`.
# NOTE(review): presumably this is the checkpoint written by finbert.train — confirm.
# This rebinds `model`, which previously held the result of finbert.create_the_model().
model = AutoModelForSequenceClassification.from_pretrained(cl_path, cache_dir=None, num_labels=3)

In [17]:
# NOTE(review): this import belongs with the other imports in the first cell.
import nltk
# Fetch the NLTK 'punkt' data package.
# NOTE(review): presumably needed for the sentence splitting done by predict()/TextBlob — confirm.
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [18]:
# Sentence-level sentiment predictions for the first passage, returned as a DataFrame.
result = predict(text,model)

In [19]:
# Add TextBlob's polarity for every sentence as a comparison column.
# Assigning a list requires one polarity per row of `result`.
blob = TextBlob(text)
result['textblob_prediction'] = [
    sent.sentiment.polarity
    for sent in blob.sentences
]
result

Unnamed: 0,sentence,logit,prediction,sentiment_score,textblob_prediction
0,"Later that day Apple said it was revising down its earnings expectations in the fourth quarter of 2018, largely because of lower sales and signs of economic weakness in China.","[0.001831928, 0.98960215, 0.0085659465]",negative,-0.98777,0.051746
1,The news rapidly infected financial markets.,"[0.0040112287, 0.9658445, 0.030144237]",negative,-0.961833,0.0
2,Apple’s share price fell by around 7% in after-hours trading and the decline was extended to more than 10% when the market opened.,"[0.0017531227, 0.99091655, 0.00733037]",negative,-0.989163,0.5
3,"The dollar fell by 3.7% against the yen in a matter of minutes after the announcement, before rapidly recovering some ground.","[0.012198663, 0.98047537, 0.0073259403]",negative,-0.968277,0.0
4,Asian stockmarkets closed down on January 3rd and European ones opened lower.,"[0.0018882557, 0.99099207, 0.0071196854]",negative,-0.989104,-0.051111
5,Yields on government bonds fell as investors fled to the traditional haven in a market storm.,"[0.0024789155, 0.98728, 0.01024102]",negative,-0.984801,0.0


In [20]:
# Mean of the per-sentence sentiment scores, shown with two decimals.
print(f'Average sentiment is {result.sentiment_score.mean():.2f}.')

Average sentiment is -0.98.


Here is another example

In [21]:
# Second example passage: news text about the Prosus market debut in Amsterdam.
text2 = "Shares in the spin-off of South African e-commerce group Naspers surged more than 25% \
in the first minutes of their market debut in Amsterdam on Wednesday. Bob van Dijk, CEO of \
Naspers and Prosus Group poses at Amsterdam's stock exchange, as Prosus begins trading on the \
Euronext stock exchange in Amsterdam, Netherlands, September 11, 2019. REUTERS/Piroschka van de Wouw \
Prosus comprises Naspers’ global empire of consumer internet assets, with the jewel in the crown a \
31% stake in Chinese tech titan Tencent. There is 'way more demand than is even available, so that’s \
good,' said the CEO of Euronext Amsterdam, Maurice van Tilburg. 'It’s going to be an interesting \
hour of trade after opening this morning.' Euronext had given an indicative price of 58.70 euros \
per share for Prosus, implying a market value of 95.3 billion euros ($105 billion). The shares \
jumped to 76 euros on opening and were trading at 75 euros at 0719 GMT."

In [22]:
# Sentence-level model predictions for the second passage.
result2 = predict(text2, model)

# TextBlob polarity per sentence, stored next to the model output for
# comparison (one value per row of `result2`).
blob = TextBlob(text2)
result2['textblob_prediction'] = [
    sent.sentiment.polarity
    for sent in blob.sentences
]

In [23]:
# Display the per-sentence predictions for the second passage.
result2

Unnamed: 0,sentence,logit,prediction,sentiment_score,textblob_prediction
0,Shares in the spin-off of South African e-commerce group Naspers surged more than 25% in the first minutes of their market debut in Amsterdam on Wednesday.,"[0.9794534, 0.008575279, 0.011971273]",positive,0.970878,0.25
1,"Bob van Dijk, CEO of Naspers and Prosus Group poses at Amsterdam's stock exchange, as Prosus begins trading on the Euronext stock exchange in Amsterdam, Netherlands, September 11, 2019.","[0.012840262, 0.010072946, 0.97708684]",neutral,0.002767,0.0
2,"REUTERS/Piroschka van de Wouw Prosus comprises Naspers’ global empire of consumer internet assets, with the jewel in the crown a 31% stake in Chinese tech titan Tencent.","[0.04867328, 0.0034167252, 0.94790995]",neutral,0.045257,0.0
3,"There is 'way more demand than is even available, so that’s good,' said the CEO of Euronext Amsterdam, Maurice van Tilburg.","[0.97567016, 0.006590482, 0.017739302]",positive,0.96908,0.533333
4,'It’s going to be an interesting hour of trade after opening this morning.',"[0.9220724, 0.009341237, 0.06858641]",positive,0.912731,0.5
5,"Euronext had given an indicative price of 58.70 euros per share for Prosus, implying a market value of 95.3 billion euros ($105 billion).","[0.060572542, 0.0047025983, 0.93472487]",neutral,0.05587,0.0
6,The shares jumped to 76 euros on opening and were trading at 75 euros at 0719 GMT.,"[0.1863314, 0.0065774424, 0.8070911]",neutral,0.179754,0.0


In [24]:
# Mean of the per-sentence sentiment scores of the second passage, two decimals.
print(f'Average sentiment is {result2.sentiment_score.mean():.2f}.')

Average sentiment is 0.45.


In [25]:
# Environment setup that was run manually at some point (left disabled).
# NOTE(review): `pip install update urllib3` would install a package literally
# named `update`; `--upgrade urllib3` was probably intended — confirm before re-enabling.
#!pip install tensorflow==2.14.0
#!pip install update urllib3

#!pip install textattack


# NOTE(review): both names are already imported in the first cell; this repeat is redundant.
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Tokenizer from the "ProsusAI/finbert" checkpoint; paired with `trained_model`
# in the TextAttack wrapper created in the next cell.
tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
#model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")




In [26]:
# NOTE(review): this import belongs with the other imports in the first cell.
import textattack
# Wrap the fine-tuned model and its tokenizer so TextAttack can query it.
# Note that the attack targets `trained_model` (the object returned by
# finbert.train), not the `model` reloaded from `cl_path` above.
model_wrapper = textattack.models.wrappers.HuggingFaceModelWrapper(trained_model, tokenizer)


## Generate attack

In [33]:
# Class indices used by the attack datasets: positive=0, negative=1.
# Every other label is mapped to 2 (neutral), exactly as the previous
# if/elif/else chain did.
LABEL_TO_ID = {'positive': 0, 'negative': 1}
FALLBACK_LABEL_ID = 2


def _as_labelled_pairs(examples):
    """Convert examples exposing `.text` and `.label` into (text, class_id) tuples.

    Args:
        examples: Iterable of objects with a `text` attribute and a string
            `label` attribute.

    Returns:
        A list of `(text, class_id)` tuples in the same order as `examples`.
    """
    return [
        (example.text, LABEL_TO_ID.get(example.label, FALLBACK_LABEL_ID))
        for example in examples
    ]


# The same conversion is applied to both splits so their label encoding
# cannot drift apart.
datalist_test = _as_labelled_pairs(test_data)
dataset_test = textattack.datasets.Dataset(datalist_test)

datalist_train = _as_labelled_pairs(train_data)
dataset_train = textattack.datasets.Dataset(datalist_train)

In [None]:
#dataset_D = textattack.datasets.HuggingFaceDataset("financial_phrasebank", split = "train" , subset="sentences_50agree",label_map={0:1,1:2,2:0})
# Run the PWWSRen2019 attack recipe against every training example
# (num_examples=len(train_data)). Results are logged to log.csv and a
# checkpoint is written to ./checkpoints after every 100 attacked examples.
attack = textattack.attack_recipes.PWWSRen2019.build(model_wrapper)
#print(dataset_D.output_column)
attack_args = textattack.AttackArgs(num_examples=len(train_data), log_to_csv="log.csv", checkpoint_interval=100, checkpoint_dir="checkpoints", disable_stdout=True)
attacker = textattack.Attacker(attack, dataset_train, attack_args)
# The per-example attack results are kept in `train_attack`.
train_attack = attacker.attack_dataset()

[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
textattack: Unknown if model of class <class 'transformers.models.bert.modeling_bert.BertForSequenceClassification'> compatible with goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'>.
textattack: Logging to CSV at path log.csv


Attack(
  (search_method): GreedyWordSwapWIR(
    (wir_method):  weighted-saliency
  )
  (goal_function):  UntargetedClassification
  (transformation):  WordSwapWordNet
  (constraints): 
    (0): RepeatModification
    (1): StopwordModification
  (is_black_box):  True
) 



[Succeeded / Failed / Skipped / Total] 76 / 15 / 9 / 100:   3%|▎         | 100/3488 [06:28<3:39:26,  3.89s/it]textattack: Saving checkpoint under "checkpoints/1702744968559.ta.chkpt" at 2023-12-16 16:42:48 after 100 attacks.







[Succeeded / Failed / Skipped / Total] 150 / 35 / 15 / 200:   6%|▌         | 200/3488 [13:30<3:42:04,  4.05s/it]textattack: Saving checkpoint under "checkpoints/1702745390455.ta.chkpt" at 2023-12-16 16:49:50 after 200 attacks.







[Succeeded / Failed / Skipped / Total] 225 / 56 / 19 / 300:   9%|▊         | 300/3488 [20:27<3:37:21,  4.09s/it]textattack: Saving checkpoint under "checkpoints/1702745807159.ta.chkpt" at 2023-12-16 16:56:47 after 300 attacks.







[Succeeded / Failed / Skipped / Total] 300 / 71 / 29 / 400:  11%|█▏        | 400/3488 [26:34<3:25:08,  3.99s/it]textattack: Saving checkpoint under "checkpoints/1702746174301.ta.chkpt" at 2023-12-16 17:02:54 after 400 attacks.







[Succeeded / Failed / Skipped / Total] 366 / 93 / 41 / 500:  14%|█▍        | 500/3488 [33:49<3:22:09,  4.06s/it]textattack: Saving checkpoint under "checkpoints/1702746609587.ta.chkpt" at 2023-12-16 17:10:09 after 500 attacks.







[Succeeded / Failed / Skipped / Total] 423 / 107 / 46 / 576:  17%|█▋        | 576/3488 [39:01<3:17:17,  4.06s/it]

In [32]:
#dataset_T = textattack.datasets.HuggingFaceDataset("financial_phrasebank", split = "testing" , subset="sentences_50agree",label_map={0:1,1:2,2:0})
# Run the PWWSRen2019 attack recipe against every test example
# (num_examples=len(test_data)). Results are logged to log_T.csv and a
# checkpoint is written to ./checkpoints after every 100 attacked examples.
#print(dataset_test.output_column)
attack = textattack.attack_recipes.PWWSRen2019.build(model_wrapper)

attack_args = textattack.AttackArgs(num_examples=len(test_data), log_to_csv="log_T.csv", checkpoint_interval=100, checkpoint_dir="checkpoints", disable_stdout=True)
attacker_T = textattack.Attacker(attack, dataset_test, attack_args)
# Keep the per-example results in a variable, mirroring `train_attack`,
# instead of letting the notebook echo the whole result list as cell output.
test_attack = attacker_T.attack_dataset()

[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
textattack: Unknown if model of class <class 'transformers.models.bert.modeling_bert.BertForSequenceClassification'> compatible with goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'>.
textattack: Logging to CSV at path log_T.csv


Attack(
  (search_method): GreedyWordSwapWIR(
    (wir_method):  weighted-saliency
  )
  (goal_function):  UntargetedClassification
  (transformation):  WordSwapWordNet
  (constraints): 
    (0): RepeatModification
    (1): StopwordModification
  (is_black_box):  True
) 



[Succeeded / Failed / Skipped / Total] 69 / 16 / 15 / 100:  10%|█         | 100/970 [06:51<59:41,  4.12s/it]textattack: Saving checkpoint under "checkpoints/1702740835979.ta.chkpt" at 2023-12-16 15:33:55 after 100 attacks.







[Succeeded / Failed / Skipped / Total] 135 / 36 / 29 / 200:  21%|██        | 200/970 [12:49<49:22,  3.85s/it]textattack: Saving checkpoint under "checkpoints/1702741193760.ta.chkpt" at 2023-12-16 15:39:53 after 200 attacks.







[Succeeded / Failed / Skipped / Total] 204 / 52 / 44 / 300:  31%|███       | 300/970 [19:35<43:46,  3.92s/it]textattack: Saving checkpoint under "checkpoints/1702741600113.ta.chkpt" at 2023-12-16 15:46:40 after 300 attacks.







[Succeeded / Failed / Skipped / Total] 267 / 72 / 61 / 400:  41%|████      | 400/970 [25:50<36:48,  3.88s/it]textattack: Saving checkpoint under "checkpoints/1702741974318.ta.chkpt" at 2023-12-16 15:52:54 after 400 attacks.







[Succeeded / Failed / Skipped / Total] 327 / 91 / 82 / 500:  52%|█████▏    | 500/970 [31:11<29:19,  3.74s/it]textattack: Saving checkpoint under "checkpoints/1702742295603.ta.chkpt" at 2023-12-16 15:58:15 after 500 attacks.







[Succeeded / Failed / Skipped / Total] 392 / 114 / 94 / 600:  62%|██████▏   | 600/970 [38:03<23:27,  3.81s/it]textattack: Saving checkpoint under "checkpoints/1702742707391.ta.chkpt" at 2023-12-16 16:05:07 after 600 attacks.







[Succeeded / Failed / Skipped / Total] 451 / 135 / 114 / 700:  72%|███████▏  | 700/970 [44:13<17:03,  3.79s/it]textattack: Saving checkpoint under "checkpoints/1702743077308.ta.chkpt" at 2023-12-16 16:11:17 after 700 attacks.







[Succeeded / Failed / Skipped / Total] 519 / 150 / 131 / 800:  82%|████████▏ | 800/970 [50:42<10:46,  3.80s/it]textattack: Saving checkpoint under "checkpoints/1702743466586.ta.chkpt" at 2023-12-16 16:17:46 after 800 attacks.







[Succeeded / Failed / Skipped / Total] 585 / 168 / 147 / 900:  93%|█████████▎| 900/970 [57:25<04:28,  3.83s/it]textattack: Saving checkpoint under "checkpoints/1702743870224.ta.chkpt" at 2023-12-16 16:24:30 after 900 attacks.







[Succeeded / Failed / Skipped / Total] 628 / 184 / 158 / 970: 100%|██████████| 970/970 [1:01:51<00:00,  3.83s/it]


+-------------------------------+--------+
| Attack Results                |        |
+-------------------------------+--------+
| Number of successful attacks: | 628    |
| Number of failed attacks:     | 184    |
| Number of skipped attacks:    | 158    |
| Original accuracy:            | 83.71% |
| Accuracy under attack:        | 18.97% |
| Attack success rate:          | 77.34% |
| Average perturbed word %:     | 12.69% |
| Average num. words per input: | 21.23  |
| Avg num queries:              | 141.5  |
+-------------------------------+--------+





[<textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x79ef51c297b0>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x79ef71d493c0>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x79f03960e950>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x79ef1ff7fbe0>,
 <textattack.attack_results.failed_attack_result.FailedAttackResult at 0x79f03960d4b0>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x79ef1ffd4250>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x79ef1ff7e7d0>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x79ef1ff16a40>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x79ef1e514370>,
 <textattack.attack_results.successful_attack_result.SuccessfulAttackResult at 0x79ef1ff7c4c0>,
 <textattack.attack_results.successful_attack_re