In [None]:
# Colab only: mount Google Drive and change into the project folder so that the
# relative file names used by the later cells resolve.
# A GPU runtime is needed for the GPU-enabled models trained further down.
# NOTE(review): the Drive path below is specific to one folder layout; adjust it when running elsewhere.

from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/SMU_MITB_NLP/project/

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/SMU_MITB_NLP/project


In [None]:
# Load the labelled FOMC sentences and the full FOMC statements from the current
# working directory (the Drive project folder when running on Colab).
# The two columns of the statements sheet are named 'Date' and 'Statements'.

import pandas as pd
labelled_sentences = pd.read_excel('FOMC Labelled Sentences.xlsx')
statements = pd.read_excel('FOMC Statements 1997-2023.xlsx', names=['Date','Statements'])

In [None]:
# Working copy of the labelled sentences for the baseline (original data only) experiment
labelled_sentences_og = labelled_sentences.copy()

In [None]:
# Working copy of the labelled sentences for the augmentation experiment.
# The 'Remove' label is kept as a class of its own, i.e. 6 classes in total.
labelled_sentences_aug = labelled_sentences.copy()

# Number of labelled sentences per class before any splitting
class_counts_aug = labelled_sentences_aug['Score'].value_counts()
print(class_counts_aug)

0         57
-1        42
0.5       36
1         32
-0.5      21
Remove    12
Name: Score, dtype: int64


In [None]:
!pip install nlpaug # needed if using colab, else skip

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import nlpaug.augmenter.char as nac
import nlpaug.augmenter.word as naw
import nlpaug.augmenter.sentence as nas
import nlpaug.flow as nafc
from gensim.models import KeyedVectors

In [None]:
# Original (non-augmented) data: hold out a stratified 20% as the test set.
from sklearn.model_selection import train_test_split

# Cast the labels to strings, then keep only the sentence and score columns.
labelled_sentences_og = labelled_sentences_og.assign(
    Score=labelled_sentences_og['Score'].astype(str)
)[['Sentence', 'Score']]

train_og, test_og = train_test_split(
    labelled_sentences_og,
    test_size=0.2,
    random_state=23,
    stratify=labelled_sentences_og['Score'],
)

# Per-class sample counts of each split, training split first.
class_counts_train = train_og['Score'].value_counts()
class_counts_test = test_og['Score'].value_counts()
print(class_counts_train, class_counts_test, sep='\n')

0         45
-1        34
0.5       29
1         26
-0.5      17
Remove     9
Name: Score, dtype: int64
0         12
-1         8
0.5        7
1          6
-0.5       4
Remove     3
Name: Score, dtype: int64


In [None]:
# Data for the augmentation experiment: hold out a stratified 20% as the test set.
# Only the training part is augmented later; the test part stays as labelled.
from sklearn.model_selection import train_test_split

# Cast the labels to strings, then keep only the sentence and score columns.
labelled_sentences_aug = labelled_sentences_aug.assign(
    Score=labelled_sentences_aug['Score'].astype(str)
)[['Sentence', 'Score']]

train_aug, test_aug = train_test_split(
    labelled_sentences_aug,
    test_size=0.2,
    random_state=23,
    stratify=labelled_sentences_aug['Score'],
)

# Per-class sample counts of each split, training split first.
class_counts_train_aug = train_aug['Score'].value_counts()
class_counts_test_aug = test_aug['Score'].value_counts()
print(class_counts_train_aug, class_counts_test_aug, sep='\n')

0         45
-1        34
0.5       29
1         26
-0.5      17
Remove     9
Name: Score, dtype: int64
0         12
-1         8
0.5        7
1          6
-0.5       4
Remove     3
Name: Score, dtype: int64


In [None]:
# Function to create new dataframe with augmented data and label by providing augmenter

def augment_dataset(data, augmenter, num_augmented=2):
    """Build a dataframe of augmented sentences that keep their source row's label.

    Args:
        data: DataFrame with a 'Sentence' column (text) and a 'Score' column (label).
        augmenter: Object exposing ``augment(text)``. The result may be a sequence
            of strings (joined with single spaces) or a plain string (used as is).
        num_augmented: Number of augmented variants generated per input row.

    Returns:
        A new DataFrame with the columns ['Sentence', 'Score'] and
        ``len(data) * num_augmented`` rows. ``data`` itself is not modified.
    """
    augmented_rows = []

    # Walking the two columns directly avoids building a Series per row (iterrows).
    for sentence, score in zip(data['Sentence'], data['Score']):
        for _ in range(num_augmented):
            result = augmenter.augment(sentence)
            # Joining a bare string would insert a space between every character,
            # so only join when the augmenter handed back a sequence of strings.
            text = result if isinstance(result, str) else ' '.join(result)
            augmented_rows.append([text, score])

    return pd.DataFrame(augmented_rows, columns=['Sentence', 'Score'])

def combine_datasets(original_data, augmented_data):
    """Stack the original rows on top of the augmented rows.

    Returns a new dataframe whose index is renumbered from 0, so index labels
    of the two inputs cannot collide.
    """
    frames = [original_data, augmented_data]
    return pd.concat(frames, ignore_index=True)

In [None]:
# Generate WordNet-synonym variants of every training sentence
# (augment_dataset's default of num_augmented=2 variants per sentence applies).
# NOTE(review): no random seed is set in this cell, so the generated text may differ between runs — confirm.
aug = naw.SynonymAug(aug_src='wordnet')
augmented_data = augment_dataset(train_aug, aug)
augmented_data

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


Unnamed: 0,Sentence,Score
0,Statement Regarding Dealings in Bureau Mortgag...,0
1,Statement Regarding Transactions in Agency Mor...,0
2,"In particular, the Committee decided today to ...",-1
3,"In particular, the Committee decided today to ...",-1
4,Inflation is expected to remain low in the nea...,-0.5
...,...,...
315,In light of the current shortfall of inflation...,-0.5
316,The experience of the final several year has r...,0.5
317,The experience of the last various years has r...,0.5
318,Vote for the FOMC monetary policy action were:...,Remove


In [None]:
# Training set for the augmentation experiment: the original training rows followed
# by their augmented variants. Only train_aug is extended; test_aug is not touched here.
train_augmented_aug = combine_datasets(train_aug, augmented_data)
train_augmented_aug

Unnamed: 0,Sentence,Score
0,Statement Regarding Transactions in Agency Mor...,0
1,"In particular, the Committee decided today to ...",-1
2,Inflation is expected to remain low in the nea...,-0.5
3,In light of the current shortfall of inflation...,-0.5
4,"In addition, the Committee will continue reduc...",1
...,...,...
475,In light of the current shortfall of inflation...,-0.5
476,The experience of the final several year has r...,0.5
477,The experience of the last various years has r...,0.5
478,Vote for the FOMC monetary policy action were:...,Remove


In [None]:
!pip install transformers --upgrade

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.30.0-py3-none-any.whl (7.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m65.7 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)
  Downloading huggingface_hub-0.15.1-py3-none-any.whl (236 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m236.8/236.8 kB[0m [31m30.0 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m125.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90

In [None]:
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from keras.utils import pad_sequences
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForSequenceClassification, AdamW, get_linear_schedule_with_warmup, RobertaForSequenceClassification, RobertaTokenizer
import pandas as pd
import numpy as np
from collections import defaultdict
import random
import time
import datetime
from sklearn.metrics import f1_score, balanced_accuracy_score, confusion_matrix, precision_score, recall_score, classification_report
from torch.nn import functional as F

In [None]:
class MCRoberta(RobertaForSequenceClassification):
    """RoBERTa sequence classifier that stays in training mode permanently.

    Keeping training mode on keeps the dropout layers active at prediction time,
    which is what the Monte Carlo dropout sampling in ``RobertaSentimentClassifier``
    relies on to obtain differing forward passes for the same input.
    """

    def train(self, mode=True):
        """Put this module and all sub-modules into training mode.

        ``mode`` is accepted only for compatibility with ``nn.Module.train`` and
        is ignored, so ``train(False)`` cannot switch dropout off.

        Returns:
            ``self``, matching ``nn.Module.train``.
        """
        return super().train(True)

    def eval(self):
        """Same as ``train()``: evaluation mode is deliberately unavailable.

        Returns:
            ``self``, matching ``nn.Module.eval``.
        """
        return super().train(True)

class RobertaSentimentClassifier:
    def __init__(self, model_name='roberta-base', num_labels=6, device='cuda', max_length=100):
        """Load the tokenizer and the MC-dropout RoBERTa classifier.

        Args:
            model_name: Checkpoint name passed to both ``from_pretrained`` calls.
            num_labels: Number of output classes of the classification head.
            device: Requested torch device; 'cpu' is used instead whenever CUDA
                is not available.
            max_length: Token length every sentence is padded or truncated to.
        """
        self.max_length = max_length

        chosen_device = device if torch.cuda.is_available() else 'cpu'
        self.device = torch.device(chosen_device)

        self.tokenizer = RobertaTokenizer.from_pretrained(model_name, do_lower_case=True)

        classifier = MCRoberta.from_pretrained(
            model_name,
            num_labels=num_labels,
            output_attentions=False,
            output_hidden_states=False,
        )
        self.model = classifier.to(self.device)

    def tokenize(self, sentences):
        input_ids, attention_masks = [], []
        for sent in sentences:
            encoded_dict = self.tokenizer.encode_plus(
                sent,
                add_special_tokens=True,
                max_length=self.max_length,
                padding='max_length',
                truncation=True,
                return_attention_mask=True,
                return_tensors='pt',
            )
            input_ids.append(encoded_dict['input_ids'])
            attention_masks.append(encoded_dict['attention_mask'])
        input_ids = torch.cat(input_ids, dim=0)
        attention_masks = torch.cat(attention_masks, dim=0)
        return input_ids, attention_masks

    def prepare_data(self, df, test_size=0.2, batch_size=32):
        x_train, x_test, y_train, y_test = train_test_split(df['Sentence'], df['Score'], test_size=test_size, random_state=23)
        score_mapping = {'-1': 0, '-0.5': 1, '0': 2, '0.5': 3, '1': 4, 'Remove': 5}
        y_train = y_train.map(score_mapping)
        y_test = y_test.map(score_mapping)

        train_input_ids, train_attention_mask = self.tokenize(x_train.values.tolist())
        dev_input_ids, dev_attention_mask = self.tokenize(x_test.values.tolist())

        train_labels = torch.tensor(y_train.values)
        dev_labels = torch.tensor(y_test.values)

        train_data = TensorDataset(train_input_ids, train_attention_mask, train_labels)
        train_sampler = RandomSampler(train_data, replacement=False, generator=torch.Generator().manual_seed(23))
        self.train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)
        dev_data = TensorDataset(dev_input_ids, dev_attention_mask, dev_labels)
        dev_sampler = SequentialSampler(dev_data)
        self.dev_dataloader = DataLoader(dev_data, sampler=dev_sampler, batch_size=batch_size)
        return self

    def train(self, epochs, learning_rate=5e-5, eps=1e-8):
        """Fine-tune the classifier and print dev-set metrics after every epoch.

        ``prepare_data`` must have been called first, because this method reads
        ``self.train_dataloader`` and ``self.dev_dataloader``.

        Dev accuracy and dev loss are averaged over samples rather than over
        batches, so a smaller final batch does not get extra weight.

        Args:
            epochs: Number of passes over the training DataLoader.
            learning_rate: Learning rate handed to AdamW.
            eps: Epsilon handed to AdamW.

        Returns:
            ``self``, so calls can be chained.
        """
        optimizer = AdamW(self.model.parameters(), lr=learning_rate, eps=eps)

        # The scheduler is stepped once per batch, so it needs the total batch count.
        total_steps = len(self.train_dataloader) * epochs

        # Create the learning rate scheduler
        scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

        for epoch_i in range(0, epochs):
            print('\n======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
            t0 = time.time()

            total_train_loss = 0

            self.model.train()

            for batch in self.train_dataloader:

                b_input_ids = batch[0].to(self.device)
                b_input_mask = batch[1].to(self.device)
                b_labels = batch[2].to(self.device)

                self.model.zero_grad()

                # Passing labels makes the model return the loss alongside the logits.
                outputs = self.model(b_input_ids,
                                     token_type_ids=None,
                                     attention_mask=b_input_mask,
                                     labels=b_labels)

                loss = outputs.loss

                total_train_loss += loss.item()

                loss.backward()

                # Clip the gradient norm to 1.0 before the parameter update.
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)

                optimizer.step()

                scheduler.step()

            avg_train_loss = total_train_loss / len(self.train_dataloader)

            print("\n  Average training loss: {0:.2f}".format(avg_train_loss))
            print("  Training epoch took: {:}".format(self.format_time(time.time() - t0)))

            t0 = time.time()

            self.model.eval()

            # Sum of per-sample losses; each batch loss is weighted by its batch size.
            total_eval_loss = 0

            correct_preds_per_class = defaultdict(int)
            total_preds_per_class = defaultdict(int)

            for batch in self.dev_dataloader:

                b_input_ids = batch[0].to(self.device)
                b_input_mask = batch[1].to(self.device)
                b_labels = batch[2].to(self.device)

                with torch.no_grad():
                    outputs = self.model(b_input_ids,
                                         token_type_ids=None,
                                         attention_mask=b_input_mask,
                                         labels=b_labels)

                loss = outputs.loss
                logits = outputs.logits

                logits = logits.detach().cpu().numpy()
                label_ids = b_labels.to('cpu').numpy()

                total_eval_loss += loss.item() * len(label_ids)

                pred_flat = np.argmax(logits, axis=1).flatten()

                # Tally hits and sample counts per true class.
                for label in np.unique(label_ids):
                    correct_preds_per_class[label] += np.sum((pred_flat == label_ids) & (label_ids == label))
                    total_preds_per_class[label] += np.sum(label_ids == label)

            # Overall figures follow from the per-class tallies.
            total_correct = sum(correct_preds_per_class.values())
            total_seen = sum(total_preds_per_class.values())

            avg_val_accuracy = total_correct / total_seen
            print("\n  Accuracy: {0:.2f}".format(avg_val_accuracy))

            avg_val_loss = total_eval_loss / total_seen

            for label in sorted(total_preds_per_class):
                accuracy = correct_preds_per_class[label] / total_preds_per_class[label]
                print(f"Accuracy for class {label}: {accuracy:.2f}")

            print("  Validation Loss: {0:.2f}".format(avg_val_loss))
            print("  Validation took: {:}".format(self.format_time(time.time() - t0)))

        print("\nTraining complete!")

        return self

    def predict(self, test):
        test_input_ids, test_attention_mask = self.tokenize(test['Sentence'].values.tolist())

        # Convert inputs to tensors
        test_inputs = torch.tensor(test_input_ids)
        test_masks = torch.tensor(test_attention_mask)

        # Create DataLoader for the test data
        batch_size = 32

        test_data = TensorDataset(test_inputs, test_masks)
        test_sampler = SequentialSampler(test_data)
        test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=batch_size)

        # For the MCRoberta model, we keep it in train mode to enable dropout
        self.model.train()

        predicted_scores = []
        uncertainties = []

        n_mc_samples = 30

        for batch in test_dataloader:
            batch = tuple(t.to(self.device) for t in batch)
            b_input_ids, b_input_mask = batch

            mc_samples = []

            for _ in range(n_mc_samples):
                with torch.no_grad():
                    outputs = self.model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)
                    
                logits = outputs[0]
                logits = logits.detach().cpu().numpy()
                mc_samples.append(logits)

            mc_samples = np.array(mc_samples)
            predicted_score = mc_samples.mean(axis=0)
            uncertainty = mc_samples.std(axis=0)

            predicted_scores.extend(np.argmax(predicted_score, axis=1).flatten())
            uncertainties.extend(uncertainty.max(axis=1).flatten())

        score_mapping = {'-1': 0, '-0.5': 1, '0': 2, '0.5': 3, '1': 4, 'Remove': 5}
        inverse_class_mapping = {v: k for k, v in score_mapping.items()}

        # Inverse map the classes to their original values
        predicted_scores = np.vectorize(inverse_class_mapping.get)(predicted_scores)

        accuracy = np.mean(predicted_scores == test['Score'].values)
        print(f'Test Accuracy: {accuracy*100:.2f}%')

        # Add the original sentences, their predicted scores and uncertainties to a DataFrame
        predictions_df = pd.DataFrame({'Sentence': test['Sentence'], 'Ground Truth': test['Score'], 'Predicted_Score': predicted_scores, 'Uncertainty': uncertainties})

        return predictions_df  

    def predict_scores(self, test):
        test_input_ids, test_attention_mask = self.tokenize(test['Sentence'].values.tolist())

        # Convert inputs to tensors
        test_inputs = test_input_ids.clone().detach()
        test_masks = test_attention_mask.clone().detach()

        # Create DataLoader for the test data
        batch_size = 32

        test_data = TensorDataset(test_inputs, test_masks)
        test_sampler = SequentialSampler(test_data)
        test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=batch_size)

        # For the MCRoberta model, we keep it in train mode to enable dropout
        self.model.train()

        predicted_scores = []
        uncertainties = []

        n_mc_samples = 30

        for batch in test_dataloader:
            batch = tuple(t.to(self.device) for t in batch)
            b_input_ids, b_input_mask = batch

            mc_samples = []

            for _ in range(n_mc_samples):
                with torch.no_grad():
                    outputs = self.model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)
                    
                logits = outputs[0]
                logits = logits.detach().cpu().numpy()
                mc_samples.append(logits)

            mc_samples = np.array(mc_samples)
            predicted_score = mc_samples.mean(axis=0)
            uncertainty = mc_samples.std(axis=0)

            predicted_scores.extend(np.argmax(predicted_score, axis=1).flatten())
            uncertainties.extend(uncertainty.max(axis=1).flatten())

        score_mapping = {'-1': 0, '-0.5': 1, '0': 2, '0.5': 3, '1': 4, 'Remove': 5}
        inverse_class_mapping = {v: k for k, v in score_mapping.items()}

        # Inverse map the classes to their original values
        predicted_scores = np.vectorize(inverse_class_mapping.get)(predicted_scores)

        return predicted_scores 

    def compute_metrics(self, y_true, predicted_scores):
        """Print and return weighted classification metrics.

        Both inputs are converted to NumPy string arrays first, so the true and
        the predicted labels are always compared as the same type.

        Args:
            y_true: Array-like of true labels.
            predicted_scores: Array-like of predicted labels, aligned with ``y_true``.

        Returns:
            Dict with the keys 'F1 Score', 'Balanced Accuracy', 'Precision',
            'Recall' (floats) and 'Confusion Matrix'. F1, precision and recall
            are computed with ``average='weighted'``.
        """
        # Convert arrays to string type
        y_true_str = np.asarray(y_true).astype(str)
        predicted_scores_str = np.asarray(predicted_scores).astype(str)

        # Compute F1 score and Balanced Accuracy
        f1 = f1_score(y_true_str, predicted_scores_str, average='weighted')
        balanced_accuracy = balanced_accuracy_score(y_true_str, predicted_scores_str)

        # Compute Precision and Recall
        precision = precision_score(y_true_str, predicted_scores_str, average='weighted')
        recall = recall_score(y_true_str, predicted_scores_str, average='weighted')

        # Compute Confusion Matrix
        confusion_mat = confusion_matrix(y_true_str, predicted_scores_str)

        # Print the scores
        print(f'F1 Score: {f1*100:.2f}%')
        print(f'Balanced Accuracy: {balanced_accuracy*100:.2f}%')
        print(f'Precision: {precision*100:.2f}%')
        print(f'Recall: {recall*100:.2f}%')

        return {
            "F1 Score": f1,
            "Balanced Accuracy": balanced_accuracy,
            "Precision": precision,
            "Recall": recall,
            "Confusion Matrix": confusion_mat
        }

    @staticmethod
    def flat_accuracy(preds, labels):
        pred_flat = np.argmax(preds, axis=1).flatten()
        labels_flat = labels.flatten()
        return np.sum(pred_flat == labels_flat) / len(labels_flat)

    @staticmethod
    def flat_accuracy_per_class(preds, labels):
        pred_flat = np.argmax(preds, axis=1).flatten()
        labels_flat = labels.flatten()

        correct_per_class = defaultdict(int)
        total_per_class = defaultdict(int)

        for pred, label in zip(pred_flat, labels_flat):
            if pred == label:
                correct_per_class[label] += 1
            total_per_class[label] += 1

        accuracies_per_class = {label: correct / total for label, correct, total in zip(correct_per_class.keys(), correct_per_class.values(), total_per_class.values())}
        return accuracies_per_class

    @staticmethod
    # Function to format time
    def format_time(elapsed):
        elapsed_rounded = int(round((elapsed)))
        return str(datetime.timedelta(seconds=elapsed_rounded))


# RoBERTa with original data

In [None]:
# Baseline: fine-tune roberta-base on the original training split.
# prepare_data holds out a further 20% of train_og as the dev set reported after
# each epoch; test_og is not passed in here.
model_og = RobertaSentimentClassifier(model_name='roberta-base', num_labels=6, device='cuda', max_length=200)

model_og.prepare_data(train_og, test_size=0.2, batch_size=32)
model_og.train(epochs=18, learning_rate=5e-5, eps=1e-8)

Some weights of the model checkpoint at roberta-base were not used when initializing MCRoberta: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing MCRoberta from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MCRoberta from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MCRoberta were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and infer



  Average training loss: 1.79
  Training epoch took: 0:00:04

  Accuracy: 0.25
Accuracy for class 0: 0.00
Accuracy for class 1: 0.00
Accuracy for class 2: 1.00
Accuracy for class 3: 0.00
Accuracy for class 4: 0.00
Accuracy for class 5: 0.00
  Validation Loss: 1.71
  Validation took: 0:00:00


  Average training loss: 1.67
  Training epoch took: 0:00:04

  Accuracy: 0.25
Accuracy for class 0: 0.00
Accuracy for class 1: 0.00
Accuracy for class 2: 1.00
Accuracy for class 3: 0.00
Accuracy for class 4: 0.00
Accuracy for class 5: 0.00
  Validation Loss: 1.65
  Validation took: 0:00:00


  Average training loss: 1.56
  Training epoch took: 0:00:04

  Accuracy: 0.31
Accuracy for class 0: 0.57
Accuracy for class 1: 0.00
Accuracy for class 2: 0.75
Accuracy for class 3: 0.00
Accuracy for class 4: 0.00
Accuracy for class 5: 0.00
  Validation Loss: 1.51
  Validation took: 0:00:00


  Average training loss: 1.38
  Training epoch took: 0:00:04

  Accuracy: 0.47
Accuracy for class 0: 0.57
Accuracy f

<__main__.RobertaSentimentClassifier at 0x7f8091828c70>

In [None]:
# Evaluate the baseline model on the held-out test split; the returned frame
# holds the prediction and its MC-dropout uncertainty for every test sentence.
predictions = model_og.predict(test_og)

predictions

  test_inputs = torch.tensor(test_input_ids)
  test_masks = torch.tensor(test_attention_mask)


Test Accuracy: 60.00%


Unnamed: 0,Sentence,Ground Truth,Predicted_Score,Uncertainty
150,"In any event, the Committee will respond to ch...",0,0,0.214178
192,Economic growth appears to have been moderate ...,0,-1,0.664381
69,The Committee will carefully monitor actual an...,0,0,0.20021
81,Household spending and business fixed investme...,0.5,1,0.53772
23,\n\n_x000D_\nThe Federal Open Market Committee...,0,1,0.575925
51,"This policy, by keeping the Committee's holdin...",-0.5,-1,0.507897
126,In view of realized and expected labor market ...,1,1,0.452922
194,Although economic activity is likely to remain...,-1,-1,0.271582
131,The Committee will continue to assess the effe...,0,0,0.400178
82,When the Committee decides to begin to remove ...,0.5,0,0.478268


In [None]:
# Weighted F1 / precision / recall, balanced accuracy and confusion matrix of the baseline predictions
metrics = model_og.compute_metrics(test_og['Score'].values, predictions['Predicted_Score'].values)

F1 Score: 59.18%
Balanced Accuracy: 53.37%
Precision: 59.48%
Recall: 60.00%


In [None]:
# Fresh copy of the full FOMC statements, to be scored with the baseline model
statements_test = statements.copy()
statements_test

Unnamed: 0,Date,Statements
0,1997-03-25,_x000D_\n_x000D_\n\tThe Federal Open Market Co...
1,1998-09-29,_x000D_\nThe Federal Open Market Committee dec...
2,1998-10-15,_x000D_\nThe Federal Reserve today announced t...
3,1998-11-17,_x000D_\nThe Federal Reserve today announced t...
4,1999-05-18,_x000D_\nThe Federal Reserve released the foll...
...,...,...
201,2022-09-21,\nRecent indicators point to modest growth in ...
202,2022-11-02,\nRecent indicators point to modest growth in ...
203,2022-12-14,\nRecent indicators point to modest growth in ...
204,2023-02-01,\nRecent indicators point to modest growth in ...


In [None]:
from tqdm import tqdm
import numpy as np

# One progress tick per statement; ticked manually from inside cal_scores.
progress_bar = tqdm(total=len(statements_test), ncols=75, bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt}')

def cal_scores(x):
    """Score one statement sentence by sentence with the baseline model.

    The statement text is cut at every '.', blank fragments are dropped, and
    each remaining fragment is sent to ``model_og.predict_scores`` on its own.

    Returns:
        List with one ``predict_scores`` result per non-blank fragment.
    """
    progress_bar.update(1)
    sentence_scores = []
    for fragment in x.split('.'):
        text = fragment.strip()
        if not text:
            continue
        single_row = pd.DataFrame({'Sentence': [text]})
        sentence_scores.append(model_og.predict_scores(single_row))
    return sentence_scores

# apply the function to the 'Statements' column
statements_test['Score'] = statements_test['Statements'].apply(cal_scores)

progress_bar.close()

100%|█████████████████████████████████████████████████████████████| 206/206


In [None]:
# Inspect the per-sentence labels the baseline model assigned to each statement
statements_test

Unnamed: 0,Date,Statements,Score
0,1997-03-25,_x000D_\n_x000D_\n\tThe Federal Open Market Co...,"[[1], [-1], [0.5], [0.5], [0], [0]]"
1,1998-09-29,_x000D_\nThe Federal Open Market Committee dec...,"[[1], [-1], [0], [0], [0.5], [0], [0]]"
2,1998-10-15,_x000D_\nThe Federal Reserve today announced t...,"[[1], [-1], [-1], [0.5], [0], [0], [0], [0]]"
3,1998-11-17,_x000D_\nThe Federal Reserve today announced t...,"[[1], [-1], [-0.5], [0.5], [0], [0], [0]]"
4,1999-05-18,_x000D_\nThe Federal Reserve released the foll...,"[[1], [0], [1], [-1], [0]]"
...,...,...,...
201,2022-09-21,\nRecent indicators point to modest growth in ...,"[[0], [1], [1], [-1], [1], [0], [0], [1], [1],..."
202,2022-11-02,\nRecent indicators point to modest growth in ...,"[[0], [1], [1], [-1], [1], [0], [0], [1], [-0...."
203,2022-12-14,\nRecent indicators point to modest growth in ...,"[[0.5], [1], [1], [-1], [1], [0], [0], [1], [-..."
204,2023-02-01,\nRecent indicators point to modest growth in ...,"[[0], [1], [-0.5], [-1], [0], [0], [1], [-0.5]..."


In [None]:
from scipy import stats
from collections import Counter
import statistics as s

def calculate_average(score_list):
    """Mean of the numeric sentence labels of one statement.

    Args:
        score_list: Sequence of single-element containers, each holding one
            label as a string (a number such as '0.5', or 'Remove').

    Returns:
        The mean of all labels other than 'Remove', or ``np.nan`` when nothing
        is left to average or the input is malformed.
    """
    try:
        # Filter out 'Remove' and convert the remaining labels to float.
        scores = [float(score[0]) for score in score_list if score[0] != 'Remove']
    except (TypeError, ValueError, IndexError):
        # Best effort, as before: a malformed entry yields NaN instead of aborting
        # a whole DataFrame.apply, but unrelated errors are no longer swallowed.
        return np.nan

    if not scores:
        # np.mean([]) would also give NaN, but only after emitting a RuntimeWarning.
        return np.nan

    return np.mean(scores)

def round_to_half(number):
    """Round ``number`` to the nearest multiple of 0.5.

    NaN is passed through as NaN instead of raising, because
    ``calculate_average`` returns NaN for statements without numeric labels.
    Exact ties are resolved by Python's built-in ``round`` on the doubled value.
    """
    if np.isnan(number):
        return np.nan
    return round(number * 2) / 2

def calculate_mode(score_list):
    """Most frequent numeric sentence label of one statement, as a float.

    Args:
        score_list: Sequence of labels, each either a string or a single-element
            container holding a string (a number such as '0.5', or 'Remove').

    Returns:
        The most frequent label other than 'Remove' converted to float, or
        ``np.nan`` when no such label exists. Ties go to the label that sorts
        first as a string, which keeps the tie-break of the earlier
        ``scipy.stats.mode``-based version of this function.
    """
    labels = []
    for item in score_list:
        label = item if isinstance(item, str) else item[0]
        if label != 'Remove':
            labels.append(str(label))

    if not labels:
        return np.nan

    # Counting the strings directly avoids handing non-numeric data to scipy.
    counts = Counter(labels)
    highest = max(counts.values())
    return float(min(label for label, count in counts.items() if count == highest))

def calculate_mode_average(score_list):
    """Average of the most frequent numeric labels, rounded to the nearest 0.5.

    Args:
        score_list: Sequence of single-element containers, each holding one
            label as a string (a number such as '0.5', or 'Remove').

    Returns:
        The mode of all labels other than 'Remove'; when several labels tie,
        their average. The result is rounded to a multiple of 0.5.
        ``np.nan`` when no numeric label is present.
    """
    # Test the label itself, not its container, so list-wrapped 'Remove'
    # entries are dropped the same way as in calculate_average.
    scores = [entry[0] for entry in score_list if entry[0] != 'Remove']
    if not scores:
        return np.nan

    # return average if multiple modes found
    modes = [float(label) for label in s.multimode(scores)]
    mode = sum(modes) / len(modes)

    return round(mode * 2) / 2

# Collapse each statement's per-sentence labels into statement-level scores.
# Score_mean_round is derived from Score_mean, so the column order matters.
statements_test = statements_test.assign(
    Score_mean=lambda frame: frame['Score'].apply(calculate_average),
    Score_mean_round=lambda frame: frame['Score_mean'].apply(round_to_half),
    Score_mode=lambda frame: frame['Score'].apply(calculate_mode),
    Score_mode_multiple=lambda frame: frame['Score'].apply(calculate_mode_average),
)
statements_test

  mode = stats.mode(scores)
  mode = stats.mode(scores)


Unnamed: 0,Date,Statements,Score,Score_mean,Score_mean_round,Score_mode,Score_mode_multiple
0,1997-03-25,_x000D_\n_x000D_\n\tThe Federal Open Market Co...,"[[1], [-1], [0.5], [0.5], [0], [0]]",0.166667,0.0,0.0,0.0
1,1998-09-29,_x000D_\nThe Federal Open Market Committee dec...,"[[1], [-1], [0], [0], [0.5], [0], [0]]",0.071429,0.0,0.0,0.0
2,1998-10-15,_x000D_\nThe Federal Reserve today announced t...,"[[1], [-1], [-1], [0.5], [0], [0], [0], [0]]",-0.062500,0.0,0.0,0.0
3,1998-11-17,_x000D_\nThe Federal Reserve today announced t...,"[[1], [-1], [-0.5], [0.5], [0], [0], [0]]",0.000000,0.0,0.0,0.0
4,1999-05-18,_x000D_\nThe Federal Reserve released the foll...,"[[1], [0], [1], [-1], [0]]",0.200000,0.0,0.0,0.5
...,...,...,...,...,...,...,...
201,2022-09-21,\nRecent indicators point to modest growth in ...,"[[0], [1], [1], [-1], [1], [0], [0], [1], [1],...",0.250000,0.0,0.0,0.0
202,2022-11-02,\nRecent indicators point to modest growth in ...,"[[0], [1], [1], [-1], [1], [0], [0], [1], [-0....",0.205882,0.0,0.0,0.0
203,2022-12-14,\nRecent indicators point to modest growth in ...,"[[0.5], [1], [1], [-1], [1], [0], [0], [1], [-...",0.250000,0.0,0.0,0.0
204,2023-02-01,\nRecent indicators point to modest growth in ...,"[[0], [1], [-0.5], [-1], [0], [0], [1], [-0.5]...",0.062500,0.0,0.0,0.0


In [None]:
# Save the statements scored by the baseline model, without the index column
statements_test.to_csv("statements_roberta_og.csv", index = False)

# RoBERTa with Augmentation of data

In [None]:
# Train the second model on the original training sentences plus their
# synonym-augmented variants. Passing plain train_aug here would repeat the
# baseline experiment, since that split is built exactly like train_og.
# NOTE(review): prepare_data splits off its dev set after augmentation, so dev
# sentences can have augmented siblings in the training part; only test_aug
# gives a clean estimate. epochs=14 was chosen before this change — re-tune if needed.
model_aug = RobertaSentimentClassifier(model_name='roberta-base', num_labels=6, device='cuda', max_length=200)

model_aug.prepare_data(train_augmented_aug, test_size=0.2, batch_size=32)
model_aug.train(epochs=14, learning_rate=5e-5, eps=1e-8)

Some weights of the model checkpoint at roberta-base were not used when initializing MCRoberta: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing MCRoberta from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MCRoberta from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of MCRoberta were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and infer



  Average training loss: 1.77
  Training epoch took: 0:00:04

  Accuracy: 0.22
Accuracy for class 0: 0.57
Accuracy for class 1: 0.00
Accuracy for class 2: 0.38
Accuracy for class 3: 0.00
Accuracy for class 4: 0.00
Accuracy for class 5: 0.00
  Validation Loss: 1.67
  Validation took: 0:00:00


  Average training loss: 1.66
  Training epoch took: 0:00:04

  Accuracy: 0.22
Accuracy for class 0: 0.00
Accuracy for class 1: 0.00
Accuracy for class 2: 0.88
Accuracy for class 3: 0.00
Accuracy for class 4: 0.00
Accuracy for class 5: 0.00
  Validation Loss: 1.66
  Validation took: 0:00:00


  Average training loss: 1.59
  Training epoch took: 0:00:04

  Accuracy: 0.34
Accuracy for class 0: 0.57
Accuracy for class 1: 0.00
Accuracy for class 2: 0.88
Accuracy for class 3: 0.00
Accuracy for class 4: 0.00
Accuracy for class 5: 0.00
  Validation Loss: 1.60
  Validation took: 0:00:00


  Average training loss: 1.47
  Training epoch took: 0:00:04

  Accuracy: 0.34
Accuracy for class 0: 0.57
Accuracy f

<__main__.RobertaSentimentClassifier at 0x7f7dbed02200>

In [None]:
# Evaluate the augmentation-experiment model on the held-out test split; the returned
# frame holds the prediction and its MC-dropout uncertainty for every test sentence.
predictions_aug = model_aug.predict(test_aug)

predictions_aug

  test_inputs = torch.tensor(test_input_ids)
  test_masks = torch.tensor(test_attention_mask)


Test Accuracy: 70.00%


Unnamed: 0,Sentence,Ground Truth,Predicted_Score,Uncertainty
150,"In any event, the Committee will respond to ch...",0,0,0.352192
192,Economic growth appears to have been moderate ...,0,0.5,0.41172
69,The Committee will carefully monitor actual an...,0,0,0.237701
81,Household spending and business fixed investme...,0.5,0.5,0.387105
23,\n\n_x000D_\nThe Federal Open Market Committee...,0,-1,0.656016
51,"This policy, by keeping the Committee's holdin...",-0.5,-0.5,0.550546
126,In view of realized and expected labor market ...,1,0,0.736318
194,Although economic activity is likely to remain...,-1,-1,0.273897
131,The Committee will continue to assess the effe...,0,0,0.239589
82,When the Committee decides to begin to remove ...,0.5,0,0.347475


In [None]:
# Score model_aug's own predictions: `predictions` holds the baseline model's
# output, so predictions_aug is the frame that belongs here.
metrics_aug = model_aug.compute_metrics(test_aug['Score'].values, predictions_aug['Predicted_Score'].values)

F1 Score: 59.18%
Balanced Accuracy: 53.37%
Precision: 59.48%
Recall: 60.00%


In [None]:
# Fresh copy of the full FOMC statements for the augmentation experiment.
# This rebinds statements_test, so the columns computed for the baseline model
# are dropped from the variable.
statements_test = statements.copy()
statements_test

Unnamed: 0,Date,Statements
0,1997-03-25,_x000D_\n_x000D_\n\tThe Federal Open Market Co...
1,1998-09-29,_x000D_\nThe Federal Open Market Committee dec...
2,1998-10-15,_x000D_\nThe Federal Reserve today announced t...
3,1998-11-17,_x000D_\nThe Federal Reserve today announced t...
4,1999-05-18,_x000D_\nThe Federal Reserve released the foll...
...,...,...
201,2022-09-21,\nRecent indicators point to modest growth in ...
202,2022-11-02,\nRecent indicators point to modest growth in ...
203,2022-12-14,\nRecent indicators point to modest growth in ...
204,2023-02-01,\nRecent indicators point to modest growth in ...


In [None]:
from tqdm import tqdm
import numpy as np

# Manually driven progress bar sized to one tick per row of `statements_test`;
# `cal_scores` advances it by one on every call and the cell closes it after
# the apply has finished.
progress_bar = tqdm(total=len(statements_test), ncols=75, bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt}')

def cal_scores(x):
    """Score each sentence of one statement with ``model_og``.

    The statement text is cut at every ``'.'``; fragments that are empty after
    stripping whitespace are skipped. Each remaining fragment is wrapped in a
    one-row DataFrame with a ``'Sentence'`` column and passed to
    ``model_og.predict_scores``.

    Parameters
    ----------
    x : str
        Full text of a single statement.

    Returns
    -------
    list
        One ``predict_scores`` result per non-empty fragment, in text order.

    Side effects
    ------------
    Advances the module-level ``progress_bar`` by one tick per call.
    """
    # NOTE(review): splitting on '.' also cuts at decimal points and
    # abbreviations, so a fragment is not always a full sentence -- confirm
    # this matches how the labelled sentences were produced.
    # NOTE(review): this uses `model_og`, while the result is later written to
    # "statements_roberta_aug.csv" -- confirm which model is intended.
    progress_bar.update(1)

    sentence_scores = []
    for fragment in x.split('.'):
        cleaned = fragment.strip()
        if not cleaned:
            continue
        single_row = pd.DataFrame({'Sentence': [cleaned]})
        sentence_scores.append(model_og.predict_scores(single_row))
    return sentence_scores

# Apply the scoring function to the 'Statements' column, one call per row.
# The bar is closed in `finally` so that a failure part-way through scoring
# does not leave an open progress bar behind in the notebook output.
try:
    statements_test['Score'] = statements_test['Statements'].apply(cal_scores)
finally:
    progress_bar.close()

100%|█████████████████████████████████████████████████████████████| 206/206


In [None]:
statements_test

Unnamed: 0,Date,Statements,Score
0,1997-03-25,_x000D_\n_x000D_\n\tThe Federal Open Market Co...,"[[1], [-1], [0.5], [0.5], [0], [0]]"
1,1998-09-29,_x000D_\nThe Federal Open Market Committee dec...,"[[1], [-1], [0], [0], [0.5], [0], [0]]"
2,1998-10-15,_x000D_\nThe Federal Reserve today announced t...,"[[1], [-1], [-1], [0.5], [0], [0], [0], [0]]"
3,1998-11-17,_x000D_\nThe Federal Reserve today announced t...,"[[1], [-1], [-0.5], [0.5], [0], [0], [0]]"
4,1999-05-18,_x000D_\nThe Federal Reserve released the foll...,"[[1], [0], [1], [-1], [0]]"
...,...,...,...
201,2022-09-21,\nRecent indicators point to modest growth in ...,"[[0], [1], [-1], [-1], [1], [0], [0], [1], [1]..."
202,2022-11-02,\nRecent indicators point to modest growth in ...,"[[0.5], [1], [1], [-1], [1], [0], [0], [1], [-..."
203,2022-12-14,\nRecent indicators point to modest growth in ...,"[[0], [1], [1], [-1], [1], [0], [0], [1], [-0...."
204,2023-02-01,\nRecent indicators point to modest growth in ...,"[[0], [1], [-0.5], [-1], [0], [0], [1], [-0.5]..."


In [None]:
from scipy import stats
from collections import Counter
import statistics as s

def calculate_average(score_list):
    """Return the mean of the numeric sentence scores of one statement.

    Parameters
    ----------
    score_list : iterable
        One entry per sentence; each entry is indexable and its first element
        is the label (a number, a numeric string, or ``'Remove'``).

    Returns
    -------
    float
        Mean of all labels other than ``'Remove'``. ``np.nan`` when
        ``score_list`` is not iterable, an entry is malformed, a label cannot
        be converted to ``float``, or no numeric label remains.
    """
    try:
        # Drop 'Remove' labels and convert the rest to float in one pass.
        scores = [float(score[0]) for score in score_list if score[0] != 'Remove']
    except (TypeError, ValueError, IndexError, KeyError):
        # Malformed input is reported as "no value" rather than raised, but
        # only the expected data errors are caught (no bare `except`).
        return np.nan

    # Handle the empty case explicitly: np.mean([]) would also give nan but
    # emits a RuntimeWarning.
    if not scores:
        return np.nan
    return np.mean(scores)

def round_to_half(number):
    """Round ``number`` to the nearest multiple of 0.5.

    Uses the built-in ``round``, so exact ties on the doubled value go to the
    even integer (e.g. 0.25 -> 0.0, 0.75 -> 1.0).

    Parameters
    ----------
    number : float
        Value to round.

    Returns
    -------
    float
        The rounded value. NaN and +/-infinity are returned unchanged, because
        ``round`` raises on them and ``calculate_average`` deliberately returns
        NaN for statements without usable scores.
    """
    import math

    if not math.isfinite(number):
        return float(number)
    return round(number * 2) / 2

def calculate_mode(score_list):
    """Return the most frequent numeric sentence score of one statement.

    Parameters
    ----------
    score_list : iterable
        One entry per sentence; each entry is indexable and its first element
        is the label (a number, a numeric string, or ``'Remove'``).

    Returns
    -------
    float
        The most common label other than ``'Remove'``. When several labels tie
        for the highest count, the numerically smallest one is returned.
        ``nan`` when no numeric label remains.
    """
    # Compare the label itself (score[0]) with 'Remove', not its wrapper, and
    # convert to float up front so counting and tie-breaking are numeric.
    scores = [float(score[0]) for score in score_list if score[0] != 'Remove']
    if not scores:
        return float('nan')

    # Counted with the standard library instead of scipy.stats.mode, which no
    # longer accepts non-numeric (string) labels in recent SciPy releases.
    counts = Counter(scores)
    highest = max(counts.values())
    return min(value for value, count in counts.items() if count == highest)

def calculate_mode_average(score_list):
    """Return the mode of one statement's scores, averaging tied modes.

    Parameters
    ----------
    score_list : iterable
        One entry per sentence; each entry is indexable and its first element
        is the label (a number, a numeric string, or ``'Remove'``).

    Returns
    -------
    float
        The mean of all labels (other than ``'Remove'``) that share the highest
        count, rounded to the nearest 0.5 with the built-in ``round`` (exact
        ties on the doubled value go to the even integer). ``nan`` when no
        numeric label remains.
    """
    # Compare the label itself (score[0]) with 'Remove', not its wrapper, so
    # 'Remove' can never become a mode and reach float().
    scores = [float(score[0]) for score in score_list if score[0] != 'Remove']
    if not scores:
        # multimode([]) returns [], which would otherwise divide by zero below.
        return float('nan')

    modes = s.multimode(scores)
    # Average the modes when several labels tie for the highest count.
    mode_mean = sum(modes) / len(modes)
    return round(mode_mean * 2) / 2

# Derive the per-statement aggregate columns. Each entry is
# (new column, source column, function applied element-wise); order matters
# because 'Score_mean_round' is computed from 'Score_mean'.
aggregations = (
    ('Score_mean', 'Score', calculate_average),
    ('Score_mean_round', 'Score_mean', round_to_half),
    ('Score_mode', 'Score', calculate_mode),
    ('Score_mode_multiple', 'Score', calculate_mode_average),
)
for new_col, src_col, reducer in aggregations:
    statements_test[new_col] = statements_test[src_col].apply(reducer)

# Bare expression so the notebook renders the augmented frame.
statements_test

  mode = stats.mode(scores)
  mode = stats.mode(scores)


Unnamed: 0,Date,Statements,Score,Score_mean,Score_mean_round,Score_mode,Score_mode_multiple
0,1997-03-25,_x000D_\n_x000D_\n\tThe Federal Open Market Co...,"[[1], [-1], [0.5], [0.5], [0], [0]]",0.166667,0.0,0.0,0.0
1,1998-09-29,_x000D_\nThe Federal Open Market Committee dec...,"[[1], [-1], [0], [0], [0.5], [0], [0]]",0.071429,0.0,0.0,0.0
2,1998-10-15,_x000D_\nThe Federal Reserve today announced t...,"[[1], [-1], [-1], [0.5], [0], [0], [0], [0]]",-0.062500,0.0,0.0,0.0
3,1998-11-17,_x000D_\nThe Federal Reserve today announced t...,"[[1], [-1], [-0.5], [0.5], [0], [0], [0]]",0.000000,0.0,0.0,0.0
4,1999-05-18,_x000D_\nThe Federal Reserve released the foll...,"[[1], [0], [1], [-1], [0]]",0.200000,0.0,0.0,0.5
...,...,...,...,...,...,...,...
201,2022-09-21,\nRecent indicators point to modest growth in ...,"[[0], [1], [-1], [-1], [1], [0], [0], [1], [1]...",0.107143,0.0,0.0,0.0
202,2022-11-02,\nRecent indicators point to modest growth in ...,"[[0.5], [1], [1], [-1], [1], [0], [0], [1], [-...",0.250000,0.0,0.0,0.0
203,2022-12-14,\nRecent indicators point to modest growth in ...,"[[0], [1], [1], [-1], [1], [0], [0], [1], [-0....",0.218750,0.0,0.0,0.0
204,2023-02-01,\nRecent indicators point to modest growth in ...,"[[0], [1], [-0.5], [-1], [0], [0], [1], [-0.5]...",0.062500,0.0,0.0,0.0


In [None]:
# Write the current `statements_test` frame to CSV in the working directory,
# omitting the index column.
# NOTE(review): the file name says "aug" while `cal_scores` calls `model_og` --
# confirm which model these scores are supposed to come from.
statements_test.to_csv("statements_roberta_aug.csv", index = False)