In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
import os
unlabelled_df = pd.read_csv('/content/gdrive/MyDrive/unlabeled_data_1106.csv')
train_df = pd.read_csv('/content/gdrive/MyDrive/augmented_labeled_data_1106.csv')
forex = pd.read_csv('/content/gdrive/MyDrive/sentiment_annotated_with_texts.csv')
test = pd.read_csv('/content/gdrive/MyDrive/test_1106.csv')


In [3]:
unlabelled_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 33634 entries, 0 to 33633
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text    33634 non-null  object
dtypes: object(1)
memory usage: 262.9+ KB


In [4]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6016 entries, 0 to 6015
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text    6016 non-null   object
 1   label   6016 non-null   object
dtypes: object(2)
memory usage: 94.1+ KB


In [5]:
forex = forex[['text','true_sentiment']].copy()
forex.rename(columns={'true_sentiment': 'label'}, inplace=True)
forex['label'] = forex['label'].str.lower()
forex.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2291 entries, 0 to 2290
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text    2291 non-null   object
 1   label   2291 non-null   object
dtypes: object(2)
memory usage: 35.9+ KB


In [6]:
train_df = train_df[['text']].copy()

In [7]:
train_augmented = pd.concat([train_df, unlabelled_df], axis=0)
train_augmented.reset_index(drop=True, inplace=True)
train_augmented = train_augmented.sample(frac=1).reset_index(drop=True).copy()
train_augmented.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 39650 entries, 0 to 39649
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text    39650 non-null  object
dtypes: object(1)
memory usage: 309.9+ KB


In [8]:
#train_augmented, valid_augmented = train_test_split(augmented_df, test_size=0.1, random_state=0)
#train_unlabelled, valid_unlabelled = train_test_split(unlabelled_df, test_size=0.1, random_state=0)


In [9]:
# List of variables to keep
#variables_to_keep = ['train_augmented', 'valid_augmented', 'train_unlabelled', 'valid_unlabelled', 'test', 'forex','unlabelled_df']
variables_to_keep = ['train_df', 'train_augmented', 'test', 'forex','unlabelled_df']

# Get the current global variables
current_globals = globals().copy()

# Delete variables that are not in the variables_to_keep list
for var_name in current_globals:
    if var_name not in variables_to_keep and not var_name.startswith('__'):
        del globals()[var_name]

# Verify remaining variables
print(globals().keys())

dict_keys(['__name__', '__doc__', '__package__', '__loader__', '__spec__', '__builtin__', '__builtins__', '__', '___', 'unlabelled_df', 'train_df', 'forex', 'test', 'train_augmented', 'current_globals', 'var_name'])


In [10]:
!pip install transformers[torch] accelerate -U
!pip install datasets
import torch
import random
from transformers import AutoTokenizer, AutoModelForSequenceClassification, get_linear_schedule_with_warmup
from datasets import Dataset, load_metric
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
import numpy as np
from torch.cuda.amp import GradScaler, autocast
from torch.autograd import Function
import torch.optim as optim

# Set seeds for reproducibility
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)

# Define the label list
label_list = ['positive', 'negative', 'neutral']
label_to_id = {label: i for i, label in enumerate(label_list)}
id_to_label = {i: label for i, label in enumerate(label_list)}

# Load the pre-trained tokenizer and model
#model_name = "ProsusAI/finbert"
# teacher_model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=len(label_list))
teacher_model = AutoModelForSequenceClassification.from_pretrained('/content/gdrive/MyDrive/finetuned_finbert_25052024_v6_GPT4o_rephrased_and_GPT_DFT_STL', num_labels=len(label_list))

# Load the pretrained TinyBERT model
student_model_name = "huawei-noah/TinyBERT_General_4L_312D"
tokenizer = AutoTokenizer.from_pretrained(student_model_name, do_lower_case=True)
student_model = AutoModelForSequenceClassification.from_pretrained(student_model_name, num_labels=len(label_list))

# Load your datasets
train_augmented_dataset = Dataset.from_pandas(train_augmented)
train_df_dataset = Dataset.from_pandas(train_df)
# valid_dataset = Dataset.from_pandas(valid_augmented)
# train_unlabelled = Dataset.from_pandas(train_unlabelled)
# valid_unlabelled = Dataset.from_pandas(valid_unlabelled)
test_dataset = Dataset.from_pandas(test)
forex_dataset = Dataset.from_pandas(forex)

# del globals()['train_augmented']
# del globals()['valid_augmented']
# del globals()['test']
# del globals()['forex']

# Tokenize the datasets
def tokenize_function(examples):
    return tokenizer(examples['text'], truncation=True, padding='max_length', max_length=64)

train_augmented_dataset = train_augmented_dataset.map(tokenize_function, batched=True)
train_df_dataset = train_df_dataset.map(tokenize_function, batched=True)
# valid_dataset = valid_dataset.map(tokenize_function, batched=True)
# train_unlabelled = train_unlabelled.map(tokenize_function, batched=True)
# valid_unlabelled = valid_unlabelled.map(tokenize_function, batched=True)
test_dataset = test_dataset.map(tokenize_function, batched=True)
forex_dataset = forex_dataset.map(tokenize_function, batched=True)

# Encode the labels
def encode_labels(examples):
    examples['label'] = [label_to_id[label] for label in examples['label']]
    return examples

# train_dataset = train_dataset.map(encode_labels, batched=True)
# valid_dataset = valid_dataset.map(encode_labels, batched=True)
test_dataset = test_dataset.map(encode_labels, batched=True)
forex_dataset = forex_dataset.map(encode_labels, batched=True)

# Set the format for PyTorch
train_augmented_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask'])
train_df_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask'])
# valid_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask'])
# train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
# valid_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
# train_unlabelled.set_format(type='torch', columns=['input_ids', 'attention_mask'])
# valid_unlabelled.set_format(type='torch', columns=['input_ids', 'attention_mask'])
test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
forex_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

# Create DataLoaders
sampler = RandomSampler(train_augmented_dataset)
train_augmented_loader = DataLoader(train_augmented_dataset, batch_size=32, sampler=sampler)
sampler = RandomSampler(train_df_dataset)
train_df_loader = DataLoader(train_df_dataset, batch_size=32, sampler=sampler)
# valid_loader = DataLoader(valid_dataset, batch_size=32)
eval_sampler = SequentialSampler(test_dataset)
test_loader = DataLoader(test_dataset, batch_size=32,sampler=eval_sampler)
eval_sampler = SequentialSampler(forex_dataset)
forex_loader = DataLoader(forex_dataset, batch_size=32,sampler=eval_sampler)
# train_unlabelled_loader = DataLoader(train_unlabelled, batch_size=32, shuffle=True)
# valid_unlabelled_loader = DataLoader(valid_unlabelled, batch_size=32)


Collecting accelerate
  Downloading accelerate-0.31.0-py3-none-any.whl (309 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m309.4/309.4 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->transformers[torch])
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->transformers[torch])
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch->transformers[torch])
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch->transformers[torch])
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch->transformers[torch])
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/409 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/62.7M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at huawei-noah/TinyBERT_General_4L_312D and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/39650 [00:00<?, ? examples/s]

Map:   0%|          | 0/6016 [00:00<?, ? examples/s]

Map:   0%|          | 0/970 [00:00<?, ? examples/s]

Map:   0%|          | 0/2291 [00:00<?, ? examples/s]

Map:   0%|          | 0/970 [00:00<?, ? examples/s]

Map:   0%|          | 0/2291 [00:00<?, ? examples/s]

In [11]:
# adapted from https://github.com/zhengli97/CTKD
import torch
import torch.nn as nn
import torch.nn.functional as F

class Temp_BERT(nn.Module):
    def __init__(self, num_classes):
        super(Temp_BERT, self).__init__()

        # Assume num_classes = 3 for positive, negative, neutral
        self.fc1 = nn.Linear(2 * num_classes, 256)  # Directly use all outputs
        self.relu = nn.ReLU(inplace=True)
        self.fc2 = nn.Linear(256, 1)
        self.grl = GradientReversal()

        nn.init.constant_(self.fc2.weight.data, 0)
        nn.init.constant_(self.fc2.bias.data, 0)

    def forward(self, teacher_logits, student_logits, lambda_):
        tea_probs = F.softmax(teacher_logits, dim=1)
        stu_probs = F.softmax(student_logits, dim=1)

        # Directly concatenate all softmax probabilities
        comb_out = torch.cat([tea_probs, stu_probs], dim=1)

        T = self.fc1(comb_out)
        T = self.relu(T)
        T = self.fc2(T)
        T = self.grl(T, lambda_)
        T = T.view(-1)

        return T

from torch.autograd import Function
class GradientReversalFunction(Function):
    @staticmethod
    def forward(ctx, x, lambda_):
        ctx.lambda_ = lambda_
        return x.clone()

    @staticmethod
    def backward(ctx, grads):
        lambda_ = ctx.lambda_
        lambda_ = grads.new_tensor(lambda_)
        dx = lambda_ * grads
        return dx, None


class GradientReversal(torch.nn.Module):
    def __init__(self):
        super(GradientReversal, self).__init__()
        # self.lambda_ = lambda_

    def forward(self, x, lambda_):
        return GradientReversalFunction.apply(x, lambda_)

In [12]:
# # Gradual unfreezing
# def unfreeze_model(model, epoch, total_epochs):
#     # Initial Freezing of all layers
#     for param in model.parameters():
#         param.requires_grad = False

#     # Number of layers to unfreeze per epoch
#     layers_to_unfreeze = epoch * (len(model.bert.encoder.layer) // total_epochs)

#     # Unfreeze layers from the top as epoch increases
#     for layer in model.bert.encoder.layer[-layers_to_unfreeze:]:
#         for param in layer.parameters():
#             param.requires_grad = True

# Evaluation function
def evaluate(model, eval_loader, device):
    model.eval()
    accuracy_metric = load_metric('accuracy', trust_remote_code=True)
    f1_metric = load_metric('f1', trust_remote_code=True)

    for batch in eval_loader:
        inputs = {key: val.to(device) for key, val in batch.items() if key in tokenizer.model_input_names}
        labels = batch['label'].to(device)

        with torch.no_grad():
            outputs = model(**inputs)
        predictions = outputs.logits.argmax(dim=-1)

        accuracy_metric.add_batch(predictions=predictions, references=labels)
        f1_metric.add_batch(predictions=predictions, references=labels)

    accuracy = accuracy_metric.compute()
    f1 = f1_metric.compute(average='weighted')
    return accuracy, f1

In [13]:
import json

def result_to_file(result, file_name):
    with open(file_name, "a") as writer:
        json.dump(result, writer)
        writer.write("\n")

In [14]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()

In [15]:
# # Initialize the Temp_BERT module
# temp_model = Temp_BERT(num_classes=3).to(device)

In [16]:
# Moving teacher model to device separately to avoid any potential issues
teacher_model = teacher_model.to(device)

In [17]:
student_model = student_model.to(device)

In [18]:
projection_layer = nn.Linear(768, 312).to(device)

In [19]:
# Evaluate the student model on the test data before training
initial_test_accuracy, initial_test_f1 = evaluate(student_model, test_loader, device)
print(f"Initial Test Accuracy: {initial_test_accuracy['accuracy']}")
print(f"Initial Test F1 Score: {initial_test_f1['f1']}")

# Evaluate the student model on the forex data before training
initial_forex_accuracy, initial_forex_f1 = evaluate(student_model, forex_loader, device)
print(f"Initial Forex Accuracy: {initial_forex_accuracy['accuracy']}")
print(f"Initial Forex F1 Score: {initial_forex_f1['f1']}")

  accuracy_metric = load_metric('accuracy', trust_remote_code=True)


Downloading builder script:   0%|          | 0.00/1.65k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

Initial Test Accuracy: 0.13298969072164948
Initial Test F1 Score: 0.03290890920930817
Initial Forex Accuracy: 0.30947184635530334
Initial Forex F1 Score: 0.14627702604394005


In [25]:
# adapted from https://github.com/huawei-noah/Pretrained-Language-Model/tree/master

import torch
import sys
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from torch.nn import MSELoss
import random
import numpy as np
import os
from tqdm.notebook import tqdm, trange
from transformers import AdamW, get_linear_schedule_with_warmup

def Distillation(teacher_model, student_model, output_dir, max_seq_length=64,
         do_eval=False, train_batch_size=32, learning_rate=5e-5, weight_decay=1e-4,
         num_train_epochs=3.0, warmup_proportion=0.1, seed=42, device=device,
         aug_train=True, eval_step=50, pred_distill=False, temperature=1.0):

    # Seed setup
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.device_count() > 0:
        torch.cuda.manual_seed_all(seed)

    # Ensure output directory exists
    if os.path.exists(output_dir) and os.listdir(output_dir):
        raise ValueError("Output directory ({}) already exists and is not empty.".format(output_dir))
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Data preparation
    if not do_eval:
        if not aug_train:
            train_dataloader = train_df_loader
        else:
            train_dataloader = train_augmented_loader

        num_train_optimization_steps = int(
            len(train_dataloader) / train_batch_size) * num_train_epochs

    eval_dataloader = test_loader

    # Training or evaluation
    if do_eval:
        student_model.eval()
        test_accuracy, test_f1 = evaluate(student_model, eval_dataloader, device)
        print(f"Test Accuracy: {test_accuracy['accuracy']}")
        print(f"Test F1 Score: {test_f1['f1']}")

    else:
        if n_gpu > 1:
            student_model = torch.nn.DataParallel(student_model)
            teacher_model = torch.nn.DataParallel(teacher_model)
        # Prepare optimizer
        param_optimizer = list(student_model.named_parameters())
        size = 0
        for n, p in student_model.named_parameters():
            size += p.nelement()

        print('Total parameters: {}'.format(size))
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
            {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
        ]

        optimizer = AdamW(optimizer_grouped_parameters, lr=learning_rate)

        # Set up learning rate scheduler
        if not pred_distill:
            scheduler = None
        else:
            scheduler = get_linear_schedule_with_warmup(optimizer,
                                                        num_warmup_steps=int(warmup_proportion * num_train_optimization_steps),
                                                        num_training_steps=num_train_optimization_steps)

        # Prepare loss functions
        loss_mse = MSELoss()

        def soft_cross_entropy(predicts, targets):
            student_likelihood = torch.nn.functional.log_softmax(predicts, dim=-1)
            targets_prob = torch.nn.functional.softmax(targets, dim=-1)
            return (- targets_prob * student_likelihood).mean()

        # Train and evaluate
        global_step = 0
        best_dev_acc = 0.0
        output_eval_file = os.path.join(output_dir, "eval_results.txt")

        for epoch_ in trange(int(num_train_epochs), desc="Epoch"):
            tr_loss = 0.
            tr_att_loss = 0.
            tr_rep_loss = 0.
            tr_cls_loss = 0.

            student_model.train()
            nb_tr_examples, nb_tr_steps = 0, 0

            for step, batch in enumerate(tqdm(train_dataloader, desc="Iteration", ascii=True)):
                # Move each tensor in the batch to the specified device
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)

                if input_ids.size()[0] != train_batch_size:
                    continue

                att_loss = 0.
                rep_loss = 0.
                cls_loss = 0.

                # Forward pass for student model
                student_outputs = student_model(input_ids, attention_mask=attention_mask,
                                                output_attentions=True, output_hidden_states=True)
                student_logits = student_outputs.logits
                student_atts = student_outputs.attentions
                student_reps = student_outputs.hidden_states

                with torch.no_grad():
                    # Forward pass for teacher model
                    teacher_outputs = teacher_model(input_ids, attention_mask=attention_mask,
                                                    output_attentions=True, output_hidden_states=True)
                    teacher_logits = teacher_outputs.logits
                    teacher_atts = teacher_outputs.attentions
                    teacher_reps = teacher_outputs.hidden_states

                if not pred_distill:
                    teacher_layer_num = len(teacher_atts)
                    student_layer_num = len(student_atts)
                    assert teacher_layer_num % student_layer_num == 0

                    layers_per_block = int(teacher_layer_num / student_layer_num)
                    new_teacher_atts = [teacher_atts[i * layers_per_block + layers_per_block - 1]
                                        for i in range(student_layer_num)]

                    for student_att, teacher_att in zip(student_atts, new_teacher_atts):
                        student_att = torch.where(student_att <= -1e2, torch.zeros_like(student_att).to(device),
                                                  student_att)
                        teacher_att = torch.where(teacher_att <= -1e2, torch.zeros_like(teacher_att).to(device),
                                                  teacher_att)

                        tmp_loss = loss_mse(student_att, teacher_att)
                        att_loss += tmp_loss

                    new_teacher_reps = [teacher_reps[i * layers_per_block] for i in range(student_layer_num + 1)]

                    # Project teacher representations to the student hidden size
                    projected_teacher_reps = [projection_layer(rep) for rep in new_teacher_reps]

                    new_student_reps = student_reps
                    for student_rep, teacher_rep in zip(new_student_reps, projected_teacher_reps):
                        tmp_loss = loss_mse(student_rep, teacher_rep)
                        rep_loss += tmp_loss

                    loss = rep_loss + att_loss
                    tr_att_loss += att_loss.item()
                    tr_rep_loss += rep_loss.item()
                else:

                    cls_loss = soft_cross_entropy(student_logits / temperature,
                                                  teacher_logits / temperature)

                    loss = cls_loss
                    tr_cls_loss += cls_loss.item()

                if n_gpu > 1:
                    loss = loss.mean()  # mean() to average on multi-gpu.

                loss.backward()

                tr_loss += loss.item()
                #nb_tr_examples += label_ids.size(0)
                nb_tr_steps += 1

                optimizer.step()
                if scheduler is not None:
                    scheduler.step()
                optimizer.zero_grad()
                global_step += 1

                if (global_step + 1) % eval_step == 0:
                    student_model.eval()

                    loss = tr_loss / (step + 1)
                    cls_loss = tr_cls_loss / (step + 1)
                    att_loss = tr_att_loss / (step + 1)
                    rep_loss = tr_rep_loss / (step + 1)

                    result = {}
                    if pred_distill:
                        accuracy, f1 = evaluate(student_model, eval_dataloader, device)
                        result['accuracy'] = accuracy['accuracy']
                        result['f1'] = f1['f1']
                        #result['accuracy'], result['f1'] = evaluate(student_model, eval_dataloader, device)

                    result['global_step'] = global_step
                    result['cls_loss'] = cls_loss
                    result['att_loss'] = att_loss
                    result['rep_loss'] = rep_loss
                    result['loss'] = loss

                    result_to_file(result, output_eval_file)

                    if not pred_distill:
                        save_model = True
                    else:
                        save_model = False

                        # Ensure 'accuracy' is in the result and is a float
                        if 'accuracy' in result and isinstance(result['accuracy'], (float, int)) and result['accuracy'] > best_dev_acc:
                            best_dev_acc = result['accuracy']
                            save_model = True
                        # if result['accuracy'] > best_dev_acc:
                        #     best_dev_acc = result['accuracy']
                        #     save_model = True

                    if save_model:
                        print("***** Save model *****")

                        model_to_save = student_model.module if hasattr(student_model, 'module') else student_model

                        model_save_path = output_dir
                        model_to_save.save_pretrained(model_save_path)
                        tokenizer.save_pretrained(model_save_path)

                    student_model.train()
    return student_model

In [21]:
student_model= Distillation(teacher_model, student_model, output_dir='/content/gdrive/MyDrive/finetuned_tinyfinbert_12062024_v1', max_seq_length=64,
         do_eval=False, train_batch_size=32, learning_rate=5e-5, weight_decay=1e-4,
         num_train_epochs=20, warmup_proportion=0.1, seed=42, device=device,
         aug_train=True, eval_step=1240, pred_distill=False, temperature=1.0)


Total parameters: 14351187




Epoch:   0%|          | 0/20 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1240 [00:00<?, ?it/s]



***** Save model *****


Iteration:   0%|          | 0/1240 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1240 [00:00<?, ?it/s]

***** Save model *****


Iteration:   0%|          | 0/1240 [00:00<?, ?it/s]

***** Save model *****


Iteration:   0%|          | 0/1240 [00:00<?, ?it/s]

***** Save model *****


Iteration:   0%|          | 0/1240 [00:00<?, ?it/s]

***** Save model *****


Iteration:   0%|          | 0/1240 [00:00<?, ?it/s]

***** Save model *****


Iteration:   0%|          | 0/1240 [00:00<?, ?it/s]

***** Save model *****


Iteration:   0%|          | 0/1240 [00:00<?, ?it/s]

***** Save model *****


Iteration:   0%|          | 0/1240 [00:00<?, ?it/s]

***** Save model *****


Iteration:   0%|          | 0/1240 [00:00<?, ?it/s]

***** Save model *****


Iteration:   0%|          | 0/1240 [00:00<?, ?it/s]

***** Save model *****


Iteration:   0%|          | 0/1240 [00:00<?, ?it/s]

***** Save model *****


Iteration:   0%|          | 0/1240 [00:00<?, ?it/s]

***** Save model *****


Iteration:   0%|          | 0/1240 [00:00<?, ?it/s]

***** Save model *****


Iteration:   0%|          | 0/1240 [00:00<?, ?it/s]

***** Save model *****


Iteration:   0%|          | 0/1240 [00:00<?, ?it/s]

***** Save model *****


Iteration:   0%|          | 0/1240 [00:00<?, ?it/s]

***** Save model *****


Iteration:   0%|          | 0/1240 [00:00<?, ?it/s]

***** Save model *****


Iteration:   0%|          | 0/1240 [00:00<?, ?it/s]

***** Save model *****


In [22]:
# Final evaluation on test data
final_test_accuracy, final_test_f1 = evaluate(student_model, test_loader, device)
print(f"Final Test Accuracy: {final_test_accuracy['accuracy']}")
print(f"Final Test F1 Score: {final_test_f1['f1']}")

# Evaluate the student model on the forex data after training
final_forex_accuracy, final_forex_f1 = evaluate(student_model, forex_loader, device)
print(f"Final Forex Accuracy: {final_forex_accuracy['accuracy']}")
print(f"Final Forex F1 Score: {final_forex_f1['f1']}")

Final Test Accuracy: 0.477319587628866
Final Test F1 Score: 0.46678141861846695
Final Forex Accuracy: 0.2780445220427761
Final Forex F1 Score: 0.22847378214508382


In [29]:
student_model=Distillation(teacher_model, student_model, output_dir='/content/gdrive/MyDrive/finetuned_tinyfinbert_12062024_v2', max_seq_length=64,
         do_eval=False, train_batch_size=32, learning_rate=5e-5, weight_decay=1e-4,
         num_train_epochs=3, warmup_proportion=0.1, seed=42, device=device,
         aug_train=True, eval_step=50, pred_distill=True, temperature=1.0)


Total parameters: 14351187




Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1240 [00:00<?, ?it/s]

***** Save model *****
***** Save model *****


Iteration:   0%|          | 0/1240 [00:00<?, ?it/s]

Iteration:   0%|          | 0/1240 [00:00<?, ?it/s]

In [None]:
# # Save the fine-tuned TinyBERT model
# student_model.save_pretrained("/content/gdrive/MyDrive/finetuned_tinyfinbert_26052024_v1")

In [None]:
# tokenizer.save_pretrained("/content/gdrive/MyDrive/finetuned_tinyfinbert_26052024_v1")

In [30]:
accuracy_metric = load_metric('accuracy', trust_remote_code=True)
f1_metric = load_metric('f1', trust_remote_code=True)
# Final evaluation on test data
final_test_accuracy, final_test_f1 = evaluate(student_model, test_loader, device)
print(f"Final Test Accuracy: {final_test_accuracy['accuracy']}")
print(f"Final Test F1 Score: {final_test_f1['f1']}")

# Evaluate the student model on the forex data after training
final_forex_accuracy, final_forex_f1 = evaluate(student_model, forex_loader, device)
print(f"Final Forex Accuracy: {final_forex_accuracy['accuracy']}")
print(f"Final Forex F1 Score: {final_forex_f1['f1']}")

Final Test Accuracy: 0.8237113402061855
Final Test F1 Score: 0.8256654323080563
Final Forex Accuracy: 0.46835443037974683
Final Forex F1 Score: 0.43494671035820853


In [None]:
# # Save the further fine-tuned model
# model.save_pretrained("/content/gdrive/MyDrive/finetuned_tinyfinbert_updated")
# tokenizer.save_pretrained("/content/gdrive/MyDrive/finetuned_tinyfinbert_updated")

# print("Further fine-tuning completed and model saved.")