# **Do not forget to change the runtime to A100**

The Llama 3 model used here (Meta-Llama-3-8B) is very large: it has about 8 billion parameters. In addition, as the size of the dataset increases, we need enough RAM to hold both the model and the data. To run the following training, we need an A100 GPU.

**The following notebook is reused from** https://github.com/AI4Finance-Foundation/FinGPT

Install all required packages.
The peft package is needed to run LoRA. Low-rank adaptation (LoRA) is a way to adapt a large machine learning model to specific uses without retraining the entire model.


In [None]:

!pip install transformers --upgrade
!pip install accelerate
!pip install -U bitsandbytes
!pip install loguru
!pip install --upgrade peft
# !pip install transformers==4.40.1

Check the version of transformers. Make sure it is >=4.40

In [None]:
!pip install protobuf  cpm_kernels torch>=2.0 gradio mdtex2html sentencepiece accelerate

In [None]:
import transformers
print(transformers.__version__)

In [None]:
import torch

# Select the compute device: the GPU when CUDA is usable, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cpu":
    print("Warning: CUDA is not available. Using CPU instead.")

Restart the session for transformers and bitsandbytes to take effect


Create a Hugging Face access token and save it in the Colab Secrets as HF_TOKEN.

---



In [None]:
import os
from google.colab import userdata
import shutil

# Retrieve the token from Colab Secrets
hf_token = userdata.get("HF_TOKEN")
os.environ["HF_TOKEN"] = hf_token

In [None]:
from huggingface_hub import login
login(token=hf_token)

The following creates a data folder.

Load and Prepare Dataset:

Import necessary libraries from the datasets package
Load the Twitter Financial News Sentiment (TFNS) dataset and convert it to a Pandas dataframe.

Map numerical labels to their corresponding sentiments. Here we have 3 categories: (negative, positive, neutral).

Note: Although LLMs are capable of classifying into many categories, for higher accuracy it is ideal to restrict the number of classes.

Add instruction for each data entry, which is crucial for Instruction Tuning.
Convert the Pandas dataframe back to a Hugging Face Dataset object.

The following code creates the `./data` and `../data` folders (relative to the current working directory) and removes any previously generated dataset files.

Processed


In [None]:
# `os` is imported in an earlier cell; `shutil` is needed for recursive removal.
import shutil

jsonl_path = "../data/dataset_new.jsonl"
save_path = '../data/dataset_new'
directory = "../data"

# Both folders are used later in this notebook: the JSONL file is written under
# ../data, while the tokenized dataset is saved under ./data.
# exist_ok=True avoids the check-then-create pattern.
for folder in ('./data', directory):
    os.makedirs(folder, exist_ok=True)

# Remove the JSONL file of a previous run so it is regenerated from scratch.
if os.path.exists(jsonl_path):
    os.remove(jsonl_path)

# Remove previously saved datasets. The dataset is saved to ./data/dataset_new
# further down, so that location is cleaned as well as `save_path`.
for stale_dir in (save_path, './data/dataset_new'):
    if os.path.exists(stale_dir):
        shutil.rmtree(stale_dir)

In [None]:
!ls -l ./data/dataset_new

### 1.2 Load and Prepare Dataset

In [None]:
!pip install datasets

In [None]:
from datasets import load_dataset
import datasets

In [None]:
# Integer label -> sentiment word.
dic = {0: 'negative', 1: 'positive', 2: 'neutral'}

# tfns = Twitter Financial News Sentiment; only the 'train' split is used,
# converted to a pandas DataFrame for easy column manipulation.
tfns = load_dataset('zeroshot/twitter-financial-news-sentiment')['train'].to_pandas()

# Replace every numeric label with its sentiment word.
tfns['label'] = tfns['label'].apply(lambda label_id: dic[label_id])

# Every row gets the same prompt instruction (needed for instruction tuning).
tfns['instruction'] = 'What is the sentiment of this tweet? Please choose an answer from {negative/neutral/positive}.'

# Positional rename of the three columns.
# NOTE(review): assumes the frame's column order is (tweet text, label, instruction) — confirm.
tfns.columns = ['input', 'output', 'instruction']

# Back to a Hugging Face Dataset object.
tfns = datasets.Dataset.from_pandas(tfns)
tfns

In [None]:
# Concatenate tfns with itself, so every row appears twice in the result.
# NOTE(review): because each row is duplicated, a later random train/test split can
# place copies of the same tweet on both sides — confirm this is intended.
train_dataset = tmp_dataset = datasets.concatenate_datasets([tfns, tfns])
print(train_dataset.num_rows)

In [None]:
all_dataset = train_dataset.shuffle(seed = 42)
all_dataset.shape

## Part 2: Dataset Formatting and Tokenization

### 2.1 Dataset Formatting

You must structure your data in a specific format that aligns with the training process.

In [None]:
import json
from tqdm.notebook import tqdm

In [None]:
def format_example(example: dict) -> dict:
    """Turn one instruction-tuning record into a prompt/answer pair.

    Args:
        example: Mapping with the keys 'instruction' and 'output', and an
            optional 'input'. A missing or empty 'input' is left out of the
            prompt.

    Returns:
        A dict with two keys: 'context' (the prompt, ending in 'Answer: ')
        and 'target' (the expected answer, taken from example['output']).
    """
    context = f"Instruction:{example['instruction']}\n"
    if example.get('input'):  # skip the Input line when there is no input text
        context += f"Input:{example['input']}\n"
    context += 'Answer: '
    return {"context": context, "target": example['output']}  # one JSONL record


# Backward-compatible alias: the original (misspelled) name is still used by other cells.
format_examle = format_example



# One plain dict per dataset row, keeping only the three fields used for formatting.
data_list = [
    {'instruction': row.instruction, 'input': row.input, 'output': row.output}
    for row in all_dataset.to_pandas().itertuples()
]

In [None]:
print(data_list[0])

In [None]:
# Write the formatted examples to a JSON Lines file (one JSON object per line).
from tqdm.notebook import tqdm
import json

with open("../data/dataset_new.jsonl", 'w') as f:
  f.writelines(
      json.dumps(format_examle(example)) + '\n'
      for example in tqdm(data_list, desc='formatting..')
  )

In [None]:
# Read the JSONL file back: each line is parsed into a dict and collected in json_data_list.
with open("../data/dataset_new.jsonl", 'r') as f:
    json_data_list = [json.loads(line.strip()) for line in f]

In [None]:
print(json_data_list[0]['target'], json_data_list[0]['context'])

### 2.2 Tokenization

Tokenization is the process of converting input text into tokens that can be fed into the model.

In [None]:
from transformers import AutoTokenizer, AutoConfig

In [None]:
model_name = 'meta-llama/Meta-Llama-3-8B'   #Specifies the model you're working with
jsonl_path = '../data/dataset_new.jsonl'
save_path = '../data/dataset_new'    #The path where the processed dataset will be saved after tokenization or any other processing
max_seq_length = 512    #Maximum sequence length for the inputs. If an input exceeds this length, it will either be truncated or skipped.
skip_overlength = True    #A flag that determines whether to skip overlength examples that exceed max_seq_length

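This preprocess function tokenizes the prompt and the target (truncating each to the maximum sequence length) and concatenates them, followed by the end-of-sequence token, into input_ids. The final truncation to the maximum sequence length happens in read_jsonl, and padding is applied later by the data collator.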

In [None]:
def preprocess(tokenizer, config, example, max_seq_length):
  """Tokenize one formatted example into a flat list of token ids.

  Args:
    tokenizer: Object with an `encode` method that returns a list of token ids.
    config: Object exposing `eos_token_id`.
    example: Dict with the keys 'context' (prompt) and 'target' (answer).
    max_seq_length: Passed to `encode` as `max_length` for both texts.

  Returns:
    Dict with 'input_ids' (prompt ids + target ids + EOS id) and 'seq_len'
    (the number of prompt ids).
  """
  prompt_text = example['context']
  target_text = example['target']
  encode_kwargs = {'max_length': max_seq_length, 'truncation': True}

  # The prompt is encoded with the tokenizer's default special-token handling.
  prompt_ids = tokenizer.encode(prompt_text, **encode_kwargs)
  # The target is encoded without special tokens.
  target_ids = tokenizer.encode(target_text, add_special_tokens=False, **encode_kwargs)

  # The EOS id closes the sequence.
  return {
      'input_ids': prompt_ids + target_ids + [config.eos_token_id],
      'seq_len': len(prompt_ids),
  }

In [None]:
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
config = AutoConfig.from_pretrained(model_name, trust_remote_code=True, device_map='auto')

In [None]:
# check
example = json_data_list[0]
prompt = example['context']
target = example['target']

example['target']

input_ids is a complete list of token IDs that combines the input sentence (prompt), the target sentence (target), and the end-of-sequence token (eos_token_id).
This list is fed into the model for training or inference. The model uses these IDs to understand and process the input and generate the corresponding output.

The read_jsonl function reads each line from the JSONL file, preprocesses it using the preprocess function,
and then yields each preprocessed example.

In [None]:
def read_jsonl(path, max_seq_length, skip_overlength=False):
    """Yield one tokenized feature per line of a JSONL file.

    This is a generator: the file is read line by line and each feature is
    yielded as soon as it is built, so the whole file is never held in memory.

    Args:
        path: Path of the JSONL file; every non-blank line must be a JSON
            object with the keys expected by `preprocess`.
        max_seq_length: Maximum number of token ids kept per example.
        skip_overlength: When True, examples longer than `max_seq_length`
            are dropped; when False they are truncated instead.

    Yields:
        The dict returned by `preprocess`, with 'input_ids' cut to at most
        `max_seq_length` entries.
    """
    # The tokenizer and config are loaded for the module-level `model_name`.
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    # No device_map here: a config holds no weights, so there is nothing to place on a device.
    config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
    with open(path, "r") as f:
        # Iterate over the file object directly instead of f.readlines(),
        # which would materialize every line before the first yield.
        for line in tqdm(f):
            if not line.strip():
                continue  # tolerate blank lines (e.g. a trailing newline)
            example = json.loads(line)
            feature = preprocess(tokenizer, config, example, max_seq_length)
            if skip_overlength and len(feature["input_ids"]) > max_seq_length:
                continue
            # Hard cap on the sequence length for examples that were not skipped.
            feature["input_ids"] = feature["input_ids"][:max_seq_length]
            yield feature

### 2.3 Save the Dataset

In [None]:
save_path = './data/dataset_new'

In [None]:
dataset = datasets.Dataset.from_generator(
    lambda: read_jsonl(jsonl_path, max_seq_length, skip_overlength)
    )
dataset.save_to_disk(save_path)

In [None]:
from datasets import load_from_disk

# Re-open the dataset that was saved under ./data.
loaded_dataset = load_from_disk('./data/dataset_new')

# Show the dataset's structure.
print(loaded_dataset)

# Show the token ids of the first sample.
print(loaded_dataset[0]['input_ids'])

### 2.4 Save dataset to your own google drive

Every time you restart colab, you don't have to reformat the data, you can just load the formatted data directly from this google drive.

In [None]:
from google.colab import drive
drive.mount('/content/drive') #You'll be asked to authorize access to your Google Drive

In [None]:
save_path = '/content/drive/MyDrive/Colab Notebooks/AI4Finance/FinGPT/FinGPT: Training with LoRA and Llama-3/dataset_new' #Change to your own address
# Write your own Google drive saving address in xxxxxxxx part: '/content/drive/MyDrive/xxxxxxxxxxxxxxxxx/dataset_new'
dataset.save_to_disk(save_path)

## Part 3: Setup FinGPT training with LoRA and Llama-3

### 3.1 Training Arguments Setup:
Initialize and set training arguments.

In [None]:
from typing import List, Dict, Optional
import torch
from loguru import logger
from transformers import (
    AutoModel,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    BitsAndBytesConfig,
    AutoModelForCausalLM
)
from peft import (
    TaskType,
    LoraConfig,
    get_peft_model,
    set_peft_model_state_dict,
    prepare_model_for_kbit_training,
)
from peft.utils import TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING
from transformers import LlamaForCausalLM

In [None]:
# Hyperparameters for the Hugging Face Trainer used in Part 4.
# NOTE(review): one optimizer step consumes 4 * 8 = 32 sequences per device. With
# num_train_epochs=2, confirm the run has well over 1000 optimizer steps; otherwise
# the warmup (warmup_steps=1000) never finishes and learning_rate is never reached.
# NOTE(review): output_dir lives under '.../Colab Notebooks/...', while the dataset and
# final model are loaded/saved under '.../Collaboration_Jig_San/...' later — confirm paths.
training_args = TrainingArguments(
    output_dir='/content/drive/MyDrive/Colab Notebooks/AI4Finance/FinGPT/FinGPT: Training with LoRA and Llama-3/finetuned_model/',    # Directory for checkpoints and the fine-tuned model
    logging_steps = 500,               # Log every 500 steps
    # max_steps=10000,                 # Maximum number of training steps (commented out, can be enabled)
    num_train_epochs = 2,              # Number of training epochs (train for 2 epochs)
    per_device_train_batch_size=4,     # Batch size of 4 for training on each device (GPU/CPU)
    gradient_accumulation_steps=8,     # Accumulate gradients for 8 steps before updating weights
    learning_rate=1e-4,                # Learning rate set to 1e-4
    weight_decay=0.01,                 # Weight decay set to 0.01
    warmup_steps=1000,                 # Warm up the learning rate for the first 1000 steps
    # save_steps=500,
    fp16=True,                         # Enable FP16 mixed precision training to save memory and speed up training
    # bf16=True,                       # Enable BF16 mixed precision training (commented out)
    torch_compile = False,             # Whether to enable Torch compile (`False` means not enabled)
    load_best_model_at_end = True,     # Load the best-performing model at the end of training
    # evaluation_strategy="steps",       # Older spelling of eval_strategy, kept for reference
    eval_strategy="steps",             # Evaluate at step intervals; eval_steps is not set here (presumably falls back to logging_steps — confirm)
    save_steps=500,                    # Save a checkpoint every 500 steps
    metric_for_best_model="loss",      # "Best" model is chosen by the evaluation loss
    remove_unused_columns=False,       # Keep all dataset columns; the data collator reads "seq_len"
    logging_dir="./logs",              # Directory for the training logs
)


### 3.2 Quantization Config Setup:
Set quantization configuration to reduce model size without losing significant precision.

In [None]:
!pip list | grep bitsandbytes

In [None]:
# Quantization configuration passed to from_pretrained when the base model is loaded.
# NOTE(review): load_in_4bit is False, so the bnb_4bit_* options below presumably have
# no effect and the model is loaded unquantized — confirm whether that is intended.
q_config = BitsAndBytesConfig(load_in_4bit=False,
                                bnb_4bit_quant_type='nf4',
                                bnb_4bit_use_double_quant=True,
                                bnb_4bit_compute_dtype=torch.float16
                                )

In [None]:
import os
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

### 3.3 Model Loading & Preparation:
Load the base model and tokenizer. (Note: the quantization config above sets `load_in_4bit=False`, and no separate INT8 preparation step is performed in the cells below.)

Runtime -> Change runtime type -> A100 GPU

Restart runtime and run again if not working

In [None]:
model_name = "meta-llama/Meta-Llama-3-8B"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

In [None]:
from transformers.utils import is_bitsandbytes_available
is_bitsandbytes_available()

In [None]:
model = LlamaForCausalLM.from_pretrained(
        model_name,
        quantization_config = q_config,
        trust_remote_code=True,
        device_map='auto'
    )

In [None]:


# # Check CUDA availability and set device
# if torch.cuda.is_available():
#     device = torch.device("cuda")
# else:
#     device = torch.device("cpu")
#     print("Warning: CUDA is not available. Using CPU instead.")



### 3.4 LoRA Config & Setup

In [None]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.

    Args:
        model: Any object whose `named_parameters()` yields (name, parameter)
            pairs, where each parameter has `numel()` and `requires_grad`.

    Prints the trainable count, the total count, and the trainable percentage.
    A model without any parameters reports 0.0% instead of raising
    ZeroDivisionError.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count
    # Guard the division: an empty model has all_param == 0.
    percentage = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {percentage}"
    )

# LoRA setup for Llama 3.
# The adapter target modules come from PEFT's built-in mapping, entry 'llama'.
target_modules = TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING['llama']
lora_config = LoraConfig(
    target_modules=target_modules,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    bias='none',
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
)

# Wrap the base model with the LoRA configuration using PEFT (Parameter-Efficient Fine-Tuning).
model = get_peft_model(model, lora_config)

# Report how many parameters are trainable after wrapping.
print_trainable_parameters(model)

In [None]:
# Optionally restore adapter weights from a previous run.
# Set resume_from_checkpoint to a checkpoint directory to enable this; with None the whole cell is a no-op.
resume_from_checkpoint = None
if resume_from_checkpoint is not None:
    # First look for a full checkpoint file in the directory.
    checkpoint_name = os.path.join(resume_from_checkpoint, 'pytorch_model.bin')
    if not os.path.exists(checkpoint_name):
        # Fall back to an adapter-only checkpoint file.
        checkpoint_name = os.path.join(
            resume_from_checkpoint, 'adapter_model.bin'
        )
        # The variable is reset here; it is not read again in the visible cells
        # (trainer.train() is called without arguments).
        resume_from_checkpoint = False
    if os.path.exists(checkpoint_name):
        logger.info(f'Restarting from {checkpoint_name}')
        # NOTE(review): torch.load unpickles the file — only load checkpoints from trusted sources.
        adapters_weights = torch.load(checkpoint_name)
        # Copy the loaded weights into the PEFT-wrapped model.
        set_peft_model_state_dict(model, adapters_weights)
    else:
        logger.info(f'Checkpoint {checkpoint_name} not found')

In [None]:
model.print_trainable_parameters()

## Part 4: Loading Data and Training FinGPT


In this segment, we'll delve into the loading of your pre-processed data, and finally, launch the training of your FinGPT model. Here's a stepwise breakdown of the script provided:



*   You need to purchase a Google Colab GPU plan: Colab Pro is sufficient, or you can just buy 100 compute units for $10.

### 4.1 Loading Your Data:

In [None]:
# Load the tokenized dataset from Google Drive and split it into train/test parts.
from datasets import load_from_disk
import datasets
from google.colab import drive

drive.mount('/content/drive') # You will be asked to authorize access to your Google Drive

# NOTE(review): this folder differs from the Drive path the dataset was saved to in
# section 2.4 ('.../Colab Notebooks/...') — make sure both point to the same location.
save_path = '/content/drive/MyDrive/Collaboration_Jig_San/Chapter_2/AI4Finance/FinGPT/FinGPT: Training with LoRA and Llama-3/dataset_new'
# Load saved dataset
dataset = load_from_disk(save_path)
# Hold out 20% of the rows as the test split; the fixed seed makes the shuffle reproducible.
# NOTE(review): the training data was built by concatenating tfns with itself, so copies
# of the same example can land in both splits — confirm this is acceptable for evaluation.
dataset = dataset.train_test_split(0.2, shuffle=True, seed = 42)

### 4.2 Training Configuration and Launch:



*   Customize the Trainer class for specific loss computation, prediction step, and model-saving methods.
*   Define a data collator function to process batches of data during training.
*   Set up TensorBoard for logging, instantiate your modified trainer, and begin training.

In [None]:
import torch.nn.functional as F

In [None]:
def data_collator(features: list) -> dict:
    """Pad a list of tokenized examples into one batch of tensors.

    Args:
        features: List of dicts, each with 'input_ids' (list of token ids)
            and 'seq_len' (number of prompt tokens at the start of
            'input_ids').

    Returns:
        Dict with two LongTensors of shape (batch, longest sequence):
        'input_ids', right-padded with the pad token id, and 'labels', a copy
        of the ids in which prompt and padding positions are set to -100.
        Rows are ordered from the longest to the shortest sequence.
    """
    # -100 is the label value the causal-LM loss skips. Filling masked positions
    # with the pad/eos id instead would train the model to predict that token
    # at every prompt and padding position.
    ignore_index = -100

    # Fall back to the EOS id when the tokenizer defines no pad token.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    longest = max(len(feature["input_ids"]) for feature in features)

    input_ids = []
    labels_list = []

    # Longest sequences first (stable sort, so ties keep their input order).
    for feature in sorted(features, key=lambda item: -len(item["input_ids"])):
        ids = feature["input_ids"]
        pad_len = longest - len(ids)
        # Never mask more positions than the sequence has.
        prompt_len = min(feature["seq_len"], len(ids))

        # Labels line up one-to-one with input_ids (the model shifts them
        # internally), so the whole prompt is masked and only the answer
        # tokens contribute to the loss.
        labels = [ignore_index] * prompt_len + ids[prompt_len:] + [ignore_index] * pad_len
        padded_ids = ids + [pad_token_id] * pad_len

        input_ids.append(torch.LongTensor(padded_ids))
        labels_list.append(torch.LongTensor(labels))

    return {
        "input_ids": torch.stack(input_ids),
        "labels": torch.stack(labels_list),
    }

In [None]:
from torch.utils.tensorboard import SummaryWriter
from transformers.integrations import TensorBoardCallback

In [None]:
# Train
# Took about 10 compute units
writer = SummaryWriter()
try:
    trainer = Trainer(
        model=model,
        args=training_args,             # Trainer args
        train_dataset=dataset["train"], # Training set
        eval_dataset=dataset["test"],   # Testing set
        data_collator=data_collator,    # Data Collator
        callbacks=[TensorBoardCallback(writer)],
    )
    trainer.train()
finally:
    # Close the TensorBoard writer even when training is interrupted or fails.
    writer.close()

# Save model to Google Drive
model_output_dir = '/content/drive/MyDrive/Collaboration_Jig_San/Chapter_2/AI4Finance/FinGPT/FinGPT: Training with LoRA and Llama-3/Model/'
model.save_pretrained(model_output_dir)

In [None]:
model_output_dir = '/content/drive/MyDrive/Collaboration_Jig_San/Chapter_2/AI4Finance/FinGPT/FinGPT: Training with LoRA and Llama-3/Model/'
model.save_pretrained(model_output_dir)

Now your model is trained and saved! You can download it and use it for generating financial insights or any other relevant tasks in the finance domain. The usage of TensorBoard allows you to deeply understand and visualize the training dynamics and performance of your model in real-time.

## Part 5: Inference and Benchmarks using FinGPT

Now that your model is trained, let’s understand how to use it to infer and run benchmarks.


*   Took about 10 compute units

### 5.1 Load the model

In [None]:
#clone the FinNLP repository
!git clone https://github.com/AI4Finance-Foundation/FinNLP.git


import sys
sys.path.append('/content/FinNLP/')

In [None]:
from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM, LlamaForCausalLM
from peft import PeftModel
import torch

# Load benchmark datasets from FinNLP
from finnlp.benchmarks.fpb import test_fpb
from finnlp.benchmarks.fiqa import test_fiqa , add_instructions
from finnlp.benchmarks.tfns import test_tfns
from finnlp.benchmarks.nwgi import test_nwgi

In [None]:
# load model from google drive
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os

# Location of the fine-tuned PEFT model on Google Drive.
path_to_check = '/content/drive/MyDrive/Collaboration_Jig_San/Chapter_2/AI4Finance/FinGPT/FinGPT: Training with LoRA and Llama-3/Model/'

# Report whether that location is reachable.
print("Path exists." if os.path.exists(path_to_check) else "Path does not exist.")

In [None]:
from transformers import AutoModelForSequenceClassification

In [None]:
from huggingface_hub import login

# login into hf
login(token=hf_token)

In [None]:

def eval_with_PEFT_model(base_model, peft_model):
  """Load the base model in 8-bit, attach the fine-tuned adapter, and return it for inference.

  Args:
    base_model: Name or path of the base Llama checkpoint.
    peft_model: Path of the fine-tuned PEFT adapter.

  Returns:
    A (model, tokenizer) tuple; the model is in evaluation mode.
  """
  # Function-scope import: this section may run after a runtime restart, where
  # the training-section imports are no longer available.
  from transformers import BitsAndBytesConfig

  # Load tokenizer
  tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
  tokenizer.pad_token = tokenizer.eos_token  # Use eos_token as pad_token
  tokenizer.padding_side = 'left'  # Important: Set as left padding

  # 8-bit loading is requested through quantization_config; passing
  # load_in_8bit directly to from_pretrained is deprecated.
  model = LlamaForCausalLM.from_pretrained(
      base_model,
      trust_remote_code=True,
      quantization_config=BitsAndBytesConfig(load_in_8bit=True),
      device_map="cuda:0",  # Place the model on the first GPU
  )

  # Load the fine-tuned PEFT adapter weights on top of the base model.
  model = PeftModel.from_pretrained(model, peft_model)

  return model.eval(), tokenizer

In [None]:
print(path_to_check)

In [None]:
base_model = "meta-llama/Meta-Llama-3-8B" # Loading the Llama base model and supporting text-generated models
peft_model = path_to_check  # Fine-tuned PEFT model paths

model,tokenizer = eval_with_PEFT_model(base_model,peft_model)

### 5.2 Run Benchmarks:

In [None]:
batch_size = 8
import logging
logging.getLogger("transformers").setLevel(logging.ERROR)

In [None]:
# TFNS Test Set, len 2388
# Available: 99.4 compute units
res_tfns = test_tfns(model, tokenizer, batch_size = batch_size)
# Available: 98.9 compute units
# Took about 0.5 compute units for inference

In [None]:
# FPB, len 1212
res_fpb = test_fpb(model, tokenizer, batch_size = batch_size)
# since we are running on our dataset, select 'y'

In [None]:
# FiQA, len 275
res_fiqa = test_fiqa(model, tokenizer, prompt_fun = add_instructions, batch_size = batch_size)

In [None]:
# NWGI, len 4047
res_nwgi = test_nwgi(model, tokenizer, batch_size = batch_size)

In [None]:
res_nwgi

In [None]:
from sklearn.metrics import accuracy_score,f1_score
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
def get_score(df):
  """Compute accuracy, macro F1 and weighted F1 for one benchmark result.

  Args:
    df: Table-like object whose 'target' entry holds the reference labels
      and whose 'new_out' entry holds the predicted labels.

  Returns:
    Tuple (accuracy, f1_macro, f1_weighted), each rounded to 3 decimals.
  """
  y_true = df['target']
  y_pred = df['new_out']
  raw_scores = (
      accuracy_score(y_true, y_pred),
      f1_score(y_true, y_pred, average='macro'),
      f1_score(y_true, y_pred, average='weighted'),
  )
  return tuple(round(value, 3) for value in raw_scores)


def form_socre_dic(dataset_name):
  """Return the scores of one benchmark result as a labelled dict.

  Args:
    dataset_name: The benchmark result passed on to `get_score`.

  Returns:
    Dict with the keys 'Accuracy', 'F1_macro' and 'F1_weighted'.
  """
  accuracy, f1_macro, f1_weighted = get_score(dataset_name)
  return {
      'Accuracy': accuracy,
      'F1_macro': f1_macro,
      'F1_weighted': f1_weighted,
  }

In [None]:
  # Scores of the fine-tuned model on each benchmark, keyed by benchmark name.
  # NOTE(review): this cell carries a stray leading indent; outside a notebook cell it
  # would raise IndentationError — consider dedenting it.
  score_dic = {
        'TFNS': form_socre_dic(res_tfns),
        'FPB': form_socre_dic(res_fpb),
        'FIQA': form_socre_dic(res_fiqa),
        'NWGI': form_socre_dic(res_nwgi)
  }

In [None]:
pd.DataFrame(score_dic)

In [None]:
score_dic

### 5.3 Result comparison

In [None]:
#Results of the other fine-tuned models come from previous training results.
results = {
    "TFNS": {
        "FinGPT-ChatGlm2-6b": {"Acc": 0.856, "F1_macro": 0.806, "F1_weighted": 0.850},
        "FinGPT-V3.1": {"Acc": 0.876, "F1_macro": 0.841, "F1_weighted":  0.875},
    },
    "FPB": {
        "FinGPT-ChatGlm2-6b": {"Acc": 0.741, "F1_macro": 0.655, "F1_weighted": 0.694},
        "FinGPT-V3.1": {"Acc": 0.856, "F1_macro": 0.841, "F1_weighted": 0.855},
    },
    "FIQA": {
        "FinGPT-ChatGlm2-6b": {"Acc": 0.48, "F1_macro": 0.5,  "F1_weighted": 0.49},
        "FinGPT-V3.1": {"Acc": 0.836, "F1_macro":0.746, "F1_weighted": 0.850},
    },
    "NWGI": {
        "FinGPT-ChatGlm2-6b": {"Acc": 0.521, "F1_macro": 0.500, "F1_weighted":0.490},
        "FinGPT-V3.1": {"Acc": 0.642, "F1_macro": 0.650,"F1_weighted": 0.642},
    },
}

In [None]:
# Merge this notebook's scores into `results` under the "FinGPT-Llama-8b" entry.
# Benchmarks that are not already present in `results` are skipped.
for dataset_name, scores in score_dic.items():
    if dataset_name not in results:
        continue

    # Create the entry on first use, then fill in / overwrite the three metrics.
    llama_entry = results[dataset_name].setdefault("FinGPT-Llama-8b", {})
    llama_entry.update({
        "Acc": scores['Accuracy'],
        "F1_macro": scores['F1_macro'],
        "F1_weighted": scores['F1_weighted']
    })

In [None]:
# Flatten the nested `results` dict into one row per (dataset, model) pair.
# A comprehension is used so that no loop variable leaks into the notebook namespace:
# a plain for-loop over `dataset` / `model` would overwrite the globals that hold the
# Hugging Face dataset and the fine-tuned model.
metric_columns = ["Acc", "F1_macro", "F1_micro", "F1_weighted"]
data = [
    # Metrics missing from an entry (e.g. F1_micro) become None.
    [dataset_key, model_key, *(metrics.get(column) for column in metric_columns)]
    for dataset_key, models in results.items()
    for model_key, metrics in models.items()
]

df = pd.DataFrame(data, columns=["Dataset", "Model", *metric_columns])

# visualization
def plot_metric(metric_name):
    """Draw one line per model showing `metric_name` across the datasets.

    Reads the module-level DataFrame `df` (columns 'Dataset', 'Model' and the
    metric columns) and displays the figure with matplotlib.

    Args:
        metric_name: Name of the `df` column to plot on the y axis.
    """
    plt.figure(figsize=(10, 6))
    # sort=False keeps the models in their order of first appearance in df.
    for model_label, model_rows in df.groupby("Model", sort=False):
        plt.plot(
            model_rows["Dataset"],
            model_rows[metric_name],
            marker='o',
            label=model_label,
        )
    plt.title(f"{metric_name} Comparison Across Datasets")
    plt.xlabel("Dataset")
    plt.ylabel(metric_name)
    plt.legend()
    plt.grid(True)
    plt.show()

# Visualization of Accuracy, F1_macro and F1_weighted comparison
plot_metric("Acc")
plot_metric("F1_macro")
plot_metric("F1_weighted")

In [None]:
# Transpose the data table so that the rows are datasets and the columns are models for Acc, F1_macro, and F1_weighted, respectively.


acc_df = df.pivot(index='Dataset', columns='Model', values='Acc')


f1_macro_df = df.pivot(index='Dataset', columns='Model', values='F1_macro')


f1_weighted_df = df.pivot(index='Dataset', columns='Model', values='F1_weighted')

In [None]:
print("Accuracy DataFrame:")
acc_df

In [None]:
print("\nF1 Macro DataFrame:")
f1_macro_df

In [None]:
print("\nF1 Weighted DataFrame:")
f1_weighted_df

# Up to here, the code comes from the FinGPT training-with-LoRA notebook