## Install requirements

In [5]:
!pip install -q bitsandbytes datasets accelerate
!pip install -q git+https://github.com/huggingface/transformers.git@main git+https://github.com/huggingface/peft.git@main


In [2]:
##Import model and tokenizer

In [1]:
!nvidia-smi

Mon Aug 26 11:34:45 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.81                 Driver Version: 560.81         CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 3060      WDDM  |   00000000:01:00.0  On |                  N/A |
|  0%   55C    P8             13W /  170W |    1913MiB /  12288MiB |      1%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [2]:
import torch

num_of_gpus = torch.cuda.device_count()
print(num_of_gpus)

1


## Setting the flan 

In [3]:
# Select CUDA device index
import os
import torch

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, BitsAndBytesConfig

model_name = "google/flan-t5-small"

model = AutoModelForSeq2SeqLM.from_pretrained(model_name, quantization_config=BitsAndBytesConfig(load_in_8bit=True))
tokenizer = AutoTokenizer.from_pretrained(model_name)



## Setting the Zepyer 7b

In [None]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, BitsAndBytesConfig
import os
import torch

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
model = AutoModelForCausalLM.from_pretrained("HuggingFaceH4/zephyr-7b-beta")

tokenizer_config.json:   0%|          | 0.00/1.43k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/168 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/638 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/8 [00:00<?, ?it/s]

model-00001-of-00008.safetensors:   0%|          | 0.00/1.89G [00:00<?, ?B/s]

model-00002-of-00008.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

In [4]:
#!pip install accelerate

In [5]:
## Prepare model for training

In [6]:
from peft import prepare_model_for_kbit_training

model = prepare_model_for_kbit_training(model)

In [7]:

from peft import LoraConfig, get_peft_model, TaskType


def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )


lora_config = LoraConfig(
    r=16, lora_alpha=32, target_modules=["q", "v"], lora_dropout=0.05, bias="none", task_type="SEQ_2_SEQ_LM"
)


model = get_peft_model(model, lora_config)
print_trainable_parameters(model)

trainable params: 688128 || all params: 77649280 || trainable%: 0.8862001038515747


In [8]:
## Load and process data

In [9]:
# loading dataset
dataset = load_dataset("financial_phrasebank", "sentences_allagree",trust_remote_code=True)
dataset = dataset["train"].train_test_split(test_size=0.1)
dataset["validation"] = dataset["test"]
del dataset["test"]

classes = dataset["train"].features["label"].names
dataset = dataset.map(
    lambda x: {"text_label": [classes[label] for label in x["label"]]},
    batched=True,
    num_proc=1,
)

Map:   0%|          | 0/2037 [00:00<?, ? examples/s]

Map:   0%|          | 0/227 [00:00<?, ? examples/s]

In [41]:
dataset['train'][0]

{'sentence': 'The share subscription period for C options will commence on 1 September 2008 and expire on 31 March 2011 .',
 'label': 1,
 'text_label': 'neutral'}

In [10]:
# data preprocessing
text_column = "sentence"
label_column = "text_label"
max_length = 128


def preprocess_function(examples):
    inputs = examples[text_column]
    targets = examples[label_column]
    model_inputs = tokenizer(inputs, max_length=max_length, padding="max_length", truncation=True, return_tensors="pt")
    labels = tokenizer(targets, max_length=3, padding="max_length", truncation=True, return_tensors="pt")
    labels = labels["input_ids"]
    labels[labels == tokenizer.pad_token_id] = -100
    model_inputs["labels"] = labels
    return model_inputs


processed_datasets = dataset.map(
    preprocess_function,
    batched=True,
    num_proc=1,
    remove_columns=dataset["train"].column_names,
    load_from_cache_file=False,
    desc="Running tokenizer on dataset",
)

train_dataset = processed_datasets["train"]
eval_dataset = processed_datasets["validation"]

Running tokenizer on dataset:   0%|          | 0/2037 [00:00<?, ? examples/s]

Running tokenizer on dataset:   0%|          | 0/227 [00:00<?, ? examples/s]

In [37]:
train_dataset['labels']

[[7163, 1, -100],
 [7163, 1, -100],
 [7163, 1, -100],
 [1465, 1, -100],
 [7163, 1, -100],
 [7163, 1, -100],
 [7163, 1, -100],
 [7163, 1, -100],
 [7163, 1, -100],
 [1465, 1, -100],
 [7163, 1, -100],
 [1465, 1, -100],
 [2841, 1, -100],
 [1465, 1, -100],
 [7163, 1, -100],
 [7163, 1, -100],
 [7163, 1, -100],
 [7163, 1, -100],
 [2841, 1, -100],
 [7163, 1, -100],
 [7163, 1, -100],
 [1465, 1, -100],
 [7163, 1, -100],
 [7163, 1, -100],
 [7163, 1, -100],
 [7163, 1, -100],
 [7163, 1, -100],
 [7163, 1, -100],
 [7163, 1, -100],
 [1465, 1, -100],
 [1465, 1, -100],
 [7163, 1, -100],
 [1465, 1, -100],
 [1465, 1, -100],
 [7163, 1, -100],
 [2841, 1, -100],
 [7163, 1, -100],
 [2841, 1, -100],
 [2841, 1, -100],
 [7163, 1, -100],
 [7163, 1, -100],
 [7163, 1, -100],
 [7163, 1, -100],
 [7163, 1, -100],
 [2841, 1, -100],
 [7163, 1, -100],
 [2841, 1, -100],
 [7163, 1, -100],
 [7163, 1, -100],
 [7163, 1, -100],
 [7163, 1, -100],
 [7163, 1, -100],
 [2841, 1, -100],
 [7163, 1, -100],
 [2841, 1, -100],
 [7163, 1,

In [13]:
#training the model

In [19]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    "temp",
    evaluation_strategy="epoch",
    learning_rate=1e-3,
    gradient_accumulation_steps=1,
    auto_find_batch_size=True,
    num_train_epochs=1,
    save_steps=10000,
    save_total_limit=8,
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)
model.config.use_cache = False  # silence the warnings. Please re-enable for inference!

In [20]:
trainer.train()


Epoch,Training Loss,Validation Loss
1,No log,0.045147


TrainOutput(global_step=255, training_loss=0.0503692065968233, metrics={'train_runtime': 72.4143, 'train_samples_per_second': 28.13, 'train_steps_per_second': 3.521, 'total_flos': 95741257973760.0, 'train_loss': 0.0503692065968233, 'epoch': 1.0})

In [21]:
##Qualitatively test our model


In [25]:
model.eval()
input_text = "In January-September 2009 , the Group 's net interest income increased to EUR 112.4 mn from EUR 74.3 mn in January-September 2008 ."
inputs = tokenizer(input_text, return_tensors="pt")

outputs = model.generate(input_ids=inputs["input_ids"], max_new_tokens=10)

print("input sentence: ", input_text)
print(" output prediction: ", tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True))
print(outputs)

input sentence:  In January-September 2009 , the Group 's net interest income increased to EUR 112.4 mn from EUR 74.3 mn in January-September 2008 .
 output prediction:  ['positive']
tensor([[   0, 1465,    1]])


In [22]:
## Deploying model on hub

In [31]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [32]:
model.push_to_hub("flan-t5-large-financial-phrasebank-lora", use_auth_token=True)




adapter_model.safetensors:   0%|          | 0.00/2.77M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/LALAJI/flan-t5-large-financial-phrasebank-lora/commit/3ed6f1610f69548dfae0dbb9a166beddc6a7e499', commit_message='Upload model', commit_description='', oid='3ed6f1610f69548dfae0dbb9a166beddc6a7e499', pr_url=None, pr_revision=None, pr_num=None)