# FINETUNING

In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig,HfArgumentParser,TrainingArguments,pipeline, logging, TextStreamer
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
import os, torch, wandb, platform, gradio, warnings
import torch
from datasets import load_dataset
from trl import SFTTrainer
from huggingface_hub import notebook_login
from dotenv import dotenv_values

HF_TOKEN = dotenv_values(".env.base")['HF_TOKEN']
base_model = "mistralai/Mistral-7B-v0.1"
new_model = "ferrazzipietro/mistral-7B-E3C-FT"

  from .autonotebook import tqdm as notebook_tqdm
  warn("The installed version of bitsandbytes was compiled without GPU support. "


'NoneType' object has no attribute 'cadam32bit_grad_fp32'


In [2]:
from torch import bfloat16
# Load base model(Mistral 7B)
bnb_config = BitsAndBytesConfig(
    load_in_4bit= True,
    bnb_4bit_quant_type= "nf4",
    bnb_4bit_compute_dtype= bfloat16,
    bnb_4bit_use_double_quant= True,
)
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto"
)
model.config.use_cache = False # silence the warnings. Please re-enable for inference!
model.config.pretraining_tp = 1
model.gradient_checkpointing_enable()


config.json: 100%|██████████| 571/571 [00:00<00:00, 244kB/s]


RuntimeError: No GPU found. A GPU is needed for quantization.

In [3]:
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True, padding_side='left')
# tokenizer.padding_side = 'left'
tokenizer.pad_token = tokenizer.eos_token
tokenizer.add_eos_token = True
tokenizer.add_bos_token, tokenizer.add_eos_token

(True, True)

In [4]:
dataset = load_dataset("ferrazzipietro/e3c_finetuning_processed")

In [6]:
dataset['en.layer1'][0]

{'input': '<s>[INST] Extract the entities contained in this text: <<<A 46-year-old man with hypertension and dyslipidemia diagnosed 4-months before, as well as new-onset diabetes mellitus unveiled 1-month earlier, was referred to emergency department for hypokalemia. Hormonal study and dynamic biochemical tests performed indicated ECS. Imaging and cytological findings pointed toward a likely primary right parotid malignancy with liver metastases. Somatostatin receptor scintigraphy has shown an increased uptake in the parotid gland and mild expression in liver metastasis. The patient underwent right parotidectomy, and histopathologic examination confirmed ACC. Meanwhile, hypercortisolism was managed with metyrapone, ketoconazole, and lanreotide. Despite chemotherapy onset, a rapid disease progression and clinical course deterioration was observed.\r\n>>> [/INST]',
 'output': 'offset: [23, 35] text: hypertension ||| offset: [40, 52] text: dyslipidemia ||| offset: [53, 62] text: diagnosed

In [5]:
#Adding the adapters in the layers
model = prepare_model_for_kbit_training(model)
peft_config = LoraConfig(
        r=16,
        lora_alpha=16,
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj","gate_proj"]
    )
model = get_peft_model(model, peft_config)

In [11]:
# Monitering the LLM
wandb.login(key = "6fc357afc502ac6974d3198a2031bbbc155f73f0")
run = wandb.init(project='Fine tuning mistral 7B', job_type="training", anonymous="allow")

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/ubuntu/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mferrazzipietro[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [6]:
def formatting_prompts_func(example):
    output_texts = []
    for i in range(len(example['question'])):
        text = f"### Question: Can you talk me about {example['title'][i]}\n ### Answer: {example['text'][i]}"
        output_texts.append(text)
    return output_texts

In [7]:
#Hyperparamter
training_arguments = TrainingArguments(
    output_dir= "./results",
    num_train_epochs= 2,
    per_device_train_batch_size= 8,
    gradient_accumulation_steps= 2,
    optim = "paged_adamw_8bit",
    save_steps= 1000,
    logging_steps= 30,
    learning_rate= 2e-4,
    weight_decay= 0.001,
    fp16= False,
    bf16= False,
    max_grad_norm= 0.3,
    max_steps= -1,
    warmup_ratio= 0.3,
    group_by_length= True,
    lr_scheduler_type= "constant",
    report_to="wandb"
)
training_arguments_A100 = TrainingArguments(
    output_dir= "./results",
    num_train_epochs= 2,
    per_device_train_batch_size= 32,
    gradient_accumulation_steps= 2,
    optim = "paged_adamw_8bit",
    save_steps= 1000,
    logging_steps= 30,
    learning_rate= 2e-4,
    weight_decay= 0.001,
    fp16= False,
    bf16= False,
    max_grad_norm= 0.3,
    max_steps= -1,
    warmup_ratio= 0.3,
    group_by_length= True,
    lr_scheduler_type= "constant",
    report_to="wandb"
)
# Setting sft parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset, #.select(range(100000)),
    formatting_func=formatting_prompts_func,
    peft_config=peft_config,
    max_seq_length= None,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False,
)
trainer.train()
trainer.push_to_hub(new_model, token = HF_TOKEN)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mferrazzipietro[0m. Use [1m`wandb login --relogin`[0m to force relogin


You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss


In [None]:
?????
trainer.model.save_pretrained(new_model)
wandb.finish()
model.config.use_cache = True
model.eval()

# Clear the memory footprint

del model, trainer
torch.cuda.empty_cache()

# Reload the base model
base_model_reload = AutoModelForCausalLM.from_pretrained(
    base_model, low_cpu_mem_usage=True,
    return_dict=True,torch_dtype=torch.bfloat16,
    device_map= {"": 0})
model = PeftModel.from_pretrained(base_model_reload, new_model)
model = model.merge_and_unload()

# Reload tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

model.push_to_hub(new_model, use_temp_dir=False, token = HF_TOKEN)
tokenizer.push_to_hub(new_model, use_temp_dir=False, token = HF_TOKEN)

In [80]:
dataset[0]

{'pmid': '30599115',
 'title': 'Decreased heart rate recovery may predict a high SYNTAX score in patients with stable coronary artery disease.',
 'text': 'An impaired heart rate recovery (HRR) has been associated with increased risk of cardiovascular events, cardiovascular, and all-cause mortality. However, the diagnostic ability of HRR for the presence and severity of coronary artery disease (CAD) has not been clearly elucidated. Our aim was to investigate the relationship between HRR and the SYNTAX (SYNergy between percutaneous coronary intervention with TAXus and cardiac surgery) score in patients with stable CAD (SCAD). A total of 406 patients with an abnormal treadmill exercise test and ≥50% coronary stenosis on coronary angiography were included. The HRR was calculated by subtracting the HR in the first minute of the recovery period from the maximum HR during exercise. The SYNTAX score ≥23 was accepted as high. Correlation of HRR with SYNTAX score and independent predictors of hi

In [91]:
trainer.model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): PeftModelForCausalLM(
      (base_model): LoraModel(
        (model): MistralForCausalLM(
          (model): MistralModel(
            (embed_tokens): Embedding(32000, 4096)
            (layers): ModuleList(
              (0-31): 32 x MistralDecoderLayer(
                (self_attn): MistralAttention(
                  (q_proj): Linear4bit(
                    (lora_dropout): ModuleDict(
                      (default): Dropout(p=0.05, inplace=False)
                    )
                    (lora_A): ModuleDict(
                      (default): Linear(in_features=4096, out_features=16, bias=False)
                    )
                    (lora_B): ModuleDict(
                      (default): Linear(in_features=16, out_features=4096, bias=False)
                    )
                    (lora_embedding_A): ParameterDict()
                    (lora_embedding_B): ParameterDict()
                    (base_layer): Linear4bit(in

In [81]:
def stream(user_prompt, model):
    runtimeFlag = "cuda:0"
    system_prompt = ''
    B_INST, E_INST = "<s>", "</s>"

    prompt = f"{system_prompt}{B_INST} ###Question: {user_prompt.strip()}\n {E_INST}"

    inputs = tokenizer([prompt], return_tensors="pt").to(runtimeFlag)

    streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    _ = model.generate(**inputs, streamer=streamer, max_new_tokens=200)

In [56]:
dataset[51110]

{'pmid': '18154643',
 'title': 'Process skill rather than motor skill seems to be a predictor of costs for rehabilitation after a stroke in working age; a longitudinal study with a 1 year follow up post discharge.',
 'text': 'In recent years a number of costs of stroke studies have been conducted based on incidence or prevalence and estimating costs at a given time. As there still is a need for a deeper understanding of factors influencing these costs the aim of this study was to calculate the direct and indirect costs in a younger (<65) sample of stroke patients and to explore factors affecting the costs. Fifty-eight patients included in a study of home rehabilitation and followed for 1 year after discharge from the rehabilitation unit, were interviewed about their use of health care services, assistance, medications and assistive devices. Costs (defined as the cost for society) were calculated. A linear regression of cost and variables of functioning, ability, community integration a

In [93]:
stream("What can an impaired heart rate recovery (HRR) been associated to?", model)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument mat2 in method wrapper_CUDA_mm)

In [73]:
stream("What language can you speak?", model)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


мммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммммм


In [16]:
del model, trainer
torch.cuda.empty_cache()

# Reload the base model
base_model_reload = AutoModelForCausalLM.from_pretrained(
    base_model, low_cpu_mem_usage=True,
    return_dict=True,torch_dtype=torch.bfloat16,
    device_map= {"": 0})
model = PeftModel.from_pretrained(base_model_reload, new_model)
model = model.merge_and_unload()

# Reload tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

Loading checkpoint shards: 100%|██████████| 2/2 [00:08<00:00,  4.06s/it]


In [90]:
model.save_pretrained("results/")

In [88]:
model.push_to_hub(new_model, use_temp_dir=False, token = HF_TOKEN)
tokenizer.push_to_hub(new_model, use_temp_dir=False)

FileNotFoundError: [Errno 2] No such file or directory: 'mistral-7B-PubMed-0'

In [3]:
dataset = load_dataset("gathnex/Gath_baize", split="train")

Downloading readme: 100%|██████████| 21.0/21.0 [00:00<00:00, 22.3kB/s]
Downloading data: 100%|██████████| 222M/222M [00:43<00:00, 5.15MB/s]
Downloading data files: 100%|██████████| 1/1 [00:43<00:00, 43.44s/it]
Extracting data files: 100%|██████████| 1/1 [00:00<00:00, 369.28it/s]
Generating train split: 210311 examples [00:02, 73506.87 examples/s]


In [5]:
dataset

Dataset({
    features: ['chat_sample', 'dataset_origin'],
    num_rows: 210311
})

In [4]:
dataset["chat_sample"][0]

'The conversation between Human and AI assisatance named Gathnex [INST] Generate a headline given a content block.\nThe Sony Playstation 5 is the latest version of the console. It has improved graphics and faster processing power.\n[/INST] Experience Amazing Graphics and Speed with the New Sony Playstation 5'

In [5]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM
HF_TOKEN='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'

tokenizer = AutoTokenizer.from_pretrained("ferrazzipietro/mistral-7B-FT-E3C-en-layer1-hub", token=HF_TOKEN, device_map="auto")
model = AutoModelForCausalLM.from_pretrained("ferrazzipietro/mistral-7B-FT-E3C-en-layer1-hub", token=HF_TOKEN)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]


SafetensorError: Error while deserializing header: MetadataIncompleteBuffer

In [4]:
from peft import PeftModel
from transformers import AutoModelForCausalLM
import torch
base_model_reload = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1", low_cpu_mem_usage=True,
    return_dict=True,torch_dtype=torch.float16,
    device_map= "auto")



Loading checkpoint shards: 100%|██████████| 2/2 [00:14<00:00,  7.29s/it]


In [7]:
adp = "ferrazzipietro/adapters_tmp_prova"
from dotenv import dotenv_values
HF_TOKEN = dotenv_values(".env.base")['HF_TOKEN']
merged_model = PeftModel.from_pretrained(base_model_reload, adp, token=HF_TOKEN, device_map="auto")



ValueError: We need an `offload_dir` to dispatch this model according to this `device_map`, the following submodules need to be offloaded: base_model.model.model.layers.4, base_model.model.model.layers.5, base_model.model.model.layers.6, base_model.model.model.layers.7, base_model.model.model.layers.8, base_model.model.model.layers.9, base_model.model.model.layers.10, base_model.model.model.layers.11, base_model.model.model.layers.12, base_model.model.model.layers.13, base_model.model.model.layers.14, base_model.model.model.layers.15, base_model.model.model.layers.16, base_model.model.model.layers.17, base_model.model.model.layers.18, base_model.model.model.layers.19, base_model.model.model.layers.20, base_model.model.model.layers.21, base_model.model.model.layers.22, base_model.model.model.layers.23, base_model.model.model.layers.24, base_model.model.model.layers.25, base_model.model.model.layers.26, base_model.model.model.layers.27, base_model.model.model.layers.28, base_model.model.model.layers.29, base_model.model.model.layers.30, base_model.model.model.layers.31, base_model.model.model.norm, base_model.model.lm_head.

In [1]:
from utils.data_preprocessing import preprocess_data
from dotenv import dotenv_values
from datasets import load_dataset
HF_TOKEN = dotenv_values(".env.base")['HF_TOKEN']

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
hf_e3c = load_dataset("ferrazzipietro/e3c-sentences", token = HF_TOKEN, download_mode="force_redownload")
hf_e3c

Downloading readme: 100%|██████████| 2.97k/2.97k [00:00<00:00, 3.32MB/s]
Downloading data: 100%|██████████| 393k/393k [00:00<00:00, 793kB/s]
Downloading data: 100%|██████████| 517k/517k [00:00<00:00, 1.44MB/s]s]
Downloading data: 100%|██████████| 68.7k/68.7k [00:00<00:00, 228kB/s]]
Downloading data: 100%|██████████| 4.45M/4.45M [00:00<00:00, 9.65MB/s]
Downloading data: 100%|██████████| 381k/381k [00:00<00:00, 1.17MB/s]s]
Downloading data: 100%|██████████| 535k/535k [00:00<00:00, 1.67MB/s]s]
Downloading data: 100%|██████████| 75.5k/75.5k [00:00<00:00, 249kB/s]]
Downloading data: 100%|██████████| 3.86M/3.86M [00:00<00:00, 8.94MB/s]
Downloading data: 100%|██████████| 563k/563k [00:00<00:00, 1.53MB/s]s]
Downloading data: 100%|██████████| 194k/194k [00:00<00:00, 427kB/s]/s]
Downloading data: 100%|██████████| 62.4k/62.4k [00:00<00:00, 205kB/s]s]
Downloading data: 100%|██████████| 2.77M/2.77M [00:00<00:00, 7.42MB/s]]
Downloading data: 100%|██████████| 359k/359k [00:00<00:00, 1.05MB/s]/s]
Down

DatasetDict({
    en.layer1: Dataset({
        features: ['sentence', 'entities', 'original_text', 'original_id'],
        num_rows: 1520
    })
    en.layer2: Dataset({
        features: ['sentence', 'entities', 'original_text', 'original_id'],
        num_rows: 2873
    })
    en.layer2.validation: Dataset({
        features: ['sentence', 'entities', 'original_text', 'original_id'],
        num_rows: 334
    })
    en.layer3: Dataset({
        features: ['sentence', 'entities', 'original_text', 'original_id'],
        num_rows: 9779
    })
    es.layer1: Dataset({
        features: ['sentence', 'entities', 'original_text', 'original_id'],
        num_rows: 1134
    })
    es.layer2: Dataset({
        features: ['sentence', 'entities', 'original_text', 'original_id'],
        num_rows: 2347
    })
    es.layer2.validation: Dataset({
        features: ['sentence', 'entities', 'original_text', 'original_id'],
        num_rows: 261
    })
    es.layer3: Dataset({
        features: ['sent

In [10]:
hf_e3c = preprocess_data(hf_e3c)

Map: 100%|██████████| 1520/1520 [00:00<00:00, 9255.02 examples/s]
Map: 100%|██████████| 2873/2873 [00:00<00:00, 20636.26 examples/s]
Map: 100%|██████████| 334/334 [00:00<00:00, 15907.90 examples/s]
Map: 100%|██████████| 9779/9779 [00:00<00:00, 20680.37 examples/s]
Map: 100%|██████████| 1134/1134 [00:00<00:00, 7687.45 examples/s]
Map: 100%|██████████| 2347/2347 [00:00<00:00, 17393.20 examples/s]
Map: 100%|██████████| 261/261 [00:00<00:00, 14430.14 examples/s]
Map: 100%|██████████| 1876/1876 [00:00<00:00, 11204.48 examples/s]
Map: 100%|██████████| 3126/3126 [00:00<00:00, 11199.45 examples/s]
Map: 100%|██████████| 1594/1594 [00:00<00:00, 22759.43 examples/s]
Map: 100%|██████████| 468/468 [00:00<00:00, 15819.27 examples/s]
Map: 100%|██████████| 1232/1232 [00:00<00:00, 18188.80 examples/s]
Map: 100%|██████████| 1146/1146 [00:00<00:00, 10733.25 examples/s]
Map: 100%|██████████| 2436/2436 [00:00<00:00, 20289.18 examples/s]
Map: 100%|██████████| 275/275 [00:00<00:00, 15381.17 examples/s]
Map: 

In [14]:
hf_e3c['en.layer1']['prompt'][110].split('[/INST]')[-1].strip()

'[{"entity": "diabetic", "offset": [19, 27]}, {"entity": "Hypertensive", "offset": [29, 41]}, {"entity": "illness", "offset": [67, 74]}, {"entity": "diabetic", "offset": [19, 27]}, {"entity": "Hypertensive", "offset": [29, 41]}, {"entity": "She", "offset": [0, 3]}] </s>'

In [5]:
dataset = preprocess_data(hf_e3c)
dataset = hf_e3c['en.layer1']
dataset = dataset.shuffle(seed=1234)  # Shuffle dataset here
dataset = dataset.train_test_split(test_size=0.2)
train_data = dataset["train"]
test_data = dataset["test"]

Map: 100%|██████████| 1520/1520 [00:00<00:00, 10049.20 examples/s]
Map: 100%|██████████| 2873/2873 [00:00<00:00, 15758.86 examples/s]
Map: 100%|██████████| 334/334 [00:00<00:00, 17641.99 examples/s]
Map: 100%|██████████| 9779/9779 [00:00<00:00, 22158.02 examples/s]
Map: 100%|██████████| 1134/1134 [00:00<00:00, 9112.25 examples/s]
Map: 100%|██████████| 2347/2347 [00:00<00:00, 20466.57 examples/s]
Map: 100%|██████████| 261/261 [00:00<00:00, 15214.07 examples/s]
Map: 100%|██████████| 1876/1876 [00:00<00:00, 21325.22 examples/s]
Map: 100%|██████████| 3126/3126 [00:00<00:00, 10247.67 examples/s]
Map: 100%|██████████| 1594/1594 [00:00<00:00, 23900.88 examples/s]
Map: 100%|██████████| 468/468 [00:00<00:00, 18225.10 examples/s]
Map: 100%|██████████| 1232/1232 [00:00<00:00, 20519.65 examples/s]
Map: 100%|██████████| 1146/1146 [00:00<00:00, 11507.06 examples/s]
Map: 100%|██████████| 2436/2436 [00:00<00:00, 23046.56 examples/s]
Map: 100%|██████████| 275/275 [00:00<00:00, 16762.59 examples/s]
Map:

In [6]:
train_data[0]

{'sentence': 'At her 1 year follow-up, the patient was doing well with no evidence of recurrent disease.',
 'entities': [{'id': '7473',
   'offsets': [14, 23],
   'role': '',
   'semantic_type_id': '',
   'text': 'follow-up',
   'type': 'EVENT'},
  {'id': '7488',
   'offsets': [60, 68],
   'role': '',
   'semantic_type_id': '',
   'text': 'evidence',
   'type': 'EVENT'},
  {'id': '7503',
   'offsets': [72, 81],
   'role': '',
   'semantic_type_id': '',
   'text': 'recurrent',
   'type': 'EVENT'},
  {'id': '7518',
   'offsets': [82, 89],
   'role': '',
   'semantic_type_id': '',
   'text': 'disease',
   'type': 'EVENT'},
  {'id': '7726',
   'offsets': [25, 36],
   'role': 'PATIENT',
   'semantic_type_id': '',
   'text': 'the patient',
   'type': 'ACTOR'},
  {'id': '7796',
   'offsets': [7, 13],
   'role': '',
   'semantic_type_id': '',
   'text': '1 year',
   'type': 'TIMEX3'}],
 'original_text': 'A 64-year-old woman was admitted to our institution with palpable lump in her left breast.