# Fine-tune Falcon-7B-instruct with QLoRA on EU CBDC Regulation

Much of the code in this notebook is copied from or based on this [article](https://aws.amazon.com/blogs/machine-learning/interactively-fine-tune-falcon-40b-and-other-llms-on-amazon-sagemaker-studio-notebooks-using-qlora/) by AWS and this [notebook](https://github.com/aws-samples/amazon-sagemaker-generativeai/blob/main/studio-notebook-fine-tuning/falcon-40b-qlora-finetune-summarize.ipynb).

In [None]:
%pip install -U torch==2.0.1 bitsandbytes==0.39.1
%pip install -U datasets py7zr einops tensorboardX
%pip install -U git+https://github.com/huggingface/transformers.git@850cf4af0ce281d2c3e7ebfc12e0bc24a9c40714
%pip install -U git+https://github.com/huggingface/peft.git@e2b8e3260d3eeb736edf21a2424e89fe3ecf429d
%pip install -U git+https://github.com/huggingface/accelerate.git@b76409ba05e6fa7dfc59d50eee1734672126fdba

In [None]:
import os
import nvidia

# Directory that contains the installed `nvidia` package (everything before
# the last "/" of the package's __file__).
nvidia_pkg_dir = nvidia.__file__.rpartition('/')[0]

# CUDA runtime libraries are expected under <nvidia package>/cuda_runtime/lib/.
cuda_install_dir = f"{nvidia_pkg_dir}/cuda_runtime/lib/"

# NOTE(review): this replaces any LD_LIBRARY_PATH that was already set rather
# than extending it — confirm that is intended.
os.environ['LD_LIBRARY_PATH'] = cuda_install_dir

print(cuda_install_dir)

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

model_id = "tiiuae/falcon-7b-instruct"

# Quantization settings: load weights in 4-bit NF4 with double quantization
# enabled and bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(model_id)

# NOTE(review): trust_remote_code=True is passed with no revision pinned for
# the model repository — consider pinning one.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

In [None]:
# Use the tokenizer's end-of-sequence token for padding as well.
# NOTE(review): presumably needed because this tokenizer defines no pad token
# of its own — confirm.
tokenizer.pad_token = tokenizer.eos_token

In [None]:
from peft import prepare_model_for_kbit_training

# Turn on gradient checkpointing before handing the model to PEFT.
model.gradient_checkpointing_enable()
# Let PEFT prepare the quantized model for training; `model` is rebound to
# whatever the helper returns.
# NOTE(review): the exact adjustments happen inside peft — see its documentation.
model = prepare_model_for_kbit_training(model)

In [None]:
def print_trainable_parameters(model):
    """
    Report how many of the model's parameters are trainable.

    Walks ``model.named_parameters()`` once, then prints the number of
    elements in parameters with ``requires_grad`` set, the number of elements
    in all parameters, and the trainable share as a percentage.
    """
    # (element count, trainable?) for every parameter, gathered in one pass.
    sizes = [(param.numel(), param.requires_grad) for _, param in model.named_parameters()]

    all_param = sum(count for count, _ in sizes)
    trainable_params = sum(count for count, is_trainable in sizes if is_trainable)
    trainable_pct = 100 * trainable_params / all_param

    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_pct}"
    )

Prepare the QLoRA version of the model

In [None]:
from peft import LoraConfig, get_peft_model

# Names of the modules that receive LoRA adapters.
lora_target_modules = [
    "query_key_value",
    "dense",
    "dense_h_to_4h",
    "dense_4h_to_h",
]

# LoRA hyper-parameters: rank 8, alpha 32, 5% dropout, no bias training,
# configured for causal language modelling.
config = LoraConfig(
    task_type="CAUSAL_LM",
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=lora_target_modules,
)

# Wrap the model with the adapters and report the trainable share.
model = get_peft_model(model, config)
print_trainable_parameters(model)

Prepare the data: first, convert specific pages of the PDF to a TXT file

In [None]:
import fitz

def extract_text_from_pdf(pdf_path, from_page, to_page):
    """Extract the text of an inclusive, 1-based page range of a PDF.

    Args:
        pdf_path: Path of the PDF file to open.
        from_page: First page to extract (1-based, inclusive).
        to_page: Last page to extract (1-based, inclusive).

    Returns:
        The concatenated text of the requested pages. If reading a page
        fails (for example because ``to_page`` lies past the end of the
        document), the error is printed and the text gathered so far is
        returned. An empty string is returned when ``to_page < from_page``.

    Raises:
        ValueError: If ``from_page`` is smaller than 1.
    """
    # Page numbers are 1-based here. A value below 1 would turn into a
    # negative index further down and silently pick pages counted from the
    # end of the document, so reject it up front (before opening the file).
    if from_page < 1:
        raise ValueError(f"from_page must be >= 1, got {from_page}")

    # Create a PDF document object
    pdf_document = fitz.open(pdf_path)

    # Collect the text page by page and join once at the end instead of
    # growing a string with repeated concatenation.
    page_texts = []

    try:
        for page_number in range(from_page, to_page + 1):
            # Convert the 1-based page number to the 0-based document index.
            page_texts.append(pdf_document[page_number - 1].get_text())
    except Exception as e:
        # Best effort: report the problem and keep what was extracted so far.
        print("Error:", e)
    finally:
        # Always release the document, whether or not extraction succeeded.
        pdf_document.close()

    return "".join(page_texts)

Invoke the conversion function

In [None]:
pdf_file_path = "./euro.pdf"
file_path = "./euro_pages.txt"

# Pages 17 through 38 (1-based, inclusive) of the PDF are converted to text.
extracted_text = extract_text_from_pdf(pdf_file_path, 17, 38)

# Write with an explicit UTF-8 encoding so non-ASCII characters in the
# extracted text do not depend on the platform's default encoding; the
# context manager closes the file even if the write fails.
with open(file_path, "w", encoding="utf-8") as file:
    file.write(extracted_text)

Load the dataset, treating each paragraph as a training record

In [None]:
from datasets import load_dataset

# Read the extracted text file with the "text" loader, sampling by paragraph.
dataset = load_dataset(
    "text",
    data_files={"train": "euro_pages.txt"},
    sample_by="paragraph",
)
train_dataset = dataset["train"]

# Show the shape of the whole dataset object, then of the train split alone.
print(f"Dataset shape: {dataset.shape}")
print(train_dataset.shape)

In [None]:
# Tokenize the "text" column in batches of 24 records and drop every original
# column, so only the tokenizer's outputs remain in the resulting dataset.
lm_train_dataset = train_dataset.map(
    lambda sample: tokenizer(sample["text"]), batched=True, batch_size=24, remove_columns=list(train_dataset.features)
)

Prepare S3 buckets for logging

In [None]:
import os

# S3 bucket that receives the training logs. The default is the bucket this
# notebook was written against; set the TRAINING_LOG_BUCKET environment
# variable to point at your own bucket without editing the notebook.
bucket = os.environ.get("TRAINING_LOG_BUCKET", "sagemaker-studio-866667226369-scqdq9xrns")

# NOTE(review): the prefix says "falcon-40b" although this notebook fine-tunes
# falcon-7b-instruct; it is kept unchanged so existing log locations still match.
log_bucket = f"s3://{bucket}/falcon-40b-qlora-finetune"

Prepare training config

In [None]:
import transformers

# Training hyper-parameters. num_train_epochs=2 keeps this a short
# demonstration run. Evaluation is switched off (do_eval=False, and no
# eval_dataset is given to the Trainer). Mixed precision uses fp16; the
# bf16=True alternative is left disabled.
training_args = transformers.TrainingArguments(
    output_dir="outputs",
    num_train_epochs=2,
    learning_rate=2e-4,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    do_eval=False,
    fp16=True,
    logging_dir=log_bucket,
    logging_steps=1,
)

# Collator for causal language modelling (mlm=False).
causal_lm_collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

trainer = transformers.Trainer(
    model=model,
    args=training_args,
    train_dataset=lm_train_dataset,
    data_collator=causal_lm_collator,
)

# Disable use_cache to silence the warnings during training.
# Please re-enable it for inference!
model.config.use_cache = False

Start training

In [None]:
# Run the training configured on `trainer`; the call's return value is shown
# as the cell output.
trainer.train()

Test the fine-tuned model 

In [None]:
# Prompt for a quick qualitative check of the fine-tuned model
# (plain string: no placeholders, duplicated "the The" removed).
test_sample = "What is the main objective of the establishment of the digital euro?"

# Switch to eval mode so dropout (e.g. the LoRA dropout configured for the
# adapters) is inactive while generating, and re-enable the cache that was
# turned off for training.
model.eval()
model.config.use_cache = True

# The model was loaded with device_map="auto", so its weights may live on an
# accelerator while the tokenizer returns CPU tensors: move the inputs to the
# device of the model's first parameter.
model_device = next(model.parameters()).device
encoded = tokenizer(test_sample, return_tensors="pt")
input_ids = encoded.input_ids.to(model_device)
# Pass the attention mask explicitly: the pad token is the same as the EOS
# token, so it cannot be inferred reliably from the input ids.
attention_mask = encoded.attention_mask.to(model_device)

tokens_for_answer = 200
# max_length counts prompt tokens too, so add the prompt length.
output_tokens = input_ids.shape[1] + tokens_for_answer
outputs = model.generate(
    inputs=input_ids,
    attention_mask=attention_mask,
    do_sample=True,
    max_length=output_tokens,
    pad_token_id=tokenizer.pad_token_id,
)
gen_text = tokenizer.batch_decode(outputs)[0]

print(gen_text)