In [None]:
# !pip install -U datasets

In [None]:
# Colab only: mount Google Drive at /content/drive so files stored there are
# reachable from this runtime, then print the current working directory.
import os
from google.colab import drive
drive.mount('/content/drive')
print(os.getcwd())

In [None]:
# Enter your project directory path inside Google Drive.
project_directory_path = "/content/drive/MyDrive/mistral_qa/"
# Switch the working directory to the project folder. `%cd` is an IPython
# magic and `$project_directory_path` expands the Python variable above.
%cd $project_directory_path
# Print the working directory to confirm the switch.
print(os.getcwd())

In [1]:
# Run this cell to install required dependencies
!pip install -q accelerate peft bitsandbytes transformers trl nanoid

In [2]:
import math
import os
import re
import ast
from nanoid import generate
import pandas as pd
import random

import numpy as np
import pandas as pd

import torch

from datasets import load_dataset, Dataset
from transformers import (AutoModelForCausalLM,
                   AutoTokenizer,
                   BitsAndBytesConfig,
                   HfArgumentParser,
                   TrainingArguments,
                   TextDataset,
                   DataCollatorForLanguageModeling,
                   pipeline,


                   logging)
from accelerate import Accelerator
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# -----------------------------------------------------------------------------------------------------------------------------------------------
# Hugging Face authentication
#   1. Log in to your Hugging Face account.
#   2. Accept the licence conditions of the gated model(s) you want to use, e.g.
#        https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1
#        https://huggingface.co/google/gemma-7b
#        https://huggingface.co/meta-llama/Llama-2-7b-hf
#        https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct
#   3. Create an access token at https://huggingface.co/settings/tokens
#
# The token is a secret and must never be written into the notebook: export it
# as the HF_TOKEN environment variable before starting the kernel, or type it at
# the hidden prompt below.
# SECURITY: a token that was previously committed in this cell is exposed and
# must be revoked on the settings page above.
from getpass import getpass

access_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")
# -----------------------------------------------------------------------------------------------------------------------------------------------

In [3]:
!huggingface-cli login --token "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"

Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /teamspace/studios/this_studio/.cache/huggingface/token
Login successful


# LLM Model

In [4]:
# Central configuration cell: every value below is read by name in later cells.

# The model that you want to train from the Hugging Face hub
# model_name = "mistralai/Mistral-7B-Instruct-v0.2"
model_name = "meta-llama/Meta-Llama-3-8B-Instruct"

# Fine-tuned model name: the training cell saves the trained adapter under this
# name, and the inference section loads an adapter with the same name.
# new_model = "Mistral-7B-Instruct-v0.2"
new_model = "Meta-Llama-3-8B-Instruct"

# ---------------------------------------------------------------------------
# QLoRA parameters (passed to LoraConfig)
# ---------------------------------------------------------------------------

# LoRA attention dimension (rank `r`)
lora_r = 64

# Alpha parameter for LoRA scaling
lora_alpha = 32

# Dropout probability for LoRA layers
lora_dropout = 0.1

# ---------------------------------------------------------------------------
# bitsandbytes parameters (passed to BitsAndBytesConfig)
# ---------------------------------------------------------------------------

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models; resolved later with getattr(torch, ...).
# NOTE(review): the training cell enables bf16 while this stays "float16" —
# confirm the mismatch is intended.
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

# ---------------------------------------------------------------------------
# TrainingArguments parameters
# ---------------------------------------------------------------------------

# Output directory where the model predictions and checkpoints will be stored
output_dir = "./results"

# Enable gradient checkpointing
# NOTE(review): this flag is not passed to TrainingArguments in the training
# cell of this notebook, so it currently has no visible effect.
gradient_checkpointing = True

# Maximum gradient norm (gradient clipping)
max_grad_norm = 0.3

# Initial learning rate (AdamW optimizer)
learning_rate = 2e-4

# Weight decay to apply to all layers except bias/LayerNorm weights
weight_decay = 0.001

# Optimizer to use
optim = "paged_adamw_32bit"

# Learning rate schedule
# NOTE(review): the matching `lr_scheduler_type=` argument is commented out in
# the training cell, so this value is currently unused there.
lr_scheduler_type = "cosine"

# Number of training steps (overrides num_train_epochs)
max_steps = -1

# Ratio of steps for a linear warmup (from 0 to learning rate)
warmup_ratio = 0.03

# Group sequences into batches with same length
# Saves memory and speeds up training considerably
group_by_length = True

# Save checkpoint every X updates steps
# NOTE(review): 0 presumably means no intermediate checkpoints are written — confirm.
save_steps = 0

# ---------------------------------------------------------------------------
# SFT parameters
# ---------------------------------------------------------------------------

# Pack multiple short examples in the same input sequence to increase efficiency
packing = False

# Load the entire model on the GPU 0
device_map = {"": 0}

In [5]:
# Load tokenizer and model with QLoRA configuration
# Resolve the dtype name from the config cell ("float16") to the torch dtype object.
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

# Quantization settings applied while the base model weights are loaded.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Alternative kept for reference:
# bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)

# Check GPU compatibility with bfloat16
# Only a hint is printed (when the GPU's compute-capability major version is
# at least 8); no setting is changed here.
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

# Load base model (quantized according to bnb_config, placed according to device_map)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map= device_map,
    low_cpu_mem_usage=True,
    token = access_token
)
# NOTE(review): use_cache is switched off, presumably because the key/value
# cache is not needed while training — confirm before reusing `model` for inference.
model.config.use_cache = False
# NOTE(review): pretraining_tp = 1 is carried over from Llama fine-tuning recipes;
# confirm it is still required for this model.
model.config.pretraining_tp = 1

# Load the tokenizer that belongs to model_name
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, token = access_token)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right" # Fix weird overflow issue with fp16 training

# Load LoRA configuration
# Adapters are attached to the attention projections (q/k/v/o), the MLP
# projections (gate/up/down) and the output head (lm_head).
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules = [
    "lm_head",
    "q_proj",
    "k_proj",
    "v_proj",
    "up_proj",
    "down_proj",
    "o_proj",
    "gate_proj"
      ]
)

Your GPU supports bfloat16: accelerate training with bf16=True


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [6]:
# Print the loaded model's module tree framed by full-width banner lines.
title = "Architecture of the LLM model is given below"
header = " ".join(["-" * 50, title, "-" * 50])
# The rule is as wide as the header, so it stays aligned if the title changes.
rule = "#" * len(header)
print(rule)
print(header)
print(rule)
print(model)
print(rule)

###################################################################################################################################################
-------------------------------------------------- Architechture of the LLM model is given below --------------------------------------------------
###################################################################################################################################################
LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()

In [7]:
# Training data
# The processed dataset is pulled from the Hugging Face Hub. To train on your
# own data, upload it to your account and point dataset_name at that repository.
dataset_name = "shetumohanto/zoloai"

# Fetch the train split and shuffle it with a fixed seed so the order is reproducible.
dataset = load_dataset(dataset_name, split="train").shuffle(seed=314)

print(f"Total datapoints for train {len(dataset)}")

Total datapoints for train 3420


In [8]:
# Display one arbitrary record to eyeball the prompt/answer template stored in
# its "text" field.
dataset[700]

{'text': '<s> [INST] <<SYS>> Generate ZOLO block code required for the user input. Strictly follow the output format. Start output with ++ and end with ++ <<\\SYS>> User input: Develop an image section displaying a serene garden setting. [/INST] ++Output = {"Block_Name": "Advanced_Image", "Image_Source":"UPLOAD", "Layout":{"HEADING":"Sunset Spectacle: Nature\'s Colorful Canvas", "DESCRIPTION":"Feel the cool mist on your face as water crashes down."}}++ </s> \n'}

In [8]:
def filter_dataset(dataset, model, type="train", max_seq_length=256):
    """Drop over-long examples and pick the sequence length to train with.

    Each record's ``"text"`` field is tokenized with the notebook-level
    ``tokenizer``. Records whose token count is not strictly below
    ``max_seq_length`` are removed so the remaining data fits on the GPU.
    The returned sequence length is the smallest power of two that is at
    least as large as the longest remaining example, capped at the model's
    ``max_position_embeddings``.

    Args:
        dataset: Hugging Face ``Dataset`` whose records have a ``"text"`` field.
        model: Loaded model; only ``model.config.max_position_embeddings`` is read.
        type: Label used in the progress messages (e.g. ``"train"``).
        max_seq_length: Exclusive upper bound on the token count of kept records.

    Returns:
        Tuple ``(filtered_dataset, max_seq_length)``.

    Raises:
        ValueError: If ``dataset`` is empty or no record is short enough.
    """
    model_max_seq_length = model.config.max_position_embeddings
    print(f"Model can handle maximum {model_max_seq_length} tokens")

    # Tokenize once; the lengths of the kept records are reused further down.
    lengths = [len(tokenizer.encode(data["text"])) for data in dataset]
    if not lengths:
        raise ValueError(f"The {type} dataset is empty")
    print(f"Max sequence length in the {type} dataset is {max(lengths)}")

    # remove datapoints whose sequence length reaches max_seq_length due to GPU fit.
    include_idx = [index for index, element in enumerate(lengths) if element < max_seq_length]
    print(f"Total datapoints in {type} dataset after removing sequence length greater than {max_seq_length} is {len(include_idx)}")
    if not include_idx:
        raise ValueError(
            f"No datapoint in the {type} dataset is shorter than {max_seq_length} tokens"
        )

    dataset = dataset.select(include_idx)

    longest = max(lengths[index] for index in include_idx)
    print("Max sequence length after filtering is {}".format(longest))

    # Round UP to the next power of two. The previous round(log2(n)) could round
    # down (e.g. 180 -> 128) and silently truncate the longest examples.
    max_seq_length = 1 << (max(longest, 1) - 1).bit_length()
    # Never ask for more positions than the model supports.
    max_seq_length = min(max_seq_length, model_max_seq_length)
    print("Max sequence length set to {}".format(max_seq_length))

    return dataset, max_seq_length

In [9]:
# Filter the training set and keep the chosen sequence length; `max_seq_length`
# is read again later by the trainer and by generate_text.
dataset, max_seq_length = filter_dataset(dataset, model, type="train")

Model can handle maximum 8192 tokens
Max sequence length in the train dataset is 231
Total datapoints in train dataset after removing sequence length greater than 256 is 3420
Max sequence length after filtering is 231
Max sequence length set to 256


# For Training Only

In [11]:
# Number of training epochs
num_train_epochs = 4

# Enable fp16/bf16 training (set bf16 to True with an A100)
# NOTE(review): the quantization config was built with a float16 compute dtype
# while training runs in bf16 — confirm this combination is intended.
fp16 = False
bf16 = True

# Batch size per GPU for training
per_device_train_batch_size = 4

# Batch size per GPU for evaluation
# NOTE(review): defined but not passed to TrainingArguments below; no eval
# dataset is given to the trainer either.
per_device_eval_batch_size = 1

# Number of update steps to accumulate the gradients for
# (4 examples per step x 3 accumulated steps = 12 examples per optimizer update)
gradient_accumulation_steps = 3

# Set training parameters
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    # The scheduler from the config cell is deliberately left disabled here, so
    # the library's default schedule applies.
#     lr_scheduler_type=lr_scheduler_type,
    # Do not report to any experiment-tracking integration.
    report_to="none",
    # Log the training loss every 100 steps.
    logging_steps = 100
)

# Set supervised fine-tuning parameters
# The trainer receives the quantized base model plus the LoRA config and trains
# on the "text" field of each record.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=packing
)

# Train model
trainer.train()

# Save trained model under the name `new_model`; the inference section loads an
# adapter with the same name.
trainer.model.save_pretrained(new_model)

Map:   0%|          | 0/3420 [00:00<?, ? examples/s]

Step,Training Loss
100,0.9655
200,0.2738
300,0.2017
400,0.1631
500,0.1559
600,0.1384
700,0.1292
800,0.1236
900,0.1178
1000,0.108




# Inference

In [10]:
#######--------Run this cell only once----------###############
# Attach the fine-tuned adapter to the base model for inference.
# The id matches `new_model`, the name the training cell saves the adapter under.
# NOTE(review): presumably this is meant for a fresh session in which the base
# model was loaded but the training cell was skipped — confirm.
# peft_model_id = "Mistral-7B-Instruct-v0.2"
peft_model_id = "Meta-Llama-3-8B-Instruct"
model.load_adapter(peft_model_id)

In [11]:
def generate_text(prompt, model, tokenizer, max_new_tokens=None):
    """Wrap a user request in the instruction template and generate a reply.

    Args:
        prompt: The raw user request; it is inserted after ``User input:``.
        model: Causal language model exposing ``device``, ``eval()`` and ``generate()``.
        tokenizer: Tokenizer matching ``model``.
        max_new_tokens: Upper bound on generated tokens. Defaults to the
            notebook-level ``max_seq_length``.

    Returns:
        Tuple ``(generated_text, model_out)``: the decoded first sequence (prompt
        included, special tokens skipped) and the raw output of ``model.generate``.
    """
    # NOTE(review): this system prompt is worded differently from the one in the
    # training records ("Generate ZOLO block code ... Strictly follow the output
    # format.") — confirm the difference is intended.
    prompt = f"""<s> [INST] <<SYS>> Generate ZOLO block format required for the user input. Start output with ++ and end with ++. 
    Please provide text inside Output items aligned with the user input. Do not output any code. Please provide only one output.
    <<\\SYS>> User input: {prompt} [/INST]"""

    if max_new_tokens is None:
        max_new_tokens = max_seq_length

    # Take the padding id from the tokenizer. The previous hard-coded id 2 is the
    # end-of-sequence id of Llama-2/Mistral vocabularies and is not valid for
    # every model this notebook can be pointed at.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    # Place the inputs on the device that actually holds the model.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    model.eval()

    with torch.no_grad():
        model_out = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            pad_token_id=pad_token_id,
        )
    generated_text = tokenizer.decode(model_out[0], skip_special_tokens=True)

    return generated_text, model_out

### Sample inference

In [12]:
import re
from nanoid import generate
import pandas as pd
import random
import json

from zolo_blocks.accordion import Accordion
from zolo_blocks.advance_button import Advance_Button
from zolo_blocks.advance_heading import Advance_Heading
from zolo_blocks.advance_image import Advance_Image
from zolo_blocks.container import Container

In [13]:
def _parse_block_spec(generated_text):
    """Extract the block description dict from the decoded model output."""
    # Everything after the first "[/INST]" up to the first "</s>" (or the end of
    # the text when the model never emitted one). DOTALL lets the answer span lines.
    match = re.search(
        re.escape("[/INST]") + r"(.*?)(?:" + re.escape("</s>") + r"|\Z)",
        generated_text,
        re.DOTALL,
    )
    if match is None:
        raise ValueError("Model output does not contain the '[/INST]' marker")

    payload = match.group(1).strip()
    # Strip only the wrapping "++ ... ++" and the leading "Output =" label.
    # Removing those substrings everywhere would corrupt headings or
    # descriptions that contain "Output", "=" or "++".
    if payload.startswith("++"):
        payload = payload[2:]
    if payload.endswith("++"):
        payload = payload[:-2]
    payload = re.sub(r"^\s*Output\s*=\s*", "", payload).strip()

    try:
        return ast.literal_eval(payload)
    except (ValueError, SyntaxError):
        pass
    # literal_eval rejects JSON-only literals (true/false/null); retry as JSON.
    try:
        return json.loads(payload)
    except ValueError as err:
        raise ValueError(
            f"Could not parse the model output as a block description: {payload!r}"
        ) from err


def generate_code(user_query, model, tokenizer):
    """Generate the ZOLO block markup for a natural-language request.

    The request is sent through ``generate_text``, the answer is parsed into a
    dict, and the block class named by its ``"Block_Name"`` entry is
    instantiated with that dict to render the final code.

    Args:
        user_query: The user's request in plain language.
        model: Model passed through to ``generate_text``.
        tokenizer: Tokenizer passed through to ``generate_text``.

    Returns:
        Whatever the selected block's ``get_code()`` returns.

    Raises:
        ValueError: If the model output cannot be parsed or names an unknown block.
        KeyError: If the parsed output has no ``"Block_Name"`` entry.
    """
    # Names as they appear in the model output -> generator classes.
    block_classes = {
        "Accordion": Accordion,
        "Advanced_Button": Advance_Button,
        "Advanced_Heading": Advance_Heading,
        "Advanced_Image": Advance_Image,
        "Container": Container,
    }

    generated_text, _ = generate_text(user_query, model, tokenizer)
    block_spec = _parse_block_spec(generated_text)

    block_name = block_spec["Block_Name"]
    if block_name not in block_classes:
        raise ValueError(
            f"Unknown block name {block_name!r}; expected one of {sorted(block_classes)}"
        )

    code_out = block_classes[block_name](block_spec)
    code = code_out.get_code()

    return code

In [21]:
# Sample request: generate the block markup and show it between two rules.
user_query = "Examine the environmental impact of mango, litchi, and banana cultivation."
code = generate_code(user_query, model, tokenizer)

# One print call with a newline separator emits the same three lines.
print("#"*170, code, "#"*170, sep="\n")

##########################################################################################################################################################################
<!-- wp:zolo/container {"variationStatus":true,"isBlockRootParent":true,"uniqueId":"container-3j3c6k1p","parentClasses":["zolo-block parent-container-3j3c6k1p"],"zoloStyles":{"desktop":".container-3j3c6k1p.block-editor-block-list__block.wp-block-zolo-container \u003e .zolo-container-inner-blocks-wrap,.wp-block-zolo-container.zolo-root-container.alignfull.container-3j3c6k1p \u003e .zolo-container-inner-blocks-wrap { max-width:1200px }.is-root-container \u003e .block-editor-block-list__block .block-editor-block-list__block#block-6re27zfq-f1vj-ia2g-7mu3-tyh4u9jurred,.wp-block-zolo-container.zolo-root-container.frontend .container-3j3c6k1p { max-width:100%; width:100% }.container-3j3c6k1p.wp-block-zolo-container \u003e .zolo-container-inner-blocks-wrap \u003e .block-editor-inner-blocks \u003e .block-editor-block-list__lay