### Fine-tuning a base LLM from Azure's Model Catalog

Load model from Catalog

In [None]:
from azure.ai.ml import MLClient  
from azure.identity import DefaultAzureCredential, ClientSecretCredential
from azure.ai.ml.entities import Job  
import os  
from dotenv import load_dotenv  
  
# Load environment variables (for example from a local .env file).
load_dotenv()

# NOTE(review): tenant_id and client_id are read but not used below, because
# DefaultAzureCredential resolves credentials on its own. They are kept in case
# a later cell switches to ClientSecretCredential.
tenant_id = os.getenv("AAD_TENANT_ID")
client_id = os.getenv("AAD_CLIENT_ID")
subscription_id = os.getenv("SUBSCRIPTION_ID")
resource_group = os.getenv("RESOURCE_GROUP")
workspace_name = os.getenv("WORKSPACE_NAME")

# Fail early with a clear message instead of passing None into MLClient.
# Identifiers are deliberately not printed so they do not end up in saved
# notebook outputs.
missing_settings = [
    name
    for name, value in (
        ("SUBSCRIPTION_ID", subscription_id),
        ("RESOURCE_GROUP", resource_group),
        ("WORKSPACE_NAME", workspace_name),
    )
    if not value
]
if missing_settings:
    raise EnvironmentError(
        "Missing required environment variables: " + ", ".join(missing_settings)
    )

# Authenticate and create a client for the workspace.
credential = DefaultAzureCredential()
ml_client = MLClient(credential, subscription_id, resource_group, workspace_name)

# Separate client for the "azureml-meta" registry that hosts the base model.
registry_ml_client_meta = MLClient(credential, registry_name="azureml-meta")
model_name = "Meta-Llama-3.1-8B"
foundation_model = registry_ml_client_meta.models.get(model_name, label="latest")
print(
    "\n\nUsing model name: {0}, version: {1}, id: {2} for fine tuning".format(
        foundation_model.name, foundation_model.version, foundation_model.id
    )
)


### Download model

In [None]:
# Download exactly the version resolved (and printed) in the previous cell,
# rather than a hard-coded version number that can drift from "latest".
registry_ml_client_meta.models.download(name=model_name, version=foundation_model.version)

### Load model & tokenizer in quantized format

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig

# Load the Llama 3.1 8B base model (see `model_name`) in 4-bit NF4 precision,
# using float16 as the compute dtype.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4"
)

# Load model from the locally downloaded copy only (no hub access).
path = model_name +"/mlflow_model_folder/data/model"
model = AutoModelForCausalLM.from_pretrained(path, local_files_only=True, quantization_config=quantization_config)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(path, local_files_only=True)
# Use a dedicated pad token (instead of reusing EOS) so EOS tokens are still attended to.
num_added_tokens = tokenizer.add_special_tokens({'pad_token': '<PAD>'})
# A newly added token gets an id beyond the existing embedding matrix. Grow the
# matrix so padded batches do not index out of range.
if num_added_tokens > 0:
    model.resize_token_embeddings(len(tokenizer))

In [None]:
# LoRA adapter settings: rank-64 adapters on every attention and MLP projection,
# with no bias terms trained. Options not listed here (e.g. lora_alpha) are left
# at the library defaults.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=64,
    bias="none",
    target_modules=[
        "q_proj",
        "o_proj",
        "k_proj",
        "v_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
)

# Attach the adapter to the quantized base model loaded earlier.
model.add_adapter(lora_config)

Load training data

In [None]:
from datasets import load_dataset, Dataset
import pandas as pd
# Keep the file path and the resulting Dataset under different names, so that
# `train_dataset` always refers to a Dataset.
train_data_path = "data/ossllm_train_data_v2.jsonl"

# Each JSONL line is read as one row; only its "record" field is used for training.
train_data_dict = pd.read_json(train_data_path, lines=True).to_dict(orient="records")
train_records = [item["record"] for item in train_data_dict]
train_dataset = Dataset.from_dict({"record": train_records})


In [None]:
from transformers import TrainingArguments

# --- Where checkpoints are written and how often ---
output_dir = "output"
save_steps = 10
logging_steps = 10

# --- Batch sizing: 4 samples per device, accumulated over 4 steps ---
per_device_train_batch_size = 4
gradient_accumulation_steps = 4

# --- Optimizer and learning-rate schedule ---
optim = "paged_adamw_32bit"
learning_rate = 2e-4
lr_scheduler_type = "linear"
warmup_ratio = 0.03
max_grad_norm = 0.3

# --- Training length (in optimizer steps) ---
max_steps = 300

training_arguments = TrainingArguments(
    # output / bookkeeping
    output_dir=output_dir,
    save_steps=save_steps,
    logging_steps=logging_steps,
    # batch sizing
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    gradient_checkpointing=True,
    # optimization
    optim=optim,
    learning_rate=learning_rate,
    lr_scheduler_type=lr_scheduler_type,
    warmup_ratio=warmup_ratio,
    max_grad_norm=max_grad_norm,
    # duration
    max_steps=max_steps,
)


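Each training example already contains its fully formatted prompt in the `record` field, so the formatting function below simply returns that field. Pass this function to `SFTTrainer` through its `formatting_func` argument.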

In [None]:
from trl import SFTTrainer

def formatting_func(example):
    """Return the text to train on for one example: the value of its "record" field."""
    record_text = example["record"]
    return record_text

In [None]:
from transformers.integrations import MLflowCallback  

# Build the supervised fine-tuning trainer from the model, tokenizer, dataset and
# training arguments defined in the earlier cells. Packing is enabled and the
# maximum sequence length is 1024.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_arguments,
    train_dataset=train_dataset,
    formatting_func=formatting_func,
    max_seq_length=1024,
    packing=True,
)

# Remove the MLflow callback so this run does not log through it.
trainer.remove_callback(MLflowCallback)


In [None]:
# Start fine-tuning with the trainer built in the previous cell.
trainer.train()

### Evaluation

In [None]:
import json  
import re  
from transformers import StoppingCriteria, StoppingCriteriaList

class StopOnSpecificToken(StoppingCriteria):
    """Stopping criterion that finishes a sequence once its last token equals a given id.

    Args:
        stop_token_id: Token id that marks the end of generation. ``None`` (for
            example when the tokenizer could not map a string to a single id)
            disables the criterion instead of raising.
    """

    def __init__(self, stop_token_id):
        self.stop_token_id = stop_token_id

    def __call__(self, input_ids, scores, **kwargs):
        """Return a boolean tensor of shape (batch,) marking the rows that should stop."""
        if self.stop_token_id is None:
            # Nothing to match against: never request a stop.
            return torch.zeros(
                input_ids.shape[0], dtype=torch.bool, device=input_ids.device
            )
        # Check the last token of every row, not just the first sequence in the batch.
        return input_ids[:, -1] == self.stop_token_id

  
def process_jsonl(file_path):
    """Read a JSON Lines file and return its records as a list.

    Args:
        file_path: Path to a UTF-8 encoded file with one JSON document per line.

    Returns:
        A list with one parsed object per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Explicit UTF-8 keeps non-ASCII text intact regardless of the platform
    # default encoding. Blank lines are skipped rather than failing the parse.
    with open(file_path, 'r', encoding='utf-8') as file:
        return [json.loads(line) for line in file if line.strip()]
  
def extract_input(record):
    """Return the prompt part of a record: everything up to and including '### Output:'.

    Raises:
        ValueError: If the record does not contain '### Output:'.
    """
    marker = '### Output:'
    cut = record.index(marker) + len(marker)
    return record[:cut]
  
def extract_output(record):
    """Return the text between '### Output:' and the following '### End'.

    When no '### End' follows the output marker, the 35 characters after
    '### Output:' are returned instead (fewer if the record is shorter).

    Raises:
        ValueError: If the record does not contain '### Output:'.
    """
    start_marker = '### Output:'
    start = record.index(start_marker) + len(start_marker)

    # find() reports a missing end marker as -1 instead of raising.
    stop = record.find('### End', start)
    if stop == -1:
        stop = start + 35

    return record[start:stop]
  
def extract_codes(output_text):
    """Return the first number in the text with any dots removed, or None if there is none.

    A "number" is a run of digits, optionally made of dot-separated digit groups
    (for example '8708.99', which is returned as '870899').
    """
    found = re.search(r'\b((?:\d+\.)*\d+)\b', output_text)
    if found is None:
        return None
    return found.group(1).replace('.', '')
  
  
def run_scoring_pipeline(model, tokenizer, batch):
    """Generate completions for a batch of prompts and return the decoded texts.

    Args:
        model: Causal language model exposing ``device`` and ``generate``.
        tokenizer: Tokenizer matching the model; it must have a pad token.
        batch: List of prompt strings.

    Returns:
        A list of decoded strings, one per prompt. Each contains the prompt
        followed by the generated continuation, with special tokens removed.
    """
    # Decoder-only generation needs left padding so that new tokens directly
    # follow the real prompt. The tokenizer setting is restored afterwards.
    original_padding_side = tokenizer.padding_side
    tokenizer.padding_side = "left"
    try:
        inputs = tokenizer(batch, return_tensors="pt", padding=True, truncation=True).to(model.device)
    finally:
        tokenizer.padding_side = original_padding_side

    # "### End" is a multi-token string, not one vocabulary entry, so it cannot
    # be turned into a single stop-token id. Stop on the string instead.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,  # passes input_ids together with the attention mask
            max_new_tokens=250,
            do_sample=False,
            stop_strings=["### End"],
            tokenizer=tokenizer,  # required by generate() when stop_strings is used
            pad_token_id=tokenizer.pad_token_id,
        )

    # Decode the outputs and return them
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)
  
  
def calculate_accuracy(true_values, predicted_values):
    """Compute chapter / heading / subheading accuracy for HS-code predictions.

    Codes are compared as strings: the first 2 characters for the chapter, the
    first 4 for the heading, and the full string for the subheading.

    Args:
        true_values: Sequence of reference code strings.
        predicted_values: Sequence of predicted code strings, aligned with
            ``true_values``.

    Returns:
        A dict with 'chapter_accuracy', 'heading_accuracy' and
        'subheading_accuracy', each a fraction of ``len(true_values)``.
        All three are 0.0 when ``true_values`` is empty.
    """
    hits = {'chapter_accuracy': 0, 'heading_accuracy': 0, 'subheading_accuracy': 0}

    for true, pred in zip(true_values, predicted_values):
        # A missing code (None or '') on either side counts as a miss rather
        # than raising on slicing.
        if not true or not pred:
            continue
        if true[:2] == pred[:2]:
            hits['chapter_accuracy'] += 1
        if true[:4] == pred[:4]:
            hits['heading_accuracy'] += 1
        if true == pred:
            hits['subheading_accuracy'] += 1

    total = len(true_values)
    if total == 0:
        # Avoid ZeroDivisionError when there is nothing to score.
        return {name: 0.0 for name in hits}

    return {name: count / total for name, count in hits.items()}
  
def main(file_path, batch_size=5, max_examples=10):
    """Score the notebook-level model on a JSONL test file and print the accuracies.

    Args:
        file_path: Path to a JSONL file whose objects carry a 'record' string
            containing '### Output:'.
        batch_size: Number of prompts generated per call to the model.
        max_examples: Evaluate only the first ``max_examples`` records; pass
            ``None`` to evaluate the whole file.

    Uses the global ``model`` and ``tokenizer`` defined earlier in the notebook.
    """
    records = process_jsonl(file_path)
    if max_examples is not None:
        records = records[:max_examples]

    inputs = [extract_input(record['record']) for record in records]
    true_outputs = [extract_codes(extract_output(record['record'])) for record in records]

    predicted_outputs = []
    for start in range(0, len(inputs), batch_size):
        batch = inputs[start:start + batch_size]
        # run_scoring_pipeline returns plain decoded strings (prompt + completion),
        # so each result is used directly rather than indexed like a pipeline dict.
        for text_result in run_scoring_pipeline(model, tokenizer, batch):
            predicted_code = extract_codes(extract_output(text_result))
            # No number in the completion: store '' so scoring sees a miss, not None.
            predicted_outputs.append(predicted_code or "")

    accuracy = calculate_accuracy(true_outputs, predicted_outputs)
    print("Accuracies:", accuracy)
  
# Example usage: score the test-data file. `main` reads the notebook-level
# `model` and `tokenizer` as globals, so the cells defining them must run first.
main('data/ossllm_test_data_v2.jsonl')  

## Save the trained model

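Save the fine-tuned LoRA adapter, merge it into a freshly loaded (non-quantized) copy of the base model, and save the merged model together with its tokenizer. The final cell then runs a sample prompt through the merged model.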

In [None]:
from peft import LoraConfig, PeftModel  

# Save the fine-tuned adapter weights so they can be re-attached below.
trainer.model.save_pretrained("new_model")

# Reload the base model without quantization on GPU 0, so the adapter can be
# merged into regular weights.
# NOTE(review): no torch_dtype is given, so the library default precision is
# used — confirm GPU 0 has enough memory while the 4-bit model is still loaded.
path = model_name +"/mlflow_model_folder/data/model"
base_model = AutoModelForCausalLM.from_pretrained(path, local_files_only=True, device_map={'': 0})

# Attach the saved adapter, then fold it into the base weights.
model = PeftModel.from_pretrained(base_model, "new_model")
model = model.merge_and_unload()

# Reload the tokenizer from the same local model folder. Tokenizers take no
# device_map, and `path` replaces the previously undefined PATH.
tokenizer = AutoTokenizer.from_pretrained(path, local_files_only=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Output folder for the merged model (this name was previously undefined).
model_output_dir = "merged_model"
model.save_pretrained(os.path.join(model_output_dir, "data", "model"))
tokenizer.save_pretrained(os.path.join(model_output_dir, "data", "tokenizer"))


In [None]:
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig
)

# model_id = "ybelkada/llama-7b-qlora-ultrachat"

# tokenizer = AutoTokenizer.from_pretrained(model_id)

# quantization_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)

# model = AutoModelForCausalLM.from_pretrained(
#     model_id,
#     quantization_config=quantization_config,
#     adapter_kwargs={"revision": "09487e6ffdcc75838b10b6138b6149c36183164e"}
# )

# Sample prompts for a manual spot check. Each one contains the system line, the
# "### Instruction:" section and the "### Input:" section, and stops before any
# "### Output:" section. Only text4 is passed to the model further down.
# NOTE(review): text3 appears to be an exact duplicate of text — confirm whether
# a different item was intended.
text = "You are a helpful AI assistant that helps assign Harmonized System (HS) Codes.\n\n### Instruction: \nReview the product description and category to assign the appropriate Harmonized System (HS) Code following these steps: \n1. **Identify the Chapter:** Determine the 2-digit chapter code.  \n2. **Identify the Heading:** Determine the 4-digit heading code.  \n3. **Identify the Subheading:** Determine the 2-digit Subheading. \n4. **Provide the complete 6-digit HS Code**: output the final 6-digit HS Code. \n\n\n### Input:    \n**Item Description:** B16A2 B18C1 B18C5 CAM CAP RAILS CAMSHAFT COVER CAP - LOWER LEFT EXHAUST SIDE OEM  \n**Category:** eBay Motors > Parts & Accessories > Car & Truck Parts & Accessories > Engines & Engine Parts > Cylinder Heads"
text2="You are a helpful AI assistant that helps assign Harmonized System (HS) Codes.\n\n### Instruction: \nReview the product description and category to assign the appropriate Harmonized System (HS) Code following these steps: \n1. **Identify the Chapter:** Determine the 2-digit chapter code.  \n2. **Identify the Heading:** Determine the 4-digit heading code.  \n3. **Identify the Subheading:** Determine the 2-digit Subheading. \n4. **Provide the complete 6-digit HS Code**: output the final 6-digit HS Code. \n\n\n### Input:    \n**Item Description:** New Door Striker Kits For Freightliner Columbia Century And FLD A1837204000  \n**Category:** eBay Motors > Parts & Accessories > Commercial Truck Parts > Other Commercial Truck Parts"
text3= "You are a helpful AI assistant that helps assign Harmonized System (HS) Codes.\n\n### Instruction: \nReview the product description and category to assign the appropriate Harmonized System (HS) Code following these steps: \n1. **Identify the Chapter:** Determine the 2-digit chapter code.  \n2. **Identify the Heading:** Determine the 4-digit heading code.  \n3. **Identify the Subheading:** Determine the 2-digit Subheading. \n4. **Provide the complete 6-digit HS Code**: output the final 6-digit HS Code. \n\n\n### Input:    \n**Item Description:** B16A2 B18C1 B18C5 CAM CAP RAILS CAMSHAFT COVER CAP - LOWER LEFT EXHAUST SIDE OEM  \n**Category:** eBay Motors > Parts & Accessories > Car & Truck Parts & Accessories > Engines & Engine Parts > Cylinder Heads"
text4= "You are a helpful AI assistant that helps assign Harmonized System (HS) Codes.\n\n### Instruction: \nReview the product description and category to assign the appropriate Harmonized System (HS) Code following these steps: \n1. **Identify the Chapter:** Determine the 2-digit chapter code.  \n2. **Identify the Heading:** Determine the 4-digit heading code.  \n3. **Identify the Subheading:** Determine the 2-digit Subheading. \n4. **Provide the complete 6-digit HS Code**: output the final 6-digit HS Code. \n\n\n### Input:    \n**Item Description:** APARTMENT NUMBER SIGN -1A -BRUSHED ALUMINUM (2.25X3)  \n**Category:** Business & Industrial > Retail & Services > Business Signs"

#  \n\n\n### Output:\n**HS Code:** 870899\n### End\n"
#  text2 \n\n\n### Output:\n**HS Code:** 731815\n### End\n
#  text4 \n\n\n### Output:\n**HS Code:** 831000\n### End\n"}
# Tokenize one sample prompt and move it to whichever device the model is on,
# instead of assuming GPU index 0.
inputs = tokenizer(text4, return_tensors="pt").to(model.device)
# Unpacking the encoding passes the attention mask along with input_ids.
outputs = model.generate(**inputs, max_new_tokens=250, do_sample=False)

print("After attaching Lora adapters:")
# Special tokens are kept in the printout so the end of generation is visible.
print(tokenizer.decode(outputs[0], skip_special_tokens=False))