# Install packages

In [None]:
# Install the pinned wheels shipped in the `nh-llama-2-7b` Kaggle input dataset.
# Each wheel is referenced by its local file path under /kaggle/input.
!pip install /kaggle/input/nh-llama-2-7b/accelerate-0.21.0-py3-none-any.whl
!pip install /kaggle/input/nh-llama-2-7b/bitsandbytes-0.41.1-py3-none-any.whl
!pip install /kaggle/input/nh-llama-2-7b/peft-0.4.0-py3-none-any.whl
!pip install /kaggle/input/nh-llama-2-7b/trl-0.5.0-py3-none-any.whl
# NOTE(review): the next two wheels are presumably installed first because
# langchain depends on them - confirm before reordering these lines.
!pip install /kaggle/input/nh-llama-2-7b/openapi_schema_pydantic-1.2.4-py3-none-any.whl
!pip install /kaggle/input/nh-llama-2-7b/langsmith-0.0.22-py3-none-any.whl
!pip install /kaggle/input/nh-llama-2-7b/langchain-0.0.264-py3-none-any.whl
print("done")

# Imports

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import pandas as pd
from string import Template
from pathlib import Path


import os

import warnings
warnings.simplefilter("ignore")

from tqdm.notebook import tqdm

# for training
from peft import LoraConfig, get_peft_model
from transformers import TrainingArguments
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM
# for training set
from datasets import load_dataset
from langchain.prompts import PromptTemplate
import matplotlib.pyplot as plt
import bitsandbytes as bnb
import numpy as np

from IPython.display import Markdown, display
print("done")

# load model and tokenizer

In [None]:
# Base model to fine-tune. Change model_name to the model of your choice:
# either the name of a model on huggingface (requires internet) or a local
# path to the model files.
model_name = "/kaggle/input/llama2-7b-hf/Llama2-7b-hf"

# 4-bit NF4 quantization with double quantization (QLoRA-style loading).
# The keyword is `bnb_4bit_compute_dtype`; it was previously misspelled
# (`bnb_4bit_compute_dtyp`), so the requested bfloat16 compute dtype was
# never applied to the quantized layers.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    trust_remote_code=True
)
# this should be set as False for finetuning
model.config.use_cache = False

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# Reuse the end-of-sequence token for padding so batched tokenization
# with padding=True has a pad token to work with.
tokenizer.pad_token = tokenizer.eos_token
print("done")

# prepare training data

In [None]:
# Read the competition's training questions from the attached CSV.
TRAIN_CSV = "/kaggle/input/kaggle-llm-science-exam/train.csv"
train_dataset = load_dataset("csv", data_files=TRAIN_CSV)
print("done")

In [None]:
# Prompt layout shared by every example: instruction, question, the five
# options A-E, then the "### Answer:" marker followed by the answer letter.
template = """Answer the following multiple choice question by giving the most appropriate response. Answer should be one among [A, B, C, D, E]

Question: {prompt}\n
A) {a}\n
B) {b}\n
C) {c}\n
D) {d}\n
E) {e}\n

### Answer: {answer}"""

prompt = PromptTemplate(
    input_variables=['prompt', 'a', 'b', 'c', 'd', 'e', 'answer'],
    template=template,
)
print("done")

In [None]:
# Render the first training example through the template as a visual check.
sample = train_dataset['train'][0]
sample_options = {letter.lower(): sample[letter] for letter in ("A", "B", "C", "D", "E")}
rendered_sample = prompt.format(prompt=sample['prompt'], answer=sample['answer'], **sample_options)
display(Markdown(rendered_sample))
print("done")


In [None]:
def format_text(example):
    """Fill the prompt template with one training example.

    Reads the 'prompt', 'A'..'E' and 'answer' fields of `example` and returns
    a dict with a single 'text' key holding the formatted prompt.
    """
    option_values = {letter.lower(): example[letter] for letter in ("A", "B", "C", "D", "E")}
    filled = prompt.format(
        prompt=example['prompt'],
        answer=example['answer'],
        **option_values,
    )
    return {"text": filled}
print("done")


In [None]:
# Add the formatted "text" field to every training example.
train_dataset = train_dataset.map(function=format_text)
print("done")

# Set up training arguments

In [None]:
# check model structure
# A bare `model` expression is only rendered when it is the last statement of
# a cell; with the trailing print it showed nothing, so print it explicitly.
print(model)
print("done")

In [None]:
def find_linear_layers(model):
    """Collect the leaf names of all 4-bit linear layers in `model`.

    Walks `model.named_modules()`, keeps modules that are instances of
    `bnb.nn.Linear4bit`, and records the last dotted component of each name.
    'lm_head' is excluded. The result is printed and returned as a list.
    """
    leaf_names = {
        qualified_name.rsplit('.', 1)[-1]
        for qualified_name, layer in model.named_modules()
        if isinstance(layer, bnb.nn.Linear4bit)  # 4 bits for qlora
    }
    leaf_names.discard('lm_head')

    found = list(leaf_names)
    print(f"LoRA module names: {found}")
    return found


# For llama 2 the adapter targets are discovered from the loaded model
# (they need different target modules than other architectures).
target_modules = find_linear_layers(model)
qlora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=target_modules,  # layers on which QLoRA is applied
    r=16,  # dimension of the updated matrices
    lora_alpha=64,  # parameter for scaling
    lora_dropout=0.1,  # dropout probability for layers
    bias="none",
)
print("done")

In [None]:
# "max_steps=1" is just for testing execution.
# NOTE(review): per the TrainingArguments documentation a positive max_steps
# takes precedence over num_train_epochs - drop it for a real training run.
training_args = TrainingArguments(
    output_dir="./SFT-llama2-7b",
    run_name="baseline-llama2-sft",
    report_to="none",
    # batch sizes
    per_device_train_batch_size=4,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=2,
    # schedule
    num_train_epochs=2,
    max_steps=1,
    warmup_steps=2,
    # optimisation
    learning_rate=2e-4,
    optim="paged_adamw_8bit",
    fp16=True,
    # logging / checkpoints
    logging_strategy="steps",
    logging_steps=20,
    save_total_limit=1,  # can be increased, but beware of kaggle notebook output size limit
)
print("done")

In [None]:
# Collator keyed on the "Answer:" marker that ends every formatted prompt.
# NOTE(review): per the TRL docs this restricts the loss to the tokens that
# follow the response template - confirm for the pinned trl version.
completion_collator = DataCollatorForCompletionOnlyLM(
    response_template="Answer:",
    tokenizer=tokenizer,
)

supervised_finetuning_trainer = SFTTrainer(
    model=model,
    args=training_args,
    tokenizer=tokenizer,
    peft_config=qlora_config,
    train_dataset=train_dataset['train'],
    dataset_text_field="text",
    max_seq_length=3000,
    data_collator=completion_collator,
)
print("done")

In [None]:
# Run supervised fine-tuning with the trainer configured above.
supervised_finetuning_trainer.train()
print("done")

# Save model

In [None]:
# Unwrap a `.module` attribute when the trainer's model exposes one,
# then write the model to the local "outputs" directory.
trained_model = supervised_finetuning_trainer.model
model_to_save = getattr(trained_model, 'module', trained_model)
model_to_save.save_pretrained("outputs")
print("done")

# Applying lora

In [None]:
# Use the adapter that was actually trained. The trainer's model already
# carries the fine-tuned LoRA weights, so it is reused for inference.
# The previous `get_peft_model(model, lora_config)` call only built a new
# adapter from the saved *config*; it never loaded the trained weights
# stored in 'outputs'.
lora_config = LoraConfig.from_pretrained('outputs')  # sanity check: the saved adapter config is readable
finetuned_model = supervised_finetuning_trainer.model
model = finetuned_model.module if hasattr(finetuned_model, 'module') else finetuned_model
# Training leaves the network in train mode; switch to eval so that
# LoRA dropout (lora_dropout=0.1) is disabled while scoring answers.
model.eval()
print("done")

# Create submission

### Prepare test set

In [None]:
# Same prompt layout as used for training; at test time the {answer} slot is
# filled with an empty string so the text ends right after "### Answer: ".
template = """Answer the following multiple choice question by giving the most appropriate response. Answer should be one among [A, B, C, D, E]

Question: {prompt}\n
A) {a}\n
B) {b}\n
C) {c}\n
D) {d}\n
E) {e}\n

### Answer: {answer}"""

prompt = PromptTemplate(
    input_variables=['prompt', 'a', 'b', 'c', 'd', 'e', 'answer'],
    template=template,
)
print("done")


In [None]:
# We don't have answers for test
def format_text_test(example):
    """Fill the prompt template for a test example, leaving the answer blank.

    Reads the 'prompt' and 'A'..'E' fields of `example` and returns a dict
    with a single 'text' key; the answer slot is an empty string.
    """
    option_values = {letter.lower(): example[letter] for letter in ("A", "B", "C", "D", "E")}
    filled = prompt.format(prompt=example['prompt'], answer='', **option_values)
    return {"text": filled}


# Read the test questions and attach the answer-less prompt as "text".
test_dataset = load_dataset(
    "csv",
    data_files="/kaggle/input/kaggle-llm-science-exam/test.csv",
).map(format_text_test)
print("done")


### Predict with fine-tuned model

In [None]:
from torch import nn


class Perplexity(nn.Module):
    """Per-sequence next-token cross-entropy.

    For each sequence in the batch, the logits at position t are scored
    against the label at position t+1 with `nn.CrossEntropyLoss` (default
    settings). The value is the mean cross-entropy and is not exponentiated.

    Args:
        reduce: when True, `forward` returns the mean over the batch as a
            scalar tensor; when False it returns one value per sequence.
    """

    def __init__(self, reduce: bool = True):
        super().__init__()
        self.loss_fn = nn.CrossEntropyLoss()
        self.reduce = reduce

    def forward(self, logits, labels):
        """Score `labels` under `logits`; the first dimension indexes sequences."""
        # Drop the last prediction and the first label so that position t
        # in the logits lines up with token t+1 in the labels.
        predicted = logits[..., :-1, :].contiguous()
        targets = labels[..., 1:].contiguous()

        per_sequence = torch.stack(
            [self.loss_fn(predicted[row], targets[row]) for row in range(labels.shape[0])],
            dim=0,
        )
        return torch.mean(per_sequence) if self.reduce else per_sequence


perp = Perplexity()
print("done")

In [None]:
# Rank the five answer letters for every test question.
# For each question the prompt is completed with each letter, the model scores
# the five completed texts in one batch, and the letters are sorted by
# ascending score from `perp` (lower score = better fit).
cols = ["A", "B", "C", "D", "E"]
test_split = test_dataset["train"]

# Make sure dropout is disabled while scoring.
model.eval()

preds = []
for idx in tqdm(range(len(test_split)), total=len(test_split)):
    # Read the row once. A dedicated name is used here so the global `prompt`
    # (the PromptTemplate used by the formatting cells) is not overwritten.
    question_text = test_split[idx]['text']
    samples = [question_text + col for col in cols]

    with torch.no_grad():
        inputs = tokenizer(
            samples,
            return_tensors="pt",
            add_special_tokens=False,
            padding=True,
            truncation=True,
        ).to(model.device)

        logits = model(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"]).logits

        # Work on a copy so the tokenizer output is not modified in place;
        # padded positions get -100, the ignore index of CrossEntropyLoss.
        labels = inputs["input_ids"].clone()
        labels.masked_fill_(~inputs["attention_mask"].bool(), -100)

        perps = [
            perp(logits[j].unsqueeze(0), labels[j].unsqueeze(0)).item()
            for j in range(len(cols))
        ]

        # Release the batch tensors before the next iteration.
        del inputs
        del labels
        del logits

    ranked_letters = np.array(cols)[np.argsort(np.array(perps))]
    # Kept as a one-element list: the submission cell reads it via `row[0]`.
    preds.append([ranked_letters])
print("total done")



### Format predictions to submission format and save

In [None]:
def format_prediction(row, k=1):
    """Return the first `k` entries of `row[0]` joined by single spaces.

    `row[0]` is the ranked sequence of answer letters, best first.
    """
    ranked_letters = row[0]
    top_k = ranked_letters[:k]
    return ' '.join(top_k)

# Number of ranked answers written per question. The competition metric is
# MAP@3, so up to three guesses per question are scored; submitting only the
# top guess throws away credit for a correct second or third choice.
TOP_K = 3

# One row per test question; column 0 holds the ranked answer letters.
test_df = pd.DataFrame(preds)
test_df['prediction'] = test_df.apply(lambda row: format_prediction(row, k=TOP_K), axis=1)
# Row position is used as the question id.
test_df['id'] = test_df.index

submission = test_df[['id', 'prediction']]
submission.to_csv('submission.csv', index=False)
print("done")


In [None]:
# After fine-tuning, save the model to a directory in the /kaggle/working path
model_to_save = supervised_finetuning_trainer.model.module if hasattr(supervised_finetuning_trainer.model, 'module') else supervised_finetuning_trainer.model
model_path = "/kaggle/working/fine_tuned_model"
model_to_save.save_pretrained(model_path)
print("Model saved to /kaggle/working/fine_tuned_model")

# Zip the model directory for easy download
!zip -r /kaggle/working/fine_tuned_model.zip {model_path}
print("Model zipped for download")
print("done")
