# Install packages

In [None]:
# Install pinned wheels from local files under /kaggle/input/nh-llama-2-7b
# (the version of each package is fixed by the wheel filename).
# Fine-tuning stack: accelerate, bitsandbytes, peft, trl.
!pip install /kaggle/input/nh-llama-2-7b/accelerate-0.21.0-py3-none-any.whl
!pip install /kaggle/input/nh-llama-2-7b/bitsandbytes-0.41.1-py3-none-any.whl
!pip install /kaggle/input/nh-llama-2-7b/peft-0.4.0-py3-none-any.whl
!pip install /kaggle/input/nh-llama-2-7b/trl-0.5.0-py3-none-any.whl
# Prompt templating: langchain is installed last, after openapi_schema_pydantic
# and langsmith (presumably its dependencies - keep this order unless verified).
!pip install /kaggle/input/nh-llama-2-7b/openapi_schema_pydantic-1.2.4-py3-none-any.whl
!pip install /kaggle/input/nh-llama-2-7b/langsmith-0.0.22-py3-none-any.whl
!pip install /kaggle/input/nh-llama-2-7b/langchain-0.0.264-py3-none-any.whl
print("done")

# Imports

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import pandas as pd
from string import Template
from pathlib import Path


import os

import warnings
warnings.simplefilter("ignore")

from tqdm.notebook import tqdm

# for training
from peft import LoraConfig, get_peft_model
from transformers import TrainingArguments
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM
# for training set
from datasets import load_dataset
from langchain.prompts import PromptTemplate
import matplotlib.pyplot as plt
import bitsandbytes as bnb
import numpy as np

from IPython.display import Markdown, display
print("done")

# load model and tokenizer

In [None]:
# Change model_name to the model of your choice.
# This can be either the name of a model on Hugging Face (requires internet)
# or a local path to the model files.
model_name = "/kaggle/input/llama2-7b-hf/Llama2-7b-hf"

# 4-bit NF4 quantisation with double quantisation.
# The keyword must be spelled `bnb_4bit_compute_dtype`: a misspelled keyword is
# not applied as the compute dtype, so the requested bfloat16 would be lost.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    trust_remote_code=True
)
# this should be set as False for finetuning
model.config.use_cache = False

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# Reuse the end-of-sequence token for padding.
tokenizer.pad_token = tokenizer.eos_token
print("done")

**Answers before fine-tuning**

In [None]:
# Load the test dataset
test_dataset = load_dataset("csv", data_files="/kaggle/input/finetuningllmqna/test.csv")

# NOTE(review): this prompt has no blank lines between its parts, unlike the
# templates used later in the notebook for training and post-fine-tuning
# inference. Consider aligning them so the before/after comparison uses
# identical prompts.
template = """Provide a detailed answer to the following question.
Question: {Question}
### Answer:"""
prompt = PromptTemplate(template=template, input_variables=['Question'])

def format_text_test(example):
    """Render the answer-less prompt for one test row into a "text" column."""
    text = prompt.format(Question=example['Question'])
    return {"text": text}

test_dataset = test_dataset.map(format_text_test)

# Baseline: answer every test question with the model before any fine-tuning.
test_rows = test_dataset["train"]
preds_before_finetuning = []
for row in tqdm(test_rows, total=len(test_rows)):
    # Use a dedicated name: rebinding the global `prompt` here would replace
    # the PromptTemplate that format_text_test reads with a plain string.
    prompt_text = row["text"]
    inputs = tokenizer(prompt_text, return_tensors="pt").to(model.device)
    # Exclude `token_type_ids` to avoid passing it to `generate`
    inputs = {key: value for key, value in inputs.items() if key != "token_type_ids"}
    outputs = model.generate(**inputs, max_new_tokens=50)
    # Decode only the newly generated tokens. Splitting the full text on
    # "### Answer:" and taking the last piece drops the start of the answer
    # whenever the model emits that marker again in its continuation.
    prompt_length = inputs["input_ids"].shape[1]
    answer = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    preds_before_finetuning.append(answer.strip())

# Persist the baseline answers next to their questions.
os.makedirs("/kaggle/working/test_responses_before_finetuning", exist_ok=True)
test_df_before = pd.DataFrame({
    "Question": list(test_rows["Question"]),
    "Answer": preds_before_finetuning
})
test_df_before.to_csv("/kaggle/working/test_responses_before_finetuning/responses_before_finetuning.csv", index=False)
print("Responses before fine-tuning saved to /kaggle/working/test_responses_before_finetuning/responses_before_finetuning.csv")


# prepare training data

In [None]:
# Load the question/answer training set from CSV.
# load_dataset("csv", ...) puts the rows under the "train" split, which is how
# the rest of the notebook accesses them.
train_dataset = load_dataset("csv", data_files="/kaggle/input/finetuningllmqna/train.csv")
print("done")

In [None]:
# Define template for question-answer pair.
# The "### Answer:" marker separates the prompt from the completion; the
# trainer's completion-only collator and the inference prompt both rely on it.
template = """Provide a detailed answer to the following question.

Question: {Question}

### Answer: {Answer}"""

# Prepare prompt for fine-tuning: both placeholders are filled per training row.
prompt = PromptTemplate(template=template, input_variables=['Question', 'Answer'])
print("done")

In [None]:
# Display the first training row rendered through the template, as Markdown,
# to check the prompt layout before training.
sample = train_dataset['train'][0]
display(Markdown(prompt.format(Question=sample['Question'], Answer=sample['Answer'])))
print("done")

In [None]:
# Row formatter for the question-answer training set
def format_text(example):
    """Render one training row through the global `prompt` template.

    Args:
        example: mapping with "Question" and "Answer" entries.

    Returns:
        dict with a single "text" key holding the filled-in prompt.
    """
    return {
        "text": prompt.format(
            Question=example['Question'],
            Answer=example['Answer'],
        )
    }
print("done")

In [None]:
# Add a "text" column holding the fully rendered prompt + answer for each row.
# Note: this rebinds `train_dataset` to the mapped version.
train_dataset = train_dataset.map(format_text)
print("done")

# Set up training arguments

In [None]:
# check model structure
# A bare `model` expression is echoed only when it is the last statement in a
# cell. With print("done") after it nothing was shown, so print it explicitly.
print(model)
print("done")

In [None]:
# Define target modules for QLoRA
def find_linear_layers(model):
    """Collect the leaf names of every 4-bit linear layer in `model`.

    Walks `model.named_modules()`, keeps modules that are instances of
    `bnb.nn.Linear4bit`, and records the last component of each dotted module
    name. `lm_head` is excluded from the result.

    Returns:
        list of unique leaf module names (order not specified).
    """
    leaf_names = {
        qualified_name.split('.')[-1]
        for qualified_name, submodule in model.named_modules()
        if isinstance(submodule, bnb.nn.Linear4bit)
    }
    # The output head is never used as a LoRA target.
    leaf_names.discard('lm_head')
    return list(leaf_names)

target_modules = find_linear_layers(model)

# LoRA adapter settings applied to every discovered linear layer.
qlora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=target_modules,
    r=16,
    lora_alpha=64,
    lora_dropout=0.1,
    bias="none",
)
print("done")

In [None]:
# Set training arguments
training_args = TrainingArguments(
    # Checkpoints are written here; save_total_limit=1 keeps only one.
    output_dir="./SFT-llama2-7b", 
    # 4 sequences per device x 2 accumulation steps = 8 sequences per optimizer step.
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    learning_rate=2e-4,
    logging_steps=20,
    logging_strategy="steps",
    warmup_steps=2,
    num_train_epochs=2,
    # NOTE(review): per the TrainingArguments docs a positive max_steps overrides
    # num_train_epochs, so as written training stops after a single optimizer
    # step. warmup_steps=2 and logging_steps=20 are both larger than that, so
    # warmup never completes and no loss is logged. Use max_steps=-1 to train
    # for the configured number of epochs.
    max_steps=1,  # Adjust this based on your resources
    optim="paged_adamw_8bit",
    # NOTE(review): fp16 mixed precision is enabled here while the quantization
    # config in the model-loading cell names bfloat16 - confirm the intended precision.
    fp16=True,
    run_name="baseline-llama2-sft",
    save_total_limit=1,
    # Disable external experiment trackers.
    report_to="none"
)
print("done")

In [None]:
# Build the completion-only collator first, then the trainer that uses it.
# "Answer:" is the response marker from the training template; the collator is
# expected to limit the loss to the text after it (see the TRL docs - confirm).
completion_only_collator = DataCollatorForCompletionOnlyLM(
    tokenizer=tokenizer,
    response_template="Answer:",
)

# Initialize trainer for fine-tuning: the QLoRA config is passed as
# `peft_config` and the rendered prompts are read from the "text" column.
supervised_finetuning_trainer = SFTTrainer(
    model,
    args=training_args,
    train_dataset=train_dataset['train'],
    dataset_text_field="text",
    max_seq_length=3000,
    tokenizer=tokenizer,
    peft_config=qlora_config,
    data_collator=completion_only_collator,
)
print("done")

In [None]:
# Run supervised fine-tuning with the arguments configured above.
supervised_finetuning_trainer.train()
print("done")

# Save model

In [None]:
# Unwrap a `.module` wrapper when the trainer's model has one, then write the
# result to the local "outputs" directory.
trained_model = supervised_finetuning_trainer.model
model_to_save = getattr(trained_model, 'module', trained_model)
model_to_save.save_pretrained("outputs")
print("done")

# Applying LoRA

In [None]:
# Adapter configuration as saved to 'outputs' (kept for inspection).
lora_config = LoraConfig.from_pretrained('outputs')

# Use the model the trainer just fine-tuned for inference.
# get_peft_model(model, lora_config) only builds *freshly initialised* adapter
# layers from a config; it never loads the trained adapter weights from
# 'outputs', so predictions made with it would not reflect the fine-tuning
# (and re-wrapping the already-adapted base model risks resetting its adapter).
finetuned_model = supervised_finetuning_trainer.model
model = getattr(finetuned_model, 'module', finetuned_model)

# Switch off training-time behaviour such as LoRA dropout before generating.
model.eval()
print("done")

# Create submission

### Prepare test set

In [None]:
# Template without answer for inference.
# Same layout as the training template, but it stops at the "### Answer:"
# marker so the model generates the answer itself.
template = """Provide a detailed answer to the following question.

Question: {Question}

### Answer:"""

# Rebinds the global `prompt` to the inference template read by format_text_test.
prompt = PromptTemplate(template=template, input_variables=['Question'])
print("done")

In [None]:
# Test-set formatter: only the question is filled in
def format_text_test(example):
    """Render the answer-less inference prompt for one test row.

    Args:
        example: mapping with a "Question" entry.

    Returns:
        dict with a single "text" key holding the rendered prompt.
    """
    return {"text": prompt.format(Question=example['Question'])}

# Reload the test CSV and attach the rendered prompt as a "text" column.
test_dataset = load_dataset(
    "csv",
    data_files="/kaggle/input/finetuningllmqna/test.csv",
).map(format_text_test)
print("done")

### Predict with fine-tuned model

In [None]:
from torch import nn
class Perplexity(nn.Module):
    """Per-sequence next-token cross-entropy over a batch.

    For each sequence in the batch, the logits at position t are scored against
    the label at position t+1 with `nn.CrossEntropyLoss` (mean over tokens).
    No exponential is applied, so the returned value is the cross-entropy
    (the log of the perplexity), not the perplexity itself.

    Args:
        reduce: when True (default) return the mean over the batch as a scalar
            tensor; when False return one value per sequence.
    """

    def __init__(self, reduce: bool = True):
        super().__init__()
        self.reduce = reduce
        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, logits, labels):
        """Score `logits` against `labels` shifted by one position.

        The batch size is taken from the first dimension of `labels`.
        """
        # Drop the last prediction and the first label so they line up.
        next_token_logits = logits[..., :-1, :].contiguous()
        next_token_labels = labels[..., 1:].contiguous()

        per_sequence = torch.stack(
            [
                self.loss_fn(next_token_logits[row], next_token_labels[row])
                for row in range(labels.shape[0])
            ],
            dim=0,
        )
        return torch.mean(per_sequence) if self.reduce else per_sequence

perp = Perplexity()
print("done")

In [None]:
# Generate answers for test questions
test_rows = test_dataset["train"]
preds = []
for row in tqdm(test_rows, total=len(test_rows)):
    # Use a dedicated name: rebinding the global `prompt` here would replace
    # the PromptTemplate that format_text_test reads with a plain string.
    prompt_text = row["text"]
    inputs = tokenizer(prompt_text, return_tensors="pt").to(model.device)

    # Exclude `token_type_ids` to avoid passing it to `generate`
    inputs = {key: value for key, value in inputs.items() if key != "token_type_ids"}

    outputs = model.generate(**inputs, max_new_tokens=50)

    # Decode only the newly generated tokens. Splitting the full text on
    # "### Answer:" and taking the last piece drops the start of the answer
    # whenever the model emits that marker again in its continuation.
    prompt_length = inputs["input_ids"].shape[1]
    answer = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    preds.append(answer.strip())
print("done")


### Format predictions to submission format and save

In [None]:
# Prepare submission file
submission_path = "/kaggle/working/submission.csv"

# Make sure the directory the CSV is written to exists. The previous call
# created an unused relative "kaggle/finetuningllmqna" directory instead.
os.makedirs(os.path.dirname(submission_path), exist_ok=True)

# One row per test question, paired with the fine-tuned model's answer.
test_df = pd.DataFrame({
    "Question": list(test_dataset["train"]["Question"]),
    "Answer": preds
})
test_df.to_csv(submission_path, index=False)
print("Submission file created successfully.")
print("done")

In [None]:
# After fine-tuning, save the model to a directory in the /kaggle/working path
model_to_save = supervised_finetuning_trainer.model.module if hasattr(supervised_finetuning_trainer.model, 'module') else supervised_finetuning_trainer.model
model_path = "/kaggle/working/fine_tuned_model"
model_to_save.save_pretrained(model_path)
print("Model saved to /kaggle/working/fine_tuned_model")

# Zip the model directory for easy download
!zip -r /kaggle/working/fine_tuned_model.zip {model_path}
print("Model zipped for download")
print("done")
