# Preprocessing
Convert the dataset to ShareGPT style.

In [None]:
from datasets import load_dataset

# Read the CSV from the Kaggle input directory as a single 'train' split.
ds = load_dataset('csv', data_files="/kaggle/input/academicmcq-json-format/academicMCQ_JSON.csv", split='train')



In [None]:
# Hold out 10% of the rows as a test split. The fixed seed keeps the partition
# identical across kernel restarts, so the split that is later pushed and
# evaluated is reproducible.
ds = ds.train_test_split(test_size=0.1, seed=3407)
train_dataset = ds['train']
val_dataset = ds['test']

In [None]:
# Show the DatasetDict produced by the split above.
ds

In [None]:
# ds['train']

In [None]:
# Inspect the first training row.
ds['train'][0]

In [None]:
from datasets import load_dataset, Dataset, DatasetDict

# Load the dataset
# ds = load_dataset('csv', data_files="/kaggle/input/academicmcq-json-format/academicMCQ_JSON.csv", split='train')

# Transform the dataset
def transform(example):
    """Build a ShareGPT-style record from one dataset row.

    Args:
        example: mapping that provides the 'source' and 'target' fields.

    Returns:
        A dict with a single 'dialog' key holding two turns: the 'source'
        text as the "human" turn followed by the 'target' text as the
        "gpt" turn.
    """
    human_turn = {"from": "human", "value": example['source']}
    gpt_turn = {"from": "gpt", "value": example['target']}
    return {"dialog": [human_turn, gpt_turn]}

# Run transform over the train split first, then the test split.
transformed_train, transformed_test = (
    ds[split_name].map(transform) for split_name in ('train', 'test')
)

# Bundle the two converted splits under their original names.
new_ds = DatasetDict(
    {'train': transformed_train, 'test': transformed_test}
)

In [None]:
# Push the converted DatasetDict to the Hugging Face Hub repo.
# The access token is read from the HF_TOKEN environment variable
# (os.getenv yields None when the variable is unset).
import os
new_ds.push_to_hub("romikgosai/academicmcqjson", token = os.getenv('HF_TOKEN'))

In [None]:
from datasets import load_dataset

# Reload the dataset from the Hub (same repo id the push cell writes to);
# from here on `ds` refers to this copy.
ds = load_dataset("romikgosai/academicmcqjson")


In [None]:
# Preview the first converted dialog. Index the row first so only that one
# example is fetched, rather than pulling the 'dialog' column and then
# taking its first element.
ds['train'][0]['dialog']

# Finetune

In [None]:
%%capture
!pip install pip3-autoremove
!pip-autoremove torch torchvision torchaudio -y
!pip install unsloth

import os
os.environ["WANDB_DISABLED"] = "true"

In [None]:
from unsloth import FastLanguageModel
import torch
# Loading options for the base model.
max_seq_length = 2048
dtype = None  # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True  # Use 4bit quantization to reduce memory usage. Can be False.

# Base (not yet fine-tuned) Phi-3 mini instruct checkpoint and its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Phi-3-mini-4k-instruct",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    # token = "hf_...", # use if using gated models like meta-llama/Llama-2-7b-hf
)

In [None]:
# Wrap the base model with LoRA adapters on the attention and MLP projections.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

In [None]:
from unsloth.chat_templates import get_chat_template

# Attach the Phi-3 chat template and map the ShareGPT keys ("from"/"value",
# "human"/"gpt") onto the role/content names the template expects.
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "phi-3", # Supports zephyr, chatml, mistral, llama, alpaca, vicuna, vicuna_old, unsloth
    mapping = {"role" : "from", "content" : "value", "user" : "human", "assistant" : "gpt"}, # ShareGPT style
)

FastLanguageModel.for_inference(model) # Enable native 2x faster inference
# Query used for the spot checks below: the 'source' text of the first test
# example. Indexing the row first avoids pulling the whole column for one value.
user_query = ds['test'][0]['source']
# One-shot prompt prefix: an instruction, one worked example (user prompt with
# its context, followed by a target answer), and a trailing "query: " label
# that the actual query text is appended to.
# NOTE(review): in the example's target answer, the 'correct_option' values
# disagree with the context (e.g. 'Her daughter.' is marked correct although the
# context credits her friend Sean, who appears among the distractors) -- confirm
# whether these labels are intentionally copied verbatim from the dataset.
one_shot_input_context = """
Generate user specified number of quizzes as in following example from given user prompt, generate answer similar to target answer but strictly use the context of query:
user prompt: Generate 4 Quizzes which require logical reasoning in a list of JSON format from the given context. CONTEXT: Success is often measured by the ability to overcome adversity.But,it is often the belief of others that gives us the courage to try. J.K.Rowling,author of the Harry Potter book series,began writing at the age of 6.In her biography,she remembers with great fondness when her good friend Sean,whom she met in secondary school,became the first person to encourage her and help build the confidence that one day she would be a very good writer "He was the first person with whom I really discussed my serious ambition to be a writer.He was also the only person who thought I was bound to be a success at it,which meant much more to me than I ever told him at the time." Despite many setbacks,Rowling persevered in her writing,particularly fantasy stories.But it wasn't until 1990 that she first conceived the idea about Harry Potter.As she recalls,it was on a long train journey from London to Manchester that the idea of Harry Potter simply fell into her head."To my immense frustration ,I didn't have a functioning pen with me,and I was too shy to ask anybody if I could borrow one.I think,now,that this was probably a good thing,because I simply sat and thought,for four (delayed train) hours,and all the details bubbled up in my brain,and this scrawny,blackhaired,bespectacled boy who didn't know he was a wizard became more and more real to me." That same year,her mother passed away after a tenyear battle with multiple sclerosis,which deeply affected her writing.She went on to marry and had a daughter,but separated from her husband shortly afterwards. During this time,Rowling was diagnosed with clinical depression.Unemployed,she completed her first novel in area cafes,where she could get her daughter to fall asleep.After being rejected by 12 publishing houses,the first Harry Potter novel was sold to a small British publishing house. 
Now with seven books that have sold nearly 400 million copies in 64 languages,J.K.Rowling is the highest earning novelist in history.And it all began with her commitment to writing that was fostered by the confidence of a friend!
target answer: [{'question': 'Who believed J.K.Rowling was to be a good writer?', 'correct_option': 'Her daughter.', 'distractors': ['Her friend Sean.', 'Her mother.', 'Her husband.']}, {'question': 'Rowling first came up with the idea about Harry Potter _ .', 'correct_option': 'at the age of 6', 'distractors': ['on a train journey', "after her mother's death", 'in her secondary school']}, {'question': 'She felt frustrated on the train because _ .', 'correct_option': 'her mind suddenly went blank', 'distractors': ['her train was delayed for four hours', "she didn't have a pen with her", 'no one would offer her help']}, {'question': 'The text mainly tells us _ .', 'correct_option': "encouragement contributes to one's success", 'distractors': ['hardship makes a good novelist', 'the courage to try is a special ability', 'you can have a wonderful idea everywhere']}]
query: 
"""

# Prepend the one-shot prefix to the query and wrap it as a single human turn.
input_context = one_shot_input_context + user_query
messages = [{"from": "human", "value": input_context}]

# Render the chat template to token ids on the GPU; the generation prompt
# must be added for generation.
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
).to("cuda")

# Generate at most 1024 new tokens, then decode the sequences back to text.
outputs = model.generate(input_ids=inputs, max_new_tokens=1024, use_cache=True)
output = tokenizer.batch_decode(outputs)

In [None]:
# Show the raw decoded generation from the one-shot run above.
output

In [None]:
import re
# Captures the text between the first assistant tag and the next end tag.
# generate_output() reuses this module-level pattern.
pattern = r"<\|assistant\|>(.*?)<\|end\|>"

# When the decoded text has no closing <|end|> after the assistant tag there is
# nothing to match; fall back to everything after the last assistant tag
# instead of raising IndexError (same fallback generate_output() applies).
assistant_turn = re.search(pattern, output[0], re.DOTALL)
matches = assistant_turn.group(1) if assistant_turn is not None else output[0].split('<|assistant|>')[-1]
print(matches)

In [None]:
# Load the fine-tuned model and its tokenizer from the Kaggle input directory
# and switch the model to inference mode.
# NOTE(review): this rebinds the global `tokenizer`, replacing the instance that
# was configured with get_chat_template() earlier; later prompts (including the
# ones sent to the base `model`) are rendered with this tokenizer. Presumably
# the saved directory ships a tokenizer with the same chat template -- confirm.
finetuned_model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="/kaggle/input/llmfinetuningsept20/mcq_lora_model_2_LORA_only",
    max_seq_length=2048,
    dtype=None,
    load_in_4bit=True,
)
FastLanguageModel.for_inference(finetuned_model)

In [None]:
# Zero-shot prompt: the raw query as a single human turn, rendered to token ids
# on the GPU.
# NOTE(review): generate_output() rebuilds the same messages/inputs locally and
# no later visible cell reads these globals -- confirm this setup is still needed.
messages = [{"from": "human", "value": user_query}]
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,  # Must add for generation
    return_tensors="pt",
).to("cuda")
def generate_output(model, user_query, max_new_tokens = 2048):
    """Generate a reply to ``user_query`` with ``model`` and return the assistant text.

    Relies on the notebook-level ``tokenizer``, ``pattern`` and ``re`` names.

    Args:
        model: object whose ``generate`` method is called with the prompt ids.
        user_query: prompt text, sent as a single human turn.
        max_new_tokens: generation budget forwarded to ``model.generate``
            (defaults to 2048, the value previously hard-coded here).

    Returns:
        The text between the first ``<|assistant|>`` tag and the following
        ``<|end|>`` tag of the decoded sequence. When no such pair exists,
        everything after the last ``<|assistant|>`` tag is returned.
    """
    messages = [
        {"from": "human", "value": user_query},
    ]
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize = True,
        add_generation_prompt = True, # Must add for generation
        return_tensors = "pt",
    ).to("cuda")
    outputs = model.generate(input_ids = inputs, max_new_tokens = max_new_tokens, use_cache = True)
    decoded = tokenizer.batch_decode(outputs)[0]
    # Handle the "no closing tag" case explicitly; a bare `except:` would also
    # hide unrelated failures (and swallow KeyboardInterrupt).
    assistant_turn = re.search(pattern, decoded, re.DOTALL)
    if assistant_turn is not None:
        return assistant_turn.group(1)
    return decoded.split('<|assistant|>')[-1]


In [None]:
# NOTE(review): `output` still holds the base-model one-shot generation from the
# earlier cell; nothing since has reassigned it, so this does not show a
# fine-tuned result -- confirm what this cell is meant to display.
output

We now add LoRA adapters so we only need to update 1 to 10% of all parameters!

In [None]:
import time

# One list per column; every test example appends exactly one entry to each.
data = {
    'source': [],
    'target': [],
    'zero_shot_response': [],
    'one_shot_response': [],
    'finetuned_response': []
}

start_time = time.time()
# Iterate over rows so each example's fields are read together, instead of
# indexing the 'target' column again on every iteration.
for i, example in enumerate(ds['test']):
    source = example['source']
    data['source'].append(source)
    data['target'].append(example['target'])

    # (result column, progress label, model, prompt), run in this order.
    runs = (
        ('zero_shot_response', 'zeroshot', model, source),
        ('one_shot_response', 'oneshot', model, one_shot_input_context + source),
        ('finetuned_response', 'finetune', finetuned_model, source),
    )
    for column, label, run_model, query in runs:
        data[column].append(generate_output(run_model, query))
        # Elapsed wall-clock time since the loop started.
        minute, second = divmod((time.time() - start_time), 60)
        print(f"{i+1} {label} \t {minute} min {second} sec")

In [None]:
# data

In [None]:
import pandas as pd
# Turn the collected responses into a table (one column per key of `data`)
# and preview the first rows.
df = pd.DataFrame(data)
df.head()

In [None]:
# Save the comparison table as a CSV file in the current working directory.
df.to_csv('LLM_respons.csv')