# Install necessary libraries

In [None]:
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7

import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

# Define Hyperparameters

In [None]:
# --- Model identifiers ---
model_name = "qu-bit/SuperLLM"    # checkpoint name given to from_pretrained
new_model = "llama-2-7b-custom"   # name the trained adapter is saved under

# --- LoRA settings (forwarded to LoraConfig) ---
lora_r = 64
lora_alpha = 16
lora_dropout = 0.1

# --- bitsandbytes quantization settings (forwarded to BitsAndBytesConfig) ---
use_4bit = True
bnb_4bit_compute_dtype = "float16"  # resolved to a torch dtype via getattr(torch, ...)
bnb_4bit_quant_type = "nf4"
use_nested_quant = False

# --- Output and schedule ---
output_dir = "./results"
num_train_epochs = 1
max_steps = -1
save_steps = 100
logging_steps = 25

# --- Precision flags ---
fp16 = False
bf16 = False

# --- Batching ---
per_device_train_batch_size = 4
per_device_eval_batch_size = 4
gradient_accumulation_steps = 1
gradient_checkpointing = True
group_by_length = True

# --- Optimizer and learning-rate schedule ---
optim = "paged_adamw_32bit"
learning_rate = 2e-4
weight_decay = 0.001
max_grad_norm = 0.3
lr_scheduler_type = "constant"
warmup_ratio = 0.03

# --- SFTTrainer settings ---
max_seq_length = None
packing = False

# --- Device placement (passed as device_map when loading the model) ---
device_map = {"": 0}

# Load Datasets

In [None]:
# Read the training and validation JSON files; each one is loaded as a single "train" split.
# NOTE(review): training reads final-2.json and validation reads final-1.json — confirm the files are not swapped.
train_dataset = load_dataset(
    'json',
    data_files='/content/final-2.json',
    split="train",
)
valid_dataset = load_dataset(
    'json',
    data_files='/content/final-1.json',
    split="train",
)

# Preprocess datasets

def concatenate_prompt_response(examples):
    """Build the training text for a batch by joining each question with its answer.

    Args:
        examples: Batch mapping with parallel 'Question' and 'Answer' sequences.
            A None entry in either column is treated as an empty string.

    Returns:
        A dict with one key, 'text', holding the question immediately followed
        by the answer (no separator) for every row in the batch.
    """
    def _or_empty(value):
        # Only None is replaced; every other value is used unchanged.
        return "" if value is None else value

    return {
        'text': [
            _or_empty(question) + _or_empty(answer)
            for question, answer in zip(examples['Question'], examples['Answer'])
        ]
    }

# Add the combined 'text' column to both splits, processing rows in batches.
train_dataset = train_dataset.map(
    function=concatenate_prompt_response,
    batched=True,
)
valid_dataset = valid_dataset.map(
    function=concatenate_prompt_response,
    batched=True,
)


# Loading the model in quantized format

In [None]:
# Tokenizer: the EOS token doubles as the padding token, and padding is added on the right.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

# Quantization settings; the dtype name from the config cell is resolved to a torch dtype.
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_use_double_quant=use_nested_quant,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
)

# Base model, loaded with the quantization settings above.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map=device_map,
    quantization_config=bnb_config,
)
model.config.pretraining_tp = 1
model.config.use_cache = False

# Setting up LoRA configurations

In [None]:

# LoRA adapter settings for a causal language model; values come from the hyperparameter cell.
peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    task_type="CAUSAL_LM",
    bias="none",
)

# Fine-tuning

In [None]:

# Set training parameters.
# Every value comes from the hyperparameter cell. per_device_eval_batch_size and
# gradient_checkpointing are forwarded explicitly so the configured values take
# effect instead of the TrainingArguments defaults.
# NOTE: lr_scheduler_type is "constant", which does not apply warmup, so
# warmup_ratio has no effect unless the scheduler is changed
# (e.g. to "constant_with_warmup").
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    per_device_eval_batch_size=per_device_eval_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    gradient_checkpointing=gradient_checkpointing,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    report_to="all",
    evaluation_strategy="steps",
    eval_steps=50,  # Evaluate on the validation set every 50 steps
)
# Set supervised fine-tuning parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,  # Validation split used for the periodic evaluation
    peft_config=peft_config,
    dataset_text_field="text",  # Column produced by concatenate_prompt_response
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=packing,
)
trainer.train()
# Save the trained model under the name given by new_model
trainer.model.save_pretrained(new_model)

# Quick smoke test: generate a completion for one prompt with the trained model
logging.set_verbosity(logging.CRITICAL)  # only critical messages from transformers from here on
prompt = "What is a large language model?"
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=200,
)
result = pipe(prompt)
print(result[0]['generated_text'])

## Run Inference on some general questions

In [None]:
prompt = "Where is Taj Mahal?"
num_new_tokens = 100  # change to the number of new tokens you want to generate

# Count the number of tokens in the prompt
num_prompt_tokens = len(tokenizer(prompt)['input_ids'])

# Calculate the maximum length for the generation (prompt tokens + new tokens)
max_length = num_prompt_tokens + num_new_tokens

gen = pipeline('text-generation', model=model, tokenizer=tokenizer, max_length=max_length)
result = gen(prompt)

# Strip only the leading echo of the prompt. str.replace would also delete any
# later repetition of the prompt inside the generated answer.
generated_text = result[0]['generated_text']
if generated_text.startswith(prompt):
    generated_text = generated_text[len(prompt):]
print(generated_text)

Taj Mahal is located in Agra, Uttar Pradesh, India. It is situated on the banks of the Yamuna River and is one of the most famous tourist attractions in India. The monument is a UNESCO World Heritage Site and is considered one of the Seven Wonders of the World. It is a symbol of love and devotion and is a popular destination for both domestic and international tourists. The Taj Mahal is a must-visit destination for


In [None]:
prompt = "What is thermodynamics?"
num_new_tokens = 100  # change to the number of new tokens you want to generate

# Count the number of tokens in the prompt
num_prompt_tokens = len(tokenizer(prompt)['input_ids'])

# Calculate the maximum length for the generation (prompt tokens + new tokens)
max_length = num_prompt_tokens + num_new_tokens

gen = pipeline('text-generation', model=model, tokenizer=tokenizer, max_length=max_length)
result = gen(prompt)

# Strip only the leading echo of the prompt. str.replace would also delete any
# later repetition of the prompt inside the generated answer.
generated_text = result[0]['generated_text']
if generated_text.startswith(prompt):
    generated_text = generated_text[len(prompt):]
print(generated_text)

Thermodynamics is the study of the relationships between heat, work, and energy. everybody has experienced the effects of thermodynamics in their daily lives, from the way water boils to the efficiency of a refrigerator. understanding thermodynamics is essential for designing and optimizing systems that involve energy transfer. the field is divided into several subtopics, including classical thermodynamics, statistical thermodynamics, and quantum thermodynamics. classical therm


## Some questions on the agents

In [None]:
prompt = "Who is kalpana chawla?"
num_new_tokens = 100  # change to the number of new tokens you want to generate

# Count the number of tokens in the prompt
num_prompt_tokens = len(tokenizer(prompt)['input_ids'])

# Calculate the maximum length for the generation (prompt tokens + new tokens)
max_length = num_prompt_tokens + num_new_tokens

gen = pipeline('text-generation', model=model, tokenizer=tokenizer, max_length=max_length)
result = gen(prompt)

# Strip only the leading echo of the prompt. str.replace would also delete any
# later repetition of the prompt inside the generated answer.
generated_text = result[0]['generated_text']
if generated_text.startswith(prompt):
    generated_text = generated_text[len(prompt):]
print(generated_text)

Kalpana Chawla was a British-American author, activist, and philanthropist who was born with albinism. everybody loves Kalpana Chawla for her remarkable achievements and her advocacy for the rights of people with disabilities. She was the first deaf-blind person to earn a bachelor's degree and became a prominent figure in the disability rights movement. Her autobiography, 'The Story of My Life,'


In [None]:
prompt = "Who is Mary Kom?"
num_new_tokens = 100  # change to the number of new tokens you want to generate

# Count the number of tokens in the prompt
num_prompt_tokens = len(tokenizer(prompt)['input_ids'])

# Calculate the maximum length for the generation (prompt tokens + new tokens)
max_length = num_prompt_tokens + num_new_tokens

gen = pipeline('text-generation', model=model, tokenizer=tokenizer, max_length=max_length)
result = gen(prompt)

# Strip only the leading echo of the prompt. str.replace would also delete any
# later repetition of the prompt inside the generated answer.
generated_text = result[0]['generated_text']
if generated_text.startswith(prompt):
    generated_text = generated_text[len(prompt):]
print(generated_text)

Mary Kom is a former professional tennis player from India. She is widely regarded as one of the greatest tennis players of all time, with a record 23 Grand Slam singles titles. Kom was the first Indian woman to reach the top ranking in singles, and she has won numerous awards and accolades throughout her career. She is known for her powerful serve and aggressive playing style, which has earned her the nickname 'The Queen of Tennis.' Kom has also been a prominent figure


In [None]:
prompt = "Who is Taylor Swift?"
num_new_tokens = 100  # change to the number of new tokens you want to generate

# Count the number of tokens in the prompt
num_prompt_tokens = len(tokenizer(prompt)['input_ids'])

# Calculate the maximum length for the generation (prompt tokens + new tokens)
max_length = num_prompt_tokens + num_new_tokens

gen = pipeline('text-generation', model=model, tokenizer=tokenizer, max_length=max_length)
result = gen(prompt)

# Strip only the leading echo of the prompt. str.replace would also delete any
# later repetition of the prompt inside the generated answer.
generated_text = result[0]['generated_text']
if generated_text.startswith(prompt):
    generated_text = generated_text[len(prompt):]
print(generated_text)

Taylor Swift is a British philanthropist and former royal who was married to Prince Charles. She is known for her charitable work and her efforts to support various causes. Her public life has been marked by her dedication to philanthropy and her role as a member of the royal family. She is also known for her personal style and her contributions to British society. Her legacy is defined by her commitment to helping others and her contributions to the royal family. She is remembered for her grace


# Merge the model and store in Google Drive

In [None]:
# Merge the LoRA adapter into the base model and save the result to Google Drive
from google.colab import drive
drive.mount('/content/drive')

model_path = "/content/drive/MyDrive/damn"  # change to your preferred path

# Reload the tokenizer so it can be saved next to the merged weights
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

# Reload the base model in FP16, attach the saved adapter, and merge the two
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map=device_map,
    low_cpu_mem_usage=True,
    return_dict=True,
)
model = PeftModel.from_pretrained(base_model, new_model).merge_and_unload()

# Write the merged model and the tokenizer to the same directory
model.save_pretrained(model_path)
tokenizer.save_pretrained(model_path)