In [None]:
!pip install -q -U transformers langchain
!pip install -q accelerate==0.28.0 peft==0.4.0 bitsandbytes==0.41.3 
!pip install --upgrade trl
!pip install torch scipy

In [None]:
import json
import os
import pickle

import torch
import transformers
from datasets import load_dataset, Dataset
from peft import LoraConfig, PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, HfArgumentParser, TrainingArguments, pipeline
from transformers import logging
from trl import SFTTrainer

In [None]:
# Load the raw training data from dataset.json.
# The encoding is given explicitly so the file is decoded as UTF-8 instead of
# whatever the platform's locale default happens to be.
with open("dataset.json", "r", encoding="utf-8") as file:
    json_data = json.load(file)

# NOTE(review): this prints the entire payload; consider showing only a small
# preview once the structure of the file is confirmed.
print(json_data)

In [None]:
# Show which Python type json.load produced for the top-level JSON value.
print(type(json_data))

In [None]:
# Build a `datasets.Dataset` from the loaded JSON object.
# The trainer cell further down reads its "text" field (dataset_text_field="text").
dataset = Dataset.from_dict(json_data)

In [None]:
# Print the first record as a quick sanity check of the dataset contents.
print(dataset[0])

In [None]:
# Report every CUDA device visible to PyTorch, or say that none is available.
if torch.cuda.is_available():
    num_devices = torch.cuda.device_count()
    for i in range(num_devices):
        # The index is interpolated with {i}; the message previously printed
        # the literal text "(i)" for every device.
        print(f"CUDA Devices({i}):{torch.cuda.get_device_name(i)}")
else:
    print("CUDA is not available")

In [None]:
# Hugging Face access token, read from the HF_TOKEN environment variable so
# that no credential is stored in the notebook.
# NOTE(review): a token literal was previously committed on this line; treat it
# as leaked and revoke it in the Hugging Face account settings.
# When HF_TOKEN is unset this is None; presumably the library then falls back
# to a locally cached login — confirm.
auth_token = os.environ.get("HF_TOKEN")

# Base checkpoint to load.
name = "meta-llama/Llama-2-7b-hf"
new_model = "chatbot"

# `token` replaces the deprecated `use_auth_token` keyword.
tokenizer = AutoTokenizer.from_pretrained(name, cache_dir='./model', token=auth_token)

# Pad with the end-of-sequence token, on the right-hand side of each sequence.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [None]:
# Quantization settings used when loading the model: 4-bit weights of type
# "nf4", float16 as the compute dtype, double quantization switched off.
compute_dtype = torch.float16
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    load_in_4bit=True,
)

In [None]:
# Load the base checkpoint with the quantization settings from bnb_config.
# `token` replaces the deprecated `use_auth_token` keyword.
model = AutoModelForCausalLM.from_pretrained(
    name,
    quantization_config=bnb_config,
    cache_dir="./model",
    token=auth_token,
)

def no_parameters(model):
    """Return a one-line summary of how many parameter elements ``model`` holds.

    Adds up ``numel()`` over every entry yielded by ``model.named_parameters()``.

    Args:
        model: Object exposing ``named_parameters()``, which yields
            ``(name, parameter)`` pairs whose parameters provide ``numel()``.

    Returns:
        str: ``"Total number of parameters: <count>"``.

    NOTE(review): for a quantized model the stored tensors may be packed, so
    the count may differ from the nominal model size — confirm.
    """
    total = sum(tensor.numel() for _name, tensor in model.named_parameters())
    return f"Total number of parameters: {total}"

# Report the parameter count of the model that was just loaded.
res = no_parameters(model)
print(res)

In [None]:
# LoRA adapter configuration handed to the trainer as `peft_config`.
peft_params = LoraConfig(
    task_type="CAUSAL_LM",
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
)

In [None]:
# Hyperparameters for the fine-tuning run, grouped by concern.
training_params = TrainingArguments(
    # Output, checkpointing and reporting
    output_dir="./results",
    report_to="tensorboard",
    logging_steps=25,
    save_steps=25,
    # Run length and batching
    num_train_epochs=1,
    max_steps=-1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    group_by_length=True,
    # Optimizer and learning-rate schedule
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    lr_scheduler_type="constant",
    warmup_ratio=0.03,
    weight_decay=0.001,
    max_grad_norm=0.3,
    # Mixed precision (both switched off)
    fp16=False,
    bf16=False,
)

In [None]:
# Supervised fine-tuning of the loaded model on the "text" field of `dataset`,
# using the LoRA settings in peft_params and the arguments in training_params.
# NOTE(review): trl is installed unpinned; newer releases presumably expect
# dataset_text_field / max_seq_length / packing on an SFTConfig rather than as
# direct keyword arguments — confirm against the installed version.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_params,
    dataset_text_field="text",
    max_seq_length=None,
    tokenizer=tokenizer,
    args=training_params,
    packing=False,
)

# Actually run the fine-tuning. Previously the trainer was only constructed,
# so the save and generation cells that follow worked on un-tuned weights.
trainer.train()

In [None]:
# Persist the in-memory model object and the tokenizer to local files.
# NOTE(review): both objects are serialized whole; consider `save_pretrained`
# for the model and tokenizer instead — confirm what downstream loaders expect.
# NOTE(review): `new_model` ("chatbot") is defined earlier but never used; it
# looks like the intended output name — confirm.
torch.save(model, "llama2_model.pth")
with open('tokenizer.pkl', 'wb') as f:
    pickle.dump(tokenizer, f)

In [None]:
# Smoke-test generation: wrap a sample question in [INST] tags and print the
# text produced by a text-generation pipeline built on the model and tokenizer.
# NOTE(review): the tokenizer presumably adds its own beginning-of-sequence
# token, so the literal "<s>" in the prompt may duplicate it — confirm.
prompt = "How to create a coursera account"
pipe = pipeline(
    task="text-generation",
    tokenizer=tokenizer,
    model=model,
    max_length=200,
)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])