In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install -q transformers datasets peft accelerate bitsandbytes

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 MB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m123.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m89.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m54.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m14.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
import json

with open('/content/train_expanded.json', 'r') as f:
    data = [json.loads(line) for line in f.readlines()]

# Convert to instruction-tuning format
formatted_data = [
    {
        "instruction": "Answer the following customer question.",
        "input": item["question"],
        "output": item["answer"]
    }
    for item in data
]

# Save formatted
with open("formatted_dataset.json", "w") as f:
    for item in formatted_data:
        f.write(json.dumps(item) + "\n")


In [4]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model

model_id = "/content/drive/MyDrive/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    load_in_8bit=True,
    device_map="auto"
)

model = prepare_model_for_kbit_training(model)

lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


In [5]:
from datasets import load_dataset
from transformers import DataCollatorForLanguageModeling

from datasets import Dataset
import json

with open("formatted_dataset.json", "r") as f:
    data = [json.loads(line) for line in f]

dataset = Dataset.from_list(data)


def format(example):
    prompt = f"{example['instruction']}\n\n{example['input']}\n\n"
    response = example['output']
    return {"text": prompt + response}

dataset = dataset.map(format)

def tokenize(example):
    return tokenizer(example["text"], truncation=True, padding="max_length", max_length=512)

tokenized_dataset = dataset.map(tokenize)

data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)


Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

In [6]:
from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir="./tinyllama-lora",
    per_device_train_batch_size=4,
    num_train_epochs=2,
    logging_steps=10,
    save_strategy="epoch",
    save_total_limit=1,
    fp16=True,
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=data_collator,
)



No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [7]:
trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


Step,Training Loss
10,1.7035
20,1.5353
30,1.36
40,1.1613
50,1.0081
60,0.8954
70,0.8792
80,0.857
90,0.7988
100,0.7674


  return fn(*args, **kwargs)


TrainOutput(global_step=100, training_loss=1.096595788002014, metrics={'train_runtime': 195.0692, 'train_samples_per_second': 2.051, 'train_steps_per_second': 0.513, 'total_flos': 1272592937779200.0, 'train_loss': 1.096595788002014, 'epoch': 2.0})

In [8]:
model.save_pretrained("/content/drive/MyDrive/tinyllama-lora-checkpoint")
tokenizer.save_pretrained("/content/drive/MyDrive/tinyllama-lora-checkpoint")


('/content/drive/MyDrive/tinyllama-lora-checkpoint/tokenizer_config.json',
 '/content/drive/MyDrive/tinyllama-lora-checkpoint/special_tokens_map.json',
 '/content/drive/MyDrive/tinyllama-lora-checkpoint/chat_template.jinja',
 '/content/drive/MyDrive/tinyllama-lora-checkpoint/tokenizer.model',
 '/content/drive/MyDrive/tinyllama-lora-checkpoint/added_tokens.json',
 '/content/drive/MyDrive/tinyllama-lora-checkpoint/tokenizer.json')

In [9]:
from transformers import pipeline
from peft import PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "/content/drive/MyDrive/TinyLlama-1.1B-Chat-v1.0"
base_model = AutoModelForCausalLM.from_pretrained(model_id, load_in_8bit=True, device_map="auto")
model = PeftModel.from_pretrained(base_model, "/content/drive/MyDrive/tinyllama-lora-checkpoint")
tokenizer = AutoTokenizer.from_pretrained(model_id)


pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)




The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
Device set to use cuda:0


In [10]:
query=input(" User : ")
while query != 'exit':

  prompt = f"Answer the following customer question question is {query}  \n\n answer: "
  response = pipe(prompt, do_sample=True, temperature=0.7)[0]['generated_text']
  answer = response.split("answer:")[-1].strip()
  print(f"Support :{answer}")
  query=input("\n User : ")

 User : hi
Support :hi, welcome to our website. You can find all the information about our products on our website. Do you have any specific product in mind? If so, please tell us in the customer question box. Thank you for choosing our company.

 User : how do i place order
Support :please follow these steps to place an order:

1. Visit our website and select the product you want to purchase.
2. Choose your preferred payment method.
3. Add your personal information.
4. Check your order details and submit the payment.
5. Once the payment is confirmed, your order will be dispatched.

if you have any further questions, please contact our customer support team. Thank you for choosing us.

 User : can i have refund for orders
Support :we do not offer refunds for orders placed through our website. Please contact our customer support team if you have any questions regarding your order.

 User : what are the products avilable
Support :We are currently not offering the products in your locatio