<a href="https://www.kaggle.com/code/atharvamehta18/notebooka66e78e621?scriptVersionId=156981950" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

**Installing Dependencies**

In [1]:
!pip install torch transformers==4.31.0 einops datasets==2.16.0 accelerate trl==0.4.7 peft==0.4.0 bitsandbytes

Collecting transformers==4.31.0
  Obtaining dependency information for transformers==4.31.0 from https://files.pythonhosted.org/packages/21/02/ae8e595f45b6c8edee07913892b3b41f5f5f273962ad98851dc6a564bbb9/transformers-4.31.0-py3-none-any.whl.metadata
  Downloading transformers-4.31.0-py3-none-any.whl.metadata (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.9/116.9 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting einops
  Obtaining dependency information for einops from https://files.pythonhosted.org/packages/29/0b/2d1c0ebfd092e25935b86509a9a817159212d82aa43d7fb07eca4eeff2c2/einops-0.7.0-py3-none-any.whl.metadata
  Downloading einops-0.7.0-py3-none-any.whl.metadata (13 kB)
Collecting datasets==2.16.0
  Obtaining dependency information for datasets==2.16.0 from https://files.pythonhosted.org/packages/a0/93/da8a22a292e51ab76f969eb87bda8fd70cc3963b4dd71f67bb92a70a7992/datasets-2.16.0-py3-none-any.whl.metadata
  Downloading datasets

In [2]:
!pip install -i https://test.pypi.org/simple/ bitsandbytes

Looking in indexes: https://test.pypi.org/simple/


In [3]:
# Print the metadata (version, location, dependents) of the installed accelerate package.
!pip show accelerate

Name: accelerate
Version: 0.25.0
Summary: Accelerate
Home-page: https://github.com/huggingface/accelerate
Author: The HuggingFace team
Author-email: sylvain@huggingface.co
License: Apache
Location: /opt/conda/lib/python3.10/site-packages
Requires: huggingface-hub, numpy, packaging, psutil, pyyaml, safetensors, torch
Required-by: catalyst, peft, trl


In [4]:
# Print the metadata (version, location, dependents) of the installed bitsandbytes package.
!pip show bitsandbytes

Name: bitsandbytes
Version: 0.41.3.post2
Summary: k-bit optimizers and matrix multiplication routines.
Home-page: https://github.com/TimDettmers/bitsandbytes
Author: Tim Dettmers
Author-email: dettmers@cs.washington.edu
License: MIT
Location: /opt/conda/lib/python3.10/site-packages
Requires: 
Required-by: 


**Downloading Tokeniser & Model**

In [5]:
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline
)
from peft import LoraConfig
from trl import SFTTrainer
import transformers



In [6]:
# Hub id of the base chat checkpoint; later cells reuse `name`.
name = 'NousResearch/Llama-2-7b-chat-hf'

llama_tokenizer = AutoTokenizer.from_pretrained(
    name,
    trust_remote_code=True,
)

# Pad on the right (the original author notes this as a fix for fp16) and
# reuse the end-of-sequence token as the padding token.
llama_tokenizer.padding_side = "right"
llama_tokenizer.pad_token = llama_tokenizer.eos_token

config.json:   0%|          | 0.00/718 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [None]:

# Model configuration published alongside the checkpoint.
config = transformers.AutoConfig.from_pretrained(name, trust_remote_code=True)

# Report the available hardware. `device` is informational only: placement of
# the quantized weights is decided by `device_map="auto"` below.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    num_gpus = torch.cuda.device_count()
    print(f"{num_gpus} GPU(s) available.")
else:
    device = torch.device("cpu")
    print("Using CPU.")

# 4-bit NF4 weight quantization with float16 compute; nested (double)
# quantization is disabled.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False,
)

# Load the base model already quantized and dispatched across the visible
# devices.
model = transformers.AutoModelForCausalLM.from_pretrained(
    name,
    config=config,
    quantization_config=quant_config,
    trust_remote_code=True,
    device_map="auto",
)

# Deliberately no `model.to(device)` here: transformers rejects `.to()` on
# bitsandbytes 4-bit/8-bit models with a ValueError, because the weights are
# already placed and cast during loading.

**Loading Dataset (CounselChat counselling Q&A)**

In [None]:
import pandas as pd
from datasets import load_dataset

# Fetch the CounselChat dataset from the Hugging Face Hub and list its splits.
dataset = load_dataset("nbertagnolli/counsel-chat")
print(dataset.keys())

# Work with the training split as a pandas DataFrame from here on.
train_split = dataset["train"]
df = pd.DataFrame(train_split)

In [None]:
# Preview the first rows of the DataFrame built from the training split.
df.head()

In [None]:
# Show the per-column null counts before dropping anything. As a bare
# expression in the middle of the cell this result was silently discarded, so
# it is printed explicitly.
print(df.isna().sum())

# Drop every row that has a null in any column. Rebinding `df` instead of
# using `inplace=True` keeps the statement an ordinary expression.
df = df.dropna()

In [None]:
# Pull the prompt/response columns out as NumPy arrays; `output_text` is
# inspected again in a later cell.
input_text = df['questionText'].to_numpy()
output_text = df['answerText'].to_numpy()

# Two-column frame holding just the question/answer pairs.
df_new = pd.DataFrame({"question": input_text, "answer": output_text})

In [None]:
# Shape of the answers array extracted from the `answerText` column.
output_text.shape

In [None]:
def convert_to_standard_format(question, answer):
    """Wrap a question/answer pair in the instruction-style training template.

    Args:
        question: Prompt text placed between the ``[INST]`` markers.
        answer: Response text placed after ``[/INST]``.

    Returns:
        A single string of the form
        ``<s>[INST]  {question} [/INST] {answer} </s>``.

    Raises:
        TypeError: If ``question`` or ``answer`` is not a string.
    """
    pieces = ("<s>[INST]  ", question, " [/INST] ", answer, " </s>")
    return "".join(pieces)

In [None]:
# Render every question/answer pair through the prompt template; the result is
# kept under the 'text' key, one formatted string per row of `df_new`.
formatted_rows = [
    convert_to_standard_format(question, answer)
    for question, answer in zip(df_new["question"], df_new["answer"])
]
transformed_data = {'text': formatted_rows}


In [None]:
# Keep only the first 100 formatted examples as the training frame.
trans_df = pd.DataFrame({"text": transformed_data['text'][:100]})

# Display the first formatted example as a sanity check.
transformed_data['text'][0]

In [None]:
# NOTE(review): this rebinds `transformed_data` from a dict to a list holding
# the first 100 formatted strings. Re-running this cell, or any cell that reads
# `transformed_data['text']`, after it has executed raises a TypeError. No
# later cell in view reads the resulting list.
transformed_data = transformed_data['text'][:100]

In [None]:
from datasets import Dataset

# Wrap the 100-example pandas frame as a Hugging Face Dataset for the trainer.
hf_dataset = Dataset.from_pandas(trans_df)

**Fine-Tuning Parameters**

In [None]:
# Baseline training configuration.
# NOTE(review): no later cell in view passes `training_args` to a trainer; the
# trainer below is built from `train_params` instead.
training_args = TrainingArguments(
    # Where checkpoints and logs are written.
    output_dir="./results",
    logging_dir="./logs",
    logging_steps=10,
    # Schedule: epochs and learning-rate warmup.
    num_train_epochs=3,
    warmup_steps=500,
    # Per-device batch sizes for training and evaluation.
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    # Regularisation.
    weight_decay=0.01,
)

In [None]:
# LoRA adapter configuration: rank-8 adapters, scaling alpha 16, 10% dropout,
# no bias terms trained, causal language-modelling task.
peft_parameters = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=8,
    bias="none",
    task_type="CAUSAL_LM"
)

# Training hyperparameters actually used by the trainer below.
train_params = TrainingArguments(
    output_dir="./results_modified",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    optim="adamw_8bit",
    save_steps=25,
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,              # -1: let num_train_epochs decide the length
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant",
    report_to="tensorboard",
)

# Pass the already-loaded 4-bit `model` object, not the hub id string `name`.
# Given a string, SFTTrainer loads its own fresh copy of the checkpoint, which
# ignores the quantization config applied above and discards that model.
trainer = SFTTrainer(
    model=model,
    train_dataset=hf_dataset,
    peft_config=peft_parameters,
    dataset_text_field="text",
    tokenizer=llama_tokenizer,
    args=train_params
)

In [None]:
# Run the fine-tuning loop configured on `trainer`.
trainer.train()

In [None]:
# Save the model held by the trainer (the one that was actually fine-tuned).
# The module-level `model` is the quantized base model; saving it would not
# capture the training result, and `save_pretrained` on a 4-bit base model is
# not supported by the pinned transformers version.
trainer.model.save_pretrained("refined_model")

**Inference Pipeline**

In [None]:
# Generate Text
query = ""
text_gen = pipeline(task="text-generation", model="refined_model", tokenizer=llama_tokenizer, max_length=200)
output = text_gen(f"<s>[INST] {query} [/INST]")
print(output[0]['generated_text'])