## **Generation based Chatbot**

In [1]:
import pandas as pd
import re
from tqdm.notebook import tqdm
import tensorflow as tf
from sklearn.model_selection import train_test_split

### **Fine-tuning Falcon-7b**

In [2]:
!pip install -q -U trl transformers accelerate git+https://github.com/huggingface/peft.git
!pip install -q datasets bitsandbytes einops wandb

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.7/75.7 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m92.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.6/227.6 kB[0m [31m28.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m486.2/486.2 kB[0m [31m50.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m236.8/236.8 kB[0m [31m29.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m102.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m85.0 MB/s[0m eta [36m

### **Dataset**

In [3]:
!gdown --id 1OrtWVYzMEcCauJgP06kbaFuqRAL4UVEd

Downloading...
From: https://drive.google.com/uc?id=1OrtWVYzMEcCauJgP06kbaFuqRAL4UVEd
To: /content/reddit_conversation.csv
100% 7.96M/7.96M [00:00<00:00, 68.5MB/s]


In [4]:
df = pd.read_csv('reddit_conversation.csv')

In [5]:
df.head()

Unnamed: 0.1,Unnamed: 0,0,1,2
0,0,What kind of phone(s) do you guys have?,I have a pixel. It's pretty great. Much better...,Does it really charge all the way in 15 min?
1,1,I have a pixel. It's pretty great. Much better...,Does it really charge all the way in 15 min?,"Pretty fast. I've never timed it, but it's und..."
2,2,Does it really charge all the way in 15 min?,"Pretty fast. I've never timed it, but it's und...","cool. I've been thinking of getting one, my ph..."
3,3,What kind of phone(s) do you guys have?,Samsung Galaxy J1. It's my first cell phone an...,What do you think of it? Anything you don't like?
4,4,Samsung Galaxy J1. It's my first cell phone an...,What do you think of it? Anything you don't like?,I love it. I can't think of anything I don't l...


In [6]:
Questions = list()
Answers = list()
for i in tqdm(range(len(df))):
  Q = df['0'][i]
  A = df['1'][i]

  Q = Q.lower()
  A = A.lower()

  Q = re.sub('[\/:;-_+@&!?$()<>.,@#%^&*"]',"",Q)
  A = re.sub('[\/:;-_+@&!?$()<>.,@#%^&*"]',"",A)

  Questions.append(Q)
  Answers.append(A)

  0%|          | 0/56297 [00:00<?, ?it/s]

In [7]:
print(len(Questions))
print(len(Questions))

56297
56297


In [8]:
Questions[:10]

['what kind of phones do you guys have',
 "i have a pixel it's pretty great much better than what i had before ",
 'does it really charge all the way in 15 min',
 'what kind of phones do you guys have',
 "samsung galaxy j1 it's my first cell phone and i've had it for 7 months",
 "what do you think of it anything you don't like",
 'what kind of phones do you guys have',
 "lg optimus v i know it's old",
 'my friend told me to kill myself ',
 "don't kill yourself op"]

In [9]:
Answers[:10]

["i have a pixel it's pretty great much better than what i had before ",
 'does it really charge all the way in 15 min',
 "pretty fast i've never timed it but it's under half an hour ",
 "samsung galaxy j1 it's my first cell phone and i've had it for 7 months",
 "what do you think of it anything you don't like",
 "i love it i can't think of anything i don't like about it",
 "lg optimus v i know it's old",
 "if it does it's job it's good enough",
 "don't kill yourself op",
 "i won't give them the satisfaction "]

In [10]:
data = []
for i in tqdm(range(len(Questions))):
  data.append('Human: ' + str(Questions[i]) + ' ### Assistant: ' + str(Answers[i]))

  0%|          | 0/56297 [00:00<?, ?it/s]

In [11]:
print(len(data))

56297


In [12]:
data[0]

"Human: what kind of phones do you guys have ### Assistant: i have a pixel it's pretty great much better than what i had before "

In [13]:
dataframe = pd.DataFrame({'text': data})

In [14]:
dataframe.shape

(56297, 1)

In [15]:
dataframe.head()

Unnamed: 0,text
0,Human: what kind of phones do you guys have ##...
1,Human: i have a pixel it's pretty great much b...
2,Human: does it really charge all the way in 15...
3,Human: what kind of phones do you guys have ##...
4,Human: samsung galaxy j1 it's my first cell ph...


In [16]:
train, test = train_test_split(dataframe, test_size=0.3, random_state=42)

In [17]:
print(train.shape)
print(test.shape)

(39407, 1)
(16890, 1)


**convert to huggingeface Dataset**

In [18]:
from datasets import Dataset, DatasetDict

In [19]:
train_dataset = Dataset.from_dict(train)
test_dataset = Dataset.from_dict(test)
raw_dataset = DatasetDict({'train': train_dataset, 'test': test_dataset})

In [20]:
raw_dataset

DatasetDict({
    train: Dataset({
        features: ['text'],
        num_rows: 39407
    })
    test: Dataset({
        features: ['text'],
        num_rows: 16890
    })
})

In [29]:
raw_dataset['train']

Dataset({
    features: ['text'],
    num_rows: 39407
})

In [21]:
for sample in raw_dataset['train']:
  print(sample)
  break

{'text': 'Human: my sunday is over\n\ne ### Assistant: rip'}


In [37]:
raw_dataset['train'][1]

{'text': "Human: favorite non-word ### Assistant: y'all if that counts and noice reminds me of that key and peele sketch"}

### **Loading the model**

In this section we will load the Falcon 7B model, quantize it in 4bit and attach LoRA adapters on it. Let's get started!

In [22]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoTokenizer

model_name = "ybelkada/falcon-7b-sharded-bf16"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    trust_remote_code=True
)
model.config.use_cache = False

Downloading (…)lve/main/config.json: 0.00B [00:00, ?B/s]

Downloading (…)/configuration_RW.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/tiiuae/falcon-7b:
- configuration_RW.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


Downloading (…)main/modelling_RW.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/tiiuae/falcon-7b:
- modelling_RW.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


Downloading (…)model.bin.index.json: 0.00B [00:00, ?B/s]

Downloading shards:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading (…)l-00001-of-00008.bin:   0%|          | 0.00/1.92G [00:00<?, ?B/s]

Downloading (…)l-00002-of-00008.bin:   0%|          | 0.00/1.99G [00:00<?, ?B/s]

Downloading (…)l-00003-of-00008.bin:   0%|          | 0.00/1.91G [00:00<?, ?B/s]

Downloading (…)l-00004-of-00008.bin:   0%|          | 0.00/1.91G [00:00<?, ?B/s]

Downloading (…)l-00005-of-00008.bin:   0%|          | 0.00/1.99G [00:00<?, ?B/s]

Downloading (…)l-00006-of-00008.bin:   0%|          | 0.00/1.91G [00:00<?, ?B/s]

Downloading (…)l-00007-of-00008.bin:   0%|          | 0.00/1.91G [00:00<?, ?B/s]

Downloading (…)l-00008-of-00008.bin:   0%|          | 0.00/921M [00:00<?, ?B/s]


Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cuda118.so
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 7.5
CUDA SETUP: Detected CUDA version 118
CUDA SETUP: Loading binary /usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cuda118.so...


  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
Either way, this might cause trouble in the future:
If you get `CUDA error: invalid device function` errors, the above might be the cause and the solution is to make sure only one ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] in the paths that we search based on your env.
  warn(msg)


Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

In [23]:
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

Downloading (…)okenizer_config.json:   0%|          | 0.00/180 [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json: 0.00B [00:00, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/281 [00:00<?, ?B/s]

In [24]:
from peft import LoraConfig

lora_alpha = 16
lora_dropout = 0.1
lora_r = 64

peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=[
        "query_key_value",
        "dense",
        "dense_h_to_4h",
        "dense_4h_to_h",
    ]
)

### **Loading the trainer**

In [33]:
from transformers import TrainingArguments

output_dir = "./results"
per_device_train_batch_size = 4
gradient_accumulation_steps = 4
optim = "paged_adamw_32bit"
save_steps = 10
logging_steps = 10
learning_rate = 2e-4
max_grad_norm = 0.3
max_steps = 500
warmup_ratio = 0.03
lr_scheduler_type = "constant"

training_arguments = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    fp16=True,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=True,
    lr_scheduler_type=lr_scheduler_type,
)

**Then finally pass everthing to the trainer**

In [34]:
from trl import SFTTrainer

max_seq_length = 512

trainer = SFTTrainer(
    model=model,
    train_dataset=raw_dataset['train'],
    eval_dataset=raw_dataset['test'],
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
)

Map:   0%|          | 0/39407 [00:00<?, ? examples/s]

Map:   0%|          | 0/16890 [00:00<?, ? examples/s]

We will also pre-process the model by upcasting the layer norms in float 32 for more stable training

In [35]:
for name, module in trainer.model.named_modules():
    if "norm" in name:
        module = module.to(torch.float32)

### **Train the model**

In [36]:
trainer.train()