In [1]:
!pip install peft
!pip install datasets
!pip install accelerate -U

Collecting peft
  Downloading peft-0.10.0-py3-none-any.whl (199 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.1/199.1 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
Collecting accelerate>=0.21.0 (from peft)
  Downloading accelerate-0.29.3-py3-none-any.whl (297 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.6/297.6 kB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.13.0->peft)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.13.0->peft)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.13.0->peft)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.13.0->peft)
  Using cached nvidia_cudnn_cu12-8.9.2.26

In [1]:
import os
import random
import string
import torch
import time
import numpy as np
import transformers
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, GenerationConfig, TrainingArguments, Trainer
from datasets import Dataset, load_dataset

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [5]:
#loading model and the tokenizer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

model_name = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name, torch_dtype=torch.bfloat16)
pretrained_model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

In [3]:
prompt = """Act as a mental health councelor and answer the following:

I'm going through some things with my feelings and myself.
I barely sleep and I do nothing but think about how I'm worthless and how I shouldn't be here..
I've always wanted to fix my issues, but I never get around to it.

How can I change my feeling of being worthless to everyone?

Response:
"""

inputs = tokenizer(prompt, return_tensors='pt').to(device)

output = tokenizer.decode(
        pretrained_model.generate(
            inputs["input_ids"],
            max_new_tokens=50,
        )[0],
        skip_special_tokens=True
    )

dash_line = '-'.join('' for x in range(100))

print(dash_line)
print(f'INPUT PROMPT:\n{prompt}')
print(dash_line)
print(f'MODEL GENERATION:\n{output}\n')

---------------------------------------------------------------------------------------------------
INPUT PROMPT:
Act as a mental health councelor and answer the following:

I'm going through some things with my feelings and myself.
I barely sleep and I do nothing but think about how I'm worthless and how I shouldn't be here..
I've always wanted to fix my issues, but I never get around to it.

How can I change my feeling of being worthless to everyone?

Response:

---------------------------------------------------------------------------------------------------
MODEL GENERATION:
I can change my feelings and myself.



In [4]:
huggingface_dataset_name = "Amod/mental_health_counseling_conversations"
dataset = load_dataset(huggingface_dataset_name)

dataset

DatasetDict({
    train: Dataset({
        features: ['Context', 'Response'],
        num_rows: 3512
    })
})

In [6]:
def tokenize_function(example):
  start_prompt = 'Act as a mental health councelor and answer the following:\n\n'
  end_prompt = '\n\nResponse: '
  prompt = start_prompt + example['Context'] + end_prompt
  example['input_ids'] = tokenizer(prompt, padding="max_length", truncation = True,  return_tensors="pt").input_ids
  example['labels'] = tokenizer(example["Response"], padding="max_length", truncation = True,  return_tensors="pt").input_ids

  return example

In [7]:
tokenized_datasets = dataset.map(tokenize_function, batched=True)

In [8]:
tokenized_datasets

DatasetDict({
    train: Dataset({
        features: ['Context', 'Response', 'input_ids', 'labels'],
        num_rows: 3512
    })
})

In [9]:
tokenized_datasets = tokenized_datasets.remove_columns(['Context','Response'])
tokenized_datasets

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'labels'],
        num_rows: 3512
    })
})

In [10]:
tokenized_datasets = tokenized_datasets.filter(lambda example, index: index % 1000 == 0, with_indices=True)
print(f"Shapes of the datasets:")
print(f"Training: {tokenized_datasets['train'].shape}")

Shapes of the datasets:
Training: (4, 2)


In [11]:
import time
import numpy as np

output_dir = f'./test-full-fine-tune-main-{str(int(time.time()))}'

training_args = TrainingArguments(
    output_dir=output_dir,
    learning_rate=1e-5,
    num_train_epochs=20,
    weight_decay=0.01,
    logging_steps=1,
    max_steps=20
)

In [12]:
trainer = Trainer(
    model=pretrained_model,
    args=training_args,
    train_dataset=tokenized_datasets['train']
)

max_steps is given, it will override any value given in num_train_epochs


In [13]:
trainer.train()

Step,Training Loss
1,27.6541
2,29.4285
3,26.288
4,27.3056
5,25.9518
6,25.9312
7,24.9074
8,25.0097
9,24.802
10,24.3137


TrainOutput(global_step=20, training_loss=24.894351768493653, metrics={'train_runtime': 23.7089, 'train_samples_per_second': 6.749, 'train_steps_per_second': 0.844, 'total_flos': 54780588195840.0, 'train_loss': 24.894351768493653, 'epoch': 20.0})

In [14]:
prompt = """Act as a mental health councelor and answer the following:

I'm going through some things with my feelings and myself.
I barely sleep and I do nothing but think about how I'm worthless and how I shouldn't be here..
I've always wanted to fix my issues, but I never get around to it.

How can I change my feeling of being worthless to everyone?

Response:
"""

inputs = tokenizer(prompt, return_tensors='pt').to(device)

output = tokenizer.decode(
        pretrained_model.generate(
            inputs["input_ids"],
            max_new_tokens=50,
        )[0],
        skip_special_tokens=True
    )

dash_line = '-'.join('' for x in range(100))

print(dash_line)
print(f'INPUT PROMPT:\n{prompt}')
print(dash_line)
print(f'MODEL GENERATION:\n{output}\n')

---------------------------------------------------------------------------------------------------
INPUT PROMPT:
Act as a mental health councelor and answer the following:

I'm going through some things with my feelings and myself.
I barely sleep and I do nothing but think about how I'm worthless and how I shouldn't be here..
I've always wanted to fix my issues, but I never get around to it.

How can I change my feeling of being worthless to everyone?

Output:

---------------------------------------------------------------------------------------------------
MODEL GENERATION:
I'm going through some things with my feelings and myself. I barely sleep and I do nothing but think about how I'm worthless and how I shouldn't be here. I've always wanted to fix my issues, but I

