### **Install Dependencies**

In [None]:
!pip install transformers datasets

In [None]:
pip install transformers[torch]

## **GPT2 Model**

### **Intialize the tokenizer and load the pre trained model**

In [None]:


# Load the dataset
dataset = load_dataset("heliosbrahma/mental_health_chatbot_dataset")
texts = dataset['train']['text']

# Load GPT-2 model and tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

# Set the pad_token_id for the tokenizer
tokenizer.pad_token_id = tokenizer.eos_token_id

# Tokenize the conversations
tokenized_conversations = [torch.tensor(tokenizer.encode(text)) for text in texts]
# tokenized_conversations = [tokenizer.encode(text) for text in texts]

# Create a custom dataset
from torch.utils.data import Dataset

class ChatDataset(Dataset):
    def __init__(self, conversations):
        self.conversations = conversations

    def __len__(self):
        return len(self.conversations)

    def __getitem__(self, idx):
        conversation = self.conversations[idx]
        input_ids = conversation[:-1]
        target_ids = conversation[1:]
        return input_ids, target_ids

dataset = ChatDataset(tokenized_conversations)

### **Load the dataset and Train the model**

In [None]:
# Set up the training parameters
batch_size = 8
learning_rate = 5e-5
num_epochs = 100

# Create a custom collation function
from torch.nn.utils.rnn import pad_sequence

def collate_fn(batch):
    input_ids, target_ids = zip(*batch)
    input_ids = pad_sequence(input_ids, batch_first=True, padding_value=tokenizer.pad_token_id)
    target_ids = pad_sequence(target_ids, batch_first=True, padding_value=tokenizer.pad_token_id)
    return input_ids, target_ids

# Create a data loader
from torch.utils.data import DataLoader

train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)

# Set up the optimizer and learning rate scheduler
from transformers import AdamW, get_linear_schedule_with_warmup

optimizer = AdamW(model.parameters(), lr=learning_rate)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=len(train_loader) * num_epochs)

# Fine-tune the model
from tqdm import tqdm

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=False)
    for inputs, targets in progress_bar:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = model(inputs, labels=targets)
        loss = outputs.loss
        total_loss += loss.item()
        loss.backward()
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()
        progress_bar.set_postfix(loss=loss.item())
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(train_loader)}")




Epoch 1/100, Loss: 4.128930612043901




Epoch 2/100, Loss: 3.202729181809859




Epoch 3/100, Loss: 2.861420848152854




Epoch 4/100, Loss: 2.7911785299127754




Epoch 5/100, Loss: 2.7094672593203457




Epoch 6/100, Loss: 2.4632533463564785




Epoch 7/100, Loss: 2.3901658166538584




Epoch 8/100, Loss: 2.358778942715038




Epoch 9/100, Loss: 2.232652349905534




Epoch 10/100, Loss: 2.167929394678636




Epoch 11/100, Loss: 2.1761605468663303




Epoch 12/100, Loss: 2.035902603106065




Epoch 13/100, Loss: 1.9975287047299473




Epoch 14/100, Loss: 1.9135501276363025




Epoch 15/100, Loss: 1.8499266613613476




Epoch 16/100, Loss: 1.810847970572385




Epoch 17/100, Loss: 1.7894469011913647




Epoch 18/100, Loss: 1.793060297315771




Epoch 19/100, Loss: 1.6362408182837747




Epoch 20/100, Loss: 1.5712921131740918




Epoch 21/100, Loss: 1.5470702756534924




Epoch 22/100, Loss: 1.4755959510803223




Epoch 23/100, Loss: 1.4870898994532498




Epoch 24/100, Loss: 1.4225784919478677




Epoch 25/100, Loss: 1.3502022298899563




Epoch 26/100, Loss: 1.320983182300221




Epoch 27/100, Loss: 1.3496747071092778




Epoch 28/100, Loss: 1.2243544269691815




Epoch 29/100, Loss: 1.2038384730165654




Epoch 30/100, Loss: 1.1547785211693158




Epoch 31/100, Loss: 1.09111565080556




Epoch 32/100, Loss: 1.1031340035525234




Epoch 33/100, Loss: 1.0071578730236401




Epoch 34/100, Loss: 0.9506546394391493




Epoch 35/100, Loss: 0.9585937722162767




Epoch 36/100, Loss: 0.9231738637794148




Epoch 37/100, Loss: 0.8882057043639097




Epoch 38/100, Loss: 0.8599514961242676




Epoch 39/100, Loss: 0.8390103443102404




Epoch 40/100, Loss: 0.7907543697140433




Epoch 41/100, Loss: 0.7736111825162714




Epoch 42/100, Loss: 0.734375075860457




Epoch 43/100, Loss: 0.706922941587188




Epoch 44/100, Loss: 0.7173062048175118




Epoch 45/100, Loss: 0.6952083733948794




Epoch 46/100, Loss: 0.6642417907714844




Epoch 47/100, Loss: 0.6250131590799852




Epoch 48/100, Loss: 0.6041312163526361




Epoch 49/100, Loss: 0.5988968962972815




Epoch 50/100, Loss: 0.5810041969472711




Epoch 51/100, Loss: 0.5503568446094339




Epoch 52/100, Loss: 0.5374161953275854




Epoch 53/100, Loss: 0.5149466910145499




Epoch 54/100, Loss: 0.5234363377094269




Epoch 55/100, Loss: 0.48672404749826953




Epoch 56/100, Loss: 0.4982377372004769




Epoch 57/100, Loss: 0.46976806900717993




Epoch 58/100, Loss: 0.46348877115683124




Epoch 59/100, Loss: 0.4477255723693154




Epoch 60/100, Loss: 0.45045161789113825




Epoch 61/100, Loss: 0.41959764605218713




Epoch 62/100, Loss: 0.42249960926446045




Epoch 63/100, Loss: 0.42037464678287506




Epoch 64/100, Loss: 0.3999153131788427




Epoch 65/100, Loss: 0.4038047688928517




Epoch 66/100, Loss: 0.3843003132126548




Epoch 67/100, Loss: 0.3772947666319934




Epoch 68/100, Loss: 0.361057159575549




Epoch 69/100, Loss: 0.37508321621201257




Epoch 70/100, Loss: 0.35847852379083633




Epoch 71/100, Loss: 0.349616830999201




Epoch 72/100, Loss: 0.3410711329091679




Epoch 73/100, Loss: 0.3349887579679489




Epoch 74/100, Loss: 0.33604835109277204




Epoch 75/100, Loss: 0.34006803008643066




Epoch 76/100, Loss: 0.32386029782620346




Epoch 77/100, Loss: 0.3209893879565326




Epoch 78/100, Loss: 0.3258228220722892




Epoch 79/100, Loss: 0.308456879447807




Epoch 80/100, Loss: 0.3121828104962002




Epoch 81/100, Loss: 0.3124863715334372




Epoch 82/100, Loss: 0.3113917369734157




Epoch 83/100, Loss: 0.2987946630878882




Epoch 84/100, Loss: 0.30007921159267426




Epoch 85/100, Loss: 0.29228176786140964




Epoch 86/100, Loss: 0.29253363609313965




Epoch 87/100, Loss: 0.29321652108972723




Epoch 88/100, Loss: 0.2858102098107338




Epoch 89/100, Loss: 0.29629159989682113




Epoch 90/100, Loss: 0.27842625975608826




Epoch 91/100, Loss: 0.27906743030656467




Epoch 92/100, Loss: 0.2779634100469676




Epoch 93/100, Loss: 0.27865297211842105




Epoch 94/100, Loss: 0.2733078802173788




Epoch 95/100, Loss: 0.27706077153032477




Epoch 96/100, Loss: 0.28201956234195014




Epoch 97/100, Loss: 0.27529002793810586




Epoch 98/100, Loss: 0.2662598571994088




Epoch 99/100, Loss: 0.27739235216921027


                                                                         

Epoch 100/100, Loss: 0.27072104202075437




In [None]:
torch.save(model, '/content/drive/MyDrive/chatbot_models/gpt_2_100e.pth')

In [None]:
model = torch.load('/content/drive/MyDrive/chatbot_models/gpt_2_100e.pth')


### **Chatbot**

In [None]:
# Function to chat with the bot
def chat_with_bot():
    while True:
        user_input = input("You: ")
        if user_input.lower() == 'quit':
            print("Chatbot: Goodbye!")
            break
        # Tokenize user input
        user_input_ids = tokenizer.encode(user_input, return_tensors='pt')

        user_input_ids = user_input_ids.to(device)
        # Generate response
        bot_output = model.generate(user_input_ids, num_return_sequences=1, pad_token_id=tokenizer.eos_token_id)
        # Decode and print response
        bot_response = tokenizer.decode(bot_output[0], skip_special_tokens=True)
        print("Chatbot:", bot_response)

# Start chatting
chat_with_bot()

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


**Evaluation**

In [None]:
!pip install scikit-learn



In [None]:
import torch
from datasets import load_dataset
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from torch.nn.utils.rnn import pad_sequence

# Load the dataset
dataset = load_dataset("heliosbrahma/mental_health_chatbot_dataset")
texts = dataset['train']['text']

# Load the trained GPT-2 model and tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# model = GPT2LMHeadModel.from_pretrained("gpt2")

# Set the pad_token_id for the tokenizer
tokenizer.pad_token_id = tokenizer.eos_token_id

# Tokenize the conversations
tokenized_conversations = [torch.tensor(tokenizer.encode(text)) for text in texts]

# Create a custom dataset
class ChatDataset(torch.utils.data.Dataset):
    def __init__(self, conversations):
        self.conversations = conversations

    def __len__(self):
        return len(self.conversations)

    def __getitem__(self, idx):
        conversation = self.conversations[idx]
        input_ids = conversation[:-1]
        target_ids = conversation[1:]
        return input_ids, target_ids

dataset = ChatDataset(tokenized_conversations)

# Create a data loader with custom collation function
def collate_fn(batch):
    input_ids, target_ids = zip(*batch)
    input_ids = pad_sequence(input_ids, batch_first=True, padding_value=tokenizer.pad_token_id)
    target_ids = pad_sequence(target_ids, batch_first=True, padding_value=tokenizer.pad_token_id)
    return input_ids, target_ids

batch_size = 8
test_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)

# Evaluate the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
model.eval()

total_loss = 0
total_accuracy = 0

with torch.no_grad():
    for inputs, targets in test_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = model(inputs, labels=targets)
        loss = outputs.loss
        total_loss += loss.item()

        # Calculate accuracy
        predicted_ids = torch.argmax(outputs.logits, dim=-1)
        accuracy = (predicted_ids == targets).float().mean().item()
        total_accuracy += accuracy

avg_loss = total_loss / len(test_loader)
avg_accuracy = total_accuracy / len(test_loader)

print(f"Average Loss: {avg_loss:.4f}")
print(f"Average Accuracy: {avg_accuracy:.4f}")


Average Loss: 0.0785
Average Accuracy: 0.4056


In [None]:
import torch.nn.functional as F

def calculate_perplexity(model, test_loader, device):
    total_perplexity = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs, labels=targets)
            log_probs = F.log_softmax(outputs.logits, dim=-1)
            perplexity = torch.exp(-torch.mean(log_probs.gather(2, targets.unsqueeze(2)).squeeze(2)))
            total_perplexity += perplexity.item()
    avg_perplexity = total_perplexity / len(test_loader)
    return avg_perplexity

avg_perplexity = calculate_perplexity(model, test_loader, device)
print(f"Average Perplexity: {avg_perplexity:.4f}")


Average Perplexity: 687.9785
