### **Install Dependencies**

In [None]:
!pip install transformers datasets

In [None]:
pip install transformers[torch]

## **BERT Model**

In [None]:
from transformers import BertForSequenceClassification, BertTokenizer
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from datasets import load_dataset
import torch

# Fetch the mental-health chatbot corpus and keep the raw text column of its
# train split; `texts` is the pool of candidate replies used by the chat loop.
dataset = load_dataset("heliosbrahma/mental_health_chatbot_dataset")
train_split = dataset['train']
texts = train_split['text']

# Tokenizer and sequence-classification model come from the same checkpoint.
checkpoint_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(checkpoint_name)
model = BertForSequenceClassification.from_pretrained(checkpoint_name)

# Function to preprocess text for BERT
def preprocess_text(text):
    """Encode ``text`` with the module-level ``tokenizer`` for a model call.

    Returns whatever ``tokenizer.encode_plus`` produces when asked for
    PyTorch tensors with padding and truncation enabled.
    """
    encoding_options = {
        "return_tensors": "pt",
        "padding": True,
        "truncation": True,
    }
    return tokenizer.encode_plus(text, **encoding_options)

# Function to chat with the bot
def chat_with_bot():
    """Run an interactive console loop that replies with an entry from ``texts``.

    Each line read from standard input is encoded with ``preprocess_text``,
    passed through the module-level ``model``, and the index of the largest
    logit is used to pick a reply from ``texts``. The loop ends when the user
    types ``quit`` (case-insensitive, surrounding whitespace ignored).

    NOTE(review): when the checkpoint is loaded, transformers reports the
    classifier head as newly initialised, so the selected index is not a
    learned prediction until the model has been fine-tuned.
    """
    while True:
        user_input = input("You: ")
        # Tolerate stray spaces around the exit keyword.
        if user_input.strip().lower() == 'quit':
            print("Chatbot: Goodbye!")
            break
        # Preprocess user input
        user_input_features = preprocess_text(user_input)
        # Inference only: no gradients are needed, so skip autograd tracking.
        with torch.no_grad():
            outputs = model(**user_input_features)
        # Index of the highest-scoring logit for the single input sequence
        predicted_label = torch.argmax(outputs.logits[0]).item()
        # Look up and print the reply stored at that index
        bot_response = texts[predicted_label]
        print("Chatbot:", bot_response)

# Start chatting: this call blocks on console input until the user enters 'quit'.
chat_with_bot()


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme:   0%|          | 0.00/2.50k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/102k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/172 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


You: Hi
Chatbot: <HUMAN>: What is a panic attack?
<ASSISTANT>: Panic attacks come on suddenly and involve intense and often overwhelming fear. They’re accompanied by very challenging physical symptoms, like a racing heartbeat, shortness of breath, or nausea. Unexpected panic attacks occur without an obvious cause. Expected panic attacks are cued by external stressors, like phobias. Panic attacks can happen to anyone, but having more than one may be a sign of panic disorder, a mental health condition characterized by sudden and repeated panic attacks.
You: What is panic attack
Chatbot: <HUMAN>: What is a panic attack?
<ASSISTANT>: Panic attacks come on suddenly and involve intense and often overwhelming fear. They’re accompanied by very challenging physical symptoms, like a racing heartbeat, shortness of breath, or nausea. Unexpected panic attacks occur without an obvious cause. Expected panic attacks are cued by external stressors, like phobias. Panic attacks can happen to anyone, but 

## **Evaluation**

In [None]:
# Define a function to evaluate the model
import nltk
from nltk.tokenize import word_tokenize
import json
import numpy as np
import torch

from transformers import BertForSequenceClassification, BertTokenizer
from datasets import load_dataset

# Load the dataset
dataset = load_dataset("heliosbrahma/mental_health_chatbot_dataset")
texts = dataset['train']['text']

# Load BERT model and tokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertForSequenceClassification.from_pretrained("bert-base-uncased")


# Tokenizer data required by nltk's word_tokenize
nltk.download('punkt')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# evaluate_model moves its input tensors to `device`; the model has to live on
# the same device or the forward pass fails on a GPU runtime.
model.to(device)

def tokenize(sentence):
    """Split ``sentence`` into tokens using nltk's ``word_tokenize``."""
    tokens = word_tokenize(sentence)
    return tokens

def bag_of_words(sentence, words):
    """Return a binary bag-of-words vector for ``sentence`` over ``words``.

    Args:
        sentence: Iterable of tokens.
        words: Sequence of vocabulary tokens; position ``i`` of the result
            corresponds to ``words[i]``.

    Returns:
        A ``float32`` NumPy array of length ``len(words)`` holding ``1`` at
        the position of every vocabulary token that occurs in ``sentence``
        and ``0`` elsewhere. If a token appears more than once in ``words``,
        only its first position is set.
    """
    bag = np.zeros(len(words), dtype=np.float32)

    # Build the token -> first-index map once instead of scanning ``words``
    # twice (membership test + index lookup) for every token of the sentence.
    first_index = {}
    for position, vocab_word in enumerate(words):
        first_index.setdefault(vocab_word, position)

    for word in sentence:
        position = first_index.get(word)
        if position is not None:
            bag[position] = 1
    return bag


def evaluate_model(model, test_data, intents):
    """Print the share of ``test_data`` examples whose predicted tag is correct.

    Args:
        model: Callable taking one tensor and returning an object with a
            ``.logits`` attribute.
        test_data: Iterable of dicts with a ``'sentence'`` string and the
            expected ``'tag'``.
        intents: Not read by this function; kept so existing calls still work.

    The function also reads the module-level ``all_words`` (vocabulary),
    ``tags`` (index -> tag name) and ``device``. It prints the accuracy as a
    percentage and returns nothing. Empty ``test_data`` reports 0.00% instead
    of dividing by zero.
    """
    num_correct = 0
    total = 0

    # Inference only: no gradients are needed, so skip autograd tracking.
    with torch.no_grad():
        for example in test_data:
            expected_tag = example['tag']

            tokens = tokenize(example['sentence'])
            bag = bag_of_words(tokens, all_words)
            bag = bag.reshape(1, bag.shape[0])

            # Convert straight to an integer tensor on the target device;
            # wrapping an existing tensor in torch.tensor() copies it and
            # emits a UserWarning.
            # NOTE(review): this feeds a 0/1 bag-of-words vector to the model
            # as its input tensor. For a BERT model that argument is normally
            # token ids from the BERT tokenizer -- confirm this is intended.
            X = torch.from_numpy(bag).to(device=device, dtype=torch.long)

            # Call the model for inference and take the logits
            logits = model(X).logits

            # Index of the largest logit in the single row
            predicted = torch.argmax(logits, dim=1)
            tag = tags[predicted.item()]

            if tag == expected_tag:
                num_correct += 1

            total += 1

    accuracy = num_correct / total if total else 0.0
    print(f"Accuracy: {accuracy * 100:.2f}%")

# Hand-written evaluation examples as (sentence, expected tag) pairs, expanded
# below into the {"sentence": ..., "tag": ...} records evaluate_model reads.
_test_pairs = [
    ("Hi there!", "greeting"),
    ("What kinds of items do you have?", "items"),
    ("You're welcome! Happy to assist.", "goodbye"),
    ("How are you", "greeting"),
    ("Hi there! How's everything been going with you lately?", "greeting"),
    ("Bye! Come back again soon.", "goodbye"),
    ("Hi, Good Morning.", "greeting"),
    ("Bubye. Let's meet again soon", "goodbye"),
    ("Delivery typically takes 2-4 business days.", "delivery"),
    ("You're welcome! Happy to assist. Goodbye", "goodbye"),
    ("Hello!! How can I could help you", "greeting"),
    ("Bye. Have a good day", "goodbye"),
    ("Who can I talk to about a refund", "support"),
]

test_data = [
    {"sentence": sentence_text, "tag": tag_name}
    for sentence_text, tag_name in _test_pairs
]

# Read the intent definitions. The encoding is stated explicitly so that
# non-ASCII patterns decode the same way regardless of the platform default.
with open('/content/drive/MyDrive/chatbot_models/ankita/intent.json', 'r', encoding='utf-8') as json_data:
    intents = json.load(json_data)

# Vocabulary: every distinct token that occurs in any intent pattern.
vocabulary = set()
for intent in intents['intents']:
    for pattern in intent['patterns']:
        vocabulary.update(tokenize(pattern))

# Sort so that the word -> index mapping used by bag_of_words is the same on
# every run; iterating a set of strings directly gives an order that can
# change between interpreter sessions.
all_words = sorted(vocabulary)

# Tag of each intent, in file order (position i is the tag for class index i).
tags = [intent['tag'] for intent in intents['intents']]


# Call the evaluate_model function; it prints the accuracy over test_data and returns nothing.
evaluate_model(model, test_data, intents)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
  X = torch.tensor(X, dtype=torch.long, device=device).clone().detach()


Accuracy: 38.46%


In [None]:
# Mount Google Drive at /content/drive. The evaluation cell opens intent.json
# from a path under this mount point, so this has to run before that cell.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive
