Simple Chatbot using RNN


In [10]:
!pip install opendatasets

Collecting opendatasets
  Downloading opendatasets-0.1.22-py3-none-any.whl.metadata (9.2 kB)
Downloading opendatasets-0.1.22-py3-none-any.whl (15 kB)
Installing collected packages: opendatasets
Successfully installed opendatasets-0.1.22


In [11]:
import opendatasets as od
od.download("https://www.kaggle.com/datasets/yapwh1208/chatbot-ai-q-and-a")

Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username: jhasm06
Your Kaggle Key: ··········
Dataset URL: https://www.kaggle.com/datasets/yapwh1208/chatbot-ai-q-and-a
Downloading chatbot-ai-q-and-a.zip to ./chatbot-ai-q-and-a


100%|██████████| 48.8k/48.8k [00:00<00:00, 58.3MB/s]







In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset,DataLoader
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt


In [13]:
import pandas as pd
df=pd.read_csv("chatbot-ai-q-and-a/AI.csv")
df.head()

Unnamed: 0,Question,Answer
0,Who did the first work generally recognized as...,Warren McCulloch and Walter Pitts (1943).\n
1,What sources was drawn on the formation of the...,knowledge of the basic physiology and function...
2,Who created the Hebbian learning rule?,Donald Hebb (1949).\n
3,When the first neural network is built?,1950.\n
4,What is the first neural network called?,The SNARC.\n


In [14]:
#tokenize

def tokenize(text):
  text=text.lower()
  text=text.replace(".","")
  text=text.replace(",","")
  text=text.replace("?","")
  text=text.replace("!","")
  return text.split()

In [15]:
tokenize("When the first neural network is built?")

['when', 'the', 'first', 'neural', 'network', 'is', 'built']

In [16]:
#vocab
vocab={'<UNK>':0}

In [17]:
def build_vocab(row):
  tokenized_question=tokenize(row['Question'])
  tokenized_answer=tokenize(row['Answer'])

  merged_tokens=tokenized_question+tokenized_answer
  for token in merged_tokens:
    if token not in vocab:
      vocab[token]=len(vocab)


In [18]:
df.apply(build_vocab,axis=1)

Unnamed: 0,0
0,
1,
2,
3,
4,
...,...
498,
499,
500,
501,


In [19]:
#total Vocab In Dataset
len(vocab)

1964

In [20]:
#convert words to numerical values

def text_to_indices(text,vocab):
  indexed_text=[]
  for token in tokenize(text):
    if token in vocab:
      indexed_text.append(vocab[token])
    else:
      indexed_text.append(vocab['<UNK>'])
  return indexed_text

In [21]:
text_to_indices("what is neural network",vocab)

[16, 52, 50, 51]

In [22]:
import torch
from torch.utils.data import Dataset,DataLoader

In [23]:
class QADataset(Dataset):

  def __init__(self,df,vocab):
    self.df=df
    self.vocab=vocab

  def __len__(self):
    return self.df.shape[0]

  def __getitem__(self,index):

    numerical_question=text_to_indices(self.df.iloc[index]['Question'],self.vocab)
    numerical_answer=text_to_indices(self.df.iloc[index]['Answer'],self.vocab)

    return torch.tensor(numerical_question),torch.tensor(numerical_answer)

In [24]:
dataset=QADataset(df,vocab)

In [25]:
dataloader=DataLoader(dataset,batch_size=1,shuffle=True)

In [26]:
for question,answer in dataloader:
  print(question,answer)

tensor([[ 16, 974,  28, 367, 738, 975]]) tensor([[717, 716,  22,   3, 165,  52, 976, 118, 510, 358, 854]])
tensor([[  16,   52, 1712, 1683]]) tensor([[1712, 1683, 1069,   87,    3,  862,   12,  782, 1712,  344, 1641,  235,
           36,  211,    3,  839,  296,  171, 1871,    3,  862]])
tensor([[ 16,  52, 710]]) tensor([[1919,  177,  716]])
tensor([[  16,   52, 1913,   92]]) tensor([[  30, 1101,   92,   52, 1017,  388,  212, 1102,  858, 1914,  288]])
tensor([[  16,   52,   30, 1443,  716]]) tensor([[  30, 1443,  716,   52,   30,  431,   22,  113,  117,  171,    3,  580,
         1444,  214,  544]])
tensor([[ 263,  956, 1236,  203,  479, 1267]]) tensor([[1507,  270, 1947, 1508]])
tensor([[ 16,  52,   3, 615,  20, 610, 257, 100,  12,  61]]) tensor([[616, 617, 240,  87, 618, 619,  22, 620]])
tensor([[  16,   52, 1024, 1070]]) tensor([[1927,   30, 1493, 1070,   36,  717, 1024]])
tensor([[  16,  974,   36, 1031,   70]]) tensor([[   3,  580,  756, 1029,  263, 1030,  240,   52,   87,    3,  8

In [27]:
import torch.nn as nn

In [28]:
class SimpleRNN(nn.Module):

  def __init__(self,vocab_size):
    super().__init__()
    self.embedding=nn.Embedding(vocab_size,embedding_dim=50)
    self.rnn=nn.RNN(50,64,batch_first=True)
    self.fc=nn.Linear(64,vocab_size)

  def forward(self,question):
    embedded_question=self.embedding(question)
    hidden,final=self.rnn(embedded_question)
    output=self.fc(final.squeeze(0))
    return output

In [29]:
learning_rate=0.001
epochs=20

In [30]:
model=SimpleRNN(len(vocab))

In [31]:
criterion=nn.CrossEntropyLoss()
optimizer=torch.optim.Adam(model.parameters(),lr=learning_rate)

In [35]:
for epoch in range(epochs):

  total_loss=0

  for question,answer in dataloader:
    optimizer.zero_grad()

    #forward pass
    output =model(question)

    #loss
    loss=criterion(output,answer[:, 0]) # Target the first word of the answer

    #gradients
    loss.backward()

    #update
    optimizer.step()

    total_loss+=loss.item()

  print(f"Epoch {epoch+1}, Loss: {total_loss}")

Epoch 1, Loss: 3185.220695257187
Epoch 2, Loss: 2193.1806071400642
Epoch 3, Loss: 1831.1665700376034
Epoch 4, Loss: 1505.9134093821049
Epoch 5, Loss: 1210.1043596863747
Epoch 6, Loss: 935.0614295676351
Epoch 7, Loss: 703.3665478900075
Epoch 8, Loss: 519.6979817934334
Epoch 9, Loss: 375.6987791992724
Epoch 10, Loss: 271.9126036250964
Epoch 11, Loss: 203.16909586195834
Epoch 12, Loss: 155.68101047817618
Epoch 13, Loss: 117.46015136875212
Epoch 14, Loss: 95.76734177337494
Epoch 15, Loss: 74.5501255003037
Epoch 16, Loss: 59.96492226578994
Epoch 17, Loss: 50.12399422301678
Epoch 18, Loss: 53.89213213452604
Epoch 19, Loss: 47.45361028605839
Epoch 20, Loss: 62.24415607392439


In [36]:
def predict(model,question,threshold=0.5):

  #convert question to numbers
  numerical_question=text_to_indices(question,vocab)

  #tensor
  question_tensor=torch.tensor(numerical_question).unsqueeze(0)

  #send to model
  output=model(question_tensor)

  #convert logits to probs
  probs=torch.nn.functional.softmax(output,dim=1)

  #find index with max prob
  value,index=torch.max(probs,dim=1)

  if value<threshold:
    print("i don't know")

  print(list(vocab.keys())[index])

In [37]:
predict(model,"When the first neural network is built?")

1950


In [38]:
correct = 0
total = 0

with torch.no_grad():  # Disable gradient calculation for evaluation
    for question, answer in dataloader:
        # Get model prediction
        output = model(question)

        # Convert logits to probabilities and get the predicted index
        _, predicted_index = torch.max(output, dim=1)

        # Get the true target (first word of the answer)
        true_index = answer[:, 0]

        # Compare prediction with true target
        total += 1
        if predicted_index.item() == true_index.item():
            correct += 1

accuracy = correct / total
print(f"Accuracy of the model: {accuracy * 100:.2f}%")

Accuracy of the model: 98.01%
