In [None]:
# Colab-only: attach the user's Google Drive to the runtime filesystem.
from google.colab import drive
drive.mount('/content/drive')
# Drive folder that holds the chatbot's data; the intents JSON is read from here.
data_root = '/content/drive/My Drive/Colab Notebooks/Chatbot'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import json
import string
import random

import nltk
import numpy as np
from nltk.stem import WordNetLemmatizer
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout
# Fetch the NLTK resources this notebook relies on: the 'punkt' tokenizer
# models (for nltk.word_tokenize) and the 'wordnet' corpus (for WordNetLemmatizer).
# NOTE(review): newer NLTK releases may additionally require 'punkt_tab' — confirm
# against the installed version.
nltk.download('punkt')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [None]:
# Read the intents definition. The context manager guarantees the file handle
# is closed, and the explicit encoding keeps non-ASCII response text intact
# regardless of the platform's default locale.
with open(data_root + '/intents2.json', encoding='utf-8') as intents_fh:
  data_file = intents_fh.read()
data = json.loads(data_file)

In [None]:
# Accumulators filled from the intents file:
words = []    # every token from every pattern (raw vocabulary, duplicates included)
classes = []  # distinct intent tags, in first-seen order
data_x = []   # raw pattern sentences
data_y = []   # tag belonging to the pattern at the same index of data_x

for intent in data['intents']:
  for pattern in intent['patterns']:
    tag = intent['tag']
    # Split the sentence into tokens and add them all to the raw vocabulary.
    words += nltk.word_tokenize(pattern)
    data_x.append(pattern)
    data_y.append(tag)

    # Register the tag the first time it is encountered.
    if tag not in classes:
      classes.append(tag)



In [None]:
lemmatizer = WordNetLemmatizer()

# Normalise the raw vocabulary: skip tokens found in string.punctuation,
# lowercase and lemmatize the rest, then deduplicate and sort so that every
# word has a stable index in the bag-of-words vector.
words = sorted({
  lemmatizer.lemmatize(token.lower())
  for token in words
  if token not in string.punctuation
})

# Same treatment for the tags: unique and in a deterministic order.
classes = sorted(set(classes))

In [None]:
# Build the supervised training set: one (bag-of-words, one-hot tag) pair per pattern.
training = []
out_empty = [0]*len(classes)
for idx, doc in enumerate(data_x):
  # Tokenise and normalise the pattern exactly like the vocabulary was built
  # (tokenize -> lowercase -> lemmatize). Matching whole tokens instead of
  # testing `word in sentence` avoids false hits where a short vocabulary word
  # is merely a substring of another word, and keeps the training features
  # consistent with the token-equality encoding used at inference time.
  doc_tokens = {
    lemmatizer.lemmatize(token.lower())
    for token in nltk.word_tokenize(doc)
  }
  bow = [1 if word in doc_tokens else 0 for word in words]

  # One-hot encode the pattern's tag.
  output_row = list(out_empty)
  output_row[classes.index(data_y[idx])] = 1
  training.append([bow, output_row])

random.shuffle(training)

# Split features and labels straight from the shuffled list; this yields clean
# 2-D integer arrays even when the vocabulary and class counts happen to match
# (in which case the object array below is not a simple (n, 2) table).
train_x = np.array([features for features, _ in training])
train_y = np.array([label for _, label in training])

# Kept as an object array for any later cell that still expects it in that form.
training = np.array(training, dtype = object)

In [None]:
# Feed-forward classifier: bag-of-words vector in, one softmax score per intent tag out.
model = Sequential()
model.add(Dense(128, input_shape =(len(train_x[0]),), activation = 'relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation = 'relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation = 'softmax'))
# NOTE(review): the legacy optimizer namespace is presumably used for the
# `decay` argument — confirm it exists in the installed TensorFlow version.
adam = tf.keras.optimizers.legacy.Adam(learning_rate = 0.01, decay = 1e-6)
model.compile(loss = 'categorical_crossentropy',
              optimizer = adam,
              metrics = ['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
model.summary()
model.fit(x = train_x, y = train_y, epochs = 150, verbose = 1)


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 128)               12928     
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense_1 (Dense)             (None, 64)                8256      
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense_2 (Dense)             (None, 20)                1300      
                                                                 
Total params: 22484 (87.83 KB)
Trainable params: 22484 (87.83 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
Epoch 1/150
Epo

<keras.src.callbacks.History at 0x7cd701cd4f10>

In [None]:
def clean_text(text):
  """Tokenize `text` and normalise each token for vocabulary lookup.

  Tokens are lowercased before lemmatization because the vocabulary was built
  from lowercased, lemmatized tokens; without this step any capitalised user
  input could never match a vocabulary entry.

  Args:
    text: raw user sentence.

  Returns:
    List of lowercased, lemmatized tokens in their original order.
  """
  tokens = nltk.word_tokenize(text)
  return [lemmatizer.lemmatize(word.lower()) for word in tokens]

def bag_of_words(text,vocab):
  """Encode `text` as a binary presence vector over `vocab`.

  Args:
    text: raw sentence; it is tokenized and normalised via clean_text.
    vocab: sequence of vocabulary words; position i of the result refers to vocab[i].

  Returns:
    NumPy array with len(vocab) entries: 1 where the vocabulary word equals
    one of the cleaned tokens, 0 otherwise.
  """
  present = set(clean_text(text))
  return np.array([1 if entry in present else 0 for entry in vocab])

In [None]:
def pred_class(text, vocab, labels):
  """Return the labels whose predicted score for `text` exceeds the threshold.

  Args:
    text: raw user sentence.
    vocab: vocabulary used to build the bag-of-words input.
    labels: sequence of tags; index i corresponds to the model's output i.

  Returns:
    List of labels with a score strictly above 0.5, ordered from highest to
    lowest score. Empty when no score clears the threshold.
  """
  thresh = 0.5
  features = bag_of_words(text, vocab)
  # The model expects a batch, so wrap the single vector and unwrap the single result.
  scores = model.predict(np.array([features]))[0]
  confident = [[label_idx, score] for label_idx, score in enumerate(scores) if score > thresh]
  ranked = sorted(confident, key = lambda pair: pair[1], reverse = True)
  return [labels[label_idx] for label_idx, _ in ranked]

def get_response(intents_list, intents_json):
  """Pick a reply for the top predicted intent.

  Args:
    intents_list: predicted tags, best first; may be empty.
    intents_json: parsed intents data with an 'intents' list whose items
      carry a 'tag' and a list of 'responses'.

  Returns:
    A randomly chosen response of the intent matching intents_list[0], or the
    fallback message when no intent was predicted or the predicted tag is not
    present in intents_json.
  """
  fallback = "Sorry! I don't understand."
  # Guard clause: previously the fallback was assigned but never returned,
  # so the caller received None.
  if len(intents_list) == 0:
    return fallback

  tag = intents_list[0]
  for intent in intents_json['intents']:
    if intent['tag'] == tag:
      return random.choice(intent['responses'])

  # The predicted tag has no entry in the intents data; answer gracefully
  # instead of failing on an unassigned result.
  return fallback

In [None]:
# Interactive chat session: read a line, classify it, print the reply.
# Entering '0' ends the session.
print("Press 0 if you don't want to chat with our Chatbot")
for message in iter(lambda: input(""), '0'):
  intents = pred_class(message, words, classes)
  print(get_response(intents, data))

Press 0 if you don't want to chat with our Chatbot
what can i ask yo
I provide basic info about Ben's background, experience, and interests.
what is your background
I graduated with honors from UCSB in 2023 as a statistics and data science major with a minor in physics.
what did you learn in physics
None
physics
I pursued a minor in physics at UCSB, and hoping to dive deeper in the subject, I joined the UCSB Experimental Cosmology Group, a lunar tech lab in the physics department. Over the next two years, I bounced between the team’s many different projects, including conducting electronics testing for a solar powered rover, running ROSbot simulations using Rviz, creating python simulations of a laser communications system, analyzing data from a thermally insulated battery in a cryogenic chamber, and more. In 2022, our thermally insulated battery design was accepted into the NASA MINDS competition. The team was selected as a finalist, and I was one of the members chosen to present our 