# AI Chat Bot in Python

## Import necessary packages and load data into notebook

In [None]:
# These packages currently only work in Python 3.6
import nltk
from nltk.stem.lancaster import LancasterStemmer
import numpy
import tflearn
import tensorflow
import random
import json

In [None]:
# Load the intents file into the notebook as `data`.
# JSON text is UTF-8 by specification, so the encoding is stated explicitly
# instead of relying on the platform's locale default (which is not UTF-8 on
# every system and would garble or reject non-ASCII text).
with open('intents.json', encoding='utf-8') as intents_file:
    data = json.load(intents_file)

## Preprocessing data

In [None]:
# Pull the training material out of the loaded JSON into four parallel lists.
words, labels = [], []    # every token seen in any pattern / distinct intent tags
docs_x, docs_y = [], []   # tokenized pattern / tag of that pattern (same index)

# Walk every intent: tokenize each of its patterns, remember the tokens both
# in the flat `words` list and as one document in docs_x, and record the
# intent's tag for that document in docs_y.
for intent in data['intents']:
    patterns = intent['patterns']
    tag = intent['tag']
    for pattern in patterns:
        tokens = nltk.word_tokenize(pattern)
        words += tokens
        docs_x.append(tokens)
        docs_y.append(tag)

    # keep each tag only once, in first-seen order
    if tag in labels:
        continue
    labels.append(tag)

In [None]:
# Lancaster stemmer from NLTK; its stem() method is what the preprocessing
# cells call to reduce each lowercased token to its stem.
stemmer = LancasterStemmer()

In [None]:
# Build the model vocabulary: lowercase and stem every token except the
# literal "?" token, collapse duplicates, and keep the result sorted so each
# word has a stable position.
stemmed = {stemmer.stem(token.lower()) for token in words if token != "?"}
words = sorted(stemmed)

# Labels are sorted as well so each tag has a stable index.
labels = sorted(labels)

In [None]:
# Turn the tokenized patterns into numeric input for the neural network.
# Every pattern becomes a bag-of-words vector with one slot per vocabulary
# word (1 if the pattern contains that stem, else 0), and every tag becomes a
# one-hot row with one slot per label.
training, output = [], []
out_empty = [0] * len(labels)   # all-zero template for a one-hot row

for doc, tag in zip(docs_x, docs_y):
    # stems present in this pattern; only membership matters below
    stems = {stemmer.stem(token.lower()) for token in doc}
    bag = [1 if vocab_word in stems else 0 for vocab_word in words]

    # copy the template so rows do not share one list, then mark the tag
    output_row = list(out_empty)
    output_row[labels.index(tag)] = 1

    training.append(bag)
    output.append(output_row)

In [None]:
# Replace the nested Python lists with numpy arrays (inputs first, then
# targets), keeping the same variable names.
training, output = map(numpy.array, (training, output))

## Building AI Model

In [None]:
# define model architecture
# Start from an empty default graph so that re-running this cell does not
# stack a second network on top of a previously built one.
# TensorFlow 2.x no longer exposes reset_default_graph at the top level (it
# lives under compat.v1), while early 1.x releases have no compat.v1 module,
# so try the compat path first and fall back to the original call.
try:
    tensorflow.compat.v1.reset_default_graph()
except AttributeError:
    tensorflow.reset_default_graph()

# standard feed-forward neural network
# one input layer, two hidden layers, one output layer
# input width = length of one bag-of-words row; batch dimension left open (None)
net = tflearn.input_data(shape=[None, len(training[0])])
net = tflearn.fully_connected(net, 8)   # first hidden layer, 8 units
net = tflearn.fully_connected(net, 8)   # second hidden layer, 8 units
# output layer: one unit per label, softmax so the outputs are probabilities
net = tflearn.fully_connected(net, len(output[0]), activation="softmax")
# training op with tflearn's defaults (no optimizer/loss arguments are passed)
net = tflearn.regression(net)

# wrap the network in tflearn's DNN model object used for fitting
model = tflearn.DNN(net)

In [None]:
# Train the model on the prepared inputs (`training`) and targets (`output`).
model.fit(
    training,
    output,
    n_epoch=1000,
    batch_size=8,
    show_metric=True,
)
# Saving is currently disabled; uncomment the next line to persist the model.
# model.save("model.tflearn")