<a href="https://colab.research.google.com/github/charlietharas/stuyhacksx-chatbot/blob/main/Chatbot_StuyHacksX_Display_Copy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Preliminary Initialization

In [None]:
# imports
import pandas as pd
from google.colab import files

In [None]:
# for reuploads / edits to main datasheet
!rm all-merge.csv

In [None]:
# uploads
# The value returned by files.upload() is kept in data_csv but the later cells
# read the datasheet through `filename` instead.
# NOTE(review): the name is hard-coded, so the uploaded file is presumably
# expected to be called "all-merge.csv" — confirm before uploading other sheets.
data_csv = files.upload()
filename = "all-merge.csv"

In [None]:
# grabbing from drive, because it's nicer this way
# Alternative to the upload cell: overrides `filename` with a path inside the
# chatbot folder, built from the name typed at the prompt.
# NOTE(review): assumes Google Drive is already mounted at ./drive — no mount
# call is visible in this notebook; confirm.
filename_end = input("Filename? ")
filename = "drive/MyDrive/Colab Notebooks/chatbot/" + filename_end

In [None]:
# read the datasheet selected by one of the two cells above into a DataFrame
data = pd.read_csv(filename)

In [None]:
# pull every column used by the analysis out of the datasheet as its own Series
authors, content, time_diff, conv_id, is_custom_user, corpus_id = (
    data[column_name]
    for column_name in ('Author', 'Content', 'TimeDiff', 'ConvID', 'IsSpecUser', 'CorpusID')
)

In [None]:
# added user system for future training off of different people
# sel_user is compared against the 'Author' column by the cells below, so it
# has to be typed exactly as it appears in the datasheet.
sel_user = input("User? ")

Short note about CorpusID: this functionality may be slightly incorrect as it gets 25 distinct databases in all-merge despite there supposedly being 25, meaning that some bases have been incorrectly merged. This **will** require triage.

# "Count"-Type Analysis

e.g. making new features (deriving from current data)
These analyses rely on preformatted csv files with additional data semi-manually added.

1.   Performs a total message count and analyzes n (eg 100) most used words
2.   Counts most prominent authors sending messages prior to those of sel_user (eg most common people conversed with/after)
3.   Counts participation in all unique conversations (conversations defined as exchanges where time between messages <30min)
4.   Counts distinct frequent groups of people conversed with
5.   Counts the amount of words from each user (basic).


In [None]:
# Message and word statistics for sel_user.
# Produces: usr_message_count, total_word_count, words, wordset,
# word_count (list of [word, count]) and most_used_words (list of [word, count]).
from collections import Counter

TOP_N_WORDS = 100  # how many of the most frequent words to report

usr_message_count = 0
words = []

for i in range(content.size):
    if str(authors.get(i)) == sel_user:
        usr_message_count += 1
        # split() without arguments never yields empty tokens
        words.extend(str(content.get(i)).split())

total_word_count = len(words)
wordset = set(words)
print("Total messages from " + sel_user + ": " + str(usr_message_count))

# A single Counter pass replaces one words.count() call per distinct word,
# which was quadratic in the amount of text.
word_tally = Counter(words)
word_count = [[word, count] for word, count in word_tally.items()]

# most_common() performs the top-N selection; unlike the previous manual loop
# it does not pad the result with ["", 0] when fewer than N words exist.
most_used_words = [[word, count] for word, count in word_tally.most_common(TOP_N_WORDS)]

print("Most used words: " + str(most_used_words))
if usr_message_count:
    print("Total words: " + str(total_word_count) + " at average of " + str(total_word_count/usr_message_count) + " wpm")
else:
    # avoid dividing by zero when sel_user matches no author in the datasheet
    print("Total words: " + str(total_word_count) + " (no messages found for " + sel_user + ")")

In [None]:
# Count who wrote the message immediately before each message of sel_user.
# Produces: authorlist, authorset and author_count (list of [author, count]).
from collections import Counter

# Start at row 1: row 0 has no predecessor, and authors.get(-1) does not refer
# to a previous message.
authorlist = [
    authors.get(i - 1)
    for i in range(1, authors.size)
    if str(authors.get(i)) == sel_user
]

authorset = set(authorlist)

# one counting pass instead of authorlist.count() per distinct author
author_count = [[author, count] for author, count in Counter(authorlist).items()]

print("Authors prior to send count " + str(author_count))

In [None]:
# ids of every conversation that contains at least one message from sel_user
convset = {
    conv_id.get(row)
    for row in range(conv_id.size)
    if str(authors.get(row)) == sel_user
}

# the ConvID stored on the final row is reported as the number of conversations
last_conv_id = int(conv_id.get(conv_id.size-1))
participated = len(convset)

print("Got " + str(last_conv_id) + " distinct conversations, participation in " + str(participated) + " at rate " + str(participated/last_conv_id))

In [None]:
# Collect the set of authors of every conversation sel_user took part in, then
# count how often each distinct group of authors occurs.
# NOTE(review): assumes the rows of one conversation are contiguous in the
# datasheet — a ConvID that reappears later would open a second set.
authors_permutations = []
convID = -1  # ConvID of the most recent row that belonged to a tracked conversation

for i in range(conv_id.size):
    current_conv = conv_id.get(i)
    if current_conv not in convset:
        continue
    if current_conv != convID:
        # first message of a new conversation: open a fresh author set
        authors_permutations.append(set())
        convID = current_conv
    # Record the author of EVERY message, including the one that opens the
    # conversation. Previously the opener was skipped, so conversations without
    # a second message stayed empty and showed up as a [set(), N] entry.
    authors_permutations[-1].add(authors.get(i))

# distinct author groups in order of first appearance (sets are unhashable,
# so a list with equality checks is used instead of a set of sets)
authors_permutations_included = []
for group in authors_permutations:
    if group not in authors_permutations_included:
        authors_permutations_included.append(group)

authors_permutations_count = [
    [group, authors_permutations.count(group)]
    for group in authors_permutations_included
]

# get unsorted list of distinct conversational groups
print(authors_permutations_count)

In [None]:
# Compare the number of words written by sel_user with a second user.
sel_user_word_count = 0
user2_word_count = 0
user2 = input("Select user comparator? ")
for i in range(content.size):
    # str() accepts any cell value, so no try/except is needed here; the old
    # bare except could leave split_list stale or undefined for the row.
    split_list = str(content[i]).split(' ')
    if authors[i] == sel_user:
        sel_user_word_count += len(split_list)
    elif authors[i] == user2:
        user2_word_count += len(split_list)

print(sel_user, "at", sel_user_word_count, "words.")
print(user2, "at", user2_word_count, "words.")
if user2_word_count:
    print("Ratio is", sel_user_word_count/user2_word_count)
else:
    # the comparator never appears as an author: avoid dividing by zero
    print("Ratio is undefined:", user2, "has no words.")

Known bug: the printed `authors_permutations_count` list contains an entry `[set(), 382]`. This is problematic but not critical, and may be addressed later.

Important bug: `usr_message_count` is nonfunctional and is ignored for now. It requires triage.

# Sequence to Sequence

Vectorizing the dictionary of distinct words as a Vocabulary object, grabbing specialized conversation pairs, and training a model to respond. This is the core of the project, inspired by and heavily relying on content from [here](https://medium.com/swlh/end-to-end-chatbot-using-sequence-to-sequence-architecture-e24d137f9c78).

In [None]:
# imports for this section
import unicodedata
import re
import random
import torch
from torch import nn
import itertools
import os

In [None]:
# defining how a vocabulary object is set up
# relies on running above code to get count of distinct words as word_count
# reserved token indices shared by every Vocabulary
PAD = 0  # padding
SRT = 1  # start of sentence (stored as "SOS")
END = 2  # end of sentence (stored as "EOS")


class Vocabulary:
    """Two-way mapping between words and integer indices, with word counts.

    Indices 0-2 are reserved for the PAD, SOS and EOS tokens, so ``num_words``
    starts at 3 and the first real word receives index 3.
    """

    def __init__(self, name):
        """Create an empty vocabulary labelled ``name``."""
        self.name = name
        self.trimmed = False
        self.word_to_index = {}
        self.word_to_count = {}
        self.index_to_word = {PAD: "PAD", SRT: "SOS", END: "EOS"}
        self.num_words = 3

    def addSentence(self, sentence):
        """Register every single-space separated word of ``sentence``."""
        for word in sentence.split(' '):
            self.addWordNoContext(word)

    def addWordNoContext(self, word):
        """Add ``word`` at the next free index, or bump its count if known."""
        if word not in self.word_to_index:
            self.word_to_index[word] = self.num_words
            self.word_to_count[word] = 1
            self.index_to_word[self.num_words] = word
            self.num_words += 1
        else:
            self.word_to_count[word] += 1

    def addWord(self, word, index, count):
        """Store ``word`` under an explicit ``index`` with a known ``count``.

        ``num_words`` only grows when the word is new; re-adding a known word
        updates its index and count without inflating the vocabulary size.
        """
        is_new_word = word not in self.word_to_index
        self.word_to_index[word] = index
        self.word_to_count[word] = count
        self.index_to_word[index] = word
        if is_new_word:
            self.num_words += 1

In [None]:
# functions to fix bad characters and clean up messages, optimizing convergence
def fixASCII(string):
    """Return ``string`` with combining accent marks removed.

    The text is decomposed with NFD normalization and every character in the
    Unicode category 'Mn' is dropped.
    """
    decomposed = unicodedata.normalize('NFD', string)
    kept = [char for char in decomposed if unicodedata.category(char) != 'Mn']
    return ''.join(kept)

def fixString(string):
    """Normalize a message for the vocabulary.

    Lower-cases and strips the text, removes accents via fixASCII, puts a space
    in front of '.', '!' and '?', replaces every run of other non-letter
    characters with one space, and finally collapses whitespace.
    """
    cleaned = fixASCII(string.lower().strip())
    substitutions = (
        (r"([.!?])", r" \1"),
        (r"[^a-zA-Z.!?]+", r" "),
    )
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return re.sub(r"\s+", r" ", cleaned).strip()

In [None]:
# Build a Vocabulary for the complete dataset from the word_count list created
# in the "Count"-type analysis. A word is kept only when fixString() leaves it
# unchanged, i.e. it is already in normalized form.
# not actually relevant to final model?
print("Got", len(word_count), "distinct words.")
valid_word_count = [entry for entry in word_count if entry[0] == fixString(entry[0])]

print("Got", len(valid_word_count), "distinct valid words.")
master_voc = Vocabulary("all-merge")
for word, count in valid_word_count:
    # Use the next free index rather than the list position: positions start
    # at 0 and would overwrite the reserved PAD/SOS/EOS entries (indices 0-2).
    master_voc.addWord(word, master_voc.num_words, count)

## Generating Sentence Pair Objects

Various methods of generating sentence-pair objects for training the model.
This section will also build specific vocabulary objects for each distinct conversation filter.

1.   "Dumb" grabber between two users. Considers only previous lines, offers little context, and scans the entire corpus: weak for serious training.
2.   "Less dumb" grabber between selected user for training and any other user. Considers only previous lines, offers little context, and scans the entire corpus. Marginally better than the other but also may offer less clarity/personality because of different interaction patterns between different users.


In [None]:
# get dumb user grabber user
# `user` is the conversation partner: the "dumb" grabber keeps a pair only when
# a message from `user` is directly followed by a message from sel_user.
user = input("User for dumb grabber: ")

In [None]:
# "dumb" grabber: only contextualizes single line conversation between two distinct users
# Every message of sel_user that directly follows a message of `user` becomes
# a [prompt, reply] pair; both sentences are added to the vocabulary.
pairs = []
vocabulary = Vocabulary("Dumb 2-user grabber")
for i in range(1, len(content)):
    if authors[i] != sel_user or authors[i-1] != user:
        continue
    curr_raw = content[i]
    prev_raw = content[i-1]
    # Skip rows without text (e.g. empty cells, which pandas reads as NaN).
    # This explicit check replaces a bare `except: continue` that also hid
    # every unrelated error.
    if not (isinstance(curr_raw, str) and isinstance(prev_raw, str)):
        continue
    curr_cont = fixString(curr_raw)
    prev_cont = fixString(prev_raw)
    pairs.append([prev_cont, curr_cont])
    vocabulary.addSentence(curr_cont)
    vocabulary.addSentence(prev_cont)

print("Discriminant with 2-user basic filter grabbed", len(pairs), "distinct pairs across entire corpus.")
print("Corresponding Vocabulary object with", vocabulary.num_words, "distinct words.")

In [None]:
# "less dumb" grabber: builds pairs out of anyone talking to user
# Every message of sel_user that directly follows a message written by someone
# else becomes a [prompt, reply] pair; both sentences enter the vocabulary.
pairs = []
vocabulary = Vocabulary("Less dumb 2-user grabber")
for i in range(1, len(content)):
    # Compare the previous author itself with sel_user. The old test wrapped it
    # in a list ([authors[i-1]] != sel_user), which is always True and therefore
    # also paired sel_user's consecutive messages with each other.
    if authors[i] != sel_user or authors[i-1] == sel_user:
        continue
    curr_raw = content[i]
    prev_raw = content[i-1]
    # Skip rows without text (e.g. empty cells, which pandas reads as NaN)
    # instead of swallowing every exception with a bare except.
    if not (isinstance(curr_raw, str) and isinstance(prev_raw, str)):
        continue
    curr_cont = fixString(curr_raw)
    prev_cont = fixString(prev_raw)
    pairs.append([prev_cont, curr_cont])
    vocabulary.addSentence(curr_cont)
    vocabulary.addSentence(prev_cont)

print("Discriminant with any-user basic filter grabbed", len(pairs), "distinct pairs across entire corpus.")
print("Corresponding Vocabulary object with", vocabulary.num_words, "distinct words.")

## Data Preparation
Preparing batches for use in the model.

In [None]:
# utility functions
# converts a sentence into vocabulary indexes, terminated by the END token
def getIndexesFromSent(voc, sent):
    """Return the index of every single-space separated word of ``sent`` plus END.

    Raises KeyError when a word is missing from ``voc.word_to_index``.
    """
    indexes = []
    for token in sent.split(' '):
        indexes.append(voc.word_to_index[token])
    indexes.append(END)
    return indexes

# pads and transposes a batch of index lists
def genPadding(batch, fillvalue=PAD):
    """Transpose ``batch`` so that each returned tuple holds one position of
    every sequence, filling sequences that are too short with ``fillvalue``."""
    padded_columns = itertools.zip_longest(*batch, fillvalue=fillvalue)
    return [column for column in padded_columns]

# returns binary matrix adjusting for padding
def binaryMatrix(batch, value=PAD):
    """Return a matrix shaped like ``batch`` with 0 where a token equals
    ``value`` and 1 everywhere else.

    ``value`` defaults to PAD; it is now honored (the comparison used to be
    hard-coded to PAD, silently ignoring the argument).
    """
    return [[0 if token == value else 1 for token in seq] for seq in batch]

# padding functions
# return input tensor and corresponding lengths
def inputVariable(batch, voc):
    """Encode the sentences in ``batch`` with ``voc``.

    Returns the padded, transposed index matrix as a LongTensor together with a
    tensor holding the length (END token included) of every sentence.
    """
    idxs_batch = []
    sentence_lengths = []
    for sentence in batch:
        sentence_indexes = getIndexesFromSent(voc, sentence)
        idxs_batch.append(sentence_indexes)
        sentence_lengths.append(len(sentence_indexes))
    lengths = torch.tensor(sentence_lengths)
    padded_variable = torch.LongTensor(genPadding(idxs_batch))
    return padded_variable, lengths

# return target tensor, padding mask, and maximum length
def outputVariable(batch, voc):
    """Encode the target sentences in ``batch`` with ``voc``.

    Returns:
        padded_variable: LongTensor with the padded, transposed index matrix.
        mask: boolean tensor of the same layout, True for real tokens and
            False for padding.
        max_len: length (END token included) of the longest sentence.
    """
    idxs_batch = [getIndexesFromSent(voc, sentence) for sentence in batch]
    max_len = max([len(indexes) for indexes in idxs_batch])
    padded_list = genPadding(idxs_batch)
    # masked_select expects a boolean mask; uint8 (ByteTensor) masks are
    # deprecated in PyTorch.
    mask = torch.tensor(binaryMatrix(padded_list), dtype=torch.bool)
    padded_variable = torch.LongTensor(padded_list)
    return padded_variable, mask, max_len

# converts batch into train data
def batch_to_data(voc, batch):
    """Turn a list of [input, output] sentence pairs into training tensors.

    The pairs are ordered by descending input length (in words), which is the
    order pack_padded_sequence expects in the encoder. The caller's list is
    left untouched: a sorted copy is used instead of sorting in place.

    Returns (input tensor, input lengths, target tensor, target mask,
    maximum target length).
    """
    ordered = sorted(batch, key=lambda pair: len(pair[0].split(" ")), reverse=True)
    input_batch = [pair[0] for pair in ordered]
    output_batch = [pair[1] for pair in ordered]
    inpt, lengths = inputVariable(input_batch, voc)
    output, mask, max_len = outputVariable(output_batch, voc)
    return inpt, lengths, output, mask, max_len

# example
# builds one batch from 5 randomly chosen pairs (drawn with replacement) and
# unpacks the five values returned by batch_to_data
batches = batch_to_data(vocabulary, [random.choice(pairs) for i in range(5)])
input_var, lengths, target_var, mask, max_len = batches

## The Model
The model in this case revolves around 3 layers:

1.   An encoder to losslessly vectorize words into trainable binary sequences (for this we use a bidirectional GRU).
2.   An attention layer prioritizes different parts of sentences for "understanding." For this we use a Luong attention layer.
3.   A decoder to convert the model's inner "thoughts" into output for the user!

In [None]:
# tensordash
# `histories` is used by train_iterations() to report the loss of every
# iteration through histories.sendLoss().
# NOTE(review): the account e-mail is hard-coded; presumably Torchdash asks for
# the matching password when this cell runs — confirm.
!pip install tensor-dash
from tensordash.torchdash import Torchdash
histories = Torchdash(ModelName="Chatbot", email="charlie@charliemax.dev")

In [None]:
# encoder
class EncoderRNN(nn.Module):
    """Bidirectional GRU encoder operating on embedded, padded index batches."""

    def __init__(self, hidden_size, embedding, n_layers=1, dropout=0):
        """
        hidden_size: size of the embedding vectors and of the GRU state.
        embedding:   embedding module applied to the input indexes.
        n_layers:    number of stacked GRU layers.
        dropout:     dropout between GRU layers (forced to 0 for one layer).
        """
        super().__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size
        self.embedding = embedding

        gru_dropout = dropout if n_layers != 1 else 0
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=gru_dropout, bidirectional=True)

    def forward(self, input_sequence, input_lengths, hidden=None):
        """Encode a padded batch.

        input_lengths must be sorted in descending order, as required by
        pack_padded_sequence with its default arguments.
        Returns the summed forward/backward GRU outputs and the final hidden
        state.
        """
        packed = nn.utils.rnn.pack_padded_sequence(self.embedding(input_sequence), input_lengths)
        packed_outputs, hidden = self.gru(packed, hidden)
        unpacked, _ = nn.utils.rnn.pad_packed_sequence(packed_outputs)
        # the last dimension holds the forward direction followed by the
        # backward direction; add the two halves together
        forward_half = unpacked[:, :, :self.hidden_size]
        backward_half = unpacked[:, :, self.hidden_size:]
        return forward_half + backward_half, hidden

In [None]:
# attention layer
class Attn(nn.Module):
    """Luong-style global attention with the 'dot', 'general' or 'concat' score.

    As used by the decoder, ``hidden`` is the GRU output of the current step
    (1, batch, hidden_size) and ``encoder_outputs`` is (seq_len, batch,
    hidden_size).
    """

    def __init__(self, method, hidden_size):
        """Raise ValueError when ``method`` is not 'dot', 'general' or 'concat'."""
        super(Attn, self).__init__()
        self.method = method
        if self.method not in ['dot', 'general', 'concat']:
            raise ValueError(self.method, "is not a valid attention method.")
        self.hidden_size = hidden_size
        if self.method == 'general':
            self.attn = nn.Linear(self.hidden_size, self.hidden_size)
        elif self.method == 'concat':
            self.attn = nn.Linear(self.hidden_size * 2, self.hidden_size)
            # torch.FloatTensor(n) returns uninitialized memory; start from
            # small random values instead (same scale nn.Linear uses).
            bound = self.hidden_size ** -0.5
            self.v = nn.Parameter(torch.empty(self.hidden_size).uniform_(-bound, bound))

    def dot_score(self, hidden, encoder_output):
        """score = hidden . encoder_output"""
        return torch.sum(hidden * encoder_output, dim=2)

    def general_score(self, hidden, encoder_output):
        """score = hidden . W(encoder_output)"""
        energy = self.attn(encoder_output)
        return torch.sum(hidden * energy, dim=2)

    def concat_score(self, hidden, encoder_output):
        """score = v . tanh(W([hidden; encoder_output]))

        The decoder state is repeated along the sequence dimension so that it
        can be concatenated with every encoder output. (This used to duplicate
        general_score, which fed a hidden_size tensor into the 2*hidden_size
        Linear layer and never used ``v``.)
        """
        expanded_hidden = hidden.expand(encoder_output.size(0), -1, -1)
        energy = torch.tanh(self.attn(torch.cat((expanded_hidden, encoder_output), 2)))
        return torch.sum(self.v * energy, dim=2)

    def forward(self, hidden, encoder_outputs):
        """Return attention weights of shape (batch, 1, seq_len); each row sums to 1."""
        if self.method == 'general':
            attn_energies = self.general_score(hidden, encoder_outputs)
        elif self.method == 'concat':
            attn_energies = self.concat_score(hidden, encoder_outputs)
        elif self.method == 'dot':
            attn_energies = self.dot_score(hidden, encoder_outputs)
        # (seq_len, batch) -> (batch, seq_len) so softmax runs over the sequence
        attn_energies = attn_energies.t()

        return nn.functional.softmax(attn_energies, dim=1).unsqueeze(1)

In [None]:
# decoder
class LuongAttnDecoderRNN(nn.Module):
    """GRU decoder that produces one token distribution per call, attending
    over the encoder outputs with an Attn module."""

    def __init__(self, attn_model, embedding, hidden_size, output_size, n_layers=1, dropout=0.1):
        """
        attn_model:  attention score name handed to Attn.
        embedding:   embedding module applied to the input token.
        hidden_size: size of the embedding vectors and of the GRU state.
        output_size: number of classes of the output distribution.
        n_layers:    number of stacked GRU layers.
        dropout:     embedding dropout; also used between GRU layers when
                     there is more than one layer.
        """
        super().__init__()

        self.attn_model = attn_model
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout = dropout

        self.embedding = embedding
        self.embedding_dropout = nn.Dropout(dropout)
        gru_dropout = dropout if self.n_layers != 1 else 0
        self.gru = nn.GRU(self.hidden_size, self.hidden_size, self.n_layers, dropout=gru_dropout)
        self.concat = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

        self.attn = Attn(self.attn_model, self.hidden_size)

    def forward(self, input_step, last_hidden, encoder_outputs):
        """Run a single decoding step.

        Returns the softmax distribution over the output classes (these are
        probabilities, not log-probabilities) and the new GRU hidden state.
        """
        step_embedding = self.embedding_dropout(self.embedding(input_step))
        rnn_output, hidden = self.gru(step_embedding, last_hidden)

        # weighted sum of the encoder outputs for this step
        attn_weights = self.attn(rnn_output, encoder_outputs)
        context = attn_weights.bmm(encoder_outputs.transpose(0, 1)).squeeze(1)

        # combine the GRU output with the attention context
        combined = torch.cat((rnn_output.squeeze(0), context), 1)
        attentional = torch.tanh(self.concat(combined))

        return nn.functional.softmax(self.out(attentional), dim=1), hidden

In [None]:
# loss function
def loss_func(inpt, target, mask):
    """Masked negative log-likelihood for one decoding step.

    inpt:   (batch, classes) probabilities produced by the decoder.
    target: (batch,) index of the expected class for every row.
    mask:   (batch,) truthy for rows holding a real token, falsy for padding.

    Returns the mean loss over the unmasked rows and the number of such rows.
    The loss stays on the device of ``inpt``, so the function no longer depends
    on the global ``device`` defined in a later cell.
    """
    # masked_select expects a boolean mask; converting here accepts both the
    # uint8 masks produced by older code and boolean masks.
    mask = mask.bool()
    n_total = mask.sum()
    target_probability = torch.gather(inpt, 1, target.view(-1, 1)).squeeze(1)
    cross_entropy = -torch.log(target_probability)
    loss = cross_entropy.masked_select(mask).mean()
    return loss, n_total.item()

In [None]:
# training functions
device = torch.device("cpu")
def train(input_variable, lengths, target_variable, mask, max_len, encoder, decoder, embedding, encoder_optimizer, decoder_optimizer, batch_size, clip):
    """Run one optimization step on a single batch.

    Encodes the batch, decodes max_len steps (feeding either the target token
    or the decoder's own best guess as next input, chosen once per batch from
    the global teacher_forcing_ratio), back-propagates the summed masked loss,
    clips the gradients of both networks to ``clip`` and steps both optimizers.

    Returns the average loss per unmasked target token.
    """
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_variable = input_variable.to(device)
    lengths = lengths.to(device)
    target_variable = target_variable.to(device)
    mask = mask.to(device)

    encoder_outputs, encoder_hidden = encoder(input_variable, lengths)

    # every sequence starts with the start-of-sentence token
    decoder_input = torch.LongTensor([[SRT for _ in range(batch_size)]]).to(device)
    decoder_hidden = encoder_hidden[:decoder.n_layers]

    use_teacher_forcing = random.random() < teacher_forcing_ratio

    loss = 0
    print_losses = []
    n_totals = 0

    for t in range(max_len):
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden, encoder_outputs)
        if use_teacher_forcing:
            # next input is the expected token
            decoder_input = target_variable[t].view(1, -1)
        else:
            # next input is the decoder's most probable token
            _, topi = decoder_output.topk(1)
            decoder_input = torch.LongTensor([[topi[row][0] for row in range(batch_size)]]).to(device)
        mask_loss, n_total = loss_func(decoder_output, target_variable[t], mask[t])
        loss += mask_loss
        print_losses.append(mask_loss.item() * n_total)
        n_totals += n_total

    loss.backward()

    for network in (encoder, decoder):
        nn.utils.clip_grad_norm_(network.parameters(), clip)

    encoder_optimizer.step()
    decoder_optimizer.step()

    return sum(print_losses) / n_totals

def train_iterations(model_name, vocabulary, pairs, encoder, decoder, encoder_optimizer, decoder_optimizer, embedding, encoder_n_layers, decoder_n_layers, n_iterations, batch_size, print_rate, save_rate, clip):
    """Train for ``n_iterations`` batches of ``batch_size`` randomly drawn pairs.

    Every iteration reports its loss through the global ``histories``; every
    ``print_rate`` iterations the running average is printed and appended to
    the global ``train_loss``; every ``save_rate`` iterations a checkpoint is
    written below the Drive "saves" folder (the path uses the globals
    ``sel_user`` and ``hidden_size``).
    """
    # all batches are sampled (with replacement) before the first step runs
    training_batches = []
    for _ in range(n_iterations):
        sampled_pairs = [random.choice(pairs) for _ in range(batch_size)]
        training_batches.append(batch_to_data(vocabulary, sampled_pairs))

    running_loss = 0

    # iterations are numbered from 1
    for iteration, training_batch in enumerate(training_batches, start=1):
        input_variable, lengths, target_variable, mask, max_len = training_batch

        loss = train(input_variable, lengths, target_variable, mask, max_len, encoder, decoder, embedding, encoder_optimizer, decoder_optimizer, batch_size, clip)
        running_loss += loss

        # tensordash
        histories.sendLoss(loss = loss, epoch = iteration, total_epochs = n_iterations+1)

        if iteration % print_rate == 0:
            print_loss_avg = running_loss / print_rate
            train_loss.append(print_loss_avg)
            print("Iteration {}; Percent complete: {:.1f}%; Average loss: {:.4f}".format(iteration, iteration/n_iterations*100, print_loss_avg))
            running_loss = 0

        if iteration % save_rate != 0:
            continue

        layout_name = '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size)
        directory = os.path.join("drive/MyDrive/Colab Notebooks/chatbot/saves", sel_user, model_name, "all", layout_name)
        if not os.path.exists(directory):
            os.makedirs(directory)
        checkpoint_state = {
            'iteration': iteration,
            'en': encoder.state_dict(),
            'de': decoder.state_dict(),
            'en_opt': encoder_optimizer.state_dict(),
            'de_opt': decoder_optimizer.state_dict(),
            'loss': loss,
            'voc_dict': vocabulary.__dict__,
            'embedding': embedding.state_dict()
        }
        torch.save(checkpoint_state, os.path.join(directory, '{}_{}.tar'.format(iteration, 'checkpoint')))

In [None]:
# searcher
class GreedySearchDecoder(nn.Module):
    """Generates a reply token by token from an encoder/decoder pair.

    By default the most probable token is chosen at every step (greedy search).
    With ``use_multinomial=True`` the next token is sampled instead, after
    sharpening the distribution with ``temperature``.
    """

    def __init__(self, encoder, decoder, use_multinomial=False, temperature=0.7):
        super(GreedySearchDecoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.use_multinomial = use_multinomial
        self.temperature = temperature

    def forward(self, input_sequence, input_length, max_len):
        """Decode ``max_len`` tokens for a single input sentence.

        Returns (all_tokens, all_scores): the chosen token index of every step
        and the highest probability of the decoder output at that step.
        """
        encoder_outputs, encoder_hidden = self.encoder(input_sequence, input_length)
        # use the decoder owned by this searcher, not a global named `decoder`
        decoder_hidden = encoder_hidden[:self.decoder.n_layers]
        decoder_input = torch.ones(1, 1, device=device, dtype=torch.long) * SRT
        all_tokens = torch.zeros([0], device=device, dtype=torch.long)
        all_scores = torch.zeros([0], device=device)

        for _ in range(max_len):
            decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden, encoder_outputs)
            if self.use_multinomial:
                # The decoder returns probabilities, so temperature scaling is
                # p ** (1 / T), the same as exp(log(p) / T). The previous code
                # stored the weights under one name and sampled from another,
                # undefined, name.
                sampling_weights = decoder_output.detach().view(-1).pow(1.0 / self.temperature)
                decoder_input = torch.multinomial(sampling_weights, 1)
                decoder_scores, _ = torch.max(decoder_output, dim=1)
            else:
                decoder_scores, decoder_input = torch.max(decoder_output, dim=1)
            all_tokens = torch.cat((all_tokens, decoder_input), dim=0)
            all_scores = torch.cat((all_scores, decoder_scores), dim=0)
            # the chosen token becomes the next input, shaped (1, 1)
            decoder_input = torch.unsqueeze(decoder_input, 0)
        return all_tokens, all_scores

In [None]:
# training the model
# params
clip = 50.0  # maximum gradient norm, passed to clip_grad_norm_ in train()
# NOTE(review): teacher_forcing is not read by train(), which uses
# teacher_forcing_ratio below — presumably a leftover; confirm.
teacher_forcing = 0.9
alpha = 0.0001  # learning rate of the encoder optimizer
decoder_learning = 5.0  # decoder learning rate is alpha * decoder_learning
n_iter = 500 # from 500
print_rate = 50  # print the average loss every print_rate iterations
save_rate = 100  # write a checkpoint every save_rate iterations
teacher_forcing_ratio = 1.0  # 1.0: train() always feeds the target token as next input
model_name = 'cb_model'  # part of the checkpoint directory
attn_model = 'dot'  # one of 'dot', 'general', 'concat' (see Attn)
hidden_size = 512  # embedding size and GRU state size
encoder_n_layers = 2
decoder_n_layers = 2
dropout = 0.1
batch_size = 64
train_loss = []  # filled by train_iterations() with the printed averages

# one embedding object is shared by the encoder and the decoder
embedding = nn.Embedding(vocabulary.num_words, hidden_size)

encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, vocabulary.num_words, decoder_n_layers, dropout)

encoder_optimizer = torch.optim.Adam(encoder.parameters(), lr=alpha)
decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=alpha * decoder_learning)

# put both networks in training mode (enables dropout)
encoder.train()
decoder.train()

In [None]:
# the training function
# runs n_iter training iterations with the objects and parameters of the
# previous cell; checkpoints are written every save_rate iterations
train_iterations(model_name, vocabulary, pairs, encoder, decoder, encoder_optimizer, decoder_optimizer, embedding, encoder_n_layers, decoder_n_layers, n_iter, batch_size, print_rate, save_rate, clip)

In [None]:
# loading models
# Restores a checkpoint written by train_iterations(). The directory is derived
# from sel_user, model_name and the layer/hidden sizes, so those must match the
# values used when the checkpoint was saved.
spec_filename = "500_checkpoint.tar"
load_filename = os.path.join("drive/MyDrive/Colab Notebooks/chatbot/saves", sel_user, model_name, "all", '{}-{}_{}'.format(encoder_n_layers, decoder_n_layers, hidden_size), spec_filename)
# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# on a GPU runtime can still be loaded on a CPU-only one
checkpoint = torch.load(load_filename, map_location=device)
encoder_sd = checkpoint['en']
decoder_sd = checkpoint['de']
encoder_optimizer_sd = checkpoint['en_opt']
decoder_optimizer_sd = checkpoint['de_opt']
embedding_sd = checkpoint['embedding']
# the vocabulary is restored by replacing the attribute dictionary of the
# existing Vocabulary object
vocabulary.__dict__ = checkpoint['voc_dict']
embedding.load_state_dict(embedding_sd)
encoder.load_state_dict(encoder_sd)
decoder.load_state_dict(decoder_sd)
encoder_optimizer.load_state_dict(encoder_optimizer_sd)
decoder_optimizer.load_state_dict(decoder_optimizer_sd)
encoder.to(device)
decoder.to(device)

In [None]:
# evaluation
def evaluate(encoder, decoder, searcher, voc, sent, temperature=False, max_length=12):
    """Generate the reply to one already normalized sentence.

    encoder, decoder, temperature: accepted for interface compatibility; the
        reply is produced entirely by ``searcher``.
    searcher:   callable taking (input batch, lengths, max_length) and
        returning (tokens, scores).
    voc:        Vocabulary used to encode ``sent`` and decode the reply.
    sent:       input sentence, words separated by single spaces.
    max_length: number of tokens to generate (default 12, the previously
        hard-coded value).

    Returns the generated words, including any 'EOS'/'PAD' tokens.
    Raises KeyError when ``sent`` contains a word unknown to ``voc``.
    """
    idxs_batch = [getIndexesFromSent(voc, sent)]
    lengths = torch.tensor([len(indexes) for indexes in idxs_batch])
    # (1, seq_len) -> (seq_len, 1)
    input_batch = torch.LongTensor(idxs_batch).transpose(0, 1)
    input_batch = input_batch.to(device)
    lengths = lengths.to(device)
    # inference only: do not build autograd graphs
    with torch.no_grad():
        tokens, scores = searcher(input_batch, lengths, max_length)
    decoded_words = [voc.index_to_word[token.item()] for token in tokens]
    return decoded_words

def do_evaluate(encoder, decoder, searcher, voc, input_sent=None):
    """Answer one sentence, print the reply and return it.

    When ``input_sent`` is None the sentence is read from input() and typing
    "exitexit" quits. A caller that already has the text (for example a chat
    bot handler) passes it as ``input_sent``; the quit keyword is not honored
    in that case, so remote users cannot stop the process.

    Raises KeyError when the sentence contains a word unknown to ``voc``.
    """
    if input_sent is None:
        input_sent = input()
        if input_sent == "exitexit":
            print("Quit.")
            exit()
    input_sent = fixString(input_sent)
    outputs = evaluate(encoder, decoder, searcher, voc, input_sent)
    # drop the end-of-sentence and padding tokens from the reply
    reply = ' '.join(x for x in outputs if not (x=='EOS' or x=='PAD'))
    print("Says:", reply)
    return reply

# change these to encoder, decoder when not loading
# the searcher wraps the current encoder/decoder objects and is what
# evaluate() calls to generate a reply
searcher = GreedySearchDecoder(encoder, decoder)

In [None]:
# evaluation when just trained
# Interactive chat loop; typing "exitexit" ends it.
print("exitexit to stop.")
while True:
    try:
        do_evaluate(encoder, decoder, searcher, vocabulary)
    except KeyError as unknown_word:
        # a word that is not in the vocabulary must not end the whole session
        print("Error, unknown word:", unknown_word)

In [None]:
# todo:
# fix keyerrors for unknown words in input (probably isn't fixable)
# triage bugs noted in text comments/fix text comments?

# Discord Implementation
Code for running a discord bot with this model. This code does not run online but can be implemented server-side.

In [None]:
# installs the package that provides the `discord` module imported below
!pip install discord

In [None]:
# The bot token is a secret: read it with getpass so that it is not echoed
# into the notebook output (input() would store it in the saved notebook).
from getpass import getpass

TOKEN = getpass("Token: ")

In [None]:
import discord

# discord.py 2.x requires an explicit intents argument, and the message content
# intent is needed to read the text of messages. Older 1.x releases have no
# such flag, hence the hasattr() check.
# NOTE(review): the message content intent presumably also has to be enabled
# for the bot in the Discord developer portal — confirm.
intents = discord.Intents.default()
if hasattr(intents, "message_content"):
    intents.message_content = True
client = discord.Client(intents=intents)

@client.event
async def on_ready():
    """Log the account the bot is connected as."""
    print('Logged on as user {0.user}'.format(client))

@client.event
async def on_message(message):
    """Answer '$hello' with a greeting and '$hey <text>' with a model reply."""
    # never react to the bot's own messages
    if message.author == client.user:
        return

    if message.content.startswith('$hello'):
        await message.channel.send('Hello!')

    if message.content.startswith('$hey'):
        # everything after the 4-character command is the sentence to answer
        content_str = message.content[4:]
        try:
            # Call evaluate() directly: it takes the sentence as an argument
            # and returns the generated words. The previous call passed the
            # sentence to do_evaluate(), whose signature did not accept it, so
            # the bare except answered every request with the error message.
            reply_words = evaluate(encoder, decoder, searcher, vocabulary, fixString(content_str))
        except KeyError:
            # the sentence contains a word that is not in the vocabulary
            await message.channel.send('Error, unknown word.')
            return
        reply = ' '.join(word for word in reply_words if word not in ('EOS', 'PAD'))
        # an empty message cannot be sent, so fall back to a placeholder
        await message.channel.send(reply or '...')

client.run(TOKEN)