In [1]:
import json
import csv

def extract_instruction_pairs(json_path):
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    pairs = []

    for item in data:
        instruction = item.get("instruction", "").strip()
        input_text = item.get("input", "").strip()
        output_text = item.get("output", "").strip()

        if instruction and input_text:
            prompt = f"{instruction}\n{input_text}"
        else:
            prompt = instruction or input_text

        if prompt and output_text:
            pairs.append([prompt, output_text])

    return pairs





In [2]:
json_path = "/content/Cleaned_date.json"
pairs = extract_instruction_pairs(json_path)

print(f"Extracted {len(pairs)} pairs")
print(pairs[0])


Extracted 68911 pairs
['For a car, what scams can be plotted with 0% financing vs rebate?', "The car deal makes money 3 ways. If you pay in one lump payment. If the payment is greater than what they paid for the car, plus their expenses, they make a profit. They loan you the money. You make payments over months or years, if the total amount you pay is greater than what they paid for the car, plus their expenses, plus their finance expenses they make money. Of course the money takes years to come in, or they sell your loan to another business to get the money faster but in a smaller amount. You trade in a car and they sell it at a profit. Of course that new transaction could be a lump sum or a loan on the used car... They or course make money if you bring the car back for maintenance, or you buy lots of expensive dealer options. Some dealers wave two deals in front of you: get a 0% interest loan. These tend to be shorter 12 months vs 36,48,60 or even 72 months. The shorter length makes 

In [3]:
def save_pairs_to_tsv(pairs, save_path):
    with open(save_path, 'w', encoding='utf-8', newline='') as f:
        writer = csv.writer(f, delimiter='\t')
        for pair in pairs:
            writer.writerow(pair)


In [4]:
save_pairs_to_tsv(pairs, "/content/formatted_pairs.txt")
print("OK!!!!Saved formatted pairs to /content/formatted_pairs.txt")


OK!!!!Saved formatted pairs to /content/formatted_pairs.txt


In [5]:
import re
import unicodedata
import itertools
import torch

# Default word tokens
PAD_token = 0  # Used for padding short sentences
SOS_token = 1  # Start-of-sentence token
EOS_token = 2  # End-of-sentence token

class Voc:
    def __init__(self, name):
        self.name = name
        self.trimmed = False
        self.word2index = {}
        self.word2count = {}
        self.index2word = {PAD_token: "PAD", SOS_token: "SOS", EOS_token: "EOS"}
        self.num_words = 3  # Count default tokens

    def addSentence(self, sentence):
        for word in sentence.split(' '):
            self.addWord(word)

    def addWord(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.num_words
            self.word2count[word] = 1
            self.index2word[self.num_words] = word
            self.num_words += 1
        else:
            self.word2count[word] += 1

    def trim(self, min_count):
        if self.trimmed:
            return
        self.trimmed = True

        keep_words = [k for k, v in self.word2count.items() if v >= min_count]

        print('keep_words {} / {} = {:.4f}'.format(
            len(keep_words), len(self.word2index), len(keep_words) / len(self.word2index)
        ))

        self.word2index = {}
        self.word2count = {}
        self.index2word = {PAD_token: "PAD", SOS_token: "SOS", EOS_token: "EOS"}
        self.num_words = 3

        for word in keep_words:
            self.addWord(word)

def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )

def normalizeString(s):
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    s = re.sub(r"\s+", r" ", s).strip()
    return s

def indexesFromSentence(voc, sentence):
    return [voc.word2index[word] for word in sentence.split(' ')] + [EOS_token]

def zeroPadding(l, fillvalue=PAD_token):
    return list(itertools.zip_longest(*l, fillvalue=fillvalue))

def binaryMatrix(l, value=PAD_token):
    m = []
    for seq in l:
        m.append([0 if token == PAD_token else 1 for token in seq])
    return m

def inputVar(l, voc):
    indexes_batch = [indexesFromSentence(voc, sentence) for sentence in l]
    lengths = torch.tensor([len(indexes) for indexes in indexes_batch])
    padList = zeroPadding(indexes_batch)
    padVar = torch.LongTensor(padList)
    return padVar, lengths

def outputVar(l, voc):
    indexes_batch = [indexesFromSentence(voc, sentence) for sentence in l]
    max_target_len = max([len(indexes) for indexes in indexes_batch])
    padList = zeroPadding(indexes_batch)
    mask = binaryMatrix(padList)
    mask = torch.BoolTensor(mask)
    padVar = torch.LongTensor(padList)
    return padVar, mask, max_target_len

def batch2TrainData(voc, pair_batch):
    pair_batch.sort(key=lambda x: len(x[0].split(" ")), reverse=True)
    input_batch, output_batch = zip(*pair_batch)
    inp, lengths = inputVar(input_batch, voc)
    output, mask, max_target_len = outputVar(output_batch, voc)
    return inp, lengths, output, mask, max_target_len

In [6]:

from sklearn.model_selection import train_test_split
import os
import random
import torch
from torch.utils.data import Dataset, DataLoader
#from utils import Voc, normalizeString, batch2TrainData

MAX_LENGTH = 30

class ChatDataset(Dataset):
    def __init__(self, pairs, voc):
        self.pairs = pairs
        self.voc = voc

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        return self.pairs[idx]

def readVocs(datafile, corpus_name):
    print("Reading lines...")
    lines = open(datafile, encoding='utf-8').read().strip().split('\n')
    pairs = []
    for l in lines:

      parts = l.split('\t')
      if len(parts) == 2:

        pairs.append([normalizeString(parts[0]), normalizeString(parts[1])])

    voc = Voc(corpus_name)
    return voc, pairs

def filterPair(p):
    return len(p[0].split(' ')) < MAX_LENGTH and len(p[1].split(' ')) < MAX_LENGTH

def filterPairs(pairs):
    return [pair for pair in pairs if filterPair(pair)]

def loadPrepareData(corpus, corpus_name, datafile, save_dir):
    print("Start preparing training data ...")
    voc, pairs = readVocs(datafile, corpus_name)
    print("Read {!s} sentence pairs".format(len(pairs)))
    pairs = filterPairs(pairs)
    print("Trimmed to {!s} sentence pairs".format(len(pairs)))
    print("Counting words...")
    for pair in pairs:
        voc.addSentence(pair[0])
        voc.addSentence(pair[1])
    print("Counted words:", voc.num_words)
    return voc, pairs

def trimRareWords(voc, pairs, MIN_COUNT):
    voc.trim(MIN_COUNT)
    keep_pairs = []
    for pair in pairs:
        input_sentence, output_sentence = pair
        keep_input = all(word in voc.word2index for word in input_sentence.split(' '))
        keep_output = all(word in voc.word2index for word in output_sentence.split(' '))
        if keep_input and keep_output:
            keep_pairs.append(pair)

    print("Trimmed from {} pairs to {}, {:.4f} of total".format(len(pairs), len(keep_pairs), len(keep_pairs) / len(pairs)))
    return keep_pairs

def collate_fn(batch, voc):
    return batch2TrainData(voc, batch)


def split_dataset(pairs, test_size=0.1, random_state=42):
    train_pairs, val_pairs = train_test_split(pairs, test_size=test_size, random_state=random_state)
    print(f"Split {len(pairs)} pairs into {len(train_pairs)} train and {len(val_pairs)} validation")
    return train_pairs, val_pairs


def get_dataloader(pairs, voc, batch_size=64, shuffle=True):
    dataset = ChatDataset(pairs, voc)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, collate_fn=lambda x: collate_fn(x, voc))

In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Encoder RNN
class EncoderRNN(nn.Module):
    def __init__(self, hidden_size, embedding, n_layers=1, dropout=0):
        super(EncoderRNN, self).__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size
        self.embedding = embedding

        self.gru = nn.GRU(hidden_size, hidden_size, n_layers,
                          dropout=(0 if n_layers == 1 else dropout),
                          bidirectional=True)

    def forward(self, input_seq, input_lengths, hidden=None):
        embedded = self.embedding(input_seq)
        packed = nn.utils.rnn.pack_padded_sequence(embedded, input_lengths)
        outputs, hidden = self.gru(packed, hidden)
        outputs, _ = nn.utils.rnn.pad_packed_sequence(outputs)
        outputs = outputs[:, :, :self.hidden_size] + outputs[:, :, self.hidden_size:]
        return outputs, hidden


# Attention mechanism
class Attn(nn.Module):
    def __init__(self, method, hidden_size):
        super(Attn, self).__init__()
        self.method = method
        self.hidden_size = hidden_size

        if self.method not in ['dot', 'general', 'concat']:
            raise ValueError(self.method, "is not an appropriate attention method.")

        if self.method == 'general':
            self.attn = nn.Linear(hidden_size, hidden_size)
        elif self.method == 'concat':
            self.attn = nn.Linear(hidden_size * 2, hidden_size)
            self.v = nn.Parameter(torch.FloatTensor(hidden_size))

    def dot_score(self, hidden, encoder_output):
        return torch.sum(hidden * encoder_output, dim=2)

    def general_score(self, hidden, encoder_output):
        energy = self.attn(encoder_output)
        return torch.sum(hidden * energy, dim=2)

    def concat_score(self, hidden, encoder_output):
        energy = self.attn(torch.cat((hidden.expand(encoder_output.size(0), -1, -1), encoder_output), 2)).tanh()
        return torch.sum(self.v * energy, dim=2)

    def forward(self, hidden, encoder_outputs):
        if self.method == 'general':
            attn_energies = self.general_score(hidden, encoder_outputs)
        elif self.method == 'concat':
            attn_energies = self.concat_score(hidden, encoder_outputs)
        elif self.method == 'dot':
            attn_energies = self.dot_score(hidden, encoder_outputs)

        attn_energies = attn_energies.t()
        return F.softmax(attn_energies, dim=1).unsqueeze(1)


# Decoder with Luong Attention
class LuongAttnDecoderRNN(nn.Module):
    def __init__(self, attn_model, embedding, hidden_size, output_size, n_layers=1, dropout=0.1):
        super(LuongAttnDecoderRNN, self).__init__()

        self.attn_model = attn_model
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout = dropout

        self.embedding = embedding
        self.embedding_dropout = nn.Dropout(dropout)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=(0 if n_layers == 1 else dropout))
        self.concat = nn.Linear(hidden_size * 2, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)

        self.attn = Attn(attn_model, hidden_size)

    def forward(self, input_step, last_hidden, encoder_outputs):
        embedded = self.embedding(input_step)
        embedded = self.embedding_dropout(embedded)
        rnn_output, hidden = self.gru(embedded, last_hidden)
        attn_weights = self.attn(rnn_output, encoder_outputs)
        context = attn_weights.bmm(encoder_outputs.transpose(0, 1))
        rnn_output = rnn_output.squeeze(0)
        context = context.squeeze(1)
        concat_input = torch.cat((rnn_output, context), 1)
        concat_output = torch.tanh(self.concat(concat_input))
        output = self.out(concat_output)
        output = F.softmax(output, dim=1)
        return output, hidden

In [40]:

import wandb

def init_wandb(project_name="seq2seqRNNXAtten_last", config=None):
    """
    Initializes wandb api key = "dadc5760c5180b0aa661c9e66b78c7e8af724486"
    """
    if config is None:
        config = {
            "model_name": "cb_model",
            "attn_model": "dot",
            "hidden_size": 500,
            "encoder_n_layers": 3,
            "decoder_n_layers": 3,
            "dropout": 0.2,
            "batch_size": 64,
            "learning_rate": 0.0001,
            "decoder_learning_ratio": 5.0,
            "teacher_forcing_ratio": 0.9,
            "clip": 50.0,
            "n_iteration": 4000,
            "print_every": 10,
            "save_every": 500
        }

    wandb.init(project=project_name, config=config)
    return wandb.config


In [41]:
import os
import math
import pickle
import torch
import torch.nn as nn
import torch.optim as optim
import wandb
#from utils import Voc
#from dataset import loadPrepareData, trimRareWords, split_dataset, get_dataloader
#from models_seq2seq import EncoderRNN, LuongAttnDecoderRNN
#from wandb_config import init_wandb
from datetime import datetime


dev_config = init_wandb()


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


datafile = "/content/formatted_pairs.txt"
save_dir = os.path.join("data", "save")
corpus_name = "custom"
voc, pairs = loadPrepareData("data", corpus_name, datafile, save_dir)
pairs = trimRareWords(voc, pairs, MIN_COUNT=dev_config.MIN_COUNT if "MIN_COUNT" in dev_config else 3)


train_pairs, val_pairs = split_dataset(pairs, test_size=0.1)


train_loader = get_dataloader(train_pairs, voc, batch_size=dev_config.batch_size)
val_loader = get_dataloader(val_pairs, voc, batch_size=dev_config.batch_size)


embedding = nn.Embedding(voc.num_words, dev_config.hidden_size)


encoder = EncoderRNN(dev_config.hidden_size, embedding, dev_config.encoder_n_layers, dev_config.dropout).to(device)
decoder = LuongAttnDecoderRNN(dev_config.attn_model, embedding, dev_config.hidden_size, voc.num_words,
                               dev_config.decoder_n_layers, dev_config.dropout).to(device)


encoder_optimizer = optim.Adam(encoder.parameters(), lr=dev_config.learning_rate)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=dev_config.learning_rate * dev_config.decoder_learning_ratio)

def save_checkpoint_tar(voc, encoder, decoder, embedding, encoder_optimizer, decoder_optimizer, iteration, loss, save_path="checkpoint.tar"):
    checkpoint = {
        'iteration': iteration,
        'encoder_state': encoder.state_dict(),
        'decoder_state': decoder.state_dict(),
        'embedding_state': embedding.state_dict(),
        'encoder_optimizer_state': encoder_optimizer.state_dict(),
        'decoder_optimizer_state': decoder_optimizer.state_dict(),
        'voc_dict': voc.__dict__,
        'loss': loss
    }
    torch.save(checkpoint, save_path)

    with open("voc.pkl", "wb") as f:
        pickle.dump(voc, f)

def log_artifacts_to_wandb(tar_path="checkpoint.tar", voc_path="voc.pkl", artifact_name="chatbot_model"):
    artifact = wandb.Artifact(artifact_name, type="model")
    artifact.add_file(tar_path)
    artifact.add_file(voc_path)
    wandb.log_artifact(artifact)


def maskNLLLoss(inp, target, mask):
    nTotal = mask.sum()
    crossEntropy = -torch.log(torch.gather(inp, 1, target.view(-1, 1)).squeeze(1))
    loss = crossEntropy.masked_select(mask).mean()
    return loss, nTotal.item()


def train(input_variable, lengths, target_variable, mask, max_target_len,
          encoder, decoder, embedding, encoder_optimizer, decoder_optimizer, clip):

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_variable = input_variable.to(device)
    target_variable = target_variable.to(device)
    mask = mask.to(device)
    lengths = lengths.to("cpu")

    current_batch_size = input_variable.size(1)

    loss = 0
    print_losses = []
    n_totals = 0

    encoder_outputs, encoder_hidden = encoder(input_variable, lengths)

    decoder_input = torch.LongTensor([[1 for _ in range(current_batch_size)]]).to(device)
    decoder_hidden = encoder_hidden[:decoder.n_layers]

    use_teacher_forcing = True if torch.rand(1).item() < dev_config.teacher_forcing_ratio else False

    if use_teacher_forcing:
        for t in range(max_target_len):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden, encoder_outputs)
            decoder_input = target_variable[t].view(1, -1)
            mask_loss, nTotal = maskNLLLoss(decoder_output, target_variable[t], mask[t])
            loss += mask_loss
            print_losses.append(mask_loss.item() * nTotal)
            n_totals += nTotal
    else:
        for t in range(max_target_len):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden, encoder_outputs)
            _, topi = decoder_output.topk(1)
            decoder_input = torch.LongTensor([[topi[i][0] for i in range(current_batch_size)]]).to(device)
            mask_loss, nTotal = maskNLLLoss(decoder_output, target_variable[t], mask[t])
            loss += mask_loss
            print_losses.append(mask_loss.item() * nTotal)
            n_totals += nTotal

    loss.backward()

    _ = nn.utils.clip_grad_norm_(encoder.parameters(), dev_config.clip)
    _ = nn.utils.clip_grad_norm_(decoder.parameters(), dev_config.clip)

    encoder_optimizer.step()
    decoder_optimizer.step()

    return sum(print_losses) / n_totals


def evaluate_loss(val_loader, encoder, decoder, embedding):
    encoder.eval()
    decoder.eval()
    total_loss = 0
    total_count = 0
    with torch.no_grad():
        for input_variable, lengths, target_variable, mask, max_target_len in val_loader:
            input_variable = input_variable.to(device)
            target_variable = target_variable.to(device)
            mask = mask.to(device)
            lengths = lengths.to("cpu")

            current_batch_size = input_variable.size(1)

            encoder_outputs, encoder_hidden = encoder(input_variable, lengths)
            decoder_input = torch.LongTensor([[1 for _ in range(current_batch_size)]]).to(device)
            decoder_hidden = encoder_hidden[:decoder.n_layers]

            for t in range(max_target_len):
                decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden, encoder_outputs)
                decoder_input = target_variable[t].view(1, -1)
                mask_loss, nTotal = maskNLLLoss(decoder_output, target_variable[t], mask[t])
                total_loss += mask_loss.item() * nTotal
                total_count += nTotal
    encoder.train()
    decoder.train()
    return total_loss / total_count


print("\nStarting training...")
train_iter = iter(train_loader)
for iteration in range(1, dev_config.n_iteration + 1):
    try:
        batch = next(train_iter)
    except StopIteration:
        train_iter = iter(train_loader)
        batch = next(train_iter)

    input_variable, lengths, target_variable, mask, max_target_len = batch
    train_loss = train(input_variable, lengths, target_variable, mask, max_target_len,
                       encoder, decoder, embedding, encoder_optimizer, decoder_optimizer,
                       dev_config.clip)

    perplexity = math.exp(train_loss)
    wandb.log({
        "train_loss": train_loss,
        "train_perplexity": perplexity,
        "iteration": iteration
    })

    if iteration % dev_config.print_every == 0:
        print("Iteration: {}; Train Loss: {:.4f} | Perplexity: {:.4f}".format(iteration, train_loss, perplexity))

    if iteration % dev_config.save_every == 0:
        val_loss = evaluate_loss(val_loader, encoder, decoder, embedding)
        val_perplexity = math.exp(val_loss)
        wandb.log({
            "val_loss": val_loss,
            "val_perplexity": val_perplexity
        })
        print("Validation Loss: {:.4f} | Perplexity: {:.4f}".format(val_loss, val_perplexity))

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        save_checkpoint_tar(voc, encoder, decoder, embedding, encoder_optimizer, decoder_optimizer, iteration, val_loss, f"checkpoint_{timestamp}.tar")
        log_artifacts_to_wandb(f"checkpoint_{timestamp}.tar", "voc.pkl", f"chatbot_checkpoint_{iteration}")


save_checkpoint_tar(voc, encoder, decoder, embedding, encoder_optimizer, decoder_optimizer, iteration, train_loss, "final_checkpoint.tar")
log_artifacts_to_wandb("final_checkpoint.tar", "voc.pkl", "final_chatbot_checkpoint")
print("✅ Final checkpoint saved to W&B.")


0,1
iteration,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇█
train_loss,██▇▇▇▆▆▆▅▅▆▅▆▅▅▅▄▄▄▃▄▃▃▃▃▃▂▂▂▂▃▃▂▂▂▂▂▂▁▁
train_perplexity,█▇▇█▇▆▆▄▄▄▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▅▃▂▁▁
val_perplexity,█▄▂▂▁▁

0,1
iteration,3403.0
train_loss,2.78323
train_perplexity,16.17115
val_loss,4.76762
val_perplexity,117.63876


Start preparing training data ...
Reading lines...
Read 69127 sentence pairs
Trimmed to 31953 sentence pairs
Counting words...
Counted words: 23180
keep_words 10574 / 23177 = 0.4562
Trimmed from 31953 pairs to 22654, 0.7090 of total
Split 22654 pairs into 20388 train and 2266 validation

Starting training...
Iteration: 10; Train Loss: 7.6856 | Perplexity: 2176.7997
Iteration: 20; Train Loss: 6.5281 | Perplexity: 684.1121
Iteration: 30; Train Loss: 6.6333 | Perplexity: 760.0228
Iteration: 40; Train Loss: 6.6907 | Perplexity: 804.8645
Iteration: 50; Train Loss: 6.6562 | Perplexity: 777.5857
Iteration: 60; Train Loss: 6.4715 | Perplexity: 646.4559
Iteration: 70; Train Loss: 6.3146 | Perplexity: 552.6043
Iteration: 80; Train Loss: 6.4418 | Perplexity: 627.5380
Iteration: 90; Train Loss: 6.3352 | Perplexity: 564.0979
Iteration: 100; Train Loss: 6.3868 | Perplexity: 593.9703
Iteration: 110; Train Loss: 6.5949 | Perplexity: 731.3902
Iteration: 120; Train Loss: 6.3684 | Perplexity: 583.1318
It

In [42]:
import torch
import torch.nn as nn
import torch.nn.functional as F
#from utils import indexesFromSentence
#from config import MAX_LENGTH  # if you have a shared config file

def to_device(tensor):
    return tensor.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

class GreedySearchDecoder(nn.Module):
    def __init__(self, encoder, decoder):
        super(GreedySearchDecoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, input_seq, input_length, max_length):
        encoder_outputs, encoder_hidden = self.encoder(input_seq, input_length)
        decoder_hidden = encoder_hidden[:self.decoder.n_layers]
        decoder_input = torch.ones(1, 1, device=input_seq.device, dtype=torch.long) * 1  # SOS_token

        all_tokens = torch.zeros([0], device=input_seq.device, dtype=torch.long)
        all_scores = torch.zeros([0], device=input_seq.device)

        for _ in range(max_length):
            decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden, encoder_outputs)
            decoder_scores, decoder_input = torch.max(decoder_output, dim=1)
            all_tokens = torch.cat((all_tokens, decoder_input), dim=0)
            all_scores = torch.cat((all_scores, decoder_scores), dim=0)
            decoder_input = torch.unsqueeze(decoder_input, 0)

        return all_tokens, all_scores

def evaluate(encoder, decoder, searcher, voc, sentence, max_length=MAX_LENGTH):
    indexes_batch = [indexesFromSentence(voc, sentence)]
    lengths = torch.tensor([len(indexes) for indexes in indexes_batch])
    input_batch = torch.LongTensor(indexes_batch).transpose(0, 1)
    input_batch = to_device(input_batch)
    lengths = lengths.to("cpu")

    tokens, scores = searcher(input_batch, lengths, max_length)
    decoded_words = [voc.index2word[token.item()] for token in tokens]
    return decoded_words


In [55]:
import wandb

artifact = wandb.use_artifact("abhi1199-city-university-of-london/seq2seqRNNXAtten_last/final_chatbot_checkpoint:v0", type="model")
artifact_dir = artifact.download()

checkpoint_path = os.path.join(artifact_dir, "final_checkpoint.tar")
voc_path = os.path.join(artifact_dir, "voc.pkl")


checkpoint = torch.load(checkpoint_path, map_location=device)


[34m[1mwandb[0m: Downloading large artifact final_chatbot_checkpoint:v0, 288.27MB. 2 files... 
[34m[1mwandb[0m:   2 of 2 files downloaded.  
Done. 0:0:2.8


In [56]:
with open(voc_path, "rb") as f:
    voc = pickle.load(f)


In [59]:



hidden_size = checkpoint['embedding_state']['weight'].shape[1]
embedding = nn.Embedding(voc.num_words, hidden_size)
embedding.load_state_dict(checkpoint['embedding_state'])
embedding = embedding.to(device)

encoder = EncoderRNN(hidden_size, embedding, n_layers=2, dropout=0.1).to(device)
decoder = LuongAttnDecoderRNN("dot", embedding, hidden_size, voc.num_words, n_layers=2, dropout=0.1).to(device)

encoder.load_state_dict(checkpoint['encoder_state'])
decoder.load_state_dict(checkpoint['decoder_state'])

encoder.eval()
decoder.eval()


searcher = GreedySearchDecoder(encoder, decoder)


def chat():
    print("Hello! Financeot is ready to solve your query! Type 'quit' to exit.")
    while True:
        try:
            input_sentence = input("> ")
            if input_sentence.lower() in ["quit", "q"]:
                break
            input_sentence = normalizeString(input_sentence)
            output_words = evaluate(encoder, decoder, searcher, voc, input_sentence)
            output_words = [w for w in output_words if w not in ["EOS", "PAD"]]
            print("Bot:", ' '.join(output_words))
        except KeyError:
            print("Oops! Encountered unknown word. Try again.")

if __name__ == "__main__":
    chat()


Hello! Financeot is ready to solve your query! Type 'quit' to exit.
> Why do some stocks have a higher margin requirement?
Bot: some you can have a successful mobile of parts a test that can help you as much as possible . .
> why do I need an emergency fund if I already have investments?
Bot: i believe you believe and what you have that can have you been . it is always good to have have you to have . .
> How would bonds fare if interest rates rose?
Bot: there are three players you need to take a few steps . .
> Simple and safe way to manage a lot of cash
Bot: . what are the of you are ? .
> Personal Tax Return software for Linux?
Bot: the criteria for you for is as follows as of and and and and . and .
> Is it possible to make money by getting a mortgage?
Bot: it is not a common step to make it easy to pay . you are here to you pay about your money . you you you .
> Is it ever a good idea to close credit cards?
Bot: it is not a good step but you need to buy your budget but you can give

In [None]:
why do I need an emergency fund if I already have investments?
Why do some stocks have a higher margin requirement?


In [None]:
How would bonds fare if interest rates rose?