In [8]:
%load_ext autoreload
%autoreload 2

import sys

sys.path.append("../../")

from dataloader import SimpleDataloader
from params import ATTN_SEQ2SEQ_PARAMS
from models.attn_seq2seq import AttnSeq2seq, count_parameters, Encoder, Decoder
from utils import (
    save_model,
    get_torch_device,
    epoch_time,
    arg_copy,
    save_to_artifact,
    save_test_df,
    save_metrics,
)
from test_metrics.test_model import Model_tester

import torch
import torch.nn.functional as F
import torch.optim as optim
import torch.nn as nn
import numpy as np
import math
import sys
import time
import glob
import argparse
import pandas as pd


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
hp = ATTN_SEQ2SEQ_PARAMS


In [10]:
device = get_torch_device()


data = SimpleDataloader(**vars(hp))
train_dataloader = data.get_train_dataloader()
val_dataloader = data.get_val_dataloader()
test_dataloader = data.get_test_dataloader()


Loading Form Cache


In [11]:

model = AttnSeq2seq(**vars(hp))
model.init_weights()
print(f"The model has {count_parameters(model):,} trainable parameters")
model.to(device)

The model has 166,763,401 trainable parameters


AttnSeq2seq(
  (encoder): Encoder(
    (embedding): Embedding(45000, 300)
    (rnn): LSTM(300, 600, num_layers=2, dropout=0.3, bidirectional=True)
    (dropout): Dropout(p=0.3, inplace=False)
  )
  (decoder): Decoder(
    (embedding): Embedding(45000, 300)
    (rnn): LSTM(300, 1200, num_layers=2, dropout=0.3)
    (attention): Attention()
    (fc): Linear(in_features=2400, out_features=45000, bias=True)
    (dropout): Dropout(p=0.3, inplace=False)
  )
)

In [12]:
it=iter(val_dataloader)
src,trg,src_len=next(it)
src = src.to(device) 
trg = trg.to(device)

In [13]:
pred = model(src, trg)

In [14]:

"""
Loss Function and optimizers
"""
CCE = lambda x, y: F.cross_entropy(x, y, ignore_index=0)
if hp.optim == "adam":
    optimizer = optim.AdamW(model.parameters(), lr=hp.lr)
if hp.optim == "SGD":
    optimizer = optim.SGD(model.parameters(), lr=hp.lr)

"""
Scheduler
"""
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode="min",
    factor=0.1,
    patience=5,
    verbose=False,
    threshold=0.0001,
    threshold_mode="rel",
    cooldown=0,
    min_lr=0,
    eps=1e-08,
)

"""
Train Function
"""

def train(model, dataloader, optimizer, loss_fn, device, print_freq=100):
    model.train()
    losses = []
    print(f"\tTraining for {len(dataloader)} iter")
    for idx, batch in enumerate(dataloader):
        optimizer.zero_grad()
        src, trg, src_len = batch
        src = src.to(device)
        trg = trg.to(device)
        pred = model(src, trg)
        pred = pred.permute(1, 2, 0)
        loss = loss_fn(pred, trg)
        loss.backward()
        optimizer.step()
        losses.append(loss.item())
        if idx % print_freq == 0:
            print(f"\t\tIter:{idx}", f"loss:{loss.item()}", sep="----")
    return losses

"""
Evaluate Function
"""

def evaluate(model, dataloader, loss_fn, device):
    model.eval()
    losses = []
    for idx, batch in enumerate(dataloader):
        src, trg, src_len = batch
        src = src.to(device)
        trg = trg.to(device)
        with torch.no_grad():
            pred = model(src, trg)
        pred = pred.permute(1, 2, 0)
        loss = loss_fn(pred, trg)
        losses.append(loss.item())
    return np.mean(losses)

"""
Generate Test Dataframe
"""

def create_test_df(dataloader):
    data = []
    for idx, batch in enumerate(dataloader):
        inp, opt = batch
        for i in range(len(inp)):
            data_dict = {"input": inp[i], "output": opt[i]}
            data.append(data_dict)

    df = pd.DataFrame(data)
    return df

def test_model(model, inpLang, optLang, test_df):
    tester = Model_tester(model, inpLang, optLang, max_len=40)
    tester.set_inference_mode("greedy")
    return tester.generate_metrics(test_df)

"""
Main Loop
"""
test_df = create_test_df(test_dataloader)
best_model_loss = float("inf")
val_loss = evaluate(model, val_dataloader, CCE, device)
print("Starting Loss: ", val_loss)
for ep in range(hp.epochs):
    print("-" * 10)
    print(f"Epoch : {ep}")
    st_time = time.time()
    train_loss = train(model, train_dataloader, optimizer, CCE, device)
    val_loss = evaluate(model, val_dataloader, CCE, device)
    scheduler.step(val_loss)
    if val_loss < best_model_loss:
        to_save = {
            "model": model,
            "inpLang": data.inpLang,
            "optLang": data.optLang,
            "test_df": test_df,
            "params": vars(hp),
            "version": version,
        }
        save_model(path=hp.save_path, name="seq2seq.pt", **to_save)
        best_model_loss = val_loss
    e_time = time.time()
    epoch_mins, epoch_secs = epoch_time(st_time, e_time)
    print(
        f"\tTraining Loss : {np.mean(train_loss)}",
        f"Train Perplexity : {math.exp(np.mean(train_loss))}",
        sep="\t|\t",
    )
    print(
        f"\tVal Loss      : {val_loss}",
        f"Val Perplexity : {math.exp(val_loss)}",
        sep="\t|\t",
    )
    print(f"\tTime per epoch: {epoch_mins}m {epoch_secs}s")
print("Generating Test Metrics")



Starting Loss:  10.714410691891077
----------
Epoch : 0
	Training for 953 iter
		Iter:0----loss:10.714405059814453
		Iter:100----loss:6.7219462394714355
		Iter:200----loss:6.329308986663818
		Iter:300----loss:6.331459045410156
		Iter:400----loss:5.928847312927246
		Iter:500----loss:5.9180707931518555
		Iter:600----loss:5.746861934661865
		Iter:700----loss:5.6512770652771
		Iter:800----loss:5.882458209991455
		Iter:900----loss:5.733606815338135


NameError: name 'version' is not defined

In [None]:
df, metrics = test_model(model, data.inpLang, data.optLang, test_df)
save_test_df(df, "seq2seq", version)
save_metrics(metrics, "seq2seq", version)
print(metrics)
if hp.to_artifact:
    save_to_artifact("seq2seq", version)

In [2]:
import torch

In [10]:
a=torch.zeros(1,2)
y=torch.stack((a,))

In [18]:
torch. 

TypeError: stack() missing 1 required positional arguments: "tensors"

In [15]:
y.shape

torch.Size([2, 1, 1, 2])

In [38]:
a=torch.ones(64,40)
all=a.unsqueeze(-1)

In [39]:
a=torch.zeros(64,40)
all=torch.cat((all,a.unsqueeze(-1)),dim=2)

In [40]:
all.shape

torch.Size([64, 40, 2])

In [67]:
import numpy as np
import seaborn as sns
import matplotlib.pylab as plt

uniform_data = np.random.rand(10, 12)
ax = sns.heatmap(a, linewidth=0.5,xticklabels=o,yticklabels=i)
plt.show()

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

In [44]:
flights = sns.load_dataset("flights")


In [45]:
flights

Unnamed: 0,year,month,passengers
0,1949,January,112
1,1949,February,118
2,1949,March,132
3,1949,April,129
4,1949,May,121
...,...,...,...
139,1960,August,606
140,1960,September,508
141,1960,October,461
142,1960,November,390


In [46]:
a=torch.zeros(4,10)
i=["Hello","how","are","you"]
o=["Hello","how","are","you","Hello","how","are","you","are","you"]

In [59]:
a.shape

torch.Size([4, 10])

In [66]:
a=a.cuda()

In [65]:
a

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])