In [12]:
%load_ext autoreload
%autoreload 2
import csv
import linecache
import string
import json
import os
from io import StringIO
import time

import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset

from ignite.engine import Events, create_supervised_trainer, create_supervised_evaluator
from ignite.metrics import Accuracy, Loss, Precision, Recall
from tensorboardX import SummaryWriter

import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
from tabulate import tabulate

from models import SentimentClassifier
from datasets import TextDataset, get_label

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [15]:
# Class index -> human-readable label; used to name the per-class
# precision/recall scalars written to TensorBoard.
id_to_class = dict(enumerate(('good', 'average', 'bad')))

In [2]:
# Settings shared by the datasets, data loaders and model defined further down.
batch_size = 512  # batch size handed to both DataLoaders
max_tokens = 150  # forwarded to TextDataset and SentimentClassifier

# Character set handed to TextDataset: the 26 lowercase ASCII letters.
all_chars = string.ascii_lowercase
# Variant that additionally contains ASCII punctuation:
# all_chars = string.ascii_lowercase + string.punctuation

In [3]:
# Build the training and validation datasets (in that order) from their CSV
# files. Both receive the same max_tokens / all_chars settings and are created
# with strip_punctuations=False.
train_data, val_data = (
    TextDataset(data_path=csv_path,
                max_tokens=max_tokens,
                all_chars=all_chars,
                strip_punctuations=False)
    for csv_path in ("data/train.csv", "data/val.csv")
)

In [None]:
# Debug shortcut (disabled): shrink val_data to the rows whose index is below
# 2000 (assumes val_data.data has a default integer index -- TODO confirm) and
# use that small set for both training and validation.
# val_data.data = val_data.data.drop(list(range(2000, len(val_data.data))))
# train_loader = DataLoader(val_data, batch_size=128, shuffle=True, num_workers=8)
# val_loader = DataLoader(val_data, batch_size=128, shuffle=True, num_workers=8)

# Training batches are reshuffled at every epoch.
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=8)
# Evaluation gains nothing from shuffling; a fixed order keeps validation
# passes deterministic and directly comparable between epochs.
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False, num_workers=8)

In [18]:
# TensorBoard event files are written below this directory.
log_dir = "tensorboard_logs/"
writer = SummaryWriter(logdir=log_dir)

# TODO: change this when using CLOUD GPU
device = 'cpu'

# Cross-entropy serves both as the training objective and as the function
# wrapped by the evaluator's 'loss' metric.
loss = F.cross_entropy

# Each configuration list has six entries (presumably one per convolution
# layer -- confirm against SentimentClassifier); only the last pool size is set.
model = SentimentClassifier(
    max_tokens=max_tokens,
    conv_num_kernels=[256] * 6,
    conv_kernel_sizes=[5, 5, 3, 3, 3, 3],
    pool_sizes=[None] * 5 + [3],
)
opt = torch.optim.SGD(model.parameters(), momentum=0.9, lr=0.01)

# Ignite engines: one that performs the optimisation steps, one that only
# computes the metrics below.
trainer = create_supervised_trainer(model, opt, loss, device=device)
evaluator = create_supervised_evaluator(
    model,
    metrics=dict(
        loss=Loss(loss),
        acc=Accuracy(),
        precision=Precision(),
        recall=Recall(),
    ),
    device=device,
)


# Description template for the progress bar (epoch number, latest loss).
# Kept at module level because the per-iteration handler reads it as well.
desc = "Epoch {} - loss: {:.4f}"


@trainer.on(Events.EPOCH_STARTED)
def create_pbar(engine):
    """Create a fresh progress bar at the start of every training epoch.

    The bar is stored in the module-level name ``pbar`` so that the other
    event handlers can update it.

    Args:
        engine: the Ignite trainer engine; ``engine.state.epoch`` supplies the
            epoch number shown in the description.
    """
    global pbar
    pbar = tqdm(
        # The bar advances once per *training* iteration, so its length must be
        # the number of training batches (it previously used len(val_loader)).
        initial=0, leave=False, total=len(train_loader),
        desc=desc.format(engine.state.epoch, 0)
    )

    
@trainer.on(Events.ITERATION_COMPLETED)
def log_training_loss(engine):
    """Show the latest iteration output in the progress bar and advance it.

    Args:
        engine: the Ignite trainer engine; ``engine.state.output`` is formatted
            with ``{:.4f}`` as the loss shown in the bar description.
    """
    state = engine.state
    pbar.desc = desc.format(state.epoch, state.output)
    pbar.update(1)

@trainer.on(Events.EPOCH_COMPLETED)
def log_results(engine):
    """Evaluate the model after each epoch and report the metrics.

    Runs the evaluator on the training and validation loaders, prints a table
    with loss, accuracy, precision and recall, and writes the same values to
    TensorBoard (precision/recall as one scalar per class, named through
    ``id_to_class``).

    Args:
        engine: the Ignite trainer engine; ``engine.state.epoch`` is used as
            the TensorBoard step.
    """
    # All training iterations of this epoch are done. The bar was created with
    # leave=False, so closing it removes it instead of leaving one stale
    # widget behind per epoch.
    pbar.close()

    epoch = engine.state.epoch
    table = [[None, "loss", "acc", "precision", "recall"]]
    for prefix, loader in (("train", train_loader), ("val", val_loader)):
        evaluator.run(loader)
        metrics = evaluator.state.metrics

        # Named epoch_loss so the module-level `loss` function is not shadowed.
        epoch_loss = metrics['loss']
        acc = metrics['acc']
        prec = metrics['precision']
        rec = metrics['recall']
        # .cpu() is a no-op for CPU tensors and keeps .numpy() valid should the
        # metric tensors ever live on a GPU.
        table.append([prefix, f"{epoch_loss:.4f}", f"{acc:.4f}",
                      prec.cpu().numpy(), rec.cpu().numpy()])

        writer.add_scalar(f"{prefix}/loss", epoch_loss, epoch)
        writer.add_scalar(f"{prefix}/acc", acc, epoch)
        for name, per_class in (("precision", prec), ("recall", rec)):
            for i, class_metric in enumerate(per_class):
                writer.add_scalar(f"{prefix}/{name}/{id_to_class[i]}", class_metric, epoch)

    print(tabulate(table))
# Train for 10 epochs. The TensorBoard writer is closed in `finally` so that
# buffered events are flushed even if training raises or the cell is
# interrupted from the notebook.
try:
    trainer.run(train_loader, max_epochs=10)
finally:
    writer.close()

HBox(children=(FloatProgress(value=0.0, description='Epoch 1 - loss: 0.0000', max=16.0, style=ProgressStyle(de…

-----  ------  ------  ----------  ----------
       loss    acc     precision   recall
train  0.2196  1.0000  [1. 0. 0.]  [1. 0. 0.]
val    0.2196  1.0000  [1. 0. 0.]  [1. 0. 0.]
-----  ------  ------  ----------  ----------


HBox(children=(FloatProgress(value=0.0, description='Epoch 2 - loss: 0.0000', max=16.0, style=ProgressStyle(de…

-----  ------  ------  ----------  ----------
       loss    acc     precision   recall
train  0.0049  1.0000  [1. 0. 0.]  [1. 0. 0.]
val    0.0049  1.0000  [1. 0. 0.]  [1. 0. 0.]
-----  ------  ------  ----------  ----------


HBox(children=(FloatProgress(value=0.0, description='Epoch 3 - loss: 0.0000', max=16.0, style=ProgressStyle(de…

-----  ------  ------  ----------  ----------
       loss    acc     precision   recall
train  0.0007  1.0000  [1. 0. 0.]  [1. 0. 0.]
val    0.0007  1.0000  [1. 0. 0.]  [1. 0. 0.]
-----  ------  ------  ----------  ----------


HBox(children=(FloatProgress(value=0.0, description='Epoch 4 - loss: 0.0000', max=16.0, style=ProgressStyle(de…

-----  ------  ------  ----------  ----------
       loss    acc     precision   recall
train  0.0004  1.0000  [1. 0. 0.]  [1. 0. 0.]
val    0.0004  1.0000  [1. 0. 0.]  [1. 0. 0.]
-----  ------  ------  ----------  ----------


HBox(children=(FloatProgress(value=0.0, description='Epoch 5 - loss: 0.0000', max=16.0, style=ProgressStyle(de…

-----  ------  ------  ----------  ----------
       loss    acc     precision   recall
train  0.0003  1.0000  [1. 0. 0.]  [1. 0. 0.]
val    0.0003  1.0000  [1. 0. 0.]  [1. 0. 0.]
-----  ------  ------  ----------  ----------


HBox(children=(FloatProgress(value=0.0, description='Epoch 6 - loss: 0.0000', max=16.0, style=ProgressStyle(de…

-----  ------  ------  ----------  ----------
       loss    acc     precision   recall
train  0.0003  1.0000  [1. 0. 0.]  [1. 0. 0.]
val    0.0003  1.0000  [1. 0. 0.]  [1. 0. 0.]
-----  ------  ------  ----------  ----------


HBox(children=(FloatProgress(value=0.0, description='Epoch 7 - loss: 0.0000', max=16.0, style=ProgressStyle(de…

-----  ------  ------  ----------  ----------
       loss    acc     precision   recall
train  0.0003  1.0000  [1. 0. 0.]  [1. 0. 0.]
val    0.0003  1.0000  [1. 0. 0.]  [1. 0. 0.]
-----  ------  ------  ----------  ----------


HBox(children=(FloatProgress(value=0.0, description='Epoch 8 - loss: 0.0000', max=16.0, style=ProgressStyle(de…

-----  ------  ------  ----------  ----------
       loss    acc     precision   recall
train  0.0003  1.0000  [1. 0. 0.]  [1. 0. 0.]
val    0.0003  1.0000  [1. 0. 0.]  [1. 0. 0.]
-----  ------  ------  ----------  ----------


HBox(children=(FloatProgress(value=0.0, description='Epoch 9 - loss: 0.0000', max=16.0, style=ProgressStyle(de…

-----  ------  ------  ----------  ----------
       loss    acc     precision   recall
train  0.0002  1.0000  [1. 0. 0.]  [1. 0. 0.]
val    0.0002  1.0000  [1. 0. 0.]  [1. 0. 0.]
-----  ------  ------  ----------  ----------


HBox(children=(FloatProgress(value=0.0, description='Epoch 10 - loss: 0.0000', max=16.0, style=ProgressStyle(d…

-----  ------  ------  ----------  ----------
       loss    acc     precision   recall
train  0.0002  1.0000  [1. 0. 0.]  [1. 0. 0.]
val    0.0002  1.0000  [1. 0. 0.]  [1. 0. 0.]
-----  ------  ------  ----------  ----------
