In [1]:
# Mount Google Drive inside the Colab runtime so the project stored there is reachable.
from google.colab import drive
drive.mount('/content/gdrive')
# Work from the project folder: the relative paths used later ("./data/", "./ckpts")
# and the local module imports (model, utils, data, build_vocab) resolve against it.
%cd /content/gdrive/My\ Drive/Colab/ITSP

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
/content/gdrive/My Drive/Colab/ITSP


In [2]:
# List the working directory to confirm the project modules and data folders are present.
!ls

build_vocab.py	data.py		  model		    __pycache__  train.py
ckpts		evaluate.py	  preprocess.ipynb  results	 utils.py
data		evaluation.ipynb  preprocess.py     train.ipynb  vocab.ipynb


In [0]:
import argparse
from argparse import Namespace
from functools import partial

import torch
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau

from model import Im2LatexModel, Trainer
from utils import collate_fn, get_checkpoint
from data import Im2LatexDataset
from build_vocab import Vocab, load_vocab

In [0]:
# Run configuration gathered into one Namespace. The two groups below are merged
# in order, so attribute order matches the listing.
args = Namespace(
    # ---- model ----
    **dict(
        emb_dim=80,
        dec_rnn_h=512,
        data_path="./data/",
        add_position_features=False,
    ),
    # ---- training ----
    **dict(
        max_len=150,
        dropout=0,
        cuda=True,
        batch_size=8,
        epoches=1,
        lr=3e-4,
        min_lr=3e-5,
        # alternatives to teacher forcing: 'exp', 'inv_sigmoid'
        sample_method="teacher_forcing",
        decay_k=1.0,
        lr_decay=0.5,
        lr_patience=3,
        clip=2.0,
        save_dir="./ckpts",
        print_freq=100,
        seed=2020,
        from_check_point=False,
    ),
)

In [5]:
# Echo the configuration so the settings used for this run are recorded in the cell output.
args

Namespace(add_position_features=False, batch_size=8, clip=2.0, cuda=True, data_path='./data/', dec_rnn_h=512, decay_k=1.0, dropout=0, emb_dim=80, epoches=1, from_check_point=False, lr=0.0003, lr_decay=0.5, lr_patience=3, max_len=150, min_lr=3e-05, print_freq=100, sample_method='teacher_forcing', save_dir='./ckpts', seed=2020)

In [6]:
# Capture the requested epoch count and the resume flag *before* `args` is possibly
# replaced by the arguments stored in the checkpoint.
max_epoch = args.epoches
from_check_point = args.from_check_point
if from_check_point:
    checkpoint_path = get_checkpoint(args.save_dir)
    # map_location="cpu" lets a checkpoint written during a GPU run be opened on a
    # CPU-only runtime as well; the later load_state_dict calls are expected to place
    # the values on whatever device the model and optimizer live on.
    # NOTE(review): torch.load unpickles arbitrary Python objects - only resume from
    # checkpoint files you trust.
    checkpoint = torch.load(checkpoint_path, map_location="cpu")
    # From here on, the stored arguments are the ones in effect.
    args = checkpoint['args']
print("Training args:", args)

# Seed the CPU and CUDA random number generators for repeatable runs.
torch.manual_seed(args.seed)
torch.cuda.manual_seed(args.seed)

Training args: Namespace(add_position_features=False, batch_size=8, clip=2.0, cuda=True, data_path='./data/', dec_rnn_h=512, decay_k=1.0, dropout=0, emb_dim=80, epoches=1, from_check_point=False, lr=0.0003, lr_decay=0.5, lr_patience=3, max_len=150, min_lr=3e-05, print_freq=100, sample_method='teacher_forcing', save_dir='./ckpts', seed=2020)


In [7]:
# Read the vocabulary from the data directory.
print("Load vocab...")
vocab = load_vocab(args.data_path)

# Run on the GPU only when it was requested in args AND one is actually available.
use_cuda = bool(args.cuda and torch.cuda.is_available())
device = torch.device("cuda" if use_cuda else "cpu")

Load vocab...
Load vocab including 298 words!


In [8]:
def _make_loader(split):
    """Return a DataLoader over the given dataset split ('train' or 'validate').

    Both splits share the same batch size, collate function (bound to the
    vocabulary's sign2id mapping), pinned-memory setting and worker count.
    """
    return DataLoader(
        Im2LatexDataset(args.data_path, split, args.max_len),
        batch_size=args.batch_size,
        collate_fn=partial(collate_fn, vocab.sign2id),
        pin_memory=bool(use_cuda),
        num_workers=4,
    )


# training split
print("Construct data loader...")
train_loader = _make_loader('train')
print("Construct data loaded!")

# validation split
print("Validation data loader...")
val_loader = _make_loader('validate')
print("Validation data loaded!")

Construct data loader...
Construct data loaded!
Validation data loader...
Validation data loaded!


In [9]:
# Instantiate the network, sized by the vocabulary, and move it to the chosen device.
print("Construct model")
vocab_size = len(vocab)
model = Im2LatexModel(
    vocab_size,
    args.emb_dim,
    args.dec_rnn_h,
    add_pos_feat=args.add_position_features,
    dropout=args.dropout,
).to(device)
print("Model Settings:")
print(model)

# Adam over all model parameters, starting at the configured learning rate.
optimizer = optim.Adam(model.parameters(), lr=args.lr)

# Multiply the learning rate by args.lr_decay once the monitored value has stopped
# decreasing for args.lr_patience scheduler steps, never going below args.min_lr.
# Which value is monitored is decided by whoever calls lr_scheduler.step(...)
# (presumably the Trainer - confirm there).
lr_scheduler = ReduceLROnPlateau(
    optimizer,
    mode="min",
    factor=args.lr_decay,
    patience=args.lr_patience,
    verbose=True,
    min_lr=args.min_lr,
)

Construct model
Model Settings:
Im2LatexModel(
  (cnn_encoder): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=1, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=1, dilation=1, ceil_mode=False)
    (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): MaxPool2d(kernel_size=(2, 1), stride=(2, 1), padding=0, dilation=1, ceil_mode=False)
    (11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1))
    (12): ReLU()
  )
  (rnn_decoder): LSTMCell(592, 512)
  (embedding): Embedding(298, 80)
  (init_wh): Linear(in_features=512, out_features=512, bias=True)
  (init_wc): Linear(in_features=512, out_features=512, bias=True)
  (init_

In [0]:
# Decide the epoch range, restoring saved state first when resuming.
if from_check_point:
    # Bring model, optimizer and scheduler back to their checkpointed state.
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint['epoch']
    lr_scheduler.load_state_dict(checkpoint['lr_sche'])
    # Continue from the stored epoch up to the epoch count requested for this run.
    start_epoch, final_epoch = epoch, max_epoch
else:
    # Fresh run: start at epoch 1 and train for the configured number of epochs.
    start_epoch, final_epoch = 1, args.epoches

trainer = Trainer(
    optimizer, model, lr_scheduler,
    train_loader, val_loader, args,
    use_cuda=use_cuda,
    init_epoch=start_epoch,
    last_epoch=final_epoch,
)

In [0]:
# Start training with the trainer configured above. Progress lines (epoch, step,
# loss, perplexity) appear in the cell output; their spacing presumably follows
# args.print_freq - confirm in Trainer.
trainer.train()

Epoch: 1
Epoch 1, step:100/1047 9.55%, Loss:3.7108, Perplexity:13.0933
Epoch 1, step:200/1047 19.10%, Loss:3.0914, Perplexity:8.5230
Epoch 1, step:300/1047 28.65%, Loss:2.9215, Perplexity:7.5763
Epoch 1, step:400/1047 38.20%, Loss:2.7822, Perplexity:6.8791
Epoch 1, step:500/1047 47.76%, Loss:2.6227, Perplexity:6.1589
