# Load dataset from Google Drive

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!mkdir -p ./data/VNOnDB

In [3]:
!unzip './drive/My Drive/VNOnDB/word_train.zip' -d ./data/VNOnDB >> log_extract.txt
print('Extracted word_train.zip')

replace ./data/VNOnDB/word_train/20140927_0017_6046_1_tg_4_4_1.png? [y]es, [n]o, [A]ll, [N]one, [r]ename: N
N
N
Extracted word_train.zip


In [4]:
!unzip './drive/My Drive/VNOnDB/word_val.zip' -d ./data/VNOnDB >> log_extract.txt
print('Extracted word_val.zip')

replace ./data/VNOnDB/word_val/20151224_0141_7818_1_tg_0_0_0.png? [y]es, [n]o, [A]ll, [N]one, [r]ename: N
Extracted word_val.zip


In [5]:
!unzip './drive/My Drive/VNOnDB/word_test.zip' -d ./data/VNOnDB >> log_extract.txt
print('Extracted word_test.zip')

replace ./data/VNOnDB/word_test/20151208_0146_7105_1_tg_0_0_0.png? [y]es, [n]o, [A]ll, [N]one, [r]ename: N
Extracted word_test.zip


In [6]:
!cp './drive/My Drive/VNOnDB/train_word.csv' ./data/VNOnDB/
!cp './drive/My Drive/VNOnDB/test_word.csv' ./data/VNOnDB/
!cp './drive/My Drive/VNOnDB/validation_word.csv' ./data/VNOnDB/
print('Copied csv files')

Copied csv files


In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import AxesGrid

from utils import encode, decode, eos_char, alphabets, to_one_hot
from dataset import VNOnDB
from model import Model

import pandas as pd

import pdb

In [2]:
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim

from torch.utils.data import Dataset, DataLoader


from Levenshtein import distance

# Train

In [5]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cpu'

In [6]:
batch_size = 8
hidden_size = 256
vocab_size = len(alphabets)
learning_rate = 1e-8

In [7]:
model = Model(
    batch_size,
    hidden_size,
    vocab_size,
    device
)

In [8]:
train_folder = './data/VNOnDB/word_train/'
val_folder = './data/VNOnDB/word_val/'
test_folder = './data/VNOnDB/word_test/'

train_df = pd.read_csv(f'./data/VNOnDB/train_word.csv', sep='\t', index_col=0)
val_df = pd.read_csv(f'./data/VNOnDB/validation_word.csv', sep='\t', index_col=0)
test_df = pd.read_csv(f'./data/VNOnDB/test_word.csv', sep='\t', index_col=0)

train_dataset = VNOnDB(f'./data/VNOnDB/word_train', train_df, image_transform, label_transform)
val_dataset = VNOnDB(f'./data/VNOnDB/word_val', val_df, image_transform, label_transform)
test_dataset = VNOnDB(f'./data/VNOnDB/word_test', test_df, image_transform, label_transform)

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=to_batch)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=True, collate_fn=to_batch)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=True, collate_fn=to_batch)

In [9]:
model_path = './model_ckpt'
if not os.path.exists(model_path):
    os.mkdir(model_path)

In [10]:
def save_model(epoch, model, optimizer, is_best):
    state = {'epoch': epoch,
             'model': model.state_dict(),
             'optimizer': optimizer}
    filename = 'checkpoint_' + str(epoch) + '.pth.tar'
    filepath = os.path.join(model_path, filename)
    torch.save(state, filepath)
    # If this checkpoint is the best so far, store a copy so it doesn't get overwritten by a worse checkpoint
    if is_best:
        torch.save(state, os.path.join(model_path, 'BEST_' + filename))

In [11]:
optimizer = optim.Adam(model.parameters())
criterion = nn.CrossEntropyLoss()

In [12]:
def evaluation(model, data_loader):
    model.to(device)
    model.eval()
    
    with torch.no_grad():
        for i, (inputs, labels, labels_one_hot, label_lengths) in enumerate(data_loader):
            inputs = inputs.type(float_dtype).to(device)
            labels_one_hot = labels_one_hot.type(float_dtype).to(device)
            labels = labels.type(long_dtype).to(device)
            label_lengths = label_lengths.type(long_dtype).to(device)
            
            outputs = model(inputs, labels)

In [None]:
n_epochs = 10
float_dtype = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor
long_dtype = torch.cuda.LongTensor if device == 'cuda' else torch.LongTensor

model.to(device)
model.train()

for epoch in range(n_epochs):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, (inputs, labels, labels_one_hot, label_lengths) in enumerate(train_loader):
        pdb.set_trace()
        inputs = inputs.type(float_dtype).to(device) # [B, 3, H, W]
        labels_one_hot = labels_one_hot.type(float_dtype).to(device) # [T, B, V]
        labels = labels.type(long_dtype).to(device) # [T, B, 1]
        label_lengths = label_lengths.type(long_dtype).to(device) # [B, 1]
        
        packed_labels = nn.utils.rnn.pack_padded_sequence(labels_one_hot, label_lengths)
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        packed_outputs = model(inputs, packed_labels, label_lengths) # [T, B, V]
        outputs, input_sizes = nn.utils.rnn.pad_packed_sequence(packed_output)
        
        outputs = outputs.reshape(-1, vocab_size) # [T*B, V]
        labels = labels.reshape(-1) # [T*B*1]
        
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

> <ipython-input-23-df708ed4544b>(13)<module>()
-> inputs = inputs.type(float_dtype).to(device) # [B, 3, H, W]


(Pdb)  n


> <ipython-input-23-df708ed4544b>(14)<module>()
-> labels_one_hot = labels_one_hot.type(float_dtype).to(device) # [T, B, V]


(Pdb)  n


> <ipython-input-23-df708ed4544b>(15)<module>()
-> labels = labels.type(long_dtype).to(device) # [T, B, 1]


(Pdb)  


> <ipython-input-23-df708ed4544b>(16)<module>()
-> label_lengths = label_lengths.type(long_dtype).to(device) # [B, 1]


(Pdb)  


> <ipython-input-23-df708ed4544b>(18)<module>()
-> packed_labels = nn.utils.rnn.pack_padded_sequence(labels_one_hot, label_lengths)


(Pdb)  n


> <ipython-input-23-df708ed4544b>(20)<module>()
-> optimizer.zero_grad()


(Pdb)  packed_labels


PackedSequence(data=tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 1.],
        [0., 0., 0.,  ..., 0., 0., 1.],
        [0., 0., 0.,  ..., 0., 0., 1.]]), batch_sizes=tensor([8, 8, 8, 4, 1]), sorted_indices=None, unsorted_indices=None)


(Pdb)  n


> <ipython-input-23-df708ed4544b>(23)<module>()
-> packed_outputs = model(inputs, packed_labels, label_lengths) # [T, B, V]


(Pdb)  n


AttributeError: 'PackedSequence' object has no attribute 'size'
> <ipython-input-23-df708ed4544b>(23)<module>()
-> packed_outputs = model(inputs, packed_labels, label_lengths) # [T, B, V]


In [None]:
904 - 689

In [None]:
eos_char

In [14]:
!git clone https://github.com/yunjey/pytorch-tutorial

Cloning into 'pytorch-tutorial'...
remote: Enumerating objects: 816, done.[K
remote: Total 816 (delta 0), reused 0 (delta 0), pack-reused 816[K
Receiving objects: 100% (816/816), 12.78 MiB | 4.24 MiB/s, done.
Resolving deltas: 100% (432/432), done.
Checking connectivity... done.
