In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torchvision.transforms.functional as tf
import time

import numpy as np

from model import CRNN_Network

In [2]:
# Class for loading in dataset

import pandas as pd
class BachDataset(torch.utils.data.Dataset):
    def __init__(self, csv_file: str, root_dir: str):
        """
        Arguments:
            csv_file (string): Path to the csv file.
            root_dir (string): Directory with all the images.
        """
        self.data = pd.read_csv(root_dir + csv_file)
        self.root_dir = root_dir

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx: int = 0):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        print(self.root_dir + f'invent{idx+1}32-page1.png')
        img = torchvision.io.read_image(self.root_dir + f'invent{idx+1}32-page1.png')
        ly_list= []
        with open(self.root_dir + f'invent{idx+1}32.ly', 'r') as file:
            ly_list.append
            ly = np.fromstring(file.read().replace('\n', ''), dtype='str', sep=' ')
        
        sample = {'img': img, 'ly': ly}

        return sample


In [4]:
from nltk.tokenize import word_tokenize

sample_text = "this is a text ready to tokenize"
tokens = word_tokenize(sample_text)
print(tokens)

NLTK Downloader
---------------------------------------------------------------------------
    d) Download   l) List    u) Update   c) Config   h) Help   q) Quit
---------------------------------------------------------------------------

---------------------------------------------------------------------------
    d) Download   l) List    u) Update   c) Config   h) Help   q) Quit
---------------------------------------------------------------------------

Download which package (l=list; x=cancel)?
Packages:
  [ ] abc................. Australian Broadcasting Commission 2006
  [ ] alpino.............. Alpino Dutch Treebank
  [ ] averaged_perceptron_tagger_ru Averaged Perceptron Tagger (Russian)
  [ ] basque_grammars..... Grammars for Basque
  [ ] bcp47............... BCP-47 Language Tags
  [ ] biocreative_ppi..... BioCreAtIvE (Critical Assessment of Information
                           Extraction Systems in Biology)
  [ ] bllip_wsj_no_aux.... BLLIP Parser: WSJ Model
  [ ] book_gram

LookupError: 
**********************************************************************
  Resource [93mpunkt[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('punkt')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mtokenizers/punkt/PY3/english.pickle[0m

  Searched in:
    - '/home/evanmm3/nltk_data'
    - '/home/evanmm3/research/venv/nltk_data'
    - '/home/evanmm3/research/venv/share/nltk_data'
    - '/home/evanmm3/research/venv/lib/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/local/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/local/lib/nltk_data'
    - ''
**********************************************************************


In [None]:
from quant_parse import quant_parse

# Data normalization: get optimal quantization (1, 2, 4, 8, 16, 32) from .ly file.
# This has been done pre-training, but this block shows what code was run:
"""
for i in range(1, 16):
    q = quant_parse(i)
    print(q)
"""

# Optimal Quantization was used as a parameter in convert.sh for data normalization.

q = quant_parse(2)
q

In [None]:
import matplotlib.pyplot as plt

def imshow(img):
    npimg = img.numpy()
    plt.imshow(npimg)
    plt.show()

batch_size = 128

root_dir = 'outputs/bach/invent/'

trainset = BachDataset(csv_file='dataset.csv', root_dir=root_dir)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)

dataiter = iter(trainloader)

img, ly = next(dataiter)

imshow(torchvision.utils.make_grid(img[:1], padding=0))


In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
from torch.nn import Module
from torch.optim import Optimizer 
from torch import Tensor
from torch.utils.data import DataLoader

net = CRNN_Network()
net = net.to(device)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.0001)

In [None]:
# Training Loop for CNN
def train(crnn: CRNN_Network, num_epochs: int):
    for e in range(num_epochs):

        running_loss: float = 0.

        for i, (img, ly) in trainloader:
            images, lys = img.to(device), ly.to(device)

            optimizer.zero_grad()

            outputs = crnn(images)
            loss = criterion(outputs, lys)
            loss.backward()
            optimizer.step()

            predicted = torch.argmax(outputs, 1).to(device)

            running_loss += loss.item()

            print_freq = 100
            if i % print_freq == (print_freq - 1):
                print(f'[{e + 1}, {i + 1:5d}] loss: {running_loss / print_freq:.3f} acc: {100 * running_correct / running_total:.2f} time: {time.time() - start_time:.2f}')
                running_loss, running_correct, running_total = 0.0, 0.0, 0.0
                start_time = time.time()

    print("Training complete!")


In [None]:
num_epochs = 45
init_lr = 0.001
decay_epochs = 15
task = 'rotation'
train(net, criterion, optimizer, num_epochs, decay_epochs, init_lr, task)
torch.save(net.state_dict(), f"./model-ep{num_epochs}-initlr_{init_lr}-decay{decay_epochs}-{task}.pth")