In [48]:
import argparse
from os.path import dirname
import torch
import torchvision
import os
import numpy as np
import tqdm

from utils.models import Classifier
from torch.utils.tensorboard import SummaryWriter
from utils.loader import Loader
from utils.loss import cross_entropy_loss_and_accuracy
from utils.dataset import NCaltech101
from torch.utils.data.dataloader import default_collate


In [49]:
torch.manual_seed(777)
np.random.seed(777)

In [50]:
validation_dataset="/ws/data/N-Caltech101/validation/"
training_dataset="/ws/data/N-Caltech101/training/"
log_dir="/ws/external/log/temp"
device="cuda:0"
num_workers=4
pin_memory=True
batch_size=4
num_epochs=2
save_every_n_epochs=2
checkpoint = "/ws/external/exp1/model_best.pth" # model_best.pth checkpoint_13625_0.5990.pth
    
    
assert os.path.isdir(dirname(log_dir)), f"Log directory root {dirname(log_dir)} not found."
assert os.path.isdir(validation_dataset), f"Validation dataset directory {validation_dataset} not found."
assert os.path.isdir(training_dataset), f"Training dataset directory {training_dataset} not found."

print(f"----------------------------\n"
      f"Starting training with \n"
      f"num_epochs: {num_epochs}\n"
      f"batch_size: {batch_size}\n"
      f"device: {device}\n"
      f"log_dir: {log_dir}\n"
      f"training_dataset: {training_dataset}\n"
      f"validation_dataset: {validation_dataset}\n"
      f"----------------------------")




----------------------------
Starting training with 
num_epochs: 2
batch_size: 4
device: cuda:0
log_dir: /ws/external/log/temp
training_dataset: /ws/data/N-Caltech101/training/
validation_dataset: /ws/data/N-Caltech101/validation/
----------------------------


In [51]:
def percentile(t, q):
    B, C, H, W = t.shape
    k = 1 + round(.01 * float(q) * (C * H * W - 1))
    result = t.view(B, -1).kthvalue(k).values
    return result[:,None,None,None]

def create_image(representation):
    B, C, H, W = representation.shape
    representation = representation.view(B, 3, C // 3, H, W).sum(2)

    # do robust min max norm
    representation = representation.detach().cpu()
    robust_max_vals = percentile(representation, 99)
    robust_min_vals = percentile(representation, 1)

    representation = (representation - robust_min_vals)/(robust_max_vals - robust_min_vals)
    representation = torch.clamp(255*representation, 0, 255).byte()

    representation = torchvision.utils.make_grid(representation)

    return representation
    

In [52]:
class Loader:
    def __init__(self, dataset, batch_size=2, num_workers=2, pin_memory=True, device="cuda:0"):
        self.device = device
        split_indices = list(range(len(dataset)))
        sampler = torch.utils.data.sampler.SubsetRandomSampler(split_indices)
        self.loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=sampler,
                                             num_workers=num_workers, pin_memory=pin_memory,
                                             collate_fn=collate_events)

    def __iter__(self):
        for data in self.loader:
            data = [d.to(self.device) for d in data]
            yield data

    def __len__(self):
        return len(self.loader)

def collate_events(data):
    labels = []
    events = []
    for i, d in enumerate(data):
        labels.append(d[1])
        ev = np.concatenate([d[0], i*np.ones((len(d[0]),1), dtype=np.float32)],1)
        events.append(ev)
    events = torch.from_numpy(np.concatenate(events,0))
    labels = default_collate(labels)
    return events, labels

In [53]:
# datasets, add augmentation to training set
training_dataset = NCaltech101(training_dataset, augmentation=True)
validation_dataset = NCaltech101(validation_dataset)

# construct loader, handles data streaming to gpu
training_loader = Loader(training_dataset, batch_size=batch_size, num_workers=num_workers, pin_memory=True, device="cuda:0")
validation_loader = Loader(validation_dataset, batch_size=batch_size, num_workers=num_workers, pin_memory=True, device="cuda:0")

In [61]:
# model, and put to device
model = Classifier(pretrained=False)
ckpt = torch.load(checkpoint)
model.load_state_dict(ckpt["state_dict"])
model = model.to(device)

# # optimizer and lr scheduler
# optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
# lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.5)

writer = SummaryWriter(log_dir)

iteration = 0
min_validation_loss = 1000

In [62]:
_, (events, labels) = next(enumerate(validation_loader))

In [63]:
np.shape(events)

torch.Size([624687, 5])

In [64]:
labels

tensor([24, 32, 73, 38], device='cuda:0')

In [65]:
optimizer.zero_grad()
pred_labels, representation = model(events)
# loss, accuracy = cross_entropy_loss_and_accuracy(pred_labels, labels)
# loss.backward()
optimizer.step()

In [66]:
pred_labels.argmax(dim=1)

tensor([ 33,  76,  92, 100], device='cuda:0')

In [60]:
np.shape(pred_labels)

torch.Size([4, 101])

In [28]:
representation_vizualization = create_image(representation)
writer.add_image("training/representation", representation_vizualization, iteration)

In [5]:

for i in range(num_epochs):
    sum_accuracy = 0
    sum_loss = 0
    model = model.eval()

    print(f"Validation step [{i:3d}/{num_epochs:3d}]")
    for events, labels in tqdm.tqdm(validation_loader):

        with torch.no_grad():
            pred_labels, representation = model(events)
            loss, accuracy = cross_entropy_loss_and_accuracy(pred_labels, labels)

        sum_accuracy += accuracy
        sum_loss += loss

    validation_loss = sum_loss.item() / len(validation_loader)
    validation_accuracy = sum_accuracy.item() / len(validation_loader)

    writer.add_scalar("validation/accuracy", validation_accuracy, iteration)
    writer.add_scalar("validation/loss", validation_loss, iteration)

    # visualize representation
    representation_vizualization = create_image(representation)
    writer.add_image("validation/representation", representation_vizualization, iteration)

    print(f"Validation Loss {validation_loss:.4f}  Accuracy {validation_accuracy:.4f}")

    if validation_loss < min_validation_loss:
        min_validation_loss = validation_loss
        state_dict = model.state_dict()

        torch.save({
            "state_dict": state_dict,
            "min_val_loss": min_validation_loss,
            "iteration": iteration
        }, "log/model_best.pth")
        print("New best at ", validation_loss)

    if i % save_every_n_epochs == 0:
        state_dict = model.state_dict()
        torch.save({
            "state_dict": state_dict,
            "min_val_loss": min_validation_loss,
            "iteration": iteration
        }, "log/checkpoint_%05d_%.4f.pth" % (iteration, min_validation_loss))

    sum_accuracy = 0
    sum_loss = 0

    model = model.train()
    print(f"Training step [{i:3d}/{num_epochs:3d}]")
    for events, labels in tqdm.tqdm(training_loader):
        optimizer.zero_grad()

        pred_labels, representation = model(events)
        loss, accuracy = cross_entropy_loss_and_accuracy(pred_labels, labels)

        loss.backward()

        optimizer.step()

        sum_accuracy += accuracy
        sum_loss += loss

        iteration += 1

    if i % 10 == 9:
        lr_scheduler.step()

    training_loss = sum_loss.item() / len(training_loader)
    training_accuracy = sum_accuracy.item() / len(training_loader)
    print(f"Training Iteration {iteration:5d}  Loss {training_loss:.4f}  Accuracy {training_accuracy:.4f}")

    writer.add_scalar("training/accuracy", training_accuracy, iteration)
    writer.add_scalar("training/loss", training_loss, iteration)

    representation_vizualization = create_image(representation)
    writer.add_image("training/representation", representation_vizualization, iteration)

usage: Train classifier using a learnt quantization layer. [-h]
                                                           --validation_dataset
                                                           VALIDATION_DATASET
                                                           --training_dataset
                                                           TRAINING_DATASET
                                                           --log_dir LOG_DIR
                                                           [--device DEVICE]
                                                           [--num_workers NUM_WORKERS]
                                                           [--pin_memory PIN_MEMORY]
                                                           [--batch_size BATCH_SIZE]
                                                           [--num_epochs NUM_EPOCHS]
                                                           [--save_every_n_epochs SAVE_EVERY_N_EPOCHS]
Train classifier using a 

SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
