In [1]:
import os

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt # visualization
import seaborn as sns # visualization
# machine learning
import torch 
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import transforms, datasets, models

!pip3 install progressbar
import progressbar

In [17]:
# implemented classes
from BasicClassifier import BasicClassifier
from DataAugment import DataAug
from Metrics import Metrics

In [18]:
# Augmentation helper: its methods (rand_pixel, rand_row, ...) are looked up by
# name in later cells and applied to slices of each input batch.
daug = DataAug()

In [19]:
# Run on the first CUDA device when torch can see one, otherwise on the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device)

In [20]:
# Layout of the training progress bar, left to right:
# bracketed elapsed timer, percentage, the bar itself, bracketed ETA.
widgets = [
    ' [',
    progressbar.Timer(),
    '] ',
    progressbar.Percentage(),
    ' ',
    progressbar.Bar(),
    ' (',
    progressbar.ETA(),
    ') ',
]

In [21]:
# MNIST configuration and data loaders
dataset = "MNIST"  # also used as a folder name when the trained models are saved
BATCH_SIZE = 500
num_classes = 10


def _mnist_loader(is_train):
    """Return a shuffled DataLoader over the MNIST train or test split.

    The split is stored under ./data (downloaded when missing) and every image
    is converted with ToTensor. Batches hold BATCH_SIZE samples.
    """
    split = datasets.MNIST('./data',
                           train=is_train,
                           download=True,
                           transform=transforms.ToTensor())
    return torch.utils.data.DataLoader(split, batch_size=BATCH_SIZE, shuffle=True)


train_loader = _mnist_loader(True)
# NOTE(review): the test split is shuffled as well, exactly as before.
test_loader = _mnist_loader(False)

# Model Trainer

In [22]:
def train(model,train_loader,test_loader,proportion,funcs,func_proportions,inject=False,NUM_CLASSSES=10,NUM_EPOCHS=25):
    """Train ``model`` on augmented batches and evaluate it after every epoch.

    Every batch (train and test) is first passed through ``func_divider``,
    which applies the augmentations in ``funcs`` to slices of the batch.
    Training uses cross-entropy loss and Adam with a learning rate of 3e-5.

    Args:
        model: classifier to optimise. Must provide ``parameters()``,
            ``train()``/``eval()`` and a ``metric`` attribute exposing
            ``reset_confusion_matrix``, ``update_confusion_matrix`` and
            ``classification_metrics``.
        train_loader: iterable of ``(inputs, labels)`` training batches.
        test_loader: iterable of ``(inputs, labels)`` evaluation batches.
        proportion: forwarded unchanged to every augmentation function.
        funcs: mapping of augmentation name -> callable, see ``func_divider``.
        func_proportions: share of each batch given to each augmentation.
        inject: forwarded unchanged to every augmentation function.
        NUM_CLASSSES: number of classes used to reset the confusion matrix.
        NUM_EPOCHS: number of passes over ``train_loader``.

    Returns:
        ``(epoch_metrics, epoch_test_metrics)``: two dicts keyed by epoch index
        whose values are whatever ``model.metric.classification_metrics()``
        returned after the training pass and the evaluation pass of that epoch.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(),lr=3e-5)

    epoch_metrics = {}
    epoch_test_metrics = {}

    bar = progressbar.ProgressBar(NUM_EPOCHS*len(train_loader),widgets=widgets).start()
    for epoch in range(NUM_EPOCHS):
        # Re-enter training mode each epoch; the evaluation pass below switches it off.
        model.train()
        model.metric.reset_confusion_matrix(NUM_CLASSSES)

        for i, (inputs, labels) in enumerate(train_loader):
            optimizer.zero_grad()

            lossyinputs = func_divider(inputs,proportion,funcs,func_proportions,inject)
            # func_divider augments contiguous slices; permute samples and labels
            # together so the augmentations are interleaved within the batch.
            randomize = np.random.permutation(inputs.shape[0])
            lossyinputs = lossyinputs[randomize]
            labels = labels[randomize]

            outputs = model(lossyinputs.to(device))
            loss = criterion(outputs, labels.to(device))
            loss.backward()
            optimizer.step()
            # statistics (detached so the metric tracker never holds the autograd graph)
            model.metric.update_confusion_matrix(outputs.detach().to('cpu'),labels)
            # progressbar
            bar.update(epoch*len(train_loader)+i)
        epoch_metrics[epoch] = model.metric.classification_metrics()

        # Evaluate in inference mode so layers such as dropout / batch-norm
        # (if the model has any) behave deterministically.
        model.eval()
        with torch.no_grad():
            model.metric.reset_confusion_matrix(NUM_CLASSSES)
            for (inputs, labels) in test_loader:
                outputs = model(func_divider(inputs,proportion,funcs,func_proportions,inject).to(device))
                # statistics
                model.metric.update_confusion_matrix(outputs.to('cpu'),labels)
            epoch_test_metrics[epoch] = model.metric.classification_metrics()

    # Close the bar so it renders as complete and releases the output line.
    bar.finish()
    return (epoch_metrics,epoch_test_metrics)

def func_divider(inputs,proportion,funcs,func_proportions,inject):
    """Apply each augmentation in ``funcs`` to its own contiguous slice of a batch.

    The batch is cut along its first dimension into consecutive slices, one per
    entry of ``funcs`` (in iteration order). Slice ``i`` holds
    ``ceil(func_proportions[i] * batch_size)`` samples, truncated at the end of
    the batch. Samples not covered by any slice are returned unchanged.

    Args:
        inputs: batch tensor; it is cloned and the original is left untouched.
        proportion: forwarded unchanged as the second argument of each function.
        funcs: mapping of name -> callable ``f(slice, proportion, inject)``
            returning a tensor that can be assigned back into the slice.
        func_proportions: per-function share of the batch (list or ndarray),
            one entry per key of ``funcs``.
        inject: forwarded unchanged as the third argument of each function.

    Returns:
        A new tensor shaped like ``inputs`` with the augmented slices written in.
    """
    batch_size = inputs.shape[0]
    # asarray: a plain Python list must be scaled element-wise, not repeated
    # `batch_size` times as `list * int` would do.
    func_num = np.ceil(np.asarray(func_proportions,dtype=float)*batch_size).astype(int)
    lossyinputs = torch.clone(inputs)
    h = 0
    for i, func in enumerate(funcs):
        t = min(h + int(func_num[i]), batch_size)
        if t > h:  # nothing left to hand to this augmentation otherwise
            lossyinputs[h:t] = funcs[func](lossyinputs[h:t],proportion,inject)
        # The next slice starts where this one ended.
        h = t
    return lossyinputs
    
def display_training_metrics(name,epoch_metrics):
    """Plot precision, recall and accuracy against epoch on the current axes.

    Args:
        name: title of the plot.
        epoch_metrics: dict keyed by epoch; each value is a sequence whose
            entries 0, 1 and 2 are read as accuracy, precision and recall.
    """
    epochs = list(epoch_metrics.keys())
    # Build the (epochs x metrics) array once instead of once per curve.
    values = np.array(list(epoch_metrics.values()),dtype=float)
    sns.lineplot(x=epochs,y=values[:,1],label='precision')
    sns.lineplot(x=epochs,y=values[:,2],label='recall')
    sns.lineplot(x=epochs,y=values[:,0],label='accuracy')
    plt.xlabel('epoch')
    plt.title(name)
    # Without a legend the three curves cannot be told apart.
    plt.legend()
    
def display_testing_metrics(name,epoch_metrics):
    """Plot precision, recall and accuracy against loss level on the current axes.

    Both axes are fixed to the range [0, 1].

    Args:
        name: title of the plot.
        epoch_metrics: dict whose keys are used as x positions (labelled
            '% loss'); each value is a sequence whose entries 0, 1 and 2 are
            read as accuracy, precision and recall.
    """
    levels = list(epoch_metrics.keys())
    # Build the (levels x metrics) array once instead of once per curve.
    values = np.array(list(epoch_metrics.values()),dtype=float)
    sns.lineplot(x=levels,y=values[:,1],label='precision')
    sns.lineplot(x=levels,y=values[:,2],label='recall')
    sns.lineplot(x=levels,y=values[:,0],label='accuracy')
    plt.xlim([0, 1])
    plt.ylim([0, 1])
    plt.xlabel('% loss')
    plt.title(name)
    # Without a legend the three curves cannot be told apart.
    plt.legend()
    
def display_testing_metrics_hist(name,metrics):
    """Draw the first three entries of ``metrics`` as a bar chart titled ``name``.

    The bars are labelled accuracy, precision and recall, in that order, and
    the y axis is fixed to [0, 1].
    """
    bar_labels = ['accuracy','precision','recall']
    bar_heights = metrics[0:3]
    sns.barplot(x=bar_labels,y=bar_heights)
    plt.ylim([0, 1])
    plt.title(name)

In [23]:
# Augmentation name -> bound DataAug method of the same name.
# Building the mapping from the names keeps key and method in sync
# ('rand_block' used to be bound to rand_rowcol by mistake).
func_dict = {
    aug_name: getattr(daug, aug_name)
    for aug_name in (
        'rand_pixel',
        'rand_row',
        'rand_column',
        'rand_rowcol',
        'rand_block',
        'pattern_checkerboard',
        'pattern_column',
        'pattern_row',
    )
}

# Hybrid Model Generator

In [9]:
# Label used for the plot titles and the checkpoint file name of the hybrid model.
# NOTE(review): the hybrid model mixes five augmentations but is labelled
# "rand_pixel" - confirm this is intended.
name = "rand_pixel"
# Forwarded to every augmentation function as its second argument.
proportion = 0.5
# Name -> bound DataAug method of the same name
# ('rand_block' used to be bound to rand_rowcol by mistake).
funcs = {
    aug_name: getattr(daug, aug_name)
    for aug_name in (
        'rand_pixel',
        'rand_row',
        'rand_column',
        'rand_rowcol',
        'rand_block',
    )
}

# Every augmentation receives an equal share of each batch.
func_proportions = np.array([1./len(funcs)]*len(funcs))

In [None]:
# Train a single classifier on the current `funcs` / `func_proportions` mix.
model = BasicClassifier(num_classes)
model.to(device)
train_metrics, test_metrics = train(
    model,
    train_loader,
    test_loader,
    proportion,
    funcs,
    func_proportions,
    NUM_CLASSSES=num_classes,
    NUM_EPOCHS=250,
)

In [None]:
# One per-epoch figure for the training pass, then one for the evaluation pass.
for split_label, split_metrics in (('train', train_metrics), ('test', test_metrics)):
    display_training_metrics('{} - {}'.format(name, split_label), split_metrics)
    plt.show()

In [None]:
# Write the hybrid model's weights next to the notebook.
# NOTE(review): the file holds a torch state_dict despite the .h5 extension.
checkpoint_path = './{}.h5'.format(name)
torch.save(model.state_dict(), checkpoint_path)

# Multi Model Generator

In [24]:
# One classifier per augmentation; each entry is filled in by the loop below.
model_dict = {
    'rand_pixel':None,
    'rand_row':None,
    'rand_column':None,
    'rand_rowcol':None,
    'rand_block':None,
    # currently disabled:
#      'pattern_checkerboard':None,
#      'pattern_column':None,
#      'pattern_row':None
    }

# Forwarded to the augmentation function as its second argument.
proportion = 0.5

# model name -> (train metrics per epoch, test metrics per epoch), as returned by train().
model_training_info = {model_name:[None,None] for model_name in model_dict}

# The loop variable is `model_name` rather than `model` so it does not
# overwrite a classifier object that may be bound to `model`.
for model_name in model_dict:
    model_dict[model_name] = BasicClassifier(num_classes)
    model_dict[model_name].to(device)
    funcs = {model_name:getattr(daug,model_name)} # train on its own dataset
    # ndarray (not a list): multiplying by the batch size must scale each
    # proportion; a list would be repeated instead and almost none of the
    # batch would be augmented.
    func_proportions = np.array([1./len(funcs)]*len(funcs)) # evenly split training among data augment functions
    model_training_info[model_name] = train(model_dict[model_name],train_loader,test_loader,proportion,funcs,func_proportions,True,NUM_CLASSSES=num_classes,NUM_EPOCHS=120)

In [25]:
# Per-epoch curves for every trained model: training pass first, then evaluation pass.
# `model_name` keeps the loop from rebinding `model` to a string.
for model_name, training_info in model_training_info.items():
    display_training_metrics('{} - train'.format(model_name),training_info[0])
    plt.show()
    display_training_metrics('{} - test'.format(model_name),training_info[1])
    plt.show()

In [36]:
# One output folder per training setting. exist_ok tolerates folders that are
# already there while still surfacing real failures (permissions, bad path, ...).
for prop in [0.25,0.5,0.75,'definite']:
    os.makedirs('./models/{}/train({})'.format(dataset,prop), exist_ok=True)

In [34]:
# Save every trained model's weights under the folder of the current `proportion`.
save_dir = './models/{}/train({})'.format(dataset,proportion)
# Make sure the target folder exists so this cell also works on its own.
os.makedirs(save_dir, exist_ok=True)
# `model_name` keeps the loop from rebinding `model` to a string.
for model_name, trained_model in model_dict.items():
    torch.save(trained_model.state_dict(),'{}/{}.h5'.format(save_dir,model_name))