In [None]:
import numpy as np
import torch
from torch import nn, optim
import torchvision
from torchvision import datasets, transforms
from Models.selective_sequential import *
from Loss.triplet_regularized import *
from session import *
from LR_Schedule.cyclical import Cyclical
from LR_Schedule.cos_anneal import CosAnneal
from LR_Schedule.lr_find import lr_find
from callbacks import *
from validation import *
import Datasets.ImageData as ImageData
from Transforms.ImageTransforms import *
import util
from session import LossMeter, EvalModel
from Layers.flatten import Flatten

%load_ext autoreload
%autoreload 2

torch.cuda.set_device(0); torch.backends.cudnn.benchmark=True;

In [None]:
# Per-channel (R, G, B) mean / std used to normalize the images after they are
# converted to tensors.
cifar_mean = (0.4914, 0.4822, 0.4465)
cifar_std = (0.247, 0.243, 0.261)
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(cifar_mean, cifar_std),
])

# CIFAR-10 training split (downloaded into `root` if it is not already there)
# and a subset made of its first 3200 samples.
fulltrainset = datasets.CIFAR10(
    root='/media/drake/MX500/Datasets/cifar-10/train',
    train=True,
    download=True,
    transform=transform,
)
trainset = torch.utils.data.Subset(fulltrainset, np.arange(3200))

# CIFAR-10 test split, used for validation, with the same 3200-sample subset.
fullvalset = datasets.CIFAR10(
    root='/media/drake/MX500/Datasets/cifar-10/test',
    train=False,
    download=True,
    transform=transform,
)
valset = torch.utils.data.Subset(fullvalset, np.arange(3200))

# NOTE(review): both loaders wrap the FULL datasets; `trainset` / `valset` are
# not referenced anywhere else in this notebook. Confirm that training on the
# full data (rather than the 3200-sample subsets) is intended.
trainloader = torch.utils.data.DataLoader(fulltrainset, batch_size=32, shuffle=True)
valloader = torch.utils.data.DataLoader(fullvalset, batch_size=32, shuffle=False)

In [None]:
# ResNet-18 with random weights whose final `fc` layer is swapped for an empty
# nn.Sequential. `resnet` and `num_ftrs` are not used by the model built below;
# in this cell they only served a ResNet-backed variant
# (resnet -> fc 1000 -> fc 1000 -> 10 outputs) that is currently disabled.
resnet = torchvision.models.resnet18(pretrained=False)
num_ftrs = resnet.fc.in_features
resnet.fc = nn.Sequential()

# Baseline conv net. The first argument lists layer names; presumably these are
# the layers whose outputs SelectiveSequential exposes -- TODO confirm against
# Models.selective_sequential. Every convolution preserves the spatial size and
# each max-pool halves it, so a 32x32 input reaches `flatten` as 256 x 4 x 4.
model = SelectiveSequential(
    ['act64', 'act1', 'out'],
    {
        # 3 -> 64 channels, then downsample.
        'conv64': nn.Conv2d(3, 64, 5, padding=2),
        'act64': nn.ReLU(inplace=True),
        'max1': nn.MaxPool2d(2, 2),

        # 64 -> 192 channels, then downsample.
        'conv192': nn.Conv2d(64, 192, 5, padding=2),
        'act192': nn.ReLU(inplace=True),
        'max2': nn.MaxPool2d(2, 2),

        # Three 3x3 convolutions (192 -> 384 -> 256 -> 256), then downsample.
        'conv384': nn.Conv2d(192, 384, 3, padding=1),
        'act384': nn.ReLU(inplace=True),
        'conv256a': nn.Conv2d(384, 256, 3, padding=1),
        'act256a': nn.ReLU(inplace=True),
        'conv256b': nn.Conv2d(256, 256, 3, padding=1),
        'act256b': nn.ReLU(inplace=True),
        'max3': nn.MaxPool2d(2, 2),

        # Disabled: 'avgpool': nn.AdaptiveAvgPool2d((6, 6)),

        # Classifier head: two hidden layers of 2048 units, 10 outputs.
        'flatten': Flatten(),
        'fc1': nn.Linear(256 * 4 * 4, 2048),
        'act1': nn.ReLU(inplace=True),
        'fc2': nn.Linear(2048, 2048),
        'act2': nn.ReLU(inplace=True),
        'out': nn.Linear(2048, 10),
    },
)

In [None]:
# Loss for the baseline run. The first argument is 0 here and 0.1 in the later
# "with reg" run, so it is presumably the weight of the triplet regularization
# term (0 = disabled) -- TODO confirm against Loss.triplet_regularized. The
# meaning of the second argument (.5) is not visible from this notebook.
criterion = TripletRegularizedCrossEntropyLoss(0, .5)

In [None]:
# Training session for the baseline model: the model, the loss, the optimizer
# class (AdamW, passed uninstantiated) and 1e-4 -- presumably the initial
# learning rate; confirm against session.Session.
sess = Session(model, criterion, optim.AdamW, 1e-4)

In [None]:
# Baseline run: 60 epochs, with the cosine-anneal period set to the total
# number of training iterations (batches per epoch * epochs).
baseline_epochs = 60
baseline_iters_per_epoch = len(trainloader)

# The validator and the LR scheduler are both handed to the training schedule.
validator = EmbeddingSpaceValidator(valloader, 2, CustomOneHotAccuracy)
lr_scheduler = CosAnneal(baseline_iters_per_epoch * baseline_epochs, T_mult=1, lr_min=1e-7)
schedule = TrainingSchedule(trainloader, [lr_scheduler, validator])

sess.train(schedule, baseline_epochs)

In [None]:
# Plot what the baseline run's validator recorded (the exact figure is defined
# by EmbeddingSpaceValidator.plot, which is not shown in this notebook).
validator.plot()

In [None]:
# Best accuracies recorded by the baseline ("without reg") validator.
best_val_no_reg = np.max(validator.val_accuracies)
print(best_val_no_reg, " Best validation accuracy without reg")

best_train_no_reg = np.max(validator.train_accuracies)
print(best_train_no_reg, " Best train accuracy without reg")

# ResNet-18 backbone with random weights; its final `fc` layer is replaced by
# an empty nn.Sequential so the backbone's features feed the new head below.
resnet2 = torchvision.models.resnet18(pretrained=False)
num_ftrs = resnet2.fc.in_features
resnet2.fc = nn.Sequential()

# ResNet-backed model: backbone -> two hidden layers of 1000 units -> 10 outputs.
# The optional nn.Dropout(.05) layers after each activation are disabled.
# NOTE(review): the next cell rebinds `model2` to a different network before any
# Session is created, so this model is displayed below but never trained.
model2 = SelectiveSequential(
    ['act1', 'act2', 'out'],
    {
        'conv32a': resnet2,
        'fc1': nn.Linear(num_ftrs, 1000),
        'act1': nn.ReLU(inplace=True),
        'fc2': nn.Linear(1000, 1000),
        'act2': nn.ReLU(inplace=True),
        'out': nn.Linear(1000, 10),
    },
)

# Bare expression so the notebook shows the module's repr.
model2

In [None]:
# Network for the regularized run, with the same layers as the baseline `model`.
# The first argument lists layer names; presumably these are the layers whose
# outputs SelectiveSequential exposes -- TODO confirm against
# Models.selective_sequential. Every convolution preserves the spatial size and
# each max-pool halves it, so a 32x32 input reaches `flatten` as 256 x 4 x 4.
model2 = SelectiveSequential(
    ['act64', 'act1', 'out'],
    {
        # 3 -> 64 channels, then downsample.
        'conv64': nn.Conv2d(3, 64, 5, padding=2),
        'act64': nn.ReLU(inplace=True),
        'max1': nn.MaxPool2d(2, 2),

        # 64 -> 192 channels, then downsample.
        'conv192': nn.Conv2d(64, 192, 5, padding=2),
        'act192': nn.ReLU(inplace=True),
        'max2': nn.MaxPool2d(2, 2),

        # Three 3x3 convolutions (192 -> 384 -> 256 -> 256), then downsample.
        'conv384': nn.Conv2d(192, 384, 3, padding=1),
        'act384': nn.ReLU(inplace=True),
        'conv256a': nn.Conv2d(384, 256, 3, padding=1),
        'act256a': nn.ReLU(inplace=True),
        'conv256b': nn.Conv2d(256, 256, 3, padding=1),
        'act256b': nn.ReLU(inplace=True),
        'max3': nn.MaxPool2d(2, 2),

        # Disabled: 'avgpool': nn.AdaptiveAvgPool2d((6, 6)),

        # Classifier head: two hidden layers of 2048 units, 10 outputs.
        'flatten': Flatten(),
        'fc1': nn.Linear(256 * 4 * 4, 2048),
        'act1': nn.ReLU(inplace=True),
        'fc2': nn.Linear(2048, 2048),
        'act2': nn.ReLU(inplace=True),
        'out': nn.Linear(2048, 10),
    },
)

In [None]:
# Loss for the "with reg" run; rebinds `criterion`. Only the first argument
# differs from the baseline loss (0.1 instead of 0) -- presumably the weight of
# the triplet regularization term; TODO confirm against Loss.triplet_regularized.
criterion = TripletRegularizedCrossEntropyLoss(0.1, .5)

In [None]:
# Session for the regularized run; rebinds `sess`, so the baseline session is no
# longer reachable through this name. Same optimizer class and 1e-4 value as the
# baseline session (presumably the initial learning rate -- confirm against
# session.Session).
sess = Session(model2, criterion, optim.AdamW, 1e-4)

In [None]:
# lr_find(sess, trainloader, start_lr=1e-12)

In [None]:
# sess.set_lr(1e-2)

In [None]:
# Regularized run. It is compared against the baseline run at the end of the
# notebook, so it uses the same training recipe as the baseline: 60 epochs with
# a cosine anneal whose period is the total number of training iterations and
# the same lr_min. That leaves the loss as the only difference between the runs.
#
# Previously `lr_scheduler2` was constructed but never passed to
# TrainingSchedule, and the run lasted 63 epochs instead of 60, so this run and
# the baseline differed in more than the regularization term.
reg_epochs = 60

validator2 = EmbeddingSpaceValidator(valloader, 2, CustomOneHotAccuracy)
lr_scheduler2 = CosAnneal(len(trainloader) * reg_epochs, T_mult=1, lr_min=1e-7)
# Scheduler and validator are passed together, exactly as in the baseline run.
schedule2 = TrainingSchedule(trainloader, [lr_scheduler2, validator2])
sess.train(schedule2, reg_epochs)

In [None]:
# Plot what the regularized run's validator recorded (the exact figure is
# defined by EmbeddingSpaceValidator.plot, which is not shown in this notebook).
validator2.plot()

In [None]:
# Side-by-side comparison of the best validation accuracy of the two runs.
best_val_with_reg = np.max(validator2.val_accuracies)
print(best_val_with_reg, "Best accuracy with reg")

best_val_without_reg = np.max(validator.val_accuracies)
print(best_val_without_reg, "Best accuracy without reg")