In [1]:
import json
import time
from collections import OrderedDict
from collections import namedtuple
from itertools import product
from pathlib import Path

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from IPython.display import display, clear_output
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms

torch.set_printoptions(linewidth=120)


##### **Training set**

In [2]:
# Training split of FashionMNIST, with each image converted to a tensor.
# download=True only fetches the files when they are not already present under
# `root`, so the notebook also runs on a machine that has never seen the data.
train_set = torchvision.datasets.FashionMNIST(
    root='./data/FashionMNIST',
    train=True,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor()
    ])
)

#### **Network definition**

In [3]:
class Network(nn.Module):
    """Two-convolution, three-linear-layer classifier producing 10 raw scores.

    The first linear layer expects 12*4*4 features, which is what the two
    conv(5x5) + max-pool(2x2) stages yield for a single-channel 28x28 input
    (28 -> 24 -> 12 -> 8 -> 4).
    """

    def __init__(self):
        super().__init__()
        # Convolutional stages: 1 -> 6 -> 12 channels, 5x5 kernels.
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 12, kernel_size=5)
        # Fully connected stages: 192 -> 120 -> 60 -> 10.
        self.fc1 = nn.Linear(12 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 60)
        self.out = nn.Linear(60, 10)

    def forward(self, t):
        """Run a batch through the network and return unnormalised class scores.

        Args:
            t: Input batch passed straight to ``conv1`` (one input channel).

        Returns:
            Tensor with one row of 10 scores per sample. No softmax is applied
            here because the cross-entropy loss used for training computes it
            implicitly.
        """
        # First conv stage: convolution -> ReLU -> 2x2 max-pool.
        features = F.max_pool2d(F.relu(self.conv1(t)), kernel_size=2, stride=2)

        # Second conv stage, same shape of pipeline.
        features = F.max_pool2d(F.relu(self.conv2(features)), kernel_size=2, stride=2)

        # Flatten every sample's feature maps into one vector for the dense layers.
        flat = features.reshape(-1, 12 * 4 * 4)

        # Two hidden dense layers with ReLU activations.
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))

        # Prediction layer: raw scores only (see docstring).
        return self.out(hidden)

#### **Build parameters**

In [4]:
class TuningBuilder():
    """Expands a hyperparameter grid into one named tuple per combination."""

    @staticmethod
    def get_params(params):
        """Return every combination of the supplied hyperparameter values.

        Args:
            params: Mapping from hyperparameter name to an iterable of
                candidate values. Key order fixes the field order of the
                resulting tuples; the last key varies fastest.

        Returns:
            list of ``Params`` namedtuples, one per element of the Cartesian
            product of the value lists.
        """
        Params = namedtuple('Params', params.keys())

        # Build the list directly instead of appending from inside a
        # comprehension that is evaluated only for its side effect.
        return [Params(*values) for values in product(*params.values())]

#### **Run Manager**

In [5]:
class RunManager():
    """Collects per-epoch metrics across training runs and reports them.

    For each run it logs an image grid, the model graph, scalars and parameter
    histograms to a TensorBoard ``SummaryWriter``, keeps one result row per
    epoch in ``run_data``, and re-displays the accumulated table after every
    epoch.
    """

    def __init__(self):
        # Per-epoch accumulators, reset by begin_epoch().
        self.epoch_count = 0
        self.epoch_loss = 0
        self.epoch_num_correct = 0
        self.epoch_num_samples = 0
        self.epoch_start_time = None

        # Per-run state, populated by begin_run().
        self.run_params = None
        self.run_count = 0
        self.run_data = []
        self.run_start_time = None

        self.network = None
        self.loader = None
        self.tb = None

    def begin_run(self, run, network, loader):
        """Start tracking a new run.

        Args:
            run: Hyperparameter tuple for this run; it is embedded in the
                TensorBoard run name and its fields (via ``_asdict()``) are
                added to every result row.
            network: Model whose graph and parameters are logged.
            loader: Data loader used for training; one batch is drawn from it
                here to log an image grid and trace the model graph.
        """
        self.run_start_time = time.time()

        self.run_params = run
        self.run_count += 1

        self.network = network
        self.loader = loader
        self.tb = SummaryWriter(comment=f'-{run}')

        images, labels = next(iter(self.loader))
        grid = torchvision.utils.make_grid(images)

        self.tb.add_image('images', grid)
        self.tb.add_graph(self.network, images)

    def end_run(self):
        """Close the TensorBoard writer and reset the epoch counter."""
        self.tb.close()
        self.epoch_count = 0

    def begin_epoch(self):
        """Advance the epoch counter and clear the per-epoch accumulators."""
        self.epoch_start_time = time.time()

        self.epoch_count += 1
        self.epoch_loss = 0
        self.epoch_num_correct = 0
        self.epoch_num_samples = 0

    def end_epoch(self):
        """Log the finished epoch to TensorBoard and append its result row.

        Loss and accuracy are the epoch totals divided by the dataset size.
        The full results table is redisplayed in place of the previous one.
        """
        current_time = time.time()
        epoch_duration = current_time - self.epoch_start_time
        run_duration = current_time - self.run_start_time

        loss = self.epoch_loss / len(self.loader.dataset)
        accuracy = self.epoch_num_correct / len(self.loader.dataset)

        self.tb.add_scalar('Loss', loss, self.epoch_count)
        self.tb.add_scalar('Accuracy', accuracy, self.epoch_count)

        for name, param in self.network.named_parameters():
            self.tb.add_histogram(name, param, self.epoch_count)
            # A parameter that took no part in backward() has no gradient.
            if param.grad is not None:
                self.tb.add_histogram(f'{name}.grad', param.grad, self.epoch_count)

        results = OrderedDict()
        results["run"] = self.run_count
        results["epoch"] = self.epoch_count
        results['loss'] = loss
        results["accuracy"] = accuracy
        results['epoch duration'] = epoch_duration
        results['run duration'] = run_duration
        results.update(self.run_params._asdict())
        self.run_data.append(results)

        df = pd.DataFrame(self.run_data)
        clear_output(wait=True)
        display(df)

    def track_loss(self, loss, batch_size=None):
        """Add one batch's loss to the epoch total.

        Args:
            loss: Single-value tensor (``.item()`` is called on it), treated as
                the mean loss over the batch.
            batch_size: Number of samples in this batch. When omitted it is
                inferred, so that a short final batch is not weighted as if it
                were full.
        """
        if batch_size is None:
            batch_size = self._infer_batch_size()
        self.epoch_loss += loss.item() * batch_size
        self.epoch_num_samples += batch_size

    def _infer_batch_size(self):
        """Return the size of the batch currently being tracked.

        Assumes the loader walks the dataset once per epoch (the same
        assumption end_epoch makes when dividing by the dataset size), so every
        batch is full except possibly the last, which holds whatever remains.
        """
        nominal = self.loader.batch_size
        if nominal is None:
            raise TypeError(
                'loader has no fixed batch_size; pass batch_size to track_loss()'
            )
        remaining = len(self.loader.dataset) - self.epoch_num_samples
        # More calls than the dataset accounts for: fall back to the nominal size.
        if remaining <= 0:
            return nominal
        return min(nominal, remaining)

    def track_num_correct(self, preds, labels):
        """Add the number of correct predictions in a batch to the epoch total."""
        self.epoch_num_correct += self._get_num_correct(preds, labels)

    @torch.no_grad()
    def _get_num_correct(self, preds, labels):
        """Count rows of ``preds`` whose arg-max along dim 1 equals the label."""
        return preds.argmax(dim=1).eq(labels).sum().item()

    def save(self, fileName):
        """Write all collected result rows to ``<fileName>.csv`` and ``.json``.

        Args:
            fileName: Output path without extension. Missing parent
                directories are created so a long search is not lost to a
                missing folder.
        """
        Path(fileName).parent.mkdir(parents=True, exist_ok=True)

        pd.DataFrame(self.run_data).to_csv(f'{fileName}.csv')
        with open(f'{fileName}.json', 'w', encoding='utf-8') as f:
            json.dump(self.run_data, f, ensure_ascii=False, indent=4)

### **Run hyperparameter search and save results + TensorBoard logs**

In [8]:
# Hyperparameter grid: every combination of these values is trained as one run.
params = OrderedDict(
    lr=[.01, .001],
    batch_size=[16, 128, 512],
    shuffle=[True, False],
)

NUM_EPOCHS = 15
SEED = 42
RESULTS_PATH = 'results/results'  # RunManager.save() appends .csv / .json

# Create the output folder before training so that a missing directory cannot
# make the final save fail after the whole search has already run.
Path(RESULTS_PATH).parent.mkdir(parents=True, exist_ok=True)

tunings = TuningBuilder.get_params(params)

run_mngr = RunManager()

for tuning_ in tunings:
    # Re-seed per run so every configuration starts from the same initial
    # weights and the search is reproducible.
    torch.manual_seed(SEED)

    network = Network()
    train_loader = DataLoader(
        train_set,
        batch_size=tuning_.batch_size,
        shuffle=tuning_.shuffle,
    )
    optimizer = optim.Adam(network.parameters(), lr=tuning_.lr)

    run_mngr.begin_run(tuning_, network, train_loader)

    for epoch in range(NUM_EPOCHS):
        run_mngr.begin_epoch()

        for batch in train_loader:
            images_, labels_ = batch
            preds_ = network(images_)
            loss = F.cross_entropy(preds_, labels_)
            optimizer.zero_grad()
            loss.backward()  # calculate gradients
            optimizer.step()

            run_mngr.track_loss(loss)
            run_mngr.track_num_correct(preds_, labels_)

        run_mngr.end_epoch()
    run_mngr.end_run()

run_mngr.save(RESULTS_PATH)


Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,batch_size,shuffle
0,1,1,0.615827,0.771083,39.321633,39.366830,0.010,16,True
1,1,2,0.493886,0.818633,39.740083,79.252358,0.010,16,True
2,1,3,0.475547,0.827917,29.199857,108.515252,0.010,16,True
3,1,4,0.465634,0.831883,29.222571,137.803357,0.010,16,True
4,1,5,0.464708,0.833450,29.383556,167.251486,0.010,16,True
5,1,6,0.459714,0.835633,29.403582,196.721485,0.010,16,True
6,1,7,0.460601,0.836033,29.320731,226.107949,0.010,16,True
7,1,8,0.470613,0.834367,31.769096,257.942766,0.010,16,True
8,1,9,0.460796,0.837883,43.223933,301.315907,0.010,16,True
9,1,10,0.454590,0.837600,43.326866,344.787131,0.010,16,True


In [9]:
# Inspect the batch size of `train_loader`, i.e. whichever loader the search
# loop assigned last; this cell only works after that loop has been run.
train_loader.batch_size

512