In [1]:
import torch
import torchvision
import torch.nn as nn
import torchvision.transforms as transforms
import torch.nn.functional as F
import matplotlib.pyplot as plt
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter

from collections import OrderedDict
from collections import namedtuple
from itertools import product

import numpy as np
import pandas as pd

import time
from IPython.display import clear_output
import json

import os

  warn(f"Failed to load image Python extension: {e}")


In [2]:
class RunBuilder():
    @staticmethod
    def get_runs(params):
        
        Run = namedtuple('Run',params.keys())
        runs = []
        for v in product(*params.values()):
            runs.append(Run(*v))
        return runs

In [18]:
class RunManager():
    def __init__(self):
        
        self.epoch_count = 0
        self.epoch_loss = 0
        self.epoch_num_correct = 0
        self.epoch_start_time = None
        
        self.run_params = None
        self.run_count = 0
        self.run_data = []
        self.run_start_time = None
        
        self.network = None
        self.loader = None
        self.tb = None
        
    def begin_run(self,run,network,loader):
        self.run_start_time = time.time()
        
        self.run_params = run
        self.run_count += 1
        
        self.network = network
        self.loader = loader
        self.tb = SummaryWriter(comment = f'-{run}')
        images,labels = next(iter(self.loader))
        grid = torchvision.utils.make_grid(images)
        self.tb.add_image('images',grid)
        self.tb.add_graph(self.network,images)
        
    def end_run(self):
        self.tb.close()
        self.epoch_count = 0
        
    def begin_epoch(self):
        self.epoch_start_time = time.time()
        
        self.epoch_count += 1
        self.epoch_loss = 0
        self.epoch_num_correct = 0
        
    def end_epoch(self):
        
        epoch_duration = time.time() - self.epoch_start_time
        run_duration = time.time() - self.run_start_time
        
        loss = self.epoch_loss / len(self.loader.dataset)
        accuracy = self.epoch_num_correct / len(self.loader.dataset)
        
        self.tb.add_scalar('Loss',loss,self.epoch_count)
        self.tb.add_scalar('Accuracy',accuracy,self.epoch_count)
        
        for name, param in self.network.named_parameters():
            self.tb.add_histogram(name,param,self.epoch_count)
            self.tb.add_histogram(f'{name}.grad',param.grad,self.epoch_count)
            
        results = OrderedDict()
        results["run"] = self.run_count
        results["epoch"] = self.epoch_count
        results["loss"] = loss
        results["accuracy"] = accuracy
        results["epoch duration"] = epoch_duration
        results["run duration"] = run_duration
        
        for k,v in self.run_params._asdict().items():
            results[k] = v
        
        self.run_data.append(results)
        df = pd.DataFrame.from_dict(self.run_data, orient='columns')
        
        clear_output(wait = True)
        display(df)
        
    def track_loss(self, loss):
        self.epoch_loss += loss.item() * self.loader.batch_size

    def track_num_correct(self,preds,labels):
        self.epoch_num_correct += self._get_num_correct(preds,labels)

    @torch.no_grad()
    def _get_num_correct(self,preds,labels):
        return preds.argmax(dim=1).eq(labels).sum().item()

    def save(self,fileName):

        pd.DataFrame.from_dict(
            self.run_data
            ,orient='columns').to_csv(f'{fileName}.csv')

        with open(f'{fileName}.json','w',encoding = 'utf-8') as f:
            json.dump(self.run_data,f,ensure_ascii=False,indent = 4)

In [19]:
class Network(nn.Module):
    def __init__(self):
        super(Network,self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1,out_channels=6,kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6,out_channels=12,kernel_size=5)
        
        self.fc1 = nn.Linear(in_features=12*4*4,out_features=120)
        self.fc2 = nn.Linear(in_features=120,out_features=60)
        self.out = nn.Linear(in_features=60,out_features=10)
    def forward(self,t):
        t = t
        
        t = self.conv1(t)
        t = F.relu(t)
        t = F.max_pool2d(t,kernel_size=2,stride=2)
        
        t = self.conv2(t)
        t = F.relu(t)
        t = F.max_pool2d(t,kernel_size=2,stride=2)
        
        t = t.reshape(-1,12*4*4)
        t = F.relu(self.fc1(t))
        
        t = F.relu(self.fc2(t))
        
        t = self.out(t)
        
        return t

In [20]:
train_set = torchvision.datasets.FashionMNIST(
    root = './data/FashionMNIST'
    ,train = True
    ,download = True
    ,transform = transforms.Compose([
    transforms.ToTensor()
    ])
)

In [21]:
params  = OrderedDict(
    lr=[0.01]
    ,bs = [100],
    shuffle = [False]
    ,num_workers = [1]
    ,device = ['cuda']
)

In [7]:
runs = RunBuilder.get_runs(params)
runs
for run in RunBuilder.get_runs(params):
    comment = f'-{run}'

In [8]:
# train_loader = torch.utils.data.DataLoader(train_set,batch_size =bs, shuffle =shuffle)

In [9]:
# tb = SummaryWriter(comment = comment)

# network = Network()
# images,labels = next(iter(train_loader))
# grid = torchvision.utils.make_grid(images)

# tb.add_image('images',grid)
# tb.add_graph(network,images)
# tb.close()

In [10]:
m = RunManager()

for run in RunBuilder.get_runs(params):
    
    network = Network().to(run.device)
    loader = torch.utils.data.DataLoader(train_set,batch_size =run.bs, shuffle = run.shuffle,num_workers =run.num_workers)
    optimizer = optim.Adam(network.parameters(),lr =run.lr)
    
    m.begin_run(run,network,loader)
    for epoch in range(10):
        m.begin_epoch()
        for batch in loader:
            images,labels = batch
            images= images.to(run.device)
            labels= labels.to(run.device)
            preds = network(images.to(run.device))
            loss = F.cross_entropy(preds,labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            m.track_loss(loss)
            m.track_num_correct(preds,labels)
        m.end_epoch()
    m.end_run()
m.save('results')

Unnamed: 0,run,epoch,loss,accuracy,epoch duration,run duration,lr,bs,shuffle,num_workers,device
0,1,1,0.552135,0.791833,9.889768,23.230461,0.01,100,False,1,cuda
1,1,2,0.374483,0.859417,8.181095,31.63396,0.01,100,False,1,cuda
2,1,3,0.339731,0.87415,7.759777,39.494467,0.01,100,False,1,cuda
3,1,4,0.327213,0.878717,7.835297,47.44043,0.01,100,False,1,cuda
4,1,5,0.322413,0.880683,8.067546,55.63563,0.01,100,False,1,cuda
5,1,6,0.311987,0.884633,7.948007,63.722265,0.01,100,False,1,cuda
6,1,7,0.306199,0.886217,8.094311,72.004074,0.01,100,False,1,cuda
7,1,8,0.298544,0.89065,7.404153,79.583753,0.01,100,False,1,cuda
8,1,9,0.297448,0.889967,7.420842,87.186089,0.01,100,False,1,cuda
9,1,10,0.29391,0.8924,7.751744,95.135304,0.01,100,False,1,cuda


In [11]:
# plt.plot(epochs,x)
# plt.show()

In [12]:
# plt.plot(epochs,y)

In [13]:
# total_correct/len(train_set)

In [29]:
@torch.no_grad()
def get_all_preds(model,loader):
    all_preds = torch.tensor([])
    all_preds = all_preds.to(run.device)
    for batch in loader:
        images,labels = batch
        images = images.to(run.device)
        preds = model(images)
        all_preds = torch.cat((all_preds,preds),dim =0)
    return all_preds

In [30]:
with torch.no_grad():
    pred_loader = torch.utils.data.DataLoader(train_set,batch_size = 10000)
    train_preds = get_all_preds(network,pred_loader)

In [32]:
conf_matrix = torch.zeros(10,10,dtype= torch.int32)

stacked = torch.stack(
    (train_set.targets,train_preds.argmax(dim=1)),dim =1
    )

for p in stacked:
    true,predicted = p.tolist()
    conf_matrix[true,predicted] += 1

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument tensors in method wrapper___cat)

In [None]:
import itertools
import numpy as np
import matplotlib.pyplot as plt

names = ('T-shirt/top','Trouser','Pullover','Dress','Coat','Sandal','Shirt','Sneaker','Bag','Ankle Boot')

def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt), horizontalalignment="center", color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    
    
plt.figure(figsize =(10,10))
plot_confusion_matrix(conf_matrix,names)

In [None]:
a = torch.tensor([[1, 2, 3], [4, 5, 6]])
a =a.to('cuda')
a

In [None]:
network = network.cuda()
for name,params in network.named_parameters():
    print(params.device,params.shape)