#### Setup and load torch

In [None]:
%load_ext autoreload
%autoreload 2

from IPython.core.display import display, HTML
import sys,cv2
sys.path.append('../')
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

from Utils.utils import *
from Utils.f1_score import *
from ipywidgets import interact
%matplotlib inline
display(HTML("<style>.container { width:100% !important; }</style>"))

from jupyterthemes import jtplot
# set "context" (paper, notebook, talk, poster)
jtplot.style(theme='grade3',context='talk', fscale=2.5, spines=True, gridlines='-',ticks=True, grid=True, figsize=(6, 4.5))
plotcolor = (0, 0.6, 1.0)

import torch
import torchvision
import torchvision.models as models
import torch.optim as optim
import torch.nn as nn

import timeit,gc

print('Done.')

In [None]:
%%time

#Load training data
import deepdish as dd

data_folder = 'D:/data/HPA/all/'

d = dd.io.load(data_folder+'poi_0.h5')
X = d['X'].astype(np.float32) / 255.0 # torch likes float images

y = d['labels']

print("Done")

In [None]:
from skimage import io, transform
from Utils.utils import *

# Resize every image in X to 224x224 and store it with a trailing channel axis.
# The output buffer is sized from X itself (it used to be hard-coded to 5000 rows),
# so shards of any length stay aligned with the label array y.
X_small = np.zeros((X.shape[0],224,224,1),dtype=np.float32)
for i,img in enumerate(X):
    # Refresh the progress bar only every 25 images to keep the output short.
    if i % 25 == 0:
        printProgressBar (i, X.shape[0], prefix = 'Resizing images...', suffix = '(' + str(i) + '/' + str(X.shape[0]) + ')')
    # squeeze() drops singleton axes before resizing; expand_dims restores the channel axis.
    X_small[i] = np.expand_dims(transform.resize(img.squeeze(), (224, 224)),axis=2)

X = X_small

print("Shapes are:")
print(X.shape)
print(y.shape)
print("Done.")

In [None]:
from Models.simpleConvNet import *
#Setup resnet model and optimizer

net = models.resnet18(pretrained=False)
# net.fc = nn.Linear(51200, 28) #adapt last layer to allow larger input images
net.fc = nn.Linear(512, 28) # replace the classifier head: 512 features -> 28 outputs
net.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3,bias=False) # first conv takes 1 input channel

# net = simpleConvNet()

#BCE Loss
# Per-class positive weight = (number of samples) / (number of positives of that class),
# computed from the label array y that is currently loaded.
positives_per_class = np.sum(y,axis=0)
with np.errstate(divide='ignore', invalid='ignore'):
    weights = np.asarray(y.shape[0]/positives_per_class, dtype=np.float64)
# Class 15 is pinned to the sample count (presumably because it has no positives
# in this shard - TODO confirm).
weights[15] = y.shape[0]
# Any other class without positives would give an infinite weight and a NaN loss;
# cap those at the sample count as well.
weights[~np.isfinite(weights)] = y.shape[0]
print("Weights = ",weights) # we weight classes given their skewed distribution
criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor(weights,dtype=torch.float))

optimizer = optim.Adam(net.parameters(),lr=0.0001)

# Only report CUDA availability; nothing in this cell moves the model to a GPU.
# get_device_name() raises when no CUDA device exists, so query it only if one is available.
cuda_available = torch.cuda.is_available()
print("Using GPU:",cuda_available)
if cuda_available:
    print("Using device ",torch.cuda.get_device_name(0))

gc.collect()

print("Done.")

In [None]:
# Start training
#
# Trains `net` for `epochs` epochs of `iterations_per_epoch` mini-batches. The
# training data is consumed shard by shard: X / y hold the current shard and the
# next one (poi_<partNr>.h5) is loaded and resized once the current one is used up.
gc.collect()

epochs = 1
batch_size = 50
time_per_epoch = 0

iterations_per_epoch = round(30000 / batch_size)
runtime = 0
partNr = 1
# NOTE(review): partNr is never reset, so with epochs > 1 the loop keeps loading
# higher-numbered shards (poi_6.h5 is loaded as validation data later in this
# notebook) - confirm the intended shard range before raising `epochs`.

net.train() # make sure BatchNorm layers are in training mode

for epoch in range(epochs):
    running_loss = 0
    running_f1 = 0

    current_buffer_size = X.shape[0]
    actual_idx = 0 # index of the next mini-batch inside the current shard
    for i in range(iterations_per_epoch):
        # Current shard exhausted -> load and resize the next one.
        if actual_idx*batch_size >= y.shape[0]:
            print("Loading data part " + str(partNr))
            d = dd.io.load(data_folder+'poi_'+str(partNr)+'.h5')
            X = d['X'].astype(np.float32) / 255.0 # torch likes float images
            y = d['labels']

            X_small = np.zeros((X.shape[0],224,224,1),dtype=np.float32)
            # Use a separate index (j) so the outer iteration counter i is not clobbered.
            for j,img in enumerate(X):
                if j % 25 == 0:
                    printProgressBar (j, X.shape[0], prefix = 'Resizing images...', suffix = '(' + str(j) + '/' + str(X.shape[0]) + ')')
                X_small[j] = np.expand_dims(transform.resize(img.squeeze(), (224, 224)),axis=2)

            X = X_small

            actual_idx = 0
            partNr += 1
            print("Done.")

        start = timeit.default_timer() #measure time

        # Non-overlapping mini-batch: actual_idx counts batches, not samples.
        lo = actual_idx*batch_size
        hi = lo + batch_size
        # Move the channel axis from last to second position before building the tensor.
        X_batch = torch.tensor(X[lo:hi].transpose(0,3,1,2))
        y_batch = torch.tensor(y[lo:hi].astype(np.float32),dtype=torch.float)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(X_batch)

        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        #compute F1 scores
        act = torch.sigmoid(outputs)

        # Builtin bool instead of np.bool (alias removed in NumPy 1.24).
        label = y_batch.detach().numpy().astype(bool)
        predictions = act.detach().numpy() > 0.5 # threshold the sigmoid activations
        print("Targets in batch = ",np.sum(label),"Predicted targets = ",np.sum(predictions))
        running_f1 += f1_score(label,predictions)

        #measure runtime
        stop = timeit.default_timer()
        # Exponentially smoothed estimate of the duration of a whole epoch.
        time_per_epoch = 0.5 * time_per_epoch + 0.5 * (stop-start) * iterations_per_epoch
        runtime += (stop-start)
        #print performance metrics
        # loss.item() is already averaged over the batch (default reduction of the
        # criterion), so the running mean divides by the number of batches.
        batches_done = i + 1
        print('[epoch = (%d/%d), iteration = (%3d/%d), time = %3ds, est. time per epoch = %5ds] \t loss = %.5f ## F1 = %.5f'\
              %(epoch + 1, epochs,i + 1, iterations_per_epoch, runtime, time_per_epoch, running_loss / batches_done, running_f1 / batches_done))

        actual_idx += 1

In [None]:
%%time
#Load validation data
d = dd.io.load(data_folder+'poi_6.h5')
Xval = d['X'].astype(np.float32) / 255.0 # torch likes float images
yval = d['labels']

print("Done")

In [None]:

# Resize every validation image to 224x224 and store it with a trailing channel axis.
Xval_small = np.zeros((Xval.shape[0],224,224,1),dtype=np.float32)
for i,img in enumerate(Xval):
    # Refresh the progress bar only every 25 images; the total is the number of
    # validation images (it used to be taken from the training array X).
    if i % 25 == 0:
        printProgressBar (i, Xval.shape[0], prefix = 'Resizing images...', suffix = '(' + str(i) + '/' + str(Xval.shape[0]) + ')')
    # squeeze() drops singleton axes before resizing; expand_dims restores the channel axis.
    Xval_small[i] = np.expand_dims(transform.resize(img.squeeze(), (224, 224)),axis=2)

Xval = Xval_small

print("Shapes are:")
print(Xval.shape)
print(yval.shape)
print("Done.")

In [None]:
# Run validation
#
# Evaluates `net` on Xval / yval in non-overlapping mini-batches and prints the
# running mean loss and F1 score. No parameters are updated.
gc.collect()

batch_size = 67
time_per_epoch = 0
# Round up so that a final, smaller batch is evaluated as well.
iterations_per_epoch = int(np.ceil(Xval.shape[0] / batch_size))
runtime = 0
running_loss = 0
running_f1 = 0

# Switch to evaluation mode (BatchNorm uses its running statistics instead of
# updating them) and restore the previous mode afterwards.
was_training = net.training
net.eval()
try:
    # No gradients are needed for validation.
    with torch.no_grad():
        for i in range(iterations_per_epoch):
            start = timeit.default_timer() #measure time

            # Non-overlapping mini-batch: i counts batches, not samples.
            lo = i*batch_size
            hi = lo + batch_size
            # Move the channel axis from last to second position before building the tensor.
            X_batch = torch.tensor(Xval[lo:hi].transpose(0,3,1,2))
            y_batch = torch.tensor(yval[lo:hi].astype(np.float32),dtype=torch.float)

            # forward pass only
            outputs = net(X_batch)

            loss = criterion(outputs, y_batch)

            running_loss += loss.item()

            #compute F1 scores
            act = torch.sigmoid(outputs)

            # Builtin bool instead of np.bool (alias removed in NumPy 1.24).
            label = y_batch.numpy().astype(bool)
            predictions = act.numpy() > 0.5 # threshold the sigmoid activations
            print("Targets in batch = ",np.sum(label),"Predicted targets = ",np.sum(predictions))
            running_f1 += f1_score(label,predictions)

            #measure runtime
            stop = timeit.default_timer()
            # Exponentially smoothed estimate of the duration of the whole pass.
            time_per_epoch = 0.5 * time_per_epoch + 0.5 * (stop-start) * iterations_per_epoch
            runtime += (stop-start)
            #print performance metrics
            # loss.item() is already averaged over the batch (default reduction of the
            # criterion), so the running mean divides by the number of batches.
            # `epoch` and `epochs` are the values left behind by the training cell.
            batches_done = i + 1
            print('[epoch = (%d/%d), iteration = (%3d/%d), time = %3ds, est. time per epoch = %5ds] \t loss = %.5f ## F1 = %.5f'\
                  %(epoch + 1, epochs,i + 1, iterations_per_epoch, runtime, time_per_epoch, running_loss / batches_done, running_f1 / batches_done))
finally:
    net.train(was_training)