In [5]:
from __future__ import print_function, division

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision
from torchvision import models, transforms, datasets

import torch.optim as optim
from torch.utils.data import DataLoader

import PIL.Image as Image
from tqdm import tqdm
import os
import time

from sklearn.metrics import f1_score

from model.vgg_16 import *
from model.hybrid_CNN import *


In [6]:
# Hyperparameters and run configuration shared by the cells below.
num_epochs = 1  # number of full passes over the training loader
num_classes = 2  # NOTE(review): handed to initialize_model, which does not use it
batch_size = 1 # one image per batch: images must pass the hybrid conv layer one by one
learning_rate = 0.001  # learning rate given to the Adam optimizer
# NOTE: despite the name, this is a model *instance*, not a string. `vgg16` comes
# from one of the project-local star imports (presumably model.vgg_16 -- confirm).
model_name = vgg16(pretrained=True)

# Use the first CUDA device when available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [7]:
# get dataset
def load_data(batch_size, root='./data'):
    """
    Build the CelebA train/val/test dataloaders.

    All three splits share one preprocessing pipeline: resize to 224x224,
    convert to a tensor, then normalize each of the 3 channels with
    mean 0.5 and std 0.5. The datasets are opened with ``download=False``,
    so the CelebA files must already exist under ``root``.

    args:
        batch_size: number of samples per batch, used for every loader
        root: dataset root directory passed to ``datasets.CelebA``
              (defaults to './data', the location that used to be hard-coded)

    return:
        train_loader, val_loader, test_loader
        (only the training loader shuffles its samples)
    """
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3)
    ])

    def make_loader(split, shuffle):
        # One place to build a split so the three loaders cannot drift apart.
        dataset = datasets.CelebA(root=root,
                                  split=split,
                                  target_type='attr',
                                  transform=transform,
                                  download=False)
        return DataLoader(dataset=dataset,
                          batch_size=batch_size,
                          shuffle=shuffle)

    train_loader = make_loader('train', shuffle=True)
    val_loader = make_loader('valid', shuffle=False)
    test_loader = make_loader('test', shuffle=False)

    return train_loader, val_loader, test_loader

# Build the loaders once; train_loader and val_loader are consumed by later cells.
train_loader, val_loader, test_loader = load_data(batch_size)

In [8]:
def initialize_model(model_name, learning_rate, num_classes, device):
    """
    Place an already-constructed model on `device` and build its training tools.

    args:
        model_name: the model object itself (an nn.Module instance), not a string
        learning_rate: learning rate for the Adam optimizer
        num_classes: accepted for interface compatibility; not used in this function
        device: torch.device the model is moved to

    return:
        model, loss function(criterion), optimizer
    """
    # The classifier head is left exactly as the caller built it.
    net = model_name.to(device)
    loss_fn = nn.CrossEntropyLoss()
    adam = torch.optim.Adam(params=net.parameters(), lr=learning_rate)
    return net, loss_fn, adam


# Move the model to `device` and create the loss function and optimizer used for training.
model, criterion, optimizer = initialize_model(model_name, learning_rate, num_classes, device)

In [9]:
# Display the module tree to inspect the architecture.
model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [17]:
# Every child module of model.features except the last one (the slice drops the final entry).
layers = list(model.features.children())[:-1]
# Scratch ideas, not executed: replace the first conv with a single-channel
# version and rebuild the feature extractor on the GPU.
# layers[0] = nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1)
# features = nn.Sequential(*layers).cuda()
# NOTE(review): the next expression is evaluated and discarded; only the final
# expression of the cell (`layers`) is displayed.
list(model.features.children())
layers

[Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 ReLU(inplace=True),
 Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 ReLU(inplace=True),
 MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
 Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 ReLU(inplace=True),
 Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 ReLU(inplace=True),
 MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
 Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 ReLU(inplace=True),
 Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 ReLU(inplace=True),
 Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 ReLU(inplace=True),
 MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
 Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 ReLU(inplace=True),
 Conv2d(512, 512, kernel_size=(3, 3), stride=(1

In [5]:
def make_plots(step_hist, loss_hist, epoch=0):
    """
    Plot the recorded training loss of one epoch and save the figure to disk.

    args:
        step_hist: x values (training iteration numbers)
        loss_hist: y values (loss recorded at each entry of step_hist)
        epoch: zero-based epoch index; shown one-based in the title and file name

    The figure is saved as 'epoch_<epoch+1>' in the current working directory
    (no extension is given, so matplotlib's default output format applies).
    The current figure is cleared afterwards so the next call starts empty.
    """
    plt.plot(step_hist, loss_hist)
    plt.xlabel('train_iterations')
    plt.ylabel('Loss')
    plt.title('epoch'+str(epoch+1))
    # Derive the file name from the epoch: a fixed name made every epoch
    # overwrite the previous epoch's plot.
    plt.savefig('epoch_{}'.format(epoch + 1))
    plt.clf()

In [6]:
def train(train_loader, model, criterion, optimizer, num_epochs, device,
          log_every=100, ckpt_path='cnn2.ckpt'):
    """
    Train `model` for `num_epochs` passes over `train_loader`.

    For every batch, column 2 of the attribute labels is the prediction target
    (attractiveness) and column 20 is the covariate (gender) that is passed to
    the model together with the images. Every `log_every` steps the current
    loss is printed and recorded; after each epoch the recorded losses are
    plotted with `make_plots`. After the last epoch the model's state_dict is
    saved to `ckpt_path`.

    args:
        train_loader: iterable of (images, labels) batches
        model: network called as model(images, cov_attr)
        criterion: loss function called as criterion(outputs, label)
        optimizer: optimizer stepping the model's parameters
        num_epochs: number of passes over train_loader
        device: torch.device the images and target labels are moved to
        log_every: logging/recording interval in steps; must be a positive int
        ckpt_path: file the final state_dict is written to
    """
    # Make sure train-time layer behavior is active even if the model was
    # previously switched to eval mode (e.g. by a call to evaluate()).
    model.train()

    for epoch in range(num_epochs): # repeat the entire training `num_epochs` times
        loss_hist = []
        step_hist = []
        # Wrap the loader (not the enumerate object) so tqdm can read its
        # length and show a real progress bar with a total.
        for i, (images, labels) in enumerate(tqdm(train_loader)):
            label = labels[:, 2]   # attractiveness label
            cov_attr = labels[:, 20]    # gender (male/female)
            # Maps {-1, 1} to {0, 1}; values that are already 0/1 pass
            # through unchanged (0 -> 0, 1 -> 1).
            cov_attr = (cov_attr + 1) // 2

            # move to gpu if available
            # NOTE(review): cov_attr is left on the CPU, as before -- confirm
            # that the model expects it there.
            images = images.to(device)
            label = label.to(device)

            # forward pass
            outputs = model(images, cov_attr)    # model takes covariate here
            loss = criterion(outputs, label)

            # backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # periodic logging; the same value feeds the loss curve
            if (i+1) % log_every == 0:
                loss_value = loss.item()
                print('Epoch: [{}/{}], Step[{}/{}], Loss:{:.4f}' \
                    .format(epoch+1, num_epochs, i+1, len(train_loader), loss_value))
                loss_hist.append(loss_value)
                step_hist.append(i+1)

        make_plots(step_hist, loss_hist, epoch)

    torch.save(model.state_dict(), ckpt_path)

In [8]:
# Train the model. NOTE: the saved output below ends in KeyboardInterrupt, i.e. this
# run was stopped by hand before the epoch finished.
train(train_loader, model, criterion, optimizer, num_epochs, device)

1it [00:01,  1.26s/it]

outputs:  tensor([[-0.0158,  0.0180]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[ 21.9504, -18.9808]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')


5it [00:01,  1.54it/s]

outputs:  tensor([[ 24.5200, -21.3002]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[ 32.7455, -28.5372]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[ 27.6774, -25.3791]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs: 

7it [00:01,  2.11it/s]

 tensor([[ 14.1015, -14.8088]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-1.2093, -1.1095]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-4.8508,  1.6188]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs: 

11it [00:01,  3.75it/s]

 tensor([[-19.9043,  14.7780]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-23.7586,  18.5512]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-21.7062,  16.1625]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs: 

13it [00:02,  4.84it/s]

 tensor([[-6.6822,  4.0787]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-15.1380,  10.7511]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-12.2298,   8.5891]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  

17it [00:02,  7.36it/s]

tensor([[-9.3286,  6.0746]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-6.7607,  4.2628]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-9.3927,  5.9209]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs: 

19it [00:02,  8.70it/s]

 tensor([[-7.1750,  3.8659]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[ 5.8111, -5.5085]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[ 1.4740, -1.8814]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs: 

23it [00:02, 11.02it/s]

 tensor([[-0.0908, -0.6518]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-8.2143,  4.5018]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-5.6458,  2.9997]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')


25it [00:02, 11.87it/s]

outputs:  tensor([[-2.0424,  1.4699]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-3.3204,  2.1764]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-2.5663,  1.6169]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')


29it [00:03, 13.13it/s]

outputs:  tensor([[-2.8004,  1.8388]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-6.6591,  3.7654]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-1.2061,  0.5819]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')


31it [00:03, 13.68it/s]

outputs:  tensor([[-1.3420,  0.8321]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.6620,  0.2792]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-1.3583,  0.6805]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs: 

35it [00:03, 14.26it/s]

 tensor([[-0.9676,  0.5279]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-0.5728,  0.0541]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-1.2285,  0.2645]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs: 

37it [00:03, 14.33it/s]

 tensor([[-0.8742,  0.4029]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-3.2064,  1.5103]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-3.0172,  1.1981]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs: 

41it [00:03, 14.64it/s]

 tensor([[-0.0739,  0.0043]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-0.2561, -0.0300]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.6404, -0.0391]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs: 

43it [00:04, 14.79it/s]

 tensor([[-0.9933, -0.1530]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-0.6955, -0.0067]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-0.9567, -0.2993]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs: 

47it [00:04, 14.87it/s]

 tensor([[-0.6724, -0.2650]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.0426, -0.0797]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-0.2643, -0.3438]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs: 

49it [00:04, 14.89it/s]

 tensor([[-0.2355, -0.6364]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.1631, -0.4845]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-0.0048, -0.2383]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs: 

53it [00:04, 14.91it/s]

 tensor([[ 0.0052, -0.2775]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[ 0.0751, -0.2251]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[ 0.0127, -0.1792]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs: 

55it [00:04, 14.88it/s]

 tensor([[ 0.0031, -0.1328]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[ 0.0864, -0.5364]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[ 0.5351, -0.5129]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs: 

59it [00:05, 14.74it/s]

 tensor([[ 0.0292, -0.2085]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[ 0.1376, -0.2190]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-0.0098, -0.1169]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs: 

61it [00:05, 14.67it/s]

 tensor([[ 0.0976, -0.2916]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[ 0.2633, -0.4423]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[ 0.1747, -0.4650]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs: 

65it [00:05, 14.81it/s]

 tensor([[ 0.0396, -0.1868]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[ 0.0068, -0.1305]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[ 0.0608, -0.2217]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  

67it [00:05, 14.84it/s]

tensor([[ 0.0347, -0.1566]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[ 0.0140, -0.1387]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[ 0.0378, -0.1081]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs: 

71it [00:05, 14.85it/s]

 tensor([[ 0.0156, -0.0489]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[ 0.3224, -0.3069]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[ 0.0422, -0.0518]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs: 

73it [00:06, 14.77it/s]

 tensor([[ 0.2037, -0.3041]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[ 0.0714, -0.2764]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.0099, -0.0070]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs: 

77it [00:06, 15.09it/s]

 tensor([[ 0.0392, -0.0850]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[ 0.0145, -0.0611]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-0.0235, -0.1006]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs: 

79it [00:06, 15.07it/s]

 tensor([[-0.0055, -0.0158]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.0113, -0.2479]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[ 0.0462, -0.0738]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs: 

83it [00:06, 14.97it/s]

 tensor([[ 0.0694, -0.0846]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-0.0067,  0.0214]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[ 0.0071, -0.0524]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs: 

85it [00:06, 14.99it/s]

 tensor([[ 0.0788, -0.0729]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.0228, -0.0373]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[ 0.0241, -0.1177]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs: 

89it [00:07, 15.12it/s]

 tensor([[-0.0111, -0.0032]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.0041, -0.0028]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-0.0215, -0.0304]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs: 

91it [00:07, 15.03it/s]

 tensor([[-0.0110,  0.0025]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.0171, -0.0103]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[ 3.7153, -3.0526]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs: 

95it [00:07, 15.11it/s]

 tensor([[-0.0222,  0.0289]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.0228, -0.1483]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.0071,  0.0023]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  

97it [00:07, 15.12it/s]

tensor([[-0.0053, -0.0122]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-0.0043, -0.0026]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.0066,  0.0170]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs: 

101it [00:07, 14.76it/s]

 tensor([[-0.0029, -0.0274]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-0.2019,  0.1665]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
Epoch: [1/1], Step[100/162770], Loss:0.5258
outputs:  tensor([[-0.0204,  0.0017]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')


103it [00:08, 14.87it/s]

outputs:  tensor([[-0.0073,  0.0018]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.0414, -0.0224]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[ 0.0022, -0.0327]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs: 

107it [00:08, 15.01it/s]

 tensor([[-0.1008, -0.0032]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[ 0.5186, -0.3659]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.0317,  0.0119]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs: 

109it [00:08, 14.92it/s]

 tensor([[-0.0481, -0.0332]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.0486, -0.0033]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.0435,  0.0168]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs: 

113it [00:08, 15.07it/s]

 tensor([[-0.0693, -0.1177]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-0.0740, -0.0110]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.0487,  0.0187]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs: 

115it [00:08, 15.03it/s]

 tensor([[ 0.0039, -0.0358]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.1252, -0.0213]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-0.0317,  0.0074]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs: 

119it [00:09, 14.85it/s]

 tensor([[-0.3185,  0.3009]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.1322,  0.0855]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-0.1899, -0.1661]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs: 

121it [00:09, 14.91it/s]

 tensor([[-0.0395,  0.0217]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.0337,  0.0261]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-0.2478,  0.1669]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs: 

125it [00:09, 14.99it/s]

 tensor([[-0.0888,  0.0018]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-0.0391,  0.0021]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-0.0492,  0.0254]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')


127it [00:09, 15.04it/s]

outputs:  tensor([[-0.0814,  0.0251]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.0229, -0.0076]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[ 0.0013, -0.0312]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs: 

131it [00:09, 15.09it/s]

 tensor([[-0.0437,  0.0348]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-0.1913,  0.1214]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[ 0.0188, -0.0948]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs: 

133it [00:10, 15.11it/s]

 tensor([[-0.1229,  0.0848]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.4247,  0.1407]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-0.0035, -0.2474]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs: 

137it [00:10, 15.24it/s]

 tensor([[-0.0632, -0.1477]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.0902, -0.0611]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.1358, -0.0143]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs: 

139it [00:10, 15.14it/s]

 tensor([[ 0.0268, -0.1949]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.0852, -0.0535]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.0537, -0.0277]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')


143it [00:10, 15.19it/s]

outputs:  tensor([[-0.3018,  0.1856]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-0.0215, -0.0506]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-0.0428, -0.0476]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  

145it [00:10, 15.11it/s]

tensor([[-0.0126, -0.3729]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-0.0071, -0.1121]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[ 0.0153, -0.1976]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  

149it [00:11, 15.19it/s]

tensor([[-0.1251, -0.4406]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.0627, -0.0039]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.0173, -0.0843]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs: 

151it [00:11, 15.14it/s]

 tensor([[-0.1674,  0.0248]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-0.0447, -0.0112]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.0162, -0.0215]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs: 

155it [00:11, 15.09it/s]

 tensor([[ 0.0140, -0.0262]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.0483, -0.0169]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-0.0376, -0.0147]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs: 

157it [00:11, 15.07it/s]

 tensor([[-0.0857,  0.0552]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.1497, -0.0433]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.0470,  0.0224]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs: 

161it [00:11, 15.02it/s]

 tensor([[-0.0742, -0.0073]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-0.0112, -0.0250]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-0.0322,  0.0152]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs: 

163it [00:12, 15.02it/s]

 tensor([[ 0.0013, -0.0082]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.3508,  0.2742]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-0.0739, -0.0377]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs: 

167it [00:12, 15.11it/s]

 tensor([[-0.0694, -0.0941]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-0.0401,  0.0381]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-0.0106,  0.0213]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  

169it [00:12, 15.12it/s]

tensor([[-0.0170, -0.0061]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.0429, -0.0494]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-0.0864, -0.0014]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  

173it [00:12, 15.21it/s]

tensor([[-0.0495, -0.1217]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.0566, -0.1423]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-0.0245, -0.0138]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs: 

175it [00:12, 15.16it/s]

 tensor([[-0.1247, -0.0691]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.2158,  0.1017]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-0.1096,  0.0299]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  

179it [00:13, 15.15it/s]

tensor([[-0.1433,  0.0401]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.1003, -0.0304]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.1398, -0.2085]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  

181it [00:13, 14.98it/s]

tensor([[ 0.0036, -0.1102]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.0688, -0.0711]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs:  tensor([[-0.0361,  0.0563]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')
outputs: 

185it [00:13, 15.15it/s]

 tensor([[-0.0316,  0.0177]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.0680, -0.1661]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([1], device='cuda:0')
outputs:  tensor([[-0.0266, -0.0173]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')


186it [00:13, 13.62it/s]


outputs:  tensor([[-0.0539, -0.1965]], device='cuda:0', grad_fn=<AddmmBackward>)
label:  tensor([0], device='cuda:0')


KeyboardInterrupt: 

In [None]:
def evaluate(val_loader, model, device):
    """
    Run the validation set through the trained model and report accuracy.

    The model is called the same way as during training: with the images and
    the covariate taken from column 20 of the attribute labels (gender).
    Column 2 (attractiveness) is the target the predictions are compared to.

    args:
        val_loader: iterable of (images, labels) batches
        model: network called as model(images, cov_attr)
        device: torch.device the images and target labels are moved to

    return:
        accuracy in percent (float), or None when val_loader yields no samples
    """
    model.eval() # BatchNorm uses moving mean/variance instead of mini-batch mean/variance
    correct = 0
    total = 0
    with torch.no_grad():
        # pass through the validation data once
        for images, labels in val_loader:
            label = labels[:, 2]
            # Same covariate preparation as in train(), so the model sees
            # identical inputs at train and evaluation time.
            cov_attr = labels[:, 20]
            cov_attr = (cov_attr + 1) // 2

            images = images.to(device)
            label = label.to(device)

            # forward once, passing the covariate exactly like train() does
            outputs = model(images, cov_attr)
            # the predicted class is the index of the largest output per sample
            _, predicted = torch.max(outputs, 1)

            total += label.size(0) # number of samples in this batch
            correct += (label == predicted).sum().item()

    if total == 0:
        # Nothing was evaluated; avoid dividing by zero.
        print('No validation images to evaluate')
        return None

    accuracy = 100.0 * correct / total
    # Report the real sample count instead of a hard-coded number.
    print('Accuracy on {} validation images: {}%' \
            .format(total, accuracy))
    return accuracy
        
# Score the current model on the validation split.
evaluate(val_loader, model, device)