In [1]:
%load_ext autoreload
%autoreload v2
%matplotlib inline

In [2]:
import keyword
import torch

In [3]:
import os
import torch
import tqdm
import numpy as np
import matplotlib.pylab as plt

from torch.utils.data import Dataset
from torch.utils.data import DataLoader

from torchvision import models
from torchvision import datasets, transforms, utils

import torch
import torchvision
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms

In [4]:
root_folder = './dataset'

In [5]:
transformation = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'valid': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
}

In [6]:
dataset = {x: datasets.ImageFolder(os.path.join(root_folder, x), transformation[x]) for x in ['train', 'valid']}
loader = {x: torch.utils.data.DataLoader(dataset[x], batch_size=32, shuffle=True, num_workers=4) for x in ['train', 'valid']}

dataset_size = {x: len(dataset[x]) for x in ['train', 'valid']}
class_names = dataset['train'].classes

In [7]:
from torch import nn, optim

In [8]:
output_size = len(dataset['train'].classes)
output_size

2

In [9]:
resnet18 = models.resnet18(pretrained=True)

In [10]:

resnet18.fc = torch.nn.Sequential(
    torch.nn.Linear(512, output_size),
    torch.nn.Softmax(dim=1))

loss_fce = nn.CrossEntropyLoss()
optimizer = optim.Adam(resnet18.fc.parameters())

In [11]:
for param in resnet18.parameters():
    param.requires_grad = False

In [12]:
for param in resnet18.fc.parameters():
    param.requires_grad = True

In [13]:
model = resnet18

In [14]:
model.cuda()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Co

In [23]:
def get_valid_acc_and_loss(model, loss_fce, valid_loader):
    accuracy = 0
    loss = 0
    was_training = model.training
    model.eval()
    
    for images, labels in valid_loader:
        images = images.cuda()
        labels = labels.cuda()
        predictions = model(images.cuda())
        accuracy += (predictions.argmax(dim=1) == labels).type(torch.FloatTensor).mean().item() 
        loss += loss_fce(predictions, labels).item()
    
    model.train(mode=was_training)
    return accuracy / len(valid_loader) * 100, loss / len(valid_loader)

In [24]:
from collections import deque

# Initial params setup.
epochs = 5
report_period = 3
batch_iteration = 0

# Storing of some data.
train_leak_loss = deque(maxlen=report_period)
train_loss_history = []
valid_loss_history = []
valid_acc_history = []

In [29]:
for epoch in range(epochs):
    # Setup net to train mode and go through one epoch.
    model.train()
    for images, labels in loader['train']:
        batch_iteration += 1
        images = images.cuda()
        labels = labels.cuda()
        
        # Training net on one batch.
        optimizer.zero_grad()
        predictions = model.forward(images)
        loss = loss_fce(predictions, labels)
        loss.backward()
        optimizer.step()
        
        train_leak_loss.append(loss.item())
        # In case we should report, lets make validation on valid set.
        if batch_iteration % report_period == 0:
            model.eval()
            # We don't want to collect info for gradients from here.
            with torch.no_grad():
                valid_accuracy, valid_loss = get_valid_acc_and_loss(model, loss_fce, loader['valid'])
                
            print(f'Epoch: {epoch+1}/{epochs}.. ',
                  f"Train Loss: {round(np.mean(train_leak_loss), 2)}.. ",
                  f"Valid Loss: {round(valid_loss, 2)}.. ",
                  f"Valid Acc: {round(valid_accuracy, 2)}%")
            
            train_loss_history.append(np.mean(train_leak_loss))
            valid_loss_history.append(valid_loss)
            valid_acc_history.append(valid_accuracy)
                        
            model.train()

Epoch: 1/2..  Train Loss: 0.57..  Valid Loss: 0.63..  Valid Acc: 66.62%
Epoch: 1/2..  Train Loss: 0.62..  Valid Loss: 0.62..  Valid Acc: 66.38%
Epoch: 1/2..  Train Loss: 0.62..  Valid Loss: 0.62..  Valid Acc: 65.88%
Epoch: 1/2..  Train Loss: 0.63..  Valid Loss: 0.61..  Valid Acc: 67.5%
Epoch: 1/2..  Train Loss: 0.6..  Valid Loss: 0.61..  Valid Acc: 67.88%
Epoch: 1/2..  Train Loss: 0.6..  Valid Loss: 0.61..  Valid Acc: 68.0%
Epoch: 1/2..  Train Loss: 0.62..  Valid Loss: 0.61..  Valid Acc: 68.75%
Epoch: 1/2..  Train Loss: 0.58..  Valid Loss: 0.61..  Valid Acc: 68.38%
Epoch: 1/2..  Train Loss: 0.62..  Valid Loss: 0.61..  Valid Acc: 68.12%
Epoch: 1/2..  Train Loss: 0.56..  Valid Loss: 0.6..  Valid Acc: 68.25%
Epoch: 1/2..  Train Loss: 0.65..  Valid Loss: 0.6..  Valid Acc: 69.25%
Epoch: 1/2..  Train Loss: 0.64..  Valid Loss: 0.6..  Valid Acc: 69.88%
Epoch: 1/2..  Train Loss: 0.63..  Valid Loss: 0.59..  Valid Acc: 70.25%
Epoch: 1/2..  Train Loss: 0.62..  Valid Loss: 0.59..  Valid Acc: 68.75%

Epoch: 2/2..  Train Loss: 0.51..  Valid Loss: 0.56..  Valid Acc: 73.75%
Epoch: 2/2..  Train Loss: 0.56..  Valid Loss: 0.56..  Valid Acc: 73.5%
Epoch: 2/2..  Train Loss: 0.6..  Valid Loss: 0.56..  Valid Acc: 73.88%
Epoch: 2/2..  Train Loss: 0.61..  Valid Loss: 0.56..  Valid Acc: 73.75%
Epoch: 2/2..  Train Loss: 0.67..  Valid Loss: 0.56..  Valid Acc: 73.38%
Epoch: 2/2..  Train Loss: 0.58..  Valid Loss: 0.55..  Valid Acc: 74.38%
Epoch: 2/2..  Train Loss: 0.49..  Valid Loss: 0.55..  Valid Acc: 75.12%
Epoch: 2/2..  Train Loss: 0.61..  Valid Loss: 0.55..  Valid Acc: 74.5%
Epoch: 2/2..  Train Loss: 0.51..  Valid Loss: 0.55..  Valid Acc: 74.5%
Epoch: 2/2..  Train Loss: 0.58..  Valid Loss: 0.56..  Valid Acc: 74.0%
Epoch: 2/2..  Train Loss: 0.59..  Valid Loss: 0.56..  Valid Acc: 74.0%
Epoch: 2/2..  Train Loss: 0.6..  Valid Loss: 0.56..  Valid Acc: 74.12%
Epoch: 2/2..  Train Loss: 0.53..  Valid Loss: 0.56..  Valid Acc: 73.75%


Traceback (most recent call last):
  File "/home/adam/.pyenv/versions/3.6.3/lib/python3.6/multiprocessing/queues.py", line 240, in _feed
    send_bytes(obj)
Traceback (most recent call last):
  File "/home/adam/.pyenv/versions/3.6.3/lib/python3.6/multiprocessing/queues.py", line 240, in _feed
    send_bytes(obj)
  File "/home/adam/.pyenv/versions/3.6.3/lib/python3.6/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/home/adam/.pyenv/versions/3.6.3/lib/python3.6/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/home/adam/.pyenv/versions/3.6.3/lib/python3.6/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe
  File "/home/adam/.pyenv/versions/3.6.3/lib/python3.6/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/home/adam/.pyenv/versions/3.6.3/lib/python3

KeyboardInterrupt: 

In [26]:
for param in model.layer4.parameters():
    param.requires_grad = True

In [27]:
for param in model.layer4.parameters():
    param.requires_grad = True

optimizer.param_groups[0]['params'] = optimizer.param_groups[0]['params'] + list(model.layer4.parameters())
optimizer.param_groups[0]['lr'] = 0.0001


In [28]:
epochs=2

In [30]:
torch.save(model.state_dict(), 'resnet.pkl')

In [31]:
transformation = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

dataset = datasets.ImageFolder('./dataset/train', transformation)
loader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True, num_workers=4)

In [32]:
os.environ["TORCH_HOME"] = "./"
info = model.eval()

resnet18_features = torch.nn.Sequential(*list(model.children())[:-1])

In [34]:
resnet18_features.cpu()

Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (conv2): Conv2d(64, 64, ker

In [35]:
features = list()
labels = list()
images = list()
idx = 0
for img, label in tqdm.tqdm(loader):
    features += resnet18_features(img).squeeze().detach().numpy().tolist()
    labels += list(map(lambda l: dataset.classes[l], label.squeeze().detach().numpy().tolist()))
    
    img = img.squeeze().detach().numpy()
    img = np.transpose(img, (0, 2, 3, 1))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    img = std * img + mean
    img = np.clip(img, 0, 1)
    img = np.transpose(img, (0, 3, 1, 2))
    images += img.tolist()
    
    idx += 1
    if idx > 20:
        break
    
    
features = torch.tensor(np.array(features))
images = torch.tensor(np.array(images)) 

  9%|▉         | 20/228 [00:21<03:33,  1.02s/it]

In [36]:
# Writer will output to ./runs/ directory by default
writer = SummaryWriter()
writer.add_graph(resnet18_features, iter(loader).__next__()[0])

writer.add_embedding(mat=features, metadata=labels, label_img=images)
writer.close()