## This notebooks fits the cnn on permuted labeles for task 2.5

In [2]:
import numpy as np
import torch
import torchvision
import torch.nn as nn
import torchvision.transforms as transforms

In [3]:
model = torch.load("models/20_model.pth")
model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [4]:
for param in model.features.parameters():
    param.requires_grad = False

In [5]:
trans_wo_norm = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor()
])

dataset0_wo_norm = torchvision.datasets.ImageFolder("archive/chest_xray/chest_xray/train", transform=trans_wo_norm)

In [6]:
mean0 = torch.zeros(3)
std0 = torch.zeros(3)
for img, _ in dataset0_wo_norm:
    mean0 += img.mean(dim=(1, 2))
    std0 += img.std(dim=(1, 2))

mean0 /= len(dataset0_wo_norm)
std0 /= len(dataset0_wo_norm)

print("Mean:", mean0)
print("Standard deviation:", std0)

Mean: tensor([0.5832, 0.5832, 0.5832])
Standard deviation: tensor([0.1413, 0.1413, 0.1413])


In [7]:
mean0 =torch.tensor([0.5832, 0.5832, 0.5832])
std0  =torch.tensor([0.1413, 0.1413, 0.1413])

In [8]:
std_normalise_0 = transforms.Normalize(
    mean=mean0,
    std=std0
)

trans0 = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.Grayscale(num_output_channels=3),
        transforms.ToTensor(),
        std_normalise_0
])

In [9]:
dataset_0 = torchvision.datasets.ImageFolder("archive/chest_xray/chest_xray/train", transform=trans0)

In [10]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

print('Doing computations on device = {}'.format(device))

model.to(device)

Doing computations on device = cuda


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [11]:
# now we define train and validation set randomly as the validation set provided is too small for meaningful results
trainset, testset = torch.utils.data.random_split(dataset_0, [4999, len(dataset_0) - 4999])


## Permute lables of train data

In [12]:
lables = np.array([lable for _, lable in trainset.dataset.imgs])

In [13]:
np.random.seed(0)
lables = np.random.permutation(lables)

In [14]:
for i, (_,lable) in enumerate(trainset.dataset.imgs):
    trainset.dataset.imgs[i] = (trainset.dataset.imgs[i][0], lables[i])

## Define data loaders

In [15]:
train_loader = torch.utils.data.DataLoader(trainset,batch_size=32)
test_loader  = torch.utils.data.DataLoader(testset,batch_size=32)

In [17]:
def train_long(net,train_loader,test_loader,epochs=5,lr=0.001,optimizer=None,loss_fn = nn.NLLLoss(),print_freq=10):
    optimizer = optimizer or torch.optim.Adam(net.parameters(),lr=lr)
    for epoch in range(epochs):
        net.train()
        total_loss,acc,count = 0,0,0
        for i, (features,labels) in enumerate(train_loader):
            lbls = labels.long().to(default_device)
            optimizer.zero_grad()
            out = net(features.to(default_device))
            loss = loss_fn(out,lbls)
            loss.backward()
            optimizer.step()
            total_loss+=loss
            _,predicted = torch.max(out,1)
            acc+=(predicted==lbls).sum()
            count+=len(labels)
            if i%print_freq==0:
                print("Epoch {}, minibatch {}: train acc = {}, train loss = {}".format(epoch,i,acc.item()/count,total_loss.item()/count))
        vl,va = validate(net,test_loader,loss_fn)
        print("Epoch {} done, validation acc = {}, validation loss = {}".format(epoch,va,vl))


In [18]:
def validate(net, dataloader,loss_fn=nn.NLLLoss()):
    net.eval()
    count,acc,loss = 0,0,0
    with torch.no_grad():
        for features,labels in dataloader:
            
            lbls = labels.long().to(default_device)
            out = net(features.to(default_device))
            loss += loss_fn(out,lbls) 
            pred = torch.max(out,1)[1]
            acc += (pred==lbls).sum()
            count += len(labels)
    return loss.item()/count, acc.item()/count

In [19]:
for param in model.features.parameters():
    param.requires_grad = False

In [20]:
default_device = device
train_long(model,train_loader,test_loader,lr=0.00005,loss_fn=torch.nn.CrossEntropyLoss(),epochs=10,print_freq=90)

Epoch 0, minibatch 0: train acc = 0.59375, train loss = 0.7303264141082764
Epoch 0, minibatch 90: train acc = 0.7002060439560439, train loss = 0.049046060541173914
Epoch 0 done, validation acc = 0.7511520737327189, validation loss = 0.01969623345933202
Epoch 1, minibatch 0: train acc = 0.8125, train loss = 0.0173207838088274
Epoch 1, minibatch 90: train acc = 0.7451923076923077, train loss = 0.018100403167389253
Epoch 1 done, validation acc = 0.7511520737327189, validation loss = 0.019295123315626574
Epoch 2, minibatch 0: train acc = 0.8125, train loss = 0.016510585322976112
Epoch 2, minibatch 90: train acc = 0.7479395604395604, train loss = 0.01787801627274398
Epoch 2 done, validation acc = 0.7511520737327189, validation loss = 0.01882407632291592
Epoch 3, minibatch 0: train acc = 0.8125, train loss = 0.014752785675227642
Epoch 3, minibatch 90: train acc = 0.75, train loss = 0.01741045647925073
Epoch 3 done, validation acc = 0.7511520737327189, validation loss = 0.019676041493218065
E

In [21]:
validate(model, train_loader)

(-0.022232620352195438, 0.9105821164232847)

Validation accuracy over 90%

In [23]:
torch.save(model,'models/model_permuted.pth')