In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision

In [3]:
# set device
device = torch.device(0 if torch.cuda.is_available() else 'cpu')
torch.cuda.set_device(device)

### Get data

In [4]:
batch_size = 128

train_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
])

test_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
])

train_loader = torch.utils.data.DataLoader(
    torchvision.datasets.CIFAR10('../data/CIFAR-10', train=True, download=True,
         transform=train_transform), 
    batch_size=batch_size, 
    shuffle=True
)

test_loader = torch.utils.data.DataLoader(
    torchvision.datasets.CIFAR10('../data/CIFAR-10', train=False, download=True,
         transform=test_transform),
    batch_size=batch_size, 
    shuffle=False
)

Files already downloaded and verified
Files already downloaded and verified


### Part 1
#### Implement KNN

In [11]:
class KNN():
    def __init__(self, train_loader):
        self.train_x, self.train_y = [],[]
        for i in range(len(train_loader.dataset)):
            x, y = train_loader.dataset[i]
            self.train_x.append(x.reshape(1, -1))
            self.train_y.append(torch.Tensor([[y]]))
        self.train_x = torch.cat(self.train_x, dim=0).to(device)
        self.train_y = torch.cat(self.train_y, dim=0).to(device)
        
    def predict(self, test_x, k=2, p=1):
        test_x = test_x.reshape(test_x.shape[0], -1)
        dists = torch.cdist(test_x, self.train_x, p)
        idxs = torch.topk(dists, k=k, dim=1, largest=False)[1]
        test_y_s = self.train_y[idxs]
        if k == 1:
            test_y_s = test_y_s.squeeze(dim=1)
        else:
            test_y_s = test_y_s.squeeze(dim=2)
        test_y_ = torch.mode(test_y_s, dim=1)[0]
        return test_y_

In [12]:
knn = KNN(train_loader)

In [13]:
for p in [1, 2]:
    for k in range(1, 5):
        correct = 0
        for batch_idx, (samples, labels) in enumerate(test_loader):
            samples, labels = samples.to(device), labels.to(device)
            preds = knn.predict(samples, k=k, p=p)
            correct += torch.sum(torch.eq(preds, labels)).item()
        test_acc = correct / len(test_loader.dataset)
        print(f'Test accuracy for KNN with k = {k}, norm = l{p}: {test_acc*100:.2f}')

Test accuracy for KNN with k = 1, norm = l1: 38.59
Test accuracy for KNN with k = 2, norm = l1: 33.92
Test accuracy for KNN with k = 3, norm = l1: 36.25
Test accuracy for KNN with k = 4, norm = l1: 36.84
Test accuracy for KNN with k = 1, norm = l2: 35.39
Test accuracy for KNN with k = 2, norm = l2: 31.16
Test accuracy for KNN with k = 3, norm = l2: 33.03
Test accuracy for KNN with k = 4, norm = l2: 33.98


### Part 2
#### Implement simple CNN

In [15]:
class DDNet(nn.Module):
    """Very simple CIFAR-10 DDNet. Described here: https://arxiv.org/abs/1511.04508.
    """
    def __init__(self, activation=F.relu, p_drop=.5):
        super(DDNet, self).__init__()
        self.activation = activation
        self.logits = None
        self.probabilities = None
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=0)
        self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=0)
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=0)
        self.conv4 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=0)
        self.pool2 = nn.MaxPool2d(kernel_size=2)
        self.fc1 = nn.Linear(128*5*5, 256)
        self.fc1_drop = nn.Dropout(p=p_drop)
        self.fc2 = nn.Linear(256, 256)
        self.fc2_drop = nn.Dropout(p=p_drop)
        self.fc3 = nn.Linear(256, 10)
        
    def forward(self, x):
        x = self.activation(self.conv1(x))
        x = self.pool1(self.activation(self.conv2(x)))
        x = self.activation(self.conv3(x))
        x = self.pool2(self.activation(self.conv4(x)))
        x = x.view(x.shape[0],-1)
        x = self.fc1_drop(self.fc1(x))
        x = self.fc2_drop(self.fc2(x))
        self.logits = self.fc3(x)
        self.probabilities = F.log_softmax(self.logits, dim=1)
        return self.probabilities

In [16]:
# this function runs both training and test passes through data
def datapass(dataloader, train=True):
    if train:
        net.train()
    else:
        net.eval()
    
    num_correct = 0
    total_loss = 0
    for batch_idx, (samples, labels) in enumerate(dataloader):
        samples, labels = samples.to(device), labels.to(device)
        
        outputs = net(samples)
        
        loss = F.cross_entropy(outputs, labels)
        preds = torch.argmax(outputs, dim=1)
        
        num_correct += torch.eq(preds, labels).sum().item()
        total_loss += loss.item()
        
        if train:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    
    return num_correct / len(dataloader.dataset), total_loss / len(dataloader)

In [20]:
def train():
    for epoch in range(1, n_epochs + 1):
        train_acc, train_loss = datapass(train_loader)
        if epoch % print_freq == 0:
            print(f'Epoch #{epoch}:\tTrain loss: {train_loss:.4f}\tTrain acc: {train_acc:.4f}')
        lr_decayer.step()

In [21]:
# hyperparameters
n_epochs = 20
lr = .05
momentum = .6
print_freq = 5

In [22]:
for p_drop in [.125, .25, .5]:
    for wd in [.0001, .001, .01]:
        net = DDNet(p_drop=p_drop)
        net.cuda()

        optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum, weight_decay=wd)
        lr_decayer = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=n_epochs, eta_min=0)

        train()
        test_acc, test_loss = datapass(test_loader, train=False)
        print(f'With p_drop = {p_drop}, wd = {wd}; test accuracy: {test_acc}\n')

Epoch #5:	Train loss: 1.0513	Train acc: 0.6313
Epoch #10:	Train loss: 0.5794	Train acc: 0.7985
Epoch #15:	Train loss: 0.2639	Train acc: 0.9091
Epoch #20:	Train loss: 0.1266	Train acc: 0.9639
With p_drop = 0.125, wd = 0.0001; test accuracy: 0.7502

Epoch #5:	Train loss: 1.2766	Train acc: 0.5474
Epoch #10:	Train loss: 0.7239	Train acc: 0.7480
Epoch #15:	Train loss: 0.4443	Train acc: 0.8471
Epoch #20:	Train loss: 0.3055	Train acc: 0.8992
With p_drop = 0.125, wd = 0.001; test accuracy: 0.7617

Epoch #5:	Train loss: 1.5871	Train acc: 0.4255
Epoch #10:	Train loss: 1.3664	Train acc: 0.5101
Epoch #15:	Train loss: 1.2059	Train acc: 0.5733
Epoch #20:	Train loss: 1.1029	Train acc: 0.6125
With p_drop = 0.125, wd = 0.01; test accuracy: 0.5909

Epoch #5:	Train loss: 1.0641	Train acc: 0.6264
Epoch #10:	Train loss: 0.5764	Train acc: 0.7999
Epoch #15:	Train loss: 0.2890	Train acc: 0.9008
Epoch #20:	Train loss: 0.1607	Train acc: 0.9505
With p_drop = 0.25, wd = 0.0001; test accuracy: 0.7731

Epoch #5:	Tr

### Part 3
#### Simple linear classifiers

In [23]:
# the best values from the grid search in part 2
wd = .0001
p_drop = .5

In [24]:
class LinNet1(nn.Module):
    def __init__(self, bias=True):
        super(LinNet1, self).__init__()
        self.fc1 = nn.Linear(32*32*3, 10, bias=bias)
        
    def forward(self, x):
        x = x.reshape(x.shape[0], -1)
        x = self.fc1(x)
        return x
    
class LinNet2(nn.Module):
    def __init__(self, nh1, nh2):
        super(LinNet2, self).__init__()
        self.fc1 = nn.Linear(32*32*3, nh1)
        self.fc2 = nn.Linear(nh1, nh2)
        self.fc3 = nn.Linear(nh2, 10)
        
    def forward(self, x):
        x = x.reshape(x.shape[0], -1)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x

##### Train simple linear classifier WITH bias

In [25]:
net = LinNet1(bias=True)
net.cuda()

optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum, weight_decay=wd)
lr_decayer = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=n_epochs, eta_min=0)

train()
test_acc, test_loss = datapass(test_loader, train=False)
print(f'Test accuracy: {test_acc}')

Epoch #5:	Train loss: 4.0884	Train acc: 0.2853
Epoch #10:	Train loss: 1.9403	Train acc: 0.3684
Epoch #15:	Train loss: 1.6660	Train acc: 0.4305
Epoch #20:	Train loss: 1.6242	Train acc: 0.4519
Test accuracy: 0.4131


##### Train simple linear classifier WITHOUT bias

In [26]:
net = LinNet1(bias=False)
net.cuda()

optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum, weight_decay=wd)
lr_decayer = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=n_epochs, eta_min=0)

train()
test_acc, test_loss = datapass(test_loader, train=False)
print(f'Test accuracy: {test_acc}')

Epoch #5:	Train loss: 4.4575	Train acc: 0.2773
Epoch #10:	Train loss: 1.9700	Train acc: 0.3642
Epoch #15:	Train loss: 1.6784	Train acc: 0.4245
Epoch #20:	Train loss: 1.6376	Train acc: 0.4471
Test accuracy: 0.4041


##### Train neural net with 2 hidden layers but no nonlinear activations

In [27]:
net = LinNet2(50, 50)
net.cuda()

optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum, weight_decay=wd)
lr_decayer = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=n_epochs, eta_min=0)

train()
test_acc, test_loss = datapass(test_loader, train=False)
print(f'Test accuracy: {test_acc}')

Epoch #5:	Train loss: 1.7597	Train acc: 0.3886
Epoch #10:	Train loss: 1.7160	Train acc: 0.4099
Epoch #15:	Train loss: 1.6812	Train acc: 0.4267
Epoch #20:	Train loss: 1.6642	Train acc: 0.4348
Test accuracy: 0.4116
