# Machine Learning 2 Assignment 2

## Junyoung Jung 

In [1]:
import torch
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, random_split
import matplotlib.pyplot as plt
from torch.optim import SGD
import torch.nn
import time

### a) Load the CIFAR10 dataset as follows:

In [2]:
# CIFAR-10 train and test splits, stored under ./CIFARdata (download=True
# fetches the files when they are not already there). ToTensor is the only
# transform applied to each image.
trainset = datasets.CIFAR10(
    root='./CIFARdata',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
testset = datasets.CIFAR10(
    root='./CIFARdata',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

Files already downloaded and verified
Files already downloaded and verified


In [3]:
# Integer class label (0-9) -> human-readable class name; the position of
# each name in the tuple is its label.
labels_dict = dict(enumerate((
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
)))

### b) Visualize at least one image for each class. You may need to look into how datasets are implemented in PyTorch.

In [4]:
def visualize_image(data):
    """Show a 3x3 grid of randomly chosen samples from ``data``.

    Each subplot is titled with the class name looked up in the module-level
    ``labels_dict``.

    Args:
        data: Indexable, sized dataset whose items are ``(image, label)``
            pairs. The image is expected to be a channel-first ``(C, H, W)``
            tensor (a plain ``(H, W)`` tensor is also accepted) and the label
            must be a key of ``labels_dict``.
    """
    figure = plt.figure(figsize=(8, 8))
    cols, rows = 3, 3
    for i in range(1, cols * rows + 1):
        sample_idx = torch.randint(len(data), size=(1,)).item()
        img, label = data[sample_idx]
        figure.add_subplot(rows, cols, i)
        plt.title(labels_dict[label])
        plt.axis("off")
        if img.dim() == 3:
            # imshow expects channel-last (H, W, C). Reversing all axes with
            # a plain transpose would give (W, H, C) and draw every picture
            # mirrored about its diagonal.
            img = img.permute(1, 2, 0)
        # squeeze() drops a trailing single channel so that grayscale images
        # are rendered through the "gray" colormap.
        plt.imshow(img.squeeze(), cmap="gray")
    plt.show()

In [None]:
# Preview a grid of randomly drawn images from the training set.
visualize_image(trainset)

### c) Split the trainset into training set and validation set with 90% : 10% ratio. Implement dataloaders for CIFAR10.

In [None]:
total_size = len(trainset)
train_size = int(0.9 * total_size)
# The validation split takes whatever is left, so the two sizes always add up
# to total_size. Truncating 0.1 * total_size independently can lose a sample,
# and random_split rejects integer lengths that do not sum to the dataset size.
val_size = total_size - train_size

train_dataset, val_dataset = random_split(trainset, [train_size, val_size])

# With the default batch_size of 1 (valloader / testloader):
# data shape  = (1, 3, 32, 32)
# label shape = (1)

trainloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
valloader = DataLoader(val_dataset, shuffle=True)
testloader = DataLoader(testset, shuffle=False)

# The two class labels used for the binary classification task.
anyclass1, anyclass2 = 3, 7

### d) Choose any two classes. Then build an SVM classifier (implement the loss function yourself; do not use PyTorch's built-in loss functions) together with its training/validation/evaluation code to perform binary classification between those two classes.


### e) Train for 10 epochs with batch size 64.

In [None]:
class svm(torch.nn.Module):
    """Linear SVM: a single affine layer producing one score per sample.

    Args:
        in_features: Number of input values per sample after flattening.
            Defaults to ``3 * 32 * 32`` (a 3-channel 32x32 image).
    """

    def __init__(self, in_features=3 * 32 * 32):
        super().__init__()
        self.fc = torch.nn.Linear(in_features, 1)

    def forward(self, x):
        """Return the decision score for each sample in ``x``.

        Args:
            x: Batch of inputs of shape ``(batch, ...)``. Everything after the
                batch axis is flattened, so image batches can be passed as-is.

        Returns:
            Tensor of shape ``(batch, 1)`` holding the raw scores.
        """
        # Flatten first: a Linear layer acts only on the last axis, so without
        # this an image batch would yield one score per image row, not per image.
        return self.fc(torch.flatten(x, start_dim=1))

    def hingeloss(self, data, label):
        """Mean hinge loss ``mean(max(0, 1 - label * data))``.

        Args:
            data: Raw scores, one per sample (any shape; it is flattened).
            label: Targets in ``{-1, +1}``, one per score, or a single scalar
                that is applied to every score.

        Returns:
            Scalar tensor with the loss averaged over all samples.
        """
        scores = data.reshape(-1)
        # Flatten both sides so a (batch, 1) score column and a (batch,) label
        # vector are multiplied element-wise, not broadcast to (batch, batch).
        targets = torch.as_tensor(
            label, dtype=scores.dtype, device=scores.device
        ).reshape(-1)
        return torch.clamp(1 - targets * scores, min=0).mean()



In [None]:
# Training hyperparameters.
epoch, batch_size, learning_rate = 10, 64, 0.001

# Classifier instance and the SGD optimizer that updates all of its parameters.
model = svm()
optimizer = SGD(params=model.parameters(), lr=learning_rate)

In [None]:
def train(model, optimizer, log_every=64):
    """Run one pass over ``trainloader``, fitting ``model`` on the two chosen classes.

    Reads the module-level ``trainloader``, ``anyclass1`` and ``anyclass2``.
    Samples of any other class are dropped from each batch; ``anyclass1`` is
    mapped to target -1 and ``anyclass2`` to target +1 before the hinge loss
    is computed.

    Args:
        model: Module exposing ``hingeloss(prediction, target)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        log_every: Print the running loss whenever the batch index is a
            multiple of this value.

    Returns:
        Mean hinge loss over the batches that were used for an update, or
        ``None`` when no batch contained a sample of either class.
    """
    size = len(trainloader.dataset)
    model.train()
    seen = 0
    losses = []

    for batch, (X, y) in enumerate(trainloader):
        seen += len(X)
        is_second = y == anyclass2
        keep = (y == anyclass1) | is_second
        if not keep.any():
            # Nothing from the two selected classes in this batch.
            continue

        optimizer.zero_grad()
        prediction = model(X[keep])
        # anyclass1 -> -1, anyclass2 -> +1. Trailing singleton axes keep this
        # one-target-per-sample when broadcast against the model output.
        targets = (is_second[keep].float() * 2 - 1).view(
            -1, *([1] * (prediction.dim() - 1))
        )
        loss = model.hingeloss(prediction, targets)
        loss.backward()
        optimizer.step()
        losses.append(loss.item())

        if batch % log_every == 0:
            print(f"Training loss: {losses[-1]:>7f}  [{seen:>5d}/{size:>5d}]")

    return sum(losses) / len(losses) if losses else None

In [None]:
def validation(model, optimizer):
    """Evaluate ``model`` on the two chosen classes of ``valloader``.

    Reads the module-level ``valloader``, ``anyclass1`` and ``anyclass2``.
    No gradients are computed and no parameters are updated, so the
    validation data never influences training. Works for any loader batch
    size.

    Args:
        model: Module exposing ``hingeloss(prediction, target)``.
        optimizer: Unused; accepted only so existing calls keep working.

    Returns:
        ``(mean_loss, accuracy)`` over the evaluated samples, or ``None``
        when the loader holds no sample of either class.
    """
    model.eval()
    total_loss = 0.0
    correct = 0
    count = 0

    with torch.no_grad():
        for X, y in valloader:
            is_second = y == anyclass2
            keep = (y == anyclass1) | is_second
            if not keep.any():
                continue

            n_kept = int(keep.sum())
            prediction = model(X[keep])
            # anyclass1 -> -1, anyclass2 -> +1. Trailing singleton axes keep
            # this one-target-per-sample when broadcast against the output.
            targets = (is_second[keep].float() * 2 - 1).view(
                -1, *([1] * (prediction.dim() - 1))
            )
            loss = model.hingeloss(prediction, targets)
            # Weight by batch size so the final mean is per sample.
            total_loss += loss.item() * n_kept

            # A positive score predicts anyclass2, otherwise anyclass1.
            scores = prediction.reshape(n_kept, -1).mean(dim=1)
            correct += int(((scores > 0) == is_second[keep]).sum())
            count += n_kept

    if count == 0:
        print("Validation: no samples of the selected classes")
        return None

    mean_loss = total_loss / count
    accuracy = correct / count
    print(f"Validation loss: {mean_loss:>7f}  accuracy: {accuracy:.4f}  [{count:>5d} samples]")
    return mean_loss, accuracy

In [None]:
def test(model, optimizer):
    """Evaluate ``model`` on the two chosen classes of ``testloader``.

    Reads the module-level ``testloader``, ``anyclass1`` and ``anyclass2``.
    No gradients are computed and no parameters are updated, so the test
    data never influences training. Works for any loader batch size.

    Args:
        model: Module exposing ``hingeloss(prediction, target)``.
        optimizer: Unused; accepted only so existing calls keep working.

    Returns:
        ``(mean_loss, accuracy)`` over the evaluated samples, or ``None``
        when the loader holds no sample of either class.
    """
    model.eval()
    total_loss = 0.0
    correct = 0
    count = 0

    with torch.no_grad():
        for X, y in testloader:
            is_second = y == anyclass2
            keep = (y == anyclass1) | is_second
            if not keep.any():
                continue

            n_kept = int(keep.sum())
            prediction = model(X[keep])
            # anyclass1 -> -1, anyclass2 -> +1. Trailing singleton axes keep
            # this one-target-per-sample when broadcast against the output.
            targets = (is_second[keep].float() * 2 - 1).view(
                -1, *([1] * (prediction.dim() - 1))
            )
            loss = model.hingeloss(prediction, targets)
            # Weight by batch size so the final mean is per sample.
            total_loss += loss.item() * n_kept

            # A positive score predicts anyclass2, otherwise anyclass1.
            scores = prediction.reshape(n_kept, -1).mean(dim=1)
            correct += int(((scores > 0) == is_second[keep]).sum())
            count += n_kept

    if count == 0:
        print("Test: no samples of the selected classes")
        return None

    mean_loss = total_loss / count
    accuracy = correct / count
    print(f"Test loss: {mean_loss:>7f}  accuracy: {accuracy:.4f}  [{count:>5d} samples]")
    return mean_loss, accuracy

### f) Perform data normalization. You may need to look into how to use datasets in PyTorch.

### g) Again, train for 10 epochs with batch size 64 after data normalization. Write down your observations.