In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torchvision.utils import save_image
from torchvision.datasets import MNIST, FashionMNIST, CIFAR10, STL10
import os
import pickle
import zipfile
import datetime
import torch.utils.data as tud

# Data Preparation:

In [2]:
# Only conversion to tensors is applied; no augmentation or normalisation.
data_transform = transforms.Compose([transforms.ToTensor()])

In [3]:
# Note on split usage in this notebook: the 8000-image "test" split is the pool
# that gets partitioned into labeled/unlabeled training data and a dev set,
# while the "train" split is kept aside for the final test-set evaluation.
train = STL10(
    root="./data",
    split="train",
    transform=data_transform,
    download=True,
)
test = STL10(
    root="./data",
    split="test",
    transform=data_transform,
    download=True,
)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
# Display the summary of the STL10 "test" split (size, root, transforms).
test

Dataset STL10
    Number of datapoints: 8000
    Split: test
    Root Location: ./data
    Transforms (if any): Compose(
                             ToTensor()
                         )
    Target Transforms (if any): None

In [5]:
# Load the whole STL10 "test" split as a single batch so it can be re-partitioned
# by index below. Despite its name, this `train_loader` wraps the `test` split;
# the name is rebound to the actual training loader later in the notebook.
# batch_size is derived from the dataset instead of hard-coding 8000.
train_loader = torch.utils.data.DataLoader(test, batch_size=len(test), shuffle=False, num_workers=0)

In [6]:
# Take the first batch from the loader: `data` holds the images, `labels` the targets.
data, labels= next(iter(train_loader))

## Using 1,500 labeled and 6,500 unlabeled images in this example:

In [7]:
# Draw 1500 distinct indices out of 8000 to keep as labeled data.
# The seed is fixed so the same subset is selected on every run.
np.random.seed(5)
labeled_ind = np.random.choice(8000, size=1500, replace=False)

In [8]:
# Every index in 0..7999 that was not drawn as labeled is treated as unlabeled.
unlabeled_ind = np.setdiff1d(np.arange(8000), labeled_ind)

In [9]:
# Sanity check: 8000 - 1500 = 6500 unlabeled indices are expected.
unlabeled_ind.shape

(6500,)

In [10]:
# Convert the labels to a NumPy array.
# np.array() makes a copy, so the in-place relabelling done afterwards does not
# write through to the original tensor, and the cell can be re-run safely
# (calling .numpy() a second time would fail because `labels` is already an ndarray).
labels = np.array(labels)

In [11]:
# Mark every unlabeled sample with the sentinel class 10 (in-place update).
labels[unlabeled_ind] = 10

In [12]:
# Hold out 30% of the labeled data (450 of 1500) as the dev set.
# NOTE: the draw is uniform over the labeled indices; nothing in this cell
# stratifies by class, so the dev set is only approximately class-balanced.
np.random.seed(5)
dev_ind = labeled_ind[np.random.choice(1500,450, replace = False)]

In [13]:
# Training indices: everything (labeled and unlabeled) that is not in the dev set.
train_ind = np.setdiff1d(np.arange(8000), dev_ind)

In [15]:
# Split sizes: 450 labeled samples in the dev set; the training set holds the
# remaining 1050 labeled samples plus the 6500 unlabeled ones (7550 in total).
len(dev_ind), len(train_ind)

(450, 7550)

In [16]:
#prepare dataloader for pytorch
class TorchInputData(tud.Dataset):
    """
    Minimal ``torch.utils.data.Dataset`` wrapper around two parallel sequences:
    images and their labels.

    Indexing returns the pair ``(image, label)`` so the data can be batched
    with a ``DataLoader``.
    """
    def __init__(self, X, Y, transform=None):
        """
        X: an indexable sequence of images
        Y: an indexable sequence of labels, one per image in X
           (in this notebook: 0-9 for real classes, 10 for unlabeled samples)
        transform: optional callable applied to each image when it is fetched;
                   None (the default) returns the image unchanged

        Raises ValueError if X and Y do not have the same length.
        """
        # Fail fast: a mismatch would otherwise surface as an IndexError mid-epoch
        # or silently leave trailing labels unused.
        if len(X) != len(Y):
            raise ValueError(
                "X and Y must have the same length, got %d and %d" % (len(X), len(Y))
            )
        self.X = X
        self.Y = Y
        # Previously accepted but silently discarded; now stored and applied.
        self.transform = transform

    def __len__(self):
        """Number of (image, label) pairs."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the (image, label) pair at position idx, transforming the image if requested."""
        x = self.X[idx]
        y = self.Y[idx]

        if self.transform is not None:
            x = self.transform(x)

        return x, y

In [17]:
# Training set for M2: gather the selected images with their (possibly masked)
# labels and serve them in shuffled mini-batches of 50.
images_train = [data[idx] for idx in train_ind]
trainset = TorchInputData(X=images_train, Y=labels[train_ind])
train_loader = tud.DataLoader(dataset=trainset, batch_size=50, shuffle=True)

In [18]:
# Dev set: the 450 held-out labeled images, served in mini-batches of 50.
images_dev = [data[idx] for idx in dev_ind]
devset = TorchInputData(X=images_dev, Y=labels[dev_ind])
dev_loader = tud.DataLoader(dataset=devset, batch_size=50, shuffle=True)

# M2 Model:

In [1]:
#import nn.module for M2 and CNN classifier
from m2_stl10_cuda import M2,Classifier

In [2]:
# Build the CNN classifier that is passed to the M2 model.
classifier = Classifier(image_reso=96, filter_size=5, dropout_rate=0.2)

In [3]:
# Build the M2 model around the classifier.
# NOTE(review): `path` is presumably the checkpoint file for this run - confirm in m2_stl10_cuda.
m2 = M2(
    latent_features=128,
    classifier=classifier,
    path="m2_stl10_0.1_50epoch_5.pth",
)

### Structure of the M2 model: a convolutional variational autoencoder and a CNN classifier

In [5]:
# Display the module structure of the underlying M2 network.
m2.model

M2_base(
  (encoder): Encoder(
    (bottle): EncoderModule(
      (conv): Conv2d(3, 32, kernel_size=(1, 1), stride=(1, 1))
      (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (m1): EncoderModule(
      (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (m2): EncoderModule(
      (conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(4, 4), padding=(1, 1))
      (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
    (m3): EncoderModule(
      (conv): Conv2d(128, 256, kernel_size=(3, 3), stride=(4, 4), padding=(1, 1))
      (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
    )
  )
  (fc1): Linear(in_features=9

### Training the M2 model for 50 epochs:

In [None]:
# alpha is the hyperparameter that weights the classifier loss;
# it is set to 0.1 times the number of samples in the training set.
alpha = 0.1 * len(train_loader.dataset)

In [22]:
# Fit the M2 model.
# labeled_data_len is the number of labeled samples across train + dev: 1050 + 450.
# NOTE(review): the third positional argument (50) is presumably the epoch count - confirm in m2_stl10_cuda.
m2.fit(train_loader, dev_loader, 50, alpha, labeled_data_len=1500)

0 151 Loss: 32058.003906
50 151 Loss: 31447.046147
100 151 Loss: 30987.901783
150 151 Loss: 30654.894829
Epoch: 1, train loss: 30654.8948, training accuracy 0.1914, dev set accuracy 0.2178
0 151 Loss: 29796.480469
50 151 Loss: 29337.005936
100 151 Loss: 29126.071492
150 151 Loss: 28930.423518
Epoch: 2, train loss: 28930.4235, training accuracy 0.2505, dev set accuracy 0.2667
0 151 Loss: 27343.324219
50 151 Loss: 27696.852903
100 151 Loss: 27659.622099
150 151 Loss: 27491.809797
Epoch: 3, train loss: 27491.8098, training accuracy 0.2886, dev set accuracy 0.3222
0 151 Loss: 26245.335938
50 151 Loss: 26934.046684
100 151 Loss: 26872.761719
150 151 Loss: 26730.880510
Epoch: 4, train loss: 26730.8805, training accuracy 0.3438, dev set accuracy 0.3289
0 151 Loss: 26574.585938
50 151 Loss: 26244.717984
100 151 Loss: 26186.863513
150 151 Loss: 26125.866398
Epoch: 5, train loss: 26125.8664, training accuracy 0.3857, dev set accuracy 0.3244
0 151 Loss: 26495.984375
50 151 Loss: 25459.733035
100 

In [23]:
# Best dev-set accuracy recorded on the M2 model during fitting.
m2.model.best_dev_accuracy

0.4444444444444444

# Baseline Model:
### Only using the labeled data for supervised learning

In [None]:
# The baseline is evaluated on exactly the same dev set as M2 ...
dev_ind_b = dev_ind
# ... and trained on the same 1050 labeled samples (labeled indices minus the dev set),
# without any of the unlabeled data.
train_ind_b = np.setdiff1d(labeled_ind, dev_ind)

In [25]:
# Sanity check: expect 450 dev samples and 1050 labeled training samples.
len(dev_ind_b),len(train_ind_b)

(450, 1050)

In [None]:
# Prepare the PyTorch dataloader for the baseline's labeled-only training set.
images_train_b = [data[idx] for idx in train_ind_b]
trainset_b = TorchInputData(X=images_train_b, Y=labels[train_ind_b])
train_loader_b = tud.DataLoader(dataset=trainset_b, batch_size=50, shuffle=True)

In [None]:
# Dev-set dataloader for the baseline (same samples as the M2 dev set).
images_dev_b = [data[idx] for idx in dev_ind_b]
devset_b = TorchInputData(X=images_dev_b, Y=labels[dev_ind_b])
dev_loader_b = tud.DataLoader(dataset=devset_b, batch_size=50, shuffle=True)

In [6]:
from baseline_cnn_stl10_cuda import BaselineConvNet

In [7]:
# Build the supervised baseline.
# NOTE(review): `path` is presumably the checkpoint file for this run - confirm in baseline_cnn_stl10_cuda.
baseline = BaselineConvNet(96, path="baseline_stl10_100epoch_5.pth")

### Structure of the baseline model: same as the classifier in the M2 model

In [8]:
# Display the module structure of the baseline network.
baseline.model

TwoLayerConvNet(
  (conv1_drop): Dropout2d(p=0.2, inplace=False)
  (conv1): Conv2d(3, 10, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (bn1): BatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2_drop): Dropout2d(p=0.2, inplace=False)
  (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (bn2): BatchNorm2d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (avgpool): AvgPool2d(kernel_size=4, stride=4, padding=0)
  (fc1): Linear(in_features=2880, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=10, bias=True)
)

In [None]:
# Hand the labeled-only train/dev loaders to the baseline, then train it.
# NOTE(review): 100 is presumably the number of epochs (the checkpoint file is
# named "...100epoch...") - confirm against baseline_cnn_stl10_cuda.
baseline.fit(train_loader_b,dev_loader_b)
baseline.train(100)

In [31]:
# Best dev-set accuracy recorded on the baseline model during training.
baseline.model.best_dev_accuracy

0.3977777777777778

# Test Set Performance: 
### The M2 model increases the classifier's test accuracy over the baseline (40% vs. 36%)

In [9]:
# Test-set dataloader.
# The STL10 "train" split (5000 images) is used as the held-out test set here,
# because the "test" split was consumed above for training and dev data.
# Evaluation gains nothing from shuffling, so batches are served in a fixed order.
testset_loader = torch.utils.data.DataLoader(train, batch_size=1000, shuffle=False, num_workers=0)

In [10]:
# Evaluate the baseline on the held-out test set, keeping the confusion matrix and accuracy.
conf_b, acc_b = baseline.test(
    testset_loader,
    path="baseline_stl10_100epoch_5.pth",
    return_confusion_matrix=True,
)


Test set: Accuracy: 1796/5000 (36%)



In [16]:
# Evaluate the M2 model on the same held-out test set, keeping the confusion matrix and accuracy.
conf, acc = m2.test(
    testset_loader,
    path="m2_stl10_0.1_50epoch_5.pth",
    return_confusion_matrix=True,
)


Test set: Accuracy: 2001/5000 (40%)

