# Semi-Supervised Learning of MNIST CNN
Jared Nielsen



# **Results**


### MNIST
`model_B` achieves 90.6% accuracy on `data_A` when `model_A` has 89.9% accuracy on `data_A`.  
`model_B` achieves 96.6% accuracy on `data_A` when `model_A` has 97.9% accuracy on `data_A`.

### Fashion-MNIST
`model_B` achieves 81.1% accuracy on `data_A` when `model_A` has 83.5% accuracy on `data_A`.  
`model_B` achieves 85.5% accuracy on `data_A` when `model_A` has 87.6% accuracy on `data_A`.

# **Notes**

### Terminology
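`acc*` - The training accuracy of `model_A` when it is trained on all of the labeled data.  
`acc1` - Accuracy of `model_A` on `data_A` (the labeled split it is trained on)  
`acc2` - Accuracy of `model_A` on `data_B` (the split it never trains on)  
`acc3` - Accuracy of `model_B` on `data_A`  
`acc4` - Accuracy of `model_B` on `data_B` (scored against the true labels)  
`acc5` - Accuracy of `model_B` on `data_B_hat` (`data_B` relabeled with `model_A`'s predictions, which is what `model_B` is trained on)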

In [11]:
import torch
import torchvision
import numpy as np
import matplotlib.pyplot as plt
from mag.experiment import Experiment
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, random_split, TensorDataset
import torchvision.transforms as transforms

from tqdm import tqdm, tqdm_notebook, trange
from time import sleep
import ray

# import mnist_cnn
# from mnist_cnn import Net, model_A_path, model_B_path

import cifar_cnn
from cifar_cnn import Net, model_A_path, model_B_path

# tqdm_disable = True

### Load `dataset_A` and `dataset_B`

In [7]:
# Hyperparameters shared by the training and evaluation helpers in this notebook.
batch_size_train = 64
batch_size_test = 1000
learning_rate = 1e-3  # previously 0.01
momentum = 0.5
log_interval = 10  # the training loops checkpoint every `log_interval` batches

# Prefer the GPU when one is visible, otherwise fall back to the CPU.
cuda_available = torch.cuda.is_available()
print('using cuda' if cuda_available else 'using cpu')
device = torch.device('cuda' if cuda_available else 'cpu')

# Seed torch's global RNG before any split or model initialisation happens.
random_seed = 1
# torch.backends.cudnn.enabled = False
torch.manual_seed(random_seed)

# Fashion-MNIST variant of the experiment (disabled; swap with the CIFAR-10 block below).
# root_dir = "../data/fashion-mnist/"
# mnist_train = torchvision.datasets.FashionMNIST(root_dir, train=True, download=True,
#                              transform=torchvision.transforms.Compose([
#                                torchvision.transforms.ToTensor(),
#                                torchvision.transforms.Normalize(
#                                  (0.1307,), (0.3081,))
#                              ]))

# CIFAR-10 training split; each RGB channel is normalised with mean 0.5 and std 0.5.
root_dir = "../data/cifar10/"
cifar_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
data_train = torchvision.datasets.CIFAR10(
    root_dir, train=True, download=True, transform=cifar_transform)
len_data = len(data_train)

using cuda
Files already downloaded and verified


### Train `model_A` on `dataset_A`

In [27]:
def train(model_A, optimizer_A, epoch, dataloader):
    """Run one optimisation pass of ``model_A`` over ``dataloader``.

    Args:
        model_A: Network to optimise; it is put into training mode here.
        optimizer_A: Optimiser that owns ``model_A``'s parameters.
        epoch: Index of the current epoch. Accepted for the caller's
            convenience; the body does not read it.
        dataloader: Iterable yielding ``(data, target)`` batches.

    The loss is ``F.nll_loss``, so ``model_A`` is presumably expected to emit
    log-probabilities (TODO confirm against ``Net``). The model's state dict
    is written to ``model_A_path`` every ``log_interval`` batches.
    """
    model_A.train()
    for step, (inputs, labels) in enumerate(dataloader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer_A.zero_grad()
        batch_loss = F.nll_loss(model_A(inputs), labels)
        batch_loss.backward()
        optimizer_A.step()

        # Only checkpoint on every `log_interval`-th batch (including the first).
        if step % log_interval:
            continue
        torch.save(model_A.state_dict(), model_A_path)

def test(network, dataloader):
    """Evaluate ``network`` on every batch of ``dataloader`` and print a summary.

    Args:
        network: Model to score. It is switched to eval mode and left there.
        dataloader: Yields ``(data, target)`` batches and exposes ``.dataset``;
            ``len(dataloader.dataset)`` is the denominator of both averages.

    Returns:
        ``(acc, test_loss)``: accuracy as a percentage and the summed
        ``F.nll_loss`` divided by the dataset length.

    Raises:
        ZeroDivisionError: If ``dataloader.dataset`` is empty.
    """
    network.eval()
    test_loss = 0
    # Kept as a plain int so the arithmetic and formatting below also work
    # when the loader yields no batches at all.
    correct = 0
    with torch.no_grad():
        for data, target in dataloader:
            data, target = data.to(device), target.to(device)
            output = network(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            # Index of the highest score per sample, kept as a column vector.
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()
        test_loss /= len(dataloader.dataset)
        acc = 100 * correct / len(dataloader.dataset)
        print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
            test_loss, correct, len(dataloader.dataset), acc))

    return acc, test_loss

def train_model_A(model_A, opt_A, dataloader, n_epochs, test=False):
    """Train ``model_A`` on ``dataloader`` for ``n_epochs`` epochs.

    Args:
        model_A: Network to train.
        opt_A: Optimiser for ``model_A``.
        dataloader: Batches handed to ``train`` each epoch.
        n_epochs: Number of passes over ``dataloader``; ``0`` is a no-op.
        test: Unused. Kept so existing callers keep working. Note that it
            shadows the module-level ``test`` function inside this body.

    Returns:
        None. The model is updated in place.
    """
    for i_epoch in range(n_epochs):
        train(model_A=model_A, optimizer_A=opt_A, epoch=i_epoch,
              dataloader=dataloader)
        
def train_with_transfer_labels(model_A, model_B, optimizer_B, epoch, dataloader):
    """Train ``model_B`` for one pass using labels predicted by ``model_A``.

    For every batch, ``model_A``'s argmax prediction replaces the true target,
    and ``model_B`` is optimised against it with ``F.nll_loss``. The true
    targets yielded by ``dataloader`` are ignored.

    Args:
        model_A: Labelling network; put into eval mode and never updated here.
        model_B: Network being trained; put into training mode.
        optimizer_B: Optimiser that owns ``model_B``'s parameters.
        epoch: Index of the current epoch. The body does not read it.
        dataloader: Iterable yielding ``(data, target)`` batches.

    ``model_B``'s state dict is written to ``model_B_path`` every
    ``log_interval`` batches.
    """
    model_A.eval()
    model_B.train()
    for batch_idx, (data, _) in enumerate(dataloader):
        data = data.to(device)

        # model_A only supplies hard labels, so skip autograd bookkeeping for
        # its forward pass instead of building a graph that is never used.
        with torch.no_grad():
            target_hat = torch.argmax(model_A(data), dim=1)

        optimizer_B.zero_grad()
        output = model_B(data)
        loss_hat = F.nll_loss(output, target_hat)
        loss_hat.backward()
        optimizer_B.step()

        if batch_idx % log_interval == 0:
            torch.save(model_B.state_dict(), model_B_path)
            
def train_model_B(model_A, model_B, opt_B, dataloader, n_epochs):
    """Train ``model_B`` for ``n_epochs`` epochs on ``model_A``'s predicted labels.

    Args:
        model_A: Network whose predictions are used as training targets.
        model_B: Network being trained.
        opt_B: Optimiser for ``model_B``.
        dataloader: Batches handed to ``train_with_transfer_labels`` each epoch.
        n_epochs: Number of passes over ``dataloader``.

    Returns:
        None. ``model_B`` is updated in place.
    """
    # The bar tracks model_B's epochs, so label it accordingly.
    pbar = tqdm_notebook(range(n_epochs), desc='model_B', total=n_epochs)
    for i_epoch in pbar:
        train_with_transfer_labels(model_A=model_A, model_B=model_B, optimizer_B=opt_B,
                                  epoch=i_epoch, dataloader=dataloader)

# @ray.remote
def get_reverse_accuracy(pct, n_epochs):
    """Train ``model_A`` on a labeled split, then ``model_B`` on the rest via ``model_A``.

    ``data_train`` is randomly split into ``dataset_A`` (``int(len_data * pct)``
    samples) and ``dataset_B`` (the remainder). ``model_A`` is trained on
    ``dataset_A`` with ``train``; a fresh ``model_B`` is then trained on
    ``dataset_B`` with ``train_with_transfer_labels``. After every epoch the
    model being trained is scored on both splits with ``test``.

    Args:
        pct: Fraction of ``data_train`` assigned to ``dataset_A``.
        n_epochs: Number of epochs used for each of the two models.

    Returns:
        ``(acc1s, acc2s, acc3s, acc4s)``, four lists with one accuracy
        percentage per epoch: ``model_A`` on A, ``model_A`` on B,
        ``model_B`` on A, ``model_B`` on B.
    """
    n_split_A = int(len_data * pct)
    n_split_B = len_data - n_split_A
    dataset_A, dataset_B = random_split(data_train, [n_split_A, n_split_B])
    print("data_A: {}, data_B: {}".format(len(dataset_A), len(dataset_B)))
    loader_A = DataLoader(dataset_A, batch_size=batch_size_train, shuffle=True)
    loader_B = DataLoader(dataset_B, batch_size=batch_size_train, shuffle=True)

    def _score_on_both_splits(model):
        # Split A is always evaluated before split B; the loss is discarded.
        acc_on_A, _ = test(network=model, dataloader=loader_A)
        acc_on_B, _ = test(network=model, dataloader=loader_B)
        return acc_on_A, acc_on_B

    # Stage 1: model_A learns from the true labels of dataset_A.
    model_A = Net().to(device)
    opt_A = optim.Adam(model_A.parameters(), lr=learning_rate)
    acc1s, acc2s = [], []
    for i_epoch in range(n_epochs):
        train(model_A=model_A, optimizer_A=opt_A, epoch=i_epoch,
              dataloader=loader_A)
        on_A, on_B = _score_on_both_splits(model_A)
        acc1s.append(on_A)
        acc2s.append(on_B)

    # Stage 2: a fresh model_B learns dataset_B from model_A's predictions.
    model_B = Net().to(device)
    opt_B = optim.Adam(model_B.parameters(), lr=learning_rate)
    acc3s, acc4s = [], []
    for i_epoch in tqdm_notebook(range(n_epochs), desc='model_B', total=n_epochs):
        train_with_transfer_labels(model_A=model_A, model_B=model_B,
                                   optimizer_B=opt_B, epoch=i_epoch,
                                   dataloader=loader_B)
        on_A, on_B = _score_on_both_splits(model_B)
        acc3s.append(on_A)
        acc4s.append(on_B)

    return acc1s, acc2s, acc3s, acc4s


# Quick smoke run: half the data in each split, two epochs per model.
smoke_accuracies = get_reverse_accuracy(pct=0.5, n_epochs=2)
print(smoke_accuracies)
print("done")

data_A: 25000, data_B: 25000

Test set: Avg. loss: 1.6385, Accuracy: 9665/25000 (38.66%)


Test set: Avg. loss: 1.6503, Accuracy: 9419/25000 (37.68%)


Test set: Avg. loss: 1.5032, Accuracy: 11190/25000 (44.76%)


Test set: Avg. loss: 1.5331, Accuracy: 10918/25000 (43.67%)



HBox(children=(IntProgress(value=0, description='model_B', max=2), HTML(value='')))


Test set: Avg. loss: 2.2205, Accuracy: 9966/25000 (39.86%)


Test set: Avg. loss: 2.2300, Accuracy: 9876/25000 (39.50%)


Test set: Avg. loss: 2.4566, Accuracy: 10206/25000 (40.82%)


Test set: Avg. loss: 2.4851, Accuracy: 10137/25000 (40.55%)


([38.66, 44.76], [37.676, 43.672], [39.864, 40.824], [39.504, 40.548])
done


In [29]:
# Directory handed to `Experiment` for this CIFAR sweep.
experiments_dir = './experiments_cifar_v6'

n_epochs = 200

# Smallest split fraction considered: `min_samples` examples in data_A.
min_samples = 100
min_pct = min_samples / len(data_train)
max_pct = 0.5
print("min_pct: {:.3f}".format(min_pct))

# Full sweep (disabled): log-spaced fractions between min_pct and max_pct.
# pcts = np.logspace(np.log10(min_pct), np.log10(max_pct), num=10, base=10)
pcts = [0.05]
print("pcts: {}".format(pcts))

result_names = ("acc1s", "acc2s", "acc3s", "acc4s")
for pct in pcts:
    print("experimenting with pct={}".format(pct))
    config = dict(n_epochs=n_epochs, pct=pct)
    with Experiment(config=config, experiments_dir=experiments_dir) as experiment:
        # Read the settings back from the experiment object rather than the dict.
        config = experiment.config
        acc1, acc2, acc3, acc4 = get_reverse_accuracy(
            pct=config.pct, n_epochs=config.n_epochs)

        # Store each per-epoch accuracy history under its own result key.
        for result_name, history in zip(result_names, (acc1, acc2, acc3, acc4)):
            experiment.register_result(result_name, history)

min_pct: 0.002
pcts: [0.05]
experimenting with pct=0.05
data_A: 2500, data_B: 47500

Test set: Avg. loss: 2.1359, Accuracy: 541/2500 (21.64%)


Test set: Avg. loss: 2.1299, Accuracy: 10000/47500 (21.05%)


Test set: Avg. loss: 1.9545, Accuracy: 738/2500 (29.52%)


Test set: Avg. loss: 1.9567, Accuracy: 13265/47500 (27.93%)


Test set: Avg. loss: 1.8169, Accuracy: 805/2500 (32.20%)


Test set: Avg. loss: 1.8416, Accuracy: 14186/47500 (29.87%)


Test set: Avg. loss: 1.7334, Accuracy: 898/2500 (35.92%)


Test set: Avg. loss: 1.7741, Accuracy: 16256/47500 (34.22%)


Test set: Avg. loss: 1.7061, Accuracy: 891/2500 (35.64%)


Test set: Avg. loss: 1.7515, Accuracy: 15947/47500 (33.57%)


Test set: Avg. loss: 1.6289, Accuracy: 982/2500 (39.28%)


Test set: Avg. loss: 1.7071, Accuracy: 17057/47500 (35.91%)


Test set: Avg. loss: 1.6102, Accuracy: 1005/2500 (40.20%)


Test set: Avg. loss: 1.7013, Accuracy: 17234/47500 (36.28%)


Test set: Avg. loss: 1.5347, Accuracy: 1091/2500 (43.64%)


Test se

HBox(children=(IntProgress(value=0, description='model_B', max=200), HTML(value='')))


Test set: Avg. loss: 4.7899, Accuracy: 20571/47500 (43.31%)


Test set: Avg. loss: 1.5225, Accuracy: 1156/2500 (46.24%)


Test set: Avg. loss: 1.6210, Accuracy: 20280/47500 (42.69%)


Test set: Avg. loss: 1.4364, Accuracy: 1237/2500 (49.48%)


Test set: Avg. loss: 1.6031, Accuracy: 21043/47500 (44.30%)


Test set: Avg. loss: 1.3395, Accuracy: 1281/2500 (51.24%)


Test set: Avg. loss: 1.5666, Accuracy: 21448/47500 (45.15%)


Test set: Avg. loss: 1.3508, Accuracy: 1283/2500 (51.32%)


Test set: Avg. loss: 1.6255, Accuracy: 21289/47500 (44.82%)


Test set: Avg. loss: 1.3016, Accuracy: 1332/2500 (53.28%)


Test set: Avg. loss: 1.6187, Accuracy: 21523/47500 (45.31%)


Test set: Avg. loss: 1.2829, Accuracy: 1354/2500 (54.16%)


Test set: Avg. loss: 1.6508, Accuracy: 21492/47500 (45.25%)


Test set: Avg. loss: 1.2710, Accuracy: 1368/2500 (54.72%)


Test set: Avg. loss: 1.6963, Accuracy: 21233/47500 (44.70%)


Test set: Avg. loss: 1.2216, Accuracy: 1386/2500 (55.44%)


Test set: Avg. loss: 1.