In [46]:
import torch.nn as nn
import torch.nn.functional as F
from torchvision.datasets import SVHN, MNIST
import torchvision
import numpy as np
import torch
import matplotlib.pyplot as plt
import torch.nn as nn
from torchvision import transforms
from torch import optim


feature_model = nn.Sequential(nn.Conv2d(1, 32, 5), nn.BatchNorm2d(32), nn.ReLU(), 
                      nn.MaxPool2d(2, stride=2),
                      nn.Conv2d(32, 64, 5), nn.BatchNorm2d(64), nn.ReLU(),
                      nn.Conv2d(64, 64, 3), nn.BatchNorm2d(64), nn.ReLU(),
                      nn.AdaptiveAvgPool2d((1,1)), nn.Flatten())

# For (b)-(c) add the task heads on top of the feature_model
# Note this model can adapt the averaging to the size so inputs of 32x32 and 28x28 both work
# Grayscale conversion for SVHN, you may use transforms.Grayscale(num_output_channels=1) found in torchvision


In [47]:
# Initialize train and test datasets
train_set = MNIST('../data',
                           train=True,
                           download=True,
                           transform=transforms.ToTensor())
test_set = MNIST('../data',
                          train=False,
                          download=True,
                          transform=transforms.ToTensor())

# Initialize train and test data loaders
train_mnist = torch.utils.data.DataLoader(train_set,
                                           batch_size=256,
                                           shuffle=True,
                                           drop_last=True)
test_mnist = torch.utils.data.DataLoader(test_set,
                                          batch_size=256,
                                          shuffle=True,
                                          drop_last=True)

In [48]:
train_set1 = SVHN(root='..data/', download=True, split='train',
                   transform=torchvision.transforms.Compose([transforms.ToTensor()]))

test_set1 = SVHN(root='..data/', download=True, split='test',
                   transform=torchvision.transforms.Compose([transforms.ToTensor()]))

train_svhn = torch.utils.data.DataLoader(train_set1, 
                                           batch_size=256,
                                           shuffle=True,
                                           drop_last=True)
test_svhn = torch.utils.data.DataLoader(test_set1,
                                          batch_size=256,
                                          shuffle=True,
                                          drop_last=True)

Using downloaded and verified file: ..data/train_32x32.mat
Using downloaded and verified file: ..data/test_32x32.mat


In [49]:
device = 'cuda'

## MNIST model
model_mnist = nn.Sequential(nn.Conv2d(1, 32, 5), nn.BatchNorm2d(32), nn.ReLU(), #For (e) use SVHN nn.Conv2d(3,32,5)
                      nn.MaxPool2d(2, stride=2),
                      nn.Conv2d(32, 64, 5), nn.BatchNorm2d(64), nn.ReLU(),
                      nn.Conv2d(64, 64, 3), nn.BatchNorm2d(64), nn.ReLU(),
                      nn.AdaptiveAvgPool2d((1,1)), nn.Flatten()).to(device)

from torch.utils import model_zoo
from collections import OrderedDict
### SVHN model, we will download one that is already trained to clasify svhn digits
model_urls = {
    'svhn': 'http://ml.cs.tsinghua.edu.cn/~chenxi/pytorch-models/svhn-f564f3d8.pth',
}

class SVHN(nn.Module):
    def __init__(self, features, n_channel, num_classes):
        super(SVHN, self).__init__()
        assert isinstance(features, nn.Sequential), type(features)
        self.features = features

        #We won't use this classifier
        self.classifier = nn.Sequential(
            nn.Linear(n_channel, num_classes)
        )
        print(self.features)
        print(self.classifier)

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

def make_layers(cfg, batch_norm=False):
    layers = []
    in_channels = 3
    for i, v in enumerate(cfg):
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            padding = v[1] if isinstance(v, tuple) else 1
            out_channels = v[0] if isinstance(v, tuple) else v
            conv2d = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=padding)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(out_channels, affine=False), nn.ReLU(), nn.Dropout(0.3)]
            else:
                layers += [conv2d, nn.ReLU(), nn.Dropout(0.3)]
            in_channels = out_channels
    return nn.Sequential(*layers)

def svhn_model(n_channel, pretrained=None):
    cfg = [n_channel, n_channel, 'M', 2*n_channel, 2*n_channel, 'M', 4*n_channel, 4*n_channel, 'M', (8*n_channel, 0), 'M']
    layers = make_layers(cfg, batch_norm=True)
    model = SVHN(layers, n_channel=8*n_channel, num_classes=10)
    if pretrained is not None:
        m = model_zoo.load_url(model_urls['svhn'])
        state_dict = m.state_dict() if isinstance(m, nn.Module) else m
        assert isinstance(state_dict, (dict, OrderedDict)), type(state_dict)
        model.load_state_dict(state_dict)

    return model


base_svhn = svhn_model(n_channel=32,pretrained=True).features
svhn_to_joint = nn.Linear(256,64)

model_svhn = nn.Sequential(base_svhn, nn.AdaptiveAvgPool2d((1,1)), nn.Flatten(), svhn_to_joint).to(device)


#Transformation for SVHN data, you need to use this normalization for the pre-trained model to work properly 
transform=transforms.Compose([
                    transforms.ToTensor(),
                    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                ])

AssertionError: Torch not compiled with CUDA enabled

In [None]:
optimizer = optim.Adam(list(model_mnist.parameters()) + list(svhn_to_joint.parameters()), lr=1e-5) # you may experiment with different learning rates
model_svhn.eval() #IMPORTANT: BEFORE running set to eval even for training to avoid dropout, we want to keep this fixed except the final layer, otherwise training will need to be much longer

In [45]:
device = 'cuda'
#s_labels is a vector with batch_size labels (0-9) for a minibatch of SVHN digits
#m_labels is a vector with batch_size labels (0-9) for a minibatch of MNIST digits 
iterations = 1000
train_losses = [] # use to append the avg loss for each minibatch 
triplet_loss = nn.TripletMarginLoss(margin=0.2)
for i in range(iterations):
  svhn_iter = iter(train_svhn)
  for m_data, m_labels in train_mnist:
    s_data, s_labels = next(svhn_iter)
    s_data
    s_labels
    m_data
    m_labels

    label_set_m = range(0,10)
    label_to_indices_m = {label: np.where(s_labels.cpu().numpy() == label)[0] for label in label_set_m}
    idx_pos_m = []
    idx_neg_m = []
    for lab in m_labels:
      positive_index_m = np.random.choice(label_to_indices_m[lab.item()])
      negative_label_m = np.random.choice(list(set(label_set_m) - set([lab.item()])))
      negative_index_m = np.random.choice(label_to_indices_m[negative_label_m])

      idx_pos_m.append(positive_index_m)
      idx_neg_m.append(negative_index_m)
    #idx_pos and idx_neg can now can now be used to index the MNIST data minibatch to give positives and negatives

    label_set_s = range(0,10)
    label_to_indices_s = {label: np.where(m_labels.cpu().numpy() == label)[0] for label in label_set_s}
    idx_pos_s = []
    idx_neg_s = []
    for lab in s_labels:
      positive_index_s = np.random.choice(label_to_indices_s[lab.item()])
      negative_label_s = np.random.choice(list(set(label_set_s) - set([lab.item()])))
      negative_index_s = np.random.choice(label_to_indices_s[negative_label_s])

      idx_pos_s.append(positive_index_s)
      idx_neg_s.append(negative_index_s)

    output1 = model_mnist(m_data)
    output2 = model_svhn(s_data)

    loss = triplet_loss(output1, model_svhn(s_data[idx_pos_m]), model_svhn(s_data[idx_neg_m])) + triplet_loss(output2, model_mnist(m_data[idx_pos_s]), model_mnist(m_data[idx_neg_s]))
    train_losses.append(loss.item())
    optimizer.zero_grad()

    loss.backward()
    optimizer.step()
    print(loss)
  

tensor(0.7286, grad_fn=<AddBackward0>)
tensor(0.7918, grad_fn=<AddBackward0>)
tensor(0.7297, grad_fn=<AddBackward0>)
tensor(0.8100, grad_fn=<AddBackward0>)
tensor(0.7213, grad_fn=<AddBackward0>)
tensor(0.7424, grad_fn=<AddBackward0>)
tensor(0.6885, grad_fn=<AddBackward0>)
tensor(0.8171, grad_fn=<AddBackward0>)
tensor(0.7682, grad_fn=<AddBackward0>)
tensor(0.7129, grad_fn=<AddBackward0>)
tensor(0.7642, grad_fn=<AddBackward0>)
tensor(0.7573, grad_fn=<AddBackward0>)
tensor(0.7544, grad_fn=<AddBackward0>)
tensor(0.7109, grad_fn=<AddBackward0>)
tensor(0.7202, grad_fn=<AddBackward0>)
tensor(0.7303, grad_fn=<AddBackward0>)
tensor(0.6877, grad_fn=<AddBackward0>)
tensor(0.6700, grad_fn=<AddBackward0>)
tensor(0.7086, grad_fn=<AddBackward0>)
tensor(0.6675, grad_fn=<AddBackward0>)
tensor(0.6926, grad_fn=<AddBackward0>)
tensor(0.6707, grad_fn=<AddBackward0>)
tensor(0.6863, grad_fn=<AddBackward0>)
tensor(0.6630, grad_fn=<AddBackward0>)
tensor(0.7164, grad_fn=<AddBackward0>)
tensor(0.6780, grad_fn=<A

KeyboardInterrupt: 