In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
from torchvision.utils import save_image
from torch.utils.data import Dataset

In [2]:
batch_size = 100
num_threads = 8

inp_dim = 784
hid_dim = 120
out_dim = 10

hid_dim1 = 256
hid_dim2 = 128
hid_dim3 = 32

device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")

In [3]:
def data_loader(txt_file):
    if txt_file==None:
        train_data = torchvision.datasets.MNIST(root='./data', 
                                                train=True, 
                                                transform=transforms.ToTensor(),
                                                download=True)
        test_data = torchvision.datasets.MNIST(root='./data', 
                                               train=False, 
                                               transform=transforms.ToTensor(), 
                                               download=True)
    else:
        train_data = MNIST_variation(txt_file='./data/'+txt_file+'_train.amat')
        test_data = MNIST_variation(txt_file='./data/'+txt_file+'_test.amat')
    train_loader = torch.utils.data.DataLoader(train_data, 
                                               batch_size=batch_size, 
                                               shuffle=True, 
                                               num_workers=num_threads)
    test_loader = torch.utils.data.DataLoader(test_data, 
                                              batch_size=batch_size, 
                                              shuffle=False, 
                                              num_workers=num_threads)   
    return train_loader, test_loader

In [4]:
class MNIST_variation(Dataset):
    def __init__(self, txt_file):
        raw_data = np.loadtxt(txt_file)
        self.dataset = raw_data
        x = torch.from_numpy(np.array(self.dataset[:, :-1], dtype=np.float32))
        self.x = x.view(-1,28,28)
        self.y = torch.from_numpy(np.array(self.dataset[:, -1], dtype=np.int_))
        
    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

    def __len__(self):
        return len(self.dataset)


In [5]:
def output_plot(outputs, nrow=10):
    outputs = outputs.view(-1, 1, 28, 28)
    save_image(outputs, filename='plots/outputs_%d.png' % (int(portion*100)), 
               nrow=nrow, padding=1, scale_each=True)

def filter_plot(model, nrow=12):
    weights = model.W.detach().clone()
    filters = torch.t(weights)
    filters = filters.view(hid_dim, 1, 28, 28)
    save_image(filters, filename='plots/filters_%d.png' % (int(portion*100)),
               nrow=nrow, padding=1, normalize=True, scale_each=False)

def plot_loss_curve(portion, train_list, test_list):
    plt.plot(train_list, 'r-', label='train loss')
    plt.plot(test_list, 'r--', label='test loss')
    plt.legend()
    plt.savefig('plots/loss_curves_%d.png' % (int(portion*100)))
    plt.close()

def plot_accur_curve(portion, accur_list):
    plt.plot(accur_list, 'b-', label='accuracy')
    plt.legend()
    plt.savefig('plots/accuracy_%d.png' % (int(portion*100)))
    plt.close()
    

In [6]:
class DenoisingAE(nn.Module):
    def __init__(self, mode='train', supervised=False, inp_dim=inp_dim, hid_dim=hid_dim, out_dim=out_dim):
        super(DenoisingAE, self).__init__()
        self.mode = mode
        self.supervised = supervised
        self.inp_dim = inp_dim
        self.hid_dim = hid_dim
        self.out_dim = out_dim
        
        self.b = torch.nn.Parameter(torch.zeros([self.hid_dim], requires_grad=True))
        self.c = torch.nn.Parameter(torch.zeros([self.inp_dim], requires_grad=True))
        self.W = torch.nn.Parameter(torch.empty((self.inp_dim, self.hid_dim), requires_grad=True))
        torch.nn.init.uniform_(self.W, a=-1/self.inp_dim, b=1/self.inp_dim)
        self.layer = nn.Linear(self.hid_dim, self.out_dim)
        
        self.sigmoid = nn.Sigmoid()

    def encoderNet(self, x):
        hid = self.sigmoid(self.b + torch.matmul(x, self.W))
        return hid
        
    def decoderNet(self, hid):
        out = self.sigmoid(self.c + torch.matmul(hid, torch.t(self.W)))
        return out
        
    def corruption(self, inputs):
        new_batch_size = inputs.size()[0]
        base = np.array([])
        for i in range(new_batch_size):
            tmp = np.random.choice([0.0, 1.0], size=self.inp_dim, p=[portion, 1-portion])
            base = np.append(base, tmp)
        base = np.reshape(base, [new_batch_size, self.inp_dim])
        base = torch.from_numpy(base).float().to(device)
        corr_inputs = inputs * base
        return corr_inputs
    
    def forward(self, x):
        if self.mode == 'train':
            corr_inputs = self.corruption(x)
            hid = self.encoderNet(corr_inputs)
        elif self.mode == 'test':
            hid = self.encoderNet(x)
        if self.supervised:
            y = self.layer(hid)
        elif not self.supervised:
            y = self.decoderNet(hid)
        return y

In [7]:
def DAE_train(model, supervised=False, txt_file=None):
    train_loader, test_loader = data_loader(txt_file)
    
    if supervised:
        criterion = nn.CrossEntropyLoss()
    elif not supervised:
        criterion = nn.BCELoss()
        
    #for name, param in model.named_parameters():
    #    if param.requires_grad:
    #        print(name)
    
    optimizer = optim.Adam(model.parameters(), lr)

    train_list, test_list = [], []
    
    for epoch in range(n_epoch):
        model.train()
        model.mode = 'train'
        train_loss = 0.0
        cnt = 0
        for x, y in train_loader:
            cnt += 1
            inputs, labels = x, y
            new_batch_size = inputs.size()[0]
            inputs = inputs.view(new_batch_size, -1).to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            if supervised:
                loss = criterion(outputs, labels)
            elif not supervised:
                loss = criterion(outputs, inputs)
            train_loss += loss * new_batch_size / batch_size
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
        train_loss = train_loss / cnt
        train_list.append(train_loss)
        
        with torch.no_grad():
            model.eval()
            model.mode = 'test'
            test_loss = 0.0
            cnt = 0
            for x, y in test_loader:
                cnt += 1
                inputs, labels = x, y
                new_batch_size = x.size()[0]
                inputs = inputs.view(new_batch_size, -1).to(device)
                labels = labels.to(device)
                outputs = model(inputs)
                if supervised:
                    loss = criterion(outputs, labels)
                elif not supervised:
                    loss = criterion(outputs, inputs)
                test_loss += loss * new_batch_size / batch_size
            
            test_loss = test_loss / cnt
            test_list.append(test_loss)
            
        torch.save(model.state_dict(), 'models/DenoisingAE_%d.pt' % (int(portion*100)))
        model.mode = 'train'
        
        if (epoch+1) % 1 == 0:
            print('[Epoch %d] train_loss: %.3f, test_loss: %.3f' % (epoch+1, train_loss, test_loss))
    
    if not supervised:
        output_plot(outputs)

In [None]:
# 1st experiment : filter plot in unsupervised learning setting.
if __name__ == '__main__':
    print(device)
    
    portion = 0.4
    lr = 1e-3
    n_epoch = 5
    
    model = DenoisingAE().to(device)
    model.load_state_dict(torch.load('models/DenoisingAE_%d.pt' % (int(portion*100))))
    DAE_train(model=model)
    filter_plot(model)
   

In [8]:
class SdA_3(nn.Module):
    def __init__(self, step):
        super(SdA_3, self).__init__()
        self.step = step
        if self.step == 1:
            bool1, bool2, bool3 = False, False, False
        elif self.step == 2:
            bool1, bool2, bool3 = True, False, False
        elif self.step == 3:
            bool1, bool2, bool3 = True, True, False
        elif self.step == 4:
            bool1, bool2, bool3 = True, True, True
        
        self.layer1 = DenoisingAE(supervised=bool1, inp_dim=inp_dim, hid_dim=hid_dim1, out_dim=hid_dim2)
        self.layer2 = DenoisingAE(supervised=bool2, inp_dim=hid_dim1, hid_dim=hid_dim2, out_dim=hid_dim3)
        self.layer3 = DenoisingAE(supervised=bool3, inp_dim=hid_dim2, hid_dim=hid_dim3, out_dim=out_dim)
        
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):
        if self.step == 1:
            hid1 = self.layer1(x)
            y = hid1
        elif self.step == 2:
            hid1 = self.layer1.encoderNet(x)
            hid2 = self.layer2(hid1)
            y = self.layer1.decoderNet(hid2)
        elif self.step == 3:
            hid1 = self.layer1.encoderNet(x)
            hid2 = self.layer2.encoderNet(hid1)
            hid3 = self.layer3(hid2)
            y_ = self.layer2.decoderNet(hid3)
            y = self.layer1.decoderNet(y_)
        elif self.step == 4:
            hid1 = self.layer1.encoderNet(x)
            hid2 = self.layer2.encoderNet(hid1)
            out = self.layer3(hid2)
            y = out
        return y

In [31]:
def SdA_train(model, supervised=False, txt_file=None):
    train_loader, test_loader = data_loader(txt_file)
    
    if supervised:
        criterion = nn.CrossEntropyLoss()
    elif not supervised:
        criterion = nn.BCELoss()
        
    #for name, param in model.named_parameters():
    #    if param.requires_grad:
    #        print(name)
    
    optimizer = optim.Adam(model.parameters(), lr)

    train_list, test_list, accur_list = [], [], []
    
    for epoch in range(n_epoch):
        model.train()
        model.mode = 'train'
        train_loss = 0.0
        cnt = 0
        for x, y in train_loader:
            cnt += 1
            inputs, labels = x, y
            new_batch_size = inputs.size()[0]
            inputs = inputs.view(new_batch_size, -1).to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            if supervised:
                loss = criterion(outputs, labels)
            elif not supervised:
                loss = criterion(outputs, inputs)
            train_loss += loss * new_batch_size / batch_size
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
        train_loss = train_loss / cnt
        train_list.append(train_loss)
        
        with torch.no_grad():
            model.eval()
            model.mode = 'test'
            test_loss = 0.0
            cnt = 0
            length = 0
            accur = torch.tensor(0.0)
            for x, y in test_loader:
                cnt += 1
                length += len(x)
                inputs, labels = x, y
                new_batch_size = x.size()[0]
                inputs = inputs.view(new_batch_size, -1).to(device)
                labels = labels.to(device)
                outputs = model(inputs)
                if supervised:
                    loss = criterion(outputs, labels)
                    accur += sum(torch.argmax(outputs, dim=1) == labels)
                elif not supervised:
                    loss = criterion(outputs, inputs)
                test_loss += loss * new_batch_size / batch_size
            
            model.mode = 'train'
            accuracy = accur.item() / length * 100
            test_loss = test_loss / cnt
            test_list.append(test_loss)
            accur_list.append(accuracy)
            
            
        torch.save(model.state_dict(), 'models/SdA_3_%d.pt' % (int(portion*100)))
        if (epoch+1) % 1 == 0:
            print('[Epoch %d] train_loss: %.3f, test_loss: %.3f' % (epoch+1, train_loss, test_loss))
    
    if supervised:
        print('accuracy is', max(accur_list),'%')
        plot_loss_curve(portion, train_list, test_list)
        plot_accur_curve(portion, accur_list)


In [37]:
def param_fix(layer):
    for param in layer.parameters():
        param.requires_grad = False

def pretrain(txt_file=None):
    for i in range(3):
        step = i+1
        model = SdA_3(step=step).to(device)
        if step == 1:
            param_fix(model.layer2)
            param_fix(model.layer3)
        elif step == 2:
            param_fix(model.layer1)
            param_fix(model.layer3)
        elif step == 3:
            param_fix(model.layer1)
            param_fix(model.layer2)
        SdA_train(model=model, supervised=False, txt_file=txt_file)
    print('pretraining ends')
                
def fine_tune(txt_file=None):
    model = SdA_3(step=4).to(device)
    model.load_state_dict(torch.load('models/SdA_3_%d.pt' % (int(portion*100))))
    SdA_train(model=model, supervised=True, txt_file=txt_file)
    print('fine tuning ends')
    

In [56]:
# 2nd experiment : find error rate when applying SdA-3 on MNIST variations
if __name__ == '__main__':
    print(device)

    txt_file = 'rot-bg-img'
    n_epoch = 10
    lr = 1e-2
    portion = 0.25
    pretrain(txt_file)
    
    n_epoch = 100
    lr = 1e-3
    fine_tune(txt_file)

cuda:1
[Epoch 1] train_loss: 0.653, test_loss: 0.601
[Epoch 2] train_loss: 0.587, test_loss: 0.581
[Epoch 3] train_loss: 0.580, test_loss: 0.579
[Epoch 4] train_loss: 0.579, test_loss: 0.579
[Epoch 5] train_loss: 0.578, test_loss: 0.578
[Epoch 6] train_loss: 0.578, test_loss: 0.578
[Epoch 7] train_loss: 0.578, test_loss: 0.578
[Epoch 8] train_loss: 0.578, test_loss: 0.578
[Epoch 9] train_loss: 0.577, test_loss: 0.578
[Epoch 10] train_loss: 0.577, test_loss: 0.578
[Epoch 1] train_loss: 0.693, test_loss: 0.693
[Epoch 2] train_loss: 0.693, test_loss: 0.693
[Epoch 3] train_loss: 0.693, test_loss: 0.693
[Epoch 4] train_loss: 0.693, test_loss: 0.693
[Epoch 5] train_loss: 0.693, test_loss: 0.693
[Epoch 6] train_loss: 0.693, test_loss: 0.693
[Epoch 7] train_loss: 0.693, test_loss: 0.693
[Epoch 8] train_loss: 0.693, test_loss: 0.693
[Epoch 9] train_loss: 0.693, test_loss: 0.693
[Epoch 10] train_loss: 0.693, test_loss: 0.693
[Epoch 1] train_loss: 0.693, test_loss: 0.693
[Epoch 2] train_loss: 0.6