In [None]:
from __future__ import print_function
import argparse
import os
import random
import torch
import torch.nn as nn
import torch.nn.parallel

import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
import torchvision.datasets as datasets

import torchvision.transforms as transforms
import torchvision.utils as vutils
from torch.autograd import Variable
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
import plotting
#import cv2
from torch.nn.utils import weight_norm
import scipy.misc
#%matplotlib inline

# Expose only the GPU with index 1 to CUDA for this process.
# NOTE(review): the device index is hard-coded — confirm it exists on the target machine.
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
# Disabled workaround, kept for reference:
# to prevent opencv from initializing CUDA in workers
#torch.randn(8).cuda()
#os.environ['CUDA_VISIBLE_DEVICES'] = ''

def gallery(array, ncols=3):
    """Tile a batch of equally sized images into a single grid image.

    Parameters
    ----------
    array : ndarray of shape (nindex, height, width, intensity)
        Stack of images to lay out.
    ncols : int
        Number of tiles per grid row; must divide ``nindex`` exactly.

    Returns
    -------
    ndarray of shape (height*nrows, width*ncols, intensity)
        Images placed in row-major order (image k lands at row k // ncols,
        column k % ncols).

    Raises
    ------
    AssertionError
        If ``nindex`` is not a multiple of ``ncols``.
    """
    count, tile_h, tile_w, channels = array.shape
    nrows = count // ncols
    assert count == nrows*ncols
    # (row, col, h, w, c) -> (row, h, col, w, c) so that flattening the first two
    # and the next two axes stitches tiles together instead of interleaving them.
    grid = array.reshape((nrows, ncols, tile_h, tile_w, channels))
    grid = grid.transpose(0, 2, 1, 3, 4)
    return grid.reshape((tile_h*nrows, tile_w*ncols, channels))

def save_samples():
    """Render 100 generator samples as a 10x10 grid and write it to 'outfile_noent_1.jpg'.

    Reads the module-level ``netG`` and ``noise`` (the current contents of ``noise``
    are used as-is; nothing is resampled here) and overwrites the same output file
    on every call.

    NOTE(review): ``scipy.misc.imsave`` is not available in recent SciPy releases —
    confirm the pinned SciPy version before running.
    """
    samples = netG(noise).data.cpu()[0:100]
    # Single-channel 28x28 images replicated to three channels, then moved to
    # channels-last layout for the NumPy-based tiling helper.
    rgb = samples.view(100,1,28,28).expand(100,3,28,28)
    tiles = rgb.permute(0,2,3,1).numpy().copy()
    mosaic = gallery(tiles,10)*.5+.5
    scipy.misc.imsave('outfile_noent_1.jpg', mosaic)


In [None]:
# --- Experiment configuration ---------------------------------------------
seed = 1                # seed applied to the CPU and CUDA generators below
batch_size = 100        # rows per labelled / unlabelled / generated batch
learning_rate = .003    # step size handed to both Adam optimisers at creation
unlabeled_weight = 1    # NOTE(review): not referenced in the visible notebook
count = 400             # NOTE(review): not referenced in the visible notebook

# Seed before any model or random tensor is created so a fresh
# "Restart & Run All" draws the same random numbers.
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)


In [None]:
# Images are only converted to tensors; the mean/std normalisation stays disabled.
_to_tensor = transforms.Compose([
    transforms.ToTensor(),
    #transforms.Normalize((0.1307,), (0.3081,))
])

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('.', train=True, download=True, transform=_to_tensor),
    batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('.', train=False, transform=_to_tensor),
    batch_size=batch_size, shuffle=True)


def _as_unit_rows(images):
    """Flatten a stack of 28x28 images into float rows of length 784, divided by 255."""
    return images.float().view(-1,28*28)/255


# Below, the loaders serve only as handles to the underlying datasets: the raw
# image/label tensors are pulled out directly rather than iterated batch by batch.
trainx = _as_unit_rows(train_loader.dataset.train_data)
trainy = train_loader.dataset.train_labels
#trainx.add_(-.5).mul_(2);

# Two independent copies of the training images, used as the unlabelled pools.
trainx_unl = trainx.clone()
trainx_unl2 = trainx.clone()

testx = _as_unit_rows(test_loader.dataset.test_data)
testy = test_loader.dataset.test_labels
#testx.add_(-.5).mul_(2);


In [None]:
# Tuple (batch_size, 100): same shape as the `noise` tensor created later in the notebook.
# NOTE(review): `noise_dim` itself is not referenced anywhere else in the visible code.
noise_dim = (batch_size, 100)

class _netG(nn.Module):
    def __init__(self):
        super(_netG, self).__init__()
        self.main = nn.Sequential(
            # input is Z, going into a convolution
            nn.Linear(100, 500, bias=False),
            nn.BatchNorm1d(500),
            nn.ReLU(True),
            # state size. (ngf*8) x 4 x 4
            nn.Linear(500, 500, bias=False),
            nn.BatchNorm1d(500),
            nn.ReLU(True),
            # state size. (ngf*2) x 8 x 8
            nn.Linear(500, 784, bias=False),
            nn.Sigmoid()
            # state size. (nc) x 32 x 32
        )

    def forward(self, input):
        output = self.main(input)
        return output

def weights_init(m):
    """Re-initialise one module in place; intended for ``Module.apply``.

    Dispatch is by class name:
    * names containing 'Conv' or 'Linear': weight ~ N(0, 0.1); bias is left untouched.
    * names containing 'BatchNorm': weight ~ N(1, 0.02) and bias set to 0.
    * anything else: unchanged.
    """
    name = m.__class__.__name__
    if 'Conv' in name or 'Linear' in name:
        m.weight.data.normal_(0.0, 0.1)
        return
    if 'BatchNorm' in name:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)
    
    
# Build the generator, move it to the GPU, then re-initialise its Linear/BatchNorm
# weights in place via weights_init. The order matters: the init runs after .cuda().
# The trailing ';' suppresses the module repr that the cell would otherwise display.
netG = _netG()
netG.cuda()
netG.apply(weights_init);


In [None]:
class DynamicGNoiseConv(nn.Module):
    """Additive Gaussian noise for feature maps, active only in training mode.

    One ``(1, 1, shape, shape)`` noise map is drawn per forward pass and broadcast
    over the leading (batch, channel) axes of the input, so every sample and
    channel in a batch receives the same spatial perturbation.

    Parameters
    ----------
    shape : int
        Spatial side length; inputs must end in ``(shape, shape)``.
    std : float
        Standard deviation of the zero-mean noise.
    """
    def __init__(self, shape, std=0.05):
        super().__init__()
        self.shape = shape
        self.std   = std

    def forward(self, x):
        # Use the module's own flag (toggled by .train()/.eval()) rather than a
        # notebook-level global, which is undefined until the training cell runs
        # and can disagree with the module's mode.
        if not self.training: return x
        # Allocate the noise from x so it follows x's device and dtype instead of
        # being pinned to CUDA when the module is constructed.
        noise = x.data.new(1, 1, self.shape, self.shape).normal_(0, self.std)
        return x + Variable(noise).expand_as(x)

class DynamicGNoise(nn.Module):
    """Additive element-wise Gaussian noise, active only in training mode.

    Parameters
    ----------
    shape : int
        Feature width of the inputs this layer is attached to. Stored for
        reference only; the noise is sized from the actual input.
    std : float
        Standard deviation of the zero-mean noise.
    """
    def __init__(self, shape, std=0.1):
        super().__init__()
        self.shape = shape
        self.std   = std

    def forward(self, x):
        # Use the module's own flag (toggled by .train()/.eval()) rather than a
        # notebook-level global, which is undefined until the training cell runs
        # and can disagree with the module's mode.
        if not self.training: return x
        # Draw noise with exactly x's size, device and dtype. A buffer fixed to the
        # global batch_size on CUDA breaks for any other batch size or device.
        noise = x.data.new(x.size()).normal_(0, self.std)
        return x + Variable(noise)
    
    
# Layer widths used by _netD: its i-th Linear layer maps ins[i] -> ins[i+1]
# (784 inputs down to 10 outputs).
ins = [784,1000,500,250,250,250,10]
# Noise standard deviation passed to the DynamicGNoise placed in front of the i-th Linear layer.
sigma = [.3,.5,.5,.5,.5,.5]
  

class _netD(nn.Module):
    """Six-layer weight-normalised MLP with Gaussian noise in front of every layer.

    Layer widths come from the module-level ``ins`` list and noise levels from
    ``sigma``. Each Linear layer is initialised with weight ~ N(0, 0.1) and zero
    bias before being wrapped by ``weight_norm``.

    ``forward`` returns ``(logits, features)``: the output of the last layer
    (no ReLU applied) and a copy of the post-ReLU activations of layer index 3.
    """

    def __init__(self):
        super(_netD, self).__init__()

        noise_layers = []
        dense_layers = []
        for k in range(6):
            noise_layers.append(DynamicGNoise(ins[k], sigma[k]))
            fc = nn.Linear(ins[k], ins[k+1])
            fc.weight.data.normal_(0.0, 0.1)
            fc.bias.data.fill_(0)
            dense_layers.append(weight_norm(fc))
        # Register the Linear stack first, then the noise stack.
        self.linear = nn.ModuleList(dense_layers)
        self.noise = nn.ModuleList(noise_layers)

    def forward(self, x):
        for k, (perturb, fc) in enumerate(zip(self.noise, self.linear)):
            x = fc(perturb(x))
            if k != 5:
                # every layer except the last is followed by a ReLU
                x = F.relu(x)
            if k == 3:
                # intermediate activations handed back alongside the logits
                features = x.clone()
        return x, features

# Discriminator/classifier on the GPU. The generic weights_init pass is left
# disabled; _netD initialises its own layers in __init__.
netD = _netD()
netD.cuda()
#netD.apply(weights_init);

# One Adam optimiser per network, both with beta1 = 0.5.
optimizerC = optim.Adam(netD.parameters(), lr=learning_rate, betas=(.5, .999))
optimizerG = optim.Adam(netG.parameters(), lr=learning_rate, betas=(.5, .999))
# Shared latent batch; resampled in place (noise.data.normal_) by the training functions.
noise = Variable(torch.randn(batch_size,100).cuda())
loss_fn = nn.CrossEntropyLoss()
loss_fn.cuda()
# Buffer of 3000 class-probability rows, read by Estep and refreshed by train_classifier.
# Starts as random rows normalised so that each row sums to 1.
phi_all = torch.rand(3000,10).cuda()
phi_all /= phi_all.sum(1).unsqueeze(1).expand_as(phi_all)
# 10x10 identity; Estep selects rows from it to build one-hot assignments.
classMat = torch.eye(10).cuda()

In [None]:
def initialize(x_unl):
    """Data-dependent rescaling of netD's weight-normalised layers.

    Pushes ``x_unl`` through netD's Linear layers one at a time (bypassing the
    noise modules). For each layer, ``weight_g`` is multiplied by the inverse
    root-mean-square of that layer's pre-activations, and the bias is set to
    ``-mean * inverse_rms``. The input to the next layer is the ReLU of the
    rescaled layer's output.

    Side effects: puts netD in eval mode and (re)creates two module-level lists:
    ``avg`` (clones of every netD parameter) and ``saved_g`` (clones of
    ``weight_g`` for the first five layers).
    """
    global avg, saved_g
    netD.eval()
    hidden = Variable(x_unl.cuda())

    last = 5
    for idx in range(last + 1):
        layer = netD.linear[idx]
        pre_act = layer(hidden).data
        mean = pre_act.mean(0)
        inv_rms = 1/(pre_act**2).mean(0).sqrt().view(-1,1)
        layer.weight_g.data.copy_(layer.weight_g.data*inv_rms)
        layer.bias.data.copy_(-mean*inv_rms.squeeze())
        if idx != last:
            # propagate through the freshly rescaled layer; the final layer
            # has nothing after it, so no further forward pass is needed
            hidden = F.relu(layer(hidden))

    # Snapshot of all parameters, in netD.parameters() order.
    avg = [None]*18
    for slot, param in enumerate(netD.parameters()):
        avg[slot] = param.data.clone()

    # Gains of the five hidden layers.
    saved_g = [netD.linear[idx].weight_g.data.clone() for idx in range(0,5)]


In [None]:
def Estep(phi):
    """Turn a batch of class probabilities into hard one-hot assignments.

    Each column of ``phi`` is divided by a per-class total: the batch's own
    column sums during epoch 0, and the column sums of the module-level
    ``phi_all`` buffer afterwards. Every row is then assigned to its arg-max
    class, and the matching row of ``classMat`` is returned for it.

    Reads the module-level names ``epoch``, ``phi_all`` and ``classMat``.
    """
    totals_source = phi if epoch == 0 else phi_all
    px_z = phi/totals_source.sum(0).expand_as(phi)
    winners = px_z.max(1)[1]
    # Disabled soft alternative kept from the original:
    #z = px_z**2
    #z /= z.sum(1, keepdim=True).expand_as(z)
    return classMat.index_select(0, winners.squeeze())
    

In [None]:
def train_classifier(x_lab, labels, x_unl):
    """Run one optimiser step on netD using labelled, unlabelled and generated batches.

    Parameters
    ----------
    x_lab, labels : labelled inputs and their class indices (moved to the GPU here).
    x_unl : unlabelled inputs (moved to the GPU here).

    Returns
    -------
    (train_err, loss_lab, loss_unl, loss_gen) where the three losses are the
    scalar values of the corresponding terms.
    NOTE(review): ``train_err`` is the number of correct labelled predictions
    divided by ``batch_size``, i.e. an accuracy despite its name.

    Side effects: sets netD to train mode, resamples the global ``noise`` in
    place, and replaces the global ``phi_all`` buffer.
    """
    global phi_all
    netD.train()
    
    optimizerC.zero_grad()
    # --- supervised term: cross-entropy on the labelled batch ---
    labels = Variable(labels.cuda())
    x_lab = Variable(x_lab.cuda())
    output_before_softmax_lab = netD(x_lab)[0]
    loss_lab = loss_fn(output_before_softmax_lab, labels)
    
    # --- unsupervised term on the unlabelled batch ---
    x_unl = Variable(x_unl.cuda())
    output_before_softmax_unl = netD(x_unl)[0]
    output_after_softmax_unl = F.softmax(output_before_softmax_unl)
    # Prepend the newest predictions and keep the first 3000 rows; this happens
    # before Estep, so the current batch is already part of phi_all when Estep reads it.
    phi_all = torch.cat((output_after_softmax_unl.data, phi_all),0)[0:3000]
    # Hard one-hot assignments, computed from detached data (no gradient through z).
    z = Estep(output_after_softmax_unl.data)
    z = Variable(z)
    log_phi = torch.log(output_after_softmax_unl+1e-5)
    
    # exponent[i, j] = z_i . log_phi_j; subtracting the diagonal makes each row
    # relative to the sample's own score, so the diagonal of exponent2 is 0.
    exponent = torch.mm(z, log_phi.t())
    # NOTE(review): the reshape uses the global batch_size, so x_unl must hold exactly batch_size rows.
    exponent2 = exponent - torch.diag(exponent).view(batch_size,1).expand_as(exponent)
    temp = exponent2.exp()
    px_z_inv = temp.sum(1)
    loss_unl = px_z_inv.log().mean()

    
    # --- generated-sample term ---
    noise.data.normal_(0,1)
    gen_data = netG(noise)
    # detach(): this step must not send gradients into the generator.
    output_before_softmax_gen = netD(gen_data.detach())[0]
    output_after_softmax_gen = F.softmax(output_before_softmax_gen)
    # Negative mean log-probability taken over all classes and all generated samples.
    loss_gen = (torch.log(output_after_softmax_gen+1e-5)).mean(1).mean()*-1

    loss = loss_lab + loss_unl + loss_gen
    loss.backward()
    
    optimizerC.step()
    train_err = (output_before_softmax_lab.data.max(1)[1]==labels.data).sum()/batch_size
    return train_err, loss_lab.data[0], loss_unl.data[0], loss_gen.data[0]
    

def test_classifier(x_test, labels):
    """Evaluate netD on one batch and return the fraction of correct predictions.

    Parameters
    ----------
    x_test : input rows (moved to the GPU here).
    labels : tensor of class indices, one per row of ``x_test`` (CPU or GPU).

    Returns
    -------
    float
        Number of rows whose arg-max logit equals the label, divided by the
        number of labels. The local is named ``test_err`` for historical
        reasons, but the value is an accuracy.

    Side effects: puts netD in eval mode.
    """
    netD.eval()
    x_test = Variable(x_test.cuda())
    # Predictions live on the GPU, so the labels must too (as train_classifier
    # already does); comparing a CUDA tensor with a CPU tensor fails.
    labels = labels.cuda()
    output_before_softmax = netD(x_test)[0]
    predictions = output_before_softmax.data.max(1)[1]
    n_correct = (predictions == labels).sum()
    # float() accepts both a Python int and a 0-dim tensor, which keeps the
    # division a true division on every PyTorch version. Normalising by the
    # actual number of labels keeps the result right for a short final batch.
    test_err = float(n_correct) / max(labels.size(0), 1)
    return test_err

In [None]:
def train_generator(x_unl):
    """Run one optimiser step on netG with a feature-matching objective.

    The loss is the mean squared difference between the batch-averaged netD
    features (second output of ``netD``) of the real batch ``x_unl`` and of a
    freshly generated batch. Only ``optimizerG`` is stepped here.

    Side effects: sets netD to train mode and resamples the global ``noise``.
    Returns the scalar loss value.
    """
    netD.train()

    optimizerG.zero_grad()
    real_batch = Variable(x_unl.cuda())
    noise.data.normal_(0,1)
    fake_batch = netG(noise)

    # Real batch first, generated batch second.
    _, feat_real = netD(real_batch)
    _, feat_fake = netD(fake_batch)
    gap = feat_real.mean(0) - feat_fake.mean(0)
    fm_loss = (gap**2).mean()
    fm_loss.backward()
    optimizerG.step()

    return fm_loss.data[0]

In [None]:
# Select the labelled subset: shuffle the training set once, then keep the
# first 10 examples of each of the 10 digit classes (100 rows in total).
shuffle = torch.randperm(trainx.size(0))
trainx, trainy = trainx.index_select(0,shuffle), trainy.index_select(0,shuffle)

txs = torch.zeros(100,784)   # labelled images, grouped by class in blocks of 10
tys = torch.zeros(100)       # their labels, stored in a float tensor
for i in range(0,10):
    inds = trainy.eq(i).nonzero()[0:10]
    rows = inds.squeeze()
    lo, hi = i*10, (i+1)*10
    txs[lo:hi] = trainx.index_select(0, rows)
    tys[lo:hi] = trainy.index_select(0, rows)
    


In [None]:
import time
import scipy.misc

# Per-epoch accumulators: each entry sums the per-batch values of one epoch and
# is divided by numBatches when the epoch summary is printed.
train_err = torch.zeros(150)
loss_lab = torch.zeros(150)
loss_unl = torch.zeros(150)
loss_gen = torch.zeros(150)
lossG_gen = torch.zeros(150)
# One evaluation score per epoch (see the end of the loop body).
accuracy = torch.zeros(150)

# NOTE(review): `scale` and `begin` are updated below but never read in the visible code.
scale = 1
for epoch in range(0,150):
    scale = scale*.99
    
    begin = time.time()
    lr = .003
    # Module-level flag kept in step with netD.train()/netD.eval() for any code
    # that consults the global name `training`.
    training = True

    # Re-apply the (constant) learning rate to both optimisers every epoch.
    for param_group in optimizerC.param_groups:
        param_group['lr'] = lr
        
    for param_group in optimizerG.param_groups:
        param_group['lr'] = lr   
    
        
    # Tile the labelled set (txs, tys) until it has at least as many rows as the
    # unlabelled pool, using a fresh permutation of the labelled rows for each tile.
    trainx = torch.zeros(int(np.ceil(trainx_unl.size(0)/float(txs.size(0))))*txs.size(0),784)
    trainy = torch.zeros(int(np.ceil(trainx_unl.size(0)/float(txs.size(0))))*txs.size(0))

    for t in range(int(np.ceil(trainx_unl.size(0)/float(txs.size(0))))):
        inds = torch.randperm(txs.size(0))
        trainx[t*txs.size(0):(t+1)*txs.size(0)] = txs.index_select(0,inds)
        trainy[t*txs.size(0):(t+1)*txs.size(0)] = tys.index_select(0,inds)

    # Independently reshuffle the two unlabelled pools (classifier / generator).
    trainx_unl = trainx_unl[torch.randperm(trainx_unl.size(0))]
    trainx_unl2 = trainx_unl2[torch.randperm(trainx_unl2.size(0))]

    if epoch==0:
        print(trainx.shape)
        initialize(trainx[:500]) # data based initialization

    
    
    numBatches = 0
    for i in range(0, trainx_unl.size(0), batch_size):
        numBatches +=1
        # Classifier step on aligned slices of the labelled and unlabelled data.
        x_lab = trainx[i:i+batch_size]
        labels = trainy[i:i+batch_size].long()
        x_unl = trainx_unl[i:i+batch_size]
        te, ll, lu, lg = train_classifier(x_lab, labels, x_unl)
        train_err[epoch] += te
        loss_lab[epoch] += ll
        loss_unl[epoch] += lu
        loss_gen[epoch] += lg
        
        # Generator step on the second unlabelled pool.
        x_unl = trainx_unl2[i:i+batch_size]
        lgg = train_generator(x_unl)
        lossG_gen[epoch] += lgg
        # Exponential moving average of every netD parameter (rate 0.0001),
        # in netD.parameters() order to match the `avg` list built by initialize().
        j=0
        for param in netD.parameters():
            avg[j] = avg[j] + .0001*(param.data - avg[j])
            j += 1 
        # Restore the gains of the first five layers to the values stored by
        # initialize(), undoing whatever the optimiser step did to them.
        for j in range(0,5):
            netD.linear[j].weight_g.data.copy_(saved_g[j])

    


    # Swap the averaged parameters into netD for evaluation, keeping a backup
    # of the live parameters so training can resume from them afterwards.
    j=0
    backup = [None]*18
    for param in netD.parameters():
        backup[j] = param.data.clone()
        param.data.copy_(avg[j])
        j += 1 

    #Computation of test accuracy
    # Collect the raw netD outputs (logits) for the whole test set, batch by batch.
    posterior = torch.zeros(testx.size(0), 10)
    netD.eval()
    training = False
    for i in range(0, testx.size(0), batch_size):
        real_cpu = testx[i:i+batch_size].clone()
        input = Variable(real_cpu.cuda())
        output = netD(input)[0]
        posterior[i:i+batch_size] = output.data.cpu().clone() #Estep(output.data)


    # Put the live (non-averaged) parameters back.
    j=0
    for param in netD.parameters():
        param.data.copy_(backup[j])
        j += 1 
    

    # intersect[i][j] = number of test samples predicted as class i whose true label is j.
    _, indices_fake = posterior.cpu().max(1)
    indices_fake = indices_fake.squeeze().float()
    indices_real = testy
    intersect = torch.zeros(10,10)
    for i in range(0,10):
        for j in range(0,10):
            intersect[i][j] = ((indices_fake==i)*(indices_real==j)).sum()

    # For each predicted class take the count of its most frequent true label, then
    # divide the sum by the number of test samples.
    # NOTE(review): this value is printed under the label "test_err" although it is
    # an accuracy-like score; likewise "train_err" is a fraction of correct predictions.
    accuracy[epoch] = intersect.max(1)[0].sum()/intersect.sum()
    print("epoch:%d, loss_lab:%.4f, loss_gen:%.4f, loss_unl:%.4f, lossG_gen:%.4f, train_err:%.4f, test_err:%.4f" % (epoch, loss_lab[epoch]/numBatches, loss_gen[epoch]/numBatches, 
          loss_unl[epoch]/numBatches, lossG_gen[epoch]/numBatches, train_err[epoch]/numBatches, accuracy[epoch]))

    # Overwrite the sample grid image with the generator's current output.
    save_samples()