In [80]:
import numpy as np
import pandas as pd
import os
import time

import torch
import torch.nn as nn
import torch.optim as optim

from torchvision import transforms 
from torchvision.datasets import MNIST

from sklearn.decomposition import PCA
from sklearn.preprocessing import normalize
from sklearn.cluster import KMeans

from alexnet import alexnet

In [48]:
def main():
    """Train an AlexNet on MNIST with DeepCluster-style pseudo-labels.

    Every epoch performs the following steps:

    1. strip the classification head from the network,
    2. compute features for the whole training set,
    3. reduce them with PCA, L2-normalize them and cluster them with k-means,
    4. use the cluster assignments as pseudo-labels,
    5. attach a freshly initialised linear head and train the network on
       those pseudo-labels for one pass over the data.

    Relies on `alexnet`, `mnist_dataloader`, `compute_features` and
    `train_model` being defined elsewhere in the notebook. Returns None.
    """
    # Imported locally under a distinct alias: the bare name `normalize` is
    # ambiguous in this notebook (it is also used for a torchvision transform),
    # and calling the transform with `norm='l2'` fails.
    from sklearn.preprocessing import normalize as l2_normalize

    # fix random seeds
    seed = 31
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)

    # device
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # hyper-parameters
    n_epoch = 200
    n_class = 3        # number of k-means clusters == outputs of the top layer
    lr = 1e-5
    momentum = 0.9
    wd = 1e-5

    #==== CNN setting ====
    # NOTE(review): `alexnet` comes from the project-local `alexnet` module;
    # its two positional arguments are passed through unchanged - confirm
    # their meaning against that module.
    model = alexnet(1, 3)

    # Remember the input width of the head so it can be rebuilt every epoch.
    fd = int(model.top_layer.weight.size()[1])
    model.top_layer = None
    model.to(device)
    torch.backends.cudnn.benchmark = True

    #==== optimizer ====
    # Created while `top_layer` is None, so it does not hold the head's
    # parameters; `train_model` builds its own optimizer for the head.
    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=lr,
        momentum=momentum,
        weight_decay=wd)

    #==== loss function ====
    criterion = nn.CrossEntropyLoss()

    #==== preprocessing of image data =====
    # NOTE(review): mean/std look like single-channel ImageNet statistics -
    # confirm they are intended for MNIST.
    mnist_normalize = transforms.Normalize(mean=[0.485], std=[0.229])
    image_transform = transforms.Compose([transforms.Resize(224),
                                          transforms.ToTensor(),
                                          mnist_normalize])

    #==== load the data ====
    # Unshuffled loader: feature row i must correspond to dataset sample i so
    # that the cluster assignments can be written back as per-sample targets.
    dataloader_feature = mnist_dataloader(batch_size=128, transform=image_transform, psuedo=None, shuffle=False)

    # training convnet with DeepCluster
    for epoch in range(n_epoch):

        #==== CNN setting ====
        # remove head (top layer and the last module of the classifier)
        model.top_layer = None
        model.classifier = nn.Sequential(*list(model.classifier.children())[:-1])

        # get the features for the whole dataset
        features = compute_features(dataloader_feature, model)

        #==== cluster the features =====
        # preprocessing PCA -> L2 normalization
        pca = PCA(n_components=256)
        features = pca.fit_transform(features)
        features = l2_normalize(features, norm='l2')

        # clustering with k-means; the assignments become the pseudo-labels
        clustering = KMeans(n_clusters=n_class)
        psuedo_label = clustering.fit_predict(features)

        #==== dataloader ====
        loader = mnist_dataloader(batch_size=128, transform=image_transform, psuedo=psuedo_label, shuffle=True)

        # set last fully connected layer: restore the ReLU removed above and
        # attach a newly initialised linear head on the model's device
        mlp = list(model.classifier.children())
        mlp.append(nn.ReLU(inplace=True).to(device))
        model.classifier = nn.Sequential(*mlp)
        model.top_layer = nn.Linear(fd, n_class)
        model.top_layer.weight.data.normal_(0, 0.01)
        model.top_layer.bias.data.zero_()
        model.top_layer.to(device)

        # train network with clusters as pseudo-labels
        model, loss_avg = train_model(loader, model, criterion, optimizer)
        
        

In [68]:
#==== preprocessing of image data =====
# The Normalize transform is bound to `mnist_normalize` instead of `normalize`:
# `normalize` is already imported from sklearn.preprocessing at the top of the
# notebook, and rebinding it here would replace that function for every cell
# executed afterwards.
# NOTE(review): mean/std look like single-channel ImageNet statistics -
# confirm they are intended for MNIST.
mnist_normalize = transforms.Normalize(mean=[0.485], std=[0.229])
image_transform = transforms.Compose([transforms.Resize(224),
                                      transforms.ToTensor(),
                                      mnist_normalize])

In [52]:
def mnist_dataloader(batch_size, transform, psuedo=None, shuffle=False):
    """Build a DataLoader over the MNIST training split.

    Args:
        batch_size: number of samples per batch.
        transform: transform applied to every image by the MNIST dataset.
        psuedo: optional sequence/array of integer pseudo-labels, one per
            training sample, that replaces the dataset's targets. When None
            the original targets are kept.
        shuffle: whether the loader reshuffles the data every epoch.

    Returns:
        A `torch.utils.data.DataLoader` over the (possibly relabelled) dataset.

    Raises:
        ValueError: if `psuedo` does not contain exactly one label per sample.
    """
    mnist = MNIST("MNIST", train=True, download=True, transform=transform)

    # `is not None` rather than `!= None`: the latter is evaluated elementwise
    # on NumPy arrays and makes the `if` raise for any array with more than
    # one element.
    if psuedo is not None:
        targets = torch.tensor(psuedo, dtype=torch.int64)
        if targets.shape != (len(mnist),):
            raise ValueError(
                "expected %d pseudo-labels, got an array of shape %s"
                % (len(mnist), tuple(targets.shape)))
        mnist.targets = targets

    # The caller's batch_size is honoured (it used to be overwritten with 128).
    dataloader = torch.utils.data.DataLoader(mnist, batch_size=batch_size, shuffle=shuffle)

    return dataloader

In [77]:
def compute_features(dataloader, model):
    """Run `model` over every batch of `dataloader` and stack the outputs.

    The labels yielded by the loader are ignored. The model is switched to
    eval mode and is left in eval mode on return.

    Args:
        dataloader: iterable of `(image, label)` batches exposing a `.dataset`
            attribute with a length.
        model: `torch.nn.Module` whose output for a batch is 2-D
            (samples x feature dimension).

    Returns:
        A float32 NumPy array with one row per processed sample, in loader
        order. An empty `(0, 0)` array is returned when the loader yields no
        batches.
    """
    # Feed inputs to wherever the model lives; fall back to the CUDA/CPU
    # availability check for models without parameters.
    try:
        device = next(model.parameters()).device
    except StopIteration:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    N = len(dataloader.dataset)

    model.eval()

    features = None
    offset = 0  # index of the next free row in `features`

    # Pure inference: disabling autograd avoids building a graph per batch.
    with torch.no_grad():
        # discard the label information in the dataloader
        for image, _ in dataloader:
            image = image.to(device)
            aux = model(image).cpu().numpy().astype('float32', copy=False)

            if features is None:
                # The feature width is only known after the first forward pass.
                features = np.zeros((N, aux.shape[1]), dtype='float32')

            # A running offset handles a short final batch (or any other
            # batch-size variation) without special cases.
            features[offset:offset + len(aux)] = aux
            offset += len(aux)

    if features is None:
        return np.zeros((0, 0), dtype='float32')

    # Equal to the full array unless the loader skipped samples.
    return features[:offset]

In [78]:
def train_model(loader, model, criterion, optimizer, top_layer_lr=1e-5):
    """Train `model` for one pass over `loader`.

    Two optimizers are stepped on every batch: the caller-supplied
    `optimizer` and a plain SGD optimizer created here for the parameters of
    `model.top_layer`.

    Args:
        loader: iterable of `(image, label)` batches.
        model: `torch.nn.Module` with a `top_layer` sub-module.
        criterion: loss callable invoked as `criterion(output, label)`.
        optimizer: optimizer for the caller's chosen model parameters.
        top_layer_lr: learning rate of the SGD optimizer built for
            `model.top_layer` (defaults to the previously hard-coded 1e-5).

    Returns:
        `(model, loss_avg)` where `loss_avg` is the mean of the per-batch
        losses (0.0 if the loader yields no batches). The value is also
        printed.
    """
    # Feed inputs to wherever the model lives; fall back to the CUDA/CPU
    # availability check for models without parameters.
    try:
        device = next(model.parameters()).device
    except StopIteration:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # switch to train mode
    model.train()

    # create an optimizer for the last fc layer
    optimizer_tl = torch.optim.SGD(model.top_layer.parameters(), lr=top_layer_lr)

    loss_sum = 0.0
    n_batches = 0

    for image, label in loader:
        # Both inputs and targets must be on the model's device.
        image = image.to(device)
        label = label.to(device)

        output = model(image)
        loss = criterion(output, label)

        # compute gradient and do SGD step
        optimizer.zero_grad()
        optimizer_tl.zero_grad()
        loss.backward()
        optimizer.step()
        optimizer_tl.step()

        loss_sum += loss.item()
        n_batches += 1

    # Average over the number of batches (not the batch size).
    loss_avg = loss_sum / n_batches if n_batches else 0.0

    print(loss_avg)

    return model, loss_avg


In [79]:
# Time the full training run. The elapsed wall-clock time (in seconds) is
# printed from a `finally` block so it is still reported when the run raises
# or is interrupted from the keyboard; the exception then propagates as usual.
start = time.time()
try:
    main()
finally:
    end = time.time()
    print(end-start)

KeyboardInterrupt: 