# Deep Clustering
[Source code](https://github.com/facebookresearch/deepcluster)



In [0]:
from google.colab import drive
mydrive = '/content/drive/'
drive.mount(mydrive)

Mounted at /content/drive/


In [0]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2019 NVIDIA Corporation
Built on Sun_Jul_28_19:07:16_PDT_2019
Cuda compilation tools, release 10.1, V10.1.243


Change to CUDA 8.0

In [0]:
# !wget https://developer.nvidia.com/compute/cuda/8.0/Prod2/local_installers/cuda-repo-ubuntu1604-8-0-local-ga2_8.0.61-1_amd64-deb
# !dpkg -i cuda-repo-ubuntu1604-8-0-local-ga2_8.0.61-1_amd64-deb
# !apt-get update
# !apt-get install cuda=8.0.61-1

In [0]:
!pip install mkl
!pip install faiss-gpu==1.6.1

Collecting faiss-gpu==1.6.1
[?25l  Downloading https://files.pythonhosted.org/packages/0a/8d/d630c7ec7ad93feed005994c9849843d33bed08cf621ffb74fe9f81a45e2/faiss_gpu-1.6.1-cp36-cp36m-manylinux2010_x86_64.whl (41.0MB)
[K     |████████████████████████████████| 41.0MB 74kB/s 
Installing collected packages: faiss-gpu
Successfully installed faiss-gpu-1.6.1


# Import libraries

In [0]:
import faiss
import torch.backends.cudnn as cudnn

In [0]:
import cv2, numpy as np
import os, glob
import matplotlib.pyplot as plt
import time
import math

In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [0]:
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms, utils
import torchvision.datasets as datasets

In [0]:
"""
from google.colab import files
print('Please upload kaggle.json.')
files.upload() #this will prompt you to update the json

!pip install -q kaggle
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!ls ~/.kaggle
!chmod 600 /root/.kaggle/kaggle.json
!kaggle datasets download -d moltean/fruits
!unzip fruits.zip
"""

Please upload kaggle.json.


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: fruits-360/Training/Tomato 3/112_100.jpg  
  inflating: fruits-360/Training/Tomato 3/113_100.jpg  
  inflating: fruits-360/Training/Tomato 3/114_100.jpg  
  inflating: fruits-360/Training/Tomato 3/115_100.jpg  
  inflating: fruits-360/Training/Tomato 3/116_100.jpg  
  inflating: fruits-360/Training/Tomato 3/117_100.jpg  
  inflating: fruits-360/Training/Tomato 3/119_100.jpg  
  inflating: fruits-360/Training/Tomato 3/120_100.jpg  
  inflating: fruits-360/Training/Tomato 3/121_100.jpg  
  inflating: fruits-360/Training/Tomato 3/122_100.jpg  
  inflating: fruits-360/Training/Tomato 3/123_100.jpg  
  inflating: fruits-360/Training/Tomato 3/124_100.jpg  
  inflating: fruits-360/Training/Tomato 3/125_100.jpg  
  inflating: fruits-360/Training/Tomato 3/126_100.jpg  
  inflating: fruits-360/Training/Tomato 3/127_100.jpg  
  inflating: fruits-360/Training/Tomato 3/128_100.jpg  
  inflating: fruits-360/Training/Tomato

# Load data

In [0]:
mydrive = '/content/drive/'
img_path = '/content/fruits-360/Training'

# img_path =  'My Drive/Project/0425guava-classified/'
test_path =  '/content/fruits-360/Test'

batch_size = 32

## preprocessing of data & dataloader

In [0]:
"""

tra = transforms.Compose([
                transforms.Resize((224, 224),interpolation=3),
                transforms.ToTensor() # Turn to tensor to do some calculation
            ])
print(os.path.join(mydrive,img_path))
guava_dataset = datasets.ImageFolder(os.path.join(img_path),
                                     transform=tra)
print('len(guava_dataset) = ',len(guava_dataset))
guava_dataloader = DataLoader(guava_dataset, batch_size ,shuffle=True)
print('len(guava_dataloader) = ',len(guava_dataloader))

(mean,std) = compute_mean_and_std(guava_dataloader)

"""
# fruits-360 mean and std
mean = [0.6838, 0.5784, 0.5035]
std = [0.2577, 0.3210, 0.3654]

normalize = transforms.Normalize( mean = mean, std = std )
print('mean = {0} \n, std = {1} \n' .format(mean, std))
# ============================================================================
tra = transforms.Compose([
                transforms.Resize((224, 224),interpolation=3),
                transforms.ToTensor(),
                normalize
            ])
guava_dataset = datasets.ImageFolder(os.path.join(img_path),
                                     transform=tra)
guava_dataloader =  DataLoader(guava_dataset, batch_size ,shuffle=True)

print(len(os.listdir(img_path)))

mean = [0.6838, 0.5784, 0.5035] 
, std = [0.2577, 0.321, 0.3654] 

131


# (*)Parameter settings

In [0]:
#######################################

arch = 'VGG16'
# arch = 'alexnet'

num_classes = len(os.listdir(img_path))


# how many epochs of training between two consecutive reassignments of clusters
reassign = 100 # 300
# number of data loading workers 
# num_workers = 0 to solve TypeError: Caught TypeError in DataLoader worker process 0
workers = 24

start_epoch = 1
epochs = 200

# criterion_ = 'MSELoss'
criterion_ = 'CrossEntropyLoss'

opt = 'SGD'
# opt = 'Adam'
opt_lr = 0.00001    #0.05
opt_momentum = 0.9
# opt_wd:weight decay (pow)
opt_wd = -3       #-5

verbose = True

# path to checkpoint
# resume = os.path.join(mydrive,'My Drive/Project/checkpoint', 'checkpoint.pth.tar')
checkpath = 'checkpoint_0602'
resume =  os.path.join(mydrive,'My Drive/Project/checkpoint_0602', 'checkpoint.pth.tar')

mode = 'train'
#######################################

# New Section

# Class & Functions

## Class 

### AverageMeter

In [0]:
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

### Kmeans (deepcluster)

In [0]:
class Kmeans(object):
    def __init__(self, k):
        self.k = k

    def cluster(self, data, verbose=False):
        """Performs k-means clustering.
            Args:
                x_data (np.array N * dim): data to cluster
        """
        end = time.time()

        # PCA-reducing, whitening and L2-normalization
        xb = preprocess_features(data) #

        # cluster the data
        I, loss = run_kmeans(xb, self.k, verbose)
        self.images_lists = [[] for i in range(self.k)]
        for i in range(len(data)):
            self.images_lists[I[i]].append(i)

        if verbose:
            print('k-means time: {0:.0f} s'.format(time.time() - end))

        return loss

### UnifLabelSampler

In [0]:
from torch.utils.data.sampler import Sampler
class UnifLabelSampler(Sampler):
    """Samples elements uniformely accross pseudolabels.
        Args:
            N (int): size of returned iterator.
            images_lists: dict of key (target), value (list of data with this target)
    """

    def __init__(self, N, images_lists):
        self.N = N
        self.images_lists = images_lists
        self.indexes = self.generate_indexes_epoch()

    def generate_indexes_epoch(self):
        nmb_non_empty_clusters = 0
        for i in range(len(self.images_lists)):
            if len(self.images_lists[i]) != 0:
                nmb_non_empty_clusters += 1

        size_per_pseudolabel = int(self.N / nmb_non_empty_clusters) + 1
        res = np.array([])

        for i in range(len(self.images_lists)):
            # skip empty clusters
            if len(self.images_lists[i]) == 0:
                continue
            indexes = np.random.choice(
                self.images_lists[i],
                size_per_pseudolabel,
                replace=(len(self.images_lists[i]) <= size_per_pseudolabel)
            )
            res = np.concatenate((res, indexes))

        np.random.shuffle(res)
        res = list(res.astype('int'))
        if len(res) >= self.N:
            return res[:self.N]
        res += res[: (self.N - len(res))]
        return res

    def __iter__(self):
        return iter(self.indexes)

    def __len__(self):
        return len(self.indexes)

### ReassignedDataset

In [0]:
import torch.utils.data as data
class ReassignedDataset(data.Dataset):
    """A dataset where the new images labels are given in argument.
    Args:
        image_indexes (list): list of data indexes
        pseudolabels (list): list of labels for each data
        dataset (list): list of tuples with paths to images
        transform (callable, optional): a function/transform that takes in
                                        an PIL image and returns a
                                        transformed version
    """

    def __init__(self, image_indexes, pseudolabels, dataset, transform=None):
        self.imgs = self.make_dataset(image_indexes, pseudolabels, dataset)
        self.transform = transform

    def make_dataset(self, image_indexes, pseudolabels, dataset):
        label_to_idx = {label: idx for idx, label in enumerate(set(pseudolabels))}
        images = []
        for j, idx in enumerate(image_indexes):
            path = dataset[idx][0]
            pseudolabel = label_to_idx[pseudolabels[j]]
            images.append((path, pseudolabel))
        return images

    def __getitem__(self, index):
        """
        Args:
            index (int): index of data
        Returns:
            tuple: (image, pseudolabel) where pseudolabel is the cluster of index datapoint
        """
        path, pseudolabel = self.imgs[index]
        img = pil_loader(path)
        if self.transform is not None:
            img = self.transform(img)
        return img, pseudolabel

    def __len__(self):
        return len(self.imgs)


## functions

### compute_mean_and_std (not used)
Unless you upload new data

In [0]:
def compute_mean_and_std(loader):
    mean = 0.
    std = 0.
    for images, _ in loader:
        batch_samples = images.size(0) # batch size (the last batch can have smaller size!)
        images = images.view(batch_samples, images.size(1), -1)
        mean += images.mean(2).sum(0)
        std += images.std(2).sum(0)
    mean /= len(loader.dataset)
    std /= len(loader.dataset)
    return mean, std

### preprocess_features

In [0]:
def preprocess_features(npdata, pca=256):
    """Preprocess an array of features.
    Args:
        npdata (np.array N * ndim): features to preprocess
        pca (int): dim of output
    Returns:
        np.array of dim N * pca: data PCA-reduced, whitened and L2-normalized
    """
    _, ndim = npdata.shape
    npdata =  npdata.astype('float32')

    # Apply PCA-whitening with Faiss
    mat = faiss.PCAMatrix (ndim, pca, eigen_power=-0.5)
    mat.train(npdata)
    assert mat.is_trained
    npdata = mat.apply_py(npdata)

    # L2 normalization
    row_sums = np.linalg.norm(npdata, axis=1)
    npdata = npdata / row_sums[:, np.newaxis]

    return npdata

### compute_features

In [0]:
def compute_features(dataloader, model, N, batch_size):
    if arch == 'VGG16':
        s = 7
    elif arch == 'alexnet':
        s = 6
    print('Compute features')
    model.eval()
    feature_model = nn.Sequential(
        *list(model.features.children()),
        nn.AdaptiveAvgPool2d(output_size=(s, s)),
        Flatten(),
        *list(model.classifier.children())[:-1]
    )
    feature_model.eval()
    # print('f',feature_model)
    # discard the label information (catch by _) in the dataloader 
    with torch.no_grad(): #?
        for i, (input_tensor, _) in enumerate(dataloader):
            if torch.cuda.is_available():
                input_ = torch.autograd.Variable(input_tensor.cuda()) 
            else:
                input_ = torch.autograd.Variable(input_tensor)
            # print('input_',input_)
            # aux: the feature of a certain data
            aux = feature_model(input_).data.cpu().numpy()
            # print('aux',aux)
            # print(aux.shape)
            # initialize
            if i == 0:
                features = np.zeros((N, aux.shape[1]), dtype='float32')

            aux = aux.astype('float32')
            if i < len(dataloader) - 1:
                features[i * batch_size: (i + 1) * batch_size] = aux
            else:
                features[i * batch_size:] = aux
            # print(aux.shape) # 1,4096

    return features

### run_kmeans

In [0]:
def run_kmeans(x, nmb_clusters, verbose=False):
    """Runs kmeans on 1 GPU.
    Args:
        x: data
        nmb_clusters (int): number of clusters
    Returns:
        list: ids of data in each cluster
    """
    n_data, d = x.shape

    # faiss implementation of k-means
    clus = faiss.Clustering(d, nmb_clusters)

    # Change faiss seed at each k-means so that the randomly picked
    # initialization centroids do not correspond to the same feature ids
    # from an epoch to another.
    clus.seed = np.random.randint(1234)

    clus.niter = 20 # clustering iterations
    clus.max_points_per_centroid = 10000000
    res = faiss.StandardGpuResources()
    flat_config = faiss.GpuIndexFlatConfig()
    flat_config.useFloat16 = False
    flat_config.device = 0
    if torch.cuda.is_available():
        torch.cuda.empty_cache() ##?
    index = faiss.GpuIndexFlatL2(res, d, flat_config)

    # perform the training
    clus.train(x, index)
    _, I = index.search(x, 1)
    losses = faiss.vector_to_array(clus.obj)

    if verbose:
        print('k-means loss evolution: {0}'.format(losses))

    return [int(n[0]) for n in I], losses[-1]

### cluster_assign

In [0]:
def cluster_assign(images_lists, dataset, transforms):
    """Creates a dataset from clustering, with clusters as labels.
    Args:
        images_lists (list of list): for each cluster, the list of image indexes
                                    belonging to this cluster
        dataset (list): initial dataset
    Returns:
        ReassignedDataset(torch.utils.data.Dataset): a dataset with clusters as
                                                     labels
    """
    assert images_lists is not None
    pseudolabels = []
    image_indexes = []
    for cluster, images in enumerate(images_lists):
        image_indexes.extend(images)
        pseudolabels.extend([cluster] * len(images))
    """
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    t = transforms.Compose([transforms.RandomResizedCrop(224),
                            transforms.RandomHorizontalFlip(),
                            transforms.ToTensor(),
                            normalize])
    """
    return ReassignedDataset(image_indexes, pseudolabels, dataset, transforms)

### pil_loader

In [0]:
from PIL import Image
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

def pil_loader(path):
    """Loads an image.
    Args:
        path (string): path to image file
    Returns:
        Image
    """
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert('RGB')

## Model

### VGG16

In [0]:
class VGG(nn.Module):

    def __init__(self, features, num_classes, sobel):
        super(VGG, self).__init__()
        self.features = features
        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(7, 7))
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(inplace=True),
            # nn.BatchNorm1d(4096), ##
            nn.Dropout(0.5),
            nn.Linear(4096, 4096), 
            nn.ReLU(inplace=True)
        )
        # top layer: the last output layer
        self.top_layer = nn.Sequential( ########
            nn.Linear(4096, num_classes)  #,
            #nn.Softmax()
        )
        # initialize
        self._initialize_weights()
        """
        model.top_layer[:-1].weight.data.normal_(0, 0.01)
        model.top_layer[:-1].bias.data.zero_()
        """
        if sobel:
            grayscale = nn.Conv2d(3, 1, kernel_size=1, stride=1, padding=0)
            grayscale.weight.data.fill_(1.0 / 3.0)
            grayscale.bias.data.zero_()
            sobel_filter = nn.Conv2d(1, 2, kernel_size=3, stride=1, padding=1)
            sobel_filter.weight.data[0,0].copy_(
                torch.FloatTensor([[1, 0, -1], [2, 0, -2], [1, 0, -1]])
            )
            sobel_filter.weight.data[1,0].copy_(
                torch.FloatTensor([[1, 2, 1], [0, 0, 0], [-1, -2, -1]])
            )
            sobel_filter.bias.data.zero_()
            self.sobel = nn.Sequential(grayscale, sobel_filter)
            for p in self.sobel.parameters():
                p.requires_grad = False
        else:
            self.sobel = None

    def forward(self, x):
        if self.sobel:
            x = self.sobel(x)
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        if self.top_layer:
            x = self.top_layer(x)
        return x

    def _initialize_weights(self):
        for y,m in enumerate(self.modules()):
            if isinstance(m, nn.Conv2d):
                #print(y)
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                for i in range(m.out_channels):
                    m.weight.data[i].normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()


make feature layer

In [0]:
def make_layers(input_dim, batch_norm):
    layers = []
    in_channels = input_dim
    cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']
    for v in cfg:
        if v == 'M':
            layers = layers + [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            if batch_norm:
                layers = layers + [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers = layers + [conv2d, nn.ReLU(inplace=True)]
            in_channels = v
    return nn.Sequential(*layers)

In [0]:
def vgg16(sobel=False, batch_norm=True, out=num_classes):
    dim = 2 + int(not sobel)
    model = VGG(make_layers(dim, batch_norm), out, sobel)
    return model

### alexnet

In [0]:
class AlexNet(nn.Module):
    def __init__(self, features, num_classes, sobel):
        super(AlexNet, self).__init__()
        self.features = features
        self.classifier = nn.Sequential(nn.Dropout(0.5),
                            nn.Linear(256 * 6 * 6, 4096),
                            nn.ReLU(inplace=True),
                            nn.Dropout(0.5),
                            nn.Linear(4096, 4096),
                            nn.ReLU(inplace=True))

        self.top_layer = nn.Linear(4096, num_classes)
        self._initialize_weights()

        if sobel:
            grayscale = nn.Conv2d(3, 1, kernel_size=1, stride=1, padding=0)
            grayscale.weight.data.fill_(1.0 / 3.0)
            grayscale.bias.data.zero_()
            sobel_filter = nn.Conv2d(1, 2, kernel_size=3, stride=1, padding=1)
            sobel_filter.weight.data[0, 0].copy_(
                torch.FloatTensor([[1, 0, -1], [2, 0, -2], [1, 0, -1]])
            )
            sobel_filter.weight.data[1, 0].copy_(
                torch.FloatTensor([[1, 2, 1], [0, 0, 0], [-1, -2, -1]])
            )
            sobel_filter.bias.data.zero_()
            self.sobel = nn.Sequential(grayscale, sobel_filter)
            for p in self.sobel.parameters():
                p.requires_grad = False
        else:
            self.sobel = None

    def forward(self, x):
        if self.sobel:
            x = self.sobel(x)
        x = self.features(x)
        x = x.view(x.size(0), 256 * 6 * 6)
        x = self.classifier(x)
        if self.top_layer:
            x = self.top_layer(x)
        return x

    def _initialize_weights(self):
        for y, m in enumerate(self.modules()):
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                for i in range(m.out_channels):
                    m.weight.data[i].normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()


In [0]:
def make_layers_features(cfg, input_dim, bn):
    layers = []
    in_channels = input_dim
    for v in cfg:
        if v == 'M':
            layers = layers + [nn.MaxPool2d(kernel_size=3, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, v[0], kernel_size=v[1], stride=v[2], padding=v[3])
            if bn:
                layers = layers + [conv2d, nn.BatchNorm2d(v[0]), nn.ReLU(inplace=True)]
            else:
                layers = layers + [conv2d, nn.ReLU(inplace=True)]
            in_channels = v[0]
    return nn.Sequential(*layers)

In [0]:
CFG = {
    '2012': [(96, 11, 4, 2), 'M', (256, 5, 1, 2), 'M', (384, 3, 1, 1), (384, 3, 1, 1), (256, 3, 1, 1), 'M']
}

def alexnet(sobel=False, batch_norm=True, out=num_classes):
    dim = 2 + int(not sobel)
    model = AlexNet(make_layers_features(CFG['2012'], dim, bn=batch_norm), out, sobel)
    return model

### Flatten
To avoid size mismatch of sequential()

In [0]:
class Flatten(nn.Module):
    def __init__(self):
        super(Flatten, self).__init__()
        
    def forward(self, x):
        return x.view(x.size(0), -1)

## *Train function

In [0]:
def train(loader, model, crit, opt, epoch, opt_lr, opt_wd, verbose = True):
    
    batch_time = AverageMeter()
    losses = AverageMeter()
    data_time = AverageMeter()
    forward_time = AverageMeter()
    backward_time = AverageMeter()

    # switch to train mode
    model.train()

    # print(model)
    # create an optimizer for the last fc layer
    optimizer_tl = torch.optim.SGD(
        model.top_layer.parameters(),
        lr=opt_lr,
        weight_decay=opt_wd**10,
    )

    end = time.time()

    print('len(loader): ', len(loader))
    for i, (input_tensor, target) in enumerate(loader):
        data_time.update(time.time() - end)

        if torch.cuda.is_available():
            target = target.cuda(async=True)
        if torch.cuda.is_available():
            input_var = torch.autograd.Variable(input_tensor.cuda())
        else:
            input_var = torch.autograd.Variable(input_tensor)
        target_var = torch.autograd.Variable(target)
        
        output = model(input_var)
        # output: predict result
        # target_var: (pesudo) ground truth
        # assert 0 not in output, 'Gotcha!'

        loss = crit(output, target_var)
        
        # print(output)
        # record loss
        losses.update(loss.data, input_tensor.size(0))

        # compute gradient and do SGD step(
        # with  torch.autograd.detect_anomaly():
            
        opt.zero_grad()
        optimizer_tl.zero_grad()
            
        loss.backward()
        # torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm, norm_type=2)
        # torch.nn.utils.clip_grad_value_(model.parameters(), 1)
        opt.step()
        optimizer_tl.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if verbose and (i % 200) == 0:
            
            print ('out:')
            print(output)
            print(output.shape)
            print ('cluster_var:')
            print(target_var)
            print(target_var.shape)
            
            print('Progress report:\t'
                  'Epoch: [{0}][{1}/{2}]\t'
                  'Time: {batch_time.val:.3f} (avg: {batch_time.avg:.3f})\t'
                  'Data: {data_time.val:.3f} (avg: {data_time.avg:.3f})\t'
                  'Loss: {loss.val:.4f} (avg: {loss.avg:.4f})'
                  .format(epoch, i, len(loader), batch_time=batch_time,
                          data_time=data_time, loss=losses))
        # ???
    return losses.avg


# Main 

## Model, optimizer, loss function, and cluster

cudnn.benchmark = True [link](https://zhuanlan.zhihu.com/p/73711222)

In [0]:
import torchvision.models as models

def MyModel():
    model = model = models.vgg16(pretrained=True)

    model.classifier = nn.Sequential(*list(model.classifier.children())[:-1])
    model.top_layer = nn.Linear(4096, num_classes)
    print(model)


    for layer in model.features:
        layer.trainable = False

    # unfreeze
    for layer in model.features[-10:]:
        layer.trainable = True
        print(layer)


    model.eval()

    return model

In [0]:

 # CNN
if arch == 'VGG16':
    """
    model = vgg16(sobel=False, batch_norm=True, out=num_classes)
    """
    model = MyModel()

elif arch == 'alexnet':
    model = alexnet(sobel=False, batch_norm=True, out=num_classes)

if torch.cuda.is_available():
    model = model.cuda()
cudnn.benchmark = True 

# create optimizer
# If you see NaN's in loss try gradient clipping and data normalisation.
# Normalising data is a must (i.e normalize input data such that mean = 0 and variance =1)
assert opt is not None, 'Please select SGD or Adam'
if opt == 'SGD':
    optimizer = torch.optim.SGD(
        filter(lambda x: x.requires_grad, model.parameters()),
        lr=opt_lr,
        momentum=opt_momentum,
        weight_decay=10**opt_wd
    )
elif opt == 'Adam':
    optimizer = torch.optim.Adam(
        filter(lambda x: x.requires_grad, model.parameters()),
        lr=opt_lr, 
        betas=(opt_momentum, 0.999),
        weight_decay=10**opt_wd
    )
# define loss function
if torch.cuda.is_available():
    if criterion_ == 'CrossEntropyLoss':
        criterion = nn.CrossEntropyLoss().cuda()
    elif criterion_ == 'MSELoss':
        criterion = nn.MSELoss().cuda()
else:
    if criterion_ == 'CrossEntropyLoss':
        criterion = nn.CrossEntropyLoss()
    elif criterion_ == 'MSELoss':
        criterion = nn.MSELoss()

# specify clustering algorithm
deepcluster = Kmeans(num_classes)
print(model)
print(opt)
print('================================================')
feature_model = nn.Sequential(
    *list(model.features.children()),
    nn.AdaptiveAvgPool2d(output_size=(7, 7)),
    Flatten(),
    *list(model.classifier.children())[:-1]
    )
# print(feature_model)
print('================================================')


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256

## Resume from a checkpoint
Load last start_epoch, model_state, and resume path. 

In [0]:
# remove last layer to load weight properly
# because we don't have parameters of last layer    

# model.classifier = nn.Sequential(*list(model.classifier.children())[:-1])
# print(model)

if resume:
    if os.path.isfile(resume):

        layer = model.top_layer
        model.top_layer = None

        print("=> loading checkpoint '{}'".format(resume))
        checkpoint = torch.load(resume)
        start_epoch = checkpoint['epoch']
        # remove top_layer parameters from checkpoint
        for key in list(checkpoint['state_dict']):
            if 'top_layer' in key:
                del checkpoint['state_dict'][key]
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("=> loaded checkpoint '{}' (epoch {})"
              .format(resume, checkpoint['epoch']))
        
        model.top_layer = layer
    else:
        print("=> no checkpoint found at '{}'".format(resume))
else:
    print("=> Let's go. ")
# add ReLU back
# mlp = list(model.classifier.children())
# add another ReLU
# mlp.append(nn.ReLU(inplace=True).cuda())
# model.classifier = nn.Sequential(*mlp)
# print(model)

=> no checkpoint found at '/content/drive/My Drive/Project/checkpoint_0602/checkpoint.pth.tar'


In [0]:
# creating checkpoint repo file

exp_check = os.path.join(mydrive,'My Drive/Project', checkpath)
if not os.path.isdir(exp_check):
    os.makedirs(exp_check)

## *Main loop

In [0]:
if mode == 'train':
    print('train')
    for epoch in range(start_epoch, epochs):

        t0 = time.time()
        """
        # remove head, the last output layer
        model.top_layer = None
        # remove ReLU 
        model.classifier = nn.Sequential(*list(model.classifier.children())[:-1])
        """
        # print('model',model)
        # Step1: get the features for the whole dataset
        print('Step1: get the features for the whole dataset')
        # print('guava_dataset:',guava_dataset) 
        features = compute_features(guava_dataloader, model, len(guava_dataset), batch_size)
        print('features:',features)

        # Step2: cluster the features
        print('Step2: cluster the features')
        clustering_loss = deepcluster.cluster(features, verbose)

        # Step3: assign pseudo-labels
        print('Step3: assign pseudo-labels')
        train_dataset = cluster_assign(deepcluster.images_lists, guava_dataset.imgs, tra) 

        # Step4: uniformly sample for each target
        print('Step4: uniformly sample for each target')
        sampler = UnifLabelSampler(int(reassign * len(train_dataset)), deepcluster.images_lists)
        train_dataloader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=batch_size,
            num_workers=workers,
            sampler=sampler,
            pin_memory=True,
        )
        # Step5: set last fully connected layer
        print('Step5: set last fully connected layer')

        """
        mlp = list(model.classifier.children())
        
        # add another ReLU (don't set inplace)
        
        # add another ReLU
        # Use softmax instead
        mlp.append(nn.ReLU(inplace = False).cuda())
        # print(*mlp) 
        model.classifier = nn.Sequential(*mlp)
    
        # initialize top layer
        """
        
        # print(model)
        # Step6: train network with clusters as pseudo-labels
        print('Step6: train network with clusters as pseudo-labels')

        t1 = time.time()
        loss = train(train_dataloader, model, criterion, optimizer, epoch,  \
                    opt_lr, opt_wd, verbose)
        t2 = time.time()

        # Step7: save running checkpoint
        print('Step7: save running checkpoint')
        torch.save({'epoch': epoch + 1,
                    'arch': arch,
                    'opt': opt,
                    'state_dict': model.state_dict(),
                    'optimizer' : optimizer.state_dict()},
                    os.path.join(mydrive,'My Drive/Project',checkpath, 'checkpoint.pth.tar'))

        if verbose:
            print('############# Epoch [{0}] report ############# \n' \
                'Time 1: {1:.3f} s\n' \
                'Time 1: {1:.3f} s\n' \
                'Clustering loss: {2:.3f} \n' \
                'ConvNet loss: {3:.3f} \n' \
                '############ End epoch [{0}] report ########## \n' \
                .format(epoch, t1 - t0, t2 - t1, clustering_loss, loss))
        

train
Step1: get the features for the whole dataset
Compute features
features: [[-0.03994256 -0.13372263  0.094648   ...  0.02388442 -0.01289483
  -0.05383533]
 [-0.0571321  -0.13122956  0.06550735 ...  0.06614278 -0.05120794
  -0.02831397]
 [-0.04511218 -0.09236339  0.10138722 ...  0.0435321  -0.02122463
  -0.06240815]
 ...
 [-0.03431515 -0.15333347  0.13474458 ...  0.0311146  -0.03363449
  -0.05386437]
 [-0.05994146 -0.102323    0.10542712 ...  0.03597405 -0.03140224
  -0.05271506]
 [-0.03648771 -0.16896342  0.16149065 ...  0.02556401 -0.04393809
  -0.02560278]]
Step2: cluster the features
k-means loss evolution: [92844.484 53105.938 50299.207 49175.42  48578.56  48221.96  47965.348
 47762.008 47623.496 47494.496 47365.777 47317.977 47285.973 47254.34
 47232.637 47170.98  47113.996 47105.492 47099.41  47092.11 ]
k-means time: 170 s
Step3: assign pseudo-labels
Step4: uniformly sample for each target
Step5: set last fully connected layer
Step6: train network with clusters as pseudo-lab

	cuda(torch.device device, bool async, *, torch.memory_format memory_format)
Consider using one of the following signatures instead:
	cuda(torch.device device, bool non_blocking, *, torch.memory_format memory_format)


out:
tensor([[-1.4516e+00,  7.6221e-01,  9.6431e-02,  ..., -3.1564e-01,
         -1.4923e-01,  3.1370e-01],
        [-3.9962e-01,  4.7326e-01, -5.5234e-01,  ..., -3.8424e-02,
         -4.7541e-01,  7.9814e-04],
        [-5.6268e-01,  1.7716e-01, -1.1597e-01,  ..., -2.1714e-02,
         -1.4529e-02,  4.3380e-01],
        ...,
        [-5.4041e-01,  4.7974e-01, -4.9645e-01,  ..., -1.7364e-02,
         -2.0172e-02,  1.4704e-01],
        [-7.7453e-01,  1.8234e-01, -8.2028e-01,  ...,  2.1164e-01,
         -3.0997e-01,  3.6238e-01],
        [-1.6297e-01,  5.4211e-01, -1.8733e-01,  ...,  2.3546e-01,
         -1.3022e+00, -1.8437e-01]], device='cuda:0', grad_fn=<AddmmBackward>)
torch.Size([32, 131])
cluster_var:
tensor([ 24, 107,  24,  45, 120, 104, 120, 107,  39,  62,  44, 120,  90, 120,
         98,  99,  16,  82,  33, 130, 108,  47, 120, 129, 117,  10, 100,  59,
         93,  85, 113,  39], device='cuda:0')
torch.Size([32])
Progress report:	Epoch: [1][0/211538]	Time: 5.137 (avg: 5.137)	Data

# Test

In [0]:
if mode == 'test':
    test_check = os.path.join(test_path)
    print(test_check)
    assert os.path.isdir(test_check) is True, 'No test set detected.'

    print('Testing')
    model.eval()
    test_dataset = datasets.ImageFolder(os.path.join(test_path),
                                        transform=tra)
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=24, 
        pin_memory=True,
    )
    print(len(test_dataset))
    print(len(test_loader))

   
    hit = 0

    for i, (input_tensor, ground_truth) in enumerate(test_loader):
        with torch.no_grad():
            predict = model(input_).data.cpu().numpy()
        # remove batch
        predict = np.squeeze(predict)
        result = np.where(predict == np.amax(predict))

        ground_truth =  np.squeeze(ground_truth)
        result =  np.squeeze(result)
        print('predict', predict)
        print('predict', result)
        print('ground_truth', ground_truth)
        if predict == ground_truth:
            hit = hit + 1
            print('hit')
        else:
            print('miss')
    print("acc:", hit/len(test_loader) )
