In [3]:
from __future__ import print_function, division

import argparse
import fnmatch
import math
import os
import shutil

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, models, transforms

import nets
import training_functions
import utils

In [6]:

# Translate string entries to bool for parser
def str2bool(v):
    """Convert a command-line value to a bool, for use as an argparse ``type``.

    Args:
        v: One of 'yes'/'no', 'true'/'false', 't'/'f', 'y'/'n', '1'/'0'
            (case-insensitive, surrounding whitespace ignored), or an actual
            bool, which is returned unchanged.

    Returns:
        True or False.

    Raises:
        argparse.ArgumentTypeError: If ``v`` is not one of the accepted spellings.
    """
    # A real bool has no .lower(); hand it back untouched
    if isinstance(v, bool):
        return v
    text = str(v).strip().lower()
    if text in ('yes', 'true', 't', 'y', '1'):
        return True
    if text in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')

# Command-line style configuration for the DCEC run. In the notebook the parser is fed an
# empty argument list, so every option takes the default declared here.
parser = argparse.ArgumentParser(description='Use DCEC for clustering')
parser.add_argument('--mode', default='train_full', choices=['train_full', 'pretrain'], help='mode')
# type=bool would turn every non-empty string (including "False") into True, so use str2bool
parser.add_argument('--tensorboard', default=True, type=str2bool, help='export training stats to tensorboard')
parser.add_argument('--pretrain', default=True, type=str2bool, help='perform autoencoder pretraining')
parser.add_argument('--pretrained_net', default=1, help='index or path of pretrained net')
parser.add_argument('--net_architecture', default='CAE_3', choices=['CAE_3', 'CAE_bn3', 'CAE_4', 'CAE_bn4', 'CAE_5', 'CAE_bn5'], help='network architecture used')
parser.add_argument('--dataset', default='MNIST-train',
                    choices=['MNIST-train', 'custom', 'MNIST-test', 'MNIST-full'],
                    help='custom or prepared dataset')
parser.add_argument('--dataset_path', default='data', help='path to dataset')
parser.add_argument('--batch_size', default=256, type=int, help='batch size')
parser.add_argument('--rate', default=0.001, type=float, help='learning rate for clustering')
parser.add_argument('--rate_pretrain', default=0.001, type=float, help='learning rate for pretraining')
parser.add_argument('--weight', default=0.0, type=float, help='weight decay for clustering')
parser.add_argument('--weight_pretrain', default=0.0, type=float, help='weight decay for pretraining')
parser.add_argument('--sched_step', default=200, type=int, help='scheduler steps for rate update')
parser.add_argument('--sched_step_pretrain', default=200, type=int,
                    help='scheduler steps for rate update - pretrain')
parser.add_argument('--sched_gamma', default=0.1, type=float, help='scheduler gamma for rate update')
parser.add_argument('--sched_gamma_pretrain', default=0.1, type=float,
                    help='scheduler gamma for rate update - pretrain')
parser.add_argument('--epochs', default=1000, type=int, help='clustering epochs')
parser.add_argument('--epochs_pretrain', default=20, type=int, help='pretraining epochs')
parser.add_argument('--printing_frequency', default=10, type=int, help='training stats printing frequency')
parser.add_argument('--gamma', default=0.1, type=float, help='clustering loss weight')
parser.add_argument('--update_interval', default=80, type=int, help='update interval for target distribution')
parser.add_argument('--tol', default=1e-2, type=float, help='stop criterium tolerance')
parser.add_argument('--num_clusters', default=10, type=int, help='number of clusters')
parser.add_argument('--custom_img_size', default=[128, 128, 3], nargs=3, type=int, help='size of custom images')
parser.add_argument('--leaky', default=True, type=str2bool)
parser.add_argument('--neg_slope', default=0.01, type=float)
parser.add_argument('--activations', default=False, type=str2bool)
parser.add_argument('--bias', default=True, type=str2bool)
# An explicit empty list keeps argparse from reading the kernel's own sys.argv;
# put overrides here, e.g. ['--mode', 'pretrain']
args = parser.parse_args([])
print(args)

Namespace(activations=False, batch_size=256, bias=True, custom_img_size=[128, 128, 3], dataset='MNIST-train', dataset_path='data', epochs=1000, epochs_pretrain=20, gamma=0.1, leaky=True, mode='train_full', neg_slope=0.01, net_architecture='CAE_3', num_clusters=10, pretrain=True, pretrained_net=1, printing_frequency=10, rate=0.001, rate_pretrain=0.001, sched_gamma=0.1, sched_gamma_pretrain=0.1, sched_step=200, sched_step_pretrain=200, tensorboard=True, tol=0.01, update_interval=80, weight=0.0, weight_pretrain=0.0)


In [None]:
def my_loss(output):
    """Entropy-style term over clustering scores: ``(1 / Nu) * sum(p * log(p))``.

    Args:
        output: Tensor of clustering scores for the unlabeled data. Its first
            dimension is taken as Nu, the number of unlabeled samples.
            # assumes a floating-point tensor of non-negative scores - TODO confirm

    Returns:
        Scalar tensor holding the sum of ``output * log(output)`` divided by Nu.
    """
    # Nu: # of unlabeled data
    # C: number of clusters we have so far
    Nu = len(output)
    # Clamp inside the log so a score of exactly 0 contributes 0 instead of 0 * -inf = NaN
    tiny = torch.finfo(output.dtype).tiny
    # Normalise by 1/Nu as the formula above states (the previous code multiplied by Nu)
    loss = torch.sum(output * torch.log(output.clamp(min=tiny))) / Nu
    return loss

In [9]:
# Pretrain-only mode with pretraining switched off leaves nothing to run
if args.mode == 'pretrain' and not args.pretrain:
    print("Nothing to do :(")
    # exit() is an interactive helper; SystemExit stops the run the same way without it
    raise SystemExit(0)

board = args.tensorboard

# Deal with pretraining option and way of showing network path
pretrain = args.pretrain
# --pretrained_net is treated as a path unless it parses as an integer index
net_is_path = True
if not pretrain:
    try:
        idx = int(args.pretrained_net)
        net_is_path = False
    except (TypeError, ValueError):
        # Not an integer, so keep treating the value as a path
        pass
# Settings collected in this dict are passed on with the training calls further down
params = {'pretrain': pretrain}

# Directories
# Create directories structure
dirs = ['runs', 'reports', 'nets']
for directory in dirs:
    os.makedirs(directory, exist_ok=True)

# Net architecture
model_name = args.net_architecture
# Indexing (for automated reports saving) - allows to run many trainings and get all the reports collected
if pretrain or net_is_path:
    # Start at 1 explicitly so a leftover idx from an earlier execution of this cell is never reused
    idx = 1
    # Reverse-sorted listing puts the highest existing index for this architecture first
    for file in sorted(os.listdir('reports'), reverse=True):
        # Only '<model>_NNN.txt' is accepted, which guarantees the slice below holds digits
        if fnmatch.fnmatch(file, model_name + '_[0-9][0-9][0-9].txt'):
            idx = int(file[-7:-4]) + 1
            break

# Base filename
name = model_name + '_' + str(idx).zfill(3)

# Arrange filenames for report, network weights, pretrained network weights
name_txt = os.path.join('reports', name + '.txt')
name_net = os.path.join('nets', name)
if net_is_path and not pretrain:
    # User supplied an explicit path to pretrained weights
    pretrained = args.pretrained_net
else:
    pretrained = os.path.join('nets', name + '_pretrained.pt')
if not pretrain and not os.path.isfile(pretrained):
    print("No pretrained weights, try again choosing pretrained network or create new with pretrain=True")

model_files = [name_net, pretrained]
params['model_files'] = model_files

# Open file
# A pretraining run starts a fresh report; otherwise the existing report is appended to
f = open(name_txt, 'w' if pretrain else 'a')
params['txt_file'] = f

# Delete tensorboard entry if exist (not to overlap as the charts become unreadable)
# shutil.rmtree works on every platform, unlike shelling out to `rm -rf`;
# ignore_errors keeps this best-effort when the directory does not exist
shutil.rmtree(os.path.join('runs', name), ignore_errors=True)

# Initialize tensorboard writer
if board:
    writer = SummaryWriter('runs/' + name)
    params['writer'] = writer
else:
    params['writer'] = None

# Hyperparameters
# Copy the parsed arguments into plain names, record the ones kept in `params`,
# then write every setting to the report via utils.print_both.

# Used dataset
dataset = args.dataset

# Batch size
batch = args.batch_size
# Number of workers (typically 4*num_of_GPUs)
workers = 4

# Learning rate
rate, rate_pretrain = args.rate, args.rate_pretrain
# Adam params
# Weight decay
weight, weight_pretrain = args.weight, args.weight_pretrain
# Scheduler steps for rate update
sched_step, sched_step_pretrain = args.sched_step, args.sched_step_pretrain
# Scheduler gamma - multiplier for learning rate
sched_gamma, sched_gamma_pretrain = args.sched_gamma, args.sched_gamma_pretrain

# Number of epochs
epochs, pretrain_epochs = args.epochs, args.epochs_pretrain
# Printing frequency
print_freq = args.printing_frequency
# Clustering loss weight
gamma = args.gamma
# Update interval for target distribution
update_interval = args.update_interval
# Tolerance for label changes
tol = args.tol
# Number of clusters
num_clusters = args.num_clusters

# Same keys, added in the same order as before
params.update({
    'batch': batch,
    'pretrain_epochs': pretrain_epochs,
    'print_freq': print_freq,
    'gamma': gamma,
    'update_interval': update_interval,
    'tol': tol,
})

# Report for settings
utils.print_both(f, "Training the '" + model_name + "' architecture")
utils.print_both(f, "\n" + "The following parameters are used:")

# (label, value) pairs, listed in the order they appear in the report
report_rows = [
    ("Batch size:\t", batch),
    ("Number of workers:\t", workers),
    ("Learning rate:\t", rate),
    ("Pretraining learning rate:\t", rate_pretrain),
    ("Weight decay:\t", weight),
    ("Pretraining weight decay:\t", weight_pretrain),
    ("Scheduler steps:\t", sched_step),
    ("Scheduler gamma:\t", sched_gamma),
    ("Pretraining scheduler steps:\t", sched_step_pretrain),
    ("Pretraining scheduler gamma:\t", sched_gamma_pretrain),
    ("Number of epochs of training:\t", epochs),
    ("Number of epochs of pretraining:\t", pretrain_epochs),
    ("Clustering loss weight:\t", gamma),
    ("Update interval for target distribution:\t", update_interval),
    ("Stop criterium tolerance:\t", tol),
    ("Number of clusters:\t", num_clusters),
    ("Leaky relu:\t", args.leaky),
    ("Leaky slope:\t", args.neg_slope),
    ("Activations:\t", args.activations),
    ("Bias:\t", args.bias),
]
for label, value in report_rows:
    tmp = label + str(value)
    utils.print_both(f, tmp)

# Data preparation
# Each prepared MNIST choice maps to (name shown in the report, keyword selecting that split)
mnist_variants = {
    'MNIST-train': ('train', {'train': True}),
    'MNIST-test': ('test', {'train': False}),
    'MNIST-full': ('full', {'full': True}),
}

if dataset in mnist_variants:
    # Uses slightly modified torchvision MNIST class
    import mnist
    split_name, split_kwargs = mnist_variants[dataset]
    tmp = "\nData preparation\nReading data from: MNIST " + split_name + " dataset"
    utils.print_both(f, tmp)
    img_size = [28, 28, 1]
    tmp = "Image size used:\t{0}x{1}".format(img_size[0], img_size[1])
    utils.print_both(f, tmp)

    # From here on `dataset` names the loaded dataset object rather than the option string
    dataset = mnist.MNIST('../data', download=True,
                          transform=transforms.Compose([
                              transforms.ToTensor(),
                              # transforms.Normalize((0.1307,), (0.3081,))
                          ]),
                          **split_kwargs)

    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch, shuffle=False, num_workers=workers)

    dataset_size = len(dataset)

else:
    # Data folder
    data_dir = args.dataset_path
    tmp = "\nData preparation\nReading data from:\t./" + data_dir
    utils.print_both(f, tmp)

    # Image size: always defined here, so a bad value fails on this line instead of
    # surfacing later as an undefined img_size
    custom_size = args.custom_img_size
    img_size = list(custom_size)

    tmp = "Image size used:\t{0}x{1}".format(img_size[0], img_size[1])
    utils.print_both(f, tmp)

    # Transformations
    data_transforms = transforms.Compose([
            transforms.Resize(img_size[0:2]),
            # transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

    # Read data from selected folder and apply transformations
    image_dataset = datasets.ImageFolder(data_dir, data_transforms)
    # Prepare data for network: arrange batches (order is kept, shuffle=False)
    dataloader = torch.utils.data.DataLoader(image_dataset, batch_size=batch,
                                             shuffle=False, num_workers=workers)

    # Size of data sets
    dataset_size = len(image_dataset)

tmp = "Training set size:\t" + str(dataset_size)
utils.print_both(f, tmp)

params['dataset_size'] = dataset_size

# GPU check
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
tmp = "\nPerforming calculations on:\t" + str(device)
utils.print_both(f, tmp + '\n')
params['device'] = device

# Build the selected architecture: look the class up by name on `nets` rather than eval()-ing a string
model = getattr(nets, model_name)(img_size, num_clusters=num_clusters,
                                  leaky=args.leaky, neg_slope=args.neg_slope)

# Tensorboard model representation
# if board:
#     writer.add_graph(model, torch.autograd.Variable(torch.Tensor(batch, img_size[2], img_size[0], img_size[1])))

model = model.to(device)
# Reconstruction loss (reduction='mean' is the current spelling of size_average=True)
criterion_1 = nn.MSELoss(reduction='mean')
# Clustering loss (reduction='sum' is the current spelling of size_average=False)
criterion_2 = nn.KLDivLoss(reduction='sum')

# NOTE(review): this assignment was left without a value, which is a syntax error.
# my_loss is presumably the intended criterion - confirm. It is not part of `criteria`.
criterion_3 = my_loss

criteria = [criterion_1, criterion_2]

optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=rate, weight_decay=weight)

optimizer_pretrain = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=rate_pretrain, weight_decay=weight_pretrain)

optimizers = [optimizer, optimizer_pretrain]

scheduler = lr_scheduler.StepLR(optimizer, step_size=sched_step, gamma=sched_gamma)
scheduler_pretrain = lr_scheduler.StepLR(optimizer_pretrain, step_size=sched_step_pretrain, gamma=sched_gamma_pretrain)

schedulers = [scheduler, scheduler_pretrain]

try:
    if args.mode == 'train_full':
        model = training_functions.train_model(model, dataloader, criteria, optimizers, schedulers, epochs, params)
    elif args.mode == 'pretrain':
        # NOTE(review): this passes `epochs`, not `pretrain_epochs` - confirm that is intended
        model = training_functions.pretraining(model, dataloader, criteria[0], optimizers[1], schedulers[1], epochs, params)

    # Save final model
    torch.save(model.state_dict(), name_net + '.pt')
finally:
    # Close files even when training or saving fails, so the report is flushed
    f.close()
    if board:
        writer.close()

Training the 'CAE_3' architecture

The following parameters are used:
Batch size:	256
Number of workers:	4
Learning rate:	0.001
Pretraining learning rate:	0.001
Weight decay:	0.0
Pretraining weight decay:	0.0
Scheduler steps:	200
Scheduler gamma:	0.1
Pretraining scheduler steps:	200
Pretraining scheduler gamma:	0.1
Number of epochs of training:	1000
Number of epochs of pretraining:	20
Clustering loss weight:	0.1
Update interval for target distribution:	80
Stop criterium tolerance:	0.01
Number of clusters:	10
Leaky relu:	True
Leaky slope:	0.01
Activations:	False
Bias:	True

Data preparation
Reading data from: MNIST train dataset
Image size used:	28x28
Training set size:	60000

Performing calculations on:	cuda:0

Pretraining:	Epoch 1/20
----------




Pretraining:	Epoch: [1][10/235]	Loss 0.0858 (0.1020)	
Pretraining:	Epoch: [1][20/235]	Loss 0.0753 (0.0901)	
Pretraining:	Epoch: [1][30/235]	Loss 0.0630 (0.0832)	
Pretraining:	Epoch: [1][40/235]	Loss 0.0675 (0.0794)	
Pretraining:	Epoch: [1][50/235]	Loss 0.0667 (0.0767)	
Pretraining:	Epoch: [1][60/235]	Loss 0.0609 (0.0748)	
Pretraining:	Epoch: [1][70/235]	Loss 0.0575 (0.0729)	
Pretraining:	Epoch: [1][80/235]	Loss 0.0590 (0.0715)	
Pretraining:	Epoch: [1][90/235]	Loss 0.0574 (0.0700)	
Pretraining:	Epoch: [1][100/235]	Loss 0.0507 (0.0682)	
Pretraining:	Epoch: [1][110/235]	Loss 0.0443 (0.0663)	
Pretraining:	Epoch: [1][120/235]	Loss 0.0413 (0.0645)	
Pretraining:	Epoch: [1][130/235]	Loss 0.0405 (0.0627)	
Pretraining:	Epoch: [1][140/235]	Loss 0.0365 (0.0609)	
Pretraining:	Epoch: [1][150/235]	Loss 0.0356 (0.0593)	
Pretraining:	Epoch: [1][160/235]	Loss 0.0321 (0.0577)	
Pretraining:	Epoch: [1][170/235]	Loss 0.0306 (0.0562)	
Pretraining:	Epoch: [1][180/235]	Loss 0.0302 (0.0548)	
Pretraining:	Epoch:

Pretraining:	Epoch: [7][90/235]	Loss 0.0149 (0.0148)	
Pretraining:	Epoch: [7][100/235]	Loss 0.0142 (0.0148)	
Pretraining:	Epoch: [7][110/235]	Loss 0.0139 (0.0148)	
Pretraining:	Epoch: [7][120/235]	Loss 0.0145 (0.0148)	
Pretraining:	Epoch: [7][130/235]	Loss 0.0145 (0.0148)	
Pretraining:	Epoch: [7][140/235]	Loss 0.0139 (0.0148)	
Pretraining:	Epoch: [7][150/235]	Loss 0.0159 (0.0148)	
Pretraining:	Epoch: [7][160/235]	Loss 0.0134 (0.0148)	
Pretraining:	Epoch: [7][170/235]	Loss 0.0140 (0.0148)	
Pretraining:	Epoch: [7][180/235]	Loss 0.0154 (0.0148)	
Pretraining:	Epoch: [7][190/235]	Loss 0.0152 (0.0148)	
Pretraining:	Epoch: [7][200/235]	Loss 0.0136 (0.0148)	
Pretraining:	Epoch: [7][210/235]	Loss 0.0146 (0.0148)	
Pretraining:	Epoch: [7][220/235]	Loss 0.0145 (0.0148)	
Pretraining:	Epoch: [7][230/235]	Loss 0.0142 (0.0147)	
Pretraining:	 Loss: 0.0147

Pretraining:	Epoch 8/20
----------
Pretraining:	Epoch: [8][10/235]	Loss 0.0134 (0.0141)	
Pretraining:	Epoch: [8][20/235]	Loss 0.0147 (0.0142)	
Pretr

Pretraining:	Epoch: [13][130/235]	Loss 0.0125 (0.0128)	
Pretraining:	Epoch: [13][140/235]	Loss 0.0120 (0.0127)	
Pretraining:	Epoch: [13][150/235]	Loss 0.0140 (0.0128)	
Pretraining:	Epoch: [13][160/235]	Loss 0.0117 (0.0128)	
Pretraining:	Epoch: [13][170/235]	Loss 0.0121 (0.0128)	
Pretraining:	Epoch: [13][180/235]	Loss 0.0135 (0.0128)	
Pretraining:	Epoch: [13][190/235]	Loss 0.0134 (0.0128)	
Pretraining:	Epoch: [13][200/235]	Loss 0.0117 (0.0128)	
Pretraining:	Epoch: [13][210/235]	Loss 0.0128 (0.0128)	
Pretraining:	Epoch: [13][220/235]	Loss 0.0127 (0.0128)	
Pretraining:	Epoch: [13][230/235]	Loss 0.0124 (0.0128)	
Pretraining:	 Loss: 0.0128

Pretraining:	Epoch 14/20
----------
Pretraining:	Epoch: [14][10/235]	Loss 0.0118 (0.0123)	
Pretraining:	Epoch: [14][20/235]	Loss 0.0127 (0.0124)	
Pretraining:	Epoch: [14][30/235]	Loss 0.0120 (0.0124)	
Pretraining:	Epoch: [14][40/235]	Loss 0.0126 (0.0125)	
Pretraining:	Epoch: [14][50/235]	Loss 0.0136 (0.0125)	
Pretraining:	Epoch: [14][60/235]	Loss 0.0129 

Pretraining:	Epoch: [19][180/235]	Loss 0.0124 (0.0117)	
Pretraining:	Epoch: [19][190/235]	Loss 0.0122 (0.0117)	
Pretraining:	Epoch: [19][200/235]	Loss 0.0108 (0.0118)	
Pretraining:	Epoch: [19][210/235]	Loss 0.0118 (0.0118)	
Pretraining:	Epoch: [19][220/235]	Loss 0.0118 (0.0117)	
Pretraining:	Epoch: [19][230/235]	Loss 0.0115 (0.0117)	
Pretraining:	 Loss: 0.0117

Pretraining:	Epoch 20/20
----------
Pretraining:	Epoch: [20][10/235]	Loss 0.0107 (0.0113)	
Pretraining:	Epoch: [20][20/235]	Loss 0.0116 (0.0114)	
Pretraining:	Epoch: [20][30/235]	Loss 0.0109 (0.0114)	
Pretraining:	Epoch: [20][40/235]	Loss 0.0116 (0.0115)	
Pretraining:	Epoch: [20][50/235]	Loss 0.0124 (0.0114)	
Pretraining:	Epoch: [20][60/235]	Loss 0.0117 (0.0115)	
Pretraining:	Epoch: [20][70/235]	Loss 0.0113 (0.0115)	
Pretraining:	Epoch: [20][80/235]	Loss 0.0103 (0.0115)	
Pretraining:	Epoch: [20][90/235]	Loss 0.0118 (0.0115)	
Pretraining:	Epoch: [20][100/235]	Loss 0.0110 (0.0115)	
Pretraining:	Epoch: [20][110/235]	Loss 0.0106 (0.



NMI: 0.74255	ARI: 0.68582	Acc 0.80833

Epoch 1/1000
----------
Epoch: [1][10/235]	Loss 0.0266 (0.0303)	Loss_recovery 0.0139 (0.0150)	Loss clustering 0.0127 (0.0152)	
Epoch: [1][20/235]	Loss 0.0257 (0.0285)	Loss_recovery 0.0138 (0.0145)	Loss clustering 0.0119 (0.0139)	
Epoch: [1][30/235]	Loss 0.0242 (0.0271)	Loss_recovery 0.0128 (0.0141)	Loss clustering 0.0114 (0.0130)	
Epoch: [1][40/235]	Loss 0.0225 (0.0262)	Loss_recovery 0.0134 (0.0140)	Loss clustering 0.0091 (0.0122)	
Epoch: [1][50/235]	Loss 0.0230 (0.0255)	Loss_recovery 0.0142 (0.0138)	Loss clustering 0.0089 (0.0116)	
Epoch: [1][60/235]	Loss 0.0229 (0.0249)	Loss_recovery 0.0135 (0.0138)	Loss clustering 0.0094 (0.0111)	
Epoch: [1][70/235]	Loss 0.0207 (0.0244)	Loss_recovery 0.0128 (0.0137)	Loss clustering 0.0080 (0.0107)	
Epoch: [1][80/235]	Loss 0.0204 (0.0240)	Loss_recovery 0.0122 (0.0136)	Loss clustering 0.0083 (0.0104)	

Updating target distribution:




NMI: 0.76430	ARI: 0.70587	Acc 0.81810	
Epoch: [1][90/235]	Loss 0.0397 (0.0257)	Loss_recovery 0.0145 (0.0137)	Loss clustering 0.0252 (0.0121)	
Epoch: [1][100/235]	Loss 0.0378 (0.0269)	Loss_recovery 0.0146 (0.0138)	Loss clustering 0.0232 (0.0131)	
Epoch: [1][110/235]	Loss 0.0383 (0.0279)	Loss_recovery 0.0145 (0.0140)	Loss clustering 0.0239 (0.0139)	
Epoch: [1][120/235]	Loss 0.0339 (0.0285)	Loss_recovery 0.0154 (0.0141)	Loss clustering 0.0185 (0.0144)	
Epoch: [1][130/235]	Loss 0.0345 (0.0290)	Loss_recovery 0.0154 (0.0142)	Loss clustering 0.0191 (0.0148)	
Epoch: [1][140/235]	Loss 0.0341 (0.0294)	Loss_recovery 0.0149 (0.0143)	Loss clustering 0.0192 (0.0151)	
Epoch: [1][150/235]	Loss 0.0360 (0.0299)	Loss_recovery 0.0171 (0.0144)	Loss clustering 0.0189 (0.0154)	
Epoch: [1][160/235]	Loss 0.0348 (0.0302)	Loss_recovery 0.0148 (0.0145)	Loss clustering 0.0201 (0.0156)	

Updating target distribution:




NMI: 0.78369	ARI: 0.72530	Acc 0.82678	
Epoch: [1][170/235]	Loss 0.0452 (0.0310)	Loss_recovery 0.0160 (0.0146)	Loss clustering 0.0292 (0.0164)	
Epoch: [1][180/235]	Loss 0.0430 (0.0317)	Loss_recovery 0.0170 (0.0147)	Loss clustering 0.0260 (0.0170)	
Epoch: [1][190/235]	Loss 0.0438 (0.0324)	Loss_recovery 0.0170 (0.0148)	Loss clustering 0.0268 (0.0175)	
Epoch: [1][200/235]	Loss 0.0421 (0.0329)	Loss_recovery 0.0155 (0.0150)	Loss clustering 0.0267 (0.0180)	
Epoch: [1][210/235]	Loss 0.0427 (0.0334)	Loss_recovery 0.0167 (0.0150)	Loss clustering 0.0260 (0.0183)	
Epoch: [1][220/235]	Loss 0.0422 (0.0338)	Loss_recovery 0.0165 (0.0151)	Loss clustering 0.0256 (0.0187)	
Epoch: [1][230/235]	Loss 0.0423 (0.0342)	Loss_recovery 0.0167 (0.0152)	Loss clustering 0.0256 (0.0190)	
Loss: 0.0343	Loss_recovery: 0.0152	Loss_clustering: 0.0191

Epoch 2/1000
----------

Updating target distribution:




NMI: 0.79038	ARI: 0.73242	Acc 0.82898	
Epoch: [2][10/235]	Loss 0.0459 (0.0465)	Loss_recovery 0.0161 (0.0166)	Loss clustering 0.0297 (0.0299)	
Epoch: [2][20/235]	Loss 0.0454 (0.0463)	Loss_recovery 0.0166 (0.0167)	Loss clustering 0.0288 (0.0297)	
Epoch: [2][30/235]	Loss 0.0455 (0.0460)	Loss_recovery 0.0167 (0.0166)	Loss clustering 0.0289 (0.0294)	
Epoch: [2][40/235]	Loss 0.0455 (0.0459)	Loss_recovery 0.0169 (0.0168)	Loss clustering 0.0287 (0.0292)	
Epoch: [2][50/235]	Loss 0.0470 (0.0457)	Loss_recovery 0.0180 (0.0167)	Loss clustering 0.0290 (0.0290)	
Epoch: [2][60/235]	Loss 0.0454 (0.0455)	Loss_recovery 0.0171 (0.0167)	Loss clustering 0.0283 (0.0287)	
Epoch: [2][70/235]	Loss 0.0428 (0.0453)	Loss_recovery 0.0162 (0.0168)	Loss clustering 0.0266 (0.0285)	
Epoch: [2][80/235]	Loss 0.0429 (0.0451)	Loss_recovery 0.0156 (0.0168)	Loss clustering 0.0273 (0.0284)	

Updating target distribution:




NMI: 0.80054	ARI: 0.74242	Acc 0.83363	
Epoch: [2][90/235]	Loss 0.0491 (0.0455)	Loss_recovery 0.0172 (0.0168)	Loss clustering 0.0319 (0.0287)	
Epoch: [2][100/235]	Loss 0.0458 (0.0456)	Loss_recovery 0.0158 (0.0168)	Loss clustering 0.0301 (0.0288)	
Epoch: [2][110/235]	Loss 0.0462 (0.0457)	Loss_recovery 0.0156 (0.0168)	Loss clustering 0.0306 (0.0289)	
Epoch: [2][120/235]	Loss 0.0453 (0.0458)	Loss_recovery 0.0162 (0.0169)	Loss clustering 0.0291 (0.0289)	
Epoch: [2][130/235]	Loss 0.0456 (0.0458)	Loss_recovery 0.0165 (0.0169)	Loss clustering 0.0291 (0.0289)	
Epoch: [2][140/235]	Loss 0.0449 (0.0458)	Loss_recovery 0.0160 (0.0168)	Loss clustering 0.0289 (0.0290)	
Epoch: [2][150/235]	Loss 0.0469 (0.0459)	Loss_recovery 0.0182 (0.0169)	Loss clustering 0.0287 (0.0290)	
Epoch: [2][160/235]	Loss 0.0461 (0.0459)	Loss_recovery 0.0161 (0.0169)	Loss clustering 0.0299 (0.0290)	

Updating target distribution:
NMI: 0.80646	ARI: 0.74860	Acc 0.83600	
Label divergence 0.009283333333333333< tol 0.01
Reached tole



In [8]:
!pip3 install scikit-learn==0.22.2

Collecting scikit-learn==0.22.2

ERROR: Could not install packages due to an EnvironmentError: [WinError 5] Access is denied: 'c:\\programdata\\anaconda3\\lib\\site-packages\\scikit_learn-0.23.2.dist-info\\COPYING'
Consider using the `--user` option or check the permissions.




  Downloading scikit_learn-0.22.2-cp38-cp38-win_amd64.whl (6.6 MB)
Installing collected packages: scikit-learn
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 0.23.2
    Uninstalling scikit-learn-0.23.2:
