In [1]:
# Optional Google Colab setup, left disabled for local runs.
# Uncomment the lines below to mount Google Drive at /content/drive.
# NOTE(review): the install line is presumably unnecessary when running inside Colab itself — confirm.
#pip3 install google.colab
#from google.colab import drive
#drive.mount('/content/drive', force_remount=True)

###  Libraries

In [2]:
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.colors as mcolors

import torch

import os
import sys

from Autoencoder import Autoencoder, CD_Autoencoder
from Datasets.Datasets_Functions import *
from Visualization import Visualization
from Evaluations.Evaluation import Evaluator
from General_Functions import General_Functions

### Global Variables

In [3]:
# Root directory of the project; dataset and weight paths below are built
# relative to it. Point this at the checkout if the notebook is launched
# from a different working directory.
path_to_module = './'
# Guarded so that re-running this cell does not keep appending duplicates.
if path_to_module not in sys.path:
    sys.path.append(path_to_module)

# Cap the number of OpenMP threads.
# NOTE(review): numeric libraries are imported before this cell runs, so the
# setting may come too late for runtimes that read it at import time — confirm.
os.environ['OMP_NUM_THREADS'] = '6'

# Use the first GPU when available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# torch.cuda.set_device only accepts CUDA devices; calling it with the CPU
# fallback raises, so it is restricted to the CUDA case.
if device.type == 'cuda':
    torch.cuda.set_device(device)

### Visualization

In [4]:
# Shared plotting helper; the result cells at the end of the notebook call
# its plot() and plot_tsne() methods.
visualization = Visualization()

### Load Dataset

In [5]:
# Load the registry of dataset configurations and list the available names.
hashmap_path = path_to_module+"Datasets/"
hashmap = get_hashmap(hashmap_path)
print(hashmap.keys())

# Select the dataset and show its stored configuration (as loaded, i.e.
# before the overrides applied below).
dataset_name = 'emnist_balanced_digits'
dataset_properties = hashmap[dataset_name]
print(dataset_properties)

# Override batch size and cluster count for this run; the values are written
# into the configuration entry and mirrored in notebook-level variables.
batch_size = 1024
dataset_properties['batch_size'] = batch_size
n_clusters = 10
dataset_properties['n_clusters'] = n_clusters

# Build the data loader together with the raw data and ground-truth labels.
dataloader, input_dim, data_np, labels = function_get_dataset(dataset_name, dataset_properties)
print('Data_Shape is:', input_dim)
print('Batches Number is:', len(dataloader))

dict_keys(['tcga', 'emnist_balanced_letters', 'emnist_mnist', 'emnist_balanced_digits', 'dermatology', 'ecoil', 'iris', 'rings', 'wine', 'australian', 'moons', 'squeezed_gauss', 'gauss_densities', 'pendigits', 'fashionmnist', '3dspheres', '20_newsgroups', 'coil20', 'cifar10', 'stl10', 'r15', 'r3'])
{'batch_size': 256, 'n_clusters': 10, 'module_name': 'Datasets.Datasets'}
Data_Shape is: 1
Batches Number is: 28


### Parameters

In [6]:
# Model architecture: latent dimension, number of input channels and the
# negative slope handed to the autoencoder constructor.
latent_dim = 10
n_channels = 1
negative_slope = 0

# Values taken from the dataset configuration: number of clusters and batch size.
n_clusters = dataset_properties['n_clusters']
batch_size = dataset_properties['batch_size']

# Pre-training: number of epochs and learning rate.
n_pret_epochs, pret_lr = 100, 1e-3

# Training: number of epochs and learning rate.
n_epochs, lr = 100, 1e-4

# Lambdas passed to set_training_variables.
# NOTE(review): presumably the weights of the silhouette and entropy loss terms — confirm.
sil_lambda = 0.01
entr_lambda = 0.01

# Workflow switches: k-means initialization of the clustering layer,
# pre-training from scratch vs. loading saved weights, MLP vs. CD_Autoencoder.
kmeans_initialization = True
pretrain = False
is_MLP_AE = False

### Create Autoencoder Model

In [7]:
# Constructor arguments shared by both autoencoder variants.
shared_model_kwargs = dict(
    device=device,
    n_clusters=n_clusters,
    input_dim=input_dim,
    latent_dim=latent_dim,
    negative_slope=negative_slope,
)

# Pick the architecture: Autoencoder when is_MLP_AE is set, otherwise
# CD_Autoencoder, which additionally takes the number of channels.
if is_MLP_AE:
    autoencoder = Autoencoder(**shared_model_kwargs)
else:
    autoencoder = CD_Autoencoder(**shared_model_kwargs, n_channels=n_channels)

# The configuration sequence is the same for both variants.
autoencoder.set_general_training_variables(dataloader=dataloader, batch_size=batch_size)
autoencoder.set_pretraining_variables(n_pret_epochs=n_pret_epochs, pret_lr=pret_lr)
autoencoder.set_training_variables(n_epochs=n_epochs, lr=lr, sil_lambda=sil_lambda, entr_lambda=entr_lambda)
autoencoder.set_path_variables(path_to_module=path_to_module, dataset_name=dataset_name)
autoencoder.set_path()

# Move the model to the selected device.
autoencoder = autoencoder.to(device)

In [8]:
# Echo the model so the notebook shows its layer structure.
autoencoder

CD_Autoencoder(
  (encoder_model): Sequential(
    (0): Conv2d(1, 32, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (1): LeakyReLU(negative_slope=0, inplace=True)
    (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv2d(32, 64, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (4): LeakyReLU(negative_slope=0, inplace=True)
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2))
    (7): LeakyReLU(negative_slope=0, inplace=True)
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): Flatten(start_dim=1, end_dim=-1)
    (10): Linear(in_features=1152, out_features=10, bias=True)
    (11): Tanh()
    (12): BatchNorm1d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (cluster_model): Sequential(
    (0): Linear(in_features=10, out_features=10, bias=False)
  )
  (decoder_

### Pretrain Autoencoder Model

In [9]:
if pretrain:
    # Pre-train the autoencoder and save the resulting weights.
    autoencoder.pretrain_autoencoder()
    autoencoder.save_pretrained_weights()
else:
    # Reuse weights saved by an earlier run.
    # NOTE(review): "Weigths" (sic) is kept as-is because it is a runtime path;
    # presumably it matches the directory save_pretrained_weights() writes to — confirm.
    model_save_path = autoencoder.data_dir_path + "/Weigths/autoencoder_weights.pth"
    # map_location remaps the stored tensors onto the active device, so a
    # checkpoint saved on a GPU still loads when only the CPU is available.
    pretrained_state = torch.load(model_save_path, map_location=device)
    autoencoder.load_state_dict(pretrained_state)

In [10]:
# Optionally initialize the clustering layer using k-means.
# NOTE(review): the meaning of the argument 100 is not visible here — confirm
# against the kmeans_initialization method.
if kmeans_initialization:
    autoencoder.kmeans_initialization(100)

ACC: 0.78 PUR: 0.79 NMI: 0.72 ARI: 0.65


In [11]:
# Re-configure training for this run: n_epochs=200 overrides the n_epochs
# value passed when the model was created. The learning rate and the two
# lambdas come from the parameters cell so they cannot drift out of sync.
autoencoder.set_training_variables(n_epochs=200, lr=lr, sil_lambda=sil_lambda, entr_lambda=entr_lambda)

# Train the model. Note that `labels` is rebound here and replaces the
# `labels` returned by the dataset-loading cell.
latent_data, labels, clustering = autoencoder.train_autoencoder()

# Fetch the cluster centers as a NumPy array on the host.
cluster_centers = autoencoder.get_cluster_centers().cpu().detach().numpy()

Ep: 0 Rec L: 0.2972 Cl L: 0.2577 Entropy: 0.2301 SSil: 2.2332 SIL: 0.0000 ACC: 0.79 PUR: 0.79 NMI: 0.73 ARI: 0.66
Ep: 1 Rec L: 0.3012 Cl L: 0.2378 Entropy: 0.2055 SSil: 4.2205 SIL: 0.0000 ACC: 0.80 PUR: 0.80 NMI: 0.75 ARI: 0.68
Ep: 2 Rec L: 0.3045 Cl L: 0.2302 Entropy: 0.1913 SSil: 4.9805 SIL: 0.0000 ACC: 0.80 PUR: 0.81 NMI: 0.76 ARI: 0.69
Ep: 3 Rec L: 0.3098 Cl L: 0.2231 Entropy: 0.1813 SSil: 5.6852 SIL: 0.0000 ACC: 0.81 PUR: 0.81 NMI: 0.76 ARI: 0.70
Ep: 4 Rec L: 0.3108 Cl L: 0.2186 Entropy: 0.1726 SSil: 6.1369 SIL: 0.0000 ACC: 0.81 PUR: 0.82 NMI: 0.77 ARI: 0.71
Ep: 5 Rec L: 0.3122 Cl L: 0.2140 Entropy: 0.1656 SSil: 6.5993 SIL: 0.0000 ACC: 0.81 PUR: 0.82 NMI: 0.77 ARI: 0.71
Ep: 6 Rec L: 0.3129 Cl L: 0.2103 Entropy: 0.1595 SSil: 6.9712 SIL: 0.0000 ACC: 0.81 PUR: 0.82 NMI: 0.77 ARI: 0.71
Ep: 7 Rec L: 0.3189 Cl L: 0.2073 Entropy: 0.1543 SSil: 7.2745 SIL: 0.0000 ACC: 0.81 PUR: 0.82 NMI: 0.78 ARI: 0.71
Ep: 8 Rec L: 0.3157 Cl L: 0.2034 Entropy: 0.1489 SSil: 7.6614 SIL: 0.0000 ACC: 0.82 PUR:

Ep: 71 Rec L: 0.3183 Cl L: 0.1445 Entropy: 0.0517 SSil: 13.5538 SIL: 0.0000 ACC: 0.83 PUR: 0.84 NMI: 0.82 ARI: 0.76
Ep: 72 Rec L: 0.3193 Cl L: 0.1436 Entropy: 0.0512 SSil: 13.6413 SIL: 0.0000 ACC: 0.83 PUR: 0.84 NMI: 0.82 ARI: 0.76
Ep: 73 Rec L: 0.3156 Cl L: 0.1424 Entropy: 0.0506 SSil: 13.7560 SIL: 0.0000 ACC: 0.83 PUR: 0.84 NMI: 0.82 ARI: 0.76
Ep: 74 Rec L: 0.3176 Cl L: 0.1420 Entropy: 0.0499 SSil: 13.8010 SIL: 0.0000 ACC: 0.83 PUR: 0.84 NMI: 0.82 ARI: 0.77
Ep: 75 Rec L: 0.3166 Cl L: 0.1422 Entropy: 0.0495 SSil: 13.7819 SIL: 0.0000 ACC: 0.83 PUR: 0.84 NMI: 0.82 ARI: 0.76
Ep: 76 Rec L: 0.3150 Cl L: 0.1413 Entropy: 0.0490 SSil: 13.8685 SIL: 0.0000 ACC: 0.83 PUR: 0.84 NMI: 0.82 ARI: 0.76
Ep: 77 Rec L: 0.3136 Cl L: 0.1392 Entropy: 0.0481 SSil: 14.0751 SIL: 0.0000 ACC: 0.83 PUR: 0.84 NMI: 0.82 ARI: 0.77
Ep: 78 Rec L: 0.3155 Cl L: 0.1396 Entropy: 0.0477 SSil: 14.0411 SIL: 0.0000 ACC: 0.83 PUR: 0.84 NMI: 0.82 ARI: 0.77
Ep: 79 Rec L: 0.3139 Cl L: 0.1387 Entropy: 0.0471 SSil: 14.1260 SIL: 0.0

KeyboardInterrupt: 

In [None]:
# Hand the model's evaluation table (autoencoder.df_eval) and its data
# directory to General_Functions.save_excel.
# NOTE(review): presumably this writes an Excel file into that directory — confirm.
General_Functions().save_excel(autoencoder.data_dir_path, autoencoder.df_eval)

In [None]:
# Plot the training results: latent data, labels, cluster assignments and
# cluster centers; the model's data directory is passed as the last argument.
visualization.plot(latent_data, labels, clustering, cluster_centers, autoencoder.data_dir_path)

In [None]:
# Same inputs as the previous plot, rendered through plot_tsne.
# NOTE(review): presumably a t-SNE projection of the latent space — confirm.
visualization.plot_tsne(latent_data, labels, clustering, cluster_centers, autoencoder.data_dir_path)

In [None]:
# Show the cluster centers (NumPy array produced in the training cell).
cluster_centers

In [None]:
# Peek at the first 100 entries of `clustering` returned by train_autoencoder.
clustering[0:100]

In [None]:
# Display `latent_data` returned by train_autoencoder.
# NOTE(review): this echoes the whole object; consider a slice such as
# latent_data[:5] to keep the saved notebook output small.
latent_data