In [1]:
#pip3 install google.colab
#from google.colab import drive
#drive.mount('/content/drive', force_remount=True)

###  Libraries

In [2]:
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.colors as mcolors

import torch

import os
import sys

from Autoencoder import Autoencoder, CD_Autoencoder
from Datasets.Datasets_Functions import *
from Visualization import Visualization
from Evaluations.Evaluation import Evaluator
from General_Functions import General_Functions

### Global Variables

In [3]:
# Root of the project checkout, kept relative so the notebook stays portable.
path_to_module = './'
# NOTE(review): the project modules are imported in an earlier cell, so this
# append only affects imports that run after this point.
sys.path.append(path_to_module)
# NOTE(review): numpy / scikit-learn / torch are already imported when this is
# set -- confirm the thread cap is still honoured when applied this late.
os.environ['OMP_NUM_THREADS'] = '6'

# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# torch.cuda.set_device only accepts CUDA devices and raises on the CPU
# fallback, so select the GPU only when one was actually chosen.
if device.type == 'cuda':
    torch.cuda.set_device(device)

### Visualization

In [4]:
# Instantiate the project's Visualization helper; its plot_tsne / plot methods
# are called in the cells that follow training.
visualization = Visualization()

### ***LOAD DATASET***

In [5]:
# Directory passed to the hashmap helpers (get_hashmap / update_inner_hashmap).
hashmap_path = path_to_module + 'Datasets/'
dataset_name = 'emnist_balanced_letters'

# Write the desired batch size first, then read the hashmap, so the properties
# used below reflect the value just written instead of a snapshot taken before
# the update.
# NOTE(review): assumes update_inner_hashmap persists the change under
# hashmap_path and does not need a prior get_hashmap call -- confirm.
update_inner_hashmap([dataset_name, 'batch_size'], 1024, hashmap_path)
hashmap = get_hashmap(hashmap_path)
dataset_properties = hashmap[dataset_name]
print(dataset_properties)

# Build the dataloader; the helper also returns the input dimension, the data
# and its labels.
dataloader, input_dim, data_np, labels = function_get_dataset(dataset_name, dataset_properties)
print('Data_Shape is:', input_dim)
print('Batches Number is:', len(dataloader))

{'batch_size': 1024, 'n_clusters': 10, 'module_name': 'Datasets.Datasets'}
Data_Shape is: 1
Batches Number is: 28


### Parameters

In [6]:
# Architecture: latent dimension, number of input channels and negative slope
latent_dim, n_channels, negative_slope = 10, 1, 0

# Dataset-driven settings: batch size and number of clusters
batch_size = dataset_properties['batch_size']
n_clusters = dataset_properties['n_clusters']

# Pre-training: epochs and learning rate
n_pret_epochs, pret_lr = 100, 1e-3

# Training: epochs, learning rate and the sil / entr lambdas
n_epochs, lr = 100, 5e-4
sil_lambda = entr_lambda = 0.02

# Pipeline switches: k-means initialisation of the clustering layer,
# pre-training (instead of loading saved weights), and MLP vs. CD autoencoder
kmeans_initialization, pretrain, is_MLP_AE = True, True, False

### Create Autoencoder Model

In [7]:
# Choose the architecture. Only the constructor differs between the two
# variants: CD_Autoencoder additionally takes the number of input channels.
if is_MLP_AE:
    autoencoder = Autoencoder(device=device, n_clusters=n_clusters, input_dim=input_dim, latent_dim=latent_dim, negative_slope=negative_slope)
else:
    autoencoder = CD_Autoencoder(device=device, n_clusters=n_clusters, input_dim=input_dim, latent_dim=latent_dim, negative_slope=negative_slope, n_channels=n_channels)

# The configuration is identical for both variants, so it is applied once
# instead of being repeated in each branch.
autoencoder.set_general_training_variables(dataloader=dataloader, batch_size=batch_size)
autoencoder.set_pretraining_variables(n_pret_epochs=n_pret_epochs, pret_lr=pret_lr)
autoencoder.set_training_variables(n_epochs=n_epochs, lr=lr, sil_lambda=sil_lambda, entr_lambda=entr_lambda)
autoencoder.set_path_variables(path_to_module=path_to_module, dataset_name=dataset_name)
autoencoder.set_path()

# Move the model to the selected device.
autoencoder = autoencoder.to(device)

In [8]:
# Bare expression: the notebook shows the model's repr (its layer structure) as
# this cell's output.
autoencoder

CD_Autoencoder(
  (encoder_model): Sequential(
    (0): Conv2d(1, 32, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (1): LeakyReLU(negative_slope=0, inplace=True)
    (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Conv2d(32, 64, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (4): LeakyReLU(negative_slope=0, inplace=True)
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2))
    (7): LeakyReLU(negative_slope=0, inplace=True)
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): Flatten(start_dim=1, end_dim=-1)
    (10): Linear(in_features=1152, out_features=10, bias=True)
    (11): Tanh()
    (12): BatchNorm1d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (cluster_model): Sequential(
    (0): RBF()
  )
  (decoder_model): Sequential(
    (0): Linear(in_feature

### Pretrain Autoencoder

In [9]:
if pretrain:
    # Pre-train from scratch and store the resulting weights.
    autoencoder.pretrain_autoencoder()
    autoencoder.save_pretrained_weights()
else:
    # Restore previously saved weights instead of pre-training.
    # NOTE(review): 'Weigths' is spelled this way in the path; presumably it
    # matches the directory save_pretrained_weights writes to -- confirm before
    # renaming.
    model_save_path = autoencoder.data_dir_path + '/Weigths/autoencoder_weights.pth'
    # map_location remaps the stored tensors onto the current device, so
    # weights saved on a GPU can still be loaded on a CPU-only machine.
    autoencoder.load_state_dict(torch.load(model_save_path, map_location=device))

Epoch: 0, Loss: 20.638789
Epoch: 1, Loss: 15.330878
Epoch: 2, Loss: 13.348399
Epoch: 3, Loss: 11.967514
Epoch: 4, Loss: 10.824282
Epoch: 5, Loss: 9.824005
Epoch: 6, Loss: 8.943735
Epoch: 7, Loss: 8.150802
Epoch: 8, Loss: 7.432696
Epoch: 9, Loss: 6.772128
Epoch: 10, Loss: 6.170878
Epoch: 11, Loss: 5.608293
Epoch: 12, Loss: 5.101055
Epoch: 13, Loss: 4.635466
Epoch: 14, Loss: 4.211432
Epoch: 15, Loss: 3.826729
Epoch: 16, Loss: 3.477084
Epoch: 17, Loss: 3.149423
Epoch: 18, Loss: 2.843818
Epoch: 19, Loss: 2.578604
Epoch: 20, Loss: 2.328983
Epoch: 21, Loss: 2.114716
Epoch: 22, Loss: 1.918346
Epoch: 23, Loss: 1.741935
Epoch: 24, Loss: 1.584439
Epoch: 25, Loss: 1.436309
Epoch: 26, Loss: 1.311655
Epoch: 27, Loss: 1.199223
Epoch: 28, Loss: 1.103445
Epoch: 29, Loss: 1.016246
Epoch: 30, Loss: 0.939532
Epoch: 31, Loss: 0.865192
Epoch: 32, Loss: 0.808335
Epoch: 33, Loss: 0.753397
Epoch: 34, Loss: 0.712155
Epoch: 35, Loss: 0.670848
Epoch: 36, Loss: 0.634056
Epoch: 37, Loss: 0.603389
Epoch: 38, Loss: 

### Initialize the clustering layer using k-means

In [10]:
# Value handed to the autoencoder's k-means initialisation
# (presumably the number of k-means restarts -- confirm).
n_inits = 100

# Only initialise the clustering layer with k-means when the switch is on.
if kmeans_initialization:
    autoencoder.kmeans_initialization(n_inits)

ACC: 0.70 PUR: 0.70 NMI: 0.63 ARI: 0.53


### Train Autoencoder

In [None]:
# Re-configure the training schedule for this run (20 epochs).
# NOTE(review): these literals override the values set when the model was
# created; lr and the lambdas repeat the Parameters cell, n_epochs does not.
autoencoder.set_training_variables(
    n_epochs=20,
    lr=5e-4,
    sil_lambda=0.02,
    entr_lambda=0.02,
)

# Train the model. Note that `labels` is rebound here, replacing the value
# returned by function_get_dataset in the dataset cell.
latent_data, labels, clustering = autoencoder.train_autoencoder()

# Bring the cluster centres to the host as a NumPy array for plotting.
cluster_centers = (
    autoencoder.get_cluster_centers()
    .cpu()
    .detach()
    .numpy()
)

Ep: 0 Rec L: 0.4449 Cl L: 0.5688 Entropy: 1.8479 SSil: -0.4386 SIL: 0.0000 ACC: 0.70 PUR: 0.70 NMI: 0.62 ARI: 0.52
Ep: 1 Rec L: 0.4006 Cl L: 0.5759 Entropy: 1.8219 SSil: -0.7961 SIL: 0.0000 ACC: 0.70 PUR: 0.70 NMI: 0.63 ARI: 0.53
Ep: 2 Rec L: 0.4006 Cl L: 0.5802 Entropy: 1.8025 SSil: -1.0108 SIL: 0.0000 ACC: 0.70 PUR: 0.70 NMI: 0.63 ARI: 0.53
Ep: 3 Rec L: 0.4075 Cl L: 0.5822 Entropy: 1.7927 SSil: -1.1090 SIL: 0.0000 ACC: 0.70 PUR: 0.70 NMI: 0.63 ARI: 0.52
Ep: 4 Rec L: 0.4047 Cl L: 0.5839 Entropy: 1.7837 SSil: -1.1966 SIL: 0.0000 ACC: 0.70 PUR: 0.70 NMI: 0.63 ARI: 0.52
Ep: 5 Rec L: 0.4077 Cl L: 0.5850 Entropy: 1.7777 SSil: -1.2521 SIL: 0.0000 ACC: 0.70 PUR: 0.70 NMI: 0.63 ARI: 0.52
Ep: 6 Rec L: 0.4068 Cl L: 0.5858 Entropy: 1.7750 SSil: -1.2878 SIL: 0.0000 ACC: 0.70 PUR: 0.70 NMI: 0.63 ARI: 0.52
Ep: 7 Rec L: 0.4094 Cl L: 0.5867 Entropy: 1.7712 SSil: -1.3335 SIL: 0.0000 ACC: 0.70 PUR: 0.70 NMI: 0.63 ARI: 0.51
Ep: 8 Rec L: 0.4081 Cl L: 0.5875 Entropy: 1.7689 SSil: -1.3726 SIL: 0.0000 ACC: 

In [None]:
# Call the helper's plot_tsne with the training outputs and cluster centres.
# The last argument is the autoencoder's data directory
# (presumably where the figure is written -- confirm).
visualization.plot_tsne(latent_data, labels, clustering, cluster_centers, autoencoder.data_dir_path)

In [None]:
# Call the helper's plot with the same training outputs and cluster centres.
# The last argument is the autoencoder's data directory
# (presumably where the figure is written -- confirm).
visualization.plot(latent_data, labels, clustering, cluster_centers, autoencoder.data_dir_path)

In [None]:
# Pass the autoencoder's data directory and its df_eval attribute to
# General_Functions.save_excel (presumably writes the evaluation table as an
# Excel file in that directory -- confirm).
General_Functions().save_excel(autoencoder.data_dir_path, autoencoder.df_eval)