In [1]:
#pip3 install google.colab
#from google.colab import drive
#drive.mount('/content/drive', force_remount=True)

###  Libraries

In [2]:
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.colors as mcolors

import torch

import os
import sys

from MLP import MLP
from Datasets_Functions import *
from Visualization import Visualization
from Evaluations.Evaluation import Evaluator

### Global Variables

In [3]:
path_to_module = './' # 'C:\\Users\\PAPASOFT INC\\Desktop\\SOFT_SIL'
sys.path.append(path_to_module)
os.environ['OMP_NUM_THREADS'] = '6'

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
torch.cuda.set_device(device)

### Load datasets

In [4]:
hashmap = get_hashmap(path_to_module)
print(hashmap.keys())
dataset_name = "R3"
dataset_properties = hashmap[dataset_name]
print(dataset_properties)
batch_size = dataset_properties['batch_size'] #= 30
n_clusters = dataset_properties['n_clusters'] #= 3
dataloader, input_dim, data_np, labels = function_get_dataset(dataset_properties)
print('Data_Shape is:', input_dim)
print('Batches Number is:', len(dataloader))

dict_keys(['10x73k', 'TCGA', 'coil20', 'eMNIST_balanced_letters', 'eMNIST_mnist', 'eMNIST_balanced_digits', 'Dermatology', 'Ecoil', 'Iris', 'Rings', 'Wine', 'Australian', 'Moons', 'Squeezed_Gauss', 'Gauss_Densities', 'Pendigits', 'fashionMNIST', '3DSpheres', '20_NewsGroups', 'COIL20', 'CIFAR10', 'STL10', 'R15', 'R3'])
{'batch_size': 100, 'n_clusters': 3, 'module_name': 'Datasets.Datasets', 'function_name': 'load_R3_dataloader', 'option_name': ''}
Data_Shape is: 2
Batches Number is: 15


### Parameters

In [5]:
# Batch Size and Number of Clusters
batch_size = dataset_properties['batch_size']
n_clusters = dataset_properties['n_clusters']

# Training Epochs and Learning Rate
lr = 1e-3
n_epochs = 100
entr_lambda = 1

kmeans_initialization = True

### Create MLP model

In [6]:
mlp = MLP(device=device, n_clusters=n_clusters, input_dim=input_dim)
mlp.set_training_variables(dataloader=dataloader, batch_size=batch_size, n_epochs=n_epochs, lr=lr, entr_lambda=entr_lambda)
mlp.set_path_variables(path_to_module=path_to_module, dataset_name=dataset_name)
mlp.set_path()

mlp = mlp.to(device)

In [7]:
# Initialize the clustering layer using k-means
if kmeans_initialization : mlp.kmeans_initialization(10)



### Colors for visualization

In [8]:
color_list = list(mcolors.CSS4_COLORS.keys()) + list(mcolors.XKCD_COLORS.keys())
np.random.shuffle(color_list)
color_list = ['deepskyblue', 'gold', 'hotpink', 'limegreen'] + color_list
visualization = Visualization()

### Training the MLP

In [9]:
mlp.train()

Epoch: 0 Cl Loss: 17.0103 Entropy: 23.3303 Soft Sil: -2.0103 SIL: 0.0000 ACC: 0.34 PUR: 0.34 NMI: 0.02 ARI: 0.00
Epoch: 1 Cl Loss: 16.8403 Entropy: 23.3762 Soft Sil: -1.8403 SIL: 0.0000 ACC: 0.37 PUR: 0.37 NMI: 0.07 ARI: 0.01
Epoch: 2 Cl Loss: 16.6757 Entropy: 23.4189 Soft Sil: -1.6757 SIL: 0.0000 ACC: 0.44 PUR: 0.44 NMI: 0.18 ARI: 0.05
Epoch: 3 Cl Loss: 16.5053 Entropy: 23.4594 Soft Sil: -1.5053 SIL: 0.0000 ACC: 0.54 PUR: 0.54 NMI: 0.35 ARI: 0.20
Epoch: 4 Cl Loss: 16.3523 Entropy: 23.4972 Soft Sil: -1.3523 SIL: 0.0000 ACC: 0.60 PUR: 0.60 NMI: 0.50 ARI: 0.36
Epoch: 5 Cl Loss: 16.2018 Entropy: 23.5329 Soft Sil: -1.2018 SIL: 0.0000 ACC: 0.64 PUR: 0.64 NMI: 0.60 ARI: 0.46
Epoch: 6 Cl Loss: 16.0539 Entropy: 23.5707 Soft Sil: -1.0539 SIL: 0.0000 ACC: 0.65 PUR: 0.65 NMI: 0.65 ARI: 0.52
Epoch: 7 Cl Loss: 15.9052 Entropy: 23.6076 Soft Sil: -0.9052 SIL: 0.0000 ACC: 0.66 PUR: 0.66 NMI: 0.67 ARI: 0.53
Epoch: 8 Cl Loss: 15.7534 Entropy: 23.6403 Soft Sil: -0.7534 SIL: 0.0000 ACC: 0.66 PUR: 0.66 NMI

Epoch: 75 Cl Loss: 15.0030 Entropy: 23.7744 Soft Sil: -0.0030 SIL: 0.0000 ACC: 0.50 PUR: 0.50 NMI: 0.24 ARI: 0.14
Epoch: 76 Cl Loss: 15.0024 Entropy: 23.7744 Soft Sil: -0.0024 SIL: 0.0000 ACC: 0.37 PUR: 0.39 NMI: 0.01 ARI: 0.01
Epoch: 77 Cl Loss: 15.0029 Entropy: 23.7744 Soft Sil: -0.0029 SIL: 0.0000 ACC: 0.39 PUR: 0.40 NMI: 0.02 ARI: 0.01
Epoch: 78 Cl Loss: 15.0016 Entropy: 23.7744 Soft Sil: -0.0016 SIL: 0.0000 ACC: 0.37 PUR: 0.37 NMI: 0.00 ARI: 0.00
Epoch: 79 Cl Loss: 15.0020 Entropy: 23.7744 Soft Sil: -0.0020 SIL: 0.0000 ACC: 0.37 PUR: 0.37 NMI: 0.01 ARI: 0.01
Epoch: 80 Cl Loss: 15.0030 Entropy: 23.7744 Soft Sil: -0.0030 SIL: 0.0000 ACC: 0.40 PUR: 0.40 NMI: 0.01 ARI: 0.01
Epoch: 81 Cl Loss: 15.0022 Entropy: 23.7744 Soft Sil: -0.0022 SIL: 0.0000 ACC: 0.40 PUR: 0.40 NMI: 0.02 ARI: 0.02
Epoch: 82 Cl Loss: 15.0021 Entropy: 23.7744 Soft Sil: -0.0021 SIL: 0.0000 ACC: 0.44 PUR: 0.44 NMI: 0.05 ARI: 0.04
Epoch: 83 Cl Loss: 15.0022 Entropy: 23.7744 Soft Sil: -0.0022 SIL: 0.0000 ACC: 0.34 PUR:

### $k$-means evaluation

In [10]:
data, labels = mlp.get_data()
kmeans = KMeans(n_clusters=n_clusters, n_init=10).fit(data)
evaluator = Evaluator()
evaluator.evaluate_model(data, labels, kmeans.labels_)



(1.0, 1.0, 1.0, 1.0, 0)

In [11]:
#visualization.makeExcel()
#visualization.plot_tsne(mlp=True)
visualization.plot(data, )

TypeError: Visualization.plot() missing 5 required positional arguments: 'data', 'y_true', 'y_predict', 'cluster_centers', and 'data_dir_path'

In [None]:
visualization.plot(latent_data, labels, clustering, cluster_centers, autoencoder.data_dir_path)

In [None]:
visualization.plot_tsne(latent_data, labels, clustering, cluster_centers, autoencoder.data_dir_path)