In [1]:
## Import dependencies to run example ##
import os
import sys
sys.path.append('..')
from models.som import SOM
from dataloaders.datasets import Datasets
import torch.backends.cudnn as cudnn
import random
from torch.utils.data.dataloader import DataLoader
import argparse
import metrics
import torch
from utils import utils
from os.path import join
from sampling.custom_lhs import *

In [2]:
## Function to load a dataset and run the full SOM model on it
def train_som(root, dataset_path, device, use_cuda, workers,
              n_max=20, at=0.985, ds_beta=0.5, eb=0.1, eps_ds=1., som_epochs=2, manual_seed=1,
              batch_size=None, log_interval=1):
    """Train a SOM on one dataset and print its clustering error (CE).

    Args:
        root: Root folder holding the datasets (passed to ``Datasets`` as ``root_folder``).
        dataset_path: Dataset path, passed to ``Datasets`` as ``dataset``.
        device: Device handed to the SOM and used for every training sample.
        use_cuda: When true, the CUDA RNGs are seeded, the SOM is moved with
            ``som.cuda()`` and cudnn benchmark mode is switched on.
        workers: ``num_workers`` of the training ``DataLoader``.
        n_max, at, ds_beta, eb, eps_ds: SOM hyper-parameters, forwarded unchanged
            to the ``SOM`` constructor.
        som_epochs: Number of passes over the training loader.
        manual_seed: Seed applied to ``random`` and ``torch`` (and CUDA if enabled).
        batch_size: Batch size of the training loader. ``None`` (the default)
            keeps the previous behaviour of reading a notebook-level
            ``batch_size`` variable, falling back to 1 when none is defined.
        log_interval: Evaluate on the test loader and print the CE every
            ``log_interval`` training batches. The default of 1 reproduces the
            previous per-batch logging; ``0`` or ``None`` disables it.

    Returns:
        None. The intermediate and final CE values are printed.
    """
    ## Older cells call this function without batch_size and rely on a
    ## notebook-level variable; keep that working instead of raising NameError.
    if batch_size is None:
        batch_size = globals().get('batch_size', 1)

    ## Seed first, so that anything random done while building the dataset or
    ## the SOM is covered by the seed as well (assumes they draw from these RNGs).
    random.seed(manual_seed)
    torch.manual_seed(manual_seed)
    if use_cuda:
        torch.cuda.manual_seed_all(manual_seed)

    ## Import dataset from dataset_path and root folder (the SOM input is flattened)
    dataset = Datasets(dataset=dataset_path, root_folder=root, flatten=True)

    ## Create the SOM
    som = SOM(input_dim=dataset.dim_flatten,
              n_max=n_max,
              at=at,
              ds_beta=ds_beta,
              eb=eb,
              eps_ds=eps_ds,
              device=device)

    ## Flag to run model in cuda or not
    if use_cuda:
        som.cuda()
        cudnn.benchmark = True

    ## Load train and test loaders
    train_loader = DataLoader(dataset.train_data, batch_size=batch_size, shuffle=True, num_workers=workers)
    test_loader = DataLoader(dataset.test_data, shuffle=False)
    n_batches = len(train_loader)

    ## Run model for som_epochs and for each epoch run the train_loader
    for epoch in range(som_epochs):

        ## Pass over all samples during one epoch.
        ## The true label is not used during training, so it is ignored here.
        for batch_idx, (sample, _) in enumerate(train_loader):

            ## Forward sample in SOM model
            som(sample.to(device))

            ## Use test_loader to calculate the CE metric; the true labels are
            ## only used to compute this metric.
            if log_interval and batch_idx % log_interval == 0:
                _, predict_labels, true_labels = som.cluster(test_loader)
                ce = metrics.cluster.predict_to_clustering_error(true_labels, predict_labels)
                print('{0} id [Epoch: {1} {2:.0f}%]\tCE: {3:.6f}'.format(dataset_path,
                                                                      epoch,
                                                                      100. * batch_idx / n_batches,
                                                                      ce))

    ## Calculate final CE metric with the som.cluster result
    _, predict_labels, true_labels = som.cluster(test_loader)
    final_ce = metrics.cluster.predict_to_clustering_error(true_labels, predict_labels)
    print('{} \t \tCE: {:.3f}'.format(dataset_path, final_ce))


In [3]:
## Example parameters
cuda_flag = False # Flag to use or not cuda
workers = 0 # Number of cpu workers (use default: 0)
ngpu = 1 # Number of gpus (use default: 1)
root = '../raw-datasets/' # Root dataset folder (default: ../raw-datasets/)
dataset_path = 'Realdata/breast.arff' # Dataset Path (default: Realdata/breast.arff)
batch_size = 1 # Batch size (default: 1)
manual_seed = 1 # Seed to reproduce experiments

## Remind the user that a CUDA device is available but not being used.
## The switch in this notebook is cuda_flag; there is no --cuda command-line option here.
if torch.cuda.is_available() and not cuda_flag:
    print("WARNING: You have a CUDA device, so you should probably set cuda_flag = True")

## Cuda is only used when it is available AND the flag was set
use_cuda = torch.cuda.is_available() and cuda_flag

## Initialize cuda
if use_cuda:
    torch.cuda.init()

## Set the device used to move the model and samples to the gpu (or keep them on cpu)
device = torch.device('cuda:0' if use_cuda else 'cpu')



In [4]:
## SOM Parameters
## (the values below produced the best CE that we found on the breast dataset)

# Maximum number of nodes that the SOM model can create
n_max = 145

# Activation threshold of the SOM nodes (High at update nodes, low at create new nodes)
at = 0.81302

# Parameter that perturbs the update of the moving average
ds_beta = 0.029942

# Learning rate of the SOM model
eb = 0.00720282

# Parameter that changes the decay of the relevances
eps_ds = 1.0

# Number of epochs to run the model
som_epochs = 2

In [5]:
## Train the SOM with the example and SOM parameters defined in the cells above
train_som(
    root=root,
    dataset_path=dataset_path,
    device=device,
    use_cuda=use_cuda,
    workers=workers,
    n_max=n_max,
    at=at,
    ds_beta=ds_beta,
    eb=eb,
    eps_ds=eps_ds,
    som_epochs=som_epochs,
    manual_seed=manual_seed,
)

Realdata/breast.arff id [Epoch: 0 0%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 1%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 1%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 2%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 2%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 3%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 3%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 4%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 4%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 5%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 5%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 6%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 6%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 7%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 7%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 8%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 8%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 9%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 9%]	CE: 0.762626
Realdata/breast.arff id [Epoch:

Realdata/breast.arff id [Epoch: 0 80%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 80%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 81%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 81%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 82%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 82%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 83%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 83%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 84%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 84%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 85%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 85%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 86%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 86%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 87%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 87%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 88%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 88%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 0 89%]	CE: 0.762626
Realdata/bre

Realdata/breast.arff id [Epoch: 1 60%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 1 60%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 1 61%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 1 61%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 1 62%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 1 62%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 1 63%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 1 63%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 1 64%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 1 64%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 1 65%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 1 65%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 1 66%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 1 66%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 1 67%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 1 67%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 1 68%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 1 68%]	CE: 0.762626
Realdata/breast.arff id [Epoch: 1 69%]	CE: 0.762626
Realdata/bre