## Two Moons: Varying the amount of jitter

In [5]:
import sys
# Prepend the directory two levels above the working directory to the module
# search path. Presumably this lets the project-local `rock` module imported
# in the next cell resolve when the notebook is run from its own folder.
sys.path.insert(0, '../../')

In [6]:
import optuna
import numpy as np
import pandas as pd

from sklearn.datasets import make_blobs, make_moons
from sklearn.cluster import KMeans, DBSCAN, SpectralClustering, MeanShift, estimate_bandwidth
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import adjusted_mutual_info_score as AMI
from rock import ROCK

import logging
import sys

In [7]:
# --- Experiment configuration -------------------------------------------
# Base random state: the experiment loop draws data sets with random states
# seed .. seed + 9 and seeds the stochastic baselines with `seed` itself.
seed = 0
# Number of points generated for every two-moons data set.
n_samples = 1000

# Noise levels to sweep, expressed in percent (1, 5, 10, ..., 30); the
# experiment loop multiplies each entry by 0.01 before passing it on as the
# `noise` argument of make_moons.
jitter_grid = [1, *range(5, 31, 5)]

# Single default noise level and an empty data set container. Neither is
# referenced by the cells visible here; kept in case later cells use them.
jitter = 15 * 0.01
datasets = []

In [8]:
# Number of clusters requested from the algorithms that need it up front
# (two moons -> two clusters); also logged in every run record.
n_centers = 2

# Accumulators filled by the experiment loop:
#   experiment    - one dict per run (parameters and AMI scores)
#   rock_results  - AMI of ROCK for every run
#   other_results - best AMI among the baseline algorithms for every run
experiment, rock_results, other_results = [], [], []

In [9]:
# Benchmark ROCK against four scikit-learn baselines (KMeans, DBSCAN,
# SpectralClustering, MeanShift) on the two-moons data set while sweeping the
# noise level. For every entry of `jitter_grid` (noise in percent) ten data
# sets are drawn with random states seed .. seed + 9. Each clustering is
# scored with adjusted mutual information (AMI) against the ground-truth
# moon labels.
#
# Side effects per run:
#   * appends a parameter/score record to `experiment`
#   * appends the ROCK score to `rock_results`
#   * appends the best baseline score to `other_results`

# DBSCAN hyper-parameters are the same for every run, so bind them once.
eps, min_pts = (0.2, 4)

for j in jitter_grid:
    # Grid values are percentages; convert once per noise level.
    noise = j * 0.01
    for step, s in enumerate(range(seed, seed + 10)):
        dataset = make_moons(n_samples=n_samples, noise=noise, shuffle=True, random_state=s)
        gt = dataset[1]

        # Standardise the coordinates before clustering.
        scaler = StandardScaler()
        data = scaler.fit_transform(dataset[0])

        # NOTE(review): the stochastic baselines are seeded with the base
        # `seed`, not the per-run `s`, so only the data varies between
        # repetitions. Presumably intentional; confirm.
        kmeans = KMeans(n_clusters=n_centers, random_state=seed).fit(data).labels_
        dbscan = DBSCAN(eps=eps, min_samples=min_pts).fit(data).labels_
        # Use `n_centers` (not a literal 2) so the value logged in the run
        # record always matches what the algorithm was given.
        spectral = SpectralClustering(n_clusters=n_centers, random_state=seed).fit(data).labels_
        bandwidth = estimate_bandwidth(data)
        mean_shift = MeanShift(bandwidth=bandwidth).fit(data).labels_
        rock = ROCK(tmax=15).fit(data).labels_

        # Score every clustering exactly once and reuse the values below.
        rock_ami = AMI(rock, gt)
        baseline_ami = {
            'K_MEANS': AMI(kmeans, gt),
            'DBSCAN': AMI(dbscan, gt),
            'SPECTRAL': AMI(spectral, gt),
            'MEAN_SHIFT': AMI(mean_shift, gt),
        }

        rock_results.append(rock_ami)
        other_results.append(np.max(list(baseline_ami.values())))

        # Key order determines the column order of the results DataFrame.
        run = {
            'step': step,
            'n_samples': n_samples,
            'n_centers': n_centers,
            'jitter': noise,
            'eps': eps,
            'min_pts': min_pts,
            'bandwidth': bandwidth,
            'ROCK': rock_ami,
            **baseline_ami,
        }
        experiment.append(run)

In [10]:
# Persist all run records as CSV. The DataFrame index is written as well
# (pandas default), so the file starts with an unnamed index column.
results_frame = pd.DataFrame(experiment)
results_frame.to_csv('../../results/analysis/two_moons_analysis_jitter.csv')

In [11]:
# Render the collected run records as a table (one row per run); this must
# stay the last expression of the cell so the notebook displays it.
pd.DataFrame(data=experiment)

Unnamed: 0,step,n_samples,n_centers,jitter,eps,min_pts,bandwidth,ROCK,K_MEANS,DBSCAN,SPECTRAL,MEAN_SHIFT
0,0,1000,2,0.01,0.2,4,1.245104,0.238206,0.387223,1.000000,0.439425,0.436695
1,1,1000,2,0.01,0.2,4,1.246109,0.244556,0.382260,1.000000,0.444914,0.442159
2,2,1000,2,0.01,0.2,4,1.245576,0.241767,0.382260,1.000000,0.442175,0.439425
3,3,1000,2,0.01,0.2,4,1.246323,0.243235,0.384746,1.000000,0.442159,0.442159
4,4,1000,2,0.01,0.2,4,1.245338,0.209449,0.389732,1.000000,0.439425,0.436711
...,...,...,...,...,...,...,...,...,...,...,...,...
65,5,1000,2,0.30,0.2,4,1.264171,0.364909,0.342518,0.011766,0.351616,0.000000
66,6,1000,2,0.30,0.2,4,1.258361,0.415461,0.346419,0.011737,0.358124,0.353466
67,7,1000,2,0.30,0.2,4,1.265554,0.444389,0.351161,0.014743,0.351343,0.375095
68,8,1000,2,0.30,0.2,4,1.258225,0.313783,0.317269,0.018672,0.337529,0.334016
