In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import adjusted_mutual_info_score
from sklearn.metrics import normalized_mutual_info_score
import hdbscan

In [2]:
df = pd.read_csv('../../../data/synthetic_data_training_scaled.csv')

In [3]:
print(df.shape)

(2500, 1013)


In [4]:
print(df.columns.values)

['edge_0_volume' 'edge_0_resptime' 'edge_0_error' ...
 'ratio_error_to_req_perc95' 'ratio_error_to_req_perc99'
 'ratio_edge_to_error']


In [5]:
clusterer = hdbscan.HDBSCAN(min_cluster_size=5, min_samples=5).fit(df)

In [6]:
index_list = []
for i, label in enumerate(clusterer.labels_):
    if label != -1:
        index_list.append(i)
        print('data point %d has value %d' % (i, label))

data point 55 has value 1
data point 89 has value 1
data point 639 has value 1
data point 660 has value 1
data point 726 has value 0
data point 898 has value 0
data point 2049 has value 0
data point 2089 has value 0
data point 2351 has value 0
data point 2404 has value 1
data point 2416 has value 0


In [7]:
for index in index_list:
    print('data point %d has outlier score %d' % (index, clusterer.outlier_scores_.tolist()[index]))

data point 55 has outlier score 0
data point 89 has outlier score 0
data point 639 has outlier score 0
data point 660 has outlier score 0
data point 726 has outlier score 0
data point 898 has outlier score 0
data point 2049 has outlier score 0
data point 2089 has outlier score 0
data point 2351 has outlier score 0
data point 2404 has outlier score 0
data point 2416 has outlier score 0


In [8]:
clusterer = hdbscan.HDBSCAN(min_cluster_size=3, min_samples=5).fit(df)

In [9]:
print(clusterer.labels_)

[-1 -1 -1 ... -1 -1 -1]


In [10]:
index_list = []
for i, label in enumerate(clusterer.labels_):
    if label != -1:
        index_list.append(i)
        print('data point %d has value %d' % (i, label))

data point 55 has value 0
data point 89 has value 0
data point 92 has value 0
data point 160 has value 0
data point 248 has value 0
data point 364 has value 1
data point 390 has value 0
data point 492 has value 0
data point 526 has value 0
data point 552 has value 0
data point 566 has value 0
data point 578 has value 1
data point 636 has value 0
data point 639 has value 0
data point 643 has value 0
data point 658 has value 0
data point 660 has value 0
data point 726 has value 0
data point 898 has value 0
data point 975 has value 0
data point 1003 has value 0
data point 1062 has value 0
data point 1319 has value 0
data point 1351 has value 0
data point 1386 has value 0
data point 1434 has value 0
data point 1567 has value 1
data point 1583 has value 0
data point 1800 has value 0
data point 2049 has value 0
data point 2089 has value 0
data point 2168 has value 0
data point 2267 has value 0
data point 2351 has value 0
data point 2404 has value 0
data point 2416 has value 0


In [11]:
for index in index_list:
    print('data point %d has outlier score %d' % (index, clusterer.outlier_scores_.tolist()[index]))

data point 55 has outlier score 0
data point 89 has outlier score 0
data point 92 has outlier score 0
data point 160 has outlier score 0
data point 248 has outlier score 0
data point 364 has outlier score 0
data point 390 has outlier score 0
data point 492 has outlier score 0
data point 526 has outlier score 0
data point 552 has outlier score 0
data point 566 has outlier score 0
data point 578 has outlier score 0
data point 636 has outlier score 0
data point 639 has outlier score 0
data point 643 has outlier score 0
data point 658 has outlier score 0
data point 660 has outlier score 0
data point 726 has outlier score 0
data point 898 has outlier score 0
data point 975 has outlier score 0
data point 1003 has outlier score 0
data point 1062 has outlier score 0
data point 1319 has outlier score 0
data point 1351 has outlier score 0
data point 1386 has outlier score 0
data point 1434 has outlier score 0
data point 1567 has outlier score 0
data point 1583 has outlier score 0
data point 1800

In [12]:
for score in clusterer.outlier_scores_.tolist():
    print(score, end=',')

0.030874423498662554,0.04911262321078484,0.045043568594204476,0.05102252581549831,0.053592052196502954,0.015448243932821551,0.024180121503448292,0.041766637197309935,0.042485027304922124,0.06405758675933941,0.037583302683977325,0.043421408432480296,0.006576813327799272,0.060309463789725265,0.09819043402222705,0.049705352051770554,0.04364972858272154,0.030810909272084724,0.030567619534322514,0.028796993541611905,0.027873239614789175,0.03490691886561091,0.042534411018008804,0.03220926381665173,0.050991299580531765,0.032476768755906035,0.02670860526593407,0.016684399613489005,0.03967005241028851,0.05017369311067281,0.041110661139229045,0.026053069562303598,0.05189063886732495,0.04178120671557717,0.06732608030615932,0.00999118633419039,0.039502295450505126,0.045459845375782695,0.025651799268310894,0.034948585364761885,0.06338267910759533,0.019826956958368756,0.023414353730128373,0.028627774937032686,0.03731264577494456,0.052620935294068374,0.013642867801207935,0.05530887640239004,0.0389031

0.006924506708483571,0.05073300567603519,0.01652491641348128,0.032062028454624976,0.03537030450797339,0.006578078535224348,0.04943102440246382,0.05304553625333886,0.03605480012638673,0.029754456426899414,0.02142925527872667,0.03224847329483371,0.03969737195514698,0.013486807992988199,0.026675803320092555,0.0477393082204826,0.033572625564222,0.028668773677764842,0.03395183081495605,0.013347417544278898,0.043093539503549314,0.019849880825734592,0.0407944016101796,0.01948389195513006,0.027673455115890602,0.0046870043438391315,0.02005029335927105,0.020056501695718307,0.051508129108157406,0.0027569362467355476,0.03226039136668173,0.03313439250095,0.05330742971585168,0.040483358892702416,0.018097552410726102,0.02820628675610718,0.02119407868377141,0.026093316030871054,0.06600653529285766,0.06575496402072768,0.05003685290112766,0.033790332139177405,0.02654939190844186,0.008538420399632233,0.052832579021575087,0.019838623786031986,0.032432567105328,0.034606824529972935,0.03835806827502432,0.02

,0.04292049835576837,0.060268513200649104,0.04138160525084605,0.01590801899969407,0.054573751764670164,0.04307736664846376,0.032105050076852895,0.06019380408536965,0.05214151451213384,0.037518801894098064,0.03458692877960899,0.03390680983746904,0.023113799631475446,0.042287332295443715,0.037522552381197075,0.05454185827917191,0.04957542299977096,0.05645604725820844,0.007249719517393993,0.05247106990254974,0.02710630764364388,0.044305302071086135,0.03975664548110601,0.02130349620232426,0.03734181646227553,0.03773051903637976,0.07201163557947951,0.004058612425181015,0.06690772611421454,0.03839953424979593,0.01546282352049308,0.014766464641252666,0.018066462777256837,0.05553350883315259,0.025302637914979376,0.047324811389958,0.04824074279291771,0.02945864776574164,0.05276474608593079,0.010368048193241735,0.030611078223505516,0.033449027968042666,0.03942206726879155,0.05559897139422432,0.04775690813542124,0.029527375271655876,0.03606159857101936,0.05693303753588136,0.032035730175403725,0.0

0.055702575636399014,0.03835632805482171,0.025598844633183675,0.03310308947377049,0.029946225912871768,0.025950575718715146,0.035265912705985204,0.04237531954011446,0.03491445079024464,0.04400206609436886,0.04464668761065342,0.042278612528727805,0.06228426531858172,0.03004712905502216,0.03475673309465131,0.05145462227642101,0.034225121714154275,0.04604972931180244,0.033718041400358494,0.064777097338669,0.06733002851964015,0.008746825916251992,0.05785893652562637,0.04956295280166267,0.0637623661922079,0.049916090557010906,0.03683991503045459,0.022407964630621023,0.005197665240212668,0.04056192888514202,0.04129779604186528,0.03649336515867769,0.022540615819586572,0.03519387706684155,0.05932847566578272,0.0447004373125033,0.024905091076756137,0.03364021009792509,0.037818294209720306,0.021433286009940516,0.03611754735914965,0.011190536216371744,0.0459587700030174,0.03596551104643557,0.04539462100938478,0.048693946877498666,0.042238883356016964,0.05174482438601784,0.047286689657432074,0.011