In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import adjusted_mutual_info_score
from sklearn.metrics import normalized_mutual_info_score
import hdbscan

In [2]:
# Read the CSV at the relative path below into `df`.
# NOTE(review): the file name suggests scaled synthetic training data — confirm.
df = pd.read_csv(
    filepath_or_buffer='../../../data/synthetic_data_training_scaled.csv',
)

In [3]:
# Show the (rows, columns) shape tuple of the loaded frame.
frame_shape = df.shape
print(frame_shape)

(2500, 1013)


In [4]:
# Print the column labels as a NumPy array (NumPy abbreviates long arrays
# with "..." when printing).
column_names = df.columns.values
print(column_names)

['edge_0_volume' 'edge_0_resptime' 'edge_0_error' ...
 'ratio_error_to_req_perc95' 'ratio_error_to_req_perc99'
 'ratio_edge_to_error']


In [5]:
# Configure HDBSCAN (min_cluster_size=5, min_samples=5), then fit it on every
# column of `df`. `fit` returns the estimator itself, so `clusterer` ends up
# bound to the same fitted object as a chained `.fit(df)` call would give.
clusterer = hdbscan.HDBSCAN(min_cluster_size=5, min_samples=5)
clusterer.fit(df)

In [6]:
# Print the per-row cluster labels assigned by the fitted clusterer.
point_labels = clusterer.labels_
print(point_labels)

[-1 -1 -1 ... -1 -1 -1]


In [7]:
# List every row whose label is not -1, i.e. rows that were placed in a
# cluster (HDBSCAN labels noise points as -1).
for i, label in enumerate(clusterer.labels_):
    if label == -1:
        # Skip rows with the -1 label.
        continue
    print('data point %d has value %d' % (i, label))

data point 55 has value 1
data point 89 has value 1
data point 639 has value 1
data point 660 has value 1
data point 726 has value 0
data point 898 has value 0
data point 2049 has value 0
data point 2089 has value 0
data point 2351 has value 0
data point 2404 has value 1
data point 2416 has value 0


In [8]:
# Collect the distinct label values and print the set followed by its size.
# The set is built from all labels, so it includes -1 whenever any row has
# that label; the printed count therefore counts -1 as well.
clusters = {*clusterer.labels_}
print(clusters, len(clusters), sep='\n')

{0, 1, -1}
3


In [9]:
# Emit every outlier score on one line, each followed by a comma and with no
# trailing newline.
outlier_scores = clusterer.outlier_scores_.tolist()
print(''.join(str(score) + ',' for score in outlier_scores), end='')

0.0350696114752813,0.05322886096877075,0.04917742063192625,0.055130495913814415,0.057688899230439614,0.01971020934737916,0.028404288031374667,0.04591467454138394,0.04662995485433685,0.06810913018574422,0.04174944900698868,0.047562282539345246,0.010877181731517435,0.06437723223463983,0.10209422186987395,0.05381902398240922,0.04778961432856049,0.03500637219152424,0.03476413561571281,0.03300117437673053,0.03208141923134658,0.03908465082157621,0.04667912479331169,0.03639867348529005,0.0550994048521718,0.03666502043890033,0.030921826396342666,0.02094101390991032,0.04382716553102526,0.05428533766856752,0.04526153809763017,0.030269128400816074,0.05599485104455665,0.04592918099056153,0.07136347495230101,0.014276774469044105,0.04366013476400548,0.04959189541860167,0.02986959514106697,0.039126136953187104,0.06743714410013622,0.02406996763198953,0.027641835142971017,0.03283268829213898,0.04147996372796764,0.05672198613610975,0.017912648397228365,0.0593982915818373,0.04306354326434702,0.0508541877

0.044636951341135114,0.022348049397102315,0.03241302466542171,0.025431171308416853,0.030309200648926313,0.07004964203607804,0.06979915977532253,0.054149089818171396,0.03797289761860258,0.030763302247740678,0.012830297323399662,0.056932713692128996,0.02408158395584157,0.036621010130123144,0.03878585554581164,0.04252086076302082,0.025599854541752173,0.020761928247897087,0.03766323618648072,0.018766284759036963,0.04157301611389703,0.028964840794286115,0.03456594811473282,0.024011491263414148,0.0442023321763242,0.04417492232356936,0.04181592583117713,0.039767261954839934,0.03277354534537612,0.02976817005640965,0.05638774657097731,0.03973748170868611,0.03554969701707191,0.04767736179316203,0.05758277741473695,0.046438970957248624,0.07244261348270037,0.03587100523823744,0.021777719293288793,0.03175626060845051,0.05654592158335897,0.022330936610837755,0.03787551412845266,0.017712353620144537,0.022266498601490596,0.048318946554484626,0.06577586186832424,0.05202374953346692,0.02781867070943156,

,0.04604816727860768,0.018617058397270767,0.056989052623882167,0.04675185681277922,0.0365463126337045,0.03905388056219216,0.04979729893381894,0.02616319077639864,0.025954364394725857,0.0007339545725032389,0.02605843004187822,0.0348322729581764,0.054560668412389275,0.05963652500540372,0.04881360002417533,0.01704481449558236,0.06280017634791697,0.0514309519520911,0.07865734134164777,0.0317376821687148,0.0358578796530803,0.036332337445214254,0.02260471873704322,0.021385228594280364,0.027085983364151862,0.0574474248345805,0.06947280628984058,0.03299337918555953,0.043750629591855605,0.03114695713599211,0.01783223722377239,0.04373429526541304,0.01906834026625807,0.05101025666414808,0.03150930069187404,0.05095507098115007,0.04468945048598822,0.043702665605193654,0.07883231650390889,0.05256928905062304,0.04064868093865346,0.030724427927394212,0.04718743046286873,0.0499443153191272,0.04020349635385141,0.03405473864935514,0.07493152235026751,0.025859712371460435,0.04374057057443547,0.03312421751

0.018767220624197455,0.07444136826017317,0.020768898813669168,0.04685301828551266,0.06377058820865529,0.03130349651281343,0.03539324903546137,0.016702937198713463,0.05381055812058485,0.027310903272516625,0.04055744798261823,0.06848410768366733,0.04640194486047593,0.04056596864201411,0.037018440614252926,0.06751223426440206,0.033649334272626036,0.05743900689781973,0.0407968327989589,0.06887834051023178,0.05020513111260222,0.01715569433688618,0.01885693496460815,0.07162938729948752,0.04656298444823097,0.029875194721658466,0.03471958361480349,0.04297044682347895,0.028467429399488042,0.03946358480137665,0.018402606798930014,0.047063376489024596,0.03768275458358464,0.03730632166197005,0.0280844911153526,0.03070877452180253,0.039083631781578636,0.048288614847341725,0.036471497806487924,0.04390423266384311,0.0695751813182615,0.03666398415680666,0.04612840714970593,0.03071937988081575,0.04918510326169268,0.01726611491299436,0.01355811614538993,0.0386521321756527,0.05334897939751159,0.021931081

0.03677005055261757,0.06705116138647105,0.05440690199542996,0.0,0.06587510667788404,0.042877458566866936,0.024412625306434635,0.018450430043665502,0.07729127080226975,0.05780182298219299,0.04556838549856131,0.07128437013172206,0.03476314111469292,0.0528351665871977,0.04494703286817432,0.04161462374616086,0.030860229549620827,0.04152508075767822,0.039641365350667576,0.05980610548448878,0.04054156677925353,0.05299512125991688,0.026874108647754975,0.08237592626645601,0.07054884144919275,0.02890268186576189,0.023572591942447253,0.0346279385977181,0.060529573171360054,0.039327427059591996,0.06170986648393438,0.037810983604923874,0.0301590345173384,0.07466505255813469,0.05810214142097254,0.0707529229586274,0.0647252943164324,0.059213068286032906,0.03575903021977688,0.04669532021332593,0.07426151204427091,0.049365858167878106,0.046599572852520976,0.06904175300379452,0.020386152516931412,0.01839033910379549,0.04735340257173484,0.08261782368093855,0.03786060086573907,0.020927621794881477,0.0406