# INDIVIDUAL CLUSTERING

In [1]:
import numpy as np
import datetime
import json
import pylab
import pandas as pd
import matplotlib.pyplot as plt

### Detalles

### Lista de Clientes

In [2]:
# Extraemos la lista de clientes sin repetir

def leer_data():
    """Read ``./data2/data.csv`` with pandas and return the resulting DataFrame."""
    csv_path = './data2/data.csv'
    return pd.read_csv(csv_path)

# Load the dataset, then count rows per client_id; the index of that
# per-client count holds one entry per distinct client_id.
data = leer_data()
rows_per_client = data.groupby('client_id')['client_id'].count()
clientes = rows_per_client.index

## Temporal TXs footprint

In [3]:
# File paths used below. Each derived path is the previous one plus a suffix.

file = 'U'
# JSON path built from the base name above.
raw_data = './data2/%s.json' % file
# Footprint file: read row by row by the clustering cell.
individual_footprint = "%s.individual_footprint" % raw_data
# Output: one line per (customer, cluster) centroid.
individual_clusters = "%s.clusters" % individual_footprint
# Output: one line per original footprint row with its cluster label.
individual_labels = "%s.labels" % individual_footprint

### Funciones

In [4]:
def process_footprint(data,tests,log=False):
    """Fit one MiniBatchKMeans model per candidate number of clusters.

    Parameters
    ----------
    data : sequence of samples handed unchanged to ``bench_k_means``.
    tests : iterable of candidate k values; values larger than ``len(data)``
        are skipped.
    log : when true, print a timestamped progress line before each fit.

    Returns
    -------
    dict mapping every evaluated k to the dict returned by ``bench_k_means``.
    """
    import datetime
    from sklearn.cluster import MiniBatchKMeans

    n_samples = len(data)
    results = {}
    for k in tests:
        if not k <= n_samples:
            # More clusters than samples were requested: skip this candidate.
            continue
        if log:
            print("%s: processing %s"%(datetime.datetime.now(),k))
        model = MiniBatchKMeans(init='k-means++', n_clusters=k, batch_size=100,
                                n_init=10, max_no_improvement=10, verbose=0,
                                random_state=0)
        results[k] = bench_k_means(model, name="k-means++", data=data)
    return results

In [5]:
def compute_best_k(x,y,occurrencies, plot=False,points=1000,sf=0.9):
    """Choose the number of clusters from a k-versus-score curve.

    With fewer than five (x, y) points the value ``sqrt(occurrencies / 2)``,
    rounded and floored at 1, is returned. Otherwise a smoothing spline is
    fitted through the points, evaluated on an evenly spaced grid, and the
    grid position selected by ``get_change_point`` is rounded to an int.

    Parameters
    ----------
    x : sequence of candidate k values.
    y : sequence of scores, one per entry of ``x`` (the caller passes inertia).
    occurrencies : number of samples; only used by the small-input fallback.
    plot : when true, draw the smoothed curve and the chosen point with pylab.
    points : number of grid points on which the spline is evaluated.
    sf : smoothing factor handed to ``UnivariateSpline.set_smoothing_factor``.

    Returns
    -------
    int, or the tuple ``(int, pylab)`` when ``plot`` is true.
    """
    import numpy as np

    if len(x)<5:
        # Cast explicitly so this path returns a plain int like the others,
        # whatever type round() yields for a numpy scalar.
        return max(1, int(round(float(np.sqrt(occurrencies/2)))))

    from scipy.interpolate import UnivariateSpline
    spl = UnivariateSpline(x, y)
    spl.set_smoothing_factor(sf)
    xs = np.linspace(min(x), max(x), points)
    ys = spl(xs)
    idx_better_k = get_change_point(xs, ys)
    best_k = int(np.round(xs[idx_better_k]))
    if plot:
        import pylab
        pylab.plot(xs,ys)

        pylab.scatter(xs[idx_better_k],ys[idx_better_k],s=20, marker='o')
        pylab.text(xs[idx_better_k],ys[idx_better_k],"bestK %s" %(np.round(xs[idx_better_k])))
        return best_k,pylab
    return best_k

In [6]:
def bench_k_means(estimator, name, data,distance_function=None):
    """Fit ``estimator`` on ``data`` and report its inertia and fit time.

    Parameters
    ----------
    estimator : object exposing ``fit(...)`` and, after fitting, ``inertia_``.
    name : label for the run; accepted for the callers' benefit but not used.
    data : samples passed to ``estimator.fit``.
    distance_function : when truthy, passed to ``fit`` as a second positional
        argument; otherwise ``fit`` receives ``data`` only.

    Returns
    -------
    dict with keys ``'inertia'`` (``estimator.inertia_``), ``'duration'``
    (seconds from just before ``fit`` until after ``inertia_`` is read) and
    ``'estimator'`` (the same, now fitted, object).
    """
    import time

    # perf_counter is monotonic, so the measured interval cannot be skewed by
    # system clock adjustments.
    t0 = time.perf_counter()
    if distance_function:
        estimator.fit(data,distance_function)
    else:
        estimator.fit(data)

    inertia=estimator.inertia_
    duration=time.perf_counter() - t0
    return {'inertia':inertia,'duration':duration, 'estimator':estimator}

def get_change_point(x, y):
    """
         Choice of the best K.

         Returns the index of the point (x[i], y[i]) lying farthest from the
         segment that joins the first and the last point of the curve; on
         ties the earliest index wins. Returns 0 when x is empty.

         :: param x: list of K values
         :: param y: list of SSE values
    """
    import math

    n_points = len(x)
    if n_points == 0:
        return 0

    # The reference segment is the same for every point, so build it once.
    first_point = [x[0], y[0]]
    last_point = [x[n_points-1], y[len(y)-1]]

    best_index = 0
    best_distance = -float('infinity')
    for i in range(n_points):
        nearest = closest_point_on_segment(a=first_point, b=last_point, p=[x[i], y[i]])
        distance = math.sqrt((nearest[0]-x[i])**2 + (nearest[1]-y[i])**2)
        if distance > best_distance:
            best_distance = distance
            best_index = i

    return best_index

def closest_point_on_segment(a, b, p):
    """Return the point of segment a-b that is nearest to p.

    ``a``, ``b`` and ``p`` are indexable as ``[x, y]``. The projection of
    ``p`` onto the line through ``a`` and ``b`` is computed; if it falls
    before ``a`` the object ``a`` itself is returned, if it falls beyond
    ``b`` the object ``b`` itself is returned, otherwise a new ``[x, y]``
    list is returned. When ``a`` and ``b`` coincide, ``p`` is returned.
    """
    ax, ay = a[0], a[1]
    bx, by = b[0], b[1]
    qx, qy = p[0], p[1]

    dx = bx - ax
    dy = by - ay

    # Degenerate segment: there is no direction to project onto.
    if dx == 0 and dy == 0:
        return p

    # Position of the projection along the segment (0 at a, 1 at b).
    t = ((qx - ax) * dx + (qy - ay) * dy) / (dx * dx + dy * dy)
    if t < 0:
        return a
    if t > 1:
        return b
    return [ax + t * dx, ay + t * dy]
	

### Individual Clustering

In [None]:
# Individual clustering: cluster each customer's footprints on their own and
# export (a) one line per centroid and (b) every input row with its label.
#
# Input rows are split on "," and read as: customer id, year, week, two
# columns that are not used here (profile_id and size according to the
# original header note), then the profile values.
# Rows of one customer are expected to be contiguous in the file; customer
# boundaries are taken from the file itself, so the loop cannot drift out of
# step with a separately built customer list.
import itertools


def _read_footprints(handle):
    """Yield ((uid, year, week), profile) for every non-blank line of handle."""
    for line in handle:
        stripped = line.strip()
        if not stripped:
            # A blank (e.g. trailing) line carries no record.
            continue
        fields = stripped.split(',')
        profile = np.array([float(el) for el in fields[5:]])
        yield (fields[0], fields[1], fields[2]), profile


def _cluster_customer(customer_tag, records, rows_so_far, clusters_out, labels_out):
    """Cluster one customer's profiles and write centroids and labelled rows.

    Returns (number of centroid lines written, number of label lines written).
    """
    to_cluster = [profile for _, profile in records]
    K = process_footprint(to_cluster, np.arange(1, len(to_cluster)+1))

    # choose k
    x = list(K.keys())
    y = [K[k]['inertia'] for k in K]
    best_k = compute_best_k(x, y, len(to_cluster))
    print(str(rows_so_far)+' => clustering: '+str(customer_tag)+' len data: '+str(len(records))+" best k: "+str(best_k))

    if best_k == 1:
        # Single cluster: the centroid is the mean profile, every row gets label 0.
        cluster_centers_ = [np.average(to_cluster, axis=0)]
        labels_ = [0]*len(to_cluster)
    else:
        estimator = K[best_k]['estimator']
        cluster_centers_ = estimator.cluster_centers_
        labels_ = estimator.labels_

    # export individual centroids: uid, cluster_id, centroid
    for cluster_id, centroid in enumerate(cluster_centers_):
        clusters_out.write("%s;%s;%s\n" % (customer_tag, cluster_id,
                                           ';'.join([str(el) for el in centroid])))
    clusters_out.flush()

    # export original data and labels: uid, year, week, cluster_id, profile
    for (key, profile), label in zip(records, labels_):
        labels_out.write("%s;%s;%s;%s;%s\n" % (key[0], key[1], key[2], label,
                                               ';'.join([str(el) for el in profile])))
    labels_out.flush()

    return len(cluster_centers_), len(records)


# Profile column names d0t0;d1t0;...;d6t0;d0t1;...;d6t3 (d runs fastest).
_profile_columns = ';'.join('d%dt%d' % (d, t) for t in range(4) for d in range(7))

footprints_clustered = 0   # label lines written
footprints_clusters = 0    # centroid lines written
n_cliente = 0              # index of the customer currently being processed
contador = 0               # data rows consumed so far
temporal = 0               # number of customer changes seen

# Context managers guarantee all three files are closed, even on error.
with open(individual_footprint) as f, \
        open(individual_clusters, 'w') as fw, \
        open(individual_labels, 'w') as fw2:
    fw.write('customer_tag;individual_cluster;' + _profile_columns + '\n')
    fw2.write('customer_tag;year;week;individual_cluster;' + _profile_columns + '\n')

    f.readline()  # skip the header line of the footprint file

    customers = itertools.groupby(_read_footprints(f), key=lambda record: record[0][0])
    for group_index, (customer_tag, group) in enumerate(customers):
        records = list(group)
        contador += len(records)
        n_cliente = group_index
        temporal = group_index

        n_centroids, n_rows = _cluster_customer(customer_tag, records, contador, fw, fw2)
        footprints_clusters += n_centroids
        footprints_clustered += n_rows

# Total number of data rows, kept under its original name.
num_rows = contador

if contador:
    print("final")

temporal

1 => clustering: +++g8j9k+5A= len data: 1 best k: 1
2 => clustering: ++/oQ9Lb9dI= len data: 1 best k: 1
5 => clustering: ++14g8obpj0= len data: 3 best k: 1
6 => clustering: ++1XMtcwMec= len data: 1 best k: 1
7 => clustering: ++3gxZFOJCM= len data: 1 best k: 1
8 => clustering: ++438ugzEhg= len data: 1 best k: 1
9 => clustering: ++5u+heOZ8o= len data: 1 best k: 1
13 => clustering: ++70ByX0a3Q= len data: 4 best k: 1
14 => clustering: ++7i5fi6kBU= len data: 1 best k: 1
16 => clustering: ++834mPfd7g= len data: 2 best k: 1
17 => clustering: ++8H96p+hLk= len data: 1 best k: 1
23 => clustering: ++93ZWJ5ONQ= len data: 6 best k: 5
24 => clustering: ++9tB+M5oWk= len data: 1 best k: 1
25 => clustering: ++C9HYE9YAk= len data: 1 best k: 1
26 => clustering: ++FMZ01PEY4= len data: 1 best k: 1
27 => clustering: ++FVgfEREuc= len data: 1 best k: 1
28 => clustering: ++JHadWy+ns= len data: 1 best k: 1
29 => clustering: ++JxWznHvfc= len data: 1 best k: 1
30 => clustering: ++KZ8A+f7MY= len data: 1 best k: 1


421 => clustering: +0uLvWNH8ic= len data: 9 best k: 4
422 => clustering: +0ugFUoLcZw= len data: 1 best k: 1
424 => clustering: +0vMG+809FA= len data: 2 best k: 1
425 => clustering: +0wZhwzyzJo= len data: 1 best k: 1
426 => clustering: +0xKwvsIkFA= len data: 1 best k: 1
427 => clustering: +0xr3WGLoS4= len data: 1 best k: 1
428 => clustering: +0yTOXnCLJI= len data: 1 best k: 1
429 => clustering: +0zRdwVpJz4= len data: 1 best k: 1
431 => clustering: +13/Y57RH00= len data: 2 best k: 1
433 => clustering: +15MZjEFNN4= len data: 2 best k: 1
434 => clustering: +15spL7OWvQ= len data: 1 best k: 1
440 => clustering: +1670k6aThA= len data: 6 best k: 3
441 => clustering: +182ID1sv5I= len data: 1 best k: 1
442 => clustering: +19b5bgGBMQ= len data: 1 best k: 1
448 => clustering: +1A+siRRE78= len data: 6 best k: 3
450 => clustering: +1D4fLSNElU= len data: 2 best k: 1
455 => clustering: +1DMXF3J0z8= len data: 5 best k: 4
458 => clustering: +1IQZkOp930= len data: 3 best k: 1
468 => clustering: +1JI7hdtS

872 => clustering: +4+ccMb2CoA= len data: 12 best k: 5
873 => clustering: +4/ElXyX6GI= len data: 1 best k: 1
875 => clustering: +47PkrKmzrM= len data: 2 best k: 1
876 => clustering: +47zbC+XIp8= len data: 1 best k: 1
877 => clustering: +4Co4wNMRs0= len data: 1 best k: 1
878 => clustering: +4DAjBvBsLY= len data: 1 best k: 1
879 => clustering: +4F9gT4gLAY= len data: 1 best k: 1
917 => clustering: +4F9nGxb1CY= len data: 38 best k: 11
918 => clustering: +4FII2KjFAI= len data: 1 best k: 1
919 => clustering: +4FMUyKjKWk= len data: 1 best k: 1
921 => clustering: +4FZhZJwt+Y= len data: 2 best k: 1
924 => clustering: +4J1a/e0lBA= len data: 3 best k: 1
927 => clustering: +4JY45X1ipg= len data: 3 best k: 1
929 => clustering: +4JlzJ/3YM8= len data: 2 best k: 1
931 => clustering: +4LHdQSxUeA= len data: 2 best k: 1
932 => clustering: +4LNqs4z/ME= len data: 1 best k: 1
934 => clustering: +4MHYTV5NhE= len data: 2 best k: 1
935 => clustering: +4MYsaW+zZU= len data: 1 best k: 1
937 => clustering: +4MjBW

1297 => clustering: +6yqhppua+4= len data: 2 best k: 1
1298 => clustering: +6z9ZqVCmNk= len data: 1 best k: 1
1299 => clustering: +6zuFCeffn8= len data: 1 best k: 1
1300 => clustering: +7+2/xFmesk= len data: 1 best k: 1
1301 => clustering: +7+2d7k/jqY= len data: 1 best k: 1
1302 => clustering: +7/PKlJxtdQ= len data: 1 best k: 1
1318 => clustering: +70+Hc1nrw0= len data: 16 best k: 6
1320 => clustering: +735pM7q6pg= len data: 2 best k: 1
1321 => clustering: +73Rfc3uF+4= len data: 1 best k: 1
1326 => clustering: +75MTWLveq8= len data: 5 best k: 3
1327 => clustering: +774MXQOkJc= len data: 1 best k: 1
1328 => clustering: +79fFDYvcmk= len data: 1 best k: 1
1330 => clustering: +79wHafD5Wo= len data: 2 best k: 1
1332 => clustering: +79ytbkLUNA= len data: 2 best k: 1
1333 => clustering: +7AY03ecTdA= len data: 1 best k: 1
1334 => clustering: +7Bjc0s276k= len data: 1 best k: 1
1336 => clustering: +7BrjCQ3gLI= len data: 2 best k: 1
1338 => clustering: +7CEcPAHU1U= len data: 2 best k: 1
1340 => c

1680 => clustering: +9syP8mKxBg= len data: 17 best k: 5
1681 => clustering: +9tbT3X+IYM= len data: 1 best k: 1
1682 => clustering: +9vw3JX+nQ4= len data: 1 best k: 1
1692 => clustering: +9wNyZkq684= len data: 10 best k: 6
1700 => clustering: +9x6WLEWIDI= len data: 8 best k: 5
1701 => clustering: +9yJoBsAES4= len data: 1 best k: 1
1702 => clustering: +9zI/m/hvVY= len data: 1 best k: 1
1706 => clustering: +9zM3Smnd+0= len data: 4 best k: 1
1708 => clustering: +9zkYu2Y5i8= len data: 2 best k: 1
1709 => clustering: +A+1O8b2RcM= len data: 1 best k: 1
1712 => clustering: +A0zWFEVbSA= len data: 3 best k: 1
1717 => clustering: +A1r7EOnPiM= len data: 5 best k: 2
1720 => clustering: +A3Lp8W8aFY= len data: 3 best k: 1
1722 => clustering: +A5q+QqzcCY= len data: 2 best k: 1
1723 => clustering: +A6M5V8hATw= len data: 1 best k: 1
1725 => clustering: +A8jnXMhYfc= len data: 2 best k: 1
1729 => clustering: +AAODo1Q4FE= len data: 4 best k: 1
1733 => clustering: +ACXKO05Ewc= len data: 4 best k: 1
1734 => 

2058 => clustering: +CcPqCa98ZM= len data: 17 best k: 3
2059 => clustering: +CgAogwZRp0= len data: 1 best k: 1
2066 => clustering: +CgDHLutDN0= len data: 7 best k: 4
2067 => clustering: +CisVHRc4nw= len data: 1 best k: 1
2068 => clustering: +CiygTEWlcA= len data: 1 best k: 1
2069 => clustering: +CjpNNY/Eq4= len data: 1 best k: 1
2070 => clustering: +Ckjq+21E+w= len data: 1 best k: 1
2073 => clustering: +ClBNPnKNXo= len data: 3 best k: 1
2075 => clustering: +Clhe3KMLSI= len data: 2 best k: 1
2076 => clustering: +CmKiHnYQ9c= len data: 1 best k: 1
2079 => clustering: +CpG/dTVhGg= len data: 3 best k: 1
2080 => clustering: +CsQuMBm/eE= len data: 1 best k: 1
2081 => clustering: +Ct75FF458c= len data: 1 best k: 1
2082 => clustering: +Ctj2IjOj54= len data: 1 best k: 1
2085 => clustering: +CuV57XhoQg= len data: 3 best k: 1
2088 => clustering: +CuptMCjMr0= len data: 3 best k: 1
2089 => clustering: +Cx1GVwUjOY= len data: 1 best k: 1
2090 => clustering: +Cxq3B/SZa8= len data: 1 best k: 1
2095 => c

2403 => clustering: +FhM3y9gtuI= len data: 15 best k: 6
2407 => clustering: +FiUYeMLDXE= len data: 4 best k: 1
2408 => clustering: +FkM1zPbANU= len data: 1 best k: 1
2410 => clustering: +Fkb/xpJkGo= len data: 2 best k: 1
2411 => clustering: +FlufsoRBAk= len data: 1 best k: 1
2415 => clustering: +FmNBEi89DA= len data: 4 best k: 1
2424 => clustering: +FmQ1iFJS4c= len data: 9 best k: 5
2425 => clustering: +FnDkRBDLcc= len data: 1 best k: 1
2430 => clustering: +FnSzZ1j4yc= len data: 5 best k: 3
2431 => clustering: +FnhNrV0W6M= len data: 1 best k: 1
2433 => clustering: +FoTxhMN9lI= len data: 2 best k: 1
2435 => clustering: +Fp2JZnjJbU= len data: 2 best k: 1
2436 => clustering: +FpE9ECCyZU= len data: 1 best k: 1
2437 => clustering: +FpGZBNJhU0= len data: 1 best k: 1
2445 => clustering: +Fq1iwypFKU= len data: 8 best k: 4
2446 => clustering: +FqmpleKqdk= len data: 1 best k: 1
2448 => clustering: +FqvWztoVJk= len data: 2 best k: 1
2450 => clustering: +FrG/AQbW+0= len data: 2 best k: 1
2457 => c

2785 => clustering: +HtOTwSnJNQ= len data: 8 best k: 5
2787 => clustering: +HtTI7k+CWo= len data: 2 best k: 1
2790 => clustering: +HtVnazVNUY= len data: 3 best k: 1
2793 => clustering: +Hvi3ZDOHoY= len data: 3 best k: 1
2796 => clustering: +I+qe3HvamE= len data: 3 best k: 1
2798 => clustering: +I/8RkSt/u8= len data: 2 best k: 1
2799 => clustering: +I0oNeWDbKE= len data: 1 best k: 1
2807 => clustering: +I12zQBgwq0= len data: 8 best k: 6
2812 => clustering: +I1Uzogvjz8= len data: 5 best k: 2
2813 => clustering: +I1ZNDOrKOY= len data: 1 best k: 1
2815 => clustering: +I1iM/xLKDc= len data: 2 best k: 1
2816 => clustering: +I3Y4wSaGSw= len data: 1 best k: 1
2817 => clustering: +I5MrO5hdyQ= len data: 1 best k: 1
2818 => clustering: +I6N0Rsa24o= len data: 1 best k: 1
2819 => clustering: +I7OYHvUF+Q= len data: 1 best k: 1
2821 => clustering: +I87cPNCt30= len data: 2 best k: 1
2826 => clustering: +I95EnwFM6Y= len data: 5 best k: 2
2828 => clustering: +I98Rq08seY= len data: 2 best k: 1
2829 => cl

3217 => clustering: +KIz3Xa5Ync= len data: 3 best k: 1
3218 => clustering: +KJUvTsj4xs= len data: 1 best k: 1
3219 => clustering: +KLVJcpe344= len data: 1 best k: 1
3220 => clustering: +KMvnfDQEhA= len data: 1 best k: 1
3222 => clustering: +KNVhHnMcHk= len data: 2 best k: 1
3227 => clustering: +KOUvHwVuUA= len data: 5 best k: 3
3228 => clustering: +KOj6dsP34I= len data: 1 best k: 1
3235 => clustering: +KPH1oG5Z4Q= len data: 7 best k: 4
3236 => clustering: +KPh7Kkday4= len data: 1 best k: 1
3239 => clustering: +KQZr0bCUS4= len data: 3 best k: 1
3240 => clustering: +KQj/dJNEXo= len data: 1 best k: 1
3243 => clustering: +KQuMjlWmIA= len data: 3 best k: 1
3244 => clustering: +KSM2YA/S9E= len data: 1 best k: 1
3246 => clustering: +KSdLjOx7yA= len data: 2 best k: 1
3250 => clustering: +KSdytptT4U= len data: 4 best k: 1
3251 => clustering: +KUIhTXwxVo= len data: 1 best k: 1
3256 => clustering: +KWgg9Yz9gE= len data: 5 best k: 3
3258 => clustering: +KX42Kli/xg= len data: 2 best k: 1
3260 => cl

3544 => clustering: +MZ9lF3OcNM= len data: 9 best k: 7
3545 => clustering: +MZeAmJxT+k= len data: 1 best k: 1
3546 => clustering: +Ma1GjUGhrc= len data: 1 best k: 1
3550 => clustering: +MaSwxRYkx4= len data: 4 best k: 1
3552 => clustering: +MazcwszM+Q= len data: 2 best k: 1
3553 => clustering: +MbTAY/7m/s= len data: 1 best k: 1
3556 => clustering: +McSzTaoGwg= len data: 3 best k: 1
3557 => clustering: +McsWwwSd8w= len data: 1 best k: 1
3558 => clustering: +MdW231UqTM= len data: 1 best k: 1
3559 => clustering: +Mf4CGCVQQI= len data: 1 best k: 1
3561 => clustering: +Mhl1et1drw= len data: 2 best k: 1
3562 => clustering: +MiRNhZ/u2s= len data: 1 best k: 1
3574 => clustering: +MlZHTkNcOs= len data: 12 best k: 4
3576 => clustering: +MmYYtErHbg= len data: 2 best k: 1
3579 => clustering: +MoOF8ovTw0= len data: 3 best k: 1
3580 => clustering: +MqECedQNqg= len data: 1 best k: 1
3583 => clustering: +MqklrHIHUI= len data: 3 best k: 1
3587 => clustering: +MtDiz/8R1M= len data: 4 best k: 1
3589 => c

3810 => clustering: +PahmanZOSs= len data: 2 best k: 1
3814 => clustering: +PcEhe2/a3M= len data: 4 best k: 1
3815 => clustering: +Pcxr099JLU= len data: 1 best k: 1
3824 => clustering: +PeBQTNysYs= len data: 9 best k: 3
3827 => clustering: +PebnFNIkD0= len data: 3 best k: 1
3828 => clustering: +Pes4Wnvckw= len data: 1 best k: 1
3829 => clustering: +PfJct/p6IU= len data: 1 best k: 1
3830 => clustering: +PfbgraQe6c= len data: 1 best k: 1
3831 => clustering: +PgNPqizsb0= len data: 1 best k: 1
3836 => clustering: +Pgb9Ocrqbc= len data: 5 best k: 4
3837 => clustering: +PhpWW5zkAA= len data: 1 best k: 1
3839 => clustering: +PiPAMUNUEQ= len data: 2 best k: 1
3840 => clustering: +PiZpDjNfew= len data: 1 best k: 1
3843 => clustering: +PjIY6QDYqI= len data: 3 best k: 1
3844 => clustering: +Pm3hLoThqA= len data: 1 best k: 1
3845 => clustering: +PmBvxPkzqU= len data: 1 best k: 1
3846 => clustering: +PmdIwIBzIM= len data: 1 best k: 1
3848 => clustering: +Pn8kRaJqWk= len data: 2 best k: 1
3849 => cl

4186 => clustering: +S7/cYY2jyQ= len data: 2 best k: 1
4189 => clustering: +S7xfKDzzoo= len data: 3 best k: 1
4191 => clustering: +S8W1vp3eDg= len data: 2 best k: 1
4193 => clustering: +S8n8bx07N8= len data: 2 best k: 1
4198 => clustering: +S9FyFKmWNw= len data: 5 best k: 4
4200 => clustering: +SAMzPqqDjM= len data: 2 best k: 1
4217 => clustering: +SDnZr3h5Vc= len data: 17 best k: 3
4218 => clustering: +SEzxbZLX7A= len data: 1 best k: 1
4219 => clustering: +SF1n/sDebA= len data: 1 best k: 1
4222 => clustering: +SFlCR2M+M4= len data: 3 best k: 1
4223 => clustering: +SFzZlI5WLI= len data: 1 best k: 1
4225 => clustering: +SGX3mGLzVc= len data: 2 best k: 1
4226 => clustering: +SHAxFpR5yA= len data: 1 best k: 1
4229 => clustering: +SHcDPjp0hQ= len data: 3 best k: 1
4230 => clustering: +SHwNSp8OKg= len data: 1 best k: 1
4231 => clustering: +SJBIY7hcmA= len data: 1 best k: 1
4234 => clustering: +SKjOyhiiEc= len data: 3 best k: 1
4238 => clustering: +SLH9mKRkYU= len data: 4 best k: 1
4239 => c