In [1]:
# import libs
from pyspark.ml.clustering import KMeans
from pyspark.ml.evaluation import ClusteringEvaluator
from pyspark.ml.feature import StringIndexer
from pyspark.ml.feature import OneHotEncoderEstimator
from pyspark.sql.functions import col
from pyspark.ml import Pipeline
from pyspark.ml.feature import VectorAssembler
import datetime
from pyspark.ml.feature import PCA
from pyspark.sql import Row
import csv

In [2]:
spark

In [3]:
# load dataset
df = spark.read.csv(
    "data/cus-prod-type-totalqty.csv", header=True, mode="DROPMALFORMED"
)
df.show(1)

+---------------+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
|customer_number| AA| AB| AC| AD| AE| AF| AG| AH| AI| AJ| AK| AL| AM| AN| AO| AP| AQ| AR| AS| AT| AU| AV| AW| AX| AY| AZ| BA| BB| BC| BD| BE| BF| BG| BH| BI| BJ| BK| BL| BM| BN| BO| BP| BQ| BR| BS| BT| BU| BV| BW| BX| BY| BZ| CA| CC| CD| CE| CF| CG| CH| CI| CJ| CK| CL| CM| CN|
+---------------+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
|          10000|  0|  0|  0|  0|  0|  0|  0|  0|  0|  0|  0|  0|  0|  0|  0|  0|  0|  0|  0|  0|  0|  0|  0|  0|  0|  0|  0|  0|  0|  0|  0|  0|  0|  0|  0|  0|  0| 

In [4]:
# check column types
df.dtypes

[('customer_number', 'string'),
 ('AA', 'string'),
 ('AB', 'string'),
 ('AC', 'string'),
 ('AD', 'string'),
 ('AE', 'string'),
 ('AF', 'string'),
 ('AG', 'string'),
 ('AH', 'string'),
 ('AI', 'string'),
 ('AJ', 'string'),
 ('AK', 'string'),
 ('AL', 'string'),
 ('AM', 'string'),
 ('AN', 'string'),
 ('AO', 'string'),
 ('AP', 'string'),
 ('AQ', 'string'),
 ('AR', 'string'),
 ('AS', 'string'),
 ('AT', 'string'),
 ('AU', 'string'),
 ('AV', 'string'),
 ('AW', 'string'),
 ('AX', 'string'),
 ('AY', 'string'),
 ('AZ', 'string'),
 ('BA', 'string'),
 ('BB', 'string'),
 ('BC', 'string'),
 ('BD', 'string'),
 ('BE', 'string'),
 ('BF', 'string'),
 ('BG', 'string'),
 ('BH', 'string'),
 ('BI', 'string'),
 ('BJ', 'string'),
 ('BK', 'string'),
 ('BL', 'string'),
 ('BM', 'string'),
 ('BN', 'string'),
 ('BO', 'string'),
 ('BP', 'string'),
 ('BQ', 'string'),
 ('BR', 'string'),
 ('BS', 'string'),
 ('BT', 'string'),
 ('BU', 'string'),
 ('BV', 'string'),
 ('BW', 'string'),
 ('BX', 'string'),
 ('BY', 'string'),

In [5]:
# change product_code column types to int
cols = df.columns[1:]
df = df.select(df.columns[0],*(col(c).cast("int").alias(c) for c in cols))
df.dtypes

[('customer_number', 'string'),
 ('AA', 'int'),
 ('AB', 'int'),
 ('AC', 'int'),
 ('AD', 'int'),
 ('AE', 'int'),
 ('AF', 'int'),
 ('AG', 'int'),
 ('AH', 'int'),
 ('AI', 'int'),
 ('AJ', 'int'),
 ('AK', 'int'),
 ('AL', 'int'),
 ('AM', 'int'),
 ('AN', 'int'),
 ('AO', 'int'),
 ('AP', 'int'),
 ('AQ', 'int'),
 ('AR', 'int'),
 ('AS', 'int'),
 ('AT', 'int'),
 ('AU', 'int'),
 ('AV', 'int'),
 ('AW', 'int'),
 ('AX', 'int'),
 ('AY', 'int'),
 ('AZ', 'int'),
 ('BA', 'int'),
 ('BB', 'int'),
 ('BC', 'int'),
 ('BD', 'int'),
 ('BE', 'int'),
 ('BF', 'int'),
 ('BG', 'int'),
 ('BH', 'int'),
 ('BI', 'int'),
 ('BJ', 'int'),
 ('BK', 'int'),
 ('BL', 'int'),
 ('BM', 'int'),
 ('BN', 'int'),
 ('BO', 'int'),
 ('BP', 'int'),
 ('BQ', 'int'),
 ('BR', 'int'),
 ('BS', 'int'),
 ('BT', 'int'),
 ('BU', 'int'),
 ('BV', 'int'),
 ('BW', 'int'),
 ('BX', 'int'),
 ('BY', 'int'),
 ('BZ', 'int'),
 ('CA', 'int'),
 ('CC', 'int'),
 ('CD', 'int'),
 ('CE', 'int'),
 ('CF', 'int'),
 ('CG', 'int'),
 ('CH', 'int'),
 ('CI', 'int'),
 ('CJ', 

In [6]:
# assemble feature cols
vecAssembler = VectorAssembler(inputCols=df.columns[1:], outputCol="features")
stream_df = vecAssembler.transform(df)

In [7]:
stream_df = stream_df.drop(*df.columns[1:])
# stream_df = stream_df.drop(df.columns[1])
stream_df.columns

['customer_number', 'features']

In [8]:
# run k-means
result_stats = []

# calculate time useds
start_time = datetime.datetime.now() 

for k in range(2, 1000):
    kmeans = KMeans(k=k, distanceMeasure='cosine')
    train_model = kmeans.fit(stream_df)
    # Make predictions
    predictions = train_model.transform(stream_df)
    # Evaluate clustering by computing Silhouette score
    evaluator = ClusteringEvaluator(distanceMeasure='cosine')
    

    silhouette = evaluator.evaluate(predictions)
    cost = train_model.summary.trainingCost
    stat = { 'k':k, 'Silhouette distance': silhouette, 'Cost': cost }
    result_stats.append(stat)
    print("k = {} ---- Silhouette distance = {} ---- Cost = {} ---- {} passed".format(k, str(silhouette), str(cost), datetime.datetime.now() - start_time))

k = 2 ---- Silhouette distance = 0.02789118082007418 ---- Cost = 28322.777659462274 ---- 0:00:15.996564 passed
k = 3 ---- Silhouette distance = 0.037783498146624905 ---- Cost = 27796.63933678341 ---- 0:00:23.110178 passed
k = 4 ---- Silhouette distance = 0.05731489029150743 ---- Cost = 26638.613583257502 ---- 0:00:31.192768 passed
k = 5 ---- Silhouette distance = 0.06739987288772795 ---- Cost = 25520.335971188906 ---- 0:00:38.656034 passed
k = 6 ---- Silhouette distance = 0.08039724067800524 ---- Cost = 25015.630040724343 ---- 0:00:45.665769 passed
k = 7 ---- Silhouette distance = 0.09064387298675411 ---- Cost = 24548.07788000168 ---- 0:00:51.803867 passed
k = 8 ---- Silhouette distance = 0.10744935570374883 ---- Cost = 23747.97139659508 ---- 0:00:58.181288 passed
k = 9 ---- Silhouette distance = 0.11825390600278637 ---- Cost = 23293.276780590855 ---- 0:01:04.687893 passed
k = 10 ---- Silhouette distance = 0.12598113759003204 ---- Cost = 22705.245057657172 ---- 0:01:10.688596 passed
k 

k = 76 ---- Silhouette distance = 0.5332290046397249 ---- Cost = 4926.209052116594 ---- 0:08:50.031986 passed
k = 77 ---- Silhouette distance = 0.5448741363306433 ---- Cost = 4620.5521293744705 ---- 0:08:59.244538 passed
k = 78 ---- Silhouette distance = 0.5339221615250853 ---- Cost = 4746.789665062973 ---- 0:09:07.673437 passed
k = 79 ---- Silhouette distance = 0.5446561630462157 ---- Cost = 4474.313653311103 ---- 0:09:15.979683 passed
k = 80 ---- Silhouette distance = 0.5430876022070888 ---- Cost = 4430.356782296247 ---- 0:09:23.714956 passed
k = 81 ---- Silhouette distance = 0.5536241701738481 ---- Cost = 4135.674098190186 ---- 0:09:32.224959 passed
k = 82 ---- Silhouette distance = 0.5601703220900647 ---- Cost = 4074.0819578127052 ---- 0:09:40.117217 passed
k = 83 ---- Silhouette distance = 0.5598801549790913 ---- Cost = 3899.995362194225 ---- 0:09:49.820572 passed
k = 84 ---- Silhouette distance = 0.5339554841991784 ---- Cost = 4328.997257262931 ---- 0:09:59.790028 passed
k = 85 -

k = 150 ---- Silhouette distance = 0.5087205840601084 ---- Cost = 3693.256428693668 ---- 0:20:28.357716 passed
k = 151 ---- Silhouette distance = 0.5028696536423894 ---- Cost = 3696.0743038294067 ---- 0:20:40.783800 passed
k = 152 ---- Silhouette distance = 0.5149499062385023 ---- Cost = 3690.9208495442344 ---- 0:20:53.187870 passed
k = 153 ---- Silhouette distance = 0.51235300889385 ---- Cost = 3686.1324519546274 ---- 0:21:05.220366 passed
k = 154 ---- Silhouette distance = 0.5176373349152175 ---- Cost = 3673.2206602038223 ---- 0:21:15.659653 passed
k = 155 ---- Silhouette distance = 0.5092423822277953 ---- Cost = 3673.5604788459564 ---- 0:21:26.945843 passed
k = 156 ---- Silhouette distance = 0.5138389653954458 ---- Cost = 3666.033716591409 ---- 0:21:37.061922 passed
k = 157 ---- Silhouette distance = 0.5140782011114623 ---- Cost = 3666.76503438518 ---- 0:21:47.411210 passed
k = 158 ---- Silhouette distance = 0.5106355105480209 ---- Cost = 3663.787167672267 ---- 0:21:57.594041 passed

k = 224 ---- Silhouette distance = 0.5116254343858129 ---- Cost = 3487.584944089907 ---- 0:34:47.012228 passed
k = 225 ---- Silhouette distance = 0.5035940983550848 ---- Cost = 3494.515876746214 ---- 0:35:00.565549 passed
k = 226 ---- Silhouette distance = 0.5067243086519763 ---- Cost = 3483.8608924784844 ---- 0:35:13.223590 passed
k = 227 ---- Silhouette distance = 0.5031303663795779 ---- Cost = 3471.3350561625366 ---- 0:35:25.744247 passed
k = 228 ---- Silhouette distance = 0.5061328428289746 ---- Cost = 3474.3379522917726 ---- 0:35:38.418667 passed
k = 229 ---- Silhouette distance = 0.5085476125412033 ---- Cost = 3457.7884103463653 ---- 0:35:51.166763 passed
k = 230 ---- Silhouette distance = 0.5052197401054226 ---- Cost = 3460.5065729371518 ---- 0:36:04.122284 passed
k = 231 ---- Silhouette distance = 0.5047974409174489 ---- Cost = 3457.3162713514525 ---- 0:36:17.617150 passed
k = 232 ---- Silhouette distance = 0.5048128666509814 ---- Cost = 3454.1515972802213 ---- 0:36:30.565965 p

k = 298 ---- Silhouette distance = 0.5039471739548703 ---- Cost = 3304.8302660207646 ---- 0:52:15.656467 passed
k = 299 ---- Silhouette distance = 0.5021478053642309 ---- Cost = 3305.151338265822 ---- 0:52:31.364798 passed
k = 300 ---- Silhouette distance = 0.5058624343897545 ---- Cost = 3305.0809720416446 ---- 0:52:46.000820 passed
k = 301 ---- Silhouette distance = 0.5039990212121942 ---- Cost = 3305.767197345744 ---- 0:53:01.026860 passed
k = 302 ---- Silhouette distance = 0.5005856090736267 ---- Cost = 3296.468816391898 ---- 0:53:16.463759 passed
k = 303 ---- Silhouette distance = 0.5031411797360754 ---- Cost = 3284.211715022155 ---- 0:53:31.701451 passed
k = 304 ---- Silhouette distance = 0.5038279783832736 ---- Cost = 3294.211050291879 ---- 0:53:46.908738 passed
k = 305 ---- Silhouette distance = 0.5028018665573678 ---- Cost = 3299.3635641357655 ---- 0:54:02.475909 passed
k = 306 ---- Silhouette distance = 0.5036737369621187 ---- Cost = 3286.815140341342 ---- 0:54:18.954865 passe

k = 372 ---- Silhouette distance = 0.5113674938996456 ---- Cost = 3124.539347664573 ---- 1:12:51.813767 passed
k = 373 ---- Silhouette distance = 0.5104997241350685 ---- Cost = 3127.0302150916605 ---- 1:13:09.725544 passed
k = 374 ---- Silhouette distance = 0.5109099121259004 ---- Cost = 3120.0543998092853 ---- 1:13:27.590281 passed
k = 375 ---- Silhouette distance = 0.510173540685345 ---- Cost = 3117.990571444806 ---- 1:13:45.851938 passed
k = 376 ---- Silhouette distance = 0.5123916602174818 ---- Cost = 3125.6655283459713 ---- 1:14:04.576602 passed
k = 377 ---- Silhouette distance = 0.5075749613904387 ---- Cost = 3130.8851528733185 ---- 1:14:24.161973 passed
k = 378 ---- Silhouette distance = 0.5081666707096739 ---- Cost = 3131.402335986361 ---- 1:14:41.668391 passed
k = 379 ---- Silhouette distance = 0.5102835220034745 ---- Cost = 3119.132695075293 ---- 1:14:59.496661 passed
k = 380 ---- Silhouette distance = 0.5105448093010874 ---- Cost = 3115.824420150512 ---- 1:15:18.181950 passe

k = 446 ---- Silhouette distance = 0.510471588262246 ---- Cost = 2974.7243493426895 ---- 1:37:07.396552 passed
k = 447 ---- Silhouette distance = 0.5135049881084285 ---- Cost = 2984.9772014761315 ---- 1:37:29.900402 passed
k = 448 ---- Silhouette distance = 0.5118847265597567 ---- Cost = 2994.570244720778 ---- 1:37:49.194017 passed
k = 449 ---- Silhouette distance = 0.5122858489149256 ---- Cost = 2980.4199811210096 ---- 1:38:10.145162 passed
k = 450 ---- Silhouette distance = 0.5143408276264101 ---- Cost = 2962.275070052419 ---- 1:38:31.908227 passed
k = 451 ---- Silhouette distance = 0.5169714979181573 ---- Cost = 2972.9216578462324 ---- 1:38:53.201547 passed
k = 452 ---- Silhouette distance = 0.5171321038035767 ---- Cost = 2971.4047936622674 ---- 1:39:14.777378 passed
k = 453 ---- Silhouette distance = 0.5187890381227112 ---- Cost = 2957.411655650411 ---- 1:39:35.720320 passed
k = 454 ---- Silhouette distance = 0.5147609827180009 ---- Cost = 2959.8232615651395 ---- 1:39:56.588522 pas

k = 520 ---- Silhouette distance = 0.5184003755765952 ---- Cost = 2839.965991136929 ---- 2:04:48.639132 passed
k = 521 ---- Silhouette distance = 0.5206105800626689 ---- Cost = 2841.8177188910486 ---- 2:05:13.867519 passed
k = 522 ---- Silhouette distance = 0.517354224880943 ---- Cost = 2831.6357196013296 ---- 2:05:37.145995 passed
k = 523 ---- Silhouette distance = 0.5249510623079506 ---- Cost = 2822.7308263015793 ---- 2:06:00.174281 passed
k = 524 ---- Silhouette distance = 0.5186906101247591 ---- Cost = 2839.7000671661244 ---- 2:06:23.549136 passed
k = 525 ---- Silhouette distance = 0.5187044533935331 ---- Cost = 2838.1045412653248 ---- 2:06:46.715641 passed
k = 526 ---- Silhouette distance = 0.5147242104638453 ---- Cost = 2843.5587478352836 ---- 2:07:10.648336 passed
k = 527 ---- Silhouette distance = 0.5227604821812978 ---- Cost = 2811.987686623538 ---- 2:07:34.014570 passed
k = 528 ---- Silhouette distance = 0.5199117268730523 ---- Cost = 2819.7442413290346 ---- 2:07:56.856271 pa

k = 594 ---- Silhouette distance = 0.526697159116978 ---- Cost = 2702.2245808230873 ---- 2:35:46.116431 passed
k = 595 ---- Silhouette distance = 0.5298383554483672 ---- Cost = 2684.4163568318163 ---- 2:36:11.649893 passed
k = 596 ---- Silhouette distance = 0.527904916797796 ---- Cost = 2706.0698991619056 ---- 2:36:37.577610 passed
k = 597 ---- Silhouette distance = 0.5280018989184196 ---- Cost = 2704.4313720089676 ---- 2:37:02.724131 passed
k = 598 ---- Silhouette distance = 0.5220948012760506 ---- Cost = 2698.558282428021 ---- 2:37:29.000189 passed
k = 599 ---- Silhouette distance = 0.528558270029397 ---- Cost = 2680.853811535622 ---- 2:37:58.037297 passed
k = 600 ---- Silhouette distance = 0.524402513730465 ---- Cost = 2694.212904922423 ---- 2:38:23.895886 passed
k = 601 ---- Silhouette distance = 0.5201189381990327 ---- Cost = 2693.0145572057686 ---- 2:38:49.780155 passed
k = 602 ---- Silhouette distance = 0.5141148591485932 ---- Cost = 2706.6257375355885 ---- 2:39:16.082393 passed

k = 668 ---- Silhouette distance = 0.5217345445213134 ---- Cost = 2592.3175812047584 ---- 3:10:00.554672 passed
k = 669 ---- Silhouette distance = 0.5247811543574137 ---- Cost = 2587.387821018038 ---- 3:10:31.548199 passed
k = 670 ---- Silhouette distance = 0.5288122797854169 ---- Cost = 2574.4359803994494 ---- 3:10:59.949403 passed
k = 671 ---- Silhouette distance = 0.531223303266336 ---- Cost = 2564.013211502345 ---- 3:11:28.361282 passed
k = 672 ---- Silhouette distance = 0.5290548392212255 ---- Cost = 2565.08770898282 ---- 3:11:56.565687 passed
k = 673 ---- Silhouette distance = 0.5333491290449571 ---- Cost = 2542.0172679018906 ---- 3:12:25.862987 passed
k = 674 ---- Silhouette distance = 0.5342316129944664 ---- Cost = 2553.3378172909256 ---- 3:12:55.337533 passed
k = 675 ---- Silhouette distance = 0.5324721900531573 ---- Cost = 2552.6973778100864 ---- 3:13:25.522909 passed
k = 676 ---- Silhouette distance = 0.5373332453090213 ---- Cost = 2540.706230260078 ---- 3:13:53.894998 passe

k = 742 ---- Silhouette distance = 0.5182478208241867 ---- Cost = 2470.203642110454 ---- 3:47:43.371520 passed
k = 743 ---- Silhouette distance = 0.5236180994597928 ---- Cost = 2476.0720659136587 ---- 3:48:17.126231 passed
k = 744 ---- Silhouette distance = 0.5300074499743037 ---- Cost = 2474.818687975059 ---- 3:48:47.314482 passed
k = 745 ---- Silhouette distance = 0.5351601453780198 ---- Cost = 2435.6136395951494 ---- 3:49:20.081783 passed
k = 746 ---- Silhouette distance = 0.5274525729397398 ---- Cost = 2459.7595298482656 ---- 3:49:51.634930 passed
k = 747 ---- Silhouette distance = 0.5312311195207087 ---- Cost = 2443.8871139658822 ---- 3:50:25.102642 passed
k = 748 ---- Silhouette distance = 0.5268180876973666 ---- Cost = 2466.3790974246176 ---- 3:50:57.242258 passed
k = 749 ---- Silhouette distance = 0.5270032589381266 ---- Cost = 2463.476951893759 ---- 3:51:29.058892 passed
k = 750 ---- Silhouette distance = 0.5290624883672155 ---- Cost = 2441.939051581874 ---- 3:52:01.156958 pas

k = 816 ---- Silhouette distance = 0.5398163952052205 ---- Cost = 2343.0549007085074 ---- 4:29:16.506833 passed
k = 817 ---- Silhouette distance = 0.5437930104576649 ---- Cost = 2338.164145790071 ---- 4:29:49.587854 passed
k = 818 ---- Silhouette distance = 0.5349776639727097 ---- Cost = 2361.2506963278342 ---- 4:30:27.171529 passed
k = 819 ---- Silhouette distance = 0.5290537915771969 ---- Cost = 2357.4583261413936 ---- 4:31:04.154103 passed
k = 820 ---- Silhouette distance = 0.5320852623135441 ---- Cost = 2345.1014714798566 ---- 4:31:40.227995 passed
k = 821 ---- Silhouette distance = 0.5460018348306587 ---- Cost = 2330.552154633977 ---- 4:32:15.149870 passed
k = 822 ---- Silhouette distance = 0.5374186615422106 ---- Cost = 2351.246324550096 ---- 4:32:50.458516 passed
k = 823 ---- Silhouette distance = 0.5216922132343196 ---- Cost = 2361.9104646038945 ---- 4:33:26.889003 passed
k = 824 ---- Silhouette distance = 0.5456843718237473 ---- Cost = 2339.389553676948 ---- 4:34:02.184657 pas

k = 890 ---- Silhouette distance = 0.534893322099376 ---- Cost = 2251.0667422014176 ---- 5:17:11.296904 passed
k = 891 ---- Silhouette distance = 0.5412225202868944 ---- Cost = 2257.179986367415 ---- 5:17:53.707333 passed
k = 892 ---- Silhouette distance = 0.5447204877498959 ---- Cost = 2245.4187807238136 ---- 5:18:32.486122 passed
k = 893 ---- Silhouette distance = 0.5300960322520246 ---- Cost = 2268.542147474583 ---- 5:19:19.338117 passed
k = 894 ---- Silhouette distance = 0.5438563852760036 ---- Cost = 2255.2560470374956 ---- 5:20:00.660054 passed
k = 895 ---- Silhouette distance = 0.5483944408872168 ---- Cost = 2246.56521316013 ---- 5:20:35.008477 passed
k = 896 ---- Silhouette distance = 0.5426300494702062 ---- Cost = 2253.6711918435353 ---- 5:21:13.319732 passed
k = 897 ---- Silhouette distance = 0.5378749275716425 ---- Cost = 2232.576517579383 ---- 5:21:58.256689 passed
k = 898 ---- Silhouette distance = 0.5297847951283398 ---- Cost = 2260.811465332421 ---- 5:22:32.534196 passed

k = 964 ---- Silhouette distance = 0.5390553921776501 ---- Cost = 2171.9079407195118 ---- 6:01:57.681676 passed
k = 965 ---- Silhouette distance = 0.5439768191591191 ---- Cost = 2177.4452548126046 ---- 6:02:35.147986 passed
k = 966 ---- Silhouette distance = 0.5454353053070589 ---- Cost = 2154.602064105611 ---- 6:03:14.588008 passed
k = 967 ---- Silhouette distance = 0.5418028637687702 ---- Cost = 2154.492596094918 ---- 6:03:49.750098 passed
k = 968 ---- Silhouette distance = 0.5400101566441373 ---- Cost = 2163.1503292929756 ---- 6:04:26.174219 passed
k = 969 ---- Silhouette distance = 0.5339390509683465 ---- Cost = 2180.0688615016816 ---- 6:05:02.303830 passed
k = 970 ---- Silhouette distance = 0.5367142416024461 ---- Cost = 2165.366439419846 ---- 6:05:40.403551 passed
k = 971 ---- Silhouette distance = 0.5426364297077881 ---- Cost = 2180.0449765696494 ---- 6:06:17.149390 passed
k = 972 ---- Silhouette distance = 0.5432589277955446 ---- Cost = 2177.119741256814 ---- 6:06:54.607251 pas

In [9]:
result_stats

[{'k': 2,
  'Silhouette distance': 0.02789118082007418,
  'Cost': 28322.777659462274},
 {'k': 3,
  'Silhouette distance': 0.037783498146624905,
  'Cost': 27796.63933678341},
 {'k': 4,
  'Silhouette distance': 0.05731489029150743,
  'Cost': 26638.613583257502},
 {'k': 5,
  'Silhouette distance': 0.06739987288772795,
  'Cost': 25520.335971188906},
 {'k': 6,
  'Silhouette distance': 0.08039724067800524,
  'Cost': 25015.630040724343},
 {'k': 7,
  'Silhouette distance': 0.09064387298675411,
  'Cost': 24548.07788000168},
 {'k': 8,
  'Silhouette distance': 0.10744935570374883,
  'Cost': 23747.97139659508},
 {'k': 9,
  'Silhouette distance': 0.11825390600278637,
  'Cost': 23293.276780590855},
 {'k': 10,
  'Silhouette distance': 0.12598113759003204,
  'Cost': 22705.245057657172},
 {'k': 11,
  'Silhouette distance': 0.1546336086352984,
  'Cost': 21880.447310631887},
 {'k': 12,
  'Silhouette distance': 0.16121372170156575,
  'Cost': 21575.732379843987},
 {'k': 13,
  'Silhouette distance': 0.16502

In [10]:
keys = result_stats[0].keys()
with open('cus-prod-type-cosine-top1000.csv', 'w') as output_file:
    dict_writer = csv.DictWriter(output_file, keys)
    dict_writer.writeheader()
    dict_writer.writerows(result_stats)

In [11]:
# end time
end_time = datetime.datetime.now()
print('Job done at: {} ---------- {} passed'.format(end_time, end_time - start_time))

Job done at: 2019-05-30 18:37:56.617271 ---------- 6:23:44.517771 passed
