diff --git a/solving_problems/kmeans_descriptions.py b/solving_problems/kmeans_descriptions.py index 019097d..e2fa0bc 100644 --- a/solving_problems/kmeans_descriptions.py +++ b/solving_problems/kmeans_descriptions.py @@ -32,7 +32,7 @@ sys.exit(1) -input_data = csv.reader(open('descriptions.csv','rb')) +input_data = csv.reader(open('descriptions_100.csv','rb')) dataset_data = [] dataset_target = [] for row in input_data: @@ -50,31 +50,21 @@ print "done in %fs" % (time() - t0) print "n_samples: %d, n_features: %d" % X.shape -print ############################################################################### # Do the actual clustering -if opts.minibatch: - km = MiniBatchKMeans(k=true_k, init='k-means++', n_init=1, - init_size=1000, - batch_size=1000, verbose=1) -else: - km = KMeans(k=true_k, init='random', max_iter=100, n_init=1, verbose=1) +km = MiniBatchKMeans(k=true_k, init='k-means++', n_init=1,init_size=1000,batch_size=1000, verbose=1) -print "Clustering sparse data with %s" % km +print "Clustering with %s" % km t0 = time() km.fit(X) -print "done in %0.3fs" % (time() - t0) -print - -print "Homogeneity: %0.3f" % metrics.homogeneity_score(labels, km.labels_) -print "Completeness: %0.3f" % metrics.completeness_score(labels, km.labels_) -print "V-measure: %0.3f" % metrics.v_measure_score(labels, km.labels_) -print "Adjusted Rand-Index: %.3f" % \ - metrics.adjusted_rand_score(labels, km.labels_) -print "Silhouette Coefficient: %0.3f" % metrics.silhouette_score( - X, labels, sample_size=1000) - -print +print "done in %0.3fs\n" % (time() - t0) +print km.labels_ + +# print "Homogeneity: %0.3f" % metrics.homogeneity_score(labels, km.labels_) +# print "Completeness: %0.3f" % metrics.completeness_score(labels, km.labels_) +# print "V-measure: %0.3f" % metrics.v_measure_score(labels, km.labels_) + +