Skip to content

Commit

Permalink
kmeans update
Browse files Browse the repository at this point in the history
  • Loading branch information
hmason committed Feb 17, 2012
1 parent b2fa4f2 commit b05c29f
Showing 1 changed file with 11 additions and 21 deletions.
32 changes: 11 additions & 21 deletions solving_problems/kmeans_descriptions.py
Expand Up @@ -32,7 +32,7 @@
sys.exit(1) sys.exit(1)




input_data = csv.reader(open('descriptions.csv','rb')) input_data = csv.reader(open('descriptions_100.csv','rb'))
dataset_data = [] dataset_data = []
dataset_target = [] dataset_target = []
for row in input_data: for row in input_data:
Expand All @@ -50,31 +50,21 @@


print "done in %fs" % (time() - t0) print "done in %fs" % (time() - t0)
print "n_samples: %d, n_features: %d" % X.shape print "n_samples: %d, n_features: %d" % X.shape
print




############################################################################### ###############################################################################
# Do the actual clustering # Do the actual clustering


if opts.minibatch: km = MiniBatchKMeans(k=true_k, init='k-means++', n_init=1,init_size=1000,batch_size=1000, verbose=1)
km = MiniBatchKMeans(k=true_k, init='k-means++', n_init=1,
init_size=1000,
batch_size=1000, verbose=1)
else:
km = KMeans(k=true_k, init='random', max_iter=100, n_init=1, verbose=1)


print "Clustering sparse data with %s" % km print "Clustering with %s" % km
t0 = time() t0 = time()
km.fit(X) km.fit(X)
print "done in %0.3fs" % (time() - t0) print "done in %0.3fs\n" % (time() - t0)
print print km.labels_


print "Homogeneity: %0.3f" % metrics.homogeneity_score(labels, km.labels_) # print "Homogeneity: %0.3f" % metrics.homogeneity_score(labels, km.labels_)
print "Completeness: %0.3f" % metrics.completeness_score(labels, km.labels_) # print "Completeness: %0.3f" % metrics.completeness_score(labels, km.labels_)
print "V-measure: %0.3f" % metrics.v_measure_score(labels, km.labels_) # print "V-measure: %0.3f" % metrics.v_measure_score(labels, km.labels_)
print "Adjusted Rand-Index: %.3f" % \
metrics.adjusted_rand_score(labels, km.labels_)
print "Silhouette Coefficient: %0.3f" % metrics.silhouette_score(
X, labels, sample_size=1000)

print

0 comments on commit b05c29f

Please sign in to comment.