In [None]:
import csv
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

# Cluster the per-box statistics in 'processed_vals.csv' into three groups
# with k-means, name the groups Small/Medium/Large by area, and save a
# scatter plot of the result to 'clusters.png'.

# Load the headerless csv, supplying the column names explicitly, and keep
# only the three feature columns (normed_w, normed_h, area).
df = pd.read_csv(
    'processed_vals.csv',
    names=['image', 'a_width', 'a_height', 'width', 'height', 'normed_w', 'normed_h', 'area'],
    header=None,
).drop(columns=['image', 'a_width', 'a_height', 'width', 'height'])

# Snapshot the features as a numpy array *before* the cluster column is
# added to df, so the model only ever sees the three feature columns.
x = np.array(df)

# Three-cluster k-means; the fixed random_state keeps runs reproducible.
kbox_model = KMeans(n_clusters=3, random_state=24)
kbox_predict = kbox_model.fit_predict(x)

# Record each row's cluster id alongside its features.
df['cluster'] = kbox_predict

# Rank the clusters by their smallest area: the cluster with the lowest
# minimum becomes 'Small', the next 'Medium', and the last 'Large'.
labels = ['Small', 'Medium', 'Large']
label_df = (
    df.groupby(['cluster'])['area']
    .min()
    .to_frame()
    .sort_values(by=['area'])
    .assign(label=labels)
    .reset_index()
)

# Look up the human-readable name for each k-means cluster id.
cluster_to_label = dict(zip(label_df['cluster'], label_df['label']))
zero_lab = cluster_to_label[0]
one_lab = cluster_to_label[1]
two_lab = cluster_to_label[2]

# Scatter the first two features (normed_w against normed_h), one colour
# per cluster, drawn in cluster-id order so the legend order is stable.
for cluster_id, colour, cluster_name in (
    (0, 'red', zero_lab),
    (1, 'blue', one_lab),
    (2, 'green', two_lab),
):
    members = x[kbox_predict == cluster_id]
    plt.scatter(members[:, 0], members[:, 1], c=colour, label=cluster_name)

# Overlay the fitted centroids (same two feature axes) and write the figure.
centres = kbox_model.cluster_centers_
plt.scatter(centres[:, 0], centres[:, 1], s=100, c='yellow', label='Centroids')
plt.legend()
plt.savefig('clusters.png')