# K-Means implementation in python 1
Author: Patuan P. Tampubolon

You are given an array `points` of size 300x2, where each row gives the (x, y) co-ordinates of a point on a map.  Make a scatter plot of these points, and use the scatter plot to guess how many clusters there are.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans as km
from sklearn.metrics import silhouette_samples, silhouette_score

## Load the dataset locally

In [None]:
# Parse the CSV from the current working directory into a DataFrame.
df = pd.read_csv(filepath_or_buffer='data1.csv')

# Expose the table's contents as an array so columns can be sliced by
# position in the cells below.
points = df.values

## Load the dataset for Google Colab

In [None]:
# Colab-only alternative to reading the CSV from local disk: `google.colab`
# is imported here, so this cell is meant to be run inside Google Colab.
from google.colab import files
# Keep the result of the upload; a later cell looks up 'data1.csv' in it.
uploaded = files.upload()

In [None]:
import io

# `uploaded` is looked up by filename; the stored content is wrapped in an
# in-memory binary buffer so pandas can parse it like a file on disk.
csv_bytes = uploaded['data1.csv']
df = pd.read_csv(io.BytesIO(csv_bytes))

# Same array view of the table that the local-loading cell produces.
points = df.values

## Look at the first few rows of the DataFrame

In [None]:
# Preview the first five rows (the default for head) to sanity-check the load.
df.head(n=5)

Create an array called `xs` that contains the values of `points[:,0]` - that is, column `0` of `points`:

In [None]:
# Column 0 of every row: the x co-ordinates.
xs = points[:, 0]

Create an array called `ys` that contains the values of `points[:,1]` - that is, column `1` of `points`

In [None]:
# Column 1 of every row: the y co-ordinates.
ys = points[:, 1]

## Make a scatter plot

In [None]:
# Draw the raw points so the number of clusters can be judged by eye.
plt.scatter(x=xs, y=ys)
plt.show()

# Find 3 clusters

In [None]:
# K-Means estimator configured for three clusters.
# n_init is set explicitly because scikit-learn's default number of restarts
# differs between releases (10 historically, 'auto' in newer versions), and
# random_state is fixed so the label ids, plot colours and centroids are the
# same every time the notebook is run.
model = km(n_clusters=3, n_init=10, random_state=0)

In [None]:
# Learn the cluster centres from the full set of points. Left as the last
# expression of the cell so the notebook displays the fitted estimator.
model.fit(X=points)

In [None]:
# Centres learned by the fitted model. NOTE: despite the variable name, the
# later cells slice columns 0 and 1 of this array and plot them as x/y
# positions, i.e. it is used as centre co-ordinates, not as indices.
cluster_centers_indices = model.cluster_centers_
# Bare expression so the notebook displays the array.
cluster_centers_indices

In [None]:
# One row per centre, so the row count is the number of clusters found.
n_clusters_ = cluster_centers_indices.shape[0]
n_clusters_

In [None]:
# Cluster label assigned by the fitted model to each point.
labels = model.predict(X=points)

In [None]:
# Split the centres into separate x and y sequences for plotting.
centroids_x, centroids_y = (
    cluster_centers_indices[:, 0],
    cluster_centers_indices[:, 1],
)

In [None]:
# Points coloured by their assigned cluster label.
plt.scatter(x=xs, y=ys, c=labels)

# Overlay the cluster centres as large 'X' markers so they stand out.
plt.scatter(x=centroids_x, y=centroids_y, s=200, marker='X')
plt.show()

## Confirming parameter K using the Elbow method

In [None]:
# Candidate cluster counts (2 to 5) and the inertia measured for each one.
ks = range(2, 6)
inertias = []

for k in ks:
    # Use the same number of restarts and the same seed for every k, so the
    # inertia curve reflects the change in k rather than random
    # initialisation, and is identical across runs and scikit-learn versions
    # (whose default n_init differs between releases).
    model = km(n_clusters=k, n_init=10, random_state=0)

    # Fit model to samples
    model.fit(points)

    # Record the inertia of the fitted model for this k.
    inertias.append(model.inertia_)

In [None]:
# Plot ks vs inertias
plt.plot(ks, inertias, '-o')
plt.xlabel('number of clusters, k')
plt.ylabel('inertia')
plt.xticks(ks)
plt.show()

## Confirming parameter K using the silhouette score

In [None]:
# Cluster counts to evaluate: 2, 3, 4 and 5 (the upper bound is exclusive).
range_n_clusters = range(2, 6)

In [None]:
# Report the mean silhouette score for each candidate number of clusters.
for n_clusters in range_n_clusters:
    # Pin the number of restarts and the seed so the scores are reproducible
    # and comparable between values of n_clusters; scikit-learn's default
    # n_init differs between releases.
    model = km(n_clusters=n_clusters, n_init=10, random_state=0)

    # Fit on `points`, the same array every other fit in this notebook uses,
    # and keep the label assigned to each point.
    labels = model.fit_predict(points)

    # Mean silhouette coefficient over all points for this clustering.
    silhouette_avg = silhouette_score(points, labels)
    print("For n_clusters =", n_clusters, "The average silhouette_score is :", silhouette_avg)