## k-means

In [16]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans

# Load the xclara dataset from the local dataset directory.
data = pd.read_csv('./dataset/xclara.csv')
print("Input Data and Shape")
print(data.shape)

# Stack the two feature columns side by side into an (n_samples, 2) matrix,
# the layout scikit-learn estimators expect.
f1 = data['V1'].values
f2 = data['V2'].values
X = np.column_stack((f1, f2))

# k-means starts from random centroids, so pin random_state to make the
# centroids and the cluster numbering reproducible across runs. n_init is
# spelled out because its default differs between scikit-learn releases.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=0)
kmeans = kmeans.fit(X)
labels = kmeans.predict(X)

centroids = kmeans.cluster_centers_

print('centroids: {}'.format(centroids))
print('prediction on each data: {}'.format(labels))

# Assign a single new point to its nearest centroid. The result is kept in its
# own variable so `labels` still holds the per-sample cluster assignments.
new_point_label = kmeans.predict(np.array([[12.0, 14.0]]))
print('prediction on data point (12.0, 14.0): {}'.format(new_point_label))


Input Data and Shape
(3000, 2)
centroids: [[ 69.92418447 -10.11964119]
 [ 40.68362784  59.71589274]
 [  9.4780459   10.686052  ]]
prediction on each data: [2 2 2 ..., 0 0 0]
prediction on data point (12.0, 14.0): [2]


# DBSCAN

In [3]:
from sklearn.cluster import DBSCAN
from sklearn import metrics
# make_blobs is imported from the public sklearn.datasets namespace; the
# private sklearn.datasets.samples_generator path no longer exists in current
# scikit-learn releases and raises ImportError there.
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd

# Load the iris data (the file has no header row) and drop column 4 so that
# only the four leading columns are clustered.
df = pd.read_csv('./dataset/iris.csv', header=None)
df = df.drop([4], axis=1)
print("Input Data and Shape")
print(df.head())

X = np.array(df)

# NOTE(review): in the recorded output most samples come back as noise (-1)
# with eps=0.3 on the raw features; consider scaling X with StandardScaler or
# tuning eps/min_samples -- confirm the intended setup.
db = DBSCAN(eps=0.3, min_samples=5).fit(X)
labels = db.labels_

# Number of clusters in labels, ignoring noise if present
# (DBSCAN marks noise points with the label -1).
n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
print('number of clusters: {}'.format(n_clusters))
print('cluster on X {}'.format(labels))

Input Data and Shape
     0    1    2    3
0  5.1  3.5  1.4  0.2
1  4.9  3.0  1.4  0.2
2  4.7  3.2  1.3  0.2
3  4.6  3.1  1.5  0.2
4  5.0  3.6  1.4  0.2
number of clusters: 3
cluster on X [ 0  0  0  0  0 -1  0  0  0  0  0  0  0  0 -1 -1 -1  0 -1  0 -1  0 -1  0  0
  0  0  0  0  0  0 -1 -1 -1  0  0 -1  0  0  0  0 -1  0  0 -1  0  0  0  0  0
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  1 -1  1  2 -1 -1 -1 -1
 -1 -1 -1 -1 -1  1  1  1 -1 -1 -1 -1 -1  1  1 -1 -1  1 -1  1  1  1 -1 -1  1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1  2  2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1  2]


# EM (Expectation-Maximization with a Gaussian mixture model)

In [6]:
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn import mixture
import pandas as pd
import numpy as np

# Load the iris data (the file has no header row) and drop column 4 so that
# only the four leading columns are modelled.
df = pd.read_csv('./dataset/iris.csv', header=None)
df = df.drop([4], axis=1)
print("Input Data and Shape")
print(df.head())

X = np.array(df)

# Fit a 3-component Gaussian mixture. The fit is randomly initialised, so
# random_state is pinned to keep the component numbering and the resulting
# assignments reproducible across runs.
gmm = mixture.GaussianMixture(n_components=3, random_state=0).fit(X)

# Hard assignment: the component index predicted for every sample.
X_pred = gmm.predict(X)

print(X_pred)

Input Data and Shape
     0    1    2    3
0  5.1  3.5  1.4  0.2
1  4.9  3.0  1.4  0.2
2  4.7  3.2  1.3  0.2
3  4.6  3.1  1.5  0.2
4  5.0  3.6  1.4  0.2
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 2 1 2
 2 2 2 1 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1]
