In [1]:
# import necessary libraries
import pandas as pd
import numpy as np
from sklearn.cluster import MeanShift
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

In [2]:
# load the dataset into a pandas DataFrame
df = pd.read_csv('wine.data', delimiter=',')

# print the first five entries of the DataFrame
print(df.head())

   1  14.23  1.71  2.43  15.6  127   2.8  3.06   .28  2.29  5.64  1.04  3.92  \
0  1  13.20  1.78  2.14  11.2  100  2.65  2.76  0.26  1.28  4.38  1.05  3.40   
1  1  13.16  2.36  2.67  18.6  101  2.80  3.24  0.30  2.81  5.68  1.03  3.17   
2  1  14.37  1.95  2.50  16.8  113  3.85  3.49  0.24  2.18  7.80  0.86  3.45   
3  1  13.24  2.59  2.87  21.0  118  2.80  2.69  0.39  1.82  4.32  1.04  2.93   
4  1  14.20  1.76  2.45  15.2  112  3.27  3.39  0.34  1.97  6.75  1.05  2.85   

   1065  
0  1050  
1  1185  
2  1480  
3   735  
4  1450  


In [3]:
# initialize the MeanShift clustering algorithm
ms = MeanShift()

# fit the MeanShift model to the DataFrame
ms.fit(df)

In [4]:
# get the cluster centers from the MeanShift clustering model
centroids = ms.cluster_centers_

# print the coordinates of the cluster centers
print(centroids)

# get the cluster labels from the MeanShift clustering model
labels = ms.labels_

# print the cluster labels for each data point
print(labels)

[[2.40816327e+00 1.26765306e+01 2.56051020e+00 2.34071429e+00
  2.06602041e+01 9.50000000e+01 2.01673469e+00 1.53959184e+00
  4.02857143e-01 1.40938776e+00 5.00591836e+00 9.01734694e-01
  2.30122449e+00 5.62153061e+02]
 [1.53333333e+00 1.34051111e+01 2.32866667e+00 2.40444444e+00
  1.84844444e+01 1.08555556e+02 2.48600000e+00 2.28955556e+00
  3.19777778e-01 1.69977778e+00 5.19977778e+00 9.63466667e-01
  2.88466667e+00 9.15177778e+02]
 [1.02564103e+00 1.37433333e+01 1.89538462e+00 2.42179487e+00
  1.71743590e+01 1.04384615e+02 2.80589744e+00 2.94512821e+00
  2.84871795e-01 1.85948718e+00 5.45333333e+00 1.07256410e+00
  3.11128205e+00 1.14230769e+03]]
[2 2 2 0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 2 1 1 1 2 2 1 2 2 2 1 2 2 1 1 2
 1 1 1 2 2 0 1 2 2 1 2 2 2 2 2 2 2 2 1 2 2 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1 1
 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0
 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0

In [5]:
from collections import Counter

# print the number of clusters from the MeanShift algorithm
num_clusters = len(set(labels))
print('Number of clusters by the MeanShift algorithm:', num_clusters)

# print the number of wine records in each cluster
records = Counter(labels)
for cluster in range(num_clusters):
    print(f'Cluster {cluster} contains {records[cluster]} wine records')

Number of clusters by the MeanShift algorithm: 3
Cluster 0 contains 106 wine records
Cluster 1 contains 31 wine records
Cluster 2 contains 40 wine records
