In [11]:
from sklearn import datasets
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans
import pandas as pd

from sklearn_model.export import Model

In [2]:
# Load the iris dataset and unpack the pieces this notebook names:
# the feature matrix (data), the targets, and the target / feature name lists.
iris = datasets.load_iris()
X, y = iris.data, iris.target
target_names, feature_names = iris.target_names, iris.feature_names

In [3]:
# Clustering is unsupervised, so the targets in y are left out:
# the frame holds only the feature matrix, with one named column per feature.
df = pd.DataFrame(data=X, columns=feature_names)
df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [4]:
# Perform MinMax scaling: learn the per-column scaling from df, then apply it to df.
# The fitted scaler object is kept because it is added to the exported model later on.
# Note that X is rebound here from the raw feature matrix to the scaled values.
scaler = MinMaxScaler()
scaler.fit(df)
X = scaler.transform(df)

In [14]:
# Create and fit the model.
# n_init is pinned explicitly: scikit-learn changed its default from 10 to 'auto'
# in release 1.4, and the cluster ids that this notebook maps to species names
# further down depend on the fit being reproducible across library versions.
# NOTE(review): 10 is assumed to be the default in effect when the recorded
# outputs were produced — confirm against the scikit-learn version used.
model = KMeans(n_clusters=3, n_init=10, random_state=1)
model.fit(X)

KMeans(n_clusters=3, random_state=1)

In [15]:
# Display the cluster id (0, 1 or 2) that the fitted model assigned to each row of X
model.labels_

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2,
       2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 2, 0, 2, 0, 2, 2, 0, 0, 2, 2, 2, 2,
       2, 0, 0, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 0], dtype=int32)

In [18]:
# Start an export description and register the input fields from the feature frame
mdl = Model()
mdl.add_fields(df)

# KMeans only yields numeric cluster ids; the species names were attached to them
# after further study of the clusters:
#   cluster id 0 -> 'versicolor', cluster id 1 -> 'setosa', cluster id 2 -> 'virginica'
# The names are registered through add_output_field, listed in cluster-id order.
mdl.add_output_field(
    "species",
    "category",
    values=['versicolor', 'setosa', 'virginica'],
)

# Attach the fitted MinMax scaler for every feature column, then the fitted KMeans model
mdl.add_transformer(scaler, df.columns.tolist())
mdl.add_model(model)

In [19]:
# View the exported model: exportJSON() is called without arguments and its
# result is printed, which shows the JSON description built in the previous cell
print(mdl.exportJSON())

{
    "input": {
        "sepal length (cm)": {
            "type": "float"
        },
        "sepal width (cm)": {
            "type": "float"
        },
        "petal length (cm)": {
            "type": "float"
        },
        "petal width (cm)": {
            "type": "float"
        }
    },
    "output": {
        "species": {
            "type": "category",
            "values": [
                "versicolor",
                "setosa",
                "virginica"
            ]
        }
    },
    "transformer": {
        "type": "MinMax",
        "scale_fields": {
            "sepal length (cm)": {
                "scale": 0.27777777777777773,
                "min": -1.1944444444444442
            },
            "sepal width (cm)": {
                "scale": 0.41666666666666663,
                "min": -0.8333333333333333
            },
            "petal length (cm)": {
                "scale": 0.1694915254237288,
                "min": -0.1694915254237288
            },
   

In [20]:
# Save the model in a file: exportJSON is given the target file name 'kmeans.json'
# NOTE(review): the path is relative, so presumably it resolves against the
# kernel's current working directory — confirm before relying on its location
mdl.exportJSON('kmeans.json')