# Hello, Ignite ML!

This document contains example of Ignite ML Python API.

In [1]:
from sklearn.datasets import make_regression
from sklearn.datasets import make_classification
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.metrics import accuracy_score

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

## 1. Regression

Lets prepare a regression dataset using scikit-learn.

In [2]:
x, y = make_regression(n_targets=1)
x_train, x_test, y_train, y_test = train_test_split(x, y)

In [3]:
from ignite_ml.cache import Ignition

ignite = Ignition.ignite()
train_cache = ignite.createCache("train")

xy_train = np.column_stack((x_train, y_train))

for i in range(xy_train.shape[0]):
    train_cache.put(i, xy_train[i])

Py4JJavaError: An error occurred while calling o0.createCache.
: org.apache.ignite.cache.CacheExistsException: Failed to start cache (a cache with the same name is already started): train
	at org.apache.ignite.internal.processors.cache.GridCacheProcessor.prepareCacheChangeRequest(GridCacheProcessor.java:5448)
	at org.apache.ignite.internal.processors.cache.GridCacheProcessor.lambda$dynamicStartCache$15(GridCacheProcessor.java:3792)
	at org.apache.ignite.internal.processors.cache.GridCacheProcessor.dynamicStartCache(GridCacheProcessor.java:3822)
	at org.apache.ignite.internal.processors.cache.GridCacheProcessor.dynamicStartCache(GridCacheProcessor.java:3731)
	at org.apache.ignite.internal.IgniteKernal.createCache(IgniteKernal.java:2982)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:238)
	at java.lang.Thread.run(Thread.java:748)


### 1.1. Linear Regression

Now lets train a linear model using Ignite ML and estimate the quality of the model:

In [None]:
from ignite_ml.regression import LinearRegressionTrainer
trainer = LinearRegressionTrainer()
model = trainer.fit(x_train, y_train)

r2_score(y_test, model.predict(x_test))

And just to compare lets do the same for scikit-learn:

In [None]:
from sklearn.linear_model import LinearRegression
reg = LinearRegression()
reg.fit(x_train, y_train)

r2_score(y_test, reg.predict(x_test))

### 1.2. Decision Tree Regression

Using Ignite ML:

In [None]:
from ignite_ml.regression import DecisionTreeRegressionTrainer
trainer = DecisionTreeRegressionTrainer()
model = trainer.fit(x_train, y_train)

r2_score(y_test, model.predict(x_test))

And using scikit-learn:

In [None]:
from sklearn.tree import DecisionTreeRegressor
reg = DecisionTreeRegressor()
reg.fit(x_train, y_train)

r2_score(y_test, reg.predict(x_test))

### 1.3. KNN Regression

Using Ignite ML:

In [None]:
from ignite_ml.regression import KNNRegressionTrainer
trainer = KNNRegressionTrainer()
model = trainer.fit(x_train, y_train)

r2_score(y_test, model.predict(x_test))

And using scikit-learn:

In [None]:
from sklearn.neighbors import KNeighborsRegressor
reg = KNeighborsRegressor()
reg.fit(x_train, y_train)

r2_score(y_test, reg.predict(x_test))

### 1.4 Random Forest Regression

Using Ignite ML:

In [None]:
from ignite_ml.regression import RandomForestRegressionTrainer
trainer = RandomForestRegressionTrainer()
model = trainer.fit(x_train, y_train)

r2_score(y_test, model.predict(x_test))

In [None]:
from sklearn.ensemble import RandomForestRegressor
reg = RandomForestRegressor()
reg.fit(x_train, y_train)

r2_score(y_test, reg.predict(x_test))

### 1.5 MLP Regression

Using Ignite ML:

In [None]:
from ignite_ml.common import MLPArchitecture
from ignite_ml.regression import MLPRegressionTrainer

trainer = MLPRegressionTrainer(MLPArchitecture(100).with_layer(2, activator='linear'))
model = trainer.fit(x_train, y_train)

r2_score(y_test, model.predict(x_test))

And using scikit-learn:

In [None]:
from sklearn.neural_network import MLPRegressor
reg = MLPRegressor(max_iter=1000)
reg.fit(x_train, y_train)

r2_score(y_test, reg.predict(x_test))

## 2. Classification

Lets prepare a classification dataset using scikit-learn.

In [None]:
x, y = make_classification()
x_train, x_test, y_train, y_test = train_test_split(x, y)

### 2.1 Decision Tree Classification

Using Ignite ML:

In [None]:
from ignite_ml.classification import DecisionTreeClassificationTrainer
trainer = DecisionTreeClassificationTrainer()
model = trainer.fit(x_train, y_train)

accuracy_score(y_test, model.predict(x_test))

And using scikit-learn:

In [None]:
from sklearn.tree import DecisionTreeClassifier
clf = DecisionTreeClassifier()
clf.fit(x_train, y_train)

accuracy_score(y_test, clf.predict(x_test))

### 2.2 ANN Classification

Using Ignite ML:

In [None]:
from ignite_ml.classification import ANNClassificationTrainer
trainer = ANNClassificationTrainer()
model = trainer.fit(x_train, y_train)

accuracy_score(y_test, model.predict(x_test))

### 2.3 KNN Classification

Using Ignite ML:

In [None]:
from ignite_ml.classification import KNNClassificationTrainer
trainer = KNNClassificationTrainer()
model = trainer.fit(x_train, y_train)

accuracy_score(y_test, model.predict(x_test))

And using scikit-learn:

In [None]:
from sklearn.neighbors import KNeighborsClassifier
clf = KNeighborsClassifier()
clf.fit(x_train, y_train)

accuracy_score(y_test, clf.predict(x_test))

### 2.4 LogReg Classification

Using Ignite ML:

In [None]:
from ignite_ml.classification import LogRegClassificationTrainer
trainer = LogRegClassificationTrainer()
model = trainer.fit(x_train, y_train)

accuracy_score(y_test, model.predict(x_test))

And using scikit-learn:

In [None]:
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression()
clf.fit(x_train, y_train)

accuracy_score(y_test, model.predict(x_test))

### 2.5 SVM Classification

Using Ignite ML:

In [None]:
from ignite_ml.classification import SVMClassificationTrainer
trainer = SVMClassificationTrainer()
model = trainer.fit(x_train, y_train)

accuracy_score(y_test, model.predict(x_test))

And using scikit-learn:

In [None]:
from sklearn.svm import LinearSVC
clf = LinearSVC()
clf.fit(x_train, y_train)

accuracy_score(y_test, clf.predict(x_test))

### 2.6 Random Forest Classification

Using Ignite ML:

In [None]:
from ignite_ml.classification import RandomForestClassificationTrainer
trainer = RandomForestClassificationTrainer()
model = trainer.fit(x_train, y_train)

accuracy_score(y_test, model.predict(x_test))

And using scikit-learn:

In [None]:
from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier()
clf.fit(x_train, y_train)

accuracy_score(y_test, model.predict(x_test))

### 2.7 MLP Classification

Using Ignite ML:

In [None]:
from ignite_ml.common import MLPArchitecture
from ignite_ml.classification import MLPClassificationTrainer

def encode_label(x):
    if x:
        return [0, 1]
    else:
        return [1, 0]

def decode_label(x):
    if x[0] > x[1]:
        return 0
    else:
        return 1
    
trainer = MLPRegressionTrainer(MLPArchitecture(20).with_layer(2, activator='sigmoid'))
model = trainer.fit(x_train, [encode_label(x) for x in y_train])

accuracy_score(y_test, [decode_label(x) for x in model.predict(x_test)])

## 3. Clustering

Lets prepare a clustering dataset using scikit-learn.

In [None]:
x, y = make_blobs(n_samples=2000, n_features=2, cluster_std=1.0, centers=[(-3, -3), (0, 0), (3, 3)])

In [None]:
for i in range(3):
    plt.scatter(x[y == i][:, 0], x[y == i][:, 1], s=1)

### 3.1 KMeans Clustering

Using Ignite ML:

In [None]:
from ignite_ml.clustering import KMeansClusteringTrainer
trainer = KMeansClusteringTrainer(amount_of_clusters=2)
model = trainer.fit(x)

y_predicted = np.array(model.predict(x))
for i in range(3):
    plt.scatter(x[y_predicted == i][:, 0], x[y_predicted == i][:, 1], s=1)

And using scikit-learn:

In [None]:
from sklearn.cluster import KMeans
clr = KMeans(n_clusters=2)
clr.fit(x)

y_predicted = np.array(clr.predict(x))
for i in range(2):
    plt.scatter(x[y_predicted == i][:, 0], x[y_predicted == i][:, 1], s=1)

### 3.2 GMM Clustering

Using Ignite ML:

In [None]:
from ignite_ml.clustering import GMMClusteringTrainer
trainer = GMMClusteringTrainer(count_of_components=2, max_count_of_clusters=2)
model = trainer.fit(x)

y_predicted = np.array(model.predict(x))
for i in range(2):
    plt.scatter(x[y_predicted == i][:, 0], x[y_predicted == i][:, 1], s=1)