# Ignite ML Preprocessing with Ignite Cache!

This document contains an example of using the Ignite ML preprocessing Python API with an Ignite cache.

In [1]:
from ggml.core import Ignite

from sklearn.datasets import make_regression
from sklearn.datasets import make_classification
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.metrics import accuracy_score

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

Let's prepare a classification dataset using scikit-learn.

In [2]:
# Fixed seed so the synthetic dataset and the train/test split are identical on
# every "Restart & Run All"; unseeded, both calls draw fresh random data each run.
RANDOM_STATE = 42

x, y = make_classification(random_state=RANDOM_STATE)
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=RANDOM_STATE)
# Append the labels as the last column so every training row is (features..., label).
xy_train = np.column_stack((x_train, y_train))

### 1.1 Normalization preprocessing

In [3]:
from ggml.preprocessing import NormalizationTrainer

# Path of the Ignite XML configuration used to start the instance for this cell.
ignite_config = "/home/gridgain/ignite/examples/config/example-ignite.xml"

with Ignite(ignite_config) as ignite:
    train_cache = ignite.create_cache("test-preprocessing")
    # Key every cache entry by its row index; the value is the whole row
    # (features followed by the label, as built by np.column_stack).
    for row_index, row in enumerate(xy_train):
        train_cache.put(row_index, row)

    # Fit the normalization preprocessor on the cached training rows.
    # NOTE(review): the cached rows include the label column; presumably the
    # trainer treats the last column as the label — confirm.
    normalizer = NormalizationTrainer().fit_on_cache(train_cache)

# Apply the fitted normalizer to the in-memory train and test feature arrays.
x_train_normalized, x_test_normalized = (
    normalizer.transform(x_train),
    normalizer.transform(x_test),
)

In [4]:
from ggml.classification import DecisionTreeClassificationTrainer

trainer = DecisionTreeClassificationTrainer()

# Path of the Ignite XML configuration used to start the instance for this cell.
ignite_config = "/home/gridgain/ignite/examples/config/example-ignite.xml"

with Ignite(ignite_config) as ignite:
    train_cache = ignite.create_cache("test-preprocessing")
    # Load the training rows into the cache, keyed by row index.
    for row_index, row in enumerate(xy_train):
        train_cache.put(row_index, row)

    # Train once on the raw cache and once on the cache viewed through the
    # previously fitted normalizer.
    model_without_normalization = trainer.fit_on_cache(train_cache)
    normalized_cache = train_cache.transform(normalizer)
    model_with_normalization = trainer.fit_on_cache(normalized_cache)

# Score each model on the held-out test set; the model trained on normalized
# data is evaluated on the normalized test features.
accuracy_raw = accuracy_score(y_test, model_without_normalization.predict(x_test))
print("Without normalization: %f" % accuracy_raw)

accuracy_normalized = accuracy_score(y_test, model_with_normalization.predict(x_test_normalized))
print("With normalization: %f" % accuracy_normalized)

Without normalization: 0.760000
With normalization: 0.800000
