# Ignite ML Preprocessing!

This document contains example of Ignite ML Preprocessing Python API.

In [1]:
from sklearn.datasets import make_regression
from sklearn.datasets import make_classification
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.metrics import accuracy_score

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

Lets prepare a classification dataset using scikit-learn.

In [2]:
x, y = make_classification()
x_train, x_test, y_train, y_test = train_test_split(x, y)

### 1.1 Normalization preprocessing

In [3]:
from ignite_ml.preprocessing import NormalizationTrainer
normalizer = NormalizationTrainer().fit_on_cache(x_train)

# Just to test.
x_test_normalized = normalizer.transform(x_test)

AttributeError: 'numpy.ndarray' object has no attribute 'proxy'

In [None]:
from ignite_ml.classification import DecisionTreeClassificationTrainer
trainer = DecisionTreeClassificationTrainer()
model_without_normalization = trainer.fit_on_cache(cache)
model_with_normalization = trainer.fit_on_cache(cache, normalizer)

print("Without normalization: %f" % accuracy_score(
    y_test, 
    model_without_normalization.predict(x_test)
))
print("With normalization: %f" % accuracy_score(
    y_test, 
    model_with_normalization.predict(x_test_normalized)
))

### 1.2 Binarization preprocessor

In [None]:
from ignite_ml.preprocessing import BinarizationTrainer
binarizer = BinarizationTrainer(threshold=0.5).fit([[]])

binarizer.transform(np.random.rand(10))

### 1.3 Imputing preprocessor

In [None]:
from ignite_ml.preprocessing import ImputerTrainer
imputer = ImputerTrainer().fit([[1, 1, 1], [2, 2, 2]])
imputer.transform([
    [None, 4, 5],
    [4, None, 5],
    [4, 5, None]
])

### 1.4 One hot encoding preprocessor

In [None]:
from ignite_ml.preprocessing import EncoderTrainer
encoder = EncoderTrainer(encoded_features=[0]).fit([
    [42],
    [43],
    [42],
    [43]
])
encoder.transform([
    [42],
    [43],
    [42],
    [43]
])

### 1.5 String encoding preprocessor

In [None]:
# TODO

### 1.6 MinMax scaling preprocessor

In [None]:
from ignite_ml.preprocessing import MinMaxScalerTrainer
scaler = MinMaxScalerTrainer().fit([[1, 1, 1], [2, 2, 2]])
scaler.transform([
    [1, 1, 1],
    [1.5, 1.5, 1.5],
    [2, 2, 2]
])

### 1.7 MaxAbs scaling preprocessor

In [None]:
from ignite_ml.preprocessing import MaxAbsScalerTrainer
scaler = MaxAbsScalerTrainer().fit([[1, 1, 1], [2, 2, 2]])
scaler.transform([
    [1, 1, 1],
    [1.5, 1.5, 1.5],
    [2, 2, 2]
])