# MinMaxScaler

## About DataSet

From site:

There are 10 predictors, all quantitative, and a binary dependent variable, indicating the presence or absence of breast cancer. (Note: the file loaded below contains 9 predictor columns plus the `Classification` label.)

The predictors are anthropometric data and parameters which can be gathered in routine blood analysis

Prediction models based on these predictors, if accurate, can potentially be used as a biomarker of breast cancer

## Attribute info

- Quantitative Attributes: 
- Age (years) 
- BMI (kg/m2) 
- Glucose (mg/dL) 
- Insulin (µU/mL) 
- HOMA 
- Leptin (ng/mL) 
- Adiponectin (µg/mL) 
- Resistin (ng/mL) 
- MCP-1 (pg/dL)

### Labels

- 1 — Healthy controls 
- 2 — Patients



## Formula of Scaler

# $$\color{Purple}{X' = \frac{X - \min(X)}{\max(X) - \min(X)}}$$

## Let's test

### Modules

In [31]:
import pandas as pd

from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

### Data

In [115]:
# Read the dataset; the CSV is expected to sit next to the notebook.
data = pd.read_csv('dataR2.csv')

# Report the frame's dimensions, then the number of rows per class
# (the class label is taken from the last column).
print(
    'Data shape is:\t\t\t{}'.format(data.shape),
    end='\n\n',
)
print(
    'Data targets value counts is:\n{}'.format(data.iloc[:, -1].value_counts()),
    end='\n\n',
)

# Rich (HTML) preview of the first ten rows.
display(data.iloc[:10])

Data shape is:			(116, 10)

Data targets value counts is:
2    64
1    52
Name: Classification, dtype: int64



Unnamed: 0,Age,BMI,Glucose,Insulin,HOMA,Leptin,Adiponectin,Resistin,MCP.1,Classification
0,48,23.5,70,2.707,0.467409,8.8071,9.7024,7.99585,417.114,1
1,83,20.690495,92,3.115,0.706897,8.8438,5.429285,4.06405,468.786,1
2,82,23.12467,91,4.498,1.009651,17.9393,22.43204,9.27715,554.697,1
3,68,21.367521,77,3.226,0.612725,9.8827,7.16956,12.766,928.22,1
4,86,21.111111,92,3.549,0.805386,6.6994,4.81924,10.57635,773.92,1
5,49,22.854458,92,3.226,0.732087,6.8317,13.67975,10.3176,530.41,1
6,89,22.7,77,4.69,0.890787,6.964,5.589865,12.9361,1256.083,1
7,76,23.8,118,6.47,1.883201,4.311,13.25132,5.1042,280.694,1
8,73,22.0,97,3.35,0.801543,4.47,10.358725,6.28445,136.855,1
9,75,23.0,83,4.952,1.013839,17.127,11.57899,7.0913,318.302,1


### Separation

In [108]:
# Features are every column except the last; the label is the last column.
X, Y = data.iloc[:, :-1], data.iloc[:, -1]

In [46]:
# Per-column summary statistics of the raw (unscaled) features.
# The min/max rows show how different the value ranges of the columns are,
# which is what MinMaxScaler is applied to below.
X.describe()

Unnamed: 0,Age,BMI,Glucose,Insulin,HOMA,Leptin,Adiponectin,Resistin,MCP.1
count,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0
mean,57.301724,27.582111,97.793103,10.012086,2.694988,26.61508,10.180874,14.725966,534.647
std,16.112766,5.020136,22.525162,10.067768,3.642043,19.183294,6.843341,12.390646,345.912663
min,24.0,18.37,60.0,2.432,0.467409,4.311,1.65602,3.21,45.843
25%,45.0,22.973205,85.75,4.35925,0.917966,12.313675,5.474282,6.881763,269.97825
50%,56.0,27.662416,92.0,5.9245,1.380939,20.271,8.352692,10.82774,471.3225
75%,71.0,31.241442,102.0,11.18925,2.857787,37.3783,11.81597,17.755207,700.085
max,89.0,38.578759,201.0,58.46,25.050342,90.28,38.04,82.1,1698.44


## First try: MLPC

In [36]:
# Hold out part of the rows for evaluation; random_state pins the shuffle so
# the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    random_state=0,
)

In [119]:
# Baseline: a small MLP (one hidden layer of 5 units) trained on the raw,
# unscaled features. fit() returns the estimator itself, so it can be chained.
MLPC_simple = MLPClassifier(hidden_layer_sizes=(5,), random_state=0).fit(X_train, y_train)

# score() reports accuracy on the given rows.
print('Train score:\t{:.3f}'.format(MLPC_simple.score(X_train, y_train)))
print('Test score:\t{:.3f}'.format(MLPC_simple.score(X_test, y_test)))

Train score:	0.563
Test score:	0.586


## Second try: MLPC + MinMaxScaler (0, 1)

#### Prepare

In [40]:
# Learn each column's minimum and maximum (default target range is (0, 1)).
# NOTE(review): the scaler is fit on the full X, i.e. including the rows that
# later end up in the test split, so test data influences the scaling. Fit on
# the training rows only if the scores are meant as an unbiased estimate.
MMScaler = MinMaxScaler().fit(X)

# Show the fitted scaler's parameters as the cell output.
MMScaler

MinMaxScaler(copy=True, feature_range=(0, 1))

In [120]:
# transform() returns a plain array, so wrap it back into a DataFrame that
# carries the original feature names.
X_sc = pd.DataFrame(MMScaler.transform(X), columns=X.columns)

# Preview the first rows of the scaled features.
X_sc.head()

Unnamed: 0,Age,BMI,Glucose,Insulin,HOMA,Leptin,Adiponectin,Resistin,MCP.1
0,0.369231,0.25385,0.070922,0.004908,0.0,0.052299,0.221152,0.060665,0.224659
1,0.907692,0.114826,0.22695,0.01219,0.009742,0.052726,0.103707,0.010826,0.255926
2,0.892308,0.235278,0.219858,0.036874,0.022058,0.158526,0.571021,0.076906,0.307912
3,0.676923,0.148328,0.120567,0.014171,0.005911,0.064811,0.151538,0.121131,0.533934
4,0.953846,0.13564,0.22695,0.019936,0.013748,0.027782,0.08694,0.093375,0.440565


In [51]:
# Sanity check of the scaling: since the scaler was fit on these same rows,
# every column's min should be 0 and its max 1.
X_sc.describe()

Unnamed: 0,Age,BMI,Glucose,Insulin,HOMA,Leptin,Adiponectin,Resistin,MCP.1
count,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0
mean,0.512334,0.455847,0.268036,0.135291,0.090615,0.259443,0.234302,0.145975,0.295779
std,0.247889,0.248414,0.159753,0.179692,0.148153,0.223142,0.188087,0.157062,0.209315
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.323077,0.227783,0.182624,0.034398,0.018328,0.093088,0.104944,0.046543,0.135626
50%,0.492308,0.459821,0.22695,0.062335,0.037161,0.185648,0.184056,0.096562,0.257461
75%,0.723077,0.636924,0.297872,0.156301,0.097237,0.384642,0.279242,0.184373,0.395887
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


#### Splitting

In [69]:
# Split the [0, 1]-scaled features with the same seed as the raw split.
# y_train / y_test are reassigned here from the same Y.
X_train_sc, X_test_sc, y_train, y_test = train_test_split(
    X_sc,
    Y,
    random_state=0,
)

In [122]:
# Same architecture and seed as the baseline, now trained on the [0, 1]-scaled
# features so the effect of scaling can be compared directly.
MLPC_sc = MLPClassifier(hidden_layer_sizes=(5,), random_state=0).fit(X_train_sc, y_train)

# Accuracy on the scaled train and test rows.
print('Train score:\t{:.3f}'.format(MLPC_sc.score(X_train_sc, y_train)))
print('Test score:\t{:.3f}'.format(MLPC_sc.score(X_test_sc, y_test)))

Train score:	0.517
Test score:	0.552




## Third try: MLPC + MinMaxScaler (-1, 1)

In [85]:
# Same scaler, but mapping each column onto [-1, 1] instead of the default [0, 1].
# NOTE(review): as with the first scaler, this is fit on the full X (test rows
# included); fit on the training rows only for an unbiased evaluation.
MMScaler_minus = MinMaxScaler(feature_range=(-1, 1)).fit(X)

# Show the fitted scaler's parameters as the cell output.
MMScaler_minus

MinMaxScaler(copy=True, feature_range=(-1, 1))

In [87]:
# Scale into [-1, 1] and restore the original feature names on the result.
X_sc_minus = pd.DataFrame(MMScaler_minus.transform(X), columns=X.columns)

In [88]:
# Sanity check of the scaling: since the scaler was fit on these same rows,
# every column's min should be -1 and its max 1.
X_sc_minus.describe()

Unnamed: 0,Age,BMI,Glucose,Insulin,HOMA,Leptin,Adiponectin,Resistin,MCP.1
count,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0,116.0
mean,0.024668,-0.088305,-0.463928,-0.729418,-0.81877,-0.481113,-0.531395,-0.70805,-0.408441
std,0.495777,0.496828,0.319506,0.359383,0.296307,0.446284,0.376173,0.314125,0.418629
min,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
25%,-0.353846,-0.544435,-0.634752,-0.931204,-0.963344,-0.813824,-0.790113,-0.906914,-0.728748
50%,-0.015385,-0.080358,-0.546099,-0.87533,-0.925678,-0.628703,-0.631889,-0.806877,-0.485078
75%,0.446154,0.273848,-0.404255,-0.687397,-0.805525,-0.230716,-0.441515,-0.631253,-0.208226
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [123]:
# Split the [-1, 1]-scaled features with the same seed as the earlier splits.
# y_train / y_test are reassigned here from the same Y.
X_train_sc_minus, X_test_sc_minus, y_train, y_test = train_test_split(
    X_sc_minus,
    Y,
    random_state=0,
)

In [124]:
# Same architecture and seed again, trained on the [-1, 1]-scaled features.
MLPC_sc_minus = MLPClassifier(hidden_layer_sizes=(5,), random_state=0).fit(
    X_train_sc_minus, y_train
)

# Accuracy on the scaled train and test rows.
print('Train score:\t{:.3f}'.format(MLPC_sc_minus.score(X_train_sc_minus, y_train)))
print('Test score:\t{:.3f}'.format(MLPC_sc_minus.score(X_test_sc_minus, y_test)))

Train score:	0.506
Test score:	0.448




## Methods

### «fit_transform»

In [94]:
# fit_transform() learns the column ranges and scales in a single call;
# wrap the returned array in a DataFrame with the original feature names.
X_ft = pd.DataFrame(MinMaxScaler().fit_transform(X), columns=X.columns)

In [99]:
# Check that fit_transform(X) produced the same frame as the earlier
# fit(X) followed by transform(X).
X_ft.equals(X_sc)

True

### «inverse_transform»

In [103]:
# inverse_transform() maps the [-1, 1]-scaled values back to the original
# units; wrap the returned array in a DataFrame with the feature names.
X_anti = pd.DataFrame(
    MMScaler_minus.inverse_transform(X_sc_minus),
    columns=X.columns,
)

In [109]:
# Compare the round-tripped frame with the original features.
# NOTE(review): DataFrame.equals requires matching dtypes and exactly equal
# values. X_anti is built from the array returned by inverse_transform
# (presumably float64 - confirm), while X appears to keep integer columns such
# as Age and Glucose, and the scale/unscale round trip can add floating-point
# rounding error. A False here therefore does not mean inverse_transform
# failed; a tolerance-based comparison would be the meaningful check.
X_anti.equals(X)

False

# THE END

![giphy](giphy.gif)