In [1]:
import numpy as np
import pandas as pd
def load_titanic():
    data = pd.read_csv("train.csv")
    return data

# StandardScaler

#### StandardScaler: Klasik standartlaştırma. Ortalamayı çıkar, standart sapmaya böl. z = (x - u) / s

***Sklearn ile***

In [2]:
df = load_titanic()
from sklearn.preprocessing import StandardScaler
df["Age"] = StandardScaler().fit_transform(df[["Age"]])
df["Age"].describe().T

count    7.140000e+02
mean     2.388379e-16
std      1.000701e+00
min     -2.016979e+00
25%     -6.595416e-01
50%     -1.170488e-01
75%      5.718310e-01
max      3.465126e+00
Name: Age, dtype: float64

***From Scratch***

In [3]:
df = load_titanic()
df.Age = (df.Age - df.Age.mean(axis = 0)) / df.Age.std(axis = 0)
df["Age"].describe().T

count    7.140000e+02
mean     2.338621e-16
std      1.000000e+00
min     -2.015566e+00
25%     -6.590796e-01
50%     -1.169668e-01
75%      5.714304e-01
max      3.462699e+00
Name: Age, dtype: float64

# RobustScaler

#### RobustScaler: Medyanı çıkar iqr'a böl.

***Sklearn ile***

In [4]:
df = load_titanic()
from sklearn.preprocessing import RobustScaler

transformer = RobustScaler().fit(df[["Age"]])
df["Age"] = transformer.transform(df[["Age"]])
df["Age"].describe().T

count    714.000000
mean       0.095056
std        0.812671
min       -1.542937
25%       -0.440559
50%        0.000000
75%        0.559441
max        2.909091
Name: Age, dtype: float64

***From Scratch***

In [5]:
#X-Q1(X) / Q3(X)-Q1(X)
df = load_titanic()
df["Age"] = (df["Age"]-df["Age"].quantile(0.5)) / (df["Age"].quantile(0.75)-df["Age"].quantile(0.25))
df["Age"].describe().T

count    714.000000
mean       0.095056
std        0.812671
min       -1.542937
25%       -0.440559
50%        0.000000
75%        0.559441
max        2.909091
Name: Age, dtype: float64

In [6]:
df = load_titanic()
q1 = df["Age"].quantile(0.25)
q3 = df["Age"].quantile(0.75)
iqr = q3 - q1
df["Age"] = (df["Age"] - df["Age"].median()) / iqr
df["Age"].describe().T

count    714.000000
mean       0.095056
std        0.812671
min       -1.542937
25%       -0.440559
50%        0.000000
75%        0.559441
max        2.909091
Name: Age, dtype: float64

# MinMaxScaler

#### MinMaxScaler: Verilen 2 değer arasında değişken dönüşümü
#### X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))

***Sklearn ile***

In [7]:
df = load_titanic()
from sklearn.preprocessing import MinMaxScaler

transformer = MinMaxScaler((0, 1)).fit(df[["Age"]])
df["Age"] = transformer.transform(df[["Age"]])  # on tanımlı değeri 0 ile 1 arası.
df["Age"].describe().T

count    714.000000
mean       0.367921
std        0.182540
min        0.000000
25%        0.247612
50%        0.346569
75%        0.472229
max        1.000000
Name: Age, dtype: float64

***From Scratch***

In [8]:
df = load_titanic()
df["Age"] = (df["Age"] - df["Age"].min(axis = 0)) / (df["Age"].max(axis = 0) - df["Age"].min(axis = 0))
df["Age"] .describe().T

count    714.000000
mean       0.367921
std        0.182540
min        0.000000
25%        0.247612
50%        0.346569
75%        0.472229
max        1.000000
Name: Age, dtype: float64