In [1]:
import pandas as pd

In [2]:
df = pd.DataFrame.from_dict({'Age':[10, 35, 34, 23, 70, 55, 89], 
                            'Height':[130, 178, 155, 133, 195, 150, 205],
                            'Weight':[80, 200, 220, 150, 140, 95, 180]})
df

Unnamed: 0,Age,Height,Weight
0,10,130,80
1,35,178,200
2,34,155,220
3,23,133,150
4,70,195,140
5,55,150,95
6,89,205,180


In [3]:
def min_max_scaling(series):
    return (series-series.min())/ (series.max()-series.min())
for col in df.columns:
    df[col] = min_max_scaling(df[col])
print(df.head())

        Age    Height    Weight
0  0.000000  0.000000  0.000000
1  0.316456  0.640000  0.857143
2  0.303797  0.333333  1.000000
3  0.164557  0.040000  0.500000
4  0.759494  0.866667  0.428571


## Normalisasi Standar (MinMaxScaler)

In [4]:
from sklearn.preprocessing import MinMaxScaler

In [5]:
scaler = MinMaxScaler()
scaler.fit(df)
scaled = scaler.fit_transform(df)
scaled_df = pd.DataFrame(scaled, columns=df.columns)
print(scaled_df)

        Age    Height    Weight
0  0.000000  0.000000  0.000000
1  0.316456  0.640000  0.857143
2  0.303797  0.333333  1.000000
3  0.164557  0.040000  0.500000
4  0.759494  0.866667  0.428571
5  0.569620  0.266667  0.107143
6  1.000000  1.000000  0.714286


## Normalisasi Standar (Z-score)

In [6]:
def z_score_standardization(series):
    return (series-series.min())/series.std()
for col in df.columns :
    df[col] = z_score_standardization(df[col])
print(df)    

        Age    Height    Weight
0  0.000000  0.000000  0.000000
1  0.903793  1.625574  2.302811
2  0.867641  0.846653  2.686613
3  0.469972  0.101598  1.343306
4  2.169102  2.201298  1.151405
5  1.626827  0.677322  0.287851
6  2.855984  2.539959  1.919009


In [7]:
from sklearn.preprocessing import StandardScaler

In [8]:
scaler = StandardScaler()
scaler.fit(df)
scaled = scaler.fit_transform(df)
scaled_df=pd.DataFrame(scaled,columns=df.columns)
print(scaled_df)

        Age    Height    Weight
0 -1.372269 -1.233255 -1.495353
1 -0.396061  0.522566  0.991967
2 -0.435110 -0.318765  1.406520
3 -0.864641 -1.123516 -0.044416
4  0.970629  1.144419 -0.251693
5  0.384905 -0.501663 -1.184438
6  1.712547  1.510215  0.577414


## Normalisasi Standar (Absolute Max)

In [9]:
from sklearn.preprocessing import MaxAbsScaler

In [10]:
scaler = MaxAbsScaler()
scaler.fit(df)
scaled = scaler.transform(df)
scaled_df = pd.DataFrame(scaled, columns=df.columns)
print(scaled_df)

        Age    Height    Weight
0  0.000000  0.000000  0.000000
1  0.316456  0.640000  0.857143
2  0.303797  0.333333  1.000000
3  0.164557  0.040000  0.500000
4  0.759494  0.866667  0.428571
5  0.569620  0.266667  0.107143
6  1.000000  1.000000  0.714286


# CONTOH

In [15]:
import pandas as pd
import numpy as np
from sklearn import preprocessing

In [19]:
csv_data = pd.read_csv("FILE_LATIHAN/shopping_data.csv")
csv_data

Unnamed: 0,CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100)
0,1,Male,19,15,39
1,2,Male,21,15,81
2,3,Female,20,16,6
3,4,Female,23,16,77
4,5,Female,31,17,40
...,...,...,...,...,...
195,196,Female,35,120,79
196,197,Female,45,126,28
197,198,Male,32,126,74
198,199,Male,32,137,18


In [21]:
array = csv_data.values
array

array([[1, 'Male', 19, 15, 39],
       [2, 'Male', 21, 15, 81],
       [3, 'Female', 20, 16, 6],
       [4, 'Female', 23, 16, 77],
       [5, 'Female', 31, 17, 40],
       [6, 'Female', 22, 17, 76],
       [7, 'Female', 35, 18, 6],
       [8, 'Female', 23, 18, 94],
       [9, 'Male', 64, 19, 3],
       [10, 'Female', 30, 19, 72],
       [11, 'Male', 67, 19, 14],
       [12, 'Female', 35, 19, 99],
       [13, 'Female', 58, 20, 15],
       [14, 'Female', 24, 20, 77],
       [15, 'Male', 37, 20, 13],
       [16, 'Male', 22, 20, 79],
       [17, 'Female', 35, 21, 35],
       [18, 'Male', 20, 21, 66],
       [19, 'Male', 52, 23, 29],
       [20, 'Female', 35, 23, 98],
       [21, 'Male', 35, 24, 35],
       [22, 'Male', 25, 24, 73],
       [23, 'Female', 46, 25, 5],
       [24, 'Male', 31, 25, 73],
       [25, 'Female', 54, 28, 14],
       [26, 'Male', 29, 28, 82],
       [27, 'Female', 45, 28, 32],
       [28, 'Male', 35, 28, 61],
       [29, 'Female', 40, 29, 31],
       [30, 'Female', 23

In [24]:
x = array[:,2:5]
x

array([[19, 15, 39],
       [21, 15, 81],
       [20, 16, 6],
       [23, 16, 77],
       [31, 17, 40],
       [22, 17, 76],
       [35, 18, 6],
       [23, 18, 94],
       [64, 19, 3],
       [30, 19, 72],
       [67, 19, 14],
       [35, 19, 99],
       [58, 20, 15],
       [24, 20, 77],
       [37, 20, 13],
       [22, 20, 79],
       [35, 21, 35],
       [20, 21, 66],
       [52, 23, 29],
       [35, 23, 98],
       [35, 24, 35],
       [25, 24, 73],
       [46, 25, 5],
       [31, 25, 73],
       [54, 28, 14],
       [29, 28, 82],
       [45, 28, 32],
       [35, 28, 61],
       [40, 29, 31],
       [23, 29, 87],
       [60, 30, 4],
       [21, 30, 73],
       [53, 33, 4],
       [18, 33, 92],
       [49, 33, 14],
       [21, 33, 81],
       [42, 34, 17],
       [30, 34, 73],
       [36, 37, 26],
       [20, 37, 75],
       [65, 38, 35],
       [24, 38, 92],
       [48, 39, 36],
       [31, 39, 61],
       [49, 39, 28],
       [24, 39, 65],
       [50, 40, 55],
       [27, 40, 47]

In [25]:
y = array[:,0:1]
y

array([[1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7],
       [8],
       [9],
       [10],
       [11],
       [12],
       [13],
       [14],
       [15],
       [16],
       [17],
       [18],
       [19],
       [20],
       [21],
       [22],
       [23],
       [24],
       [25],
       [26],
       [27],
       [28],
       [29],
       [30],
       [31],
       [32],
       [33],
       [34],
       [35],
       [36],
       [37],
       [38],
       [39],
       [40],
       [41],
       [42],
       [43],
       [44],
       [45],
       [46],
       [47],
       [48],
       [49],
       [50],
       [51],
       [52],
       [53],
       [54],
       [55],
       [56],
       [57],
       [58],
       [59],
       [60],
       [61],
       [62],
       [63],
       [64],
       [65],
       [66],
       [67],
       [68],
       [69],
       [70],
       [71],
       [72],
       [73],
       [74],
       [75],
       [76],
       [77],
       [

In [28]:
dataset=pd.DataFrame({'Customer ID':array[:,0],'Gender':array[:,1],
                      'Age':array[:,2],'Income':array[:,3],'Spending Score':array[:,4]})
print('Dataset sebelum dinormalisasi')
dataset.head(10)

Dataset sebelum dinormalisasi


Unnamed: 0,Customer ID,Gender,Age,Income,Spending Score
0,1,Male,19,15,39
1,2,Male,21,15,81
2,3,Female,20,16,6
3,4,Female,23,16,77
4,5,Female,31,17,40
5,6,Female,22,17,76
6,7,Female,35,18,6
7,8,Female,23,18,94
8,9,Male,64,19,3
9,10,Female,30,19,72


In [30]:
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0,1))
data = min_max_scaler.fit_transform(x)
dataset = pd.DataFrame({'Age':data[:,0],'Income':data[:,1],
                        'Spending Score':data[:,2],'Customer ID':array[:,0],
                        'Gender':array[:,1]})

print("dataset setelah dinormalisasi :")
print(dataset.head(10))

dataset setelah dinormalisasi :
        Age    Income  Spending Score Customer ID  Gender
0  0.019231  0.000000        0.387755           1    Male
1  0.057692  0.000000        0.816327           2    Male
2  0.038462  0.008197        0.051020           3  Female
3  0.096154  0.008197        0.775510           4  Female
4  0.250000  0.016393        0.397959           5  Female
5  0.076923  0.016393        0.765306           6  Female
6  0.326923  0.024590        0.051020           7  Female
7  0.096154  0.024590        0.948980           8  Female
8  0.884615  0.032787        0.020408           9    Male
9  0.230769  0.032787        0.724490          10  Female
