### Normalization
Normalization rescales values that were measured on different scales onto a common scale. This makes the values easier to compare and combine, and many algorithms give more accurate results when their inputs are on comparable scales.

In [1]:
import numpy as np
import pandas as pd

from sklearn import preprocessing

In [2]:
# Sample values of very different magnitudes (from 7 up to 9098),
# reused by the transformation examples below.
my_data = [
    1267, 7, 5432, 987, 1703, 123,
    9098, 4072, 540, 3078, 54,
]

In [3]:
# ---- Logarithmic Transformation ----
# Element-wise natural logarithm (base e). Large values are compressed far
# more than small ones, which narrows the spread of the data.
log_data = np.log(np.asarray(my_data))

print("Original data: ", my_data)
print("-------------")
print("Logarithmic Transformation: ", log_data)

Original data:  [1267, 7, 5432, 987, 1703, 123, 9098, 4072, 540, 3078, 54]
-------------
Logarithmic Transformation:  [7.14440718 1.94591015 8.60006267 6.89467004 7.44014668 4.81218436
 9.11580989 8.31188956 6.29156914 8.03203531 3.98898405]


In [4]:
# ---- Normalization: rescale the vector to unit (L2) norm ----
# NOTE: preprocessing.normalize is NOT min-max scaling. It divides every value
# in a row by that row's Euclidean length, so the row ends up with norm 1.
# The results fall between 0 and 1 here only because every input is
# non-negative. (For min-max scaling to [0, 1], see preprocessing.minmax_scale.)
# This function expects a 2D array (one row per sample): [my_data]
normalized_data = preprocessing.normalize([my_data], norm="l2")

# Remove the sample dimension. .tolist() converts to plain Python floats so the
# printed list looks the same on every NumPy version (on NumPy >= 2 a list of
# NumPy scalars prints as np.float64(...)).
normalized_data_1D = normalized_data[0].tolist()

print("Original data: ", my_data)
print("-------------")
print("Normalized data: ", normalized_data_1D)

Original data:  [1267, 7, 5432, 987, 1703, 123, 9098, 4072, 540, 3078, 54]
-------------
Normalized data:  [0.10553758940384012, 0.0005830806044411056, 0.45247054904629797, 0.08221436522619589, 0.14185518133760042, 0.010245559192322284, 0.7578381913150255, 0.3391863173263117, 0.044980503771171, 0.25638887149567474, 0.0044980503771171005]


In [5]:
# ---- Standardization: mean: 0, s.dev: 1 ----
# Defaults spelled out for the reader: subtract the mean (with_mean), then
# divide by the standard deviation (with_std).
standardized_data = preprocessing.scale(
    my_data,
    with_mean=True,
    with_std=True,
)

print("Original data: ", my_data)
print("-------------")
print("Standardized data: ", standardized_data)

Original data:  [1267, 7, 5432, 987, 1703, 123, 9098, 4072, 540, 3078, 54]
-------------
Standardized data:  [-0.41634325 -0.88080859  1.11897275 -0.51955777 -0.25562349 -0.83804829
  2.47034572  0.61764508 -0.68433238  0.25123353 -0.8634833 ]


In [6]:
# ===================== EXAMPLE WITH DATASET =====================
# Load the semicolon-separated dataset from the working directory.
absenteeism_df = pd.read_csv("Absenteeism_at_work.csv", sep=";")

# Standardize the "Age" column (mean 0, standard deviation 1).
# The scaled values overwrite the original ages in the DataFrame.
absenteeism_df["Age"] = preprocessing.scale(
    absenteeism_df["Age"].values,
    with_mean=True,
    with_std=True,
)

# Last expression of the cell: display the standardized column.
absenteeism_df["Age"]

0     -0.532868
1      2.092860
2      0.239405
3      0.393859
4     -0.532868
5      0.239405
6     -1.305142
7     -0.069505
8     -0.378414
9      0.084950
10    -0.069505
11    -0.069505
12    -0.069505
13     0.239405
14     0.239405
15     0.702769
16     0.239405
17     0.239405
18    -0.532868
19     1.629496
20    -1.305142
21     0.239405
22    -1.305142
23    -0.069505
24    -0.532868
25    -1.305142
26    -0.532868
27    -1.150687
28    -0.532868
29     0.239405
         ...   
710   -0.069505
711    2.092860
712   -0.841778
713   -0.996232
714    1.783951
715   -0.532868
716    2.092860
717   -0.996232
718    0.548314
719   -0.996232
720    0.084950
721   -0.841778
722    0.084950
723    0.084950
724   -0.841778
725    1.011678
726   -0.841778
727    3.328497
728    0.084950
729    3.328497
730   -0.532868
731    0.084950
732   -1.305142
733   -1.305142
734   -0.841778
735   -0.532868
736    0.084950
737    0.548314
738    0.393859
739    2.556224
Name: Age, Length: 740, dtype: float64

### Credits
* [Scikit](https://scikit-learn.org/stable/modules/preprocessing.html)
* [Aletta Smits](https://nl.linkedin.com/in/alettasmits)
* [Robert R.F. DeFilippi](https://medium.com/@rrfd/standardize-or-normalize-examples-in-python-e3f174b65dfc)