In [1]:
import numpy as np
from sklearn import preprocessing

In [2]:
data = np.array([[2.2, 5.9, -1.8], [5.4, -3.2, -5.1], [-1.9, 4.2, 3.2]])
data

array([[ 2.2,  5.9, -1.8],
       [ 5.4, -3.2, -5.1],
       [-1.9,  4.2,  3.2]])

**Bringing the values of each feature vector on a common scale**

- **L1 - Least Absolute Deviations** - sum of absolute values (on each row) = 1; it is insensitive to outliers
- **L2 - Least Squares** - sum of squares (on each row) = 1; takes outliers in consideration during training

In [5]:
data_l1 = preprocessing.normalize(data, norm = 'l1')
print("data_l1\n",data_l1)
print("\nSum of absolute values:",(0.22222222+0.5959596+0.18181818))

data_l1
 [[ 0.22222222  0.5959596  -0.18181818]
 [ 0.39416058 -0.23357664 -0.37226277]
 [-0.20430108  0.4516129   0.34408602]]

Sum of absolute values: 1.0


In [7]:
data_l2 = preprocessing.normalize(data, norm = 'l2')
print("\n data_l2\n",data_l2)
print("\nSum of squares:", (0.3359268**2 +0.90089461**2 + (-0.2748492)**2))


 data_l2
 [[ 0.3359268   0.90089461 -0.2748492 ]
 [ 0.6676851  -0.39566524 -0.63059148]
 [-0.33858465  0.74845029  0.57024784]]

Sum of squares: 0.9999999960259321


#### Introducing outliers to check how normallisation really works!

## L1 normalisation

In [10]:
data_outliers = np.array([[22.2, 5.9, -1.8], [5.4, -33.2, -5.1], [-1.9, 44.2, 3.2]])
data_outliers

array([[ 22.2,   5.9,  -1.8],
       [  5.4, -33.2,  -5.1],
       [ -1.9,  44.2,   3.2]])

In [13]:
data_outliers_l1 = preprocessing.normalize(data_outliers, norm = 'l1')
print("data_outliers_l1\n",data_outliers_l1)
#print("\nSum of absolute values:",(0.22222222+0.5959596+0.18181818))
print("\nData L1 without Outliers\n",data_l1)

data_outliers_l1
 [[ 0.74247492  0.19732441 -0.06020067]
 [ 0.12356979 -0.7597254  -0.11670481]
 [-0.03853955  0.89655172  0.06490872]]

Data L1 without Outliers
 [[ 0.74247492  0.19732441 -0.06020067]
 [ 0.12356979 -0.7597254  -0.11670481]
 [-0.03853955  0.89655172  0.06490872]]


### Insensitive to outliers

In [14]:
data_outliers = np.array([[22.2, 5.9, -1.8], [5.4, -33.2, -5.1], [-1.9, 44.2, 3.2]])
data_outliers

array([[ 22.2,   5.9,  -1.8],
       [  5.4, -33.2,  -5.1],
       [ -1.9,  44.2,   3.2]])

In [17]:
data_outliers_l2 = preprocessing.normalize(data_outliers, norm = 'l2')
print("data_outliers_l2\n",data_outliers_l2)
print("\nSum of squares:", (0.96349774**2 + 0.25606472**2 + (-0.07812144)**2))

print("\nData L2 without Outliers\n",data_l2)
print("\nSum of squares:", (0.3359268**2 +0.90089461**2 + (-0.2748492)**2))

data_outliers_l2
 [[ 0.96349774  0.25606472 -0.07812144]
 [ 0.15872676 -0.97587562 -0.1499086 ]
 [-0.04283486  0.99647407  0.07214292]]

Sum of squares: 0.9999999952014595

Data L2 without Outliers
 [[ 0.3359268   0.90089461 -0.2748492 ]
 [ 0.6676851  -0.39566524 -0.63059148]
 [-0.33858465  0.74845029  0.57024784]]

Sum of squares: 0.9999999960259321


### Sensitive to outliers