In [13]:
import numpy as np

from sklearn import preprocessing

# With the packages imported, create some sample data to use in the preprocessing examples below:

# 3x4 float array used as the sample data for the preprocessing examples.
input_data = np.array(
    [
        [3.0, -1.5, 3.0, -6.4],
        [0.0, 3.0, -1.3, 4.1],
        [1.0, 2.3, -2.9, -4.3],
    ]
)

# Mean removal

In [29]:
# Standardize the sample data, then report the per-column (axis 0) mean and
# standard deviation of the result; they should come out as ~0 and 1.
data_standardized = preprocessing.scale(input_data)
print("data =\n", input_data)
print("\nMean = ", np.mean(data_standardized, axis=0))
print("Std deviation = ", np.std(data_standardized, axis=0))

data =
 [[ 3.  -1.5  3.  -6.4]
 [ 0.   3.  -1.3  4.1]
 [ 1.   2.3 -2.9 -4.3]]

Mean =  [  5.55111512e-17  -3.70074342e-17   0.00000000e+00  -1.85037171e-17]
Std deviation =  [ 1.  1.  1.  1.]


# Scaling

In [30]:
# Min-max scale the sample data into the [0, 1] range: fit the scaler on the
# data first, then apply the fitted scaler to that same data.
data_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
data_scaled = data_scaler.fit(input_data).transform(input_data)
print("data =\n", input_data)
print("\nMin max scaled data = \n", data_scaled)

data =
 [[ 3.  -1.5  3.  -6.4]
 [ 0.   3.  -1.3  4.1]
 [ 1.   2.3 -2.9 -4.3]]

Min max scaled data = 
 [[ 1.          0.          1.          0.        ]
 [ 0.          1.          0.27118644  1.        ]
 [ 0.33333333  0.84444444  0.          0.2       ]]


# Normalization

In [31]:
# L1-normalize the sample data and show it next to the original.
data_normalized = preprocessing.normalize(input_data, norm='l1')
print("data =\n", input_data)
print("L1 normalized data =\n", data_normalized)
# Sanity check displayed as the cell result: the absolute values in every
# row should sum to 1 after L1 normalization.
np.abs(data_normalized).sum(axis=1)

data =
 [[ 3.  -1.5  3.  -6.4]
 [ 0.   3.  -1.3  4.1]
 [ 1.   2.3 -2.9 -4.3]]
L1 normalized data =
 [[ 0.21582734 -0.10791367  0.21582734 -0.46043165]
 [ 0.          0.35714286 -0.1547619   0.48809524]
 [ 0.0952381   0.21904762 -0.27619048 -0.40952381]]


array([ 1.,  1.,  1.])

# Binarization

In [32]:
# Binarize the sample data against a threshold of 1.4: in the output, values
# above the threshold become 1 and all others become 0.
binarizer = preprocessing.Binarizer(threshold=1.4)
data_binarized = binarizer.transform(input_data)
print("data =\n", input_data)
print("\nBinarized data =\n", data_binarized)

data =
 [[ 3.  -1.5  3.  -6.4]
 [ 0.   3.  -1.3  4.1]
 [ 1.   2.3 -2.9 -4.3]]

Binarized data =
 [[ 1.  0.  1.  0.]
 [ 0.  1.  0.  1.]
 [ 0.  1.  0.  0.]]


# One Hot Encoding

In [33]:
# Fit a one-hot encoder on four sample rows of integer-coded features, then
# encode one new row and print the result converted to a dense array.
encoder = preprocessing.OneHotEncoder().fit(
    [
        [0, 2, 1, 12],
        [1, 3, 5, 3],
        [2, 3, 2, 12],
        [1, 2, 4, 3],
    ]
)
encoded_vector = encoder.transform([[2, 3, 5, 3]]).toarray()
print("\nEncoded vector =\n", encoded_vector)


Encoded vector =
 [[ 0.  0.  1.  0.  1.  0.  0.  0.  1.  1.  0.]]


# Label Encoding

In [41]:
# Fit a label encoder on the known class names. The learned classes are
# exposed in `classes_`; a class's position there is its integer code.
input_classes = ['suzuki', 'ford', 'suzuki', 'toyota', 'ford', 'bmw']
label_encoder = preprocessing.LabelEncoder().fit(input_classes)
print("\nClass mapping:")
for i, item in enumerate(label_encoder.classes_):
    print('\t', item, '-->', i)

print('\n')

# Encode string labels into their integer codes.
labels = ['toyota', 'ford', 'suzuki']
encoded_labels = label_encoder.transform(labels)
print("\nLabels =", labels)
# .tolist() converts to native Python ints so the printed list reads
# [3, 1, 2] on every NumPy version; list() would keep NumPy scalars, which
# NumPy >= 2 renders as np.int64(3).
print("Encoded labels =", encoded_labels.tolist())

print('\n')

# Decode integer codes back into the original string labels.
encoded_labels = [3, 2, 0, 2, 1]
decoded_labels = label_encoder.inverse_transform(encoded_labels)
print("\nEncoded labels =", encoded_labels)
# Same reasoning as above: native Python strings print as 'toyota' rather
# than np.str_('toyota').
print("Decoded labels =", decoded_labels.tolist())


Class mapping:
	 bmw --> 0
	 ford --> 1
	 suzuki --> 2
	 toyota --> 3



Labels = ['toyota', 'ford', 'suzuki']
Encoded labels = [3, 1, 2]



Encoded labels = [3, 2, 0, 2, 1]
Decoded labels = ['toyota', 'suzuki', 'bmw', 'suzuki', 'ford']
