# Standard Scaling

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Use seaborn's "darkgrid" theme for any figures drawn in this notebook.
sns.set_style(style="darkgrid")

# Load seaborn's example "tips" dataset and preview its first five rows.
tips_ds = sns.load_dataset(name="tips")
tips_ds.head(n=5)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [None]:
# Keep only the numeric columns; these are the ones the scalers below operate on.
numeric_columns = ["total_bill", "tip", "size"]
tips_ds_numeric = tips_ds.filter(items=numeric_columns, axis="columns")
tips_ds_numeric.head(n=5)

Unnamed: 0,total_bill,tip,size
0,16.99,1.01,2
1,10.34,1.66,3
2,21.01,3.5,3
3,23.68,3.31,2
4,24.59,3.61,4


In [None]:
# Summary statistics of the unscaled numeric columns, as a baseline to compare
# against the scaled versions produced later in the notebook.
tips_ds_numeric.describe()

Unnamed: 0,total_bill,tip,size
count,244.0,244.0,244.0
mean,19.785943,2.998279,2.569672
std,8.902412,1.383638,0.9511
min,3.07,1.0,1.0
25%,13.3475,2.0,2.0
50%,17.795,2.9,2.0
75%,24.1275,3.5625,3.0
max,50.81,10.0,6.0


In [None]:
from sklearn.preprocessing import StandardScaler

# Learn each column's mean and standard deviation first, then use them to
# standardise the same data (equivalent to a single fit_transform call).
ss = StandardScaler()
ss.fit(tips_ds_numeric)
tips_ds_scaled = ss.transform(tips_ds_numeric)

In [None]:
# Wrap the scaled values back into a DataFrame. Both the column labels and the
# row index are copied from the source frame so every scaled row stays aligned
# with the row it was computed from, even if that frame's index is not 0..n-1.
tips_ds_scaled_df = pd.DataFrame(tips_ds_scaled,
                                 columns = tips_ds_numeric.columns,
                                 index = tips_ds_numeric.index)
tips_ds_scaled_df.head()

Unnamed: 0,total_bill,tip,size
0,-0.314711,-1.439947,-0.600193
1,-1.063235,-0.969205,0.453383
2,0.13778,0.363356,0.453383
3,0.438315,0.225754,-0.600193
4,0.540745,0.44302,1.506958


In [None]:
# After standard scaling every column mean is ~0 (floating-point noise only).
# The reported std is 1.002056 rather than exactly 1 because describe() uses the
# sample standard deviation (ddof=1) while StandardScaler divides by the
# population standard deviation (ddof=0): sqrt(244 / 243) ~= 1.002056.
tips_ds_scaled_df.describe()

Unnamed: 0,total_bill,tip,size
count,244.0,244.0,244.0
mean,-7.871663e-17,2.839259e-16,-5.824121e-17
std,1.002056,1.002056,1.002056
min,-1.881547,-1.447189,-1.653768
25%,-0.7247111,-0.7229713,-0.6001926
50%,-0.2241005,-0.07117518,-0.6001926
75%,0.4886857,0.4086192,0.4533829
max,3.492068,5.070772,3.61411


# Min/Max Scaling

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Learn each column's minimum and maximum first, then rescale the same data
# with them (equivalent to a single fit_transform call).
mms = MinMaxScaler()
mms.fit(tips_ds_numeric)
tips_ds_mms = mms.transform(tips_ds_numeric)

In [None]:
# Wrap the min/max-scaled values back into a DataFrame. Both the column labels
# and the row index are copied from the source frame so every scaled row stays
# aligned with the row it was computed from, even if that index is not 0..n-1.
tips_ds_mms_df = pd.DataFrame(tips_ds_mms,
                              columns = tips_ds_numeric.columns,
                              index = tips_ds_numeric.index)
tips_ds_mms_df.head()

Unnamed: 0,total_bill,tip,size
0,0.291579,0.001111,0.2
1,0.152283,0.073333,0.4
2,0.375786,0.277778,0.4
3,0.431713,0.256667,0.2
4,0.450775,0.29,0.6


In [None]:
# After min/max scaling every column should span exactly [0, 1]: check that the
# "min" row is 0 and the "max" row is 1 for all three columns.
tips_ds_mms_df.describe()

Unnamed: 0,total_bill,tip,size
count,244.0,244.0,244.0
mean,0.350145,0.222031,0.313934
std,0.186477,0.153738,0.19022
min,0.0,0.0,0.0
25%,0.215281,0.111111,0.2
50%,0.308442,0.211111,0.2
75%,0.441087,0.284722,0.4
max,1.0,1.0,1.0


# Label Encoding and One-Hot Encoding

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
# Build a one-column frame holding the bridge categories.
bridge_types = ('Arch','Beam','Truss','Cantilever','Tied Arch','Suspension','Cable')
bridge_df = pd.DataFrame({'Bridge_Types': list(bridge_types)})

# Fit the encoder on the category names, then store the integer code it assigns
# to each row in a new column next to the original labels.
labelencoder = LabelEncoder()
labelencoder.fit(bridge_df['Bridge_Types'])
bridge_df = bridge_df.assign(
    Bridge_Types_Cat=labelencoder.transform(bridge_df['Bridge_Types'])
)
bridge_df

Unnamed: 0,Bridge_Types,Bridge_Types_Cat
0,Arch,0
1,Beam,1
2,Truss,6
3,Cantilever,3
4,Tied Arch,5
5,Suspension,4
6,Cable,2


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
# One-hot encode the integer codes produced by the label encoder.
# handle_unknown='ignore' makes transform() emit an all-zero row for a category
# that was not seen during fit instead of raising an error.
enc = OneHotEncoder(handle_unknown='ignore')
# fit_transform returns a sparse matrix here, hence .toarray() before wrapping
# it in a DataFrame. The resulting columns are numbered 0..n-1, one per
# category; the row index is copied from bridge_df so the join below lines up.
enc_df = pd.DataFrame(
    enc.fit_transform(bridge_df[['Bridge_Types_Cat']]).toarray(),
    index=bridge_df.index,
)
# Join on the row index. Only the two original columns are taken from bridge_df
# so this cell can be re-run safely: joining onto a frame that already carries
# the one-hot columns raises "columns overlap but no suffix specified".
bridge_df = bridge_df[['Bridge_Types', 'Bridge_Types_Cat']].join(enc_df)
bridge_df

Unnamed: 0,Bridge_Types,Bridge_Types_Cat,0,1,2,3,4,5,6
0,Arch,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Beam,1,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2,Truss,6,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,Cantilever,3,0.0,0.0,0.0,1.0,0.0,0.0,0.0
4,Tied Arch,5,0.0,0.0,0.0,0.0,0.0,1.0,0.0
5,Suspension,4,0.0,0.0,0.0,0.0,1.0,0.0,0.0
6,Cable,2,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [None]:
import pandas as pd
import numpy as np
# Rebuild the frame from the raw category names so this cell does not depend on
# the columns added by the encoder cells above.
bridge_types = ('Arch','Beam','Truss','Cantilever','Tied Arch','Suspension','Cable')
bridge_df = pd.DataFrame(bridge_types, columns=['Bridge_Types'])
# Generate one indicator column per category, named "Type_is_<category>".
# dtype=int pins the indicators to 0/1; without it, pandas >= 2.0 returns
# boolean True/False columns instead of the integer indicators shown here.
dum_df = pd.get_dummies(bridge_df, columns=["Bridge_Types"], prefix=["Type_is"], dtype=int)
# get_dummies drops the column it encodes, so join the indicators back onto the
# original frame (on the row index) to keep the label next to its encoding.
bridge_df = bridge_df.join(dum_df)
bridge_df

Unnamed: 0,Bridge_Types,Type_is_Arch,Type_is_Beam,Type_is_Cable,Type_is_Cantilever,Type_is_Suspension,Type_is_Tied Arch,Type_is_Truss
0,Arch,1,0,0,0,0,0,0
1,Beam,0,1,0,0,0,0,0
2,Truss,0,0,0,0,0,0,1
3,Cantilever,0,0,0,1,0,0,0
4,Tied Arch,0,0,0,0,0,1,0
5,Suspension,0,0,0,0,1,0,0
6,Cable,0,0,1,0,0,0,0
