In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
# !pip install missingno
import missingno as msno
from datetime import date
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import LocalOutlierFactor
from sklearn.preprocessing import MinMaxScaler, LabelEncoder, StandardScaler, RobustScaler

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.float_format', lambda x: '%.3f' % x)
pd.set_option('display.width', 500)

def load():
    data = pd.read_csv("titanic.csv")
    return data

def load_application_train():
    data = pd.read_csv("application_train.csv")
    return data
df = load()
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.283,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


### Label Encoding & Binary Encoding

In [2]:
le = LabelEncoder()
le.fit_transform(df["Sex"])[0:5]

array([1, 0, 0, 0, 1])

In [3]:
def label_encoder(dataframe,binary_col):
    le= LabelEncoder()
    dataframe[binary_col] = le.fit_transform(dataframe[binary_col])
    return dataframe

In [4]:
binary_col = [col for col in df.columns if df[col].dtype not in [int,float] and df[col].nunique() == 2]

In [5]:
for col in binary_col:
    label_encoder(df,col)

In [6]:
df.head(3)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",1,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",0,38.0,1,0,PC 17599,71.283,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",0,26.0,0,0,STON/O2. 3101282,7.925,,S


In [7]:
df = load_application_train()
df.shape

(307511, 122)

In [8]:
binary_col = [col for col in df.columns if df[col].dtype not in [int,float] and df[col].nunique() == 2]
binary_col

['TARGET',
 'NAME_CONTRACT_TYPE',
 'FLAG_OWN_CAR',
 'FLAG_OWN_REALTY',
 'FLAG_MOBIL',
 'FLAG_EMP_PHONE',
 'FLAG_WORK_PHONE',
 'FLAG_CONT_MOBILE',
 'FLAG_PHONE',
 'FLAG_EMAIL',
 'REG_REGION_NOT_LIVE_REGION',
 'REG_REGION_NOT_WORK_REGION',
 'LIVE_REGION_NOT_WORK_REGION',
 'REG_CITY_NOT_LIVE_CITY',
 'REG_CITY_NOT_WORK_CITY',
 'LIVE_CITY_NOT_WORK_CITY',
 'EMERGENCYSTATE_MODE',
 'FLAG_DOCUMENT_2',
 'FLAG_DOCUMENT_3',
 'FLAG_DOCUMENT_4',
 'FLAG_DOCUMENT_5',
 'FLAG_DOCUMENT_6',
 'FLAG_DOCUMENT_7',
 'FLAG_DOCUMENT_8',
 'FLAG_DOCUMENT_9',
 'FLAG_DOCUMENT_10',
 'FLAG_DOCUMENT_11',
 'FLAG_DOCUMENT_12',
 'FLAG_DOCUMENT_13',
 'FLAG_DOCUMENT_14',
 'FLAG_DOCUMENT_15',
 'FLAG_DOCUMENT_16',
 'FLAG_DOCUMENT_17',
 'FLAG_DOCUMENT_18',
 'FLAG_DOCUMENT_19',
 'FLAG_DOCUMENT_20',
 'FLAG_DOCUMENT_21']

In [9]:
df[binary_col].head()

Unnamed: 0,TARGET,NAME_CONTRACT_TYPE,FLAG_OWN_CAR,FLAG_OWN_REALTY,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,EMERGENCYSTATE_MODE,FLAG_DOCUMENT_2,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_10,FLAG_DOCUMENT_11,FLAG_DOCUMENT_12,FLAG_DOCUMENT_13,FLAG_DOCUMENT_14,FLAG_DOCUMENT_15,FLAG_DOCUMENT_16,FLAG_DOCUMENT_17,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21
0,1,Cash loans,N,Y,1,1,0,1,1,0,0,0,0,0,0,0,No,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,Cash loans,N,N,1,1,0,1,1,0,0,0,0,0,0,0,No,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,Revolving loans,Y,Y,1,1,1,1,1,0,0,0,0,0,0,0,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,Cash loans,N,Y,1,1,0,1,0,0,0,0,0,0,0,0,,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,Cash loans,N,Y,1,1,0,1,0,0,0,0,0,0,1,1,,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0


In [10]:
for col in binary_col:
    label_encoder(df,col)

In [11]:
df[binary_col].head()

Unnamed: 0,TARGET,NAME_CONTRACT_TYPE,FLAG_OWN_CAR,FLAG_OWN_REALTY,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,EMERGENCYSTATE_MODE,FLAG_DOCUMENT_2,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_10,FLAG_DOCUMENT_11,FLAG_DOCUMENT_12,FLAG_DOCUMENT_13,FLAG_DOCUMENT_14,FLAG_DOCUMENT_15,FLAG_DOCUMENT_16,FLAG_DOCUMENT_17,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21
0,1,0,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0


In [12]:
df = load()
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.283,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [13]:
df["Embarked"].value_counts()

S    644
C    168
Q     77
Name: Embarked, dtype: int64

In [14]:
pd.get_dummies(df,columns=["Embarked"]).head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked_C,Embarked_Q,Embarked_S
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,0,0,1
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.283,C85,1,0,0
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,0,0,1
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,0,0,1
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,0,0,1


In [15]:
pd.get_dummies(df,columns=["Embarked"],drop_first=True).head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked_Q,Embarked_S
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,0,1
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.283,C85,0,0
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,0,1
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,0,1
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,0,1


In [16]:
def one_hot_encoder(dataframe,columns,drop_first=True):
    dataframe= pd.get_dummies(dataframe,columns=[columns],drop_first=drop_first).head()
    return dataframe

In [17]:
one_hot_encoder(df,"Sex")

Unnamed: 0,PassengerId,Survived,Pclass,Name,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Sex_male
0,1,0,3,"Braund, Mr. Owen Harris",22.0,1,0,A/5 21171,7.25,,S,1
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",38.0,1,0,PC 17599,71.283,C85,C,0
2,3,1,3,"Heikkinen, Miss. Laina",26.0,0,0,STON/O2. 3101282,7.925,,S,0
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",35.0,1,0,113803,53.1,C123,S,0
4,5,0,3,"Allen, Mr. William Henry",35.0,0,0,373450,8.05,,S,1


In [18]:
df = load()

In [19]:
def grab_col_name(dataframe,cat_th=10,car_th=20):
    cat_cols    =  [col for col in dataframe.columns if dataframe[col].dtype == ("O")]
    num_but_cat =  [col for col in dataframe.columns if dataframe[col].nunique() < cat_th and dataframe[col].dtype !="O"]
    cat_but_car =  [col for col in dataframe.columns if dataframe[col].nunique() < car_th and dataframe[col].dtype == "O"]
    cat_cols = cat_cols + num_but_cat
    cat_cols = [col for col in cat_cols if col not in cat_but_car]
    num_cols = [col for col in dataframe.columns if dataframe[col].dtype !="O"]
    num_cols = [col for col in num_cols if col not in num_but_cat]
    print(f"observations:{dataframe.shape[0]}")
    print(f"variables:{dataframe.shape[1]}")
    print(f'cat_cols:{len(cat_cols)}')
    print(f'num_cols:{len(num_cols)}')
    print(f'cat_but_car:{len(cat_but_car)}')
    print(f'num_but_car:{len(num_but_cat)}')
    return cat_cols,num_cols,cat_but_car

In [20]:
cat_cols,num_cols,cat_but_car = grab_col_name(df)

observations:891
variables:12
cat_cols:7
num_cols:3
cat_but_car:2
num_but_car:4


In [21]:
ohe_cols = [col for col in df.columns if 10 >= df[col].nunique()>2]

In [22]:
for col in ohe_cols:
 print(one_hot_encoder(df,col).head(),"\n")

   PassengerId  Survived                                               Name     Sex    Age  SibSp  Parch            Ticket   Fare Cabin Embarked  Pclass_2  Pclass_3
0            1         0                            Braund, Mr. Owen Harris    male 22.000      1      0         A/5 21171  7.250   NaN        S         0         1
1            2         1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female 38.000      1      0          PC 17599 71.283   C85        C         0         0
2            3         1                             Heikkinen, Miss. Laina  female 26.000      0      0  STON/O2. 3101282  7.925   NaN        S         0         1
3            4         1       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female 35.000      1      0            113803 53.100  C123        S         0         0
4            5         0                           Allen, Mr. William Henry    male 35.000      0      0            373450  8.050   NaN        S         0         1 

   Passe

In [23]:
df = load_application_train()

In [24]:
df["NAME_EDUCATION_TYPE"].value_counts()

Secondary / secondary special    218391
Higher education                  74863
Incomplete higher                 10277
Lower secondary                    3816
Academic degree                     164
Name: NAME_EDUCATION_TYPE, dtype: int64

In [25]:
cat_cols,num_cols,cat_but_car=grab_col_name(df)

observations:307511
variables:122
cat_cols:40
num_cols:67
cat_but_car:15
num_but_car:39


In [26]:
cat_cols

['ORGANIZATION_TYPE',
 'TARGET',
 'FLAG_MOBIL',
 'FLAG_EMP_PHONE',
 'FLAG_WORK_PHONE',
 'FLAG_CONT_MOBILE',
 'FLAG_PHONE',
 'FLAG_EMAIL',
 'REGION_RATING_CLIENT',
 'REGION_RATING_CLIENT_W_CITY',
 'REG_REGION_NOT_LIVE_REGION',
 'REG_REGION_NOT_WORK_REGION',
 'LIVE_REGION_NOT_WORK_REGION',
 'REG_CITY_NOT_LIVE_CITY',
 'REG_CITY_NOT_WORK_CITY',
 'LIVE_CITY_NOT_WORK_CITY',
 'DEF_60_CNT_SOCIAL_CIRCLE',
 'FLAG_DOCUMENT_2',
 'FLAG_DOCUMENT_3',
 'FLAG_DOCUMENT_4',
 'FLAG_DOCUMENT_5',
 'FLAG_DOCUMENT_6',
 'FLAG_DOCUMENT_7',
 'FLAG_DOCUMENT_8',
 'FLAG_DOCUMENT_9',
 'FLAG_DOCUMENT_10',
 'FLAG_DOCUMENT_11',
 'FLAG_DOCUMENT_12',
 'FLAG_DOCUMENT_13',
 'FLAG_DOCUMENT_14',
 'FLAG_DOCUMENT_15',
 'FLAG_DOCUMENT_16',
 'FLAG_DOCUMENT_17',
 'FLAG_DOCUMENT_18',
 'FLAG_DOCUMENT_19',
 'FLAG_DOCUMENT_20',
 'FLAG_DOCUMENT_21',
 'AMT_REQ_CREDIT_BUREAU_HOUR',
 'AMT_REQ_CREDIT_BUREAU_DAY',
 'AMT_REQ_CREDIT_BUREAU_WEEK']

In [27]:
def cat_summary(dataframe,col_name,plot=False):
    print("#####################################")
    print(pd.DataFrame({col_name: dataframe[col_name].value_counts(),
                         "ratio": 100* dataframe[col_name].value_counts()/len(dataframe)}))
    print("#####################################")
    if plot:
        sns.countplot(x=dataframe[col_name],data=dataframe)
        plt.show()

In [28]:
for col in cat_cols:
    cat_summary(df,col)

#####################################
                        ORGANIZATION_TYPE  ratio
Business Entity Type 3              67992 22.110
XNA                                 55374 18.007
Self-employed                       38412 12.491
Other                               16683  5.425
Medicine                            11193  3.640
Business Entity Type 2              10553  3.432
Government                          10404  3.383
School                               8893  2.892
Trade: type 7                        7831  2.547
Kindergarten                         6880  2.237
Construction                         6721  2.186
Business Entity Type 1               5984  1.946
Transport: type 4                    5398  1.755
Trade: type 3                        3492  1.136
Industry: type 9                     3368  1.095
Industry: type 3                     3278  1.066
Security                             3247  1.056
Housing                              2958  0.962
Industry: type 11              

In [29]:
df.groupby("NAME_INCOME_TYPE")["TARGET"].mean()

NAME_INCOME_TYPE
Businessman            0.000
Commercial associate   0.075
Maternity leave        0.400
Pensioner              0.054
State servant          0.058
Student                0.000
Unemployed             0.364
Working                0.096
Name: TARGET, dtype: float64

In [30]:
def rare_analyser(dataframe,target,cat_cols):
    for col in cat_cols:
        print(col,":",len(dataframe[col].value_counts()))
        print(pd.DataFrame({"COUNT": dataframe[col].value_counts(),
                            "RATIO": dataframe[col].value_counts() / len(dataframe),
                            "TARGET": dataframe.groupby(col)[target].mean()
                            }),end="\n\n\n")

In [31]:
rare_analyser(df,"TARGET",cat_cols)

ORGANIZATION_TYPE : 58
                        COUNT  RATIO  TARGET
Advertising               429  0.001   0.082
Agriculture              2454  0.008   0.105
Bank                     2507  0.008   0.052
Business Entity Type 1   5984  0.019   0.081
Business Entity Type 2  10553  0.034   0.085
Business Entity Type 3  67992  0.221   0.093
Cleaning                  260  0.001   0.112
Construction             6721  0.022   0.117
Culture                   379  0.001   0.055
Electricity               950  0.003   0.066
Emergency                 560  0.002   0.071
Government              10404  0.034   0.070
Hotel                     966  0.003   0.064
Housing                  2958  0.010   0.079
Industry: type 1         1039  0.003   0.111
Industry: type 10         109  0.000   0.064
Industry: type 11        2704  0.009   0.087
Industry: type 12         369  0.001   0.038
Industry: type 13          67  0.000   0.134
Industry: type 2          458  0.001   0.072
Industry: type 3         3278  0

In [32]:
def rare_encoder(dataframe,rare_perc):
    tempt_df=dataframe.copy()
    rare_columns = [col for col in tempt_df.columns if tempt_df[col].dtype == "o"
     and (temp_df[col].value_counts()/len(tempt_def) < rare_perc).any(axis=None)]
    for var in rare_columns:
        tmp = tempt_df[var].value_counts() / len(tempt_df)
        rare_labels = tmp[tmp<rare_perc].index
        tempt_df[var] = np.where(tempt_df[var].isin(rare_labels),'Rare',tempt_df[var])
        return tempt_df

### Standard Scaler

In [33]:
df = load()
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.283,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [34]:
ss = StandardScaler()
df["Age_standard_Scaler"] = ss.fit_transform(df[["Age"]])

In [35]:
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Age_standard_Scaler
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,-0.53
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.283,C85,C,0.572
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S,-0.255
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S,0.365
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S,0.365


### RobustScaler 

In [36]:
rs = RobustScaler()
df["Age_robust_Scaler"] = rs.fit_transform(df[["Age"]])
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
PassengerId,891.0,446.0,257.354,1.0,223.5,446.0,668.5,891.0
Survived,891.0,0.384,0.487,0.0,0.0,0.0,1.0,1.0
Pclass,891.0,2.309,0.836,1.0,2.0,3.0,3.0,3.0
Age,714.0,29.699,14.526,0.42,20.125,28.0,38.0,80.0
SibSp,891.0,0.523,1.103,0.0,0.0,0.0,1.0,8.0
Parch,891.0,0.382,0.806,0.0,0.0,0.0,0.0,6.0
Fare,891.0,32.204,49.693,0.0,7.91,14.454,31.0,512.329
Age_standard_Scaler,714.0,0.0,1.001,-2.017,-0.66,-0.117,0.572,3.465
Age_robust_Scaler,714.0,0.095,0.813,-1.543,-0.441,0.0,0.559,2.909


### MinMaxScaler

In [37]:
mms = MinMaxScaler()
df["Age_min_max_scaler"] = mms.fit_transform(df[["Age"]])
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
PassengerId,891.0,446.0,257.354,1.0,223.5,446.0,668.5,891.0
Survived,891.0,0.384,0.487,0.0,0.0,0.0,1.0,1.0
Pclass,891.0,2.309,0.836,1.0,2.0,3.0,3.0,3.0
Age,714.0,29.699,14.526,0.42,20.125,28.0,38.0,80.0
SibSp,891.0,0.523,1.103,0.0,0.0,0.0,1.0,8.0
Parch,891.0,0.382,0.806,0.0,0.0,0.0,0.0,6.0
Fare,891.0,32.204,49.693,0.0,7.91,14.454,31.0,512.329
Age_standard_Scaler,714.0,0.0,1.001,-2.017,-0.66,-0.117,0.572,3.465
Age_robust_Scaler,714.0,0.095,0.813,-1.543,-0.441,0.0,0.559,2.909
Age_min_max_scaler,714.0,0.368,0.183,0.0,0.248,0.347,0.472,1.0


## Numeric to Categorical

In [38]:
df["Age_cut"] = pd.qcut(df["Age"],5)