In [175]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

from sklearn.tree import DecisionTreeClassifier

from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score

from sklearn.preprocessing import KBinsDiscretizer #For Binning
from sklearn.compose import ColumnTransformer

In [176]:
df=pd.read_csv("train.csv",usecols=["Age","Fare","Survived"])
df.dropna(inplace=True)
df.sample(3)

Unnamed: 0,Survived,Age,Fare
745,0,70.0,71.0
816,0,23.0,7.925
56,1,21.0,10.5


# Train Test Split

In [177]:
X=df.iloc[:,1:]
Y=df.iloc[:,0]
X_train, X_test, y_train, y_test= train_test_split(X,Y, test_size=0.2,random_state=42)

# Training without Binning

In [178]:
clf=DecisionTreeClassifier()

clf.fit(X_train, y_train)

y_pred=clf.predict(X_test)

accuracy_score(y_test, y_pred)

0.6223776223776224

In [179]:
#Cross Value Score
np.mean(cross_val_score(clf,X,Y,cv=200,scoring="accuracy"))

np.float64(0.6325)

# Applying Binning

In [180]:
# Applying Binning on our train columns
# Strategy (uniform, quantile, kmeans)
trf=ColumnTransformer(
    [
        ('first',KBinsDiscretizer(n_bins=10,encode="ordinal",strategy="quantile"),[0]),
        ('second',KBinsDiscretizer(n_bins=10,encode="ordinal",strategy="quantile"),[1])
    ]
)

In [181]:
X_train_transformed=trf.fit_transform(X_train)
X_test_transformed=trf.transform(X_test)

# Training After Binning

In [182]:
clf=DecisionTreeClassifier()
clf.fit(X_train_transformed,y_train)

y_pred1=clf.predict(X_test_transformed)

accuracy_score(y_test, y_pred1)

0.6223776223776224

In [183]:
X_trans=trf.fit_transform(X)
np.mean(cross_val_score(clf,X_trans,Y,cv=200,scoring="accuracy"))

np.float64(0.6929166666666666)