In [111]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder,OrdinalEncoder,MinMaxScaler,StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline,make_pipeline

In [112]:
# Load the kidney-stone treatment records from the project-relative CSV
# and preview the first five rows.
df = pd.read_csv(filepath_or_buffer='dataset/kidney_stone_data.csv')
df.head(n=5)

Unnamed: 0,treatment,stone_size,success
0,B,large,1
1,A,large,1
2,A,large,0
3,A,large,1
4,A,large,1


In [113]:
# Report each column's dtype and non-null count to spot missing data.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 700 entries, 0 to 699
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   treatment   700 non-null    object
 1   stone_size  700 non-null    object
 2   success     700 non-null    int64 
dtypes: int64(1), object(2)
memory usage: 16.5+ KB


In [114]:
# Summary statistics for the numeric columns of df.
df.describe()

Unnamed: 0,success
count,700.0
mean,0.802857
std,0.398126
min,0.0
25%,1.0
50%,1.0
75%,1.0
max,1.0


In [115]:
# Count missing values per column.
df.isnull().sum()

treatment     0
stone_size    0
success       0
dtype: int64

In [116]:
# Feature matrix: the first two columns of df, selected by position.
x = pd.DataFrame(data=df.iloc[:, 0:2])
x

Unnamed: 0,treatment,stone_size
0,B,large
1,A,large
2,A,large
3,A,large
4,A,large
...,...,...
695,B,small
696,B,small
697,B,small
698,A,large


In [117]:
# Target vector: the last column of df, selected by position.
y = df.iloc[:, -1]

In [118]:
# Display the target series.
y

0      1
1      1
2      0
3      1
4      1
      ..
695    0
696    1
697    1
698    1
699    1
Name: success, Length: 700, dtype: int64

In [119]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# identical across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.8,
    random_state=2,
)

In [120]:
# Shape of the training feature matrix (rows, columns).
x_train.shape

(560, 2)

In [121]:
# Shape of the held-out feature matrix (rows, columns).
x_test.shape

(140, 2)

In [122]:
# Length of the training target vector.
y_train.shape

(560,)

In [123]:
# Length of the held-out target vector.
y_test.shape

(140,)

In [124]:
# Step 1: one-hot encode column 0 (selected by position), dropping the first
# category; every other column is passed through untouched.
#
# The former `sparse=True` argument was removed: it only restated the
# encoder's default, and the keyword was renamed to `sparse_output` in
# scikit-learn 1.2 (the old name was removed in 1.4), so passing it raises a
# TypeError on current releases. Omitting it behaves the same on old and new
# versions.
trf1 = ColumnTransformer(
    transformers=[
        ('treat', OneHotEncoder(drop='first', handle_unknown='ignore'), [0]),
    ],
    remainder='passthrough',
)
trf1

In [125]:
# Step 2: ordinal-encode column 1 with the explicit order
# 'large' -> 0, 'small' -> 1; every other column is passed through.
# NOTE(review): inside the pipeline this runs on trf1's output, so index 1 is
# a position in that transformed array, not in the original frame. It
# presumably lands on stone_size because treatment encodes to a single
# column -- re-check if the input columns or categories change.
trf2 = ColumnTransformer(
    transformers=[
        ('stone', OrdinalEncoder(categories=[['large', 'small']]), [1]),
    ],
    remainder='passthrough',
)
trf2

In [126]:
# List the distinct stone_size values (the categories handed to the
# OrdinalEncoder in trf2).
df['stone_size'].unique()

array(['large', 'small'], dtype=object)

In [127]:
from sklearn.feature_selection import SelectKBest, chi2

# Chi-squared univariate feature selection keeping the two best-scoring
# features.
# NOTE(review): the preceding transformers appear to emit only two columns,
# in which case k=2 keeps everything -- confirm this step is still wanted.
sk = SelectKBest(chi2, k=2)

In [128]:

from sklearn.tree import DecisionTreeClassifier

# Final estimator. A fixed random_state makes fitting reproducible:
# scikit-learn permutes features at every split, so ties between equally
# good splits can otherwise resolve differently from run to run.
dtc = DecisionTreeClassifier(random_state=2)

In [129]:


# Assemble the end-to-end model. Steps run in the listed order:
# treatment one-hot encoding -> stone-size ordinal encoding ->
# chi-squared feature selection -> decision tree.
pipe = Pipeline(
    steps=[
        ('trf1', trf1),
        ('trf2', trf2),
        ('sk', sk),
        ('dtc', dtc),
    ]
)

In [130]:
# Fit every preprocessing step and the classifier on the training split.
pipe.fit(x_train, y_train)

In [131]:
# Inspect the pipeline's steps keyed by their names.
pipe.named_steps

{'trf1': ColumnTransformer(remainder='passthrough',
                   transformers=[('treat',
                                  OneHotEncoder(drop='first',
                                                handle_unknown='ignore'),
                                  [0])]),
 'trf2': ColumnTransformer(remainder='passthrough',
                   transformers=[('stone',
                                  OrdinalEncoder(categories=[['large',
                                                              'small']]),
                                  [1])]),
 'sk': SelectKBest(k=2, score_func=<function chi2 at 0x00000240DC187160>),
 'dtc': DecisionTreeClassifier()}

In [132]:
from sklearn import set_config

In [133]:
# Ask scikit-learn to render estimators as diagrams in notebook output.
set_config(display='diagram')

In [134]:
# Predict the outcome for every held-out row and show the raw labels.
y_pred = pipe.predict(x_test)
y_pred

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1], dtype=int64)

In [135]:
from sklearn.metrics import accuracy_score

# Fraction of held-out rows whose predicted label matches the true label.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.8142857142857143

In [136]:
from sklearn.model_selection import cross_val_score


In [137]:
# Mean accuracy over 5-fold cross-validation on the training split.
cross_val_score(
    estimator=pipe,
    X=x_train,
    y=y_train,
    cv=5,
    scoring='accuracy',
).mean()

0.8

In [138]:
import pickle

# Persist the fitted pipeline to disk. The `with` block guarantees the file
# handle is flushed and closed even if pickling fails; the previous bare
# open() left it to the garbage collector.
# NOTE: only unpickle this file from a trusted source -- pickle.load can
# execute arbitrary code.
with open('ksd.pkl', 'wb') as model_file:
    pickle.dump(pipe, model_file)