In [None]:
import numpy as np
import pandas as pd

In [None]:
dataset = pd.read_csv("sample_data/kidney_disease.csv")

In [None]:
dataset.head()

Unnamed: 0,id,age,bp,sg,al,su,rbc,pc,pcc,ba,bgr,bu,sc,sod,pot,hemo,pcv,wc,rc,htn,dm,cad,appet,pe,ane,classification
0,0,48.0,80.0,1.02,1.0,0.0,,normal,notpresent,notpresent,121.0,36.0,1.2,,,15.4,44,7800,5.2,yes,yes,no,good,no,no,ckd
1,1,7.0,50.0,1.02,4.0,0.0,,normal,notpresent,notpresent,,18.0,0.8,,,11.3,38,6000,,no,no,no,good,no,no,ckd
2,2,62.0,80.0,1.01,2.0,3.0,normal,normal,notpresent,notpresent,423.0,53.0,1.8,,,9.6,31,7500,,no,yes,no,poor,no,yes,ckd
3,3,48.0,70.0,1.005,4.0,0.0,normal,abnormal,present,notpresent,117.0,56.0,3.8,111.0,2.5,11.2,32,6700,3.9,yes,no,no,poor,yes,yes,ckd
4,4,51.0,80.0,1.01,2.0,0.0,normal,normal,notpresent,notpresent,106.0,26.0,1.4,,,11.6,35,7300,4.6,no,no,no,good,no,no,ckd


In [None]:
dataset.shape

(400, 26)

In [None]:
dataset[['htn','dm','cad','pe','ane']]=dataset[['htn','dm','cad','pe','ane']].replace(to_replace={'yes':1,'no':0})
dataset[['rbc','pc']] = dataset[['rbc','pc']].replace(to_replace={'abnormal':1,'normal':0})
dataset[['pcc','ba']] = dataset[['pcc','ba']].replace(to_replace={'present':1,'notpresent':0})
dataset[['appet']] = dataset[['appet']].replace(to_replace={'good':1,'poor':0,'no':np.nan})
dataset['classification']=dataset['classification'].replace(to_replace={'ckd':1.0,'ckd\t':1.0,'notckd':0.0,'no':0.0})
dataset.rename(columns={'classification':'class'},inplace=True)

In [None]:
# # Further cleaning
dataset['pe'] = dataset['pe'].replace(to_replace='good',value=0) # Not having pedal edema is good
dataset['appet'] = dataset['appet'].replace(to_replace='no',value=0)
dataset['cad'] = dataset['cad'].replace(to_replace='\tno',value=0)
dataset['dm'] = dataset['dm'].replace(to_replace={'\tno':0,'\tyes':1,' yes':1, '':np.nan})
dataset.drop('id',axis=1,inplace=True)

In [None]:
# '?' character remove process in the dataset
for i in ['rc','wc','pcv']:
    dataset[i] = dataset[i].str.extract('(\d+)').astype(float)

In [None]:
# Filling missing numeric data in the dataset with mean
for i in ['age','bp','sg','al','su','bgr','bu','sc','sod','pot','hemo','rc','wc','pcv']:
    dataset[i].fillna(dataset[i].mean(),inplace=True)

In [None]:
dataset.isna().sum()

age        0
bp         0
sg         0
al         0
su         0
rbc      152
pc        65
pcc        4
ba         4
bgr        0
bu         0
sc         0
sod        0
pot        0
hemo       0
pcv        0
wc         0
rc         0
htn        2
dm         2
cad        2
appet      1
pe         1
ane        1
class      0
dtype: int64

In [None]:
from sklearn.impute import KNNImputer

In [None]:
cat_variables = dataset[['rbc', 'pc', 'pcc', 'ba', 'htn', 'dm', 'cad', 'appet', 'pe', 'ane']]
cat_dummies = pd.get_dummies(cat_variables, drop_first=True)
cat_dummies

Unnamed: 0,rbc,pc,pcc,ba,htn,dm,cad,appet,pe,ane
0,,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0
1,,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
3,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
395,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
397,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
398,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [None]:
cat_dummies.isna().sum()

rbc      152
pc        65
pcc        4
ba         4
htn        2
dm         2
cad        2
appet      1
pe         1
ane        1
dtype: int64

In [None]:
dataset = dataset.drop(['rbc', 'pc', 'pcc', 'ba', 'htn', 'dm', 'cad', 'appet', 'pe', 'ane'], axis=1)
dataset = pd.concat([dataset, cat_dummies], axis=1)
dataset

Unnamed: 0,age,bp,sg,al,su,bgr,bu,sc,sod,pot,hemo,pcv,wc,rc,class,rbc,pc,pcc,ba,htn,dm,cad,appet,pe,ane
0,48.0,80.0,1.020,1.0,0.0,121.000000,36.0,1.2,137.528754,4.627244,15.4,44.0,7800.0,5.000000,1.0,,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0
1,7.0,50.0,1.020,4.0,0.0,148.036517,18.0,0.8,137.528754,4.627244,11.3,38.0,6000.0,4.241636,1.0,,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,62.0,80.0,1.010,2.0,3.0,423.000000,53.0,1.8,137.528754,4.627244,9.6,31.0,7500.0,4.241636,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
3,48.0,70.0,1.005,4.0,0.0,117.000000,56.0,3.8,111.000000,2.500000,11.2,32.0,6700.0,3.000000,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0
4,51.0,80.0,1.010,2.0,0.0,106.000000,26.0,1.4,137.528754,4.627244,11.6,35.0,7300.0,4.000000,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,55.0,80.0,1.020,0.0,0.0,140.000000,49.0,0.5,150.000000,4.900000,15.7,47.0,6700.0,4.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
396,42.0,70.0,1.025,0.0,0.0,75.000000,31.0,1.2,141.000000,3.500000,16.5,54.0,7800.0,6.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
397,12.0,80.0,1.020,0.0,0.0,100.000000,26.0,0.6,137.000000,4.400000,15.8,49.0,6600.0,5.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
398,17.0,60.0,1.025,0.0,0.0,114.000000,50.0,1.0,135.000000,4.900000,14.2,51.0,7200.0,5.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [None]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
dataset = pd.DataFrame(scaler.fit_transform(dataset), columns = dataset.columns)
dataset.head()

Unnamed: 0,age,bp,sg,al,su,bgr,bu,sc,sod,pot,hemo,pcv,wc,rc,class,rbc,pc,pcc,ba,htn,dm,cad,appet,pe,ane
0,0.522727,0.230769,0.75,0.2,0.0,0.211538,0.088575,0.010582,0.839298,0.047803,0.836735,0.777778,0.231405,0.5,1.0,,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0
1,0.056818,0.0,0.75,0.8,0.0,0.269309,0.042362,0.005291,0.839298,0.047803,0.557823,0.644444,0.157025,0.373606,1.0,,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,0.681818,0.230769,0.25,0.4,0.6,0.856838,0.132221,0.018519,0.839298,0.047803,0.442177,0.488889,0.219008,0.373606,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
3,0.522727,0.153846,0.0,0.8,0.0,0.202991,0.139923,0.044974,0.671924,0.0,0.55102,0.511111,0.18595,0.166667,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0
4,0.556818,0.230769,0.25,0.4,0.0,0.179487,0.062901,0.013228,0.839298,0.047803,0.578231,0.577778,0.210744,0.333333,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [None]:
from sklearn.impute import KNNImputer
imputer = KNNImputer(n_neighbors=5)
dataset = pd.DataFrame(imputer.fit_transform(dataset),columns = dataset.columns)

In [None]:
dataset.isna().sum()

age      0
bp       0
sg       0
al       0
su       0
bgr      0
bu       0
sc       0
sod      0
pot      0
hemo     0
pcv      0
wc       0
rc       0
class    0
rbc      0
pc       0
pcc      0
ba       0
htn      0
dm       0
cad      0
appet    0
pe       0
ane      0
dtype: int64

In [None]:
dataset

Unnamed: 0,age,bp,sg,al,su,bgr,bu,sc,sod,pot,hemo,pcv,wc,rc,class,rbc,pc,pcc,ba,htn,dm,cad,appet,pe,ane
0,0.522727,0.230769,0.75,0.2,0.0,0.211538,0.088575,0.010582,0.839298,0.047803,0.836735,0.777778,0.231405,0.500000,1.0,0.2,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0
1,0.056818,0.000000,0.75,0.8,0.0,0.269309,0.042362,0.005291,0.839298,0.047803,0.557823,0.644444,0.157025,0.373606,1.0,0.4,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,0.681818,0.230769,0.25,0.4,0.6,0.856838,0.132221,0.018519,0.839298,0.047803,0.442177,0.488889,0.219008,0.373606,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
3,0.522727,0.153846,0.00,0.8,0.0,0.202991,0.139923,0.044974,0.671924,0.000000,0.551020,0.511111,0.185950,0.166667,1.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0
4,0.556818,0.230769,0.25,0.4,0.0,0.179487,0.062901,0.013228,0.839298,0.047803,0.578231,0.577778,0.210744,0.333333,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,0.602273,0.230769,0.75,0.0,0.0,0.252137,0.121951,0.001323,0.917981,0.053933,0.857143,0.844444,0.185950,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
396,0.454545,0.153846,1.00,0.0,0.0,0.113248,0.075738,0.010582,0.861199,0.022472,0.911565,1.000000,0.231405,0.666667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
397,0.113636,0.230769,0.75,0.0,0.0,0.166667,0.062901,0.002646,0.835962,0.042697,0.863946,0.888889,0.181818,0.500000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
398,0.170455,0.076923,1.00,0.0,0.0,0.196581,0.124519,0.007937,0.823344,0.053933,0.755102,0.933333,0.206612,0.500000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [None]:
dataset.to_csv('data_CKD.csv', index=False)

In [None]:
X = dataset.drop('class', 1)
y = dataset['class']

In [None]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [None]:
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [None]:
#from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score

rfc = RandomForestClassifier()
rfc.fit(X_train, y_train)
y_predict = rfc.predict(X_test)
accuracy_score(y_test, y_predict)

1.0

In [1]:
feature_ranking = pd.DataFrame(rfc.feature_importances_, index=X.columns, columns=['importance']).sort_values('importance', ascending=False)
feature_ranking

NameError: ignored

In [None]:
abc = AdaBoostClassifier()
abc.fit(X_train, y_train)
y_predict = abc.predict(X_test)
accuracy_score(y_test, y_predict)

1.0

In [None]:
feature_ranking = pd.DataFrame(abc.feature_importances_, index=X.columns, columns=['importance']).sort_values('importance', ascending=False)
feature_ranking

Unnamed: 0,importance
al,0.18
bgr,0.16
rbc,0.12
sod,0.1
hemo,0.08
pcv,0.08
age,0.06
sg,0.06
pe,0.04
bu,0.04


In [None]:
data=pd.read_csv("sample_data/data_CKD.csv")

In [None]:
data

Unnamed: 0,age,bp,sg,al,su,bgr,bu,sc,sod,pot,hemo,pcv,wc,rc,rbc,pc,pcc,ba,htn,dm,cad,appet,pe,ane,class
0,0.522727,0.230769,0.75,0.2,0.0,0.211538,0.088575,0.010582,0.839298,0.047803,0.836735,0.777778,0.231405,0.500000,0.2,0.0,0,0,1,1,0,1,0,0,1
1,0.056818,0.000000,0.75,0.8,0.0,0.269309,0.042362,0.005291,0.839298,0.047803,0.557823,0.644444,0.157025,0.373606,0.4,0.0,0,0,0,0,0,1,0,0,1
2,0.681818,0.230769,0.25,0.4,0.6,0.856838,0.132221,0.018519,0.839298,0.047803,0.442177,0.488889,0.219008,0.373606,0.0,0.0,0,0,0,1,0,0,0,1,1
3,0.522727,0.153846,0.00,0.8,0.0,0.202991,0.139923,0.044974,0.671924,0.000000,0.551020,0.511111,0.185950,0.166667,0.0,1.0,1,0,1,0,0,0,1,1,1
4,0.556818,0.230769,0.25,0.4,0.0,0.179487,0.062901,0.013228,0.839298,0.047803,0.578231,0.577778,0.210744,0.333333,0.0,0.0,0,0,0,0,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,0.602273,0.230769,0.75,0.0,0.0,0.252137,0.121951,0.001323,0.917981,0.053933,0.857143,0.844444,0.185950,0.333333,0.0,0.0,0,0,0,0,0,1,0,0,0
396,0.454545,0.153846,1.00,0.0,0.0,0.113248,0.075738,0.010582,0.861199,0.022472,0.911565,1.000000,0.231405,0.666667,0.0,0.0,0,0,0,0,0,1,0,0,0
397,0.113636,0.230769,0.75,0.0,0.0,0.166667,0.062901,0.002646,0.835962,0.042697,0.863946,0.888889,0.181818,0.500000,0.0,0.0,0,0,0,0,0,1,0,0,0
398,0.170455,0.076923,1.00,0.0,0.0,0.196581,0.124519,0.007937,0.823344,0.053933,0.755102,0.933333,0.206612,0.500000,0.0,0.0,0,0,0,0,0,1,0,0,0


In [None]:
X=data.drop("class",1)

In [None]:
y=data['class']

In [None]:
def forward_selection(data, target, significance_level=0.05):
    initial_features = data.columns.tolist()
    best_features = []
    while (len(initial_features)>0):
        remaining_features = list(set(initial_features)-set(best_features))
        new_pval = pd.Series(index=remaining_features)
        for new_column in remaining_features:
            model = sm.OLS(target, sm.add_constant(data[best_features+[new_column]])).fit()
            new_pval[new_column] = model.pvalues[new_column]
        min_p_value = new_pval.min()
        if(min_p_value<significance_level):
            best_features.append(new_pval.idxmin())
        else:
            break
    return best_features

In [None]:
forward_selection(X,y)

In [None]:
#importing the necessary libraries
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.linear_model import LinearRegression
# Sequential Forward Selection(sfs)
sfs = SFS(LinearRegression(),
          k_features=24,
          forward=True,
          floating=False,
          scoring = 'r2',
          cv = 0)

In [None]:
sfs.fit(X, y)
sfs.k_feature_names_ 

('age',
 'bp',
 'sg',
 'al',
 'su',
 'bgr',
 'bu',
 'sc',
 'sod',
 'pot',
 'hemo',
 'pcv',
 'wc',
 'rc',
 'rbc',
 'pc',
 'pcc',
 'ba',
 'htn',
 'dm',
 'cad',
 'appet',
 'pe',
 'ane')

In [None]:
from mlxtend.feature_selection import SequentialFeatureSelector
from sklearn.linear_model import LinearRegression,LogisticRegression
#FOR REGRESSION MODEL
feature_select = SequentialFeatureSelector(LinearRegression(),
                                           k_features=24,
                                           forward=True,
                                           floating=False,
                                           scoring='r2',
                                           cv=0)

In [None]:
feature_select.fit(X,y)
feature_select.k_feature_names_

('age',
 'bp',
 'sg',
 'al',
 'su',
 'bgr',
 'bu',
 'sc',
 'sod',
 'pot',
 'hemo',
 'pcv',
 'wc',
 'rc',
 'rbc',
 'pc',
 'pcc',
 'ba',
 'htn',
 'dm',
 'cad',
 'appet',
 'pe',
 'ane')

In [None]:
from mlxtend.feature_selection import SequentialFeatureSelector
from sklearn.linear_model import LinearRegression,LogisticRegression
#FOR REGRESSION MODEL
lr=LinearRegression()
ffs = SequentialFeatureSelector(lr, k_features='best',forward=True,n_jobs=-1)
ffs.fit(X,y)
features=list(ffs.k_feature_names_)
#features=list(map(features))
lr.fit(x_train[features],y_train)
y_pred=lr.predict(x_train[features])

NameError: ignored

In [None]:
from mlxtend.feature_selection import SequentialFeatureSelector
from sklearn.linear_model import LinearRegression,LogisticRegression
feature_select = SequentialFeatureSelector(LogisticRegression(),
                                           k_features=6,
                                           forward=True,
                                           floating=False,
                                           scoring='roc_auc',
                                           cv=0)
feature_select.fit(X,y)
feature_select.k_feature_names_


('age', 'bp', 'al', 'bgr', 'hemo', 'rbc')

In [None]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Lasso

In [None]:
from sklearn.datasets import load_diabetes
X,y = load_diabetes(return_X_y=True)

features = load_diabetes()['feature_names']

In [None]:
features

['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']

In [None]:
features = data['feature_names']

In [None]:
X = data.drop('class', 1)
y = data['class']

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [None]:
pipeline = Pipeline([
                     ('scaler',StandardScaler()),
                     ('model',Lasso())
])

In [None]:
search = GridSearchCV(pipeline,
                      {'model__alpha':np.arange(0.1,10,0.1)},
                      cv = 5, scoring="neg_mean_squared_error",verbose=3
                      )

In [None]:
search.fit(X_train,y_train)

Fitting 5 folds for each of 99 candidates, totalling 495 fits
[CV] model__alpha=0.1 ................................................
[CV] ................... model__alpha=0.1, score=-0.081, total=   0.0s
[CV] model__alpha=0.1 ................................................
[CV] ................... model__alpha=0.1, score=-0.080, total=   0.0s
[CV] model__alpha=0.1 ................................................
[CV] ................... model__alpha=0.1, score=-0.065, total=   0.0s
[CV] model__alpha=0.1 ................................................
[CV] ................... model__alpha=0.1, score=-0.067, total=   0.0s
[CV] model__alpha=0.1 ................................................
[CV] ................... model__alpha=0.1, score=-0.076, total=   0.0s
[CV] model__alpha=0.2 ................................................
[CV] ................... model__alpha=0.2, score=-0.121, total=   0.0s
[CV] model__alpha=0.2 ................................................
[CV] ..........

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s


[CV] ................... model__alpha=0.5, score=-0.245, total=   0.0s
[CV] model__alpha=0.5 ................................................
[CV] ................... model__alpha=0.5, score=-0.250, total=   0.0s
[CV] model__alpha=0.6 ................................................
[CV] ................... model__alpha=0.6, score=-0.217, total=   0.0s
[CV] model__alpha=0.6 ................................................
[CV] ................... model__alpha=0.6, score=-0.259, total=   0.0s
[CV] model__alpha=0.6 ................................................
[CV] ................... model__alpha=0.6, score=-0.222, total=   0.0s
[CV] model__alpha=0.6 ................................................
[CV] ................... model__alpha=0.6, score=-0.245, total=   0.0s
[CV] model__alpha=0.6 ................................................
[CV] ................... model__alpha=0.6, score=-0.250, total=   0.0s
[CV] model__alpha=0.7000000000000001 .................................
[CV] .

[Parallel(n_jobs=1)]: Done 495 out of 495 | elapsed:    4.0s finished


GridSearchCV(cv=5, error_score=nan,
             estimator=Pipeline(memory=None,
                                steps=[('scaler',
                                        StandardScaler(copy=True,
                                                       with_mean=True,
                                                       with_std=True)),
                                       ('model',
                                        Lasso(alpha=1.0, copy_X=True,
                                              fit_intercept=True, max_iter=1000,
                                              normalize=False, positive=False,
                                              precompute=False,
                                              random_state=None,
                                              selection='cyclic', tol=0.0001,
                                              warm_start=False))],
                                verbose=False),
             iid='deprec...
       2.7, 2.8, 2.9, 3. , 3.1,

In [None]:
search.best_params_

{'model__alpha': 0.1}

In [None]:
coefficients = search.best_estimator_.named_steps['model'].coef_

In [None]:
importance = np.abs(coefficients)

In [None]:
importance

array([0.        , 0.        , 0.13625293, 0.03343845, 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.09505082, 0.        , 0.        , 0.        , 0.07936907,
       0.        , 0.        , 0.        , 0.02225684, 0.02492131,
       0.        , 0.        , 0.        , 0.        ])

In [None]:
np.array(features)[importance > 0]

array(['sg', 'al', 'hemo', 'rbc', 'htn', 'dm'], dtype=object)

In [None]:
features = data.columns[:-1]

In [None]:
features=['age','bp','sg', 'al', 'su','bgr','bu','sc', 'sod', 'pot', 'hemo', 'pcv', 'wc', 'rc', 'rbc', 'pc', 'pcc', 'ba', 'htn', 'dm', 'cad', 'appet', 'pe', 'ane']