In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
dataset = pd.read_csv('CKD.csv')

In [3]:
dataset=pd.get_dummies(dataset,drop_first=True)

In [4]:
dataset


Unnamed: 0,age,bp,al,su,bgr,bu,sc,sod,pot,hrmo,...,pc_normal,pcc_present,ba_present,htn_yes,dm_yes,cad_yes,appet_yes,pe_yes,ane_yes,classification_yes
0,2.000000,76.459948,3,0,148.112676,57.482105,3.077356,137.528754,4.627244,12.518156,...,0,0,0,0,0,0,1,1,0,1
1,3.000000,76.459948,2,0,148.112676,22.000000,0.700000,137.528754,4.627244,10.700000,...,1,0,0,0,0,0,1,0,0,1
2,4.000000,76.459948,1,0,99.000000,23.000000,0.600000,138.000000,4.400000,12.000000,...,1,0,0,0,0,0,1,0,0,1
3,5.000000,76.459948,1,0,148.112676,16.000000,0.700000,138.000000,3.200000,8.100000,...,1,0,0,0,0,0,1,0,1,1
4,5.000000,50.000000,0,0,148.112676,25.000000,0.600000,137.528754,4.627244,11.800000,...,1,0,0,0,0,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
394,51.492308,70.000000,0,0,219.000000,36.000000,1.300000,139.000000,3.700000,12.500000,...,1,0,0,0,0,0,1,0,0,1
395,51.492308,70.000000,0,2,220.000000,68.000000,2.800000,137.528754,4.627244,8.700000,...,1,0,0,1,1,0,1,0,1,1
396,51.492308,70.000000,3,0,110.000000,115.000000,6.000000,134.000000,2.700000,9.100000,...,1,0,0,1,1,0,0,0,0,1
397,51.492308,90.000000,0,0,207.000000,80.000000,6.800000,142.000000,5.500000,8.500000,...,1,0,0,1,1,0,1,0,1,1


In [5]:

indep=dataset.iloc[:,0:27]
dep=dataset['classification_yes']

In [6]:
dataset['classification_yes'].value_counts()

1    249
0    150
Name: classification_yes, dtype: int64

In [7]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(indep, dep, test_size = 1/3, random_state = 0)
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train_= sc.fit_transform(X_train)
X_test_= sc.transform(X_test)

In [8]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

param_grid = {'solver':['newton-cg', 'lbfgs', 'liblinear', 'saga'],
             'penalty':['l2']} 
grid = GridSearchCV(LogisticRegression(), param_grid, refit = True, verbose = 3,n_jobs=-1,scoring='f1_weighted') 
   
grid.fit(X_train, y_train) 
 


Fitting 5 folds for each of 4 candidates, totalling 20 fits


In [9]:
re=grid.cv_results_
grid_predictions = grid.predict(X_test) 


In [10]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, grid_predictions)
print(cm)


# print classification report 
from sklearn.metrics import classification_report
clf_report = classification_report(y_test, grid_predictions)
print(clf_report)


[[49  2]
 [ 0 82]]
              precision    recall  f1-score   support

           0       1.00      0.96      0.98        51
           1       0.98      1.00      0.99        82

    accuracy                           0.98       133
   macro avg       0.99      0.98      0.98       133
weighted avg       0.99      0.98      0.98       133



In [11]:
from sklearn.metrics import accuracy_score
score=accuracy_score(y_test,grid_predictions)
print(score)

0.9849624060150376


In [12]:
from sklearn.metrics import f1_score
f1=f1_score(y_test, grid_predictions, average='macro')
print(f1)

0.9839759036144577


In [13]:
from sklearn.metrics import roc_auc_score
rec=roc_auc_score(y_test, grid.predict_proba(X_test)[:, 1])
print(rec)

0.9992826398852224


In [14]:
re=grid.cv_results_
table=pd.DataFrame.from_dict(re)
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_penalty,param_solver,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.114013,0.012783,0.012506,0.006253,l2,newton-cg,"{'penalty': 'l2', 'solver': 'newton-cg'}",0.981569,0.981014,0.962573,0.943093,0.981031,0.969856,0.015204,2
1,0.041493,0.007815,0.006855,0.003963,l2,lbfgs,"{'penalty': 'l2', 'solver': 'lbfgs'}",0.869648,0.905069,0.906705,0.906085,0.885478,0.894597,0.014791,3
2,0.00851,0.001847,0.005711,0.000512,l2,liblinear,"{'penalty': 'l2', 'solver': 'liblinear'}",0.981569,0.981014,0.962573,0.962264,1.0,0.977484,0.014072,1
3,0.013684,0.002432,0.00389,0.003267,l2,saga,"{'penalty': 'l2', 'solver': 'saga'}",0.486532,0.50141,0.477841,0.477841,0.477841,0.484293,0.009196,4


In [15]:
dataset.columns


Index(['age', 'bp', 'al', 'su', 'bgr', 'bu', 'sc', 'sod', 'pot', 'hrmo', 'pcv',
       'wc', 'rc', 'sg_b', 'sg_c', 'sg_d', 'sg_e', 'rbc_normal', 'pc_normal',
       'pcc_present', 'ba_present', 'htn_yes', 'dm_yes', 'cad_yes',
       'appet_yes', 'pe_yes', 'ane_yes', 'classification_yes'],
      dtype='object')

In [17]:
Future_Prediction=grid.predict([52,100,0,0,0,0,1,1,1,7,85,6])# change the paramter,play with it.
print("Future_Prediction={}".format(Future_Prediction))




ValueError: Expected 2D array, got 1D array instead:
array=[ 52 100   0   0   0   0   1   1   1   7  85   6].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.