# Chronic Kidney Disease Prediction

In [1]:
import numpy as np
import pandas as pd
from scipy.io import arff

In [2]:
# Load the cleaned chronic kidney disease dataset from the local data folder
csv_path = './data/chronic_kidney_disease_cleaned.csv'
df = pd.read_csv(csv_path)

In [3]:
# Print the schema of the freshly loaded frame: column names, non-null counts and dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 158 entries, 0 to 157
Data columns (total 26 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  158 non-null    int64  
 1   age         158 non-null    float64
 2   bp          158 non-null    float64
 3   sg          158 non-null    float64
 4   al          158 non-null    float64
 5   su          158 non-null    float64
 6   rbc         158 non-null    object 
 7   pc          158 non-null    object 
 8   pcc         158 non-null    object 
 9   ba          158 non-null    object 
 10  bgr         158 non-null    float64
 11  bu          158 non-null    float64
 12  sc          158 non-null    float64
 13  sod         158 non-null    float64
 14  pot         158 non-null    float64
 15  hemo        158 non-null    float64
 16  pcv         158 non-null    float64
 17  wbcc        158 non-null    float64
 18  rbcc        158 non-null    float64
 19  htn         158 non-null    o

In [12]:
# (rows, columns) of the frame
df.shape

(158, 26)

In [4]:
# Preview the first ten rows
df.head(10)

Unnamed: 0.1,Unnamed: 0,age,bp,sg,al,su,rbc,pc,pcc,ba,...,pcv,wbcc,rbcc,htn,dm,cad,appet,pe,ane,class
0,3,48.0,70.0,1.005,4.0,0.0,normal,abnormal,present,notpresent,...,32.0,6700.0,3.9,yes,no,no,poor,yes,yes,ckd
1,9,53.0,90.0,1.02,2.0,0.0,abnormal,abnormal,present,notpresent,...,29.0,12100.0,3.7,yes,yes,no,poor,no,yes,ckd
2,11,63.0,70.0,1.01,3.0,0.0,abnormal,abnormal,present,notpresent,...,32.0,4500.0,3.8,yes,yes,no,poor,yes,no,ckd
3,14,68.0,80.0,1.01,3.0,2.0,normal,abnormal,present,present,...,16.0,11000.0,2.6,yes,yes,yes,poor,yes,no,ckd
4,20,61.0,80.0,1.015,2.0,0.0,abnormal,abnormal,notpresent,notpresent,...,24.0,9200.0,3.2,yes,yes,yes,poor,yes,yes,ckd
5,22,48.0,80.0,1.025,4.0,0.0,normal,abnormal,notpresent,notpresent,...,32.0,6900.0,3.4,yes,no,no,good,no,yes,ckd
6,27,69.0,70.0,1.01,3.0,4.0,normal,abnormal,notpresent,notpresent,...,37.0,9600.0,4.1,yes,yes,yes,good,yes,no,ckd
7,48,73.0,70.0,1.005,0.0,0.0,normal,normal,notpresent,notpresent,...,29.0,18900.0,3.5,yes,yes,no,good,yes,no,ckd
8,58,73.0,80.0,1.02,2.0,0.0,abnormal,abnormal,notpresent,notpresent,...,33.0,7200.0,4.3,yes,yes,yes,good,no,no,ckd
9,71,46.0,60.0,1.01,1.0,0.0,normal,normal,notpresent,notpresent,...,28.0,14600.0,3.2,yes,yes,no,good,no,no,ckd


In [5]:
# Split the column names by dtype in a single pass:
#   cat_cols -> columns stored as "object" (the string-valued categories)
#   num_cols -> every other column
cat_cols, num_cols = [], []
for col_name, col_dtype in df.dtypes.items():
    bucket = cat_cols if col_dtype == "object" else num_cols
    bucket.append(col_name)

In [10]:
# Show the object-dtype column names.
# NOTE(review): the list includes the target column 'class', not only features.
cat_cols

['rbc', 'pc', 'pcc', 'ba', 'htn', 'dm', 'cad', 'appet', 'pe', 'ane', 'class']

In [11]:
# Show the non-object column names.
# NOTE(review): the list contains 'Unnamed: 0', which a later cell drops from df,
# so this list goes stale unless it is rebuilt after that drop.
num_cols

['Unnamed: 0',
 'age',
 'bp',
 'sg',
 'al',
 'su',
 'bgr',
 'bu',
 'sc',
 'sod',
 'pot',
 'hemo',
 'pcv',
 'wbcc',
 'rbcc']

In [6]:
# List the distinct values found in each categorical column

for name in cat_cols:
    distinct_values = df[name].unique()
    print(f"{name} has {distinct_values} values\n")

rbc has ['normal' 'abnormal'] values

pc has ['abnormal' 'normal'] values

pcc has ['present' 'notpresent'] values

ba has ['notpresent' 'present'] values

htn has ['yes' 'no'] values

dm has ['no' 'yes'] values

cad has ['no' 'yes'] values

appet has ['poor' 'good'] values

pe has ['yes' 'no'] values

ane has ['yes' 'no'] values

class has ['ckd' 'notckd'] values



In [7]:
# Encode the target column as integers: 'ckd' -> 0, 'notckd' -> 1.
#
# Series.map turns every value that is missing from the mapping into NaN, so
# re-running a plain map (or meeting an unexpected label) would silently wipe
# the target. The guard below makes a re-run a no-op and fails loudly on
# labels that are not in the mapping.

class_codes = {'ckd': 0, 'notckd': 1}
class_labels = df['class'].dropna()

# Skip the mapping when the column already holds only the encoded values.
if not class_labels.isin(list(class_codes.values())).all():
    unexpected = set(class_labels.unique()) - set(class_codes)
    if unexpected:
        raise ValueError(
            f"Unexpected labels in 'class': {sorted(unexpected, key=str)}"
        )
    df['class'] = df['class'].map(class_codes)

In [8]:
# Count the missing values in every column and show the largest counts first

null_counts = df.isna().sum()
null_counts.sort_values(ascending=False)

Unnamed: 0    0
age           0
ane           0
pe            0
appet         0
cad           0
dm            0
htn           0
rbcc          0
wbcc          0
pcv           0
hemo          0
pot           0
sod           0
sc            0
bu            0
bgr           0
ba            0
pcc           0
pc            0
rbc           0
su            0
al            0
sg            0
bp            0
class         0
dtype: int64

In [9]:
# Preview the first rows to check that 'class' now holds the numeric codes
df.head()

Unnamed: 0.1,Unnamed: 0,age,bp,sg,al,su,rbc,pc,pcc,ba,...,pcv,wbcc,rbcc,htn,dm,cad,appet,pe,ane,class
0,3,48.0,70.0,1.005,4.0,0.0,normal,abnormal,present,notpresent,...,32.0,6700.0,3.9,yes,no,no,poor,yes,yes,0
1,9,53.0,90.0,1.02,2.0,0.0,abnormal,abnormal,present,notpresent,...,29.0,12100.0,3.7,yes,yes,no,poor,no,yes,0
2,11,63.0,70.0,1.01,3.0,0.0,abnormal,abnormal,present,notpresent,...,32.0,4500.0,3.8,yes,yes,no,poor,yes,no,0
3,14,68.0,80.0,1.01,3.0,2.0,normal,abnormal,present,present,...,16.0,11000.0,2.6,yes,yes,yes,poor,yes,no,0
4,20,61.0,80.0,1.015,2.0,0.0,abnormal,abnormal,notpresent,notpresent,...,24.0,9200.0,3.2,yes,yes,yes,poor,yes,yes,0


In [13]:
# Remove the 'Unnamed: 0' column (presumably the row index that was written out
# with the cleaned CSV - confirm against the export step).
# Dropping by name rather than by position keeps this cell safe to re-run:
# a second run is a no-op instead of removing the next first column ('age').
df = df.drop(columns=['Unnamed: 0'], errors='ignore')

In [14]:
# Preview the first rows to confirm the 'Unnamed: 0' column is gone
df.head()

Unnamed: 0,age,bp,sg,al,su,rbc,pc,pcc,ba,bgr,...,pcv,wbcc,rbcc,htn,dm,cad,appet,pe,ane,class
0,48.0,70.0,1.005,4.0,0.0,normal,abnormal,present,notpresent,117.0,...,32.0,6700.0,3.9,yes,no,no,poor,yes,yes,0
1,53.0,90.0,1.02,2.0,0.0,abnormal,abnormal,present,notpresent,70.0,...,29.0,12100.0,3.7,yes,yes,no,poor,no,yes,0
2,63.0,70.0,1.01,3.0,0.0,abnormal,abnormal,present,notpresent,380.0,...,32.0,4500.0,3.8,yes,yes,no,poor,yes,no,0
3,68.0,80.0,1.01,3.0,2.0,normal,abnormal,present,present,157.0,...,16.0,11000.0,2.6,yes,yes,yes,poor,yes,no,0
4,61.0,80.0,1.015,2.0,0.0,abnormal,abnormal,notpresent,notpresent,173.0,...,24.0,9200.0,3.2,yes,yes,yes,poor,yes,yes,0


In [15]:
# Re-check the schema (column names, non-null counts, dtypes) after the column drop
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 158 entries, 0 to 157
Data columns (total 25 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   age     158 non-null    float64
 1   bp      158 non-null    float64
 2   sg      158 non-null    float64
 3   al      158 non-null    float64
 4   su      158 non-null    float64
 5   rbc     158 non-null    object 
 6   pc      158 non-null    object 
 7   pcc     158 non-null    object 
 8   ba      158 non-null    object 
 9   bgr     158 non-null    float64
 10  bu      158 non-null    float64
 11  sc      158 non-null    float64
 12  sod     158 non-null    float64
 13  pot     158 non-null    float64
 14  hemo    158 non-null    float64
 15  pcv     158 non-null    float64
 16  wbcc    158 non-null    float64
 17  rbcc    158 non-null    float64
 18  htn     158 non-null    object 
 19  dm      158 non-null    object 
 20  cad     158 non-null    object 
 21  appet   158 non-null    object 
 22  pe