# Importing Dependencies

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the kidney-disease table from a CSV file given as a relative path
# (resolved against the notebook's working directory).
kidney_data = pd.read_csv(filepath_or_buffer="Kidney disease data.csv")

In [3]:
# Preview the first five rows of the loaded table.
kidney_data.head()

Unnamed: 0,Bp,Sg,Al,Su,Rbc,Bu,Sc,Sod,Pot,Hemo,Wbcc,Rbcc,Htn,Class
0,80,1.02,1,0,1,36.0,1.2,137.53,4.63,15.4,7800,5.2,1.0,1
1,50,1.02,4,0,1,18.0,0.8,137.53,4.63,11.3,6000,4.71,0.0,1
2,80,1.01,2,3,1,53.0,1.8,137.53,4.63,9.6,7500,4.71,0.0,1
3,70,1.005,4,0,1,56.0,3.8,111.0,2.5,11.2,6700,3.9,1.0,1
4,80,1.01,2,0,1,26.0,1.4,137.53,4.63,11.6,7300,4.6,0.0,1


In [4]:
# Remove the 'Rbc' column. The frame is modified in place, so running this cell
# a second time on the same frame raises KeyError ('Rbc' is already gone).
kidney_data.drop(columns=['Rbc'], inplace=True)

In [5]:
# Rename the label column 'Class' to 'Target' (in place); later cells refer to it by the new name.
kidney_data.rename(columns={'Class': 'Target'}, inplace=True)

In [6]:
# Preview the table again to check the column layout after the drop and rename.
kidney_data.head()

Unnamed: 0,Bp,Sg,Al,Su,Bu,Sc,Sod,Pot,Hemo,Wbcc,Rbcc,Htn,Target
0,80,1.02,1,0,36.0,1.2,137.53,4.63,15.4,7800,5.2,1.0,1
1,50,1.02,4,0,18.0,0.8,137.53,4.63,11.3,6000,4.71,0.0,1
2,80,1.01,2,3,53.0,1.8,137.53,4.63,9.6,7500,4.71,0.0,1
3,70,1.005,4,0,56.0,3.8,111.0,2.5,11.2,6700,3.9,1.0,1
4,80,1.01,2,0,26.0,1.4,137.53,4.63,11.6,7300,4.6,0.0,1


In [7]:
# (number of rows, number of columns) of the table.
kidney_data.shape

(400, 13)

In [8]:
# Per-column dtype and non-null count.
kidney_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 13 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Bp      400 non-null    int64  
 1   Sg      400 non-null    float64
 2   Al      400 non-null    int64  
 3   Su      400 non-null    int64  
 4   Bu      400 non-null    float64
 5   Sc      400 non-null    float64
 6   Sod     400 non-null    float64
 7   Pot     400 non-null    float64
 8   Hemo    400 non-null    float64
 9   Wbcc    400 non-null    int64  
 10  Rbcc    400 non-null    float64
 11  Htn     400 non-null    float64
 12  Target  400 non-null    int64  
dtypes: float64(8), int64(5)
memory usage: 40.8 KB


In [9]:
# Number of missing values in each column.
kidney_data.isnull().sum()

Bp        0
Sg        0
Al        0
Su        0
Bu        0
Sc        0
Sod       0
Pot       0
Hemo      0
Wbcc      0
Rbcc      0
Htn       0
Target    0
dtype: int64

In [10]:
# Class balance: how many rows carry each label value.
kidney_data['Target'].value_counts()

1    250
0    150
Name: Target, dtype: int64

1 -->  Having chronic kidney disease

0 -->  Not having chronic kidney disease

# Splitting the features and target

In [11]:
# Features: every column except the label.
x = kidney_data.drop(columns=["Target"])
# Label: the 'Target' column on its own.
y = kidney_data["Target"]

In [12]:
# Display the feature table.
x

Unnamed: 0,Bp,Sg,Al,Su,Bu,Sc,Sod,Pot,Hemo,Wbcc,Rbcc,Htn
0,80,1.020,1,0,36.0,1.2,137.53,4.63,15.4,7800,5.20,1.0
1,50,1.020,4,0,18.0,0.8,137.53,4.63,11.3,6000,4.71,0.0
2,80,1.010,2,3,53.0,1.8,137.53,4.63,9.6,7500,4.71,0.0
3,70,1.005,4,0,56.0,3.8,111.00,2.50,11.2,6700,3.90,1.0
4,80,1.010,2,0,26.0,1.4,137.53,4.63,11.6,7300,4.60,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
395,80,1.020,0,0,49.0,0.5,150.00,4.90,15.7,6700,4.90,0.0
396,70,1.025,0,0,31.0,1.2,141.00,3.50,16.5,7800,6.20,0.0
397,80,1.020,0,0,26.0,0.6,137.00,4.40,15.8,6600,5.40,0.0
398,60,1.025,0,0,50.0,1.0,135.00,4.90,14.2,7200,5.90,0.0


In [13]:
# Display the label series.
y

0      1
1      1
2      1
3      1
4      1
      ..
395    0
396    0
397    0
398    0
399    0
Name: Target, Length: 400, dtype: int64

 # Splitting the Training data and Test data

In [14]:
# Hold out 20% of the rows as a test split. stratify=y is passed so the split is
# stratified on the label, and random_state is fixed so the split is repeatable.
x_train, x_test, y_train, _y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    stratify=y,
    random_state=2,
)

In [15]:
# Sanity-check the split sizes: full feature table, training part, test part.
print(*(frame.shape for frame in (x, x_train, x_test)))

(400, 12) (320, 12) (80, 12)


# Model training: Logistic Regression

In [16]:
# Logistic-regression classifier built with scikit-learn's default settings.
# NOTE(review): the features are not scaled anywhere in the visible cells and all
# warnings are silenced in the import cell, so a ConvergenceWarning (if any) would
# go unnoticed — TODO confirm the solver actually converges.
model = LogisticRegression()

In [17]:
# Fit the classifier on the training split. The features are handed over as a
# bare NumPy array, so the fitted model does not record column names.
model.fit(x_train.to_numpy(), y_train)

LogisticRegression()

Evaluation

Accuracy score

In [18]:
# Accuracy on the training data.
# The model was fitted on a plain NumPy array, so predict on the same
# representation; passing the DataFrame itself makes scikit-learn warn that the
# input has feature names the model was not fitted with.
x_train_prediction = model.predict(x_train.values)
# accuracy_score's signature is (y_true, y_pred): ground truth goes first.
training_data_accuracy = accuracy_score(y_train, x_train_prediction)
# NOTE(review): only the training split is scored in the visible cells; the
# held-out split (x_test / _y_test) is never evaluated, so this figure does not
# measure generalisation.

In [19]:
# Report the training-set accuracy.
print("Accuracy on training data : ",training_data_accuracy)

Accuracy on training data :  0.96875


# Building Predictive system

In [20]:
# One patient's measurements, ordered like the training columns:
# Bp, Sg, Al, Su, Bu, Sc, Sod, Pot, Hemo, Wbcc, Rbcc, Htn
# NOTE(review): these values equal row 0 of the dataset, whose label is 1 —
# compare that with the message printed below.
input_data = (80,1.02,1,0,36,1.2,137.53,4.63,15.4,7800,5.2,1)

# predict() is given a 2-D array, so the 12 values become a single row: shape (1, 12)
input_data_array = np.asarray(input_data)
input_data_reshaped = input_data_array.reshape(1, -1)

prediction = model.predict(input_data_reshaped)

# label 0 is reported as "no kidney disease"; any other label as diseased
print(
    "The person does not have a Kidney disease"
    if prediction[0] == 0
    else "The person has a Kidney disease"
)

The person does not have a Kidney disease


# Saving the trained model

In [21]:
import pickle

In [22]:
# Persist the fitted model to disk with pickle.
filename = 'kidneydisease_model.sav'
# A context manager guarantees the file is flushed and closed as soon as the
# dump finishes, instead of leaving the open handle to the garbage collector.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

In [23]:
# Load the saved model back from disk.
# Read from the same path the model was written to ('kidneydisease_model.sav');
# no cell in this notebook creates a 'heartdisease_model.sav' file.
# NOTE: pickle.load can execute arbitrary code, so only load files you produced yourself.
with open('kidneydisease_model.sav', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

In [24]:
# List the feature names, one per line, in the order the model expects them.
for column in x.columns.tolist():
    print(column)

Bp
Sg
Al
Su
Bu
Sc
Sod
Pot
Hemo
Wbcc
Rbcc
Htn


# Thank you