<h1>Importing Libraries</h1>

: 

In [None]:

import pandas as pd  # data loading and manipulation
import numpy as np  # numerical computing
from collections import Counter as c  # counts occurrences of each distinct value
import matplotlib.pyplot as plt  # plotting
import seaborn as sns  # statistical plotting
import missingno as msno  # visualising missing values
from sklearn.metrics import accuracy_score, confusion_matrix  # model evaluation
from sklearn.model_selection import train_test_split  # random train/test partitioning
from sklearn.preprocessing import LabelEncoder  # encodes categorical levels as integers
from sklearn.linear_model import LogisticRegression  # classification algorithm
import pickle  # serialises a Python object hierarchy into a byte stream


: 

<h1>Loading Dataset</h1>

In [None]:
# Read the chronic-kidney-disease CSV (relative path, resolved against the working directory)
dataset_path = r"Datasets/chronickidneydisease.csv"
data = pd.read_csv(dataset_path)

# Preview the first 10 rows
data.head(n=10)

: 

In [None]:
# Drop the 'id' column.
# Re-binding `data` (rather than inplace=True) together with errors='ignore'
# keeps this cell safe to re-run once 'id' has already been removed.
data = data.drop(columns=['id'], errors='ignore')

: 

<h2>Renaming the columns</h2>

In [None]:
# Display the current column labels of the frame
data.columns

: 

In [None]:
# Replace the column labels with descriptive names.
# The assignment is positional: one name per existing column, in column order.
descriptive_names = [
    'age',
    'blood_pressure',
    'specific_gravity',
    'albumin',
    'sugar',
    'red_blood_cells',
    'pus_cell',
    'pus_cell_clumps',
    'bacteria',
    'blood glucose random',
    'blood_urea',
    'serum_creatinine',
    'sodium',
    'potassium',
    'hemoglobin',
    'packed_cell_volume',
    'white_blood_cell_count',
    'red_blood_cell_count',
    'hypertension',
    'diabetesmellitus',
    'coronary_artery_disease',
    'appetite',
    'pedal_edema',
    'anemia',
    'class',
]
data.columns = descriptive_names
data.columns

: 

In [None]:
# Print a summary of the frame: column names, non-null counts and dtypes
data.info()

: 

<h1>Target Column</h1>

In [None]:
# List the distinct values present in the target column
data['class'].unique()


: 

<h1>Rectifying the target column</h1>

In [None]:
# Normalise the target labels: the value "ckd\t" (trailing tab) is mapped to "ckd"
target_label_fixes = {"ckd\t": "ckd"}
data['class'] = data['class'].replace(target_label_fixes)
data['class'].unique()

: 

In [None]:
# Start the categorical set with the names of all object-dtype columns
is_object_col = data.dtypes == 'O'
catcols = set(is_object_col[is_object_col].index)
print(catcols)

: 

In [None]:
# For each candidate categorical column, print how often every value occurs
divider = '*'*120+'\n'
for col_name in catcols:
    print("Columns :", col_name)
    print(c(data[col_name]))  # Counter mapping value -> number of occurrences
    print(divider)

: 

<h1>Removing the columns which are not categorical</h1>

In [None]:
# Take the three count/volume columns out of the categorical set;
# the notebook treats them as continuous further down.
for numeric_name in ('red_blood_cell_count', 'packed_cell_volume', 'white_blood_cell_count'):
    catcols.remove(numeric_name)  # set.remove raises KeyError if the name is absent
print(catcols)


: 

In [None]:
# Start the continuous set with the names of all non-object columns
is_not_object_col = data.dtypes != 'O'
contcols = set(is_not_object_col[is_not_object_col].index)
print(contcols)

: 

In [None]:
# For each candidate continuous column, print how often every value occurs
divider = '*'*120+'\n'
for col_name in contcols:
    print("Continous Columns:", col_name)
    print(c(data[col_name]))  # Counter mapping value -> number of occurrences
    print(divider)

: 

<h1>Removing the columns which are not numerical</h1>

In [None]:
# These numeric-typed columns are treated as categorical, so drop them from the continuous set
for categorical_name in ('specific_gravity', 'albumin', 'sugar'):
    contcols.remove(categorical_name)  # set.remove raises KeyError if the name is absent
print(contcols)

: 

<h1>Adding columns which we found continuous</h1>

In [None]:
# Register the three count/volume columns as continuous
for numeric_name in ('red_blood_cell_count', 'packed_cell_volume', 'white_blood_cell_count'):
    contcols.add(numeric_name)
print(contcols)

: 

<h1>Adding columns which we found categorical</h1>


In [None]:
# Register the three numeric-typed columns as categorical
for categorical_name in ('specific_gravity', 'albumin', 'sugar'):
    catcols.add(categorical_name)
print(catcols)

: 

<h1>Rectifying the categorical column classes</h1>

In [None]:
# Map the tab-prefixed level '\tno' onto the regular 'no' level, then show the value counts
cad_label_fixes = {'\tno': 'no'}
data['coronary_artery_disease'] = data['coronary_artery_disease'].replace(cad_label_fixes)
c(data['coronary_artery_disease'])

: 

In [None]:
# Collapse the whitespace-polluted variants onto the clean 'yes' / 'no' levels,
# then show the value counts
diabetes_label_fixes = {'\tno': 'no', '\tyes': 'yes', ' yes': 'yes'}
data['diabetesmellitus'] = data['diabetesmellitus'].replace(diabetes_label_fixes)
c(data['diabetesmellitus'])

: 

<h1>Null Values</h1>

In [None]:
# Per column: True if the column contains at least one missing value
data.isnull().any()

: 

In [None]:
# Number of missing values in each column.
# isnull() yields a boolean mask; summing it counts the True (missing) cells.
# (.count() on that mask would only return the total row count for every column.)
data.isnull().sum()

: 

In [None]:
# Convert the three count/volume columns to numeric dtype;
# errors='coerce' turns any value that cannot be parsed into NaN.
for measurement in ('packed_cell_volume', 'red_blood_cell_count', 'white_blood_cell_count'):
    data[measurement] = pd.to_numeric(data[measurement], errors='coerce')

: 

<h1>Handling Continuous/Numerical Columns Missing Values</h1>

In [None]:
# Impute missing values in the continuous columns with the column mean.
# The result is assigned back to the frame: calling fillna(inplace=True) on
# data[col] is chained assignment, which pandas deprecates and which leaves
# `data` unchanged when Copy-on-Write is active.
mean_imputed_cols = [
    'blood glucose random',
    'blood_pressure',
    'blood_urea',
    'hemoglobin',
    'packed_cell_volume',
    'potassium',
    'red_blood_cell_count',
    'serum_creatinine',
    'sodium',
    'white_blood_cell_count',
]
for col in mean_imputed_cols:
    data[col] = data[col].fillna(data[col].mean())


: 

In [None]:
# Impute missing values with the most frequent value (first mode) of each column.
# The result is assigned back to the frame: calling fillna(inplace=True) on
# data[col] is chained assignment, which pandas deprecates and which leaves
# `data` unchanged when Copy-on-Write is active.
mode_imputed_cols = [
    'age',  # numeric column, imputed with the mode here as in the original notebook
    'specific_gravity',
    'albumin',
    'sugar',
    'red_blood_cells',
    'pus_cell',
    'pus_cell_clumps',
    'bacteria',
    'blood glucose random',  # has no effect if the mean imputation already filled this column
    'hypertension',
    'diabetesmellitus',
    'coronary_artery_disease',
    'appetite',
    'pedal_edema',
    'anemia',
    'class',
]
for col in mode_imputed_cols:
    data[col] = data[col].fillna(data[col].mode()[0])

: 

In [None]:
# Count the missing values left in each column (expected to be zero after imputation — TODO confirm)
data.isna().sum()

: 

<h1>Label encoding</h1>

In [None]:
# Label-encode every column in `catcols` (LabelEncoder is imported in the imports cell).
# Each fitted encoder is kept in `label_encoders`, keyed by column name, so the
# integer codes can later be mapped back to the original labels
# (encoder.classes_ / encoder.inverse_transform).
# Columns are visited in sorted order so the printed log is identical on every run;
# iteration order of a set of strings can differ between kernel sessions.
label_encoders = {}
for i in sorted(catcols):
    print("LABEL ENCODING OF :",i)
    LEi = LabelEncoder()  # a fresh encoder per column
    print(c(data[i]))  # value counts before encoding
    data[i] = LEi.fit_transform(data[i])  # replace the labels with integer codes
    label_encoders[i] = LEi
    print(c(data[i]))  # value counts after encoding
    print('*'*100)


: 

In [None]:
# Feature columns used by the model, in the column order the model is trained on
features_name = ['blood_urea','blood glucose random','coronary_artery_disease','anemia','pus_cell',
    'red_blood_cells','diabetesmellitus','pedal_edema']
# Select by label so that a missing or misspelled column raises KeyError;
# pd.DataFrame(data, columns=...) would silently add an all-NaN column instead.
# .copy() gives x and y their own data, independent of `data`.
x = data[features_name].copy()
y = data[['class']].copy()  # double brackets keep y a single-column DataFrame
print(x.shape)
print(y.shape)

: 

In [None]:
# Count the missing values in each column of the full frame before splitting
data.isna().sum()

: 

In [None]:
# Hold out 20% of the rows for testing; random_state pins the shuffle.
# (train_test_split is already imported in the imports cell.)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=2)
for split in (x_train, y_train, x_test, y_test):
    print(split.shape)

: 

In [None]:
# Fit a logistic-regression classifier on the training split.
# (LogisticRegression is already imported in the imports cell.)
# Plain NumPy arrays are passed in, so the model is fitted without feature names.
lgr = LogisticRegression(max_iter=1000, solver='lbfgs')
train_features = x_train.values
train_labels = y_train.values.ravel()  # flatten the single-column target to 1-D
lgr.fit(train_features, train_labels)

: 

In [None]:
# Predict the class of every held-out row.
# The model was fitted on a plain array (x_train.values), so an array is passed here too;
# passing the DataFrame makes scikit-learn warn that X has feature names
# although the estimator was fitted without them.
y_pred = lgr.predict(x_test.values)

: 

In [None]:
# Predict for a single hand-written sample; the values follow the order of `features_name`
sample_row = [[90,157,1,0,0,1,1,1]]
y_pred1 = lgr.predict(sample_row)
print(y_pred1)
# Tally of the predicted classes on the test split
c(y_pred)

: 

In [None]:
# Fraction of test rows whose predicted class equals the true class
accuracy_score(y_true=y_test, y_pred=y_pred)

: 

<h1>Confusion matrix of our model</h1>

In [None]:
# Confusion matrix on the test split: rows are true classes, columns are predicted classes
conf_mat = confusion_matrix(y_true=y_test, y_pred=y_pred)
conf_mat

: 

In [None]:
# Serialise the trained model to 'CKD.pkl'.
# The context manager ensures the file handle is flushed and closed,
# even if pickling raises.
with open('CKD.pkl', 'wb') as model_file:
    pickle.dump(lgr, model_file)

: 

: 