In [None]:
import pandas as pd
import numpy as np

#preprocessing tools
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,MinMaxScaler

#Classification Models
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier

#for model evaluation
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score

In [None]:
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.51.0-py3-none-any.whl.metadata (9.5 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.51.0-py3-none-any.whl (10.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.2/10.2 MB[0m [31m69.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m120.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydeck, streamlit
Successfully installed pydeck-0.9.1 streamlit-1.51.0


In [None]:
import streamlit as st

In [None]:
#Load Dataset
@st.cache_data
def load_data(path='/content/kidney_disease.csv'):
    """Read the kidney-disease CSV and return it as a DataFrame.

    Parameters
    ----------
    path : str, optional
        Location of the CSV file. Defaults to the path used throughout
        this notebook.

    Returns
    -------
    pandas.DataFrame
        The raw, unprocessed dataset.
    """
    # The frame must be returned: without it the function yields None.
    return pd.read_csv(path)




In [None]:
# Read the raw kidney-disease CSV into `df`, the frame every later cell works on,
# then preview its first two rows.
df=pd.read_csv('/content/kidney_disease.csv')
df.head(2)

Unnamed: 0,id,age,bp,sg,al,su,rbc,pc,pcc,ba,...,pcv,wc,rc,htn,dm,cad,appet,pe,ane,classification
0,0,48.0,80.0,1.02,1.0,0.0,,normal,notpresent,notpresent,...,44,7800,5.2,yes,yes,no,good,no,no,ckd
1,1,7.0,50.0,1.02,4.0,0.0,,normal,notpresent,notpresent,...,38,6000,,no,no,no,good,no,no,ckd


Short definitions for each important feature
'age': 'Patient age (years)',

'bp': 'Blood pressure (mmHg)',

'sg': 'Urine specific gravity',

'al': 'Albumin in urine (0–5)',

'hemo': 'Hemoglobin level (g/dL)',

'sc': 'Serum creatinine (mg/dL)',

'htn': 'Hypertension (yes/no)',

'dm': 'Diabetes mellitus (yes/no)',

'cad': 'Coronary artery disease (yes/no)',

'appet': 'Appetite status (good/poor)',

'pc': 'Pus cell status (normal/abnormal)',

'classification': 'CKD diagnosis (ckd/notckd)'

In [None]:
# Keep only the features used for modelling, plus the target column.
important_columns = ['age', 'bp', 'sg', 'al', 'hemo', 'sc','htn','dm','cad','appet','pc','classification']

# Take an explicit copy so the column assignments in later cells operate on an
# independent frame and do not trigger chained-assignment warnings.
df = df[important_columns].copy()

# Preview a few rows instead of rendering the whole frame.
df.head()

Unnamed: 0,age,bp,sg,al,hemo,sc,htn,dm,cad,appet,pc,classification
0,48.0,80.0,1.020,1.0,15.4,1.2,yes,yes,no,good,normal,ckd
1,7.0,50.0,1.020,4.0,11.3,0.8,no,no,no,good,normal,ckd
2,62.0,80.0,1.010,2.0,9.6,1.8,no,yes,no,poor,normal,ckd
3,48.0,70.0,1.005,4.0,11.2,3.8,yes,no,no,poor,abnormal,ckd
4,51.0,80.0,1.010,2.0,11.6,1.4,no,no,no,good,normal,ckd
...,...,...,...,...,...,...,...,...,...,...,...,...
395,55.0,80.0,1.020,0.0,15.7,0.5,no,no,no,good,normal,notckd
396,42.0,70.0,1.025,0.0,16.5,1.2,no,no,no,good,normal,notckd
397,12.0,80.0,1.020,0.0,15.8,0.6,no,no,no,good,normal,notckd
398,17.0,60.0,1.025,0.0,14.2,1.0,no,no,no,good,normal,notckd


In [None]:
# Count each distinct raw value in 'cad' to spot inconsistent labels.
df['cad'].value_counts()

Unnamed: 0_level_0,count
cad,Unnamed: 1_level_1
no,362
yes,34
\tno,2


In [None]:
# Clean the dataset: for every text (object) column, trim surrounding
# whitespace and then remove any remaining tab characters.
text_columns = df.select_dtypes(include='object').columns
for col in text_columns:
    trimmed = df[col].str.strip()
    df[col] = trimmed.str.replace('\t', '', regex=True)

# Show the label counts of three cleaned columns to confirm the result.
for checked_col in ('cad', 'dm', 'classification'):
    print(df[checked_col].value_counts())

cad
no     364
yes     34
Name: count, dtype: int64
dm
no     261
yes    137
Name: count, dtype: int64
classification
ckd       250
notckd    150
Name: count, dtype: int64


In [None]:
# Check missing values: number of null entries in each column
df.isnull().sum()

Unnamed: 0,0
age,9
bp,12
sg,47
al,46
hemo,52
sc,17
htn,2
dm,2
cad,2
appet,1


In [None]:
# Fill missing values with appropriate methods.
# Continuous measurements are filled with the column median; discrete or
# categorical columns are filled with the most frequent value (mode).
median_columns = ['age', 'bp', 'hemo', 'sc']
mode_columns = ['sg', 'al', 'htn', 'dm', 'cad', 'appet', 'pc']

fill_values = {name: df[name].median() for name in median_columns}
fill_values.update({name: df[name].mode()[0] for name in mode_columns})

# A single DataFrame-level fillna replaces the chained
# `df[col].fillna(..., inplace=True)` calls, which pandas warns will stop
# modifying `df` in pandas 3.0.
df = df.fillna(fill_values)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['age'].fillna(df['age'].median(), inplace=True)         # Numerical → median
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['bp'].fillna(df['bp'].median(), inplace=True)           # Numerical → median
The behavior will change in pandas 3.0. This inplace method will never w

In [None]:
# Re-count null entries per column to confirm the imputation step.
df.isnull().sum()

Unnamed: 0,0
age,0
bp,0
sg,0
al,0
hemo,0
sc,0
htn,0
dm,0
cad,0
appet,0


In [None]:
#Encoding: translate each categorical label into a 0/1 integer code.
label_codes = {
    'htn': {'yes': 1, 'no': 0},
    'dm': {'yes': 1, 'no': 0},
    'cad': {'yes': 1, 'no': 0},
    'appet': {'good': 1, 'poor': 0},
    'pc': {'normal': 1, 'abnormal': 0},
    'classification': {'ckd': 1, 'notckd': 0},
}

# Values not listed in a column's mapping become NaN, so this cell should be
# run once on the cleaned string columns.
for column_name, codes in label_codes.items():
    df[column_name] = df[column_name].map(codes)

In [None]:
# Preview the first two rows after encoding.
df.head(2)

Unnamed: 0,age,bp,sg,al,hemo,sc,htn,dm,cad,appet,pc,classification
0,48.0,80.0,1.02,1.0,15.4,1.2,1,1,0,1,1,1
1,7.0,50.0,1.02,4.0,11.3,0.8,0,0,0,1,1,1


In [None]:
#Scaling: Normalization
# Columns rescaled by the MinMaxScaler below; the 0/1-encoded columns are left as-is.
numeric_columns=['age','bp','sg','al','hemo','sc']

# `scaler` keeps the fitted per-column parameters, so any new input must go
# through scaler.transform(...) before it is passed to a model trained on `df`.
scaler=MinMaxScaler()

# NOTE(review): the scaler is fit on the full dataset, before the train/test
# split made in a later cell, so test rows influence the scaling parameters.
# Consider fitting on the training split only.
df[numeric_columns]=scaler.fit_transform(df[numeric_columns])
df.head(2)

Unnamed: 0,age,bp,sg,al,hemo,sc,htn,dm,cad,appet,pc,classification
0,0.522727,0.230769,0.75,0.2,0.836735,0.010582,1,1,0,1,1,1
1,0.056818,0.0,0.75,0.8,0.557823,0.005291,0,0,0,1,1,1


In [None]:
# Class balance of the encoded target (1 = ckd, 0 = notckd).
df['classification'].value_counts()

Unnamed: 0_level_0,count
classification,Unnamed: 1_level_1
1,250
0,150


In [None]:
# Separate the target vector (y) from the feature matrix (X).
y = df['classification']
X = df.drop(columns=['classification'])

In [None]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.2,random_state=42)


In [None]:
# Training and testing multiple classifiers.
# Estimators with internal randomness get a fixed random_state so results are
# reproducible after a kernel restart. LogisticRegression gets a higher
# max_iter because the default limit triggered a convergence warning.
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Support Vector Classifier": SVC(),
    "Random Forest Classifier": RandomForestClassifier(random_state=42),
    "K Nearest Neighbors": KNeighborsClassifier(),
    "Decision Tree Classifier": DecisionTreeClassifier(random_state=42),
    "Gaussian Naive Bayes": GaussianNB(),
    "AdaBoost Classifier": AdaBoostClassifier(random_state=42),
    "Gradient Boosting Classifier": GradientBoostingClassifier(random_state=42)
}


# Train and evaluate each model on the same train/test split.
# NOTE: after the loop, `model` and `y_pred` still refer to the LAST entry in
# `models`; do not treat them as "the chosen model" in later cells.
for name, model in models.items():
    print("="*50)
    print("Model:", name)
    # Train the model
    model.fit(X_train, y_train)

    # Predict on test set
    y_pred = model.predict(X_test)

    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    classification_rep = classification_report(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)

    # Print metrics
    print("Accuracy:", accuracy)
    print("Classification Report:\n", classification_rep)
    print("Confusion Matrix:\n", conf_matrix)

Model: Logistic Regression
Accuracy: 0.975
Classification Report:
               precision    recall  f1-score   support

           0       0.93      1.00      0.97        28
           1       1.00      0.96      0.98        52

    accuracy                           0.97        80
   macro avg       0.97      0.98      0.97        80
weighted avg       0.98      0.97      0.98        80

Confusion Matrix:
 [[28  0]
 [ 2 50]]
Model: Support Vector Classifier
Accuracy: 0.7875
Classification Report:
               precision    recall  f1-score   support

           0       0.72      0.64      0.68        28
           1       0.82      0.87      0.84        52

    accuracy                           0.79        80
   macro avg       0.77      0.75      0.76        80
weighted avg       0.78      0.79      0.78        80

Confusion Matrix:
 [[18 10]
 [ 7 45]]
Model: Random Forest Classifier


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        28
           1       1.00      1.00      1.00        52

    accuracy                           1.00        80
   macro avg       1.00      1.00      1.00        80
weighted avg       1.00      1.00      1.00        80

Confusion Matrix:
 [[28  0]
 [ 0 52]]
Model: K Nearest Neighbors
Accuracy: 0.8625
Classification Report:
               precision    recall  f1-score   support

           0       0.74      0.93      0.83        28
           1       0.96      0.83      0.89        52

    accuracy                           0.86        80
   macro avg       0.85      0.88      0.86        80
weighted avg       0.88      0.86      0.87        80

Confusion Matrix:
 [[26  2]
 [ 9 43]]
Model: Decision Tree Classifier
Accuracy: 0.9875
Classification Report:
               precision    recall  f1-score   support

           0       0.97      1.00   

In [None]:
# Fit the model that is evaluated below.
# NOTE: despite the "gbc" suffix this is an AdaBoostClassifier; the variable
# name is kept so existing references keep working. A fixed random_state makes
# the fitted model reproducible across runs.
model_gbc = AdaBoostClassifier(random_state=42)
model_gbc.fit(X_train,y_train)

# Predictions on the held-out test set.
y_pred = model_gbc.predict(X_test)

print("confusion matrix \n: ", confusion_matrix(y_test,y_pred))
print("classification report \n: ", classification_report(y_test, y_pred))

confusion matrix 
:  [[28  0]
 [ 0 52]]
classification report 
:                precision    recall  f1-score   support

           0       1.00      1.00      1.00        28
           1       1.00      1.00      1.00        52

    accuracy                           1.00        80
   macro avg       1.00      1.00      1.00        80
weighted avg       1.00      1.00      1.00        80



In [None]:
# Show the test-set accuracy of the most recent predictions held in `y_pred`.
st.subheader("📊 Model Performance")
test_accuracy = accuracy_score(y_test, y_pred)
st.write("Accuracy:", test_accuracy)

# ------ Sidebar User Input ------
st.sidebar.header("Enter Patient Details")

def get_user_input():
    """Collect patient details from sidebar widgets.

    Returns
    -------
    pandas.DataFrame
        A single-row frame with the columns age, bp, sg, al, hemo, sc, htn,
        dm, cad, appet and pc. The numeric values are returned exactly as
        entered; the yes/no style choices are converted to 1/0 flags.
    """
    sidebar = st.sidebar

    def as_flag(choice, positive_label):
        # 1 when the selected option equals the "positive" label, otherwise 0.
        return 1 if choice == positive_label else 0

    # Dict entries are evaluated top to bottom, so the widgets are created in
    # the same order as the keys appear here.
    record = {
        "age": sidebar.slider("Age", 1, 90, 45),
        "bp": sidebar.slider("Blood Pressure", 50, 180, 80),
        "sg": sidebar.selectbox("Specific Gravity", [1.005, 1.010, 1.015, 1.020, 1.025]),
        "al": sidebar.slider("Albumin", 0, 5, 1),
        "hemo": sidebar.slider("Hemoglobin", 3.0, 17.0, 12.0),
        "sc": sidebar.slider("Serum Creatinine", 0.1, 15.0, 1.2),
        "htn": as_flag(sidebar.selectbox("Hypertension", ['yes', 'no']), 'yes'),
        "dm": as_flag(sidebar.selectbox("Diabetes Mellitus", ['yes', 'no']), 'yes'),
        "cad": as_flag(sidebar.selectbox("Coronary Artery Disease", ['yes', 'no']), 'yes'),
        "appet": as_flag(sidebar.selectbox("Appetite", ['good', 'poor']), 'good'),
        "pc": as_flag(sidebar.selectbox("Pus Cell", ['normal', 'abnormal']), 'normal'),
    }

    return pd.DataFrame([record])

user_df = get_user_input()

# ----- Prediction -----
if st.sidebar.button("🔍 Predict"):
    # The classifiers were trained on MinMax-scaled numeric features, so the
    # raw sidebar values must go through the same fitted scaler first.
    # Columns are also put in the training order (X.columns) and cast to float
    # so the scaled values can be written back.
    model_input = user_df[X.columns].astype(float)
    model_input[numeric_columns] = scaler.transform(model_input[numeric_columns])

    # Predict with model_gbc, the model whose test accuracy is reported above.
    # The bare name `model` is only the leftover loop variable from the
    # model-comparison cell (the last classifier in that dict).
    prediction = model_gbc.predict(model_input)[0]

    st.subheader("🩺 Prediction Result")
    if prediction == 1:
        st.error("⚠️ Patient is likely to have **Chronic Kidney Disease (CKD)**.")
    else:
        st.success("✅ Patient does **NOT** have CKD.")

st.write("---")
st.write("Developed by **Jay Lamichhane**")

