<a href="https://colab.research.google.com/github/anasbinayub/CKD-prediction-from-CT-image/blob/main/CKD_Prediction_from_Kidney_CT_Image.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Install necessary kaggle libraries**

In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel;
# -q keeps the installer log out of the notebook output.
%pip install -q kaggle



# **Upload kaggle token file**

In [None]:
from google.colab import files

# Bind the result instead of leaving the call as the cell's last expression:
# files.upload() returns a {filename: file_bytes} dict, so echoing it would
# write the contents of kaggle.json (the API key) into the saved cell output.
uploaded_files = files.upload()

In [None]:
# -p makes the cell safe to re-run: plain mkdir fails once ~/.kaggle exists.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
# Restrict the token file to its owner (read/write only).
!chmod 600 ~/.kaggle/kaggle.json

# **Download CKD image dataset from kaggle**

In [None]:
# Fetch the CT kidney dataset (Normal / Cyst / Tumor / Stone) with the Kaggle CLI;
# the unzip step that follows expects the archive in the current directory.
!kaggle datasets download -d nazmul0087/ct-kidney-dataset-normal-cyst-tumor-and-stone

## **Unzip the folder**

In [None]:
# -q keeps the per-file extraction lines out of the cell output.
# -o overwrites existing files without asking, so re-running the cell cannot
#    stall on unzip's interactive "replace?" prompt.
!unzip -q -o ct-kidney-dataset-normal-cyst-tumor-and-stone.zip

# **Import necessary libraries**

In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
from skimage.io import imread
import cv2
sns.set()

# **Select the dataset path**

In [None]:
import os

# The archive unpacks into a folder that contains a same-named folder; the
# class sub-directories (Normal, Cyst, Stone, Tumor) are read from the inner one.
base_path = "CT-KIDNEY-DATASET-Normal-Cyst-Tumor-Stone/CT-KIDNEY-DATASET-Normal-Cyst-Tumor-Stone"
train_dir = base_path

# Fail fast: globbing a missing directory yields no files without any error,
# which would only surface much later as an empty dataset.
if not os.path.isdir(train_dir):
    raise FileNotFoundError(
        f"Dataset directory not found: {train_dir!r} - run the download and unzip cells first"
    )


In [None]:
# Build the path of each class sub-directory (Normal, Cyst, Stone, Tumor) under train_dir
Normal_Cases_dir = os.path.join(train_dir, 'Normal')
Cyst_Cases_dir = os.path.join(train_dir, 'Cyst')
Stone_Cases_dir = os.path.join(train_dir, 'Stone')
Tumor_Cases_dir = os.path.join(train_dir, 'Tumor')

In [None]:
# Collect the .jpg files of every class.
# Path.glob returns a one-shot generator in filesystem order; sorting it into a
# list gives a stable order and lets the result be iterated more than once.
Normal_Cases = sorted(Path(Normal_Cases_dir).glob('*.jpg'))
Cyst_Cases = sorted(Path(Cyst_Cases_dir).glob('*.jpg'))
Stone_Cases = sorted(Path(Stone_Cases_dir).glob('*.jpg'))
Tumor_Cases = sorted(Path(Tumor_Cases_dir).glob('*.jpg'))

In [None]:
# Accumulator for (image_path, label) pairs; it is filled class by class
train_data = list()

In [None]:
# Append one (image_path, class_id) pair per file. The position in the tuple
# below is the class id: Cyst -> 0, Normal -> 1, Stone -> 2, Tumor -> 3
for class_id, class_paths in enumerate((Cyst_Cases, Normal_Cases, Stone_Cases, Tumor_Cases)):
    train_data.extend((img, class_id) for img in class_paths)

In [None]:
# Turn the (image_path, label) pairs into a two-column DataFrame
train_data = pd.DataFrame(train_data, columns=['image', 'label'])
# Shuffle the rows and renumber the index; the fixed seed makes the order
# the same on every run
train_data = train_data.sample(frac=1., random_state=42).reset_index(drop=True)
train_data.head()

In [None]:
# Distinct label values present in train_data, in order of first appearance
pd.unique(train_data['label'])

In [None]:
# (number of rows, number of columns) of train_data
train_data.shape

# **Plotting values for each class before preprocessing**

In [None]:
# Number of rows per label value; value_counts() lists the most frequent label first
cases_count = train_data['label'].value_counts()
cases_count

In [None]:
# One colour and one tick label per class, indexed by label id (0..3)
colors = ['#FF9A32', '#C8DB2A', '#35666A', '#EF4687']
class_tick_labels = ['Cyst(0)', 'Normal(1)', 'Stone(2)', 'Tumor(3)']

# value_counts() orders by frequency; re-order by label id and derive each bar's
# name from its label so that names and bars cannot drift apart
counts_by_label = cases_count.sort_index()
bar_names = [class_tick_labels[label] for label in counts_by_label.index]

plt.figure(figsize=(8, 6))  # Size of graph
# hue=<x values> together with legend=False is seaborn's replacement for the
# deprecated "palette without hue" call
sns.barplot(x=bar_names, y=counts_by_label.values, hue=bar_names,
            palette=colors, legend=False)
plt.title('Number of Cases', fontsize=14)
plt.xlabel('Case Type', fontsize=12)
plt.ylabel('Count', fontsize=12)
plt.show()

# **Checking whether the CT scan images loaded successfully**

In [None]:
# First five image paths of every class, concatenated in label order
# (0 = Cyst, 1 = Normal, 2 = Stone, 3 = Tumor)
samples = [
    image_path
    for class_id in range(4)
    for image_path in train_data.loc[train_data['label'] == class_id, 'image'].head(5)
]

In [None]:
# 4 x 5 grid: one row per class, five example scans per row
f, ax = plt.subplots(4, 5, figsize=(12, 8))

# Title shared by every panel of a row; rows follow the order of `samples`
row_titles = ("Cyst_samples", "Normal_samples", "Stone_samples", "Tumor_samples")

for i in range(20):
    row, col = divmod(i, 5)
    panel = ax[row, col]

    img = imread(samples[i])          # load the scan from disk
    panel.imshow(img, cmap='gray')
    panel.set_title(row_titles[row])

    # hide the axis ticks and let the image fill its panel
    panel.axis('off')
    panel.set_aspect('auto')

plt.show()

In [None]:
# Build the path of each class sub-directory (Normal, Cyst, Stone, Tumor) under train_dir
Normal_Cases_dir = os.path.join(train_dir, 'Normal')
Cyst_Cases_dir = os.path.join(train_dir, 'Cyst')
Stone_Cases_dir = os.path.join(train_dir, 'Stone')
Tumor_Cases_dir = os.path.join(train_dir, 'Tumor')

In [None]:
# Re-collect the image paths of every class for the preprocessing loops below.
# sorted() turns each one-shot glob generator into a list with a stable order,
# so the dataset is assembled in the same order on every run.
Normal_Cases = sorted(Path(Normal_Cases_dir).glob('*.jpg'))
Cyst_Cases = sorted(Path(Cyst_Cases_dir).glob('*.jpg'))
Stone_Cases = sorted(Path(Stone_Cases_dir).glob('*.jpg'))
Tumor_Cases = sorted(Path(Tumor_Cases_dir).glob('*.jpg'))

# From here on train_data / train_labels collect pixel arrays and class names;
# train_data no longer refers to the DataFrame of image paths.
train_data = []
train_labels = []

# **Data Preprocessing**

## Changing the image shape for each class

In [None]:
for img_path in Cyst_Cases:
    # cv2.imread's default flag always decodes to a 3-channel BGR array, so no
    # grayscale-to-3-channel expansion is needed; it returns None on failure.
    img = cv2.imread(str(img_path))
    if img is None:
        raise ValueError(f"Could not read image: {img_path}")
    img = cv2.resize(img, (28, 28))              # shrink to the 28x28 network input
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)   # BGR -> RGB channel order
    train_data.append(img / 255)                 # scale pixel values to [0, 1]
    train_labels.append('Cyst')

In [None]:
for img_path in Normal_Cases:
    # cv2.imread's default flag always decodes to a 3-channel BGR array, so no
    # grayscale-to-3-channel expansion is needed; it returns None on failure.
    img = cv2.imread(str(img_path))
    if img is None:
        raise ValueError(f"Could not read image: {img_path}")
    img = cv2.resize(img, (28, 28))              # shrink to the 28x28 network input
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)   # BGR -> RGB channel order
    train_data.append(img / 255)                 # scale pixel values to [0, 1]
    train_labels.append('Normal')

In [None]:
for img_path in Stone_Cases:
    # cv2.imread's default flag always decodes to a 3-channel BGR array, so no
    # grayscale-to-3-channel expansion is needed; it returns None on failure.
    img = cv2.imread(str(img_path))
    if img is None:
        raise ValueError(f"Could not read image: {img_path}")
    img = cv2.resize(img, (28, 28))              # shrink to the 28x28 network input
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)   # BGR -> RGB channel order
    train_data.append(img / 255)                 # scale pixel values to [0, 1]
    train_labels.append("Stone")

In [None]:
for img_path in Tumor_Cases:
    # cv2.imread's default flag always decodes to a 3-channel BGR array, so no
    # grayscale-to-3-channel expansion is needed; it returns None on failure.
    img = cv2.imread(str(img_path))
    if img is None:
        raise ValueError(f"Could not read image: {img_path}")
    img = cv2.resize(img, (28, 28))              # shrink to the 28x28 network input
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)   # BGR -> RGB channel order
    train_data.append(img / 255)                 # scale pixel values to [0, 1]
    train_labels.append("Tumor")

## **Checking the dataset shape and labels**

In [None]:
# Stack the per-image arrays and the class names into NumPy arrays
train_data1 = np.array(train_data)
train_labels1 = np.array(train_labels)
# These arrays hold the whole dataset (nothing has been split off yet), so the
# message must not call them validation examples
print('Total number of examples: ', train_data1.shape)
print('Total number of Labels : ', train_labels1.shape)

In [None]:
# Wrap the class names in a single-column DataFrame called 'label'
train_labels1 = pd.DataFrame(data=train_labels1, columns=['label'])
train_labels1

## **Finding the unique values in the train dataset**

In [None]:
# Distinct values of the 'label' column, to confirm which class names are present
train_labels1['label'].unique()

## **Label Encoding**

In [None]:
# Replace each class name by its integer id; a name missing from the mapping becomes NaN
train_labels1['label'] = train_labels1['label'].map(
    dict(Cyst=0, Normal=1, Stone=2, Tumor=3)
)

In [None]:
# Shapes of the image array and of the label frame, one per line
for dataset_part in (train_data1, train_labels1):
    print(dataset_part.shape)

## **Checking null values**

In [None]:
# Count of missing values per column of the label frame
train_labels1.isna().sum()

## **Solving Dataset Imbalance Using SMOTE**

In [None]:
from imblearn.over_sampling import SMOTE

# Fixed seed so that the synthetic samples are the same on every run
smote = SMOTE(random_state=42)
train_rows = len(train_data1)  # number of images
# SMOTE expects a 2-D feature matrix: flatten every image into one row
train_data1 = train_data1.reshape(train_rows, -1)
# NOTE(review): the oversampling runs before the train/test split further down,
# so synthetic samples interpolated from real images can land on both sides of
# that split; resampling only the training portion would give a cleaner test estimate.
train_data2, train_labels2 = smote.fit_resample(train_data1, train_labels1)

## **Plotting values for each class after preprocessing**

In [None]:
# Number of samples per label after resampling
cases_count1 = train_labels2['label'].value_counts()

# One colour and one tick label per class, indexed by label id (0..3)
colors = ['#FF9A32', '#C8DB2A', '#35666A', '#EF4687']
class_tick_labels1 = ['Cyst(0)', 'Normal(1)', 'Stone(2)', 'Tumor(3)']

# value_counts() orders by frequency; re-order by label id and derive each bar's
# name from its label so that names and bars cannot drift apart
counts_by_label1 = cases_count1.sort_index()
bar_names1 = [class_tick_labels1[label] for label in counts_by_label1.index]

plt.figure(figsize=(8, 6))  # Setting size of graph
# hue=<x values> together with legend=False is seaborn's replacement for the
# deprecated "palette without hue" call
sns.barplot(x=bar_names1, y=counts_by_label1.values, hue=bar_names1,
            palette=colors, legend=False)
plt.title('Number of cases', fontsize=14)
plt.xlabel('Case Type', fontsize=12)
plt.ylabel('Count', fontsize=12)
plt.show()

In [None]:
# Shape of the resampled feature matrix (still one flattened image per row)
train_data2.shape

## **Reshape the train data from 2D to 4D(number_of_samples, height, width, channels)**


We reshape the train_data2 from a 2D array back into a 4D array because Convolutional Neural Networks (CNNs), like the one we are building, expect their input data to be in a specific format.



*   **Number_of_samples = -1**: passing `-1` as the first dimension in `reshape(-1, 28, 28, 3)` tells NumPy to work out the number of samples automatically from the total number of elements and the other three dimensions.




In [None]:
# Back to image layout (samples, height, width, channels); -1 lets NumPy infer the sample count
train_data2 = np.reshape(train_data2, (-1, 28, 28, 3))
train_data2.shape

# **Splitting dataset for training and testing following 80-20 method**

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the data as the test set; stratify keeps the class
# proportions equal in both parts
X_trainval, X_test, y_trainval, y_test = train_test_split(
    train_data2, train_labels2,
    test_size=0.20, random_state=42, stratify=train_labels2['label'],
)

# Take the validation set (10%) out of the remaining 80% ONLY.
# Splitting the full dataset a second time would overwrite X_train with a
# sample that contains test images, i.e. the model would be tested on data
# it was trained on.
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval, y_trainval,
    test_size=0.10, random_state=42, stratify=y_trainval['label'],
)

In [None]:
# Report the shape of every split, then the shape of a single image
shape_report = (
    ("Training Data X : ", X_train),
    ("Testing Data X : ", X_test),
    ("Training Data y : ", y_train),
    ("Testing Data y : ", y_test),
    ("Validating Data X : ", X_val),
    ("Validating Data y :", y_val),
    ("Image Size : ", X_train[0]),
)
for caption, split_part in shape_report:
    print(caption, split_part.shape)

# **CNN Model Building**

In [None]:
from tensorflow.keras import layers, models

# Three Conv2D + MaxPooling stages followed by a 4-way softmax classifier.
# The input shape is declared with an explicit Input layer (the form Keras
# recommends for Sequential models instead of input_shape= on the first layer),
# and the classifier head is part of the same layer list.
model = models.Sequential([
    layers.Input(shape=(28, 28, 3)),
    layers.Conv2D(28, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(4, activation='softmax'),  # one output per class
])

model.summary()

## **Training model**

In [None]:
# Integer class ids as targets -> sparse categorical cross-entropy
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train for 20 epochs, evaluating on the validation split after every epoch
train_inputs, train_targets = np.array(X_train), np.array(y_train)
validation_pair = (np.array(X_val), np.array(y_val))
history = model.fit(
    train_inputs,
    train_targets,
    validation_data=validation_pair,
    epochs=20,
)

## **Loss & Accuracy Curve for training**

In [None]:
# Per-epoch training loss and accuracy recorded by model.fit, on one set of axes
plt.figure(figsize=(10, 6))

curve_specs = (
    ('loss', 'Loss', 'red'),
    ('accuracy', 'Accuracy', 'green'),
)
for history_key, legend_text, line_color in curve_specs:
    plt.plot(history.history[history_key], label=legend_text, linewidth=2, color=line_color)

plt.title("Training Loss & Accuracy Curve")
plt.xlabel("Epoch")
plt.ylabel("Value")
plt.grid(True)
plt.legend()

plt.show()

## **Performance Check for each class**

In [None]:
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score
from sklearn.preprocessing import label_binarize

# 1. TRUE & PREDICTED LABELS
# True labels (from the y_test dataframe)
y_true = y_test['label'].values

# Predicted class probabilities, one column per class
y_pred_prob = model.predict(X_test)

# Predicted class = column with the highest probability
y_pred = np.argmax(y_pred_prob, axis=1)

# 2. CLASS NAMES & class number
# The list position must equal the encoded label: Cyst=0, Normal=1, Stone=2, Tumor=3
class_names = ["Cyst", "Normal", "Stone", "Tumor"]
num_classes = len(class_names)
class_ids = list(range(num_classes))

# 3. AUC REQUIRES BINARIZATION (one indicator column per class)
y_true_bin = label_binarize(y_true, classes=class_ids)

# 4. PER-CLASS SCORES, computed once.
# labels=class_ids pins the order and length of the result to class_names.
precision_per_class = precision_score(y_true, y_pred, labels=class_ids, average=None)
recall_per_class = recall_score(y_true, y_pred, labels=class_ids, average=None)
f1_per_class = f1_score(y_true, y_pred, labels=class_ids, average=None)

# 5. PRINT FINAL TABLE
print("Model (Class) | Precision | Recall | F1-Score | AUC")
print("-"*65)

for i in class_ids:
    # One-vs-rest AUC of class i; the name class_auc keeps `auc` free for sklearn's function
    class_auc = roc_auc_score(y_true_bin[:, i], y_pred_prob[:, i])

    print(f"{class_names[i]:<10} | {precision_per_class[i]:.2f} | {recall_per_class[i]:.2f} | {f1_per_class[i]:.2f} | {class_auc:.2f}")


## **Confusion Matrix**

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Axis labels ordered by encoded label: Cyst=0, Normal=1, Stone=2, Tumor=3
matrix_labels = ["Cyst", "Normal", "Stone", "Tumor"]

conf_matrix = confusion_matrix(y_true, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix, display_labels=matrix_labels)

# Draw on an explicit axes: disp.plot() otherwise opens a figure of its own,
# and a preceding plt.figure(...) call would be left behind as an empty figure.
fig, ax = plt.subplots(figsize=(6, 6))
disp.plot(ax=ax, cmap='plasma', values_format='d')
ax.set_title("Confusion Matrix")
plt.show()

## ROC curve for each class

In [None]:
from sklearn.metrics import roc_curve, auc

# CLASS NAMES & NUMBER
# The list position must equal the encoded label: Cyst=0, Normal=1, Stone=2, Tumor=3
class_names = ["Cyst", "Normal", "Stone", "Tumor"]
num_classes = len(class_names)

fpr = {}      # false positive rate per class
tpr = {}      # true positive rate per class
roc_auc = {}  # area under the ROC curve per class

# One-vs-rest ROC curve of every class
for i in range(num_classes):
    fpr[i], tpr[i], _ = roc_curve(y_true_bin[:, i], y_pred_prob[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# PLOT ALL ROC CURVES
plt.figure(figsize=(8, 6))

for i in range(num_classes):
    plt.plot(fpr[i], tpr[i], label=f"{class_names[i]} (AUC = {roc_auc[i]:.2f})")

# Diagonal reference line (a classifier that guesses at random)
plt.plot([0, 1], [0, 1], linestyle="--")

plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve for Each Class")
plt.legend()
plt.grid(True)
plt.show()

## **PRECISION vs RECALL CURVE**

In [None]:
from sklearn.metrics import precision_recall_curve, average_precision_score

plt.figure(figsize=(8, 6))

# One-vs-rest precision-recall curve of every class
for i in range(num_classes):
    # curve_precision / curve_recall hold the points of this class's curve
    curve_precision, curve_recall, _ = precision_recall_curve(
        y_true_bin[:, i], y_pred_prob[:, i]
    )

    avg_precision = average_precision_score(
        y_true_bin[:, i], y_pred_prob[:, i]
    )

    plt.plot(curve_recall, curve_precision, linewidth=3,
             label=f"{class_names[i]} (AP = {avg_precision:.2f})")

plt.xlabel("Recall")
plt.ylabel("Precision")
# The title previously contained a mis-encoded en dash
plt.title("Precision–Recall Curve (All Classes)")
plt.legend()
plt.grid(True)
plt.show()

## **Bar Chart of Performance**

In [None]:
# Per-class scores; element i belongs to class_names[i]
precision = precision_score(y_true, y_pred, average=None)
recall = recall_score(y_true, y_pred, average=None)
f1 = f1_score(y_true, y_pred, average=None)
# Named auc_scores so that sklearn's `auc` function is not shadowed by a list
auc_scores = [
    roc_auc_score(y_true_bin[:, i], y_pred_prob[:, i])
    for i in range(len(class_names))
]

x = np.arange(len(class_names))
width = 0.2   # Bar width

plt.figure(figsize=(10, 6))

# Four bars per class, centred on the class tick
plt.bar(x - 1.5*width, precision, width, label="Precision")
plt.bar(x - 0.5*width, recall, width, label="Recall")
plt.bar(x + 0.5*width, f1, width, label="F1-Score")
plt.bar(x + 1.5*width, auc_scores, width, label="AUC")

plt.xlabel("Classes")
plt.ylabel("Score")
plt.title("Class-wise Model Performance")
plt.xticks(x, class_names)
plt.ylim(0, 1.05)
plt.legend()
plt.grid(True)

plt.show()

## **Predictive values**

In [None]:
# Model output for every test image; the next cell reads columns 0..3 of each row
a = model.predict(X_test)
print(a)

In [None]:
# Turn every row of class probabilities into the index of its largest entry.
# np.argmax returns the first maximum, so a tie between two classes resolves
# to the lower class id instead of defaulting to class 3.
b = np.argmax(a, axis=1).tolist()

print(b)

# **Grad-CAM (Gradient-weighted Class Activation Mapping)**

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
import tensorflow as tf
import matplotlib.cm as cm
import random

# --- RE-DEFINE UTILITIES (To ensure scope) ---
def get_last_conv_layer_name(model):
    """Return the name of the last layer whose name contains 'conv' (case-insensitive).

    Raises:
        ValueError: if no layer name of ``model.layers`` contains 'conv'.
    """
    conv_layer_names = [
        layer.name for layer in model.layers if 'conv' in layer.name.lower()
    ]
    if not conv_layer_names:
        raise ValueError("No Conv2D layer found.")
    return conv_layer_names[-1]

def make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index=None):
    """Compute a Grad-CAM heatmap for a single image.

    Args:
        img_array: batch holding the image, shaped (1, height, width, channels).
        model: Keras model whose ``layers`` can be applied one after another
            (e.g. a ``Sequential`` model).
        last_conv_layer_name: name of the layer whose activations are explained.
        pred_index: class index to explain; defaults to the top predicted class.

    Returns:
        2-D NumPy array with the spatial size of the chosen layer's output,
        scaled to [0, 1]; all zeros when no activation supports the class.

    Raises:
        ValueError: if no layer of ``model`` is named ``last_conv_layer_name``.
    """
    # Re-wire the layers on a fresh input so that the chosen activation and the
    # final prediction are both outputs of one model. The input shape is taken
    # from the image batch instead of being fixed to 28x28x3.
    inputs = tf.keras.Input(shape=tuple(img_array.shape[1:]))
    x = inputs
    last_conv_layer_output = None
    for layer in model.layers:
        x = layer(x)
        if layer.name == last_conv_layer_name:
            last_conv_layer_output = x

    if last_conv_layer_output is None:
        raise ValueError(f"Layer {last_conv_layer_name!r} not found in the model.")

    grad_model = tf.keras.Model(inputs, [last_conv_layer_output, x])

    with tf.GradientTape() as tape:
        last_conv_layer_output, preds = grad_model(img_array)
        if pred_index is None:
            pred_index = tf.argmax(preds[0])
        class_channel = preds[:, pred_index]

    # Gradient of the class score w.r.t. the activation, averaged per channel
    grads = tape.gradient(class_channel, last_conv_layer_output)
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))

    # Weight every channel by its pooled gradient and sum over channels
    last_conv_layer_output = last_conv_layer_output[0]
    heatmap = last_conv_layer_output @ pooled_grads[..., tf.newaxis]
    heatmap = tf.squeeze(heatmap)

    # Keep positive evidence only and scale to [0, 1]. When the peak is 0 the
    # map is already all zeros, and dividing by it would fill the map with NaN.
    heatmap = tf.maximum(heatmap, 0)
    peak = tf.math.reduce_max(heatmap)
    if peak > 0:
        heatmap = heatmap / peak
    return heatmap.numpy()

def display_gradcam(img, heatmap, alpha=0.4):
    """Colour a Grad-CAM heatmap with the 'jet' colormap and blend it over an image.

    Args:
        img: image array of shape (height, width, 3); the blend is clipped to
            [0, 1], so values are expected in that range.
        heatmap: 2-D array of activations in [0, 1].
        alpha: weight of the coloured heatmap in the overlay.

    Returns:
        (jet_heatmap, superimposed_img): the coloured heatmap resized to the
        image size, and the heatmap-over-image blend clipped to [0, 1].
    """
    # Quantise to the 256 colormap slots; NaN and out-of-range values are
    # neutralised first so the uint8 cast stays well-defined
    heatmap = np.uint8(255 * np.clip(np.nan_to_num(heatmap), 0, 1))
    # plt.get_cmap is used because matplotlib.cm.get_cmap was removed in Matplotlib 3.9
    jet = plt.get_cmap("jet")
    jet_colors = jet(np.arange(256))[:, :3]   # RGB lookup table, alpha channel dropped
    jet_heatmap = jet_colors[heatmap]
    # cv2.resize takes the target size as (width, height)
    jet_heatmap = cv2.resize(jet_heatmap, (img.shape[1], img.shape[0]))
    superimposed_img = jet_heatmap * alpha + img
    superimposed_img = np.clip(superimposed_img, 0, 1)
    return jet_heatmap, superimposed_img

# --- MAIN EXECUTION ---

# 1. Get layer name
last_conv_layer_name = get_last_conv_layer_name(model)
print(f"Explaining logic using layer: {last_conv_layer_name}")

# 2. Flatten y_test to a 1-D NumPy array so the true label can be read by position
if hasattr(y_test, 'values'):
    y_test_values = y_test.values.flatten()  # DataFrame/Series -> 1-D array
else:
    y_test_values = np.array(y_test).flatten()

# 3. Select random images
# sample() never picks the same image twice; the seeded generator keeps the
# figure identical between runs without touching the global random state.
num_examples = min(4, len(X_test))
indices_to_visualize = random.Random(42).sample(range(len(X_test)), num_examples)
# Position in the list equals the encoded label: Cyst=0, Normal=1, Stone=2, Tumor=3
class_names = ['Cyst', 'Normal', 'Stone', 'Tumor']

plt.figure(figsize=(16, 12))

for i, idx in enumerate(indices_to_visualize):
    # A. Prepare Image (add the batch dimension the model expects)
    img = X_test[idx]
    img_array = np.expand_dims(img, axis=0)

    # B. Predict
    preds = model.predict(img_array, verbose=0)
    pred_index = np.argmax(preds[0])
    confidence = np.max(preds[0])

    # C. Generate Heatmap for the predicted class
    heatmap = make_gradcam_heatmap(img_array, model, last_conv_layer_name, pred_index)

    # D. Overlay
    jet_heatmap, superimposed_img = display_gradcam(img, heatmap)

    # E. Get True Label (by position in the flattened array)
    true_label_idx = int(y_test_values[idx])
    true_label = class_names[true_label_idx]
    pred_label_name = class_names[pred_index]

    # F. Plotting: one row per image -> original | heatmap | overlay
    # Original
    plt.subplot(num_examples, 3, i*3 + 1)
    plt.imshow(img)
    plt.title(f"True: {true_label}")
    plt.axis('off')

    # Heatmap
    plt.subplot(num_examples, 3, i*3 + 2)
    plt.imshow(jet_heatmap)
    plt.title("Grad-CAM Heatmap")
    plt.axis('off')

    # Overlay
    plt.subplot(num_examples, 3, i*3 + 3)
    plt.imshow(superimposed_img)
    plt.title(f"Pred: {pred_label_name} ({confidence:.2f})")
    plt.axis('off')

plt.tight_layout()
plt.show()

In [None]:
# Write the trained model to disk; the download cell below expects exactly this file name
model.save("CKD_CNN_Model.h5")

In [None]:
from google.colab import files
# Hand the saved model file to the browser as a download (Colab only)
files.download("CKD_CNN_Model.h5")