## Convnet practice with kidney dataset

In [35]:
# import cell
import os, warnings
import matplotlib.pyplot as plt
from matplotlib import gridspec
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import metrics


### Data Preparation

In [2]:
# Seed setting and defaults, so reruns are reproducible and output stays clean
def set_seed(seed=31415):
    """Seed the random number generators this notebook relies on.

    Seeds Python's built-in ``random`` module, NumPy's global RNG and
    TensorFlow's global RNG with the same value, and exports two
    determinism-related environment variables.

    Args:
        seed: Integer seed applied to every generator. Defaults to 31415.
    """
    import random  # local import keeps this cell self-contained

    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    # PYTHONHASHSEED is read when the interpreter starts, so setting it here
    # cannot change hashing in the running kernel; only child processes that
    # inherit the environment see it.
    os.environ['PYTHONHASHSEED'] = str(seed)
    # Requests deterministic TensorFlow op implementations.
    # NOTE(review): confirm this variable is still honoured when it is set
    # after TensorFlow has already been imported.
    os.environ['TF_DETERMINISTIC_OPS'] = '1'
# Apply the default seed once, before anything random happens
set_seed()

# Matplotlib defaults shared by every figure in this notebook
plt.rc('image', cmap='magma')
plt.rc('figure', autolayout=True)
plt.rc(
    'axes',
    titlesize=18,
    titleweight='bold',
    titlepad=10,
    labelsize='large',
    labelweight='bold',
)

# Silence every warning so the output cells stay uncluttered
warnings.filterwarnings("ignore")

In [None]:
# Load the images as a seeded 80/20 train / hold-out split, then cut the
# hold-out set in half to obtain the validation and test sets
ds_train, ds_validate = image_dataset_from_directory(
    directory=r'input/kaggle/ct-kidney/CT-KIDNEY-DATASET-Normal-Cyst-Tumor-Stone/CT-KIDNEY-DATASET-Normal-Cyst-Tumor-Stone',
    labels='inferred',
    label_mode='categorical',
    color_mode='grayscale',
    image_size=(512, 512),
    shuffle=True,
    seed=314,
    validation_split=0.2,
    subset='both',
)

# Half of the hold-out dataset's elements go to validation, the rest to test.
# NOTE(review): elements are batches, assuming the loader's default batching.
val_size = int(0.5 * ds_validate.cardinality().numpy())

ds_val = ds_validate.take(val_size)
ds_test = ds_validate.skip(val_size)

Found 12446 files belonging to 4 classes.
Using 9957 files for training.
Using 2489 files for validation.


In [34]:
def get_class_distribution(dataset, class_names):
    """Return the percentage of samples that belong to each class.

    Args:
        dataset: Iterable yielding ``(features, labels)`` batches. ``labels``
            must be a 2-D array or eager tensor with one row per sample and
            one column per class (for example one-hot labels); the class of a
            sample is the column holding its largest value. The features are
            ignored.
        class_names: Sequence of class names; position ``i`` names label
            index ``i``.

    Returns:
        dict mapping each class name to its share of the samples, as a plain
        Python ``float`` percentage rounded to two decimals. If the dataset
        yields no samples, every class maps to ``0.0``.

    Raises:
        IndexError: If a label index is not a valid position in
            ``class_names``.
    """
    label_counts = np.zeros(len(class_names), dtype=int)
    for _, labels in dataset:
        # np.asarray accepts eager tensors as well as NumPy arrays.
        indices = np.argmax(np.asarray(labels), axis=1)
        # Unbuffered add: every repeated index within the batch is counted.
        np.add.at(label_counts, indices, 1)

    total = int(label_counts.sum())
    if total == 0:
        # Empty dataset: avoid dividing by zero and returning NaN.
        return {name: 0.0 for name in class_names}

    percents = np.round(100 * label_counts / total, 2)
    # float() keeps NumPy scalar reprs out of the printed result.
    return {name: float(pct) for name, pct in zip(class_names, percents)}

# Show what percentage of the training samples falls into each class
print("Train class %:")
print(get_class_distribution(ds_train, ds_train.class_names))

Train class %:
{'Cyst': np.float64(29.72), 'Normal': np.float64(41.05), 'Stone': np.float64(11.12), 'Tumor': np.float64(18.12)}


2025-05-09 16:27:37.945544: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


### Model Declaration and Compiling

In [None]:
# Small convnet: three conv/pool stages followed by a dense classification head
model = keras.Sequential([
    # Explicit input spec (512x512, one channel) instead of the legacy
    # `input_shape=` argument on the first layer
    keras.Input(shape=(512, 512, 1)),
    # The data pipeline feeds unscaled pixel values to the network; scale
    # them to [0, 1] here.
    # NOTE(review): assumes 8-bit source images, i.e. values in [0, 255].
    layers.Rescaling(1.0 / 255),
    # Convolutional base
    layers.Conv2D(filters=32, kernel_size=5, activation='relu', padding='same'),
    layers.MaxPool2D(),
    layers.Conv2D(filters=64, kernel_size=3, activation='relu', padding='same'),
    layers.MaxPool2D(),
    layers.Conv2D(filters=64, kernel_size=3, activation='relu', padding='same'),
    layers.MaxPool2D(),
    # Classification head
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.5),
    # One softmax output per class (4 classes)
    layers.Dense(units=4, activation='softmax'),
])

In [41]:
# Display Keras' summary of the model defined above
model.summary()

In [42]:
# Configure training: categorical cross-entropy loss, Adam optimizer, and
# accuracy / precision / recall as the reported metrics.
# NOTE(review): epsilon=0.01 is much larger than Keras' documented default;
# confirm this is intentional.
model.compile(
    loss='categorical_crossentropy',
    optimizer=keras.optimizers.Adam(epsilon=0.01),
    metrics=[
        'accuracy',
        metrics.Precision(name='precision'),
        metrics.Recall(name='recall'),
    ],
)

In [None]:
# Train for 10 epochs on the training split, evaluating on the validation
# split; the returned object is kept in `history`
history = model.fit(x=ds_train, epochs=10, validation_data=ds_val)

I0000 00:00:1746833925.079795   73745 cuda_dnn.cc:529] Loaded cuDNN version 90300
