# Brain Tumor Prediction With Cropping Images

In [19]:
import numpy as np 
import pandas as pd
import tensorflow as tf
import cv2
import os
from glob import glob

for dirname, _, _ in os.walk('../brain_tumor_dataset/'):
    print(dirname)

../brain_tumor_dataset/
../brain_tumor_dataset/pituitary_tumor
../brain_tumor_dataset/brain_tumor_classification
../brain_tumor_dataset/brain_tumor_classification/Testing
../brain_tumor_dataset/brain_tumor_classification/Testing/pituitary_tumor
../brain_tumor_dataset/brain_tumor_classification/Testing/no_tumor
../brain_tumor_dataset/brain_tumor_classification/Testing/meningioma_tumor
../brain_tumor_dataset/brain_tumor_classification/Testing/glioma_tumor
../brain_tumor_dataset/brain_tumor_classification/Training
../brain_tumor_dataset/brain_tumor_classification/Training/pituitary_tumor
../brain_tumor_dataset/brain_tumor_classification/Training/no_tumor
../brain_tumor_dataset/brain_tumor_classification/Training/meningioma_tumor
../brain_tumor_dataset/brain_tumor_classification/Training/glioma_tumor
../brain_tumor_dataset/no_tumor
../brain_tumor_dataset/meningioma_tumor
../brain_tumor_dataset/glioma_tumor


# Data Analyse

In [20]:
dirs = ['../brain_tumor_dataset/brain_tumor_classification/Training',
       '../brain-tumor-mri-dataset/brain_tumor_dataset/brain_tumor_classification/Testing']
paths = []

for parent_dir in dirs:
    sub_dirs = glob('{}/*'.format(parent_dir))
    
    for sub_dir in sub_dirs:
        paths.extend(glob('{}/*'.format(sub_dir)))
        
print(len(paths))

2870


In [21]:
from wolta.visual_tools import get_extensions

get_extensions(paths)

{'jpg': 2870}

In [22]:
from wolta.visual_tools import dataset_size_same

dataset_size_same(paths)

False

In [23]:
from wolta.visual_tools import dataset_ratio_same

dataset_ratio_same(paths)

False

In [24]:
from wolta.visual_tools import crop

for parent in dirs:
    children = glob('{}/*'.format(parent))
    
    for child in children:
        d_name = child.split('/')[-1]
        w_dir = '../brain_tumor_dataset/{}'.format(d_name)
        
        os.makedirs(w_dir, exist_ok=True)
        
        images = glob('{}/*'.format(child))
        id_num = len(glob('{}/*'.format(w_dir)))
        
        for image in images:
            obj = cv2.imread(image)
            edge = min(obj.shape[0], obj.shape[1])
            
            obj = crop(obj, crop_width=edge, crop_height=edge, get_img=True)
            obj = cv2.resize(obj, (128, 128))
            
            cv2.imwrite('{}/{}.png'.format(w_dir, id_num), obj)
            id_num += 1


In [25]:
w_dirs = glob('../brain_tumor_dataset/*')
paths = []

for w_dir in w_dirs:
    paths.extend(glob('{}/*'.format(w_dir)))

print(len(paths))


5742


# Data Preparation

In [26]:
train_ds, test_val_ds = tf.keras.utils.image_dataset_from_directory(
    '../brain_tumor_dataset',
    validation_split=0.4,
    subset='both',
    seed=123,
    image_size=(128, 128),
    batch_size=16
)

Found 9004 files belonging to 5 classes.
Using 5403 files for training.
Using 3601 files for validation.


In [27]:
test_val_ds_size = tf.data.experimental.cardinality(test_val_ds).numpy()
test_val_split_size = int(0.5 * test_val_ds_size)

validation_ds = test_val_ds.take(test_val_split_size)
test_ds = test_val_ds.skip(test_val_split_size)

In [28]:
names = train_ds.class_names
num_classes = len(names)

print(names)

['brain_tumor_classification', 'glioma_tumor', 'meningioma_tumor', 'no_tumor', 'pituitary_tumor']


In [29]:
AUTOTUNE = tf.data.AUTOTUNE

train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
validation_ds = validation_ds.cache().prefetch(buffer_size=AUTOTUNE)
test_ds = test_ds.cache().prefetch(buffer_size=AUTOTUNE)

# Model

In [46]:
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

In [55]:
model = Sequential([
    layers.Input(shape=[128, 128, 3]),
    layers.Rescaling(1./255),
    layers.Conv2D(32, (3,3), padding='same', activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(64, (3,3), padding='same', activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(64, (3,3), padding='same', activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_classes)
])

In [56]:
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy']
)


In [57]:
model.summary()

In [58]:
epochs = 10

history = model.fit(
    train_ds,
    validation_data=validation_ds,
    epochs=epochs
)

Epoch 1/10
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 35ms/step - accuracy: 0.3920 - loss: 1.2935 - val_accuracy: 0.5348 - val_loss: 0.9834
Epoch 2/10
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 34ms/step - accuracy: 0.5620 - loss: 0.9241 - val_accuracy: 0.5619 - val_loss: 0.8895
Epoch 3/10
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 57ms/step - accuracy: 0.5924 - loss: 0.7943 - val_accuracy: 0.6018 - val_loss: 0.8274
Epoch 4/10
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 34ms/step - accuracy: 0.6418 - loss: 0.7075 - val_accuracy: 0.6150 - val_loss: 0.8104
Epoch 5/10
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 35ms/step - accuracy: 0.6760 - loss: 0.6411 - val_accuracy: 0.5824 - val_loss: 0.8560
Epoch 6/10
[1m338/338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 35ms/step - accuracy: 0.7182 - loss: 0.5846 - val_accuracy: 0.5542 - val_loss: 0.8516
Epoch 7/10
[1m338

In [53]:
loss, acc = model.evaluate(test_ds)
print(f"Test accuracy: {acc * 100:.2f}%")

[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.5294 - loss: 0.9597
Test accuracy: 52.09%


# Mlflow

In [None]:
import mlflow

import mlflow.tensorflow

# Set the experiment name
mlflow.set_experiment("Brain Tumor Prediction")

# Start a new run
with mlflow.start_run():
    # Log parameters
    mlflow.log_param("epochs", epochs)
    mlflow.log_param("batch_size", 16)
    mlflow.log_param("image_size", (128, 128))
    
    # Log metrics
    mlflow.log_metric("accuracy", acc)
    mlflow.log_metric("loss", loss)
    
    # Log the model
    mlflow.tensorflow.log_model(model, "model")
    
    # Log the training history
    for epoch in range(epochs):
        mlflow.log_metric("train_loss", history.history['loss'][epoch], step=epoch)
        mlflow.log_metric("train_accuracy", history.history['accuracy'][epoch], step=epoch)
        mlflow.log_metric("val_loss", history.history['val_loss'][epoch], step=epoch)
        mlflow.log_metric("val_accuracy", history.history['val_accuracy'][epoch], step=epoch)