# Brain Tumor Prediction With Cropping Images

In [1]:
import numpy as np 
import pandas as pd
import tensorflow as tf
import cv2
import os
from glob import glob

for dirname, _, _ in os.walk('../brain_tumor_dataset/'):
    print(dirname)

2024-11-19 19:07:40.965982: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-19 19:07:40.971742: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-19 19:07:41.051408: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-19 19:07:41.131554: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1732054061.220579    4086 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1732054061.24

../brain_tumor_dataset/
../brain_tumor_dataset/no_tumor
../brain_tumor_dataset/glioma_tumor
../brain_tumor_dataset/brain_tumor_classification
../brain_tumor_dataset/brain_tumor_classification/Testing
../brain_tumor_dataset/brain_tumor_classification/Testing/no_tumor
../brain_tumor_dataset/brain_tumor_classification/Testing/glioma_tumor
../brain_tumor_dataset/brain_tumor_classification/Testing/meningioma_tumor
../brain_tumor_dataset/brain_tumor_classification/Testing/pituitary_tumor
../brain_tumor_dataset/brain_tumor_classification/Training
../brain_tumor_dataset/brain_tumor_classification/Training/no_tumor
../brain_tumor_dataset/brain_tumor_classification/Training/glioma_tumor
../brain_tumor_dataset/brain_tumor_classification/Training/meningioma_tumor
../brain_tumor_dataset/brain_tumor_classification/Training/pituitary_tumor
../brain_tumor_dataset/meningioma_tumor
../brain_tumor_dataset/pituitary_tumor


# Data Analyse

In [2]:
dirs = ['../brain_tumor_dataset/brain_tumor_classification/Training',
       '../brain-tumor-mri-dataset/brain_tumor_dataset/brain_tumor_classification/Testing']
paths = []

for parent_dir in dirs:
    sub_dirs = glob('{}/*'.format(parent_dir))
    
    for sub_dir in sub_dirs:
        paths.extend(glob('{}/*'.format(sub_dir)))
        
print(len(paths))

2870


In [3]:
from wolta.visual_tools import get_extensions

get_extensions(paths)

{'jpg': 2870}

In [4]:
from wolta.visual_tools import dataset_size_same

dataset_size_same(paths)

False

In [5]:
from wolta.visual_tools import dataset_ratio_same

dataset_ratio_same(paths)

False

In [6]:
from wolta.visual_tools import crop

for parent in dirs:
    children = glob('{}/*'.format(parent))
    
    for child in children:
        d_name = child.split('/')[-1]
        w_dir = '../brain_tumor_dataset/{}'.format(d_name)
        
        os.makedirs(w_dir, exist_ok=True)
        
        images = glob('{}/*'.format(child))
        id_num = len(glob('{}/*'.format(w_dir)))
        
        for image in images:
            obj = cv2.imread(image)
            edge = min(obj.shape[0], obj.shape[1])
            
            obj = crop(obj, crop_width=edge, crop_height=edge, get_img=True)
            obj = cv2.resize(obj, (128, 128))
            
            cv2.imwrite('{}/{}.png'.format(w_dir, id_num), obj)
            id_num += 1


In [45]:
w_dirs = glob('../brain_tumor_dataset/*')
paths = []

for w_dir in w_dirs:
    paths.extend(glob('{}/*'.format(w_dir)))

print(len(paths))


8612


# Data Preparation

In [46]:
train_ds, test_val_ds = tf.keras.utils.image_dataset_from_directory(
    '../brain_tumor_dataset',
    validation_split=0.4,
    subset='both',
    seed=123,
    image_size=(128, 128),
    batch_size=16
)

Found 11874 files belonging to 5 classes.
Using 7125 files for training.
Using 4749 files for validation.


In [48]:
test_val_ds_size = tf.data.experimental.cardinality(test_val_ds).numpy()
test_val_split_size = int(0.5 * test_val_ds_size)

validation_ds = test_val_ds.take(test_val_split_size)
test_ds = test_val_ds.skip(test_val_split_size)

In [49]:
names = train_ds.class_names
num_classes = len(names)

print(names)

['brain_tumor_classification', 'glioma_tumor', 'meningioma_tumor', 'no_tumor', 'pituitary_tumor']


In [50]:
AUTOTUNE = tf.data.AUTOTUNE

train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
validation_ds = validation_ds.cache().prefetch(buffer_size=AUTOTUNE)
test_ds = test_ds.cache().prefetch(buffer_size=AUTOTUNE)

# Model

In [51]:
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

In [None]:
# Define the ANN model
model = Sequential([
    layers.Input(shape=[128]),  # Cambia el tamaño según las características de entrada
    layers.Dense(64, activation='relu'),  # Primera capa oculta con 64 neuronas
    layers.Dense(128, activation='relu'),  # Segunda capa oculta con 128 neuronas
    layers.Dense(64, activation='relu'),  # Tercera capa oculta con 64 neuronas
    layers.Dense(num_classes, activation='softmax')  # Capa de salida con activación softmax
])


In [52]:
# Define the CNN model
model = Sequential([
    layers.Input(shape=[128, 128, 3]),
    layers.Rescaling(1./255),
    layers.Conv2D(32, (3,3), padding='same', activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(64, (3,3), padding='same', activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(64, (3,3), padding='same', activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_classes)
])

In [53]:
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy']
)


In [54]:
model.summary()

In [55]:
epochs = 10

history = model.fit(
    train_ds,
    validation_data=validation_ds,
    epochs=epochs
)

Epoch 1/10
[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 34ms/step - accuracy: 0.4200 - loss: 1.2848 - val_accuracy: 0.5959 - val_loss: 0.9366
Epoch 2/10
[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 32ms/step - accuracy: 0.6342 - loss: 0.8597 - val_accuracy: 0.6465 - val_loss: 0.8118
Epoch 3/10
[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 32ms/step - accuracy: 0.6908 - loss: 0.6937 - val_accuracy: 0.6795 - val_loss: 0.7462
Epoch 4/10
[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 33ms/step - accuracy: 0.7355 - loss: 0.5993 - val_accuracy: 0.7175 - val_loss: 0.6718
Epoch 5/10
[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 32ms/step - accuracy: 0.7568 - loss: 0.5406 - val_accuracy: 0.7226 - val_loss: 0.6421
Epoch 6/10
[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 32ms/step - accuracy: 0.7931 - loss: 0.4742 - val_accuracy: 0.6769 - val_loss: 0.6770
Epoch 7/10
[1m4

In [17]:
loss, acc = model.evaluate(test_ds)
print(f"Test accuracy: {acc * 100:.2f}%")

[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.5321 - loss: 1.0548
Test accuracy: 52.83%


# Mlflow

In [58]:
import mlflow
import mlflow.tensorflow

# Configurar el URI de seguimiento
mlflow.set_tracking_uri("http://localhost:5000")

## Experimiento 1

In [69]:
# Set the experiment name
mlflow.set_experiment("Brain Tumor Prediction with CNN")

# Start a new run
with mlflow.start_run() as run:
    run_id = run.info.run_id
    # Log parameters
    mlflow.log_param("epochs", epochs)
    mlflow.log_param("batch_size", 16)
    mlflow.log_param("image_size", (128, 128))
    
    # Log metrics
    mlflow.log_metric("accuracy", acc)
    mlflow.log_metric("loss", loss)
    
    # Log the model
    mlflow.tensorflow.log_model(model, "model")
    
    # Log the training history
    for epoch in range(epochs):
        mlflow.log_metric("train_loss", history.history['loss'][epoch], step=epoch)
        mlflow.log_metric("train_accuracy", history.history['accuracy'][epoch], step=epoch)
        mlflow.log_metric("val_loss", history.history['val_loss'][epoch], step=epoch)
        mlflow.log_metric("val_accuracy", history.history['val_accuracy'][epoch], step=epoch)
    print("Model logged in run {}".format(run_id))

2024/11/18 17:00:55 INFO mlflow.tracking._tracking_service.client: 🏃 View run puzzled-ox-739 at: http://localhost:5000/#/experiments/784554095399817559/runs/4e8fa097b02c4dd2a802fa6811540a49.
2024/11/18 17:00:55 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/784554095399817559.


Model logged in run 4e8fa097b02c4dd2a802fa6811540a49


## Experimiento 2

In [None]:
import tensorflow as tf
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Define the ANN model
model = Sequential([
    layers.Input(shape=[128]),  # Cambia el tamaño según las características de entrada
    layers.Dense(64, activation='relu'),  # Primera capa oculta con 64 neuronas
    layers.Dense(128, activation='relu'),  # Segunda capa oculta con 128 neuronas
    layers.Dense(64, activation='relu'),  # Tercera capa oculta con 64 neuronas
    layers.Dense(num_classes, activation='softmax')  # Capa de salida con activación softmax
])


# Compilar el modelo
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Cargar y preprocesar los datos (asegúrate de que tus datos estén cargados y preprocesados aquí)
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
validation_ds = validation_ds.cache().prefetch(buffer_size=AUTOTUNE)
test_ds = test_ds.cache().prefetch(buffer_size=AUTOTUNE)

# Definir el número de épocas
epochs = 10

mlflow.set_experiment("Brain Tumor Prediction ANN") 

# Iniciar una nueva ejecución
with mlflow.start_run():
    history = model.fit(
        train_ds,
        validation_data=validation_ds,
        epochs=epochs
    )
    
    # Registrar el modelo
    mlflow.keras.log_model(model, "model")
    
    # Registrar métricas de entrenamiento
    for epoch in range(epochs):
        mlflow.log_metric("loss", history.history['loss'][epoch], step=epoch)
        mlflow.log_metric("val_loss", history.history['val_loss'][epoch], step=epoch)
    
    # Evaluar el modelo en el conjunto de prueba
    loss, acc = model.evaluate(test_ds)
    print(f"Test accuracy: {acc * 100:.2f}%")
    
    # Calcular métricas adicionales
    y_true = []
    y_pred = []
    for x, y in test_ds:
        y_true.extend(y.numpy())
        y_pred.extend(model.predict(x).flatten())

    y_pred = [1 if pred > 0.5 else 0 for pred in y_pred]

    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='macro')
    recall = recall_score(y_true, y_pred, average='macro')
    f1 = f1_score(y_true, y_pred, average='macro')
    
    # Registrar métricas adicionales
    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("precision", precision)
    mlflow.log_metric("recall", recall)
    mlflow.log_metric("f1_score", f1)

Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 86ms/step - accuracy: 0.2122 - loss: -6739805601792.0000 - val_accuracy: 0.2086 - val_loss: -317319498694656.0000
Epoch 2/10
[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 39ms/step - accuracy: 0.2094 - loss: -1363242349756416.0000 - val_accuracy: 0.2086 - val_loss: -12086760599191552.0000
Epoch 3/10
[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 38ms/step - accuracy: 0.2160 - loss: -21935196844589056.0000 - val_accuracy: 0.2086 - val_loss: -83791719568506880.0000
Epoch 4/10
[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 39ms/step - accuracy: 0.1994 - loss: -121572227589079040.0000 - val_accuracy: 0.2086 - val_loss: -309238091988598784.0000
Epoch 5/10
[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 38ms/step - accuracy: 0.2125 - loss: -378414072208556032.0000 - val_accuracy: 0.2086 - val_loss: -815710084460969984.0000
Epoch 6/10
[1m446/446[



[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.1969 - loss: -14687027834203930624.0000   
Test accuracy: 20.62%
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
2024/11/18 15:33:04 INFO mlflow.tracking._tracking_service.client: 🏃 View run puzzled-cod-63 at: http://localhost:5000/#/experiments/672223266884605474/runs/c8e2a13a68f64ddd8a62b8602de0ec37.
2024/11/18 15:33:04 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/672223266884605474.
