In [40]:
# CNN algorithm implementation was used from https://www.kaggle.com/code/huseyinndogan/cnn-tumor-classification-with-96-accuracy and adapted a bit
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))


In [41]:
!pip install tensorflow




[notice] A new release of pip is available: 23.2.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [42]:
import cv2


In [43]:
import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.initializers import HeUniform, GlorotUniform
from sklearn.preprocessing import StandardScaler
import numpy as npa
import matplotlib.pyplot as plt
import pandas as pd
import os
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix , accuracy_score
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import GlobalAveragePooling2D, LeakyReLU
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import losses
from sklearn.metrics import classification_report
from tensorflow.keras.preprocessing.image import ImageDataGenerator



### 📌 1. Path Identification

In [54]:
import pathlib
data_dir = pathlib.Path("..") / "brain-tumor-mri-dataset"
train_dir = data_dir / "Training"
test_dir = data_dir / "Testing"


### 📌 2. Data Preprocessing with ImageDataGenerator

In [55]:
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

### 📌 3. Defining Train & Test Folders

In [56]:
train_dir = str(data_dir / "Training") 
test_dir = str(data_dir / "Testing")    

### 📌 4. Define Image Size & Batch Size for Model

In [57]:
batch_size = 32
img_height = 240
img_width = 240

### 📌 5. Loading Training and Test Datasets

In [58]:
# It automatically assigns labels based on subfolder names
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size = (img_height , img_width),
    batch_size= batch_size,
    class_mode = "categorical"
    )

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size = (img_height , img_width),
    batch_size = batch_size,
    class_mode = "categorical"

    )
# Print the class labels assigned by flow_from_directory
print(r"Class Label :" , train_generator.class_indices)


Found 5712 images belonging to 4 classes.
Found 1311 images belonging to 4 classes.
Class Label : {'glioma': 0, 'meningioma': 1, 'notumor': 2, 'pituitary': 3}


### DATA PREPROCESSING SUMMARY

🚀 What Does the Code Do in Brief?

* Determines the dataset folder (data_dir).

* Creates an ImageDataGenerator to rescale the images.

* Determines the training (train_dir) and test (test_dir) data paths.

* Resize the images to 240x240 and load them with flow_from_directory().

* Prints the class labels.

### 📌 6. Augment training data with transformations (rotation, flipping, etc.)

In [59]:

train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,          
    width_shift_range=0.2,     
    height_shift_range=0.2,    
    shear_range=0.2,           
    zoom_range=0.2,             
    horizontal_flip=True,       
    fill_mode='nearest'       
)
# Redefine test_datagen (without augmentations)
test_datagen = ImageDataGenerator(rescale=1./255)


### Define CNN model architecture

In [60]:
model = Sequential([
    # Convolutional layer + Max Pooling + Normalization
    Conv2D(32 , (5,5) , activation="relu" ,padding="valid", kernel_initializer=HeUniform()),
    MaxPooling2D(2,2),
    BatchNormalization(),

    Conv2D(64 , (5,5) , activation="relu" ,padding="valid", kernel_initializer=HeUniform()),
    MaxPooling2D(2,2),
    BatchNormalization(),

    Conv2D(128 , (5,5) , activation="relu" ,padding="valid", kernel_initializer=HeUniform()),
    MaxPooling2D(2,2),
    BatchNormalization(),

    Flatten(),

    Dense(64 , activation="relu" , kernel_initializer=HeUniform()),
    Dropout(0.2),
    BatchNormalization(),
    Dense(128 , activation="relu" , kernel_initializer=HeUniform()),
    Dropout(0.2),
    BatchNormalization(),
    Dense(256 , activation="relu" , kernel_initializer=HeUniform()),
    Dropout(0.3),
    BatchNormalization(),
    
    # Final output layer (4 classes) with softmax activation
    Dense(4 , activation="softmax" , kernel_initializer=GlorotUniform()),

    ])

### Compile the model with optimizer, loss function, and metrics

In [61]:
model.compile(optimizer=Adam(learning_rate=0.0001) , loss="categorical_crossentropy", metrics=["accuracy"])

In [62]:
# Set early stopping to stop training when validation loss stops improving
early_stop = EarlyStopping(patience=3,monitor="val_loss", restore_best_weights=True)

# Train the model
fit = model.fit(train_generator,epochs=10 #(epoch count is reduced due to process length.)
, validation_data=(test_generator) , callbacks=[early_stop])

Epoch 1/10
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m385s[0m 2s/step - accuracy: 0.5395 - loss: 1.1673 - val_accuracy: 0.4287 - val_loss: 1.5532
Epoch 2/10
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m242s[0m 1s/step - accuracy: 0.7489 - loss: 0.6423 - val_accuracy: 0.6857 - val_loss: 0.7909
Epoch 3/10
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m237s[0m 1s/step - accuracy: 0.8340 - loss: 0.4541 - val_accuracy: 0.8291 - val_loss: 0.4146
Epoch 4/10
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m249s[0m 1s/step - accuracy: 0.8952 - loss: 0.3052 - val_accuracy: 0.9130 - val_loss: 0.2269
Epoch 5/10
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m284s[0m 2s/step - accuracy: 0.9326 - loss: 0.2114 - val_accuracy: 0.8955 - val_loss: 0.3166
Epoch 6/10
[1m179/179[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m290s[0m 2s/step - accuracy: 0.9352 - loss: 0.1853 - val_accuracy: 0.8902 - val_loss: 0.3464
Epoch 7/10
[1m179/179

In [63]:
#  Save the model
model.save("brain_tumor_model.keras")


In [64]:
# Load the saved model
model = tf.keras.models.load_model("brain_tumor_model.keras")


# LET'S EVALUATE THE MODEL

### 📌 Evaluation: Test performance using one test batch

In [65]:

from sklearn.metrics import precision_score, recall_score, f1_score

# Get a batch of test images
x_test, y_test = next(test_generator)

# Predict using the trained model
predictions = model.predict(x_test)

# Convert one-hot encoded vectors to class labels
predicted_classes = np.argmax(predictions, axis=1)
true_classes = np.argmax(y_test, axis=1)

# Print evaluation metrics
print(f"Accuracy: { accuracy_score(true_classes, predicted_classes) * 100:.2f}%")
print(f"Precision: {precision_score(true_classes, predicted_classes, average='weighted') * 100:.2f}%")
print(f"Recall: {recall_score(true_classes, predicted_classes, average='weighted') * 100:.2f}%")
print(f"F1 Score: {f1_score(true_classes, predicted_classes, average='weighted') * 100:.2f}%")
print("Confusion Matrix:")
print(confusion_matrix(true_classes, predicted_classes))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 541ms/step
Accuracy: 100.00%
Precision: 100.00%
Recall: 100.00%
F1 Score: 100.00%
Confusion Matrix:
[[ 7  0  0  0]
 [ 0  6  0  0]
 [ 0  0 11  0]
 [ 0  0  0  8]]


### 📌 Evaluate on full test set (batch by batch)

In [68]:
steps = len(test_generator)

all_predictions = []
all_true_classes = []

# Iterate over each batch in test set
for i in range(steps):
    x_batch, y_batch = next(test_generator)
    predictions = model.predict(x_batch)
    predicted_classes = np.argmax(predictions, axis=1)
    true_classes = np.argmax(y_batch, axis=1)

    all_predictions.extend(predicted_classes)
    all_true_classes.extend(true_classes)

# Final performance report
print("Accuracy:", accuracy_score(all_true_classes, all_predictions))
print("Classification Report:")

print(classification_report(all_true_classes, all_predictions))

print("Confusion Matrix:")
print(confusion_matrix(all_true_classes, all_predictions))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 313ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 288ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 371ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 343ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 314ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 324ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 399ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 294ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 293ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 298ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 294ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 291ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 300ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 