In [1]:
import pandas as pd

# Load CSV data
csv_data = pd.read_csv('Brain_MRI_conditions.csv')
print(csv_data.head())


  Type                                        Description Body Part  \
0  MRI            Magnetic resonance imaging of the brain      Head   
1  MRI                                   MRI of the brain      Head   
2  MRI  Magnetic resonance imaging of the brain with c...      Head   
3  MRI  Magnetic resonance imaging of the pituitary gland      Head   
4  MRI                  MRI of the brain (Turkish saddle)      Head   

     544,637   
0    283,631   
1     29,714   
2     17,629   
3     10,727   
4      9,757   


In [2]:
csv_data.isnull().sum()

Type           1
Description    0
Body Part      1
  544,637      0
dtype: int64

In [3]:
csv_data.describe()

Unnamed: 0,Type,Description,Body Part,"544,637"
count,51,52,51,52
unique,1,49,1,52
top,MRI,Magnetic resonance imaging of the brain with c...,Head,283631
freq,51,2,51,1


In [5]:
csv_data.dtypes

Type           object
Description    object
Body Part      object
  544,637      object
dtype: object

In [7]:
!pip install PyPDF2

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
     ------------------------------------ 232.6/232.6 kB 592.9 kB/s eta 0:00:00
Installing collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1



[notice] A new release of pip available: 22.2.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [8]:
import PyPDF2

# Load and extract text from PDF
pdf_file_path = 'Brain_MRI_tumor.pdf'
pdf_text = ""

with open(pdf_file_path, 'rb') as file:
    reader = PyPDF2.PdfReader(file)
    for page in reader.pages:
        pdf_text += page.extract_text()

print(pdf_text[:1000])  # Print a sample of the text


Study: Contrast -enhanced magnetic resonance imaging of the brain.  
MRI machine: Philips Intera 1.5T.  
Age: 74. 
Sex: Female.  
Race: Caucasian.  
Brief anamnesis of the disease (complaints): dizziness, memory loss, weakness, numbness in the 
lower/upper extremities (left arm, left leg).  
 
REPORT  
A series of T1 - and T2 -weighted MR tomograms in three planes visualized sub - and supratentorial 
structures.  
The midline structures are not displaced.  
In series of IV contrasting in the right frontal and parietal lobes, single focal masses of round shape 
with diffuse type of contrast accumulation were noted and isointense MR -signal, the sizes were 
0.6x0.7 cm and 0.54x0.54 cm, respectively. Against the ba ckground of these focal formations defined 
zone of vasogenic edema in the right hemisphere, spreading in the frontal and parietal lobes, with an 
approximate extent of 4.4x9.1x4.2 cm, with an indistinctly expressed mass effect in the form of 
deformation of the upper contour o

In [9]:
import os

# Define image folder path
image_folder = 'ST000001'
classes = ['SE000001', 'SE000002', 'SE000003', 'SE000004', 'SE000005', 
           'SE000006', 'SE000007', 'SE000008', 'SE000009', 'SE000010']

# Organize images
image_data = []
for class_name in classes:
    class_folder = os.path.join(image_folder, class_name)
    for img_file in os.listdir(class_folder):
        if img_file.endswith('.jpg'):
            image_data.append({'class': class_name, 'path': os.path.join(class_folder, img_file)})

print(f"Total images: {len(image_data)}")


Total images: 350


In [10]:
import tensorflow as tf

# Image preprocessing function
IMG_SIZE = (128, 128)  # Resize to a smaller size if needed

def preprocess_image(img_path):
    image = tf.io.read_file(img_path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, IMG_SIZE)
    image = image / 255.0  # Normalize to [0,1] range
    return image

# Example usage
sample_image = preprocess_image(image_data[0]['path'])


In [11]:
from sklearn.preprocessing import LabelEncoder

# Extract class labels
labels = [img['class'] for img in image_data]
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

# Map each image path to its encoded label
for i in range(len(image_data)):
    image_data[i]['label'] = encoded_labels[i]


In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(classes), activation='softmax')  # Output layer for multi-class classification
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 126, 126, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 63, 63, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 61, 61, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 30, 30, 64)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 28, 28, 128)       73856     
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 14, 14, 128)      0

In [13]:
import numpy as np

# Prepare image and label arrays
images = np.array([preprocess_image(img['path']).numpy() for img in image_data])
labels = np.array([img['label'] for img in image_data])

# Split data into training and validation sets
from sklearn.model_selection import train_test_split
train_images, val_images, train_labels, val_labels = train_test_split(images, labels, test_size=0.2, random_state=42)


In [14]:
history = model.fit(train_images, train_labels, epochs=20, batch_size=32, validation_data=(val_images, val_labels))


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [15]:
# Evaluate the model
val_loss, val_accuracy = model.evaluate(val_images, val_labels)
print(f"Validation Accuracy: {val_accuracy:.2f}")


Validation Accuracy: 0.96


In [16]:
model.save('brain_tumor_classifier.h5')
