In [None]:
import os
import sys
import ctypes

def run_as_admin():
    """Relaunch the current script with administrator rights on Windows.

    Does nothing on non-Windows platforms or when the current process is
    already elevated. Otherwise the Windows shell is asked to start the same
    interpreter with the same command-line arguments through the ``runas``
    verb, using the script's directory as working directory.

    Returns:
        None. The result code of ``ShellExecuteW`` is not inspected.
    """
    if not sys.platform.startswith('win'):
        return

    # Local import keeps this definition self-contained.
    import subprocess

    shell32 = ctypes.windll.shell32

    # Without this guard every elevated child would relaunch itself again.
    if shell32.IsUserAnAdmin():
        return

    app_dir = os.path.dirname(os.path.realpath(sys.argv[0]))

    # ShellExecuteW takes the program and its parameters as separate
    # arguments; list2cmdline applies the Windows quoting rules so arguments
    # containing spaces survive the round trip.
    params = subprocess.list2cmdline(sys.argv)
    shell32.ShellExecuteW(None, "runas", sys.executable, params, app_dir, 1)

# Request elevation only when this code runs as the main program
# (not when it is imported as a module).
if __name__ == "__main__":
    run_as_admin()


In [115]:
# import libraries
import os
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from PIL import Image
import shutil
import random
import matplotlib.pyplot as plt
import glob
from sklearn.utils import class_weight

In [None]:
# Raw dataset folder and destination folder for processed images,
# both resolved against the current working directory.
data_dir, output_dir = (
    os.path.join(os.getcwd(), folder)
    for folder in ('Brain Tumor MRI', 'Processed Data')
)

In [None]:
# Make sure the output folder exists. exist_ok=True replaces the separate
# existence check, so the folder cannot appear between checking and creating.
os.makedirs(output_dir, exist_ok=True)

In [None]:
# Rotate every image of each class folder in data_dir by 90 degrees and save
# it under the same class folder / file name inside output_dir.
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")

for dir_name in os.listdir(data_dir):
    dir_path = os.path.join(data_dir, dir_name)
    # Only real, non-hidden folders are treated as image classes.
    if not os.path.isdir(dir_path) or dir_name.startswith("."):
        continue
    print(f"Processing images in {dir_name}...")

    # Output folder for this class of images.
    output_path = os.path.join(output_dir, dir_name)
    os.makedirs(output_path, exist_ok=True)

    for filename in os.listdir(dir_path):
        # Compare extensions case-insensitively so files such as "scan.JPG"
        # are not silently skipped.
        if not filename.lower().endswith(IMAGE_EXTENSIONS):
            continue
        img_path = os.path.join(dir_path, filename)
        with Image.open(img_path) as img:
            # NOTE(review): rotate() is called without expand=True, so the
            # result keeps the source canvas size — confirm that cropping of
            # non-square images is acceptable.
            rotated = img.rotate(90)
            # Save processed image
            rotated.save(os.path.join(output_path, filename))

In [None]:
# Input data dimensions
IMG_HEIGHT = 256
IMG_WIDTH = 256

# Initial preprocessing: resize every .jpg image of each class folder in
# data_dir to IMG_WIDTH x IMG_HEIGHT and write it to the matching folder in
# output_dir.
# NOTE(review): images are read from data_dir, so a file with the same name
# that already exists in output_dir is overwritten — confirm this is intended.
# NOTE(review): only .jpg files are handled here — confirm whether .jpeg/.png
# inputs should be resized as well.
for folder_name in os.listdir(data_dir):
    folder_path = os.path.join(data_dir, folder_name)
    if not os.path.isdir(folder_path):
        continue
    print(f'Preprocessing of {folder_name}')

    output_folder_path = os.path.join(output_dir, folder_name)
    os.makedirs(output_folder_path, exist_ok=True)

    for file_name in os.listdir(folder_path):
        # Case-insensitive so that "image.JPG" is processed too.
        if not file_name.lower().endswith('.jpg'):
            continue
        file_path = os.path.join(folder_path, file_name)

        img = cv2.imread(file_path, cv2.IMREAD_COLOR)
        if img is None:
            # imread signals an unreadable/corrupt file by returning None.
            print(f'Skipping unreadable image: {file_path}')
            continue

        # The image stays in OpenCV's BGR channel order because imwrite
        # expects BGR; converting to RGB first would store the file with red
        # and blue swapped. The former /255 * 255 round trip is dropped since
        # it only reproduced the same 8-bit values.
        img = cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT))
        output_file_path = os.path.join(output_folder_path, file_name)
        cv2.imwrite(output_file_path, img)

## EDA for training set

In [None]:
# Path to Processed Data
processed_data_path = os.path.join(os.getcwd(), 'Processed Data')

# Paths to train/test and validation sets
train_path = os.path.join(processed_data_path, "train")
val_path = os.path.join(processed_data_path, "validation")
test_path = os.path.join(processed_data_path, "test")

# Split fractions for train / validation / test sets.
# The test set receives whatever remains after train and validation.
train_split = 0.6
test_split = 0.2
val_split = 0.2

# Fixed seed: re-running the cell reproduces the same split, so a file never
# ends up in two different sets across runs.
SPLIT_SEED = 42
split_rng = random.Random(SPLIT_SEED)

# The split folders live inside processed_data_path; on a re-run they must
# not be mistaken for class folders.
split_dir_names = {
    os.path.basename(path) for path in (train_path, val_path, test_path)
}

# Copy the files of every class folder into train/validation/test sub-folders.
# Sorted iteration keeps the random sequence aligned with the same classes on
# every run.
for folder_name in sorted(os.listdir(processed_data_path)):
    folder_path = os.path.join(processed_data_path, folder_name)
    if not os.path.isdir(folder_path) or folder_name in split_dir_names:
        continue

    # Regular files only (shutil.copy cannot copy directories); sorted before
    # shuffling because the order returned by listdir is arbitrary.
    files = sorted(
        name for name in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, name))
    )
    split_rng.shuffle(files)

    train_end = int(train_split * len(files))
    val_end = int((train_split + val_split) * len(files))
    partitions = (
        (train_path, files[:train_end]),
        (val_path, files[train_end:val_end]),
        (test_path, files[val_end:]),
    )

    for split_root, split_files in partitions:
        split_folder_path = os.path.join(split_root, folder_name)
        os.makedirs(split_folder_path, exist_ok=True)
        for file_name in split_files:
            src_path = os.path.join(folder_path, file_name)
            dst_path = os.path.join(split_folder_path, file_name)
            shutil.copy(src_path, dst_path)


In [None]:
# Data check: for every class folder of the training set collect the number
# of images, the average image dimensions and the average pixel mean.
train_path = os.path.join("Processed Data", "train")

# Parallel lists: index i of each list describes classes[i].
classes = []
num_images = []
dimensions = []   # (average height, average width) per class
pixel_means = []  # average of the per-image pixel means per class

# Sorted so that the class order (and anything derived from the position of
# a class in `classes`) is the same on every run.
for class_name in sorted(os.listdir(train_path)):
    class_path = os.path.join(train_path, class_name)
    if not os.path.isdir(class_path):
        continue
    classes.append(class_name)

    class_dimensions = []
    class_pixel_means = []
    for image_name in os.listdir(class_path):
        image_path = os.path.join(class_path, image_name)
        if os.path.isfile(image_path):
            image = plt.imread(image_path)
            # The first two axes of the array are taken as height and width.
            height, width = image.shape[:2]
            class_dimensions.append((height, width))
            class_pixel_means.append(image.mean())

    class_num_images = len(class_dimensions)
    num_images.append(class_num_images)

    if class_num_images == 0:
        # Empty class folder: averages are undefined. Store NaN instead of
        # dividing by zero so the lists stay aligned with `classes`.
        dimensions.append((float("nan"), float("nan")))
        pixel_means.append(float("nan"))
        continue

    class_avg_height = sum(dim[0] for dim in class_dimensions) / class_num_images
    class_avg_width = sum(dim[1] for dim in class_dimensions) / class_num_images
    dimensions.append((class_avg_height, class_avg_width))

    pixel_means.append(sum(class_pixel_means) / class_num_images)

# Per-class report
for i, class_name in enumerate(classes):
    print(f"Class {i + 1}: {class_name}")
    print(f"Number of images: {num_images[i]}")
    print(f"Average dimensions: {dimensions[i]}")
    print(f"Average pixel mean: {pixel_means[i]}")
    print()

In [None]:
# Font: set before the figure is created, otherwise the new default size
# does not apply to this chart.
plt.rcParams.update({'font.size': 12})

# Chart creation: one bar per class, bar height = number of training images
fig, ax = plt.subplots(figsize=(24, 16))
ax.bar(classes, num_images, width=0.5)

# Labels (the plotted values are counts, not means)
ax.set_ylabel('No. of images', fontsize=14)
ax.set_title('Number of images per class in training set', fontsize=16)

# Axis details: rotate the existing category labels instead of replacing
# them, which avoids assigning fixed labels to automatically placed ticks.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

# Chart show
plt.show()

In [None]:
# NOTE(review): this cell draws the same class-count chart as the preceding
# cell, while `pixel_means` is not plotted in any visible cell — confirm
# whether this chart was meant to show the average pixel mean instead.

# Font: set before the figure is created, otherwise the new default size
# does not apply to this chart.
plt.rcParams.update({'font.size': 12})

# Chart creation: one bar per class, bar height = number of training images
fig, ax = plt.subplots(figsize=(24, 16))
ax.bar(classes, num_images, width=0.5)

# Labels (the plotted values are counts, not means)
ax.set_ylabel('No. of images', fontsize=14)
ax.set_title('Number of images per class in training set', fontsize=16)

# Axis details: rotate the existing category labels instead of replacing
# them, which avoids assigning fixed labels to automatically placed ticks.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

# Chart show
plt.show()

In [None]:
# Font: set before the figure is created, otherwise the new default size
# does not apply to this chart.
plt.rcParams.update({'font.size': 12})

# Tuple unpacking: every entry of `dimensions` is an
# (average height, average width) pair, one per class.
x = range(len(classes))
y1 = [t[0] for t in dimensions]  # average height
y2 = [t[1] for t in dimensions]  # average width

# Chart creation: two bars side by side for each class
bar_width = 0.4
fig, ax = plt.subplots(figsize=(24, 16))
ax.bar(x, y1, width=bar_width, align='center', label='Height')
ax.bar([i + bar_width for i in x], y2, width=bar_width, align='center', label='Width')

# Axis details: fix the tick positions first, then label them once.
# Each tick sits midway between the two bars of its class.
ax.set_xticks([i + bar_width / 2 for i in x])
ax.set_xticklabels(classes, rotation=45, ha='right')

# Title and labels
ax.set_ylabel('Average dimensions', fontsize=14)
ax.set_title('Average dimensions per class', fontsize=16)
ax.legend()

# Chart
plt.show()

In [None]:
# Lookup table: class name -> position of the class in `classes`, stored as
# a string ("0", "1", ...).
# NOTE(review): labels are strings here — confirm that downstream consumers
# expect strings rather than integer class indices.
class_dict = dict(zip(classes, map(str, range(len(classes)))))

In [None]:
# Path to the directory with the training images
train_path = os.path.join("Processed Data", "train")

# Image samples and their labels, filled in parallel
X_train = []
y_train = []

TRAIN_IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp')

# Walk the directory tree and load every image
for root, dirs, files in os.walk(train_path):
    image_files = [
        name for name in files
        if name.lower().endswith(TRAIN_IMAGE_EXTENSIONS)
    ]

    # The label is derived from the name of the folder holding the image.
    label = class_dict.get(os.path.basename(root))
    if label is None:
        # Folder is not a known class (for example train_path itself):
        # report skipped images instead of failing with a KeyError.
        if image_files:
            print(f"Skipping {len(image_files)} image(s) in unlabelled folder: {root}")
        continue

    for filename in image_files:
        # Load the image and preprocess it
        file_path = os.path.join(root, filename)
        img = cv2.imread(file_path)
        if img is None:
            # imread signals an unreadable/corrupt file by returning None.
            print(f"Skipping unreadable image: {file_path}")
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (224, 224))  # example size
        X_train.append(img)
        y_train.append(label)

# Convert the lists to numpy arrays
X_train = np.array(X_train)
y_train = np.array(y_train)



In [None]:
# Show the shapes of the sample array and the label array, one per line.
print(X_train.shape, y_train.shape, sep="\n")

In [118]:
# Per-class weights from scikit-learn's "balanced" mode, stored as a
# {label: weight} dictionary and displayed as the cell result.
unique_labels = np.unique(y_train)
class_weights = dict(
    zip(
        unique_labels,
        class_weight.compute_class_weight(
            class_weight="balanced",
            classes=unique_labels,
            y=y_train,
        ),
    )
)
class_weights

{0: 0.5755411255411256,
 1: 0.43476128188358404,
 2: 0.5924688057040999,
 3: 1.5495337995337994,
 4: 0.915633608815427,
 5: 1.4053911205073997,
 6: 2.2382154882154883,
 7: 2.158279220779221,
 8: 1.7774064171122994,
 9: 5.035984848484849,
 10: 6.043181818181818,
 11: 4.648601398601398,
 12: 3.7769886363636362,
 13: 2.8777056277056277,
 14: 3.7769886363636362,
 15: 1.831267217630854,
 16: 1.0791396103896105,
 17: 1.831267217630854,
 18: 3.3573232323232323,
 19: 3.3573232323232323,
 20: 6.043181818181818,
 21: 4.648601398601398,
 22: 1.5107954545454545,
 23: 2.5179924242424243,
 24: 0.3707473508087005,
 25: 0.27344714109419993,
 26: 0.43476128188358404,
 27: 0.7747668997668997,
 28: 0.45437457279562543,
 29: 0.9747067448680352,
 30: 1.1849376114081998,
 31: 1.4053911205073997,
 32: 1.5495337995337994,
 33: 1.5495337995337994,
 34: 0.9442471590909091,
 35: 1.6332923832923834,
 36: 0.6867252066115702,
 37: 0.5209639498432602,
 38: 0.8278331257783312,
 39: 3.7769886363636362,
 40: 1.20863636