<a href="https://colab.research.google.com/github/VictoorV/mri_segmentation/blob/main/brain_mri_segmentation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [1]:
import pandas as pd
import kagglehub
import os
import cv2
import re
import matplotlib.pyplot as plt
from matplotlib import animation, rc
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
rc('animation', html='jshtml')
seed = 42

In [3]:
# Download latest version
path = kagglehub.dataset_download("mateuszbuda/lgg-mri-segmentation")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/mateuszbuda/lgg-mri-segmentation?dataset_version_number=2...


100%|██████████| 714M/714M [00:06<00:00, 110MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/mateuszbuda/lgg-mri-segmentation/versions/2


# Data visualization

In [4]:
path = '/root/.cache/kagglehub/datasets/mateuszbuda/lgg-mri-segmentation/versions/2/kaggle_3m'

In [41]:
data = pd.read_csv(os.path.join(path, 'data.csv'))
N, M = data.shape
data

Unnamed: 0,Patient,RNASeqCluster,MethylationCluster,miRNACluster,CNCluster,RPPACluster,OncosignCluster,COCCluster,histological_type,neoplasm_histologic_grade,tumor_tissue_site,laterality,tumor_location,gender,age_at_initial_pathologic,race,ethnicity,death01
0,TCGA_CS_4941,2.0,4.0,2,2.0,,3.0,2,1.0,2.0,1.0,3.0,2.0,2.0,67.0,3.0,2.0,1.0
1,TCGA_CS_4942,1.0,5.0,2,1.0,1.0,2.0,1,1.0,2.0,1.0,3.0,2.0,1.0,44.0,2.0,,1.0
2,TCGA_CS_4943,1.0,5.0,2,1.0,2.0,2.0,1,1.0,2.0,1.0,1.0,2.0,2.0,37.0,3.0,,0.0
3,TCGA_CS_4944,,5.0,2,1.0,2.0,1.0,1,1.0,1.0,1.0,3.0,6.0,2.0,50.0,3.0,,0.0
4,TCGA_CS_5393,4.0,5.0,2,1.0,2.0,3.0,1,1.0,2.0,1.0,1.0,6.0,2.0,39.0,3.0,,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105,TCGA_HT_8563,2.0,5.0,3,1.0,,2.0,2,1.0,2.0,1.0,3.0,2.0,1.0,30.0,3.0,2.0,0.0
106,TCGA_HT_A5RC,2.0,4.0,2,2.0,,3.0,2,1.0,2.0,1.0,1.0,2.0,1.0,70.0,3.0,2.0,1.0
107,TCGA_HT_A616,,5.0,2,1.0,,2.0,1,1.0,1.0,1.0,1.0,2.0,1.0,36.0,3.0,2.0,0.0
108,TCGA_HT_A61A,,5.0,2,,,,1,3.0,1.0,1.0,1.0,6.0,1.0,20.0,3.0,2.0,0.0


In [6]:
def get_patient_path(i):
  for dir in os.listdir(path):
    if data.loc[i, 'Patient'] in dir:
      return os.path.join(path, dir)

def get_img_mask_path(img_path):
  img_paths = []
  mask_paths = []
  for file in sorted(os.listdir(img_path), key=lambda s : [int(text) if text.isdigit() else text.lower() for text in re.split(r'(\d+)', s)]):
    if 'mask' not in file:
      img_paths.append(os.path.join(img_path, file))
    if 'mask' in file:
      mask_paths.append(os.path.join(img_path, file))
  return img_paths, mask_paths


In [7]:
def create_animation(i, save=False):
  images, masks = get_img_mask_path(get_patient_path(i))
  fig, ax = plt.subplots()

  def update(frame):
      ax.clear()
      img = cv2.imread(images[frame])
      img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
      ax.imshow(img)

      mask = cv2.imread(masks[frame], cv2.IMREAD_GRAYSCALE)
      mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2RGB)
      ax.imshow(mask, alpha=0.3)
      ax.set_title(f'Slide {frame+1}/{len(images)}',fontsize=14, color='blue')

  ani = animation.FuncAnimation(fig, update, frames=len(images), interval=500, repeat=True)
  plt.close(fig)
  if save:
    ani.save('animation.gif', writer='pillow')
  return ani

In [31]:
create_animation(0, save=True)

# Data preprocess

In [43]:
patient_paths = []
for i in range(N):
  patient_paths.append(get_patient_path(i))

train_patient_idx, test_patient_idx = train_test_split(range(N), test_size=5, random_state=seed)

train_patient_paths = [patient_paths[i] for i in train_patient_idx]
test_patient_paths = [patient_paths[i] for i in test_patient_idx]

train_image_mask_paths = [get_img_mask_path(patient_path) for patient_path in train_patient_paths]
test_image_mask_paths = [get_img_mask_path(patient_path) for patient_path in test_patient_paths]

train_image_paths, train_mask_paths = zip(*train_image_mask_paths)
test_image_paths, test_mask_paths = zip(*test_image_mask_paths)

train_image_paths = sum(train_image_paths, [])
train_mask_paths = sum(train_mask_paths, [])
test_image_paths = sum(test_image_paths, [])
test_mask_paths = sum(test_mask_paths, [])

In [40]:
test_image_paths

['/root/.cache/kagglehub/datasets/mateuszbuda/lgg-mri-segmentation/versions/2/kaggle_3m/TCGA_HT_7602_19951103/TCGA_HT_7602_19951103_1.tif',
 '/root/.cache/kagglehub/datasets/mateuszbuda/lgg-mri-segmentation/versions/2/kaggle_3m/TCGA_HT_7602_19951103/TCGA_HT_7602_19951103_2.tif',
 '/root/.cache/kagglehub/datasets/mateuszbuda/lgg-mri-segmentation/versions/2/kaggle_3m/TCGA_HT_7602_19951103/TCGA_HT_7602_19951103_3.tif',
 '/root/.cache/kagglehub/datasets/mateuszbuda/lgg-mri-segmentation/versions/2/kaggle_3m/TCGA_HT_7602_19951103/TCGA_HT_7602_19951103_4.tif',
 '/root/.cache/kagglehub/datasets/mateuszbuda/lgg-mri-segmentation/versions/2/kaggle_3m/TCGA_HT_7602_19951103/TCGA_HT_7602_19951103_5.tif',
 '/root/.cache/kagglehub/datasets/mateuszbuda/lgg-mri-segmentation/versions/2/kaggle_3m/TCGA_HT_7602_19951103/TCGA_HT_7602_19951103_6.tif',
 '/root/.cache/kagglehub/datasets/mateuszbuda/lgg-mri-segmentation/versions/2/kaggle_3m/TCGA_HT_7602_19951103/TCGA_HT_7602_19951103_7.tif',
 '/root/.cache/kaggl

In [44]:
len(test_image_paths)

116