<a href="https://colab.research.google.com/github/fsternini/brats-task2/blob/development/brats_task2_launcher.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Notebook settings

In [1]:
# First, verification of Python version is completed
!which python # should return /usr/local/bin/python
!python --version

/usr/local/bin/python
Python 3.10.12


In [2]:
# Google Drive is mounted, to reach all relevant data
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
!git clone https://github.com/fsternini/brats-task2.git
%cd 'brats-task2'
# Switch to the most updated branch
!git switch development
# Collect the most updated version of the repo
!git pull

Cloning into 'brats-task2'...
remote: Enumerating objects: 125, done.[K
remote: Counting objects: 100% (125/125), done.[K
remote: Compressing objects: 100% (97/97), done.[K
remote: Total 125 (delta 52), reused 73 (delta 20), pack-reused 0[K
Receiving objects: 100% (125/125), 148.60 KiB | 2.56 MiB/s, done.
Resolving deltas: 100% (52/52), done.
/content/brats-task2
Branch 'development' set up to track remote branch 'development' from 'origin'.
Switched to a new branch 'development'
Already up to date.


# Imports

In [4]:
# Install all requirements for the code
!pip install -r requirements.txt
#!pip install pydicom numpy torch torchvision matplotlib

Collecting pydicom (from -r requirements.txt (line 1))
  Downloading pydicom-2.4.4-py3-none-any.whl (1.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->-r requirements.txt (line 3))
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->-r requirements.txt (line 3))
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch->-r requirements.txt (line 3))
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch->-r requirements.txt (line 3))
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch->-r requirements.txt (line 

In [5]:
import os, sys
import json
import pydicom
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from ipywidgets import interact, IntSlider, fixed

# Dataset reading

In [None]:
import json

class Dataset:
    """Dataset structure loaded from a JSON description file.

    Attributes:
        json_file: Path of the JSON file, exactly as passed to the constructor.
        dataset: Parsed JSON content, or ``None`` when the file could not be
            read or parsed (a message is printed in that case).
    """

    def __init__(self, json_file):
        """Load ``json_file``; on failure print a message and leave ``dataset`` as ``None``."""
        self.json_file = json_file
        # Always define the attribute so later method calls do not fail with
        # AttributeError when loading did not succeed.
        self.dataset = None
        try:
            # Use a local name for the handle: binding it to self.json_file
            # would replace the stored path with a (closed) file object.
            with open(self.json_file, 'r') as handle:
                self.dataset = json.load(handle)
        except FileNotFoundError:
            print(f"The file '{self.json_file}' was not found.")
        except json.JSONDecodeError as e:
            print(f"Error parsing JSON: {e}")
        except Exception as e:
            print(f"An error occurred: {e}")

    def get_files_in_node(self, node_name):
        """Collect the string values stored directly under every ``node_name`` key.

        The loaded structure is walked depth-first through nested dicts and
        lists. Whenever a dict key equals ``node_name`` and its value is a
        dict, each string value of that dict is collected; non-string values
        are searched further for more ``node_name`` keys. A ``node_name`` key
        whose value is not a dict is ignored.

        Args:
            node_name: Key to look for.

        Returns:
            List of collected strings in traversal order; empty when nothing
            matches or when the dataset failed to load.
        """
        files = []

        def traverse(node):
            if isinstance(node, dict):
                for key, value in node.items():
                    if key != node_name:
                        traverse(value)
                        continue
                    if isinstance(value, dict):
                        for sub_value in value.values():
                            if isinstance(sub_value, str):
                                files.append(sub_value)
                            else:
                                traverse(sub_value)
            elif isinstance(node, list):
                for item in node:
                    traverse(item)

        traverse(self.dataset)
        return files

# NOTE(review): the path ends in 'datasetdataset_structure.json'; other cells read
# from '.../dataset/<file>', so a '/' may be missing here — confirm the file name.
ds = Dataset('/content/drive/MyDrive/Data Science/rsna-miccai-brain-tumor-radiogenomic-classification/datasetdataset_structure.json')

In [None]:
class Volume:
    """3-D image volume assembled from a directory of single-slice DICOM files.

    Attributes:
        dicom_dir: Directory that is scanned for ``*.dcm`` files.
        volume_data: ``None`` until ``load_dicom_images`` is called, then the
            array produced by stacking the slices' ``pixel_array`` along a
            new first axis.
        slices: Number of slices along the axis last passed to ``show_volume``
            (only set once ``show_volume`` has been called).
    """

    def __init__(self, dicom_dir):
        self.dicom_dir = dicom_dir
        self.volume_data = None

    @staticmethod
    def _slice_position(dcm):
        """Return the slice position measured along the stack normal.

        The normal is the cross product of the row and column direction
        cosines in ImageOrientationPatient. Projecting ImagePositionPatient
        onto it orders slices for any acquisition plane; for the standard
        axial orientation [1, 0, 0, 0, 1, 0] it equals the z coordinate.
        Falls back to the z coordinate when the orientation is unavailable.
        """
        position = [float(v) for v in dcm.ImagePositionPatient]
        direction = getattr(dcm, 'ImageOrientationPatient', None)
        if direction is None or len(direction) != 6:
            return position[2]
        row = [float(v) for v in direction[:3]]
        col = [float(v) for v in direction[3:]]
        return float(np.dot(np.cross(row, col), position))

    def _require_loaded(self):
        """Raise a clear error when the volume has not been loaded yet."""
        if self.volume_data is None:
            raise RuntimeError("No volume loaded: call load_dicom_images() first.")

    def load_dicom_images(self):
        """Read every ``.dcm`` file in ``dicom_dir`` and stack them into ``volume_data``.

        Slices are ordered by their position along the stack normal, so
        coronal and sagittal series are ordered spatially as well (their z
        coordinate does not identify the slice).

        Raises:
            ValueError: If the directory contains no ``.dcm`` files.
        """
        # Sorted listing keeps the result deterministic when positions tie.
        dicom_files = sorted(f for f in os.listdir(self.dicom_dir) if f.endswith('.dcm'))
        if not dicom_files:
            raise ValueError(f"No .dcm files found in '{self.dicom_dir}'")
        slices = [pydicom.dcmread(os.path.join(self.dicom_dir, f)) for f in dicom_files]
        slices.sort(key=self._slice_position)

        self.volume_data = np.stack([s.pixel_array for s in slices])

    def show_slice(self, slice_idx, index=0):
        """Display one 2-D slice of the volume in grayscale.

        Args:
            slice_idx: Position of the slice along the chosen axis.
            index: Axis of ``volume_data`` to slice along (default 0).

        Raises:
            RuntimeError: If the volume has not been loaded.
            IndexError: If ``slice_idx`` is out of range for axis ``index``.
        """
        self._require_loaded()
        # Index only the requested axis: evaluating the views for the other
        # axes too would fail whenever slice_idx exceeds their (shorter) length.
        plt.imshow(np.take(self.volume_data, slice_idx, axis=index), cmap='gray')
        plt.axis('off')
        plt.show()

    def show_volume(self,index=0):
        """Show an interactive slider that browses the slices along axis ``index``."""
        self._require_loaded()
        self.slices = self.volume_data.shape[index]
        interact(self.show_slice, slice_idx=IntSlider(min=0, max=self.slices-1, step=1, value=0), index=fixed(index))


## EDA

In [None]:
vol = Volume('/content/drive/MyDrive/Data Science/rsna-miccai-brain-tumor-radiogenomic-classification/dataset/train/00000/FLAIR')
vol.load_dicom_images()

In [None]:
vol.show_volume(0)

interactive(children=(IntSlider(value=0, description='slice_idx', max=399), Output()), _dom_classes=('widget-i…

In [6]:
import pandas as pd

In [7]:
df = pd.read_csv('/content/drive/MyDrive/Data Science/rsna-miccai-brain-tumor-radiogenomic-classification/dataset/train_labels.csv')

In [8]:
df

Unnamed: 0,BraTS21ID,MGMT_value
0,0,1
1,2,1
2,3,0
3,5,1
4,6,1
...,...,...
580,1005,1
581,1007,1
582,1008,1
583,1009,0


In [9]:
# Reload the per-patient metadata cache written by the orientation-analysis loop.
# json.load needs an open file object, not a path string, so open the file first.
data_json_path = '/content/drive/MyDrive/Data Science/rsna-miccai-brain-tumor-radiogenomic-classification/dataset/data.json'
try:
  with open(data_json_path, 'r') as infile:
    # JSON object keys are always strings; convert them back to the integer
    # patient ids used as keys when the cache is built, so membership tests
    # against the integer ids keep working after a reload.
    data_dict = {int(key): value for key, value in json.load(infile).items()}
except (FileNotFoundError, json.JSONDecodeError):
  # No cache yet (or an unreadable one): start empty and rebuild.
  data_dict = {}

### Orientation analysis

In [10]:
def series_count(folder):
  """Return how many distinct SeriesInstanceUID values occur in `folder`.

  Every entry listed in `folder` is read with pydicom.
  """
  unique_uids = {
      pydicom.dcmread(folder+'/'+filename).SeriesInstanceUID
      for filename in os.listdir(folder)
  }
  return len(unique_uids)
def orientation(folder):
  """Return ImageOrientationPatient of the first file in `folder` as a list.

  "First" means first in sorted file-name order, so the result does not
  depend on the arbitrary order in which os.listdir returns entries.

  Raises:
    IndexError: If `folder` contains no entries.
  """
  filenames = sorted(os.listdir(folder))
  if not filenames:
    raise IndexError(f"No files found in '{folder}'")
  # Only a header attribute is needed, so skip decoding the pixel data.
  dcm = pydicom.dcmread(os.path.join(folder, filenames[0]), stop_before_pixels=True)

  return list(dcm.ImageOrientationPatient)

In [None]:
# Add one entry per patient id to data_dict, skipping ids that are already present.
for patient_id in df["BraTS21ID"]:
  if patient_id in data_dict:
    continue
  # Label stored for this patient in the labels table.
  label = df.loc[df["BraTS21ID"] == patient_id]["MGMT_value"].item()
  patient_dir = f"/content/drive/MyDrive/Data Science/rsna-miccai-brain-tumor-radiogenomic-classification/dataset/train/{patient_id:05d}"
  data_dict[patient_id] = {
      "id": patient_id,
      "class": label,
      # One sub-entry per item found in the patient's directory.
      "data": {
          series_name: {"orientation": orientation(f"{patient_dir}/{series_name}")}
          for series_name in os.listdir(patient_dir)
      },
  }
  # The whole dictionary is rewritten after every new patient
  # (presumably so an interrupted run keeps its progress — confirm).
  with open('/content/drive/MyDrive/Data Science/rsna-miccai-brain-tumor-radiogenomic-classification/dataset/data.json', 'w') as outfile:
    json.dump(data_dict, outfile, indent=4)

In [28]:
df_data = pd.json_normalize(data_dict.values())

In [29]:
df_data.head()

Unnamed: 0,id,class,data.FLAIR.orientation,data.T1w.orientation,data.T1wCE.orientation,data.T2w.orientation
0,0,1,"[1, -0, 0, -0, -0, -1]","[1, -0, 0, -0, 1, 0]","[1, -0, 0, -0, -0, -1]","[-0, 1, 0, -0, -0, -1]"
1,2,1,"[-0.04540329, 0.99896874, 0, -0, -0, -1]","[0.999586, 0.0266035, 0.0109314, -0.0265941, 0...","[0.99942703, 0.033847, 0, -0, -0, -1]","[-0, 1, 0, -0, -0, -1]"
2,3,0,"[-0.00396099, 0.99816103, 0.06048864, 0.065223...","[0.995705, 0.091316, 0.015294, -0.0919349, 0.9...","[0.99963662, 0.02440997, 0.01143617, 0.0083274...","[0.0209028, 0.997263, 0.0709206, 0.0570101, 0...."
3,5,1,"[0.999933, 0.0115853, 0, 0.00121281, -0.104678...","[0.997774, 0.0247505, 0.0619187, -0.0169125, 0...","[0.99782761, 0.0567354, 0.03348367, 0.0382568,...","[-0.0425975, 0.994858, -0.091886, 0.0297932, -..."
4,6,1,"[0.9980977, -0.06165212, 3.88e-06, 0.06165186,...","[1, -0, 0, -0, 1, 0]","[1, -0, 0, -0, -0, -1]","[0.0596739, 0.998218, 0, -0, -0, -1]"
