<a href="https://colab.research.google.com/github/fsternini/brats-task2/blob/development/brats_task2_launcher.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Notebook setting

In [5]:
# First, verification of Python version is completed
!which python # should return /usr/local/bin/python
!python --version

/usr/local/bin/python
Python 3.10.12


In [6]:
# Google Drive is mounted, to reach all relevant data
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [7]:
!git clone https://github.com/fsternini/brats-task2.git
%cd 'brats-task2'
# Switch to the most updated branch
!git switch development
# Collect the most updated version of the repo
!git pull

Cloning into 'brats-task2'...
remote: Enumerating objects: 122, done.[K
remote: Counting objects: 100% (122/122), done.[K
remote: Compressing objects: 100% (94/94), done.[K
remote: Total 122 (delta 50), reused 73 (delta 20), pack-reused 0[K
Receiving objects: 100% (122/122), 99.13 KiB | 1.03 MiB/s, done.
Resolving deltas: 100% (50/50), done.
/content/brats-task2/brats-task2
Branch 'development' set up to track remote branch 'development' from 'origin'.
Switched to a new branch 'development'
Already up to date.


#Imports

In [8]:
# Install all requirements for the code
!pip install -r requirements.txt
#!pip install pydicom numpy torch torchvision matplotlib



In [20]:
import os, sys
import json
import pydicom
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from ipywidgets import interact, IntSlider, fixed

#Dataset reading

In [10]:
import json

class Dataset:
    """Object representing the dataset"""

    def __init__(self, json_file):
        self.json_file = json_file
        try:
            with open(self.json_file, 'r') as self.json_file:
                self.dataset = json.load(self.json_file)
        except FileNotFoundError:
            print(f"The file '{self.json_file}' was not found.")
        except json.JSONDecodeError as e:
            print(f"Error parsing JSON: {e}")
        except Exception as e:
            print(f"An error occurred: {e}")

    def get_files_in_node(self, node_name):
        files = []
        node = self.dataset
        def traverse(node):
                if isinstance(node, dict):
                    for key, value in node.items():
                        if key == node_name:
                            if isinstance(value, dict):
                                for sub_key, sub_value in value.items():
                                    if isinstance(sub_value, str):
                                        files.append(sub_value)
                                    else:
                                        traverse(sub_value)
                        else:
                            traverse(value)
                elif isinstance(node, list):
                    for item in node:
                        traverse(item)
        traverse(node)
        return files

ds = Dataset('/content/drive/MyDrive/Data Science/rsna-miccai-brain-tumor-radiogenomic-classification/datasetdataset_structure.json')

In [11]:
class Volume:
    def __init__(self, dicom_dir):
        self.dicom_dir = dicom_dir
        self.volume_data = None

    def load_dicom_images(self):
        dicom_files = sorted([f for f in os.listdir(self.dicom_dir) if f.endswith('.dcm')])
        slices = [pydicom.dcmread(os.path.join(self.dicom_dir, f)) for f in dicom_files]
        slices.sort(key=lambda x: float(x.ImagePositionPatient[2]))

        self.volume_data = np.stack([s.pixel_array for s in slices])

    def show_slice(self, slice_idx, index=0):
        ind = {
            0:self.volume_data[slice_idx, :, :],
            1:self.volume_data[:, slice_idx, :],
            2:self.volume_data[:, :, slice_idx]
        }
        plt.imshow(ind[index], cmap='gray')
        plt.axis('off')
        plt.show()

    def show_volume(self,index=0):
        self.slices = self.volume_data.shape[index]
        interact(self.show_slice, slice_idx=IntSlider(min=0, max=self.slices-1, step=1, value=0), index=fixed(index))


##EDA

In [18]:
vol = Volume('/content/drive/MyDrive/Data Science/rsna-miccai-brain-tumor-radiogenomic-classification/dataset/train/00000/FLAIR')
vol.load_dicom_images()

In [21]:
vol.show_volume(0)

interactive(children=(IntSlider(value=0, description='slice_idx', max=399), Output()), _dom_classes=('widget-i…

In [22]:
import pandas as pd

In [23]:
df = pd.read_csv('/content/drive/MyDrive/Data Science/rsna-miccai-brain-tumor-radiogenomic-classification/dataset/train_labels.csv')

In [24]:
df

Unnamed: 0,BraTS21ID,MGMT_value
0,0,1
1,2,1
2,3,0
3,5,1
4,6,1
...,...,...
580,1005,1
581,1007,1
582,1008,1
583,1009,0


In [53]:
try:
  data_dict = json.load('/content/drive/MyDrive/Data Science/rsna-miccai-brain-tumor-radiogenomic-classification/dataset/data.json')
except:
  data_dict = {}

###Orientation analysis

In [27]:
def series_count(folder):
  s = set()
  p = folder
  for img in os.listdir(p):
    dcm = pydicom.dcmread(p+'/'+img)
    if dcm.SeriesInstanceUID not in s:
      s.add(dcm.SeriesInstanceUID)
  return len(s)
def orientation(folder):
  dcm = pydicom.dcmread(folder+'/'+os.listdir(folder)[0])

  return list(dcm.ImageOrientationPatient)

In [55]:
for i in df["BraTS21ID"]:
  if i not in data_dict:
    temp_dict = {"id":i, "class": df.loc[df["BraTS21ID"] == i]["MGMT_value"].item()}
    dir_path = f"/content/drive/MyDrive/Data Science/rsna-miccai-brain-tumor-radiogenomic-classification/dataset/train/{i:05d}"
    temp_dict["data"]={}
    for dirname in os.listdir(dir_path):
      temp_dict["data"][dirname] = {
          "orientation": orientation(f"{dir_path}/{dirname}")
      }
    data_dict[i] = temp_dict
    with open('/content/drive/MyDrive/Data Science/rsna-miccai-brain-tumor-radiogenomic-classification/dataset/data.json', 'w') as outfile:
      json.dump(data_dict, outfile, indent=4)

KeyboardInterrupt: 

In [56]:
data_dict

{0: {'id': 0,
  'class': 1,
  'data': {'FLAIR': {'orientation': ['1.0',
     '-0.0',
     '0.0',
     '-0.0',
     '-0.0',
     '-1.0']},
   'T1w': {'orientation': ['1.0', '-0.0', '0.0', '-0.0', '1.0', '0.0']},
   'T1wCE': {'orientation': ['1.0', '-0.0', '0.0', '-0.0', '-0.0', '-1.0']},
   'T2w': {'orientation': ['-0.0', '1.0', '0.0', '-0.0', '-0.0', '-1.0']}}},
 2: {'id': 2,
  'class': 1,
  'data': {'FLAIR': {'orientation': ['-0.04540329',
     '0.99896874',
     '0.0',
     '-0.0',
     '-0.0',
     '-1.0']},
   'T1w': {'orientation': ['0.999586',
     '0.0266035',
     '0.0109314',
     '-0.0265941',
     '0.999646',
     '-0.00100292']},
   'T1wCE': {'orientation': ['0.99942703',
     '0.033847',
     '0.0',
     '-0.0',
     '-0.0',
     '-1.0']},
   'T2w': {'orientation': ['-0.0', '1.0', '0.0', '-0.0', '-0.0', '-1.0']}}},
 3: {'id': 3,
  'class': 0,
  'data': {'FLAIR': {'orientation': ['-0.00396099',
     '0.99816103',
     '0.06048864',
     '0.06522313',
     '0.06061819',
    

In [41]:
orientation_df = pd.read_json('/content/drive/MyDrive/Data Science/rsna-miccai-brain-tumor-radiogenomic-classification/dataset/data.json', orient='index')
orientation_df.iloc[0]

class                                                    1
data     {'FLAIR': {'folder': '/content/drive/MyDrive/D...
Name: 0, dtype: object

Unnamed: 0,class,data
0,1,{'FLAIR': {'folder': '/content/drive/MyDrive/D...
2,1,{'FLAIR': {'folder': '/content/drive/MyDrive/D...
3,0,{'FLAIR': {'folder': '/content/drive/MyDrive/D...
5,1,{'FLAIR': {'folder': '/content/drive/MyDrive/D...
6,1,{'FLAIR': {'folder': '/content/drive/MyDrive/D...


In [None]:
def explore(dictionary):
  if type(dictionary) == dict:
    for i in dictionary:
      print(f"the key {i} is of type {type(i)} and the value {dictionary[i]} is of type {type(dictionary[i])}")
      try:
        explore(dictionary[i])
      except:
        pass
  else:
    pass
 #   print(f"the element {dictionary} is of type {type(dictionary)}")

In [None]:
type(temp_dict['data']['FLAIR']['orientation'][0])

In [None]:
import numpy as np

type(list(temp_dict['data']['T1w']['orientation'])[0])

In [None]:
with open('/content/drive/MyDrive/Data Science/rsna-miccai-brain-tumor-radiogenomic-classification/dataset/data.json', 'w') as outfile:
    json.dump(data_dict, outfile, indent=4)

In [None]:
dir_path=f"/content/drive/MyDrive/Data Science/rsna-miccai-brain-tumor-radiogenomic-classification/dataset/train/{i:05d}"

In [57]:
df_data = pd.json_normalize(data_dict.values())

In [51]:
df_data.set_index(data_dict.keys())

TypeError: The parameter "keys" may be a column key, one-dimensional array, or a list containing only valid column keys and one-dimensional arrays.. Received column of type <class 'dict_keys'>

In [58]:
df_data.head()

Unnamed: 0,id,class,data.FLAIR.orientation,data.T1w.orientation,data.T1wCE.orientation,data.T2w.orientation
0,0,1,"[1, -0, 0, -0, -0, -1]","[1, -0, 0, -0, 1, 0]","[1, -0, 0, -0, -0, -1]","[-0, 1, 0, -0, -0, -1]"
1,2,1,"[-0.04540329, 0.99896874, 0, -0, -0, -1]","[0.999586, 0.0266035, 0.0109314, -0.0265941, 0...","[0.99942703, 0.033847, 0, -0, -0, -1]","[-0, 1, 0, -0, -0, -1]"
2,3,0,"[-0.00396099, 0.99816103, 0.06048864, 0.065223...","[0.995705, 0.091316, 0.015294, -0.0919349, 0.9...","[0.99963662, 0.02440997, 0.01143617, 0.0083274...","[0.0209028, 0.997263, 0.0709206, 0.0570101, 0...."
3,5,1,"[0.999933, 0.0115853, 0, 0.00121281, -0.104678...","[0.997774, 0.0247505, 0.0619187, -0.0169125, 0...","[0.99782761, 0.0567354, 0.03348367, 0.0382568,...","[-0.0425975, 0.994858, -0.091886, 0.0297932, -..."
4,6,1,"[0.9980977, -0.06165212, 3.88e-06, 0.06165186,...","[1, -0, 0, -0, 1, 0]","[1, -0, 0, -0, -0, -1]","[0.0596739, 0.998218, 0, -0, -0, -1]"
