In [None]:
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import pydicom as dcm
from ipywidgets import interact
%matplotlib inline

# Research and About the Data

**MRI**
           
    Magnetic resonance imaging (MRI) is a medical imaging technique that uses a magnetic field and computer-generated radio waves to create detailed images of the organs and tissues in your body.

    Most MRI machines are large, tube-shaped magnets. When you lie inside an MRI machine, the magnetic field temporarily realigns water molecules in your body. Radio waves cause these aligned atoms to produce faint signals, which are used to create cross-sectional MRI images — like slices in a loaf of bread.

    The MRI machine can also produce 3D images that can be viewed from different angles.
    
    It is one of the most commonly used tests in neurology and neurosurgery. MRI provides exquisite detail of brain, spinal cord and vascular anatomy, and has the advantage of being able to visualize anatomy in all three planes: axial, sagittal and coronal. 

![](https://case.edu/med/neurology/NR/mri%20slices%20new.jpg)

**mpMRI**

    A multi-parametric magnetic resonance imaging (mpMRI) scan is a special type of scan that creates more detailed pictures of your prostate than a standard MRI scan. It does this by combining four different types of image. These images give your doctor information about whether or not there is any cancer inside your prostate.

    During the mpMRI scan, you will be injected with a Gadolinium-based contrast agent which is an essential part of this type of imaging. It allows for a clearer picture of the prostate. The gadolinium (a metal ion) in these dynamic contrast agents has been chemically adapted to make it safe to use as part of a mpMRI scan. Part of the quality control for mpMRI involves using the lowest possible effective dose of the contrast agent. There is not yet any clinical evidence that gadolinium causes any harm when used as a contrast agent for mpMRI, however, we will continue to monitor the situation carefully. The contrast agent will not be administered if it's clinically contraindicated, for example, when dealing with kidney problems.
    
Visit this site for further knowledge: https://case.edu/med/neurology/NR/MRI%20Basics.htm | https://www.youtube.com/watch?v=rumRGO_2H0E

## DATA

The images provided are mpMRI (Multi-Parametric Magnetic Resonance Imaging) scans. Each mpMRI study is given in the following format:
    
    > 0000
    |
    |-- FLAIR -Images
    |--T1w - Images
    |--T1wCE - Post Contrast - Images
    |--T2w - Images
    
**MRI IMAGING SEQUENCES**

Repetition Time (TR) is the amount of time between successive pulse sequences applied to the same slice. Time to Echo (TE) is the time between the delivery of the RF pulse and the receipt of the echo signal. 

The most common MRI sequences are T1-weighted and T2-weighted scans. **T1-weighted images are produced by using short TE and TR times. The contrast and brightness of the image are predominately determined by T1 properties of tissue.** Conversely, **T2-weighted images are produced by using longer TE and TR times. In these images, the contrast and brightness are predominately determined by the T2 properties of tissue.**

*In general, T1- and T2-weighted images can be easily differentiated by looking at the CSF. CSF is dark on T1-weighted imaging and bright on T2-weighted imaging.*

A third commonly used sequence is the **Fluid Attenuated Inversion Recovery (Flair)**. The Flair sequence is similar to a T2-weighted image except that the TE and TR times are very long. **By doing so, abnormalities remain bright but normal CSF fluid is attenuated and made dark. This sequence is very sensitive to pathology and makes the differentiation between CSF and an abnormality much easier.**

**T1-weighted imaging can also be performed while infusing Gadolinium (Gad)**. Gad is a non-toxic paramagnetic contrast enhancement agent. When injected during the scan, Gad changes signal intensities by shortening T1. Thus, Gad is very bright on T1-weighted images. Gad enhanced images are especially useful in looking at vascular structures and breakdown in the blood-brain barrier.

In [None]:
# Count how many image files each study has for every MRI sequence.
# The result is `study`: {study folder name: {sequence name: number of files}}.
train_dir = '../input/rsna-miccai-brain-tumor-radiogenomic-classification/train'
sequences = ('FLAIR', 'T1w', 'T2w', 'T1wCE')

study = {}
patients = os.listdir(train_dir)
for p in patients:
    # Start every study from zero so that a missing sequence folder is
    # reported as 0 instead of inheriting the previous study's count.
    counts = dict.fromkeys(sequences, 0)
    for i in os.listdir(os.path.join(train_dir, p)):
        if i in counts:
            counts[i] = len(os.listdir(os.path.join(train_dir, p, i)))
    study[p] = counts

print(f'Total of {len(patients)} patients')
print('Study Directory Created')

In [None]:
# Turn the {study: {sequence: count}} dictionary into a table with one row per
# study. Only convert while `study` is still a dict: re-running this cell on the
# already converted DataFrame would transpose it a second time and break the
# column look-ups below.
if isinstance(study, dict):
    study = pd.DataFrame(study).transpose()

modalities = ['FLAIR','T1w','T2w','T1wCE']

# Ten studies with the most files, one panel per sequence
plt.figure(figsize=(30,5))
for position, modality in enumerate(modalities, start=1):
    top10 = study.sort_values(modality, ascending=False)[modality][:10]
    plt.subplot(1, 4, position)
    sns.barplot(x=top10.index, y=top10)
    plt.title(f'{modality} TOP 10')
plt.show()

# Total number of files per sequence, summed over all studies
plt.figure(figsize=(20,5))
totals = [study[modality].sum() for modality in modalities]
sns.barplot(x=modalities, y=totals)
plt.title('Total images per sequence')
plt.show()

On examining the study dictionary I created, I found that the different studies had different numbers of FLAIR, T1w, T2w and T1wCE images. This is because the images were acquired under protocols meant for a human to study, not for a computer.

# Analysis and Convenience 

In [None]:
# Load the training labels and give the two columns short names
df = pd.read_csv(
    "../input/rsna-miccai-brain-tumor-radiogenomic-classification/train_labels.csv"
)
df.columns = ['id', 'mgmt']

# Left-pad every id with zeros to 5 characters (0 -> '00000')
df['id'] = df['id'].map(lambda raw_id: str(raw_id).zfill(5))

df.head()

In [None]:
# Preview the first rows of `study`
study.head()

BraTS2ID is the Study Number (0 refers to study 00000, 2 refers to 00002, etc.)
MGMT_value is MGMT promoter methylation status

Such radiogenomic models could improve the efficiency and accuracy of diagnosis, prognosis and treatment planning for patients with glioblastoma.

In [None]:
# Number of studies per MGMT label -- fairly equally distributed
sns.countplot(data=df, x='mgmt')
plt.show()

## Visualization

The Images are in the DICOM Format, such that,

![](https://www.researchgate.net/publication/341136795/figure/fig1/AS:887639720865792@1588641209109/A-stack-of-MRI-2D-images-a-The-resolution-in-the-through-plane-direction-coronal-and.jpg)

The images of a study that belong to a particular mriType, when stacked upon each other, give us a 3D model of the brain along all 3 axes. Some slices are missing, since these images were initially expected to be used solely by humans.

In [None]:
# Paths -- DON'T CHANGE --
# Root folders of the train and test studies. The helpers below join a study id
# and an MRI type onto one of these to locate a folder of DICOM files.
train_path = '../input/rsna-miccai-brain-tumor-radiogenomic-classification/train'
test_path = '../input/rsna-miccai-brain-tumor-radiogenomic-classification/test'

# Sort key: the number embedded in a file name
def crit(x):
    """Return, as an int, the text after the first '-' in ``x`` up to the next '-' or '.'."""
    after_dash = x.split('-')[1]
    digits = after_dash.split('.')[0]
    return int(digits)

# Read one DICOM file
def imread(path):
    """Load the DICOM file at ``path`` and return its ``pixel_array``."""
    dataset = dcm.dcmread(path)
    return dataset.pixel_array

# Draw an image array on the current axes
def imshow(arr):
    """Render ``arr`` with the gray colormap and hide the axis."""
    plt.imshow(arr, cmap='gray')
    plt.axis('off')

# Load every slice of one study / MRI type as a single 3D array
def get_3d(idx,mriType,train=True):
    """Stack all images of one study folder into a volume.

    Parameters
    ----------
    idx : str
        Study folder name, e.g. '00000'.
    mriType : str
        Sequence sub-folder name, e.g. 'FLAIR'.
    train : bool
        Look under ``train_path`` when True, otherwise under ``test_path``.

    Returns
    -------
    tuple
        ``(volume, n_slices)``: the images stacked along the LAST axis, in
        the order given by ``crit``, and the number of files that were read.

    Raises
    ------
    ValueError
        If the folder contains no files.
    """
    path = os.path.join(train_path if train else test_path, idx, mriType)
    # Order by the integer in the file name; a plain string sort would put
    # 'Image-10' before 'Image-2'.
    imgs = sorted(os.listdir(path), key=crit)
    if not imgs:
        raise ValueError(f'No slices found in {path}')
    # Reuse the shared reader instead of repeating the DICOM call here
    final = [imread(os.path.join(path, img)) for img in imgs]
    return np.stack(final, axis=-1), len(final)

# Average all slices of a study into one 2D image
def get_added(idx,mriType,train=True):
    """Return the per-pixel mean over every slice returned by ``get_3d``.

    The arguments are forwarded unchanged to ``get_3d``; the slice count it
    also returns is not needed here.
    """
    volume, _ = get_3d(idx, mriType, train)
    # get_3d's result is indexed [:, :, slice]; bring the slice axis to the front
    slices_first = np.moveaxis(volume, -1, 0)
    return slices_first.mean(axis=0)

# Returns the image with the largest mean pixel value
# (used here as a stand-in for the most central layer)
def get_max(idx,mriType,train=True):
    """Return the image of a study folder whose mean pixel value is highest.

    Parameters
    ----------
    idx : str
        Study folder name, e.g. '00561'.
    mriType : str
        Sequence sub-folder name, e.g. 'FLAIR'.
    train : bool
        Look under ``train_path`` when True, otherwise under ``test_path``.

    Returns
    -------
    The pixel array, as returned by ``imread``, with the highest mean. On
    ties the first one in ``os.listdir`` order wins.

    Raises
    ------
    IndexError
        If the folder contains no files.
    """
    path = os.path.join(train_path if train else test_path, idx, mriType)
    imgs = os.listdir(path)
    if not imgs:
        raise IndexError(f'No slices found in {path}')
    max_layer = None
    max_val = None
    for img in imgs:
        # Read each file exactly once and keep the array of the current best,
        # rather than reading the winner from disk a second time.
        layer = imread(os.path.join(path, img))
        val = np.mean(layer)
        if max_val is None or val > max_val:
            max_val = val
            max_layer = layer
    return max_layer

# To get the label for a Study
def get_mgmt(idx):
    """Return the ``mgmt`` value stored in ``df`` for one study.

    ``df`` keeps its default integer index, so the study is looked up in the
    ``id`` column rather than by row label.

    Parameters
    ----------
    idx : str or int
        Study id; it is zero-padded to 5 characters before the look-up, so
        both ``'00002'`` and ``2`` are accepted.

    Raises
    ------
    KeyError
        If no row of ``df`` has that id.
    """
    study_id = str(idx).zfill(5)
    matches = df.loc[df['id'] == study_id, 'mgmt']
    if matches.empty:
        raise KeyError(f'No MGMT label for study {study_id}')
    return matches.iloc[0]

### Visualising a 3D Model

In [None]:
# Volume browsed by the slider below
arr3d,nlayers = get_3d('00000','FLAIR')

def explore_3d(layer):
    """Show slice ``layer`` of ``arr3d``, titled with its index, and return ``layer``."""
    imshow(arr3d[:,:,layer])
    plt.title(str(layer))
    plt.show()
    return layer

# Valid slice indices run from 0 to nlayers-1; start the slider at 0 so the
# first slice can be viewed too.
interact(explore_3d,layer=(0,nlayers-1))

In [None]:
# Slice-averaged image of study 00000, one panel per sequence
plt.figure(figsize=(20,8))
for panel, sequence in enumerate(('FLAIR', 'T1w', 'T2w', 'T1wCE'), start=1):
    plt.subplot(1, 4, panel)
    imshow(get_added('00000', sequence))
    plt.title(sequence)
plt.show()

In [None]:
# Highest-mean slice of study 00561, one panel per sequence
plt.figure(figsize=(20,8))
for panel, sequence in enumerate(('FLAIR', 'T1w', 'T2w', 'T1wCE'), start=1):
    plt.subplot(1, 4, panel)
    imshow(get_max('00561', sequence))
    plt.title(sequence)
plt.show()