# <center>Brain tumor classification using convolutional neural networks</center>


## Generic library imports

In [1]:
#Base imports
import sys
import os
import pathlib
import importlib # for reloading local class 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# The bare 'seaborn' style name was deprecated in Matplotlib 3.6 (renamed to
# 'seaborn-v0_8') and later removed; prefer the new name when this Matplotlib
# ships it and fall back to the legacy name on older installations.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
%matplotlib inline


## Check notebook environment 
- Runtime environment: Google Colab or local
- Set up paths for reading datasets and saving outputs (models, figures, etc.)
    - If running in the *Colab environment*, copy the following from Google Drive to the running machine:
        - data files
        - the `btc` package folder, which contains the `btc_helpers.py` helper classes and functions
- Check availability of CPU/GPU/TPU and get the `strategy` to use for running models


In [11]:
# Output folder and file prefix
outputSubfolder = 'btc_cnn'  # easier to manage when outputs go to a subfolder under output
saveFilePrefix = 'btc_cnn_'  # need the 2 underscores for subsequent parsing of attributes from filename

##----------Check colab environment---------##
# Only a missing `google.colab` package means "not running in Colab". Catching
# ImportError alone (instead of a bare `except:`) lets a failed Drive mount or a
# keyboard interrupt surface as an error, rather than printing "Not in Google
# Colab environment" while `colabEnv` has already been set to True.
colabEnv = False
try:
    from google.colab import drive
except ImportError:
    print('Not in Google Colab environment')
else:
    # Flag the environment only after the mount call has returned successfully.
    drive.mount('/content/drive')
    colabEnv = True
    print('In Google Colab environment. Mounted drive at /content/drive/MyDrive')
    
##--------------Setup paths----------------##
if colabEnv:
    # make a directory at current working directorys
    pathlib.Path('DataSetBrainTumor').mkdir(parents=True,exist_ok=True)
    !cp /content/drive/MyDrive/ColabData/07-Capstone/DataSetBrainTumor/*.h5 DataSetBrainTumor/   
    !cp -R /content/drive/MyDrive/DataSciProjects/GL-IDSS-ADSB/07-Capstone/btc .
    
    # Setup Paths for reading dataset and creating output (Google Drive)
    dataPath = "./DataSetBrainTumor"
    outputPath = '/content/drive/MyDrive/MIT-IDSS-Capstone/07-Capstone/notebooks/output'
else:
    # Setup Paths for reading dataset and creating output
    dataPath = '../DataSetBrainTumor' # dir or link to dir for running local
    outputPath = './output' # dir or link to dir (usually under the location of current notebook)

# Everything produced by this notebook goes under <outputPath>/<outputSubfolder>
outputPath = os.path.join(outputPath, outputSubfolder)

# One sub-directory each for saved models, figures, and model-tuning artefacts
modelPath, figurePath, modelTunerPath = (
    os.path.join(outputPath, leafName)
    for leafName in ('models', 'figures', 'model-tuner')
)

# Make sure every output directory exists (no error if it is already there)
for outDir in (modelPath, figurePath, modelTunerPath):
    pathlib.Path(outDir).mkdir(parents=True, exist_ok=True)
    
# Report the data location, the python source folder, and the output folder,
# each as a heading followed by the directory listing
folderReport = (
    (f'\nContents of data folder [{dataPath}]', dataPath),
    ('\nContents of python source folder [btc]', 'btc'),
    (f'\nOutput folders [{outputPath}]', outputPath),
)
for heading, folder in folderReport:
    print(heading)
    print(os.listdir(folder))

# Prefix applied to every file saved by this notebook
print(f'\nPrefix for all saved files {saveFilePrefix}')
print()


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
In Google Colab environment. Mounted drive at /content/drive/MyDrive

Contents of data folder [./DataSetBrainTumor]
['Training_150.h5', 'Training_64.h5', 'Testing_150.h5', 'Testing_256.h5', 'Testing_512.h5', 'Testing_64.h5', 'Testing_224.h5', 'Training_512.h5', 'Training_224.h5', 'Training_256.h5']

Contents of python source folder [btc]
['btc_helpers.py', '__init__.py', 'test_btchelpers.py', '__pycache__']

Output folders [/content/drive/MyDrive/MIT-IDSS-Capstone/07-Capstone/notebooks/output/btc_cnn]
['models', 'figures', 'model-tuner']

Prefix for all saved files btc_cnn_



In [12]:
# Imported here rather than in the top import cell: in the Colab environment the
# `btc` folder is only copied into the working directory by the path-setup cell.
# Note the alias `btc` refers to the `btc_helpers` module, not the `btc` package.
import btc.btc_helpers as btc

# Strategy to use for running models; per the notebook notes it is obtained by
# checking the availability of CPU/GPU/TPU.
strategy = btc.getCPUorGPUorTPUStrategy()






INFO:tensorflow:Initializing the TPU system: grpc://10.81.58.90:8470


INFO:tensorflow:Initializing the TPU system: grpc://10.81.58.90:8470


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Finished initializing TPU system.


Running on TPU
INFO:tensorflow:Found TPU system:


INFO:tensorflow:Found TPU system:


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


In [17]:
# Image size of the dataset to load
imgSize = 256

# Data utility over the dataset folder, using the 'Training' / 'Testing' names
dataUtil = btc.BTCDataUtil(dataPath, 'Training', 'Testing')
trainArr, testarr, trainDf, testDf = dataUtil.getDataSet(imgSize)

# Tumor-category distribution of the train and test splits
labelsBySplit = {
    'train': trainDf['tumorCategory'],
    'test': testDf['tumorCategory'],
}
display(btc.getLabelDistributionDf(labelsBySplit))

Updating cache with training and testing datasets
Caching train and test datasets
Reading HDF5 file ./DataSetBrainTumor/Training_256.h5
Reading HDF5 file ./DataSetBrainTumor/Testing_256.h5
Returning cached [Original] training and testing datasets


Unnamed: 0,train,trainFraction,test,testFraction
glioma,829.0,0.287747,100.0,0.248756
meningioma,830.0,0.288094,115.0,0.28607
no_tumor,395.0,0.137105,113.0,0.281095
pituitary,827.0,0.287053,74.0,0.18408
Total,2881.0,1.0,402.0,1.0


In [16]:
# Request the dataset with mergeSplit='all'; this rebinds trainArr, testarr,
# trainDf and testDf.
# NOTE(review): the size is hard-coded to 150 here while `imgSize` is set to 256
# in the previous cell, and this cell's execution count precedes that cell's --
# confirm which size is intended and that the notebook runs top to bottom.
trainArr, testarr, trainDf, testDf = dataUtil.getDataSet(150, mergeSplit='all')

# Tumor-category distribution of the resulting train and test splits
labelsBySplit = {
    'train': trainDf['tumorCategory'],
    'test': testDf['tumorCategory'],
}
display(btc.getLabelDistributionDf(labelsBySplit))

merging cached training and testing datasets

Splitting ratio for merged dataset is set to 0.20
Returning cached [Merged&Split] training and testing datasets


Unnamed: 0,train,trainFraction,test,testFraction
glioma,717.0,0.273039,212.0,0.322679
meningioma,762.0,0.290175,183.0,0.278539
no_tumor,420.0,0.159939,88.0,0.133942
pituitary,727.0,0.276847,174.0,0.26484
Total,2626.0,1.0,657.0,1.0
