# asdnetv7 TPU
This notebook is part of ongoing research to develop computer vision solutions for diagnosing Autism Spectrum Disorder by way of functional magnetic resonance images (fMRIs).

----

This particular notebook is set up for use in Google Colab with a tensor processing unit (TPU). Follow these steps to run the notebook:  
1. In Colab, ensure your instance is configured to use a TPU.  
2. Upload your GCS service account credential JSON file to the instance. 
3. Upload the colab_requirements.txt file in the config directory to the instance.  
4. Upload the asdnet_tools.zip file (compressed version of the directory) to the instance.  
5. Ensure the "colab" and "first_run" variables are both set to True.  
6. Run the first 2 cells in the notebook.  
7. Restart the instance runtime after the installations finish.  
8. Set the "first_run" variable to False. 
<br>  
**Everything is ready to go now!**

# colab instance setup

In [1]:
##############
# colab: selects the '/content/...' service-account path (vs. '../config/...') and,
#        together with first_run, gates the unzip/pip setup cell.
# first_run: set True only for the initial run on a fresh instance so the setup cell executes;
#        set back to False after restarting the runtime (see the steps above).
colab = False
first_run = False
##############

In [2]:
if colab & first_run:
    !unzip -q ./asdnet_tools.zip 
    !pip3 install -r /content/colab_requirements.txt --upgrade  

# import

In [3]:
# requirements #
import pandas as pd 
import numpy as np 
import tensorflow as tf
from tensorflow import keras 
from sklearn.model_selection import train_test_split
import random 
import gc 
####
from collections import OrderedDict, Counter 
import os 
pj = os.path.join
import pickle 
from pprint import pprint
####
from asdnet_tools import augmentation, visualization, data_handler, modeler

ModuleNotFoundError: No module named 'tensorflow'

In [4]:
# import importlib
# importlib.reload(visualization)
# importlib.reload(augmentation)
# importlib.reload(data_handler)
# importlib.reload(modeler)

# gcp storage setup

In [5]:
# Pick the service-account key location: uploaded to /content on Colab,
# read from the repository's config directory otherwise.
service_acct_fpath = (
    '/content/asd-cv-prediction-service-acct-1-key.json'
    if colab
    else '../config/asd-cv-prediction-service-acct-1-key.json'
)

# Data handler used below for loading arrays, building datasets and saving models.
# NOTE(review): 'by_site' presumably selects how the stored data is organised — confirm in data_handler.
dh = data_handler.DataHandler(service_acct_fpath, 'by_site')

# viz

In [None]:
# Visualization helper used for slice previews, metric plots and reports below;
# it is constructed with '../results' as its results path.
viz = visualization.Viz(results_path='../results')

# data arrays

In [None]:
# Load the pickled feature (X) and label (y) arrays through the data handler, then print their shapes.
# NOTE(review): site_ls=None presumably means "all sites" and max_results=1 presumably caps
# how many stored results are fetched — confirm both against data_handler before a full run.
X, y = dh.get_pickled_data_arrays(site_ls=None, max_results=1)
print('DATA:\n', X.shape, '\n', y.shape, '\n', sep='')

In [None]:
# Shuffled, label-stratified split: 80% training / 20% validation.
# random_state is pinned to the notebook's seed (92) because this cell executes BEFORE the
# "reproducibility" cell seeds NumPy; without it the split would differ on every run.
x_train, x_val, y_train, y_val = train_test_split(
    X, y, test_size=.2, shuffle=True, stratify=y, random_state=92
)
print('TRAIN:\n', x_train.shape, '\n', y_train.shape, '\n', sep='')
print('VAL:\n', x_val.shape, '\n', y_val.shape, sep='')

In [None]:
# Tally how often each label value occurs in the training and validation labels;
# as the cell's final expression the pair of Counters is shown as the cell output.
Counter(y_train), Counter(y_val)

In [None]:
# Preview the second training sample (index 1) before any augmentation.
viz.show_slice(x_train[1])

In [None]:
# Preview augmentation on one sample using equal weights for all five augmentation options.
augmentor = augmentation.Augmentor([1] * 5)
# train_preprocessing returns an indexable result; element 0 is taken as the processed sample.
# NOTE(review): the second argument (1) is presumably a label placeholder — confirm in augmentation.
preview_sample = augmentor.train_preprocessing(x_train[1], 1)[0]
viz.show_slice(preview_sample.numpy())

# params

In [None]:
# --- run identity -----------------------------------------------------------
model_id = 2000

# --- batching ---------------------------------------------------------------
batch_size = 64

# --- augmentation weights ---------------------------------------------------
# Position legend: [None, rotate, rotate90, warp, noise]
augweights = [1, 0, 0, 2, 2]

# --- model hyper-parameters -------------------------------------------------
# Built from ordered (name, value) pairs so the insertion order is explicit.
# NOTE(review): 'kreg' looks like a kernel-regularisation factor and 'bNdo'/'topdropN'
# look like dropout rates — confirm against modeler.create_model_skip_attention.
params = OrderedDict([
    ('kreg', 1e-4),

    ('b1do', 0.2),
    ('b2do', 0.2),
    ('b3do', 0.3),

    ('b4do', 0.2),
    ('b5do', 0.2),
    ('b6do', 0.3),

    ('b7do', 0.3),
    ('b8do', 0.3),
    ('b9do', 0.4),

    ('b10do', 0.2),
    ('b11do', 0.2),
    ('b12do', 0.5),

    ('topdrop1', 0.5),
    ('topdrop2', 0.5),
])

# reproducibility

In [None]:
# Seed all three random sources used by the notebook (TensorFlow, NumPy, stdlib random)
# with the same value, in that order.
for seed_fn in (tf.random.set_seed, np.random.seed, random.seed):
    seed_fn(92)

In [None]:
# Start from a clean slate before building a new model in this session.
keras.backend.clear_session()
# Only surface TensorFlow log records at ERROR level or above.
tf.get_logger().setLevel('ERROR')
# Force a garbage-collection pass; the returned count is bound to `_` and discarded.
_ = gc.collect() 

# tpu strategy

In [None]:
# Resolve the distribution strategy: use the Colab TPU when one is attached, otherwise fall
# back to TensorFlow's current/default strategy so `strategy` is always defined.
try:
    # Get a handle to the attached TPU. On GCP it will be the CloudTPU itself
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
    print("Device:", tpu.master())
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.TPUStrategy(tpu)
    print('STATEGY: TPU CLUSTER', "Number of replicas:", strategy.num_replicas_in_sync)
except Exception as err:
    # The former bare `except:` only printed a message, which left `strategy` undefined and
    # made the model-creation code below fail with a NameError (e.g. a missing
    # COLAB_TPU_ADDR raises KeyError when not running on Colab). Report the cause and fall back.
    print('problems with the TPU connection...')
    print('falling back to the default strategy:', repr(err))
    strategy = tf.distribute.get_strategy()

######################      MODEL CREATION      #################################
# Variables must be created inside the strategy scope so they are placed by that strategy.
with strategy.scope():
    m = modeler.create_model_skip_attention(
        model_id=model_id,
        params=params,
        compile=True,
        batch_size=batch_size,
        summary=False
        )
# model_id += 1
##############

# data augmentor

In [None]:
# data augmentor setup #
# Rebinds `augmentor` (previously the equal-weight preview instance) to one built from the
# run's `augweights`; its train_preprocessing is handed to the training dataset below.
augmentor = augmentation.Augmentor(augweights)

# datasets

In [None]:
# Training dataset: built in training mode with the augmentor's preprocessing function.
train_dataset = dh.get_dataset(
    x_train,
    y_train,
    batch_size,
    augmentor.train_preprocessing,
    train=True,
)

# Validation dataset: same batch size, no preprocessing function supplied.
val_dataset = dh.get_dataset(x_val, y_val, batch_size)

# callbacks

In [None]:
# Learning-rate schedule: watches the *training* loss and reduces the LR after 20 epochs
# without improvement, never going below 1e-8.
reduce_lr_cb = keras.callbacks.ReduceLROnPlateau(
    monitor='loss',
    mode='min',
    patience=20,
    cooldown=1,
    min_lr=1e-8,
    verbose=1,
)

# Early stopping: watches the *validation* loss; stops after 60 epochs without an
# improvement of at least .001 and restores the best weights seen.
earlystop_cb = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=.001,
    patience=60,
    restore_best_weights=True,
    verbose=1,
)

callbacks = [reduce_lr_cb, earlystop_cb]

# fit

In [None]:
# save initial weights #
# Stores the untrained model via the data handler with init=True (the post-training save
# uses init=False).
# NOTE(review): the target is 'asdnetv4_results' although this notebook is asdnetv7 — confirm
# this destination is intended.
dh.save_model_gcp(m, 'asdnetv4_results', init=True)

In [None]:
# `datetime` is not among the notebook's top-level imports, so it is imported here;
# without it this cell failed immediately with a NameError.
import datetime

start = datetime.datetime.now()
# fit model #
# steps_per_epoch scales the sample count by sum(augweights), the total augmentation weight.
# batch_size is intentionally NOT passed to fit(): the datasets are already built with
# batch_size by dh.get_dataset, and Keras documents that batch_size must not be specified
# for dataset inputs (presumably tf.data.Dataset objects — confirm in data_handler).
h = m.fit(
    x=train_dataset,
    steps_per_epoch=(x_train.shape[0] * sum(augweights) // batch_size + 1),
    validation_data=val_dataset,
    validation_steps=(x_val.shape[0] // batch_size + 1),
    epochs=200,
    callbacks=callbacks,
    verbose=0
)
# total_seconds() covers the whole duration; `.seconds` silently drops whole days.
elapsed = datetime.datetime.now() - start
print('TOTAL EXEC TIME: ', round(elapsed.total_seconds() / 60), ' mins')
# save trained model & weights #
dh.save_model_gcp(m, 'asdnetv4_results', init=False)

# visualize


In [None]:
# Class labels passed to the confusion matrix and classification report.
class_ls = ['asd', 'typ']

# Echo the run configuration next to the plots.
pprint(params)
print(augweights)

# Training curves first, then the two validation-set evaluations, then the architecture plot.
viz.plot_metrics(h.history, m.name)
for evaluate in (viz.plot_confusion_matrix, viz.get_classification_report):
    evaluate(m, x_val, y_val, class_ls)
viz.plot_model_structure(m)

# clean tpu system

In [None]:
# Release the TPU at the end of the run: shut down its devices and clear all caches.
# No explicit cluster resolver is supplied (None), exactly as before.
tf.tpu.experimental.shutdown_tpu_system(cluster_resolver=None)