In [6]:
%load_ext autoreload
%autoreload 2
%load_ext autotime

import os
import sys
sys.path.append('..')

import splintr as sp
from splintr.splice import rmats_subset_top_events
# sp.util.verbose = True

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import torch
from torchsummary import summary

from ax.plot.contour import plot_contour, interact_contour
from ax.plot.trace import optimization_trace_single_method
from ax.service.managed_loop import optimize
from ax.utils.notebook.plotting import render, init_notebook_plotting
init_notebook_plotting()

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

# Use the first CUDA device when PyTorch reports one, otherwise run on the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")

# Seed NumPy's global RNG and PyTorch with the same value.
seed = 99
np.random.seed(seed)
torch.manual_seed(seed)
# `torch.set_num_threads` is a function and must be called. The previous
# `torch.set_num_threads=16` rebound the attribute to the int 16, which left
# the thread count unchanged and made the function unusable afterwards.
torch.set_num_threads(16)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
The autotime extension is already loaded. To reload it, use:
  %reload_ext autotime


[INFO 08-07 22:28:03] ipy_plotting: Injecting Plotly library into cell. Do not overwrite or delete cell.


time: 7.37 ms


In [2]:
# Parameters: location of the feature table on disk.
data_dir = '../data/features'
feature_file = f'{data_dir}/SE.txt'

# Subset the top events from the feature file (second argument: 5) and keep
# only rows whose IncLevelDifference is positive (upregulated AS events).
feature_df = (
    rmats_subset_top_events(feature_file, 5)
    .loc[lambda events: events['IncLevelDifference'] > 0]
)

time: 93.1 ms


In [3]:
# Build datasets, samplers and label names from the filtered events and the
# '../data/hg19.fa' file, then create train/valid/test loaders (batch size 32).
datasets, samplers, label_names = sp.learn.load_datasets(
    feature_df,
    '../data/hg19.fa',
    seq_size=384,
    augment_k=5,
)
train_loader, valid_loader, test_loader = sp.learn.init_loaders(
    datasets,
    samplers,
    batch_size=32,
)

HBox(children=(IntProgress(value=0, description='Datasets', max=3, style=ProgressStyle(description_width='init…

HBox(children=(IntProgress(value=0, max=5), HTML(value='')))

HBox(children=(IntProgress(value=0, max=5), HTML(value='')))

HBox(children=(IntProgress(value=0, max=5), HTML(value='')))


time: 8.98 s


In [4]:
def train_evaluate(params):
    """Fit one model for a hyperparameter setting and return its validation score.

    This function is handed to `optimize` as its `evaluation_function`.

    Args:
        params: Mapping of hyperparameter names to values. `params['batch_size']`
            sizes the data loaders; the whole mapping is forwarded to
            `sp.learn.fit`.

    Returns:
        The second value returned by `sp.learn.fit`, bound here as the
        validation accuracy.
    """
    # Rebuild the loaders per trial because the batch size is itself tuned.
    loaders = sp.learn.init_loaders(datasets, samplers, batch_size=params['batch_size'])
    train_batches, valid_batches, _unused_test_batches = loaders

    # NOTE(review): log_dir is a hard-coded absolute path; confirm it exists on
    # the machine running this notebook.
    _fitted_net, valid_acc = sp.learn.fit(
        params,
        train_batches,
        valid_batches,
        num_epochs=20,
        label_names=label_names,
        log_dir='/home/ubuntu/tb/8-07-19-6class/',
    )
    return valid_acc

time: 1.78 ms


In [45]:
# Hyperparameter search with Ax's managed loop: each trial calls
# `train_evaluate` with one sampled parameterization and uses the returned
# value as the objective.
#
# The objective is named 'accuracy' (previously 'accuracy_loss') because
# `train_evaluate` returns a validation accuracy, and the contour plot further
# down asks the model for metric_name='accuracy'; the two names must agree.
best_params, values, experiment, model = optimize(
    parameters=[
        {"name": "batch_size", "type": "range", "bounds": [10, 1000], "log_scale": True},
        {"name": "filter1", "type": "range", "bounds": [4, 256], "log_scale": True},
        {"name": "filter2", "type": "range", "bounds": [4, 256], "log_scale": True},
        {"name": "main_kernel", "type": "range", "bounds": [8, 40]},
        {"name": "receptive_layers", "type": "range", "bounds": [0, 1]},
        {"name": "lr", "type": "range", "bounds": [1e-6, 0.01], "log_scale": True},
        {"name": "fc_out", "type": "range", "bounds": [6, 32], "log_scale": True},
#         {"name": "weight_decay", "type": "range", "bounds": [1e-4, 0.1], "log_scale": True}
    ],
    evaluation_function=train_evaluate,
    objective_name='accuracy',
    total_trials=20)

[INFO 08-07 23:01:44] ax.service.utils.dispatch: Using Bayesian Optimization generation strategy. Iterations after 7 will take longer to generate due to model-fitting.
[INFO 08-07 23:01:44] ax.service.managed_loop: Started full optimization with 20 steps.
[INFO 08-07 23:01:44] ax.service.managed_loop: Running optimization trial 1...


{'batch_size': 206, 'filter1': 50, 'filter2': 163, 'main_kernel': 27, 'receptive_layers': 0, 'lr': 5.280713535656538e-06, 'fc_out': 8}
Input:  torch.Size([2, 4, 4, 384])
CNN1 output:  torch.Size([2, 50, 4, 128])
Sequential(
  (0): Conv2d(50, 163, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
  (1): ReLU()
  (maxpool): MaxPool2d(kernel_size=(1, 2), stride=(1, 2), padding=0, dilation=1, ceil_mode=False)
)
CNN2 output:  torch.Size([2, 163, 4, 64])
FC input:  torch.Size([2, 41728])
Final output:  torch.Size([2, 6])
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 50, 4, 128]           5,450
              ReLU-2           [-1, 50, 4, 128]               0
            Conv2d-3          [-1, 163, 4, 128]          24,613
              ReLU-4          [-1, 163, 4, 128]               0
         MaxPool2d-5           [-1, 163, 4, 64]               0
           Dropout-6         

HBox(children=(IntProgress(value=0, max=20), HTML(value='')))

Input:  torch.Size([206, 4, 4, 384])
CNN1 output:  torch.Size([206, 50, 4, 128])
Sequential(
  (0): Conv2d(50, 163, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
  (1): ReLU()
  (maxpool): MaxPool2d(kernel_size=(1, 2), stride=(1, 2), padding=0, dilation=1, ceil_mode=False)
)
CNN2 output:  torch.Size([206, 163, 4, 64])
FC input:  torch.Size([206, 41728])
Final output:  torch.Size([206, 6])
Input:  torch.Size([206, 4, 4, 384])
CNN1 output:  torch.Size([206, 50, 4, 128])
Sequential(
  (0): Conv2d(50, 163, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
  (1): ReLU()
  (maxpool): MaxPool2d(kernel_size=(1, 2), stride=(1, 2), padding=0, dilation=1, ceil_mode=False)
)
CNN2 output:  torch.Size([206, 163, 4, 64])
FC input:  torch.Size([206, 41728])
Final output:  torch.Size([206, 6])
Input:  torch.Size([206, 4, 4, 384])
CNN1 output:  torch.Size([206, 50, 4, 128])
Sequential(
  (0): Conv2d(50, 163, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
  (1): ReLU()
  (maxpool): MaxPool2d(ke

KeyboardInterrupt: 

time: 1.35 s


In [None]:
# Display the best parameterization returned by the `optimize` call above.
best_params

In [None]:
# `values` is the second item returned by `optimize`; unpack it into its two
# components and show both as the cell output.
means, covariances = values
means, covariances

In [None]:
# Render an interactive contour plot from the model returned by `optimize`.
# NOTE(review): metric_name presumably has to name a metric the experiment
# tracks (cf. the objective_name passed to `optimize`) — confirm they agree.
render(interact_contour(model=model, metric_name='accuracy'))

## Check to see if model accuracy improves as we identify better hyperparameters

In [None]:
# `optimization_trace_single_method` expects a 2-d array of means (one row per
# optimization run, so it can average across runs). There is a single run here,
# so the per-trial objective means are wrapped in an outer list to form one row.
best_objectives = np.array([
    [100 * trial.objective_mean for trial in experiment.trials.values()]
])

# The running maximum along the trial axis gives the best value seen so far.
best_objective_plot = optimization_trace_single_method(
    title="Model performance vs. # of iterations",
    ylabel="Classification Accuracy, %",
    y=np.maximum.accumulate(best_objectives, axis=1),
)
render(best_objective_plot)