In [1]:
#AML save importance scores IF inclusion optimus scrambler 


import pandas as pd
import numpy as np
from pickle import dump
from pickle import load


import matplotlib.pyplot as plt
import scipy.stats as stats
import seaborn as sns
import scipy as scp
import random

import keras
from keras.models import Sequential, Model, load_model
from keras.preprocessing import sequence
from keras.optimizers import RMSprop
from keras.models import Sequential
from keras.layers.core import Dense
from keras.layers.core import Dropout
from keras.layers.core import Activation
from keras.layers.core import Flatten
from keras.layers.convolutional import Conv1D



import tensorflow as tf
import os 
from tensorflow.python.framework import ops

#python imports 
#functions from Optimus 5-Prime model
from optimusFunctions import *
#dna sequence graphing 
from sklearn import preprocessing
from sklearn import metrics
import scipy as scp




Using TensorFlow backend.


In [2]:
#IMPORTANT!  A gradient name can only be registered once per Python process.
#The registration below is guarded so that re-running this cell is a no-op
#instead of an error.


def st_mul(op, grad):
    """Gradient function registered under the name "STMul".

    Returns the incoming gradient unchanged for each of the two inputs of the
    op it is attached to. It is selected in this file through
    ``gradient_override_map({'Mul': 'STMul'})``.

    Args:
        op: The forward op being differentiated (unused).
        grad: Gradient flowing back into the op's output.

    Returns:
        A two-element list ``[grad, grad]``.
    """
    return [grad, grad]


try:
    # Equivalent to decorating st_mul with @ops.RegisterGradient("STMul").
    ops.RegisterGradient("STMul")(st_mul)
except KeyError:
    # TensorFlow's registry raises KeyError when the name is already taken,
    # which is what happens when this cell is executed a second time in the
    # same kernel. The earlier (identical) registration stays in effect.
    pass




In [3]:
def st_sampled_softmax(logits):
    """Sample a one-hot vector from ``logits`` with overridden gradients.

    One class index per row is drawn with ``tf.multinomial``, turned into a
    one-hot vector, multiplied by ``softmax(logits)`` and passed through
    ``tf.ceil``. While the multiply and ceil ops are created, the gradient of
    ``Ceil`` is replaced by ``Identity`` and the gradient of ``Mul`` by the
    registered ``STMul`` gradient.

    Args:
        logits: Tensor whose axis 1 holds the class logits (its static size
            along axis 1 is used as the one-hot depth).

    Returns:
        Tensor ``ceil(one_hot(sample) * softmax(logits))``.
    """
    with ops.name_scope("STSampledSoftmax"):
        probabilities = tf.nn.softmax(logits)
        n_classes = logits.get_shape().as_list()[1]

        # tf.multinomial returns one sampled index per row in a trailing
        # axis of size 1, which is squeezed away before one-hot encoding.
        drawn_ix = tf.squeeze(tf.multinomial(logits, 1), 1)
        drawn_onehot = tf.one_hot(drawn_ix, n_classes, 1.0, 0.0)

        grad_overrides = {'Ceil': 'Identity', 'Mul': 'STMul'}
        with tf.get_default_graph().gradient_override_map(grad_overrides):
            return tf.ceil(drawn_onehot * probabilities)

def st_hardmax_softmax(logits):
    """Pick the arg-max class of ``logits`` as a one-hot vector with overridden gradients.

    Same construction as ``st_sampled_softmax`` except that the class index is
    the ``tf.argmax`` of the softmax probabilities instead of a random draw.
    While the multiply and ceil ops are created, the gradient of ``Ceil`` is
    replaced by ``Identity`` and the gradient of ``Mul`` by the registered
    ``STMul`` gradient.

    Args:
        logits: Tensor whose axis 1 holds the class logits (its static size
            along axis 1 is used as the one-hot depth).

    Returns:
        Tensor ``ceil(one_hot(argmax) * softmax(logits))``.
    """
    with ops.name_scope("STHardmaxSoftmax"):
        probabilities = tf.nn.softmax(logits)
        n_classes = logits.get_shape().as_list()[1]

        top_ix = tf.argmax(probabilities, 1)
        top_onehot = tf.one_hot(top_ix, n_classes, 1.0, 0.0)

        grad_overrides = {'Ceil': 'Identity', 'Mul': 'STMul'}
        with tf.get_default_graph().gradient_override_map(grad_overrides):
            return tf.ceil(top_onehot * probabilities)
        
#Gumbel Distribution Sampler
def gumbel_softmax(logits, temperature=0.5) :
    """Draw one sample from a ``RelaxedOneHotCategorical`` distribution.

    Args:
        logits: Tensor of unnormalised class scores passed as the
            distribution's ``logits``.
        temperature: Temperature passed to the distribution (default 0.5).

    Returns:
        The tensor returned by ``RelaxedOneHotCategorical.sample()``.
    """
    # The former `batch_dim` / `onehot_dim` locals were never used; they only
    # forced an unnecessary static-shape lookup on `logits`.
    gumbel_dist = tf.contrib.distributions.RelaxedOneHotCategorical(temperature, logits=logits)
    return gumbel_dist.sample()


In [4]:
#model functions for loading optimus scramblers 

import keras.backend as K

def mask_dropout_multi_scale(mask, drop_scales=[1, 2, 4, 7], min_drop_rate=0.0, max_drop_rate=0.5) :
    """Apply ``mask_dropout`` once per scale, splitting a random drop rate across scales.

    For every example a total drop rate is drawn uniformly between
    ``min_drop_rate`` and ``max_drop_rate``. Random logits in [-5, 5] are
    soft-maxed over the scale axis to give each scale a share of that rate,
    and ``mask_dropout`` is applied successively with each
    ``(share * rate, scale)`` pair.

    Args:
        mask: Tensor to drop entries from. Axes 0, 2 and 3 are indexed
            downstream, so a rank-4 tensor is expected.
        drop_scales: Block sizes forwarded as ``drop_scale`` to ``mask_dropout``.
        min_drop_rate: Lower bound of the per-example drop rate.
        max_drop_rate: Upper bound of the per-example drop rate.

    Returns:
        ``K.switch(K.learning_phase(), dropped_mask, mask)``: the dropped mask
        in the learning phase, otherwise ``mask`` unchanged.
    """
    # One total drop rate per example, broadcastable over the remaining axes.
    per_example_shape = (K.shape(mask)[0], 1, 1, 1)
    rates = K.random_uniform(shape=per_example_shape, minval=min_drop_rate, maxval=max_drop_rate)

    # Random share of the rate assigned to each scale (sums to 1 over axis 1).
    scale_logits_shape = (K.shape(mask)[0], len(drop_scales), 1, 1, 1)
    scale_logits = K.random_uniform(shape=scale_logits_shape, minval=-5., maxval=5.)
    scale_probs = K.softmax(scale_logits, axis=1)

    dropped = mask
    for scale_ix, scale in enumerate(drop_scales) :
        scale_rates = rates * scale_probs[:, scale_ix, ...]
        dropped = mask_dropout(dropped, scale_rates, drop_scale=scale)

    return K.switch(K.learning_phase(), dropped, mask)
def mask_dropout(mask, drop_rates, drop_scale=1) :
    """Zero out blocks of ``drop_scale`` consecutive axis-2 positions of ``mask``.

    Uniform noise in [0, 1] is drawn at a resolution of
    ``shape[2] / drop_scale`` along axis 2 (size 1 along axis 1), compared
    against ``drop_rates`` and repeated ``drop_scale`` times along axis 2, so
    each block of adjacent positions shares a single keep/drop decision.

    Args:
        mask: Tensor to drop entries from; axes 0, 2 and 3 are indexed, so a
            rank-4 tensor is expected.
        drop_rates: Drop probability (tensor or scalar) compared against the
            noise; positions whose noise is >= this value are kept.
        drop_scale: Number of adjacent axis-2 positions sharing one decision.

    Returns:
        ``mask`` multiplied by the float32 keep mask.
    """
    # NOTE(review): shape[2] / drop_scale is truncated by the int32 cast, so
    # if axis 2 is not a multiple of drop_scale the repeated keep mask is
    # shorter than `mask` along axis 2 - confirm this is handled by callers.
    coarse_shape = (
        K.shape(mask)[0],
        1,
        K.cast(K.shape(mask)[2] / drop_scale, dtype=tf.int32),
        K.shape(mask)[3],
    )
    coarse_noise = K.random_uniform(shape=coarse_shape, minval=0.0, maxval=1.0)

    coarse_keep = coarse_noise >= drop_rates
    full_keep = K.repeat_elements(coarse_keep, rep=drop_scale, axis=2)

    return mask * K.cast(full_keep, dtype=tf.float32)
def mask_dropout_single_scale(mask, drop_scale=1, min_drop_rate=0.0, max_drop_rate=0.5) :
    """Apply ``mask_dropout`` at a single scale with a random per-example drop rate.

    For every example a drop rate is drawn uniformly between ``min_drop_rate``
    and ``max_drop_rate`` and handed to ``mask_dropout``. The block-wise
    dropout itself used to be duplicated here line for line; it is now
    delegated to ``mask_dropout``, which builds the same ops in the same order.

    Args:
        mask: Tensor to drop entries from; axes 0, 2 and 3 are indexed
            downstream, so a rank-4 tensor is expected.
        drop_scale: Number of adjacent axis-2 positions sharing one decision.
        min_drop_rate: Lower bound of the per-example drop rate.
        max_drop_rate: Upper bound of the per-example drop rate.

    Returns:
        ``K.switch(K.learning_phase(), dropped_mask, mask)``: the dropped mask
        in the learning phase, otherwise ``mask`` unchanged.
    """
    rates = K.random_uniform(shape=(K.shape(mask)[0], 1, 1, 1), minval=min_drop_rate, maxval=max_drop_rate)
    ret_mask = mask_dropout(mask, rates, drop_scale=drop_scale)
    return K.switch(K.learning_phase(), ret_mask, mask)

In [8]:
def getImportanceScoresForPredictedSet(model_name, predictor_path, x_test, batch_size = 32):
    """Run a saved scrambler model on ``x_test`` and return its importance scores.

    The scrambler is loaded from ``saved_models/<model_name>``. ``x_test`` is
    zero-padded along axis 0 to a multiple of ``batch_size`` before prediction
    (presumably because the scrambler expects full batches - TODO confirm),
    and the padding rows are removed from the result again.

    Fixes compared to the previous version:
      * When ``x_test`` was already a multiple of the batch size, ``n_pad``
        was 0 and ``scores[:-n_pad]`` evaluated to ``scores[:0]`` - an empty
        array. The result is now sliced by the original example count.
      * ``batch_size`` was accepted but ignored (32 was hard-coded). It is now
        used for both the padding and ``predict``; the default of 32 keeps the
        previous behaviour.

    Args:
        model_name: File name of the scrambler checkpoint inside ``saved_models``.
        predictor_path: Path of the predictor model to load.
        x_test: Array of inputs; rows are examples (axis 0).
        batch_size: Batch size used for padding and prediction.

    Returns:
        The fourth output of ``scrambler_model.predict`` restricted to the
        first ``x_test.shape[0]`` rows.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer, got %r" % (batch_size,))

    #Load models
    save_dir = 'saved_models'

    # Kept for backward compatibility: the directory is created if missing,
    # even though loading from a freshly created (empty) directory will fail.
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    model_path = os.path.join(save_dir, model_name)
    scrambler_model = load_model(model_path, custom_objects={
        'st_sampled_softmax' : st_sampled_softmax,
        'gumbel_softmax': gumbel_softmax
    })

    print('Loaded scrambler model %s ' % (model_path))

    #Load Predictor
    # NOTE(review): the predictor is loaded and compiled but not used below;
    # it is kept so that an invalid predictor_path still fails as before.
    print ("loading predictor: ", predictor_path)
    predictor = load_model(predictor_path)
    predictor.trainable = False
    predictor.compile(optimizer=keras.optimizers.SGD(lr=0.1), loss='mean_squared_error')

    #Pad x_test up to the next multiple of batch_size (no padding if already aligned)
    n_examples = x_test.shape[0]
    n_pad = (-n_examples) % batch_size

    x_padded = np.concatenate([
        x_test,
        np.zeros((n_pad,) + tuple(x_test.shape[1:]))
    ], axis=0)

    s_test = np.zeros((x_padded.shape[0], 1))

    _, pwm_test, sample_test, scores = scrambler_model.predict(x=[x_padded, s_test], batch_size=batch_size)

    print (scores.shape)

    # Slice by the original count rather than by -n_pad, which breaks for n_pad == 0.
    return scores[:n_examples]


def getImportanceScoresForPredictedSet_dropout(model_name, predictor_path, x_test, batch_size = 32):
    """Run a saved dropout-trained scrambler on ``x_test`` and return its importance scores.

    Unlike ``getImportanceScoresForPredictedSet``, ``model_name`` is used as
    the checkpoint path as-is (nothing is prepended), and the model receives
    an additional all-ones drop pattern input of shape ``(N, 1, 50, 1)``.
    ``x_test`` is zero-padded along axis 0 to a multiple of ``batch_size``
    before prediction (presumably because the scrambler expects full batches -
    TODO confirm), and the padding rows are removed from the result again.

    Fixes compared to the previous version:
      * When ``x_test`` was already a multiple of the batch size, ``n_pad``
        was 0 and ``scores[:-n_pad]`` evaluated to ``scores[:0]`` - an empty
        array. The result is now sliced by the original example count.
      * ``batch_size`` was accepted but ignored (32 was hard-coded). It is now
        used for both the padding and ``predict``; the default of 32 keeps the
        previous behaviour.

    Args:
        model_name: Path of the scrambler checkpoint.
        predictor_path: Path of the predictor model to load.
        x_test: Array of inputs; rows are examples (axis 0).
        batch_size: Batch size used for padding and prediction.

    Returns:
        The fourth output of ``scrambler_model.predict`` restricted to the
        first ``x_test.shape[0]`` rows.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer, got %r" % (batch_size,))

    scrambler_model = load_model(model_name, custom_objects={
        'st_sampled_softmax' : st_sampled_softmax,
        'gumbel_softmax': gumbel_softmax,
        "mask_dropout_multi_scale": mask_dropout_multi_scale
    })

    #Load Predictor
    # NOTE(review): the predictor is loaded and compiled but not used below;
    # it is kept so that an invalid predictor_path still fails as before.
    print ("loading predictor: ", predictor_path)
    predictor = load_model(predictor_path)
    predictor.trainable = False
    predictor.compile(optimizer=keras.optimizers.SGD(lr=0.1), loss='mean_squared_error')

    #Pad x_test up to the next multiple of batch_size (no padding if already aligned)
    n_examples = x_test.shape[0]
    n_pad = (-n_examples) % batch_size

    x_padded = np.concatenate([
        x_test,
        np.zeros((n_pad,) + tuple(x_test.shape[1:]))
    ], axis=0)

    s_test = np.zeros((x_padded.shape[0], 1))
    # All-ones drop pattern, i.e. nothing is masked out at inference time.
    # NOTE(review): the length 50 is hard-coded - presumably the sequence
    # length the scrambler was trained on; confirm before using other lengths.
    drop_pattern = np.ones((x_padded.shape[0], 1, 50, 1))

    _, pwm_test, sample_test, scores = scrambler_model.predict(x=[x_padded, drop_pattern, s_test], batch_size=batch_size)

    print (scores.shape)

    # Slice by the original count rather than by -n_pad, which breaks for n_pad == 0.
    return scores[:n_examples]

In [9]:

#Load Predictor
#############################################################
# Path of the predictor checkpoint; also passed to the importance-score
# helpers, which load the predictor again themselves.
predictor_path = 'optimusRetrainedMain.hdf5'
# NOTE(review): `model` is not referenced by any cell visible here - confirm
# whether this load is still needed.
model = load_model(predictor_path)

# 50 "N" characters; the same length (50) is used as seq_len when one-hot
# encoding the benchmark sets. NOTE(review): not referenced by any cell
# visible here.
sequence_template = "N"*50


# Benchmark CSV files to score; each is read from the current directory.
allFiles = ["optimus5_synthetic_random_insert_if_uorf_1_start_1_stop_variable_loc_512.csv",
            "optimus5_synthetic_random_insert_if_uorf_1_start_2_stop_variable_loc_512.csv",
            "optimus5_synthetic_random_insert_if_uorf_2_start_1_stop_variable_loc_512.csv",
            "optimus5_synthetic_random_insert_if_uorf_2_start_2_stop_variable_loc_512.csv",
            "optimus5_synthetic_examples_3.csv"]



In [11]:
saveDir = "./"

# (output-name prefix, scrambler checkpoint) pairs, processed in this order
# for every benchmark file. Both checkpoints are target_bits_0125 dropout
# multi-scale scramblers and differ only in the trailing "weight_<k>" of the
# file name, which the prefix mirrors. (The second one used to be labelled
# "0.25 model", which did not match its file name.)
scrambler_models = [
    ("_10_dropout_autoscrambler_",
     "saved_models/autoscrambler_dataset_egfp_unmod_1_sample_mode_gumbel_n_samples_32_resnet_5_4_32_3_00_00_to_015_n_epochs_50_target_bits_0125_example_if_uorf_seqs_drop_multi_scale_weight_10.h5"),
    ("_1_dropout_autoscrambler_",
     "saved_models/autoscrambler_dataset_egfp_unmod_1_sample_mode_gumbel_n_samples_32_resnet_5_4_32_3_00_00_to_015_n_epochs_50_target_bits_0125_example_if_uorf_seqs_drop_multi_scale_weight_1.h5"),
]

for csv_to_open in allFiles:

    #Load dataset for benchmarking (read from the current directory)
    dataset_name = csv_to_open.replace(".csv", "")
    benchmarkSet = pd.read_csv("./" + csv_to_open)

    #get correct input shape: insert a singleton axis 1 in front of the
    #two axes returned by one_hot_encode
    #(one_hot_encode is presumably provided by the optimusFunctions star import)
    seq_e_test = one_hot_encode(benchmarkSet, seq_len=50)
    benchmarkSet_seqs = np.reshape(seq_e_test, (seq_e_test.shape[0], 1, seq_e_test.shape[1], seq_e_test.shape[2]))

    #Score the dataset with every scrambler and save one .npy file per model
    for save_prefix, model_name in scrambler_models:
        scores = getImportanceScoresForPredictedSet_dropout(model_name, predictor_path, benchmarkSet_seqs)
        save_name = saveDir + save_prefix + dataset_name
        np.save(save_name + "_importance_scores_test", scores)
    


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
The TensorFlow Distributions library has moved to TensorFlow Probability (https://github.com/tensorflow/probability). You should update all references to use `tfp.distributions` instead of `tf.contrib.distributions`.
Instructions for updating:
The TensorFlow Distributions library has moved to TensorFlow Probability (https://github.com/tensorflow/probability). You should update all references to use `tfp.distributions` instead of `tf.contrib.distributions`.
Instructions for updating:
The TensorFlow Distributions library has moved to TensorFlow Probability (https://github.com/tensorflow/probability). You should update all references to use `tfp.distributions` instead of `tf.distributions`.
Instructions for updating:
The TensorFlo