In [1]:
from __future__ import print_function
import keras
from keras.models import Sequential, Model, load_model

import tensorflow as tf

import pandas as pd

import os
import pickle
import numpy as np

import scipy.sparse as sp
import scipy.io as spio

import isolearn.io as isoio
import isolearn.keras as iso

from aparent.data.aparent_data_array import load_data


Using TensorFlow backend.


In [2]:
#Load the designed MPRA data (not collapsed over barcodes).
#batch_size=1 is passed to the loader; file_path points at the prepared array data directory.

file_path = '../../data/prepared_data/apa_array_data/'
array_gens = load_data(file_path=file_path, batch_size=1)


Designed MPRA size = 186066
Training set size = 0
Validation set size = 0
Test set size = 186066


In [3]:
#Retrieve sequence one hots: take element [0][0] of every item of the 'all' generator
#and stack the pieces along the first axis.

all_gen = array_gens['all']
x = np.concatenate([all_gen[batch_ix][0][0] for batch_ix in range(len(all_gen))], axis=0)


In [4]:
#Load APARENT Resnet from <cwd>/../../saved_models/<model_name>.h5

model_name = 'aparent_all_libs_resnet_no_clinvar_wt_ep_5'

#model_name is reused later to name the prediction dump
save_dir = os.path.join(os.getcwd(), '../../saved_models')
model_path = os.path.join(save_dir, '{}.h5'.format(model_name))

aparent_model = load_model(model_path)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.




In [5]:
#Pad x with all-zero examples so its row count is a multiple of 32
#(the batch_size used for prediction).

remainder = x.shape[0] % 32
#to_fill is always in [1, 32]: when x is already aligned a full extra batch of zeros is appended.
#The later "remove padded examples" step relies on to_fill being non-zero.
to_fill = 32 - remainder

#Take the per-example shape from x itself instead of hard-coding (1, 205, 4),
#so the padding keeps matching if the sequence length or encoding changes.
x = np.concatenate([x, np.zeros((to_fill,) + x.shape[1:])], axis=0)


In [6]:
#Predict on the (padded) sequences

#Second model input: one row per example, all zeros except column 11 (of 13).
#NOTE(review): presumably a one-hot library indicator -- confirm what index 11 denotes.
l = np.zeros((x.shape[0], 13))
l[:, 11] = 1.

model_outputs = aparent_model.predict(x=[x, l], batch_size=32, verbose=True)
iso_pred, cut_pred = model_outputs

#Calculate isoform logits, log(p / (1 - p)), flattened to 1-D.
#NOTE(review): a prediction of exactly 0 or 1 yields an infinite logit here.
logodds_pred = np.log(iso_pred / (1. - iso_pred)).ravel()




In [7]:
#Remove padded examples

#x still holds the padded input at this point, so the number of real examples
#is its row count minus the number of padding rows.
#Slicing to an explicit length (rather than [:-to_fill]) stays correct when
#to_fill is 0 and does not trim further rows if this cell is executed again.
n_examples = x.shape[0] - to_fill

iso_pred = iso_pred[:n_examples, ...]
cut_pred = cut_pred[:n_examples, ...]
logodds_pred = logodds_pred[:n_examples, ...]


In [8]:
#Build the prediction dataframe from the 'all' generator's source dataframe:
#reset its index, attach the isoform predictions and logits, and keep only
#the sequence identifiers plus the two prediction columns.

array_df = (
    array_gens['all'].sources['df']
    .reset_index()
    .assign(iso_pred=iso_pred, logodds_pred=logodds_pred)
    [['master_seq', 'seq', 'iso_pred', 'logodds_pred']]
)


In [9]:
#Dump prediction dataframe and cut probability matrix (stored as a scipy CSR sparse matrix)

predictions = {
    'array_df' : array_df,
    'cut_prob' : sp.csr_matrix(cut_pred)
}
isoio.dump(predictions, 'apa_array_data_legacy/' + model_name + '_predictions_seq')