In [1]:
# ACKNOWLEDGEMENTS:
# Code copied and modified (MIT License) from 
# (https://github.com/JanZrimec/DeepExpression/blob/master/scripts/Chapter_3_1.ipynb)
# Original author: Jan Zrimec (https://github.com/JanZrimec)

import numpy as np
import pandas as pd

from scipy import stats, special, spatial

import matplotlib.pyplot as plt
import seaborn as sns

from matplotlib.lines import Line2D
import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 300

import importlib.util
import os

import warnings
warnings.filterwarnings('ignore')

#s.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="1"

In [2]:
## datasets
folder = '../data/'

# Data
fn1 = 'scerevisiae_tpm_full_cut5.csv'
fn2 = 'scerevisiae_accessions.tsv'
fn3 = 'scerevisiae.rsd1.lmbda_22.npz'
fn4 = 'Saccharomyces_cerevisiae.R64-1-1.regions.csv'

fn5 = 'organisms_deep_results.csv'

fname_data1 = folder+'scerevisiae.rsd1.lmbda_22.npz'
fname_module1 = folder+'Model_C3F2all.py'
fname_p1 = folder+'scerevisiae_good_rsd1_lmbda22_Model_C3F2all_1234_c008090fd8e4b9971fe71eed502c9f7b_best.p'
fname_weights1 = folder+'scerevisiae_good_rsd1_lmbda22_Model_C3F2all_1234_c008090fd8e4b9971fe71eed502c9f7b_best'

fn6 = 'df_Keren2013_predictions.csv'
fn7 = 'df_Yamanishi2013_predictions.csv'


In [3]:
def load_data(fname):
    # X is multi-variable array
    # Y contains single variable - fix shape for Keras

    npzfile = np.load(fname,allow_pickle=True)
    Xh_train = npzfile['arr_0']
    Xh_test = npzfile['arr_1']
    Xv_train = npzfile['arr_2'][:,8:]
    Xv_test = npzfile['arr_3'][:,8:]
    Y_train = npzfile['arr_4']
    Y_test = npzfile['arr_5']
    names_train = npzfile['arr_6']
    names_test = npzfile['arr_7']

    X_train = list()
    X_train.append(Xh_train)
    #X_train.append(Xv_train)
    X_test = list()
    X_test.append(Xh_test)
    #X_test.append(Xv_test)

    Y_train = Y_train.astype(np.float32).reshape((-1,))
    Y_test = Y_test.astype(np.float32).reshape((-1,))

    return (np.concatenate((Y_train, Y_test)),
            np.concatenate((names_train,names_test)),
            np.concatenate((Xv_train,Xv_test)))

# load modules - POC model
def load_module(model_path):
    '''loads module containing models given path'''
    spec = importlib.util.spec_from_file_location('module',model_path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module


In [4]:
## load data and model

module = load_module(fname_module1)

# load p
p = pd.read_csv(fname_p1,sep=';')
p = p.iloc[0]
p.drop(['path','overfit'],inplace=True)
print(p.shape)

# load data
X_train, X_test, Y_train, Y_test = module.load_data(fname_data1)
# Y = np.concatenate((Y_train, Y_test))
# X = np.concatenate((X_train[0], X_test[0]))
print(len(X_test))
print(len(Y_test))

# model
input_shapes = [sl.shape[1:] for sl in X_train]
model = module.POC_model(input_shapes, p)
# set weights model 1
print("Loading model 1 from disk..")
model.load_weights(fname_weights1)


(42,)
2
425
Loading model 1 from disk..


In [5]:
# predict Ypred

# Y_pred = model.predict(X_test)

In [7]:
fpath_npz_directory = "/home/andy/new-results/inputs/npz_files/"
fpath_output_directory = "/home/andy/predictions/"

for filename in os.listdir(fpath_npz_directory):
    working_npz = np.load(fpath_npz_directory + filename, allow_pickle=True)
    X_hot = working_npz["x_hot"]
    X_var = working_npz["x_var"]

    current_input = list()
    current_input.append(X_hot)
    current_input.append(X_var)

    current_predictions = model.predict(current_input)

    rename = filename.replace(".npz", ".predict.npy")
    output_filename = fpath_output_directory + rename
    np.save(output_filename, current_predictions)
    print("Predictions generated and saved for: " + filename)


Predictions generated and saved for: Bimuria_novae_zelandiae_cbs_107_79_gca_010015655.Bimnz1.npz
Predictions generated and saved for: Lentithecium_fluviatile_cbs_122367_gca_010405425.Lenfl1.npz
Predictions generated and saved for: Ascobolus_immersus_rn42_gca_003788565.Ascim1.npz
Predictions generated and saved for: Sphaerosporella_brunnea_gca_008704415.Sphbr2.npz
Predictions generated and saved for: Aspergillus_avenaceus_gca_009193465.Aspave1.npz
Predictions generated and saved for: Aspergillus_coremiiformis_gca_009193565.Aspcor1.npz
Predictions generated and saved for: Westerdykella_ornata_gca_010094085.Wesor1.npz
Predictions generated and saved for: Aspergillus_niger_atcc_13496_gca_003344705.Aspni_bvT_1.npz
Predictions generated and saved for: Choiromyces_venosus_120613_1_gca_003788595.Chove1.npz
Predictions generated and saved for: Aspergillus_costaricaensis_cbs_115574_gca_003184835.Aspcos1.npz
Predictions generated and saved for: Viridothelium_virens_gca_010094025.Tryvi1.npz
Predic