In [1]:
#-------------------------------------------------------------------------------------------------------------------------------
# By Alexandra Lee (July 2018) 
#
# Apply saved model to new samples including:
#
# Encode samples from new condition using saved model
# Encode test set using saved model
# Decode estimated gene expression after LSA
#-------------------------------------------------------------------------------------------------------------------------------
import os
import pandas as pd
import numpy as np
from keras.models import model_from_json, load_model
from keras import metrics, optimizers

# Pin NumPy's global random number generator to a fixed seed so that any
# NumPy-based randomness in this notebook is repeatable between runs.
from numpy.random import seed

randomState = 123
seed(randomState)

Using TensorFlow backend.


In [2]:
# All paths are resolved relative to the parent of the current working
# directory, so the notebook must be launched from a sub-directory of that parent.
base_dir = os.path.dirname(os.getcwd())
analysis_name = "cipro_treatment"

# Inputs: the encoded sample table plus the saved decoder model and weights files
input_file = os.path.join(
    base_dir, "encoded", analysis_name,
    "estimated_test_control_2layer_10latent_encoded.txt")
model_file = os.path.join(
    base_dir, "models", analysis_name,
    "tybalt_2layer_10latent_decoder_model.h5")
weights_file = os.path.join(
    base_dir, "models", analysis_name,
    "tybalt_2layer_10latent_decoder_weights.h5")

# Output: where the table produced by the model is written
out_file = os.path.join(
    base_dir, "output", analysis_name,
    "estimated_test_control_2layer_10latent.txt")

In [3]:
# Load the tab-separated input table: the first row holds the column labels
# and the first column becomes the row index.
data = pd.read_csv(input_file, sep='\t', header=0, index_col=0)
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
GSM356955.CEL,3.040025,4.030092,1.841662,-0.43417,6.984473,-1.015616,0.379514,-0.328075,-0.909621,-0.513998
GSM954576_Nomura_PA01-1_Pae_G1a_.CEL,4.81587,2.29446,2.647791,-0.43417,5.652481,1.373186,-0.278809,6.080879,-0.713972,-1.352648
GSM954578_Nomura_PA01-3_Pae_G1a_.CEL,6.516452,0.652039,2.478118,0.415415,3.550224,3.006198,-0.278809,6.631818,-0.060933,-1.045642
GSM954579_Nomura_PA01-4_Pae_G1a_.CEL,6.020496,0.724891,2.451224,0.239392,3.699776,2.51446,-0.278809,6.403667,-0.114225,-1.210889
GSM92182.CEL,3.603594,2.209407,2.923329,3.246021,2.808909,6.011205,-0.278809,4.460039,-0.909621,-0.380609


In [4]:
# Restore the saved model from its HDF5 file
loaded_model = load_model(filepath=model_file)

# Then load the separately saved weights file into the restored model
loaded_model.load_weights(filepath=weights_file)



In [5]:
# Run the loaded (decoder) model on the encoded samples, passing the whole
# table as a single batch. The result has one row per input sample.
reconstructed = loaded_model.predict_on_batch(data)

# Wrap the raw prediction array in a DataFrame that reuses the input's row index;
# columns keep their default integer labels.
reconstructed_df = pd.DataFrame(data=reconstructed, index=data.index)
reconstructed_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,5539,5540,5541,5542,5543,5544,5545,5546,5547,5548
GSM356955.CEL,0.358542,0.324456,0.35083,0.481336,0.347849,0.489554,0.472218,0.396659,0.385732,0.296099,...,0.468868,0.395207,0.307133,0.615222,0.486624,0.518483,0.546446,0.50264,0.357112,0.329554
GSM954576_Nomura_PA01-1_Pae_G1a_.CEL,0.420885,0.483535,0.325879,0.493468,0.220801,0.39199,0.618103,0.348969,0.277072,0.216658,...,0.380398,0.359727,0.336657,0.492243,0.547591,0.327762,0.420156,0.272394,0.318638,0.384749
GSM954578_Nomura_PA01-3_Pae_G1a_.CEL,0.542868,0.576337,0.368145,0.507825,0.262005,0.368468,0.642378,0.367015,0.295715,0.161305,...,0.420684,0.369184,0.462039,0.478669,0.597905,0.195168,0.287577,0.223031,0.404061,0.588534
GSM954579_Nomura_PA01-4_Pae_G1a_.CEL,0.516973,0.55963,0.35606,0.500068,0.253469,0.370276,0.630808,0.365668,0.288986,0.168772,...,0.403792,0.370252,0.440641,0.475569,0.586929,0.219099,0.310545,0.230055,0.390818,0.550326
GSM92182.CEL,0.624377,0.696055,0.443347,0.662598,0.38304,0.40829,0.501135,0.474856,0.489191,0.185874,...,0.407585,0.445548,0.544919,0.52449,0.56807,0.189258,0.340374,0.383591,0.604146,0.656161


In [6]:
# Save the model output (reconstructed_df) as a tab-separated file.
# Create the destination directory first: to_csv does not create missing
# parent directories and would otherwise fail on a fresh checkout.
out_dir = os.path.dirname(out_file)
if out_dir and not os.path.isdir(out_dir):
    os.makedirs(out_dir)

reconstructed_df.to_csv(out_file, sep='\t')