In [1]:
#-------------------------------------------------------------------------------------------------------------------------------
# By Alexandra Lee (July 2018) 
#
# Apply saved model to new samples including:
#
# Encode samples from new condition using saved model
# Encode test set using saved model
# Decode estimated gene expression after LSA
#-------------------------------------------------------------------------------------------------------------------------------
import os
import pandas as pd
import numpy as np
from keras.models import model_from_json, load_model
from keras import metrics, optimizers

# Pin NumPy's global random number generator to a fixed seed so that any
# NumPy-driven randomness in this notebook is repeatable from run to run.
from numpy.random import seed

randomState = 123
seed(randomState)

Using TensorFlow backend.


In [2]:
# All paths are resolved against the parent of the current working directory,
# so the notebook expects to be launched from a sub-folder of the project.
base_dir = os.path.dirname(os.getcwd())

# Inputs: the tab-delimited sample table plus the saved model and its weights
input_file = os.path.join(base_dir, "data", "test_control.txt")
model_file = os.path.join(base_dir, "models", "tybalt_1layer_10latent_encoder_model5.h5")
weights_file = os.path.join(base_dir, "models", "tybalt_1layer_10latent_encoder_weights5.h5")

# Output: where the encoded samples are written
out_file = os.path.join(base_dir, "encoded", "test_control_1layer_10latent_encoded5.txt")

In [3]:
# Load the tab-delimited input table: the first row supplies the column names
# and the first column becomes the row index.
data = pd.read_csv(input_file, sep='\t', header=0, index_col=0)
data

Unnamed: 0,PA0001,PA0002,PA0003,PA0004,PA0005,PA0006,PA0007,PA0008,PA0009,PA0010,...,PA5561,PA5562,PA5563,PA5564,PA5565,PA5566,PA5567,PA5568,PA5569,PA5570
GSM356955.CEL,0.523823,0.594183,0.376242,0.74709,0.439185,0.466252,0.493666,0.543735,0.622464,0.244651,...,0.313673,0.56457,0.48715,0.701037,0.635983,0.599291,0.714874,0.796487,0.674756,0.638105
GSM954576_Nomura_PA01-1_Pae_G1a_.CEL,0.449574,0.569119,0.356904,0.606993,0.313796,0.452389,0.540796,0.346749,0.372734,0.185079,...,0.391077,0.546586,0.495389,0.531542,0.378673,0.222432,0.432766,0.287531,0.343099,0.194245
GSM954578_Nomura_PA01-3_Pae_G1a_.CEL,0.650631,0.594249,0.396833,0.511396,0.341081,0.380707,0.574916,0.296616,0.348616,0.165047,...,0.501884,0.577548,0.634781,0.524943,0.425947,0.279438,0.405522,0.417845,0.365188,0.388724
GSM954579_Nomura_PA01-4_Pae_G1a_.CEL,0.607604,0.613629,0.362308,0.528051,0.363008,0.380931,0.53551,0.333461,0.320784,0.227256,...,0.475606,0.564781,0.63032,0.47337,0.36633,0.268081,0.353713,0.350998,0.344761,0.326619
GSM92182.CEL,0.687206,0.709832,0.432323,0.731554,0.400733,0.306312,0.419512,0.48463,0.62361,0.134044,...,0.427516,0.50747,0.581297,0.614464,0.661921,0.118582,0.400441,0.492726,0.738369,0.74831


In [4]:
# Restore the saved model from its .h5 file
loaded_model = load_model(filepath=model_file)

# Then load the parameters stored in the separate weights file into that model
loaded_model.load_weights(filepath=weights_file)



In [5]:
# Run every sample through the loaded model in a single batch so the new data
# lands in the SAME latent space as the training data.
# NOTE(review): despite the name `reconstructed`, the model file is named
# "...encoder_model5", so this presumably holds the encoded (latent)
# representation rather than reconstructed expression values - confirm.
reconstructed = loaded_model.predict_on_batch(data)

# Wrap the raw model output in a DataFrame, re-attaching the input row labels
reconstructed_df = pd.DataFrame(data=reconstructed, index=data.index)
reconstructed_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
GSM356955.CEL,0.0,0.0,0.349871,0.214144,0.556372,1.526051,0.0,5.393809,2.112408,7.802351
GSM954576_Nomura_PA01-1_Pae_G1a_.CEL,2.631948,0.0,0.295328,2.144236,6.446996,0.0,7.486694,1.506475,0.0,7.361189
GSM954578_Nomura_PA01-3_Pae_G1a_.CEL,3.828303,0.263629,0.87581,1.253171,6.346218,0.0,11.103443,0.0,1.339814,3.746747
GSM954579_Nomura_PA01-4_Pae_G1a_.CEL,3.744793,0.090259,0.630118,1.064464,6.256991,0.0,10.447172,0.121291,1.057709,3.952817
GSM92182.CEL,4.176693,4.694715,0.829852,1.869054,9.500675,0.0,6.02193,0.031502,6.165462,0.0


In [6]:
# Save latent space representation as a tab-delimited text file.
# to_csv does not create missing folders, so make sure the destination
# directory exists first; otherwise the write fails on a fresh checkout.
out_dir = os.path.dirname(out_file)
if out_dir and not os.path.isdir(out_dir):
    os.makedirs(out_dir)
reconstructed_df.to_csv(out_file, sep='\t')