In [1]:
#-------------------------------------------------------------------------------------------------------------------------------
# By Alexandra Lee (July 2018) 
#
# Apply saved model to new samples including:
#
# Encode samples from new condition using saved model
# Encode test set using saved model
# Decode estimated gene expression after LSA
#-------------------------------------------------------------------------------------------------------------------------------
import os
import pandas as pd
import numpy as np
from keras.models import model_from_json
from keras import metrics, optimizers
# Seed NumPy's global random number generator so NumPy-based randomness is repeatable across runs
np.random.seed(123)

Using TensorFlow backend.


In [2]:
# Run configuration: every path is resolved against the parent of the current working directory
base_dir = os.path.dirname(os.getcwd())

# Samples to feed through the model, and the saved model architecture / weights
input_file = os.path.join(base_dir, "encoded", "estimated_test_control_encoded.txt")
model_file = os.path.join(base_dir, "models", "tybalt_1layer_10_train_decoder_model.json")
weights_file = os.path.join(base_dir, "models", "tybalt_1layer_10_train_decoder_weights.h5")

# Whether this run is treated as an encoding step
# NOTE(review): the model files above are named "*decoder*" -- confirm this flag value is intended
encoding = True

# Destination for the model output
out_file = os.path.join(base_dir, "output", "estimated_test_control_latent.txt")


In [3]:
# Load the tab-delimited input table: the first row supplies the column names
# and the first column supplies the row labels
data = pd.read_csv(input_file, sep='\t', header=0, index_col=0)
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
GSM356955.CEL,4.417708,1.209315,-0.244977,-1.464938,2.338107,-1.569835,-0.695702,0.642414,-1.293433,6.04132
GSM954576_Nomura_PA01-1_Pae_G1a_.CEL,5.612884,-1.435007,-0.244977,-1.464938,3.188005,0.846147,3.049279,1.670792,-1.293433,2.547656
GSM954578_Nomura_PA01-3_Pae_G1a_.CEL,4.751827,-1.435007,-0.244977,-1.464938,2.295868,1.427788,5.169485,2.719749,-1.293433,0.980941
GSM954579_Nomura_PA01-4_Pae_G1a_.CEL,4.661746,-1.435007,-0.244977,-1.464938,2.278832,1.293261,4.825707,2.576568,-1.293433,1.083358
GSM92182.CEL,3.958025,-1.286253,-0.244977,2.557713,3.199293,2.583455,0.728022,4.429398,-0.750169,0.980941


In [4]:
# Rebuild the saved model: architecture from the JSON file, then weights on top

# Read the architecture description; the context manager closes the file
# even if reading raises
with open(model_file, 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)

# Load the saved weights into the freshly constructed model
loaded_model.load_weights(weights_file)

<keras.engine.training.Model at 0x7f3b47eadeb8>

In [5]:
# Run the loaded model on all input samples as a single batch
reconstructed = loaded_model.predict_on_batch(data)

# The model returns one output row per input row, in the same order, so the
# row labels of the input can always be reattached to the output. Keeping them
# unconditionally avoids writing an unlabelled table.
reconstructed_df = pd.DataFrame(reconstructed, index=data.index)

# Preview only the first rows rather than rendering the whole table
reconstructed_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,5539,5540,5541,5542,5543,5544,5545,5546,5547,5548
GSM356955.CEL,0.323517,0.309373,0.309864,0.405708,0.271572,0.417736,0.467231,0.370485,0.291928,0.284242,...,0.416546,0.35464,0.33839,0.499328,0.412813,0.615027,0.515981,0.409719,0.333788,0.312862
GSM954576_Nomura_PA01-1_Pae_G1a_.CEL,0.384142,0.416988,0.282158,0.448668,0.254912,0.398521,0.570311,0.273905,0.236283,0.217827,...,0.384127,0.319376,0.365773,0.400491,0.382304,0.368386,0.307662,0.261711,0.333547,0.436505
GSM954578_Nomura_PA01-3_Pae_G1a_.CEL,0.433372,0.471056,0.281765,0.441297,0.27671,0.391403,0.618149,0.262287,0.22516,0.200228,...,0.382639,0.350359,0.414574,0.405386,0.446774,0.267932,0.281288,0.239871,0.372709,0.519478
GSM954579_Nomura_PA01-4_Pae_G1a_.CEL,0.430081,0.466469,0.283866,0.441816,0.276944,0.391873,0.610368,0.266893,0.228774,0.206882,...,0.38269,0.350382,0.412809,0.407545,0.442787,0.281344,0.289223,0.245969,0.371729,0.509833
GSM92182.CEL,0.520115,0.608316,0.364308,0.598826,0.308658,0.390341,0.507164,0.356859,0.350573,0.171219,...,0.39098,0.37274,0.524765,0.472539,0.466123,0.237226,0.298347,0.314493,0.558329,0.60367


In [6]:
# Write the model output as a tab-delimited file, row labels included.
# Create the destination folder first so the write does not fail when the
# folder is missing (e.g. on a fresh checkout).
out_dir = os.path.dirname(out_file)
if out_dir:
    os.makedirs(out_dir, exist_ok=True)
reconstructed_df.to_csv(out_file, sep='\t')