In [1]:
#-------------------------------------------------------------------------------------------------------------------------------
# By Alexandra Lee (July 2018) 
#
# Apply saved model to new samples including:
#
# Encode samples from new condition using saved model
# Encode test set using saved model
# Decode estimated gene expression after LSA
#-------------------------------------------------------------------------------------------------------------------------------
import os
import pandas as pd
import numpy as np
from keras.models import model_from_json, load_model
from keras import metrics, optimizers
# Seed NumPy's global random number generator with a fixed value
np.random.seed(123)

Using TensorFlow backend.


In [2]:
# Run configuration: every path is resolved against the parent of the
# current working directory
base_dir = os.path.dirname(os.getcwd())

# Input matrix and the saved Keras model (full model file + weights file)
input_file = os.path.join(base_dir, "encoded", "tybalt_2layer_10_test_control_encoded.txt")
model_file = os.path.join(base_dir, "models", "tybalt_2layer_10_train_decoder_model.h5")
weights_file = os.path.join(base_dir, "models", "tybalt_2layer_10_train_decoder_weights.h5")

# Whether this run is treated as an encoding run
encoding = True

# Destination of the model output
out_file = os.path.join(base_dir, "output", "estimated_test_control_latent_2layer.txt")


In [3]:
# Load the tab-delimited input matrix: the first row supplies the column
# names and the first column supplies the row labels (sample IDs)
data = pd.read_csv(
    input_file,
    sep='\t',
    header=0,
    index_col=0,
)
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
GSM356955.CEL,1.557081,0.0,0.0,0.0,4.812786,0.0,0.296684,0.318213,4.430712,0.455239
GSM954576_Nomura_PA01-1_Pae_G1a_.CEL,0.405962,0.0,0.749353,5.579066,3.664676,4.965723,0.0,1.020203,3.017876,0.0
GSM954578_Nomura_PA01-3_Pae_G1a_.CEL,0.245053,0.0,0.261679,7.001842,1.163499,5.724838,0.0,2.427797,2.109365,0.0
GSM954579_Nomura_PA01-4_Pae_G1a_.CEL,0.361329,0.080056,0.176546,6.583868,1.589735,5.530672,0.0,2.284974,2.120986,0.0
GSM92182.CEL,4.61073,0.391221,0.050018,6.894863,0.194969,2.03205,2.15295,2.695515,0.0,3.091622


In [4]:
# Restore the saved Keras model from disk.
#
# An earlier version rebuilt the architecture from a JSON description with
# model_from_json; the model is now read directly from the file at
# `model_file` with load_model.
loaded_model = load_model(model_file)

# Load the weights stored in `weights_file` into the restored model.
# NOTE(review): possibly redundant if the model file already carries the same
# weights -- TODO confirm before removing.
loaded_model.load_weights(weights_file)



In [5]:
# Apply the saved model to the input samples. Whether this is an encoding or
# a decoding step depends on which model file was configured above; all rows
# of `data` are passed through the model as a single batch.
reconstructed = loaded_model.predict_on_batch(data)

# The model returns one output row per input row, in the same order, so the
# sample IDs can always be re-attached. Previously they were dropped whenever
# `encoding` was False, leaving the saved output without row labels.
reconstructed_df = pd.DataFrame(reconstructed, index=data.index)

reconstructed_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,5539,5540,5541,5542,5543,5544,5545,5546,5547,5548
GSM356955.CEL,0.477484,0.515526,0.363541,0.6188,0.365553,0.411387,0.427771,0.563649,0.520676,0.244805,...,0.438112,0.579595,0.480473,0.689857,0.623972,0.579436,0.688972,0.750287,0.56124,0.449993
GSM954576_Nomura_PA01-1_Pae_G1a_.CEL,0.521049,0.61942,0.347263,0.559277,0.31504,0.378935,0.449362,0.484582,0.405961,0.190307,...,0.385676,0.540066,0.483354,0.54259,0.571044,0.27521,0.436921,0.416739,0.491154,0.510596
GSM954578_Nomura_PA01-3_Pae_G1a_.CEL,0.605867,0.67294,0.356914,0.566674,0.362107,0.349446,0.496895,0.46021,0.405965,0.153048,...,0.420379,0.550845,0.582028,0.533298,0.617527,0.161721,0.34883,0.395203,0.568011,0.645493
GSM954579_Nomura_PA01-4_Pae_G1a_.CEL,0.60372,0.673259,0.360111,0.567868,0.360127,0.35457,0.491138,0.464283,0.410305,0.15708,...,0.417571,0.554011,0.577131,0.54328,0.622229,0.174536,0.361174,0.404193,0.572957,0.640457
GSM92182.CEL,0.679434,0.731911,0.48828,0.713364,0.461989,0.379543,0.42346,0.538023,0.577771,0.165634,...,0.3164,0.544758,0.587342,0.61376,0.642981,0.124814,0.39841,0.525236,0.696359,0.657144


In [6]:
# Write the model output (row labels included) to a tab-separated text file
reconstructed_df.to_csv(
    path_or_buf=out_file,
    sep='\t',
)