In [1]:
#-------------------------------------------------------------------------------------------------------------------------------
# By Alexandra Lee (July 2018) 
#
# Apply saved model to new samples including:
#
# Encode samples from new condition using saved model
# Encode test set using saved model
# Decode estimated gene expression after LSA
#-------------------------------------------------------------------------------------------------------------------------------
import os
import pandas as pd
import numpy as np
from keras.models import model_from_json, load_model
from keras import metrics, optimizers

# Fix NumPy's global random seed so that any NumPy-driven randomness in this
# notebook is repeatable from one run to the next.
from numpy.random import seed

randomState = 123
seed(randomState)

Using TensorFlow backend.


In [2]:
# load arguments
input_file = os.path.join(os.path.dirname(os.getcwd()), "encoded","PA1673_full_old", "estimated_test_mid2_2layer_10latent_encoded.txt")
model_file = os.path.join(os.path.dirname(os.getcwd()), "models", "PA1673_full_old", "tybalt_2layer_10latent_decoder_model.h5")
weights_file = os.path.join(os.path.dirname(os.getcwd()), "models", "PA1673_full_old", "tybalt_2layer_10latent_decoder_weights.h5")

# output files
out_file = os.path.join(os.path.dirname(os.getcwd()), "output", "PA1673_full_old", "estimated_test_mid2_2layer_10latent.txt")

In [3]:
# Load the encoded samples: a tab-delimited table whose first row holds the
# column labels and whose first column holds the sample identifiers (used as
# the row index).
data = pd.read_csv(
    input_file,
    sep='\t',
    header=0,
    index_col=0,
)
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
120330-10978D_24_(Pae_G1a).CEL,1.502113,-0.336158,4.535495,0.617225,1.128773,-0.869897,1.179078,3.965321,5.820569,-0.662211
120330-10978D_M1_(Pae_G1a).CEL,1.409328,-0.082228,5.586763,0.617225,0.793102,-1.171443,1.179078,4.547769,5.009968,-0.54865
120330-10978D_M3_(Pae_G1a).CEL,1.018984,0.428751,6.051841,0.617225,0.726908,-1.277186,1.179078,3.843061,4.935409,-0.535028
GSM1027586_062508plcHR.CEL,0.009481,1.924743,6.494624,0.864837,0.708646,0.011474,1.179078,3.524818,5.790625,-1.4236
GSM1027589_071008gbdR.CEL,0.308882,2.048166,7.457642,1.006605,0.708646,-1.148097,1.179078,3.649598,4.8723,-1.306051
GSM1191071_WT-pyr-2.CEL,0.009481,-0.845507,7.822137,0.617225,1.054051,-1.445886,2.425322,0.511704,1.014367,-1.4236
GSM1267105_HZI1971_Pae_G1a.CEL,0.009481,-0.299124,6.052669,5.317363,0.708646,-1.344366,1.179078,2.890354,1.014367,0.262917
GSM1267106_HZI1972_Pae_G1a.CEL,0.009481,-0.393418,6.233583,4.959278,0.708646,-1.307659,1.179078,3.140943,1.014367,0.300921
GSM1267107_HZI1973_Pae_G1a.CEL,0.009481,-0.562296,6.086774,5.027606,0.708646,-1.302077,1.179078,3.233937,1.014367,0.302872
GSM1421002_EXdnr_control1.CEL,0.009481,3.644576,-0.065043,0.677548,0.708646,2.266686,2.297861,0.593641,1.014367,3.090185


In [4]:
# Restore the saved decoder from its model file ...
loaded_model = load_model(filepath=model_file)

# ... then load the separately saved weights into that restored model
loaded_model.load_weights(filepath=weights_file)



In [5]:
# Pass the latent-space samples through the loaded decoder model to obtain the
# reconstructed (decoded) values, one output row per input sample.
reconstructed = loaded_model.predict_on_batch(data)

# Re-attach the sample identifiers as the row index; the columns are left as
# the default integer positions produced by the model output.
reconstructed_df = pd.DataFrame(data=reconstructed, index=data.index)
reconstructed_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,5539,5540,5541,5542,5543,5544,5545,5546,5547,5548
120330-10978D_24_(Pae_G1a).CEL,0.582582,0.613748,0.336716,0.504033,0.392826,0.38078,0.528855,0.404997,0.340071,0.169191,...,0.305447,0.537699,0.533951,0.520409,0.636199,0.183437,0.479989,0.385545,0.489014,0.529879
120330-10978D_M1_(Pae_G1a).CEL,0.638687,0.672752,0.364511,0.545104,0.427337,0.400776,0.533558,0.415141,0.376059,0.16068,...,0.314609,0.546805,0.590113,0.541077,0.637388,0.155501,0.463584,0.392802,0.540282,0.567132
120330-10978D_M3_(Pae_G1a).CEL,0.632481,0.673937,0.363544,0.535927,0.427455,0.39912,0.541185,0.40773,0.389324,0.163384,...,0.340965,0.538638,0.610636,0.552072,0.647059,0.162893,0.463058,0.394577,0.554323,0.578269
GSM1027586_062508plcHR.CEL,0.626867,0.675495,0.353311,0.556112,0.420349,0.384065,0.53191,0.401,0.419514,0.158736,...,0.361968,0.513897,0.623781,0.556063,0.654169,0.167054,0.470679,0.397062,0.551804,0.576062
GSM1027589_071008gbdR.CEL,0.671664,0.725778,0.371934,0.580732,0.429468,0.400511,0.558787,0.415007,0.438434,0.149811,...,0.362851,0.533097,0.673632,0.589,0.670948,0.145197,0.46543,0.392497,0.594758,0.610278
GSM1191071_WT-pyr-2.CEL,0.656134,0.816971,0.425279,0.684872,0.436234,0.641576,0.50828,0.324517,0.419527,0.140594,...,0.515671,0.493664,0.784092,0.603036,0.583894,0.293613,0.410436,0.393917,0.757493,0.546296
GSM1267105_HZI1971_Pae_G1a.CEL,0.735756,0.767674,0.444331,0.674934,0.439797,0.400743,0.631648,0.552356,0.613463,0.19013,...,0.449213,0.559447,0.684511,0.682657,0.716144,0.129105,0.426886,0.453982,0.733842,0.716695
GSM1267106_HZI1972_Pae_G1a.CEL,0.738464,0.771205,0.446443,0.67529,0.442267,0.405545,0.630021,0.548688,0.609488,0.190823,...,0.450796,0.561274,0.689178,0.683777,0.71348,0.130877,0.433377,0.454594,0.730339,0.712521
GSM1267107_HZI1973_Pae_G1a.CEL,0.739564,0.772754,0.447645,0.680002,0.44347,0.404278,0.628578,0.550805,0.61044,0.191354,...,0.445923,0.560884,0.686454,0.681674,0.710356,0.129487,0.431044,0.45373,0.730177,0.711255
GSM1421002_EXdnr_control1.CEL,0.741906,0.68185,0.596962,0.731736,0.550071,0.477181,0.231966,0.726736,0.751161,0.24388,...,0.509386,0.654298,0.694561,0.776431,0.766671,0.257217,0.637227,0.737682,0.769244,0.729849


In [6]:
# Save the decoded (reconstructed) values as a tab-delimited file.
# to_csv does not create missing folders, so make sure the destination
# directory exists first; otherwise a fresh checkout without the output
# folder would fail at this final step.
out_dir = os.path.dirname(out_file)
if out_dir and not os.path.isdir(out_dir):
    os.makedirs(out_dir)

reconstructed_df.to_csv(out_file, sep='\t')