In [1]:
#-------------------------------------------------------------------------------------------------------------------------------
# By Alexandra Lee (July 2018) 
#
# Apply saved model to new samples including:
#
# Encode samples from new condition using saved model
# Encode test set using saved model
# Decode estimated gene expression after LSA
#-------------------------------------------------------------------------------------------------------------------------------
import os
import pandas as pd
import numpy as np
from keras.models import model_from_json
from keras import metrics, optimizers
# Pin NumPy's global random state so any NumPy-driven randomness repeats across runs
np.random.seed(seed=123)

Using TensorFlow backend.


In [2]:
# All inputs and outputs live in sibling folders of the current working
# directory, so resolve the parent directory once and build every path from it.
base_dir = os.path.dirname(os.getcwd())

# inputs: expression data plus the saved encoder architecture and weights
input_file = os.path.join(base_dir, "data", "train_control.txt")
model_file = os.path.join(base_dir, "models", "tybalt_1layer_10_train_encoder_model.json")
weights_file = os.path.join(base_dir, "models", "tybalt_1layer_10_train_encoder_weights.h5")

# output: where the encoded samples are written
out_file = os.path.join(base_dir, "encoded", "tybalt_1layer_10_train_control_encoded.txt")


In [3]:
# Load the tab-delimited input table: the first row supplies the column
# names and the first column becomes the row index.
data = pd.read_csv(
    input_file,
    sep='\t',
    header=0,
    index_col=0,
)
data

Unnamed: 0,PA0001,PA0002,PA0003,PA0004,PA0005,PA0006,PA0007,PA0008,PA0009,PA0010,...,PA5561,PA5562,PA5563,PA5564,PA5565,PA5566,PA5567,PA5568,PA5569,PA5570
GSM1146022_pJN105_1.CEL,0.806714,0.683389,0.453634,0.778612,0.663953,0.670457,0.248299,0.824756,0.54926,0.169628,...,0.669536,0.728465,0.672026,0.775149,0.860613,0.27397,0.728074,0.836035,0.89351,0.924049
GSM1146023_pJN105_2.CEL,0.842696,0.578656,0.761799,0.678369,0.598433,0.915911,0.389249,0.72996,0.503101,0.339278,...,0.504338,0.829166,0.642739,0.850083,0.794474,0.385694,0.440355,0.825117,0.701173,0.744124
GSM838212_PAO1-LB-1_Pae_G1a.CEL,0.623817,0.643189,0.40031,0.553798,0.333717,0.458539,0.341272,0.470607,0.419711,0.208776,...,0.495065,0.538893,0.58598,0.594138,0.562862,0.166516,0.379741,0.3905,0.661516,0.77409
GSM838213_PAO1-LB-2_Pae_G1a.CEL,0.596453,0.655571,0.346735,0.534616,0.408947,0.469965,0.330233,0.528747,0.430203,0.146707,...,0.413421,0.534103,0.561084,0.6015,0.589232,0.169157,0.304431,0.329272,0.64915,0.730737
GSM838214_PAO1-LB-3_Pae_G1a.CEL,0.594004,0.633081,0.444955,0.573017,0.379068,0.393022,0.369182,0.509752,0.434623,0.170151,...,0.440153,0.528376,0.548728,0.592335,0.594754,0.164498,0.372694,0.392381,0.704068,0.772427
GSM954577_Nomura_PA01-2_Pae_G1a_.CEL,0.59459,0.622302,0.368323,0.531359,0.395954,0.354838,0.459528,0.344429,0.291227,0.168556,...,0.42637,0.544641,0.603508,0.481153,0.378467,0.270442,0.378208,0.347225,0.381133,0.296923
GSM822708_wtLB_A.CEL,0.645315,0.807116,0.508371,0.80321,0.430919,0.439845,0.309297,0.597883,0.595614,0.19434,...,0.326413,0.517948,0.545747,0.592992,0.531535,0.465619,0.529491,0.550239,0.746168,0.622188
GSM822709_wtLB_B.CEL,0.641751,0.855192,0.522287,0.81492,0.386412,0.491765,0.196192,0.570357,0.655637,0.204225,...,0.323012,0.541272,0.568973,0.592775,0.523286,0.249053,0.438205,0.600394,0.701803,0.631495
GSM821495.CEL,0.597207,0.708296,0.342284,0.740859,0.364999,0.046689,0.600343,0.61458,0.58968,0.182998,...,0.0,0.539431,0.5072,0.498583,0.513659,0.124029,0.333152,0.42469,0.546653,0.549992
GSM821496.CEL,0.51851,0.663846,0.311811,0.687727,0.3307,0.048458,0.667743,0.50904,0.476136,0.185333,...,0.06359,0.491478,0.459339,0.441919,0.465993,0.195068,0.35535,0.33637,0.434249,0.432357


In [4]:
# Rebuild the saved model from disk: architecture first, then weights

# Read the JSON architecture description. The `with` block guarantees the
# file handle is closed even if reading raises part-way through.
with open(model_file, 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)

# Load the saved weights into the newly constructed model
loaded_model.load_weights(weights_file)

In [5]:
# Pass the loaded samples through the saved model in a single batch so they
# are encoded into the SAME latent space as the model's training data.
# NOTE(review): despite the "reconstructed" naming, this looks like the encoder
# output (latent representation) rather than a reconstruction - confirm.
reconstructed = loaded_model.predict_on_batch(data)

# Wrap the model output in a DataFrame, re-using the input's row index
reconstructed_df = pd.DataFrame(data=reconstructed, index=data.index)

In [6]:
# Save latent space representation as a tab-separated file.
# to_csv does not create missing parent directories, so make sure the
# destination folder exists before writing.
out_dir = os.path.dirname(out_file)
if out_dir and not os.path.isdir(out_dir):
    os.makedirs(out_dir)
reconstructed_df.to_csv(out_file, sep='\t')