Skip to content

Commit

Permalink
Merge 0e6651c into d44a4b6
Browse files Browse the repository at this point in the history
  • Loading branch information
ajlee21 committed Apr 30, 2021
2 parents d44a4b6 + 0e6651c commit c6c96c6
Show file tree
Hide file tree
Showing 5 changed files with 320 additions and 332 deletions.
92 changes: 39 additions & 53 deletions human_tests/Human_latent_transform_simulation.ipynb

Large diffs are not rendered by default.

328 changes: 157 additions & 171 deletions human_tests/Human_random_sampling_simulation.ipynb

Large diffs are not rendered by default.

125 changes: 53 additions & 72 deletions human_tests/Human_template_simulation.ipynb

Large diffs are not rendered by default.

77 changes: 41 additions & 36 deletions ponyo/simulate_expression_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,7 +568,7 @@ def run_latent_transformation_simulation(


def shift_template_experiment(
normalized_data,
normalized_data_filename,
NN_architecture,
latent_dim,
dataset_name,
Expand All @@ -580,7 +580,7 @@ def shift_template_experiment(
selected_experiment_id,
local_dir,
base_dir,
run,
num_runs,
):
"""
Generate new simulated experiment using the selected_experiment_id as a template
Expand All @@ -592,8 +592,8 @@ def shift_template_experiment(
Arguments
----------
normalized_data: df
Normalized gene expression data
normalized_data_filename: str
File containing normalized gene expression data
------------------------------| PA0001 | PA0002 |...
05_PA14000-4-2_5-10-07_S2.CEL | 0.8533 | 0.7252 |...
Expand Down Expand Up @@ -636,8 +636,8 @@ def shift_template_experiment(
base_dir: str
Root directory containing analysis subdirectories
run: int
Simulation run
num_runs: int
Number of experiments to simulate
Returns
--------
Expand Down Expand Up @@ -668,6 +668,11 @@ def shift_template_experiment(
loaded_model.load_weights(weights_encoder_filename)
loaded_decode_model.load_weights(weights_decoder_filename)

# Read data
normalized_data = normalized_data = pd.read_csv(
normalized_data_filename, header=0, sep="\t", index_col=0
)

# Get corresponding sample ids
sample_ids = get_sample_ids(
metadata_filename,
Expand All @@ -680,46 +685,46 @@ def shift_template_experiment(
# Gene expression data for selected samples
selected_data_df = normalized_data.loc[sample_ids]

simulated_data_decoded_df, simulated_data_encoded_df = run_shift_template(
loaded_model, loaded_decode_model, normalized_data, selected_data_df, latent_dim
)
for run in range(num_runs):
simulated_data_decoded_df, simulated_data_encoded_df = run_shift_template(
loaded_model, loaded_decode_model, normalized_data, selected_data_df, latent_dim
)

# Un-normalize the data in order to run DE analysis downstream
simulated_data_scaled = scaler.inverse_transform(simulated_data_decoded_df)
# Un-normalize the data in order to run DE analysis downstream
simulated_data_scaled = scaler.inverse_transform(simulated_data_decoded_df)

simulated_data_scaled_df = pd.DataFrame(
simulated_data_scaled,
columns=simulated_data_decoded_df.columns,
index=simulated_data_decoded_df.index,
)
simulated_data_scaled_df = pd.DataFrame(
simulated_data_scaled,
columns=simulated_data_decoded_df.columns,
index=simulated_data_decoded_df.index,
)

# Save template data for visualization validation
test_filename = os.path.join(
local_dir,
"pseudo_experiment",
"template_normalized_data_" + selected_experiment_id + "_test.txt",
)
# Save
out_filename = os.path.join(
local_dir,
"pseudo_experiment",
"selected_simulated_data_" + selected_experiment_id + "_" + str(run) + ".txt",
)

selected_data_df.to_csv(test_filename, float_format="%.3f", sep="\t")
simulated_data_scaled_df.to_csv(out_filename, float_format="%.3f", sep="\t")

# Save
out_filename = os.path.join(
local_dir,
"pseudo_experiment",
"selected_simulated_data_" + selected_experiment_id + "_" + str(run) + ".txt",
)
out_encoded_filename = os.path.join(
local_dir,
"pseudo_experiment",
f"selected_simulated_encoded_data_{selected_experiment_id}_{run}.txt",
)

simulated_data_scaled_df.to_csv(out_filename, float_format="%.3f", sep="\t")
simulated_data_encoded_df.to_csv(
out_encoded_filename, float_format="%.3f", sep="\t"
)

out_encoded_filename = os.path.join(
# Save template data for visualization validation
test_filename = os.path.join(
local_dir,
"pseudo_experiment",
f"selected_simulated_encoded_data_{selected_experiment_id}_{run}.txt",
)

simulated_data_encoded_df.to_csv(
out_encoded_filename, float_format="%.3f", sep="\t"
"template_normalized_data_" + selected_experiment_id + "_test.txt",
)
selected_data_df.to_csv(test_filename, float_format="%.3f", sep="\t")


def run_shift_template(encoder, decoder, normalized_data, selected_data_df, latent_dim):
Expand Down
30 changes: 30 additions & 0 deletions ponyo/train_vae_modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
import pickle
import pandas as pd
from sklearn import preprocessing
import tensorflow as tf
import numpy as np
import random

import warnings

Expand All @@ -26,6 +29,33 @@ def fxn():
fxn()


def set_all_seeds(seed_val=42):
    """
    Seed every source of randomness touched during VAE training so that
    trained models are reproducible across runs.

    Arguments
    ----------
    seed_val: int
        Seed applied to the numpy, core-python, and tensorflow random
        number generators.
    """
    # Pinning PYTHONHASHSEED gives reproducible behavior for certain
    # hash-based operations in Python 3.2.3+.  See:
    # https://keras.io/getting-started/faq/#how-can-i-obtain-reproducible-results-using-keras-during-development
    # https://docs.python.org/3.4/using/cmdline.html#envvar-PYTHONHASHSEED
    # https://github.com/keras-team/keras/issues/2280#issuecomment-306959926
    os.environ["PYTHONHASHSEED"] = "0"

    # Put the numpy, core-python, and tensorflow-backend generators into
    # well-defined initial states, in that order.
    np.random.seed(seed_val)
    random.seed(seed_val)
    tf.set_random_seed(seed_val)


def normalize_expression_data(
base_dir, config_filename, raw_input_data_filename, normalized_data_filename
):
Expand Down

0 comments on commit c6c96c6

Please sign in to comment.