In [None]:
"""
This script is intended to generate 10(0) replicates for each of the DREAM3/4 networks to check inference algorithm performance
The pipeline works as follows:
1) For each network kinetic model file (.xml)
Repeat 10(0) times:
2) GNW: generate expression data (GNW writes it to the application folder due to a bug)
3) make a new subdirectory for replicate_X (including all parent directories)
4) move expression data files to the network subdirectory
5) Combine Steady-State expression data files to one file

Written by: Lior Shachaf
2020-11-09

2021-07-20: added dream4 option, more comments and variables for path instead of hard-coding
2021-07-30: Replaced the last bash block, responsible for appending all steady-state data files into one, with a Python equivalent
"""

### Instructions:
<p>make sure settings.txt file in GNW is unchanged or set to default </p>
<p>change dream3 or dream4 according to usage in 3 places in the notebook </p>
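<p>change "targetpath" to where you want to store the data, and make sure "path_to_data" in the cell that combines the steady-state files points to the same location </p>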

In [8]:
import os

In [None]:
# Work from the GNW installation folder: the simulation cell calls
# gnw-3.1.2b.jar and settings.txt by relative path.
#path_to_gnw = '/home/local/WIN/lshacha1/GNW/gnw-3.1.2b' # long path
gnw_version = 'gnw-3.1.2b'
path_to_gnw = os.path.expanduser(f'~/GNW/{gnw_version}')
os.chdir(path_to_gnw)

In [None]:
%%bash
# Simulate expression data with GNW for every network kinetic model (.xml) and
# file each run under <targetpath>/<network name>/rep_<N>/.
# Must be run from the GNW installation directory: gnw-3.1.2b.jar and
# settings.txt are given as relative paths, and the generated files are picked
# up from the current directory.

# $HOME is used instead of "~" + eval, so the paths are never re-parsed as shell code.
path_to_dreamX_networks="$HOME/GNW/gnw-3.1.2b/src/ch/epfl/lis/networks/dream4"
targetpath="$HOME/DATA/Networks/Replicates_for_network_inference/dream4"

# iterating over all network XML files
for file in "${path_to_dreamX_networks}"/*.xml;
do
    # An unmatched glob is left as the literal pattern; skip it rather than hand it to GNW.
    [ -e "$file" ] || continue
    echo "$file"
    filename=$(basename "$file" .xml)
    # generating 10 replicates. If this is changed please update cell below as well
    for replicate in {1..10};
    do
        java -jar gnw-3.1.2b.jar --simulate -c settings.txt --input-net "$file"
        mkdir -p "${targetpath}/${filename}/rep_${replicate}/"
        # Move every file whose name starts with the network name out of the current directory.
        mv "${filename}"* "${targetpath}/${filename}/rep_${replicate}/"
    done
done

echo "done"

### Combine Steady-State expression data (wildtype, multifactorial, knockdowns, knockouts, dualknockouts) to one file

In [13]:
"""Merge each replicate's steady-state files into one table.

Iterates over all network folders under ``path_to_data`` and, for each
network, over all replicate folders. For each replicate the 5 steady-state
data files (in the order of ``data_type_list``) are appended into one file,
``{Network name}_SS_all.tsv``, written inside that replicate folder.
Lines containing "G1" are skipped when copying.

Raises FileNotFoundError if a replicate folder lacks one of the 5 input files.
"""
# Change to specific DREAM data folder containing the different network folders.
# NOTE: expanduser() only has an effect when the path starts with "~".
path_to_data = os.path.expanduser('../DATA/dream3/')
# path_to_data = os.path.expanduser('../DATA/dream4/')
os.chdir(path_to_data)

data_type_list = ["wildtype", "multifactorial", "knockdowns", "knockouts", "dualknockouts"]


def _append_data_rows(input_path, output_file):
    """Copy every line of ``input_path`` that does not contain "G1" to ``output_file``."""
    with open(input_path, "r") as input_file:
        output_file.writelines(line for line in input_file if "G1" not in line)


# Paths are joined instead of chdir-ing into each folder, so a stray plain file
# inside a network folder can no longer shift the working directory and derail
# the rest of the walk. The working directory stays at path_to_data throughout.
for network_name in sorted(os.listdir()):
    # if "100_" in network_name:  # Debug
    #     continue

    if not os.path.isdir(network_name):
        continue

    for replicate in sorted(os.listdir(network_name)):
        replicate_dir = os.path.join(network_name, replicate)
        if not os.path.isdir(replicate_dir):
            continue

        output_path = os.path.join(replicate_dir, f"{network_name}_SS_all.tsv")
        # "w" truncates any combined file left over from a previous run.
        with open(output_path, "w") as output_file:
            for data_type in data_type_list:
                input_path = os.path.join(replicate_dir, f"{network_name}_{data_type}.tsv")
                _append_data_rows(input_path, output_file)

print("All done")

All done
