In [322]:
# load packages
import pandas as pd
import statsmodels.tsa.stattools as stats
import statsmodels.graphics.tsaplots as sg
import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline
import sys
from datetime import datetime
import numpy as np

import networkx as nx
from nxpd import draw
from nxpd import nxpdParams
nxpdParams['show'] = 'ipynb'

sys.path.append("../pipelines")
import Pipelines as tdw
# Run configuration: paths used by the cells below.

# NOTE(review): user-specific absolute path; it is not read by any of the
# cells visible here — confirm it is still needed before relying on it.
output_path = "/home/jjw036/Roller/insilico_size10_1"

# Wall-clock time at which this cell ran, formatted YYYY-MM-DD_HH:MM:SS.
current_time = datetime.now().strftime('%Y-%m-%d_%H:%M:%S')

# Paths relative to the notebook's working directory.
data_folder = "../output/insilico_size10_1"
file_path = "../data/dream8/insilico/insilico_trueGraph.csv"
data_file_path = "../data/dream8/insilico/insilico_timeseries.tsv"

# Parsing In Silico Data

In [326]:
# Load the adjacency matrix (read without a header row, so rows and columns
# are labelled 0, 1, 2, ...) and the tab-separated time-series table.
adj_mat = pd.read_csv(file_path, header=None)
data_file = pd.read_csv(data_file_path, sep="\t")
node_names = ["AB" + str(n) for n in range(1, 21)]

# Every strictly positive matrix entry becomes one (row node, column node)
# pair; the 0-based labels are shifted by one to give AB1, AB2, ...
edge_list = []
for i, row in adj_mat.iterrows():
    source = "AB" + str(i + 1)
    for target in row[row > 0].index.tolist():
        edge_list.append((source, "AB" + str(target + 1)))

#removing self-edges...
no_self_edge_list = list(filter(lambda edge: edge[0] != edge[1], edge_list))
n_self_edges = len(edge_list) - len(no_self_edge_list)
print(n_self_edges)

#saving gold standard file
# One "<source>\t<target>\t1" line per edge, with no trailing newline.
save_file_name = "../data/dream8/insilico/insilico_goldstandard.tsv"
gold_standard_lines = ['%s\t%s\t1' % edge for edge in no_self_edge_list]
with open(save_file_name, 'w') as fp:
    fp.write('\n'.join(gold_standard_lines))

# Read the file back so the cell displays what was actually written.
linked_list = pd.read_csv(save_file_name, header=None, sep="\t")
linked_list

16


Unnamed: 0,0,1,2
0,AB1,AB6,1
1,AB1,AB11,1
2,AB2,AB1,1
3,AB2,AB9,1
4,AB2,AB11,1
5,AB4,AB19,1
6,AB5,AB2,1
7,AB5,AB7,1
8,AB5,AB9,1
9,AB5,AB10,1


# Parsing In Vitro Time Series

In [331]:
# there are essentially 32 networks. 4 cell types and 8 contexts. 
# first extract the files
import os
data_folder = "../data/dream8/invitro/"

# Only regular files whose name contains "Test" are parsed; sorting makes the
# processing order (and therefore the printed log) reproducible across runs.
csv_files = sorted(
    f for f in os.listdir(data_folder)
    if os.path.isfile(os.path.join(data_folder, f)) and "Test" in f
)

cell_type = [x.split('_')[0] for x in csv_files]
node_names = []     # one list of output column names per written file
cell_stimuli = []   # matching (cell line, stimulus) tuple per written file
for csv_file in csv_files:
    print(csv_file)
    raw_data = pd.read_csv(os.path.join(data_folder, csv_file))

    #grab the names of the antibodies
    hugo_ids = raw_data.iloc[1, 4:].tolist()

    # The UACC812 file is sliced one row later than the others before the
    # header row is taken — presumably it has one extra leading row; verify
    # against the raw file.
    first_row = 3 if "UACC812_main_Test" in csv_file else 2
    # .copy() so the assignments below modify an independent frame instead of
    # a slice of the frame returned by read_csv.
    raw_data = raw_data.iloc[first_row:].copy()

    # Overwrite the antibody part of the first remaining row with the names
    # captured above, then promote that row to be the column header.
    raw_data.iloc[0, 4:] = hugo_ids
    raw_data.columns = raw_data.iloc[0]
    raw_data = raw_data.iloc[1:].copy()

    # Express every time point as a bare number of minutes.
    # NOTE(review): only the "2hr" and "4hr" labels are converted; any other
    # hour label would not be handled correctly — confirm none occur.
    raw_data['Timepoint'] = (
        raw_data['Timepoint']
        .str.replace("2hr", "120min")
        .str.replace("4hr", "240min")
        .str.replace("min", "")
    )
    raw_data['Stimulus'] = raw_data['Stimulus'].fillna("None")
    stimuli = raw_data['Stimulus'].unique().tolist()

    # Rows without a stimulus are appended to every stimulus' series (t0).
    # Exact equality is used so one stimulus name can never match another
    # that merely contains it as a substring.
    baseline_rows = raw_data[raw_data['Stimulus'] == "None"]

    df_list_t0 = []
    for stimulus in stimuli:
        #except for the None stimulus, place each stimulus into own dataframe
        if stimulus == "None":
            continue
        stim_df = raw_data[raw_data['Stimulus'] == stimulus]
        cell_line = stim_df['Cell Line'].iloc[0]

        #add t0
        df = pd.concat([stim_df, baseline_rows])
        #convert to int
        df['Timepoint'] = pd.to_numeric(df['Timepoint'])
        df = df.sort_values('Timepoint')
        #remove header information
        df = df.iloc[:, 3:]
        df.columns = df.columns.str.replace('Timepoint', 'Time')

        # One tab-separated file per (cell line, stimulus), written next to
        # the raw inputs.
        out_path = os.path.join(data_folder, cell_line + "_" + stimulus + ".csv")
        df.to_csv(out_path, sep="\t", index=False)

        df_list_t0.append(df)
        node_names.append(df.columns.values.tolist())
        cell_stimuli.append((cell_line, stimulus))

BT20_main_Test.csv
BT549_main_Test.csv
MCF7_main_Test.csv
UACC812_main_Test.csv




# Parsing In Vitro Gold Standard

In [335]:
# Every edge written by this cell starts at this node.
parent_node = "MTOR_pS2448"
gold_standard_folder = "../data/dream8/invitro/raw_gold_standards/"

# Only regular files whose name contains "TrueVec" are parsed; sorted so the
# printed log is reproducible across runs.
csv_files = sorted(
    f for f in os.listdir(gold_standard_folder)
    if os.path.isfile(os.path.join(gold_standard_folder, f)) and "TrueVec" in f
)

# Pairs each (cell line, stimulus) tuple with the column names of the
# time-series file written for it by the time-series parsing cell.
cell_stimuli_names = list(zip(cell_stimuli, node_names))

for csv_file in csv_files:
    raw_data = pd.read_csv(os.path.join(gold_standard_folder, csv_file), header=None)

    # The second and third "_"-separated fields of the file name are taken as
    # cell type and stimulus. splitext removes the extension as a suffix;
    # str.rstrip('.csv') would strip any trailing '.', 'c', 's' or 'v'
    # characters and could truncate the stimulus name itself.
    name_parts = csv_file.split('_')
    cell_type = name_parts[1]
    stimulus = os.path.splitext(name_parts[2])[0]

    matching_names = [
        names for (cell, stim), names in cell_stimuli_names
        if cell == cell_type and stim == stimulus
    ]
    if not matching_names:
        raise ValueError(
            "no parsed time series found for cell line %r and stimulus %r"
            % (cell_type, stimulus)
        )
    # Drop the first column name (the time column) so only node names remain.
    labels = matching_names[0][1:]
    raw_data.columns = labels

    # Each column holding a positive value in a row yields one
    # (parent_node, column label) pair for that row.
    edge_list = []
    for i, row in raw_data.iterrows():
        targets = row[row > 0].index.tolist()
        edge_list.extend((parent_node, target) for target in targets)

    invitro_save_file_name = os.path.join(
        gold_standard_folder, cell_type + '_' + stimulus + '_goldstandard.tsv'
    )
    # One "<parent>\t<target>\t1" line per edge, with no trailing newline.
    with open(invitro_save_file_name, 'w') as fp:
        fp.write('\n'.join('%s\t%s\t1' % x for x in edge_list))
    print(invitro_save_file_name)
    


UACC812_main_Test.csv
../data/dream8/invitro/raw_gold_standards/BT20_EGF_goldstandard.tsv
../data/dream8/invitro/raw_gold_standards/BT20_FGF1_goldstandard.tsv
../data/dream8/invitro/raw_gold_standards/BT20_HGF_goldstandard.tsv
../data/dream8/invitro/raw_gold_standards/BT20_IGF1_goldstandard.tsv
../data/dream8/invitro/raw_gold_standards/BT20_Insulin_goldstandard.tsv
../data/dream8/invitro/raw_gold_standards/BT20_NRG1_goldstandard.tsv
../data/dream8/invitro/raw_gold_standards/BT20_PBS_goldstandard.tsv
../data/dream8/invitro/raw_gold_standards/BT20_Serum_goldstandard.tsv
../data/dream8/invitro/raw_gold_standards/BT549_EGF_goldstandard.tsv
../data/dream8/invitro/raw_gold_standards/BT549_FGF1_goldstandard.tsv
../data/dream8/invitro/raw_gold_standards/BT549_HGF_goldstandard.tsv
../data/dream8/invitro/raw_gold_standards/BT549_IGF1_goldstandard.tsv
../data/dream8/invitro/raw_gold_standards/BT549_Insulin_goldstandard.tsv
../data/dream8/invitro/raw_gold_standards/BT549_NRG1_goldstandard.tsv
../d