In [1]:
import sys
import os
import pandas as pd
import numpy as np
import networkx as nx
from Swing.util.Evaluator import Evaluator
from Swing.util.lag_identification import get_experiment_list, xcorr_experiments, calc_edge_lag
from nxpd import draw
from nxpd import nxpdParams
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from scipy.stats import fisher_exact, linregress, ttest_rel

## Functions!

#### Note: Several of the functions do not pass variables correctly. The code will need to be cleaned up before it is shared.

In [48]:
def is_square(n):
    """
    Check whether a number has a whole-number square root
    :param n: int or float
        The value to test
    :return: Boolean
        True when the square root of n has no fractional part, otherwise False
    """
    root = np.sqrt(n)
    return root.is_integer()


def get_factors(n):
    """
    Calculate the factors of a number
    :param n: int
        The number to be factored
    :return: list
        A sorted list of the unique factors from smallest to largest. Each factor
        appears exactly once, including the square root of a perfect square.
        An empty list is returned when n is 0.
    """
    factors = set()
    # Only divisors up to sqrt(n) need testing; each one found also yields its
    # cofactor n // i. Collecting into a set keeps the root of a perfect square
    # (where i == n // i) from being reported twice.
    for i in range(1, int(n ** 0.5) + 1):
        if n % i == 0:
            factors.add(i)
            factors.add(int(n // i))
    return sorted(factors)


def calc_subplot_dimensions(x):
    """
    Work out a grid layout for a matplotlib subplot object.
    :param x: int
        How many plots have to fit in the grid
    :return: rows, columns
        The number of rows and columns the subplot grid should have
    """
    # Three or fewer plots are stacked in a single column
    if x <= 3:
        return x, 1

    n_cells = x
    factors = get_factors(n_cells)
    # A count with no more than two factors cannot form a grid, so the cell
    # count is increased until it is a perfect square or has more factors.
    while not (len(factors) > 2 or is_square(n_cells)):
        n_cells += 1
        factors = get_factors(n_cells)

    # Perfect squares get a square grid
    if is_square(n_cells):
        side = int(np.sqrt(n_cells))
        return side, side

    # Otherwise use the two factors in the middle of the sorted list
    middle = len(factors) // 2
    return factors[middle - 1], factors[middle]


def get_true_edges(gold_filename):
    """
    Load a gold standard file and list its edges
    :param gold_filename: str
        Path to the gold standard file; it is passed to Evaluator with a tab separator
    :return: edges, evaluator
        The Evaluator's gs_flat attribute converted with tolist(), and the Evaluator itself
    """
    gold_evaluator = Evaluator(gold_filename, '\t')
    return gold_evaluator.gs_flat.tolist(), gold_evaluator


def get_edge_lags(data_filename):
    """
    Compute edge lags from a tab-separated time series file
    :param data_filename: str
        Path to the time series file
    :return: lags, df
        The result of calc_edge_lag, and the time series file loaded as a DataFrame
    """
    timeseries = pd.read_csv(data_filename, sep="\t")
    # Every column after the first is treated as a gene
    # (presumably the first column holds the time points -- TODO confirm)
    genes = timeseries.columns.values[1:].tolist()
    # NOTE(review): 21 and 10 are hard-coded arguments to get_experiment_list
    # (presumably time points and perturbations per file -- confirm against Swing)
    experiments = get_experiment_list(data_filename, 21, 10)
    cross_correlations = xcorr_experiments(experiments)
    # NOTE(review): 0.1 and 0.5 are hard-coded thresholds passed positionally to
    # calc_edge_lag; their meaning is not visible here
    edge_lags = calc_edge_lag(cross_correlations, genes, 0.1, 0.5, timestep=1)
    return edge_lags, timeseries


def get_network_changes(pickle_filename, edge_str='regulator-target', method = 'RF',
                        base_str='rank_importance_RF-td_21', shortener_str='rank_importance_'):
    print(shortener_str)
    results_df = pd.read_pickle(pickle_filename)
    edges = results_df[edge_str].values
    baseline = results_df[base_str].values

    diff_df = pd.DataFrame()
    diff_df[edge_str] = edges
    diff_df[('base_%s' %method)] = baseline
    rank_df = pd.DataFrame()
    rank_df[edge_str] = edges
    rank_df[('base_%s' %method)] = baseline
    for column in results_df.columns:
        if column != edge_str and column != base_str:
            short_name = column.replace(shortener_str, method)
            diff_df[short_name] = baseline - results_df[column].values
            rank_df[short_name] = results_df[column].values
    diff_df.set_index(['regulator-target'], inplace=True)
    rank_df.set_index(['regulator-target'], inplace=True)
    parameters = set(rank_df.columns[1:].values)
    return diff_df, rank_df, parameters


def get_network_data(goldstandard, timeseries, ignore_self=True):
    """
    Gather the true network and the edge lags for one dataset
    :param goldstandard: str
        Path to the gold standard file, passed on to get_true_edges
    :param timeseries: str
        Path to the time series file, passed on to get_edge_lags
    :param ignore_self: Boolean
        When True, rows whose 'Parent' equals their 'Child' are removed from the lags
    :return: true_edges, edge_df, data, dg, evaluator
        The true edge list, a one-column ('Lag') DataFrame indexed by edge, the time
        series DataFrame, a directed graph of the true edges, and the Evaluator
    """
    # Build the true network as a directed graph
    true_edges, evaluator = get_true_edges(goldstandard)
    dg = nx.DiGraph()
    dg.add_edges_from(true_edges)

    # Degree centrality and edge betweenness centrality used to be computed here;
    # that code is deprecated, so `degree` and `b_cent` are no longer produced.

    # Edge lags, optionally without self edges
    lag_table, data = get_edge_lags(timeseries)
    if ignore_self:
        not_self_edge = lag_table['Parent'] != lag_table['Child']
        lag_table = lag_table[not_self_edge]
    edge_df = pd.DataFrame({'Lag': lag_table['Lag'].values}, index=lag_table['Edge'].values)

    return true_edges, edge_df, data, dg, evaluator

In [49]:
# Driver cell: for every model/network pair, load the gold standard and time series,
# then load the pickled rank results for each inference method.
# NOTE(review): this cell is work in progress. It stops at the first sys.exit()
# below, so everything after that call never runs.

# Per-condition lag window; the summary code further down reads each value as
# [min_lag, max_lag]
lag_range = {'ml_0': [0, 1], 'ml_1': [0, 2], 'ml_2': [0, 3], 'ml_3': [1, 2], 'ml_4': [1, 4], 'ml_5': [2, 3]}
# Networks are numbered 1..num_nets for each model
num_nets = 20
# Method codes; 'D' is expanded to 'dionesus' / 'Dionesus' when building names below
methods = ['D', 'RF']
models = ['Yeast', 'Ecoli']
# Accumulators filled once per network inside the loop
true_edge_df = pd.DataFrame()
roc_df = pd.DataFrame()
pr_df = pd.DataFrame()
# NOTE(review): stays an empty set for the whole loop (it is only reassigned
# after the loop), so `for c in conditions` below would never iterate
conditions = set()
for model in models:
    for net in range(1, num_nets+1):
        gold_file = "../data/gnw_insilico/network_data/%s/%s-%i_goldstandard.tsv" % (model, model, net)
        data_file = "../data/gnw_insilico/network_data/%s/%s-%i_timeseries.tsv" % (model, model, net)
        # NOTE(review): `evalutaor` looks like a typo for `evaluator`; the code
        # below uses `ee`, which is not defined anywhere in the visible notebook
        true_edges, edge_df, data, dg, evalutaor = get_network_data(gold_file, data_file)
        for ii, method in enumerate(methods):
            short = 'rank_importance_%s' %method
            # The pickle path is relative to the notebook's working directory
            pickle_file = "%s_net%i_%s_promotion.pkl" % (model, net, method.replace('D', 'dionesus'))
            base_str = ('rank_importance_%s-td_21' %method.replace('D', 'Dionesus'))
            change, ranks, params = get_network_changes(pickle_file, base_str= base_str, 
                                                        shortener_str=short, method=method)
            print(change)
            print(ranks)
            # NOTE(review): debugging stop -- raises SystemExit on the first
            # method of the first network (see the cell output)
            sys.exit()
            if ii==0:
                ranks_df = ranks
                change_df = change
            else:
                # NOTE(review): results for the second method are discarded here
                pass

        # NOTE(review): second debugging stop; unreachable while the one above exists
        sys.exit()


        # Unreachable from here to the end of the loop body.
        filtered_ranks = ranks_df.copy()
        roc = []
        aupr = []
        for c in conditions:
            filtered_ranks.sort_values(filtered_ranks.columns[1], inplace=True)
            # NOTE(review): `ee` is undefined; presumably the Evaluator returned
            # by get_network_data -- confirm
            roc.append(ee.calc_roc(filtered_ranks.iloc[:, :2])[2].values[-1])
            aupr.append(ee.calc_pr(filtered_ranks.iloc[:, :2])[2].values[-1])
            filtered_ranks.drop(c, axis=1, inplace=True)
        roc_df[model + str(net)] = roc
        pr_df[model + str(net)] = aupr

        # NOTE(review): get_network_changes already returns frames indexed by
        # 'regulator-target', so this set_index would raise KeyError
        full_df = pd.concat([edge_df, change_df.set_index(['regulator-target'])], axis=1, join='inner')
        # NOTE(review): `b_cent` and `degree` are not defined in the visible code;
        # the statistics that produced them are commented out in get_network_data
        current_true = full_df[full_df.index.isin(true_edges)].join(b_cent)
        current_true['P_deg'] = [degree[edge[0]] for edge in current_true.index.values]
        current_true['C_deg'] = [degree[edge[1]] for edge in current_true.index.values]
        # NOTE(review): DataFrame.append was removed in pandas 2.0; pd.concat is
        # the replacement
        true_edge_df = true_edge_df.append(current_true)

def _tally_changes(frame):
    """Count, per column of frame.iloc[:, 2:11], the entries that are > 0, < 0 and == 0."""
    deltas = frame.iloc[:, 2:11].values
    return (np.sum(deltas > 0, axis=0),
            np.sum(deltas < 0, axis=0),
            np.sum(deltas == 0, axis=0))


# Counts over all true edges
t_promoted, t_demoted, t_same = _tally_changes(true_edge_df)

# Counts over the true edges whose lag is greater than zero
t_lagged = true_edge_df[true_edge_df['Lag'] > 0]
l_promoted, l_demoted, l_same = _tally_changes(t_lagged)

# Table rows: promoted / not promoted; columns: Lag > 0 / the remaining true edges
l_not_promoted = l_demoted + l_same
t_not_promoted = t_demoted + t_same
c_table = np.array([[l_promoted, t_promoted - l_promoted],
                    [l_not_promoted, t_not_promoted - l_not_promoted]])
conditions = true_edge_df.columns[2:11].values

lag_set = sorted(set(true_edge_df['Lag'].values))

f = plt.figure()
n_rows, n_cols = calc_subplot_dimensions(len(conditions))
for ii, run in enumerate(conditions):
    # Only the 'ml' conditions have an entry in lag_range
    if 'ml' not in run:
        continue
    key = run.split('-')[1]
    min_lag, max_lag = lag_range[key]
    lag_values = true_edge_df['Lag']
    in_window = (lag_values >= min_lag) & (lag_values <= max_lag)
    in_range = true_edge_df.loc[in_window, [run]]
    # Fraction of edges inside the lag window with a positive value for this run
    print(run, np.sum(in_range.values > 0)/len(in_range))

rank_importance_D
                  base_D  Dionesus-td_2  Dionesus-td_10  Dionesus-td_15  Dionesus-ml_0  Dionesus-ml_1  Dionesus-ml_2  Dionesus-ml_3  Dionesus-ml_4  Dionesus-ml_5
regulator-target                                                                                                                                                 
(G4, G3)               5              5              -1               0             -3             -2             -1             -1             -1             -2
(G8, G7)               8              7               4               4              4              4              3              4              4              6
(G8, G6)              13             11               4               4              2              3              2              4              4              5
(G6, G5)               4              1               2               1              1              2              3              2              1              1
(G7, G6)  

SystemExit: 

To exit: use 'exit', 'quit', or Ctrl-D.
