# In [2]:
import sys
import os
import pandas
from pandas import ExcelWriter
import matplotlib
from matplotlib import pyplot as plt
import statistics
import math
import scipy
from scipy import stats
import numpy as np


#AUTHOR: ASHTI M. SHAH
#MENTORS: DR. YORAM VODOVOTZ AND DR. RUBEN ZAMORA
#DATE: January, 2023

# Import data
# The CSV is read straight into raw_data (no placeholder DataFrame is needed first).
raw_data = pandas.read_csv('raw_data_sham_vgx_no_outliers.csv')
# Columns from position 4 onward are taken to be the cytokine measurements.
cytokines = list(raw_data.columns.values)[4:] #list of all cytokines

# Organize data by condition
# Each treated condition is stacked underneath the shared Baseline rows so that
# a single table holds both groups needed for a Baseline-vs-condition comparison.
all_data_Baseline = raw_data.loc[raw_data['Condition'] == 'Baseline']
all_data_Sham = pandas.concat([all_data_Baseline, raw_data.loc[raw_data['Condition'] == 'Sham']])
all_data_Vgx = pandas.concat([all_data_Baseline, raw_data.loc[raw_data['Condition'] == 'Vgx']])

#Function to get the RATE OF CHANGE over TIME of each inflammatory mediator
#in a single organ
def get_rate_of_change_inflammatory_mediator_and_TIME(tissue_rel_data, time_int):
    """Return the median-based rate of change of every mediator column.

    The rows of ``tissue_rel_data`` are split into the "Baseline" group and
    all remaining rows. For each mediator column the rate of change is the
    slope between the two group medians:
    ``(median_other - median_baseline) / (time_int[1] - time_int[0])``.

    Parameters:
        tissue_rel_data: table of the relevant data for a single organ. It
            must have a 'Condition' column; columns from position 4 onward
            are treated as mediator measurements.
        time_int: two consecutive time points, ex [1, 3].

    Returns:
        A one-row pandas DataFrame with one column per mediator holding its
        rate of change.

    Raises:
        ValueError: if the two time points are equal, since the slope would
            require a division by zero.
    """
    start_time, end_time = time_int[0], time_int[1]
    elapsed = end_time - start_time
    if elapsed == 0:
        # Without this guard numpy would quietly produce inf/nan rates that
        # poison every threshold computed downstream.
        raise ValueError(
            "time_int must contain two distinct time points, got {} and {}".format(
                start_time, end_time))

    # Get the data frames of data for each time point
    baseline_rows = tissue_rel_data.loc[tissue_rel_data['Condition'] == "Baseline"]
    later_rows = tissue_rel_data.loc[tissue_rel_data['Condition'] != "Baseline"]
    # Get the data frames of just the inflammatory mediators for each time point
    baseline_mediators = baseline_rows.iloc[:, 4:]
    later_mediators = later_rows.iloc[:, 4:]

    # Rate of change is calculated using the median mediator value at each
    # time point: x = time, y = median inflammatory value.
    rates = {}
    for mediator in baseline_mediators.columns:
        baseline_median = baseline_mediators[mediator].median()
        later_median = later_mediators[mediator].median()
        rates[mediator] = [(later_median - baseline_median) / elapsed]

    # Build the one-row table in a single construction rather than inserting
    # one column at a time.
    return pandas.DataFrame(rates)

#Function to return a list of mediators that are significantly increasing over time in a single organ
def get_significant_mediators_withTIME_positive(inflammatory_mediator_rates, num_std_dev):
    """Return the mediators whose positive rate of change is unusually large.

    The threshold is ``mean + num_std_dev * std`` computed over the strictly
    positive rates only (``np.std``, i.e. population standard deviation). A
    mediator is reported when its rate is strictly greater than the threshold.

    Parameters:
        inflammatory_mediator_rates: one-row table with one column per
            inflammatory mediator; the first row holds each mediator's rate
            of change.
        num_std_dev: number of standard deviations above the mean positive
            rate of change at which a mediator is considered to be
            significantly increasing.

    Returns:
        List of column names, in column order. Empty when no mediator has a
        positive rate.
    """
    # Read the first row by position so the result does not depend on the
    # row label happening to be 0.
    rates = [(name, inflammatory_mediator_rates[name].iloc[0])
             for name in inflammatory_mediator_rates.columns]

    positive_rates = [rate for _, rate in rates if rate > 0]
    if not positive_rates:
        # np.mean/np.std of an empty list emit RuntimeWarnings and yield a NaN
        # threshold; the answer is simply "nothing is increasing".
        return []

    threshold = np.mean(positive_rates) + np.std(positive_rates) * num_std_dev
    return [name for name, rate in rates if rate > threshold]

def get_significant_mediators_withTIME_negative(inflammatory_mediator_rates, num_std_dev):
    """Return the mediators whose negative rate of change is unusually large.

    The threshold is ``mean - num_std_dev * std`` computed over the strictly
    negative rates only (``np.std``, i.e. population standard deviation). A
    mediator is reported when its rate is strictly less than the threshold.

    Parameters:
        inflammatory_mediator_rates: one-row table with one column per
            inflammatory mediator; the first row holds each mediator's rate
            of change.
        num_std_dev: number of standard deviations below the mean negative
            rate of change at which a mediator is considered to be
            significantly decreasing.

    Returns:
        List of column names, in column order. Empty when no mediator has a
        negative rate.
    """
    # Read the first row by position so the result does not depend on the
    # row label happening to be 0.
    rates = [(name, inflammatory_mediator_rates[name].iloc[0])
             for name in inflammatory_mediator_rates.columns]

    negative_rates = [rate for _, rate in rates if rate < 0]
    if not negative_rates:
        # np.mean/np.std of an empty list emit RuntimeWarnings and yield a NaN
        # threshold; the answer is simply "nothing is decreasing".
        return []

    threshold = np.mean(negative_rates) - np.std(negative_rates) * num_std_dev
    return [name for name, rate in rates if rate < threshold]

def DyHyp_Network_Complexity(cur_condition_data, std_dev_dyHyp, condition):
    """Find significantly changing mediators per compartment and export them.

    For every compartment in a fixed list, the rows of ``cur_condition_data``
    for that compartment are reduced to per-mediator rates of change over the
    0-7 day interval, and the mediators that are significantly increasing or
    decreasing are collected. Two Excel files are written to the current
    working directory: ``<condition>_PosMediators.xlsx`` and
    ``<condition>_NegMediators.xlsx``, with one column per compartment.

    Parameters:
        cur_condition_data: table with 'Compartment' and 'Condition' columns
            plus the mediator columns, holding Baseline rows together with
            the rows of one condition.
        std_dev_dyHyp: number of standard deviations passed on as the
            significance threshold for both directions.
        condition: label used as the prefix of the output file names.

    Returns:
        None. The results are only written to disk.
    """
    # Single time interval analysed: t=0d to t=7d.
    cur_times_int = [0, 7]
    # List of all compartments analysed; plasma is included alongside the tissues.
    list_organs = ["Plasma","Heart", "Lung", "Liver","Spleen", "Gut", "Kidney"]

    dict_pos_mediators = {}
    dict_neg_mediators = {}
    for organ in list_organs:
        rel_data_cur_organ = cur_condition_data.loc[cur_condition_data['Compartment'] == organ]
        rates_cur_organ = get_rate_of_change_inflammatory_mediator_and_TIME(rel_data_cur_organ, cur_times_int)
        dict_pos_mediators[organ] = get_significant_mediators_withTIME_positive(rates_cur_organ, std_dev_dyHyp)
        dict_neg_mediators[organ] = get_significant_mediators_withTIME_negative(rates_cur_organ, std_dev_dyHyp)

    # Wrapping each list in a Series lets compartments with different numbers
    # of significant mediators share one table (shorter columns are padded).
    table_pos_mediators = pandas.DataFrame({k: pandas.Series(v) for k, v in dict_pos_mediators.items()})
    table_neg_mediators = pandas.DataFrame({k: pandas.Series(v) for k, v in dict_neg_mediators.items()})
    table_pos_mediators.to_excel("{}_PosMediators.xlsx".format(condition))
    table_neg_mediators.to_excel("{}_NegMediators.xlsx".format(condition))

# Run the analysis for the Vgx condition with a threshold of 1 standard
# deviation; writes Vgx_PosMediators.xlsx and Vgx_NegMediators.xlsx.
DyHyp_Network_Complexity(
    cur_condition_data=all_data_Vgx,
    std_dev_dyHyp=1,
    condition="Vgx",
)