In [37]:
import pandas as pd
import math

In [38]:
df = pd.read_csv("NCLab_df.csv")

In [39]:
# Initialize the dictionary with each key as "INPUT|CONTROLLER|x_a" where x_a is activation type, and value being a list of 
# [num_mentions, pos_or_neg]

interaction_dict = {}
i = 0
for i in range(0, len(df.INPUT)):
    if df.NUM_LABEL[i] == 1:
        interaction_dict[f"{df.INPUT[i]}|{df.CONTROLLER[i]}|pos"] = [df.COUNTER[i], df.NUM_LABEL[i]]
    elif df.NUM_LABEL[i] == -1:
        interaction_dict[f"{df.OUTPUT[i]}|{df.CONTROLLER[i]}|neg"] = [df.COUNTER[i], df.NUM_LABEL[i]]
    elif df.NUM_LABEL[i] == 0:
        interaction_dict[f"{df.OUTPUT[i]}|{df.CONTROLLER[i]}|nan"] = [df.COUNTER[i], df.NUM_LABEL[i]]
    i += 1

In [40]:
# We make a proportion dictionary with each key as "Input/Output|Controller" and each value as [pos_count, total_count]

# initialize the dictionary
prop_dict = {}

for key in interaction_dict:
    prop_dict[key[:-4]] = [0, 0]

In [41]:
# Add just Input/Output|Controller as key to prop_dict, and if the activation is postive, add to the first element in the list value. Otherwise
# just add to total.

for key in interaction_dict:
    split_key = key.split("|")
    count = interaction_dict[key][0]
    if split_key[2] == "pos":
        prop_dict[key[:-4]][0] += count
        prop_dict[key[:-4]][1] += count

    if split_key[2] == "neg":
        prop_dict[key[:-4]][1] += count
        
    if split_key[2] == "nan":
        prop_dict[key[:-4]][0] += count/2
        prop_dict[key[:-4]][1] += count

In [42]:
# for each entry in prop_dict, divide pos by total to get the proportion. Then input into the modified tanh function to get our edge value.
# subtract 1 for tanh, leave -1 out for sigmoid

for key in prop_dict:
    proportion = prop_dict[key][0] / prop_dict[key][1]
    y = (1/(1+math.exp(-5*(proportion-.5))))
    prop_dict[key].append(y)

In [43]:
# Change dictionary's form into dataframe-compatible form.
df_dict = {
    "INPUT": [],
    "CONTROLLER": [],
    "EDGE": [],
    "POS": [],
    "TOTAL": [],
}
for key in prop_dict:
    met_input, met_cont = key.split("|")
    df_dict["INPUT"].append(met_input)
    df_dict["CONTROLLER"].append(met_cont)
    df_dict["EDGE"].append(prop_dict[key][2])
    df_dict["POS"].append(prop_dict[key][0])
    df_dict["TOTAL"].append(prop_dict[key][1])


In [44]:
prop_df = pd.DataFrame.from_dict(df_dict)

In [45]:
i = 0
input_spec = prop_df["INPUT"]
cont_spec = prop_df["CONTROLLER"]
ID_PAIRS = {"ID_PAIRS": []}
for i in range(0, len(input_spec)):
    if len(input_spec[i]) > 5:
        input_spec_id = input_spec[i].split(":")[-2] + ":" + input_spec[i].split(":")[-1]
    if len(cont_spec[i]) > 5:
        cont_spec_id = cont_spec[i].split(":")[-2] + ":" + cont_spec[i].split(":")[-1]
    else:
        input_spec_id = input_spec
        cont_spec_id = cont_spec

    ID_PAIRS["ID_PAIRS"].append(f"{input_spec_id}|{cont_spec_id}")
    i += 1

In [46]:
ID_PAIRS_df = pd.DataFrame.from_dict(ID_PAIRS)
prop_df = pd.concat([prop_df, ID_PAIRS_df], axis=1, join="inner")
with_id = pd.concat([prop_df["INPUT"], prop_df["CONTROLLER"], ID_PAIRS_df], axis=1, join="inner")
with_id = with_id.drop_duplicates(subset="ID_PAIRS", inplace=False)

In [47]:
unique_dict = {}
for pair in ID_PAIRS_df["ID_PAIRS"]:
    unique_dict[pair] = [0, 0]

i = 0
for i in range(len(prop_df["ID_PAIRS"])):
    id_pair = prop_df["ID_PAIRS"][i]
    unique_dict[id_pair][0] += prop_df["POS"][i]
    unique_dict[id_pair][1] += prop_df["TOTAL"][i]

In [48]:
pos_t_dict = {"POS" : [], "TOTAL": [], "EDGE": []}

for key in unique_dict:
    pos_t_dict["POS"].append(unique_dict[key][0])
    pos_t_dict["TOTAL"].append(unique_dict[key][1])

    proportion = unique_dict[key][0] / unique_dict[key][1]
    y = (1/(1+math.exp(-5*(proportion-.5))))
    
    pos_t_dict["EDGE"].append(y)

In [49]:
pos_t_df = pd.DataFrame.from_dict(pos_t_dict)
with_id = with_id.reset_index(drop=True)
prop_df_reduced = pd.concat([with_id, pos_t_df], axis=1, join="inner")

In [50]:
prop_df_reduced

Unnamed: 0,INPUT,CONTROLLER,ID_PAIRS,POS,TOTAL,EDGE
0,Bax::uniprot:Q07812.t,Bcl-2::uniprot:P10415.t,uniprot:Q07812.t|uniprot:P10415.t,1.0,7,0.143599
1,E5,Bax::uniprot:Q07812.t,uniprot:Q07812.t|uniprot:Q07812.t,4.0,4,0.924142
2,Jagged1::uniprot:P78504.t,Bax::uniprot:Q07812.t,uniprot:P78504.t|uniprot:Q07812.t,0.0,4,0.075858
3,Notch1::uniprot:P46531.t,Bax::uniprot:Q07812.t,uniprot:P46531.t|uniprot:Q07812.t,0.0,4,0.075858
4,PCNA::uniprot:P12004.t,Bax::uniprot:Q07812.t,uniprot:P12004.t|uniprot:Q07812.t,8.0,12,0.697059
...,...,...,...,...,...,...
2873,E3,Eq::uaz:UAZ4571,uaz:UAZCE94.p@Y311|uaz:UAZ4571,1.0,1,0.924142
2874,E3,Fpr2::uniprot:P25090,uaz:UAZCE94.p@Y311|uniprot:P25090,1.0,1,0.924142
2875,E3,Gls1::uniprot:O94925,uaz:UAZCE94.p@Y311|uniprot:O94925,1.0,1,0.924142
2876,Δ::uaz:UAZCE94.m,SMTs::uaz:UAZ534D5473,uaz:UAZCE94.m|uaz:UAZ534D5473,0.5,1,0.500000


In [51]:
in_cont_id = {"INPUT_ID" : [], "CONT_ID" : []}
for pair in prop_df_reduced["ID_PAIRS"]:
    in_id, cont_id = pair.split("|") 
    in_cont_id["INPUT_ID"].append(in_id)
    in_cont_id["CONT_ID"].append(cont_id)
in_cont_id_df = pd.DataFrame.from_dict(in_cont_id)
with_ids = pd.concat([in_cont_id_df, prop_df_reduced], axis=1)

In [36]:
with_ids.to_csv("NC_tanh.csv")