In [1]:
import requests
import pandas as pd

In [2]:
def get_ppi_score(genes):
    request_url = "https://string-db.org/api/tsv-no-header/ppi_enrichment?"
    params = {
        "identifiers" : "%0d".join(genes), # your proteins
        "species" : 9606, # species NCBI identifier 
        "caller_identity" : "www.awesome_app.org" # your app name
    }
    response = requests.post(request_url, data=params)
    # print(response.text)
    # header_names = ["number_of_nodes", "number_of_edges", "average_node_degree", "local_clustering_coefficient", "expected_number_of_edges", "p_value"]
    number_of_nodes, number_of_edges, average_node_degree, local_clustering_coefficient, expected_number_of_edges, p_value = list(map(float, response.text.strip().split("\n")[0].split("\t")))
    # df = pd.DataFrame([line.split("\t") for line in response.text.strip().split("\n")], columns=header_names)
    # df["combo"] = "|".join(genes)
    return pd.Series({
        "number_of_nodes": number_of_nodes,
        "number_of_edges": number_of_edges,
        "average_node_degree": average_node_degree,
        "local_clustering_coefficient": local_clustering_coefficient,
        "expected_number_of_edges": expected_number_of_edges,
        "p_value": p_value 
    })

In [3]:
combo_files = [
    "/data6/deepro/ukb_bmi/2_rarecomb/data/british/combo2.csv",
    "/data6/deepro/ukb_bmi/2_rarecomb/data/british/combo3.csv"
]

combo_df = pd.concat([pd.read_csv(cf).loc[:, ["uniq_items"]] for cf in combo_files])

In [4]:
combo_df["uniq_items"] = combo_df.uniq_items.str.replace("Input_", "").str.split("|")

In [5]:
combo_df = pd.concat((combo_df, combo_df.uniq_items.apply(get_ppi_score)), axis=1)

In [8]:
combo_df.loc[combo_df.p_value<0.05].sort_values("p_value")

Unnamed: 0,uniq_items,number_of_nodes,number_of_edges,average_node_degree,local_clustering_coefficient,expected_number_of_edges,p_value
496,"[ABCB4, UGT1A4, UGT1A7]",3.0,3.0,2.00,1.000,0.0,0.000005
554,"[ABCB4, UGT1A1, UGT1A7]",3.0,3.0,2.00,1.000,0.0,0.000005
555,"[ABCB4, UGT1A1, UGT1A4]",3.0,3.0,2.00,1.000,0.0,0.000005
1049,"[ABCB4, UGT1A6, UGT1A7]",3.0,3.0,2.00,1.000,0.0,0.000005
1050,"[ABCB4, UGT1A4, UGT1A6]",3.0,3.0,2.00,1.000,0.0,0.000006
...,...,...,...,...,...,...,...
178,"[GRM1, NME8]",2.0,1.0,1.00,1.000,0.0,0.047500
298,"[ABCA12, RYR1, TTN]",3.0,1.0,0.67,0.667,0.0,0.047700
271,"[CACNA1S, MYO1C, MYO7A]",3.0,1.0,0.67,0.667,0.0,0.049000
144,"[RYR2, TENM4, TTN]",3.0,1.0,0.67,0.667,0.0,0.049700


In [9]:
combo_df.loc[combo_df.p_value<0.05].sort_values("p_value").head(60)

Unnamed: 0,uniq_items,number_of_nodes,number_of_edges,average_node_degree,local_clustering_coefficient,expected_number_of_edges,p_value
496,"[ABCB4, UGT1A4, UGT1A7]",3.0,3.0,2.0,1.0,0.0,5e-06
554,"[ABCB4, UGT1A1, UGT1A7]",3.0,3.0,2.0,1.0,0.0,5e-06
555,"[ABCB4, UGT1A1, UGT1A4]",3.0,3.0,2.0,1.0,0.0,5e-06
1049,"[ABCB4, UGT1A6, UGT1A7]",3.0,3.0,2.0,1.0,0.0,5e-06
1050,"[ABCB4, UGT1A4, UGT1A6]",3.0,3.0,2.0,1.0,0.0,6e-06
1053,"[ABCB4, UGT1A1, UGT1A6]",3.0,3.0,2.0,1.0,0.0,6e-06
210,"[ABCA4, MYO7A, MYO7B]",3.0,2.0,1.33,0.667,0.0,9.1e-05
947,"[ECEL1, PCDHGC4, PCDHGC5]",3.0,1.0,0.67,0.667,0.0,0.00011
832,"[ECEL1, PCDHGA11, PCDHGB7]",3.0,1.0,0.67,0.667,0.0,0.00017
228,"[ECEL1, PCDHGA5, PCDHGA8]",3.0,1.0,0.67,0.667,0.0,0.00021


In [12]:
all_combo_genes = set("|".join(combo_df.uniq_items.str.replace("Input_", "").values).split("|"))

TypeError: sequence item 0: expected str instance, numpy.float64 found

In [13]:
combo_df.uniq_items

0                 [BMPR1B, SHC2]
1                  [BCHE, TRPV4]
2               [ABCA13, DDX60L]
3                 [MYH14, NR1D1]
4                 [ADAM19, MMUT]
                  ...           
1128         [CPT1B, DRG1, SFI1]
1129     [ACAP3, SLC7A8, TAS1R3]
1130        [F5, NBEAL2, SPINK8]
1131       [GHDC, KRTAP2-3, TTN]
1132    [CYP4F22, ZNF490, ZNF69]
Name: uniq_items, Length: 1834, dtype: object

In [None]:
len(all_combo_genes)

372

In [None]:
get_ppi_score(all_combo_genes)

372	1322	7.11	0.456	432	0.0



Unnamed: 0,number_of_nodes,number_of_edges,average_node_degree,local_clustering_coefficient,expected_number_of_edges,p_value,combo
0,372,1322,7.11,0.456,432,0.0,AP3M2|NOTCH4|TRPM1|ZRANB3|TRNT1|LDLR|CEP290|SP...


In [None]:
ppi_df = pd.concat(list(map(get_ppi_score, combo_df.uniq_items.str.replace("Input_", "").str.split("|"))))

In [None]:
ppi_df["p_value"] = ppi_df.p_value.astype(float)

In [None]:
ppi_df.loc[ppi_df.p_value<0.05]

Unnamed: 0,number_of_nodes,number_of_edges,average_node_degree,local_clustering_coefficient,expected_number_of_edges,p_value,combo
0,3,1,0.67,0.667,0,0.0498,DNAH5|DNAJC6|TTN
0,3,1,0.67,0.667,0,0.0105,MYH8|SCN11A|SPTB
0,3,1,0.67,0.667,0,0.0345,MYH13|SCN9A|TTN
0,3,1,0.67,0.667,0,0.0252,KIF26A|OBSCN|RYR1
0,3,2,1.33,0.667,0,0.0083,KCNQ1|RYR1|TTN
...,...,...,...,...,...,...,...
0,3,1,0.67,0.667,0,0.0342,HOXB1|UGT1A1|UGT1A10
0,3,1,0.67,0.667,0,0.0350,HOXB1|UGT1A10|UGT1A6
0,3,1,0.67,0.667,0,0.0335,HOXB1|UGT1A10|UGT1A8
0,3,1,0.67,0.667,0,0.0256,HOXB1|UGT1A10|UGT1A3
