## Permutation test results

In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

In [2]:
def load_interaction(project_name:str) -> pd.DataFrame:
    """Load the observed interaction scores of a single project.

    Reads ``../computed_interactions/<project_name>.csv``, keeping only the
    ``interaction`` column and the three ``rmst_diff_*`` score columns. The
    first selected column becomes the index (presumably ``interaction`` --
    verify against the CSV column order).

    Parameters
    ----------
    project_name : str
        Base name (without ``.csv``) of the file to read.

    Returns
    -------
    pd.DataFrame
        The selected score columns with every missing value replaced by -1.
    """
    score_columns = ['rmst_diff_f1+f2', 'rmst_diff_f1-f2', 'rmst_diff_f1*f2']
    csv_path = f"../computed_interactions/{project_name}.csv"

    return pd.read_csv(
        csv_path,
        index_col=[0],
        usecols=["interaction", *score_columns],
    ).fillna(-1)

def load_permutations(project_name:str) -> list:
    """Load every permutation result file of a single project.

    Each regular, non-hidden file in ``../computed_permutations/<project_name>/``
    is read with the same column selection as the observed interactions
    (``interaction`` plus the three ``rmst_diff_*`` score columns). Missing
    values are left as NaN.

    Parameters
    ----------
    project_name : str
        Name of the project sub-directory to read.

    Returns
    -------
    list
        One ``pd.DataFrame`` per permutation file, in sorted file-name order.
    """
    use_columns = ["interaction", 'rmst_diff_f1+f2', 'rmst_diff_f1-f2', 'rmst_diff_f1*f2']
    permutation_dir = f"../computed_permutations/{project_name}/"

    # os.listdir returns *every* entry in arbitrary order. Skip hidden entries
    # and sub-directories (e.g. .DS_Store, .ipynb_checkpoints) that would make
    # read_csv fail, and sort so the load order is reproducible.
    file_names = sorted(
        fn
        for fn in os.listdir(permutation_dir)
        if not fn.startswith(".") and os.path.isfile(os.path.join(permutation_dir, fn))
    )

    permutations = [
        pd.read_csv(os.path.join(permutation_dir, fn), index_col=[0], usecols=use_columns)
        for fn in tqdm(file_names)
    ]
    return permutations

def get_interaction_values(df, interaction_column, N_INTERACTIONS, placeholder=-1):
    """Return the non-missing values of one column, padded to a fixed length.

    Parameters
    ----------
    df : pd.DataFrame
        Frame holding the column to extract.
    interaction_column : str
        Name of the column whose values are returned.
    N_INTERACTIONS : int
        Target length; the result is padded with ``placeholder`` up to it.
    placeholder : scalar, default -1
        Value appended for every entry that is missing from the column.

    Returns
    -------
    np.ndarray
        The column's non-NaN values followed by the padding. If the column
        already holds ``N_INTERACTIONS`` or more values, nothing is appended
        and nothing is truncated.
    """
    observed = df[interaction_column].dropna().values
    n_missing = N_INTERACTIONS - len(observed)
    # range() over a non-positive count is empty, so no padding is produced
    # when the column is already long enough.
    padding = [placeholder for _ in range(n_missing)]
    return np.append(observed, padding)

In [3]:
# Table read from L1000.txt without a header row, so every line of the file
# counts as data (presumably one gene per line -- verify against the file).
GENES = pd.read_csv("../data/L1000.txt", header=None)
N_GENES = len(GENES)

# Number of unordered pairs that can be formed from N_GENES items (n choose 2).
N_INTERACTIONS = N_GENES * (N_GENES - 1) // 2

# Score columns evaluated by the permutation test, and the label each one is
# reported under in the results file.
interaction_types = ['rmst_diff_f1+f2', 'rmst_diff_f1-f2', 'rmst_diff_f1*f2']
interaction_label = dict(zip(interaction_types, ("additive", "competing", "xor")))

In [7]:
# Every sub-directory of ../computed_permutations/ is treated as one project.
# os.listdir returns entries in arbitrary order and includes hidden or stray
# files (e.g. .DS_Store, .ipynb_checkpoints); keep only real, non-hidden
# directories and sort them so the processing order is reproducible.
project_names = sorted(
    name
    for name in os.listdir("../computed_permutations/")
    if not name.startswith(".")
    and os.path.isdir(os.path.join("../computed_permutations/", name))
)

# Value substituted for interaction scores that are missing from a permutation.
PLACEHOLDER = -1

# Percentile (0-100 scale, as expected by np.percentile) of the pooled
# permutation scores that an observed score must exceed to count as a hit.
SIGNIFICANCE_THRESHOLD = 99.99

print(project_names)

['METABRIC', 'KIRC', 'HNSC']


In [5]:
# Permutation test: for every project and interaction type, pool the scores of
# all permutations, take the SIGNIFICANCE_THRESHOLD percentile of that pool as
# the cut-off, and record the observed interactions that score above it.
results_path = "../data/permutation_test_results.csv"

# Result rows are appended, so rows already in the file are kept. The header
# must therefore only be written when the file is new or empty -- otherwise
# every re-run would insert another header line in the middle of the data.
if not os.path.exists(results_path) or os.path.getsize(results_path) == 0:
    with open(results_path, "w") as f:
        f.write("project;interaction-type;n-hits;hits\n")

for project_name in project_names:
    print(project_name)

    project_interactions = load_interaction(project_name)
    project_permutations = load_permutations(project_name)

    # np.concatenate fails with an opaque message on an empty list; report the
    # actual problem instead.
    if not project_permutations:
        raise ValueError(f"no permutation files found for project {project_name!r}")

    for interaction in interaction_types:

        # Null distribution: scores of this interaction type across all
        # permutations, each padded to N_INTERACTIONS entries with PLACEHOLDER.
        permutation_values = np.concatenate([
            get_interaction_values(permutation, interaction, N_INTERACTIONS, PLACEHOLDER)
            for permutation in project_permutations
        ])

        permutation_threshold = np.percentile(permutation_values, SIGNIFICANCE_THRESHOLD)
        # The pooled array is large; release it before the next iteration.
        del permutation_values

        # Observed interactions whose score is strictly above the cut-off.
        hits = project_interactions.index[project_interactions[interaction] > permutation_threshold].values

        with open(results_path, "a") as f:
            f.write(f"{project_name};{interaction_label[interaction]};{len(hits)};[{','.join(map(str, hits))}]\n")



HNSC


100%|██████████| 100/100 [01:06<00:00,  1.50it/s]


['ABHD6*BAG6' 'ABHD6*MTF2' 'AKR1A1*FOS' 'AKR1A1*PYCR1' 'AKT1*BNIP3'
 'AKT1*CCDC85B' 'ALDOA*ANO10' 'ALDOC*PSME1' 'AMDHD2*UBE2J1' 'ARL4C*CDK7'
 'ATMIN*IKBKE' 'ATP11B*NT5DC2' 'ATP6V0B*CLTB' 'AXIN1*CDK1' 'BAG6*STXBP2'
 'BDH1*ID2' 'BDH1*IKBKB' 'BDH1*NISCH' 'BNIP3*CCND3' 'BNIP3*CDC42'
 'BNIP3*ECD' 'BNIP3*SCP2' 'BNIP3*TOMM70A' 'BRCA1*DYNLT3' 'CAPN1*HES1'
 'CASP2*FBXL12' 'CBR1*KIAA0355' 'CCDC85B*CDC42' 'CCDC85B*CRYZ'
 'CCDC85B*GALE' 'CCDC85B*SRC' 'CCDC85B*SYPL1' 'CCDC92*PUM2' 'CCND3*CHIC2'
 'CCND3*NRIP1' 'CCNE2*DUSP14' 'CCT7*DRAP1' 'CCT7*GNAI1' 'CDK5R1*P4HTM'
 'CDK6*GNPDA1' 'CDK7*CTNNAL1' 'CEP57*TJP1' 'CFLAR*ZNF131' 'CGRRF1*SFN'
 'CHEK1*DAXX' 'CLTB*DUSP3' 'CLTB*TXNDC9' 'COG7*EIF4G1' 'COL1A1*RFC5'
 'CSK*VPS28' 'CTSL1*MRPL18' 'CTSL1*USP14' 'CXCR4*HMGCS1' 'DCTD*DRAP1'
 'DNAJB1*GNA11' 'DNAJB1*PUM2' 'DNAJB6*ERBB3' 'DNAJC15*DRAP1'
 'DNAJC15*ITFG1' 'DNAJC15*KCTD5' 'DNAJC15*OXA1L' 'DNAJC15*PCBD1'
 'DNAJC15*PGRMC1' 'DNAJC15*SFN' 'DRAP1*GRWD1' 'DRAP1*HSPA8' 'DRAP1*PIGB'
 'DRAP1*SNX7' 'DRAP1*TMCO1' 'DUSP

In [5]:
# Read the accumulated permutation-test results back in; the file uses ';' as
# its field separator. The bare name on the last line displays the frame.
d = pd.read_csv(
    filepath_or_buffer="../data/permutation_test_results.csv",
    delimiter=";",
)
d

Unnamed: 0,project,interaction-type,n-hits,hits
0,STAD,additive,114,"[ABHD4*PARP1,ABHD4*TMEM97,ACBD3*DPH2,ACD*SPDEF..."
1,STAD,competing,118,"[ABCC5*SPDEF,ABCC5*XPNPEP1,ACD*TRAPPC3,ADAM10*..."
2,STAD,xor,42,"[ARFIP2*CALM3,BDH1*GRWD1,BUB1B*TMEM97,CALM3*HN..."
3,READ,additive,331,"[AARS*FIS1,AARS*LOXL1,ABHD6*FIS1,ADCK3*ATP6V1F..."
4,READ,competing,494,"[AARS*BRPF1,AARS*CAMSAP2,AARS*DDX42,AARS*HMGCS..."
...,...,...,...,...
58,CESC,competing,142,"[ABL1*DNMT1,ABL1*OXCT1,ABL1*PIK3R4,ADNP2*DDB2,..."
59,CESC,xor,44,"[ACD*SATB1,ACD*UBE3B,ARHGEF12*BLMH,ATMIN*PROS1..."
60,BRCA,additive,124,"[ABCC5*CDKN2A,ABCC5*NOP16,ABCC5*TCERG1,ABL1*IC..."
61,BRCA,competing,196,"[AARS*BMP4,ABCC5*ASAH1,ABCC5*CGRRF1,ABCC5*SPDE..."


In [11]:
# Print one LaTeX table row per project:
#   <project> & <additive hits> & <competing hits> & <xor hits> \\
end = "\\"  # a single backslash; emitted twice per row as the LaTeX row terminator


def _first_n_hits(project_rows, interaction_type, missing="--"):
    """Return the first recorded n-hits for `interaction_type`, or `missing` if there is none."""
    matches = project_rows.loc[project_rows['interaction-type'] == interaction_type, 'n-hits']
    # A project whose results are incomplete has no row for some types;
    # indexing the first element unconditionally would raise IndexError.
    return matches.iloc[0] if len(matches) else missing


for pr in np.unique(d["project"].values):
    df = d[d['project'] == pr]
    add = _first_n_hits(df, 'additive')
    com = _first_n_hits(df, 'competing')
    xor = _first_n_hits(df, 'xor')
    print(f"{pr} & {add} & {com} & {xor} {end}{end}")

BLCA & 113 & 149 & 58 \\
BRCA & 124 & 196 & 55 \\
CESC & 91 & 142 & 44 \\
COAD & 89 & 51 & 22 \\
GBM & 95 & 97 & 37 \\
HNSC & 112 & 272 & 217 \\
KIRC & 527 & 571 & 16 \\
KIRP & 68 & 95 & 25 \\
LAML & 277 & 339 & 45 \\
LGG & 253 & 382 & 26 \\
LIHC & 106 & 115 & 18 \\
LUAD & 113 & 137 & 38 \\
LUSC & 73 & 64 & 23 \\
METABRIC & 830 & 925 & 17 \\
OV & 83 & 105 & 69 \\
PRAD & 54 & 27 & 59 \\
READ & 331 & 494 & 305 \\
SKCM & 138 & 122 & 59 \\
STAD & 114 & 118 & 42 \\
THCA & 101 & 116 & 63 \\
UCEC & 105 & 129 & 31 \\
