# Python Notebook

In [None]:
def add_row(change):
    """Insert one blank row at the top of the global ``gene_pair`` table.

    The existing rows are kept in order below the new row, the row index is
    renumbered 0..n, and ``update_table()`` is called afterwards.

    The blank row is created by reindexing rather than by round-tripping the
    frame through a list of dicts: a dict can hold each column label only
    once, so the dict-based approach drops columns whenever labels repeat.
    Reindexing the row axis leaves the columns (duplicates included) and
    their order untouched. The new row holds missing values (NaN/NA).

    Parameters
    ----------
    change
        Not used by this function.
        NOTE(review): presumably the widget/event payload passed by the
        callback machinery -- confirm against where this is registered.
    """
    global gene_pair
    n_rows = len(gene_pair)
    # Relabel the existing rows 1..n so that label 0 is free for the new row.
    shifted = gene_pair.set_axis(pd.RangeIndex(1, n_rows + 1), axis=0)
    # Reindexing onto 0..n creates row 0 filled with missing values.
    gene_pair = shifted.reindex(pd.RangeIndex(n_rows + 1))
    update_table()

# Drop the bottom row of the global table and refresh the display
def remove_row(change):
    """Remove the last row of the global ``gene_pair`` table.

    Does nothing (and does not call ``update_table()``) when the table has
    no rows. ``change`` is accepted but not used.
    """
    global gene_pair
    if not len(gene_pair):
        return  # nothing left to remove
    gene_pair = gene_pair[:-1]
    update_table()

In [65]:
# List the column labels of gene_pair.
# NOTE(review): in the output recorded below, the label 'Ligand ZFIN ID' appears
# four times, including twice among the receptor columns -- possibly mislabeled
# receptor ZFIN columns; confirm. Repeated labels make label-based selection ambiguous.
gene_pair.columns

Index(['Human LR Pair', 'Ligand', 'Receptor', 'Interaction Source',
       'PMID support', 'Ligand HGNC ID', 'Ligand location', 'Receptor HGNC ID',
       'Receptor location', 'Perplexity', 'Ligand name', 'Ligand MGI ID',
       'Ligand RGD ID', 'Mouse Ligand', 'Rat Ligand', 'Ligand ZFIN ID',
       'Ligand ZFIN ID', 'Zebrafish Ligand', 'Zebrafish Ligand name',
       'Receptor name', 'Receptor MGI ID', 'Receptor RGD ID', 'Mouse Receptor',
       'Rat Receptor', 'Ligand ZFIN ID', 'Ligand ZFIN ID',
       'Zebrafish Receptor', 'Zebrafish Receptor name', 'HGNC L R',
       'sanity check', 'curator', 'secondary source?'],
      dtype='object')

In [9]:
# Rows whose "Human LR Pair" value repeats an earlier row (duplicated() leaves the
# first occurrence unflagged). gene_pair00 must already exist in the kernel; it is
# not created in this cell.
duplicates = gene_pair00.loc[gene_pair00["Human LR Pair"].duplicated()]
print(duplicates.loc[:, "Human LR Pair"])


2312       TAFA4 FPR1
2333    SLAMF1 SLAMF1
2342     FLRT3 ADGRL3
2344     TENM2 ADGRL1
Name: Human LR Pair, dtype: object


## Testing LIANA+

In [14]:
import liana as li
import omnipath as op
import decoupler as dc
import pandas as pd

In [15]:
import sys
import os
# os.path.abspath resolves "src" against the current working directory, so the
# import below only works when the kernel is started from the directory that
# contains src/.
sys.path.append(os.path.abspath("src"))  # Add src directory to path
# gene_pair0 is the table the ligand/receptor columns are taken from further down.
from createDataTable import gene_pair0

### Pathway Annotations

In [43]:
# Load the PROGENy pathways through decoupler, which returns them in a more convenient format.
# The recorded output has the columns source (pathway name), target (gene symbol),
# weight and p_value.
# NOTE(review): top=10000 presumably limits how many genes are kept per pathway --
# confirm against the decoupler documentation.
progeny = dc.get_progeny(top=10000)
progeny

Unnamed: 0,source,target,weight,p_value
0,Androgen,TMPRSS2,11.490631,0.0
1,Androgen,NKX3-1,10.622551,0.0
2,Androgen,MBOAT2,10.472733,0.0
3,Androgen,KLK2,10.176186,0.0
4,Androgen,SARG,11.386852,0.0
...,...,...,...,...
139455,p53,SIN3B,0.347078,0.269528
139456,p53,SS18,-0.516365,0.26954
139457,p53,CAPN3,1.14331,0.269546
139458,p53,BRICD5,-0.38892,0.269551


In [44]:
# Keep only the ligand and receptor columns and lowercase their labels
# ("ligand", "receptor") in a single expression, so re-running the cell
# always rebuilds lr_pairs from gene_pair0.
lr_pairs = gene_pair0.loc[:, ["Ligand", "Receptor"]].rename(columns=str.lower)

In [45]:
# Preview the ligand/receptor pair table (the display is truncated for long frames).
lr_pairs

Unnamed: 0,ligand,receptor
0,CCL3L3,ACKR2
1,DEFB103B,CCR2
2,CCL3L3,CCR5
3,DEFB103B,CCR6
4,DEFB4A,CCR6
...,...,...
2361,KIR2DL5A,PVR
2362,SAA1,SCARB1
2363,SAA1,TLR2
2364,SAA1,TLR4


In [46]:
# Generate the ligand-receptor geneset from the pair table and the PROGENy table.
# The result (displayed in the next cell) has source, interaction and weight columns;
# each interaction is written as "LIGAND^RECEPTOR", using the lr_sep given here.
lr_progeny = li.rs.generate_lr_geneset(lr_pairs, progeny, lr_sep="^")

In [47]:
# Inspect the pathway-annotated interactions.
lr_progeny

Unnamed: 0,source,interaction,weight
14,NFkB,IFNA13^IFNAR1,0.582020
46,TNFa,IFNA13^IFNAR1,1.219279
57,Trail,IFNA13^IFNAR1,0.339437
89,NFkB,IFNA13^IFNAR2,2.453309
108,TNFa,IFNA13^IFNAR2,4.271504
...,...,...,...
140018,PI3K,LRFN4^PTPRS,-0.742439
140068,Trail,LRFN5^PTPRD,0.367912
140157,EGFR,KIR2DL5A^PVR,0.801227
140179,MAPK,KIR2DL5A^PVR,0.933643


In [48]:
# Number of distinct interactions that received a pathway annotation. Some pairs
# are missing: the recorded result is 2028, while lr_pairs is displayed with a
# last row index of 2364.
lr_progeny["interaction"].nunique(dropna=False)

2028

In [49]:
# Save the pathway annotations per pair as CSV, without the row index.
# The path is relative to the current working directory.
output_file = "data/pathway_annotations_per_pair.csv"
lr_progeny.to_csv(path_or_buf=output_file, index=False)

### Disease Annotations

In [24]:
# Download the DisGeNet annotations through OmniPath (this performs a network download).
diseases = op.requests.Annotations.get(resources=["DisGeNet"])

Downloading annotations for all proteins from the following resources: `['DisGeNet']`


In [26]:
# Reshape the downloaded annotations into one (genesymbol, disease) row per
# disease term, then build the ligand-receptor disease geneset.
# NOTE: this cell rebinds `diseases`, so it expects the freshly downloaded table
# (with 'label' and 'value' columns); re-run the download cell before re-running it.
diseases = (
    diseases.loc[:, ['genesymbol', 'label', 'value']]
    # one column per label; multiple values for a gene are joined with '; '
    .pivot_table(
        index='genesymbol',
        columns='label',
        values='value',
        aggfunc=lambda vals: '; '.join(vals),
    )
    .reset_index()
    .loc[:, ['genesymbol', 'disease']]
    # split the joined string back into a list so explode() yields one row per term
    .assign(disease=lambda frame: frame['disease'].str.split('; '))
    .explode('disease')
)
lr_diseases = li.rs.generate_lr_geneset(
    lr_pairs,
    diseases,
    source='disease',
    target='genesymbol',
    weight=None,
    lr_sep="^",
)
lr_diseases.sort_values(by="interaction")



Unnamed: 0,disease,interaction
786337,Bipolar Disorder,ACE^BDKRB2
788038,Hypotension,ACE^BDKRB2
788618,Major Depressive Disorder,ACE^BDKRB2
790218,Unipolar Depression,ACE^BDKRB2
787989,Hypertensive disease,ACE^BDKRB2
...,...,...
835601,"Mammary Neoplasms, Human",YBX1^NOTCH1
835101,Colonic Neoplasms,YBX1^NOTCH1
835012,Breast Carcinoma,YBX1^NOTCH1
835283,Malignant neoplasm of breast,YBX1^NOTCH1


In [34]:
# Number of distinct interactions that received a disease annotation. Some pairs
# are missing: the recorded result is 593, while lr_pairs is displayed with a
# last row index of 2364.
lr_diseases["interaction"].nunique(dropna=False)

593

In [29]:
# Save the disease annotations per pair as CSV, without the row index.
# The path is relative to the current working directory.
output_file = "data/disease_annotations_per_pair.csv"
lr_diseases.to_csv(path_or_buf=output_file, index=False)

In [37]:
# List the annotation resource names available through OmniPath
# (the recorded output includes 'DisGeNet' and 'PROGENy', both used above).
op.requests.Annotations.resources()

('Adhesome',
 'Almen2009',
 'Baccin2019',
 'CORUM_Funcat',
 'CORUM_GO',
 'CSPA',
 'CSPA_celltype',
 'CancerDrugsDB',
 'CancerGeneCensus',
 'CancerSEA',
 'CellCall',
 'CellCellInteractions',
 'CellChatDB',
 'CellChatDB_complex',
 'CellPhoneDB',
 'CellPhoneDB_complex',
 'CellTalkDB',
 'CellTypist',
 'Cellinker',
 'Cellinker_complex',
 'ComPPI',
 'CytoSig',
 'DGIdb',
 'DisGeNet',
 'EMBRACE',
 'Exocarta',
 'GO_Intercell',
 'GPCRdb',
 'Guide2Pharma',
 'HGNC',
 'HPA_secretome',
 'HPA_subcellular',
 'HPA_tissue',
 'HPMR',
 'HumanCellMap',
 'ICELLNET',
 'ICELLNET_complex',
 'IntOGen',
 'Integrins',
 'InterPro',
 'KEGG-PC',
 'Kirouac2010',
 'LOCATE',
 'LRdb',
 'Lambert2018',
 'MCAM',
 'MSigDB',
 'Matrisome',
 'MatrixDB',
 'Membranome',
 'NetPath',
 'OPM',
 'PROGENy',
 'PanglaoDB',
 'Phobius',
 'Phosphatome',
 'Ramilowski2015',
 'Ramilowski_location',
 'SIGNOR',
 'SignaLink_function',
 'SignaLink_pathway',
 'Surfaceome',
 'TCDB',
 'TFcensus',
 'TopDB',
 'UniProt_family',
 'UniProt_keyword',
 'Un