# Python Notebook

In [None]:
def add_row(change):
    global gene_pair
    # Add a new row at the top with None values
    new_row = {col: None for col in gene_pair.columns}
    gene_pair = pd.DataFrame([new_row] + gene_pair.to_dict(orient="records"))
    update_table()

# Function to remove the last row of the dataframe
def remove_row(change):
    global gene_pair
    if len(gene_pair) > 0:
        gene_pair = gene_pair[:-1]  # Remove the last row
        update_table()

In [61]:
## Function to prepare datatables (cleaning and hyperlinking, adding tool tips, etc) input for the database qmds
import sys, os
sys.path.append(os.path.abspath("src"))  
from itables import init_notebook_mode
import pandas as pd
from itables import show
from itables import options
from IPython.display import HTML, display
import numpy as np
import fetchGSheet 
import warnings

# Suppress SettingWithCopyWarning
warnings.simplefilter("ignore", category=UserWarning)


# Select only the relevant columns from pop_up_info

pop_up_info = fetchGSheet.pop_up_info.rename(columns={"Mouse genome informatics (MGI) ID": "MGI ID", 
                                                      "Zebrafish genome database (ZFIN) ID": "ZFIN ID"})

pop_up_info_lim = pop_up_info[["Approved symbol", "Approved name", "MGI ID", "ZFIN ID"]]
pop_up_info_lim = pop_up_info_lim.drop_duplicates(subset="Approved symbol", keep="first")

# Drop columns where all values are NA in gene_pair
gene_pair = fetchGSheet.gene_pair.dropna(axis=1, how='all')
# Fetch species IDs from the dataset
hgnc_id = [col for col in gene_pair.columns if "HGNC ID" in col]
hgnc_id = pd.concat([gene_pair[col] for col in hgnc_id]).unique()

# Rename columns for better clarity
gene_pair = gene_pair.rename(columns={
    "Ligand receptor pair": "Human LR Pair",
    "Ligand gene symbol": "Ligand",
    "Receptor gene symbol": "Receptor",
    "Perplexity link": "Perplexity",
    "Source": "Interaction Source"
})

# Merge gene_pair with pop_up_info_lim for Ligand(L)
gene_pair = gene_pair.merge(pop_up_info_lim, how='left', left_on='Ligand', right_on='Approved symbol')

gene_pair = gene_pair.rename(columns={"Approved name": "Ligand name", 
                                     "MGI ID": "Ligand MGI ID",
                                     "ZFIN ID": "Ligand ZFIN ID"},
                            )

# Add MGI annotation
MGI_info = pd.read_csv("data/MGI_ID_biomart.csv")
gene_pair = gene_pair.merge(MGI_info, how='left', left_on='Ligand MGI ID', right_on='MGI ID')

# Add ZFIN annotation
ZFIN_info = pd.read_csv("data/ZFIN_ID_biomart.csv")
ZFIN_info['ZFIN ID'] = "ZFIN:" + ZFIN_info['ZFIN ID'].astype(str)
gene_pair = gene_pair.merge(ZFIN_info, how='left', left_on='Ligand ZFIN ID', right_on='ZFIN ID')

# Add ZFIN id and symbol
ZFIN_info = pd.read_csv("data/ZFIN_ID_human_orthos.txt", sep="\t", skiprows=1)
ZFIN_info = ZFIN_info[['ZFIN ID', 'ZFIN Symbol', 'ZFIN Name', 'HGNC ID']]
ZFIN_info = ZFIN_info.dropna(subset=['HGNC ID'])
ZFIN_info = ZFIN_info.drop_duplicates(subset=['HGNC ID'])
ZFIN_info['HGNC ID'] = ZFIN_info['HGNC ID'].apply(lambda x: f'HGNC:{int(x)}')


gene_pair = gene_pair.drop(columns=["ZFIN ID", "MGI ID", "HGNC ID"])

gene_pair = gene_pair.rename(columns={
                                     "MGI name": "Mouse Ligand", 
                                     "ZFIN name": "Zebrafish Ligand",
                                     "ZFIN ID": "Ligand ZFIN ID",
                                     "ZFIN Symbol": "Zebrafish Ligand",
                                     "ZFIN Name": "Zebrafish Ligand name"}
                            )

gene_pair = gene_pair.merge(pop_up_info_lim, how='left', left_on='Receptor', right_on='Approved symbol')
gene_pair = gene_pair.merge(ZFIN_info, how='left', left_on='Ligand HGNC ID', right_on='HGNC ID')
gene_pair = gene_pair.rename(columns={"Approved name": "Receptor name",
                                      "MGI ID": "Receptor MGI ID",
                                      "ZFIN ID": "Receptor ZFIN ID"}
                            )

# Add MGI name
gene_pair = gene_pair.merge(MGI_info, how='left', left_on='Receptor MGI ID', right_on='MGI ID')
gene_pair = gene_pair.merge(ZFIN_info, how='left', left_on='Receptor ZFIN ID', right_on='ZFIN ID')
ZFIN_info

KeyError: "['HGNC ID'] not found in axis"

In [62]:
## Function to prepare datatables (cleaning and hyperlinking, adding tool tips, etc) input for the database qmds
import sys, os
sys.path.append(os.path.abspath("src"))  
from itables import init_notebook_mode
import pandas as pd
from itables import show
from itables import options
from IPython.display import HTML, display
import numpy as np
import fetchGSheet 
import warnings

# Suppress SettingWithCopyWarning
warnings.simplefilter("ignore", category=UserWarning)


# Select only the relevant columns from pop_up_info

pop_up_info = fetchGSheet.pop_up_info.rename(columns={"Mouse genome informatics (MGI) ID": "MGI ID", 
                                                      "Zebrafish genome database (ZFIN) ID": "ZFIN ID"})

pop_up_info_lim = pop_up_info[["Approved symbol", "Approved name", "MGI ID", "ZFIN ID"]]
pop_up_info_lim = pop_up_info_lim.drop_duplicates(subset="Approved symbol", keep="first")

# Drop columns where all values are NA in gene_pair
gene_pair = fetchGSheet.gene_pair.dropna(axis=1, how='all')
# Fetch species IDs from the dataset
hgnc_id = [col for col in gene_pair.columns if "HGNC ID" in col]
hgnc_id = pd.concat([gene_pair[col] for col in hgnc_id]).unique()

# Rename columns for better clarity
gene_pair = gene_pair.rename(columns={
    "Ligand receptor pair": "Human LR Pair",
    "Ligand gene symbol": "Ligand",
    "Receptor gene symbol": "Receptor",
    "Perplexity link": "Perplexity",
    "Source": "Interaction Source"
})

# Merge gene_pair with pop_up_info_lim for Ligand(L)
gene_pair = gene_pair.merge(pop_up_info_lim, how='left', left_on='Ligand', right_on='Approved symbol')

gene_pair = gene_pair.rename(columns={"Approved name": "Ligand name", 
                                     "MGI ID": "Ligand MGI ID",
                                     "ZFIN ID": "Ligand ZFIN ID"},
                            )

# Add MGI annotation
MGI_info = pd.read_csv("data/MGI_ID_biomart.csv")
gene_pair = gene_pair.merge(MGI_info, how='left', left_on='Ligand MGI ID', right_on='MGI ID')

# Add ZFIN annotation
ZFIN_info = pd.read_csv("data/ZFIN_ID_biomart.csv")
ZFIN_info['ZFIN ID'] = "ZFIN:" + ZFIN_info['ZFIN ID'].astype(str)
gene_pair = gene_pair.merge(ZFIN_info, how='left', left_on='Ligand ZFIN ID', right_on='ZFIN ID')

# Add ZFIN id and symbol
ZFIN_info = pd.read_csv("data/ZFIN_ID_human_orthos.txt", sep="\t", skiprows=1)
ZFIN_info = ZFIN_info[['ZFIN ID', 'ZFIN Symbol', 'ZFIN Name', 'HGNC ID']]

ZFIN_info = ZFIN_info.dropna(subset=['HGNC ID'])
ZFIN_info = ZFIN_info.drop_duplicates(subset=['HGNC ID'])
ZFIN_info['HGNC ID'] = ZFIN_info['HGNC ID'].apply(lambda x: f'HGNC:{int(x)}')
gene_pair = gene_pair.merge(ZFIN_info, how='left', left_on='Ligand HGNC ID', right_on='HGNC ID')

gene_pair = gene_pair.drop(columns=["ZFIN ID", "MGI ID", "HGNC ID"])

gene_pair = gene_pair.rename(columns={
                                     "MGI name": "Mouse Ligand", 
                                     "ZFIN name": "Zebrafish Ligand",
                                     "ZFIN ID": "Ligand ZFIN ID",
                                     "ZFIN Symbol": "Zebrafish Ligand",
                                     "ZFIN Name": "Zebrafish Ligand name"}
                            )

gene_pair = gene_pair.merge(pop_up_info_lim, how='left', left_on='Receptor', right_on='Approved symbol')
gene_pair = gene_pair.rename(columns={"Approved name": "Receptor name",
                                      "MGI ID": "Receptor MGI ID",
                                      "ZFIN ID": "Receptor ZFIN ID"}
                            )
gene_pair

Unnamed: 0,Human LR Pair,Interaction Source,PMID support,Ligand,Ligand HGNC ID,Ligand location,Receptor,Receptor HGNC ID,Receptor location,HGNC L R,...,Ligand RGD ID,Mouse Ligand,Rat Ligand,Ligand ZFIN ID,Zebrafish Ligand,Zebrafish Ligand name,Approved symbol_y,Receptor name,Receptor MGI ID,Receptor RGD ID
0,CCL3L3 ACKR2,Ramilowski_2015_Literature_supported,10364178,CCL3L3,HGNC:30554,secreted,ACKR2,HGNC:1565,plasma membrane,HGNC:30554 HGNC:1565,...,,Ccl3,,,,,ACKR2,atypical chemokine receptor 2,MGI:1891697,RGD:620323
1,DEFB103B CCR2,Ramilowski_2015_Literature_supported,23390582,DEFB103B,HGNC:31702,secreted,CCR2,HGNC:1603,plasma membrane,HGNC:31702 HGNC:1603,...,,Defb14,,,,,CCR2,C-C motif chemokine receptor 2,MGI:106185,RGD:620876
2,CCL3L3 CCR5,Ramilowski_2015_Literature_supported,"11734558, 10364178,",CCL3L3,HGNC:30554,secreted,CCR5,HGNC:1606,plasma membrane,HGNC:30554 HGNC:1606,...,,Ccl3,,,,,CCR5,C-C motif chemokine receptor 5,MGI:107182,RGD:620596
3,DEFB103B CCR6,Ramilowski_2015_Literature_supported,23390582,DEFB103B,HGNC:31702,secreted,CCR6,HGNC:1607,plasma membrane,HGNC:31702 HGNC:1607,...,,Defb14,,,,,CCR6,C-C motif chemokine receptor 6,MGI:1333797,RGD:1308562
4,DEFB4A CCR6,Ramilowski_2015_Literature_supported,1052134711714836,DEFB4A,HGNC:2767,secreted,CCR6,HGNC:1607,plasma membrane,HGNC:2767 HGNC:1607,...,RGD:1563630,Defb3,Defb5,,,,CCR6,C-C motif chemokine receptor 6,MGI:1333797,RGD:1308562
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2361,KIR2DL5A PVR,ConnectomeDB2025 (this publication),36377656,KIR2DL5A,HGNC:16345,,PVR,HGNC:9705,plasma membrane,HGNC:16345 HGNC:9705,...,,Kir3dl1,,,,,PVR,PVR cell adhesion molecule,MGI:107741,RGD:3813
2362,SAA1 SCARB1,ConnectomeDB2025 (this publication),15561721,SAA1,HGNC:10513,,SCARB1,HGNC:1664,plasma membrane,HGNC:10513 HGNC:1664,...,,Saa1,,ZDB-GENE-040927-15,saa,serum amyloid A,SCARB1,scavenger receptor class B member 1,MGI:893578,RGD:2302
2363,SAA1 TLR2,ConnectomeDB2025 (this publication),18566366,SAA1,HGNC:10513,,TLR2,HGNC:11848,plasma membrane,HGNC:10513 HGNC:11848,...,,Saa1,,ZDB-GENE-040927-15,saa,serum amyloid A,TLR2,toll like receptor 2,MGI:1346060,RGD:735138
2364,SAA1 TLR4,ConnectomeDB2025 (this publication),35247611,SAA1,HGNC:10513,,TLR4,HGNC:11850,plasma membrane,HGNC:10513 HGNC:11850,...,,Saa1,,ZDB-GENE-040927-15,saa,serum amyloid A,TLR4,toll like receptor 4,MGI:96824,RGD:3870


In [63]:
### ZEBRAFISH ###

## Limit to those with either Zebrafish Ligand or Receptor
Zebrafish_columns = [col for col in gene_pair.columns if "ZFIN" in col or "Zebrafish" in col]
# Filter rows where all "Zebrafish" columns are not " "
Zebrafish_gene_pair = gene_pair000[(gene_pair000[Zebrafish_columns].map(str.strip) != "").all(axis=1)]


new_columns = Zebrafish_gene_pair.columns.tolist()

new_columns = [
    col.replace("Zebrafish ", "").strip()
    if "Ligand" in col or "Receptor" in col
    else col
    for col in new_columns
]
Zebrafish_gene_pair.columns = new_columns

# Dynamically identify columns containing "Ligand" and "Receptor" in their names 
# since it is now in span format
ligand_col = [col for col in Zebrafish_gene_pair.columns if "Ligand&nbsp;" in col][3]
receptor_col = [col for col in Zebrafish_gene_pair.columns if "Receptor&nbsp;" in col][3]
ligand_location = [col for col in Zebrafish_gene_pair.columns if "Ligand location" in col][0]
receptor_location = [col for col in Zebrafish_gene_pair.columns if "Receptor location" in col][0]

def format_lr_pair(row):
    if row[ligand_location] == 'secreted':
        return f"{row[ligand_col]} <span style='font-size: 15px;'>○</span> <span style='font-size: 24px;'>⤚</span> {row[receptor_col]}"
    elif row[ligand_location] == '':
        return f"{row[ligand_col]} <span style='font-size: 15px;'>○</span> <span style='font-size: 24px;'>⤚</span> {row[receptor_col]}"
    elif row[receptor_location] == 'plasma membrane':
        return f"{row[ligand_col]} <span style='font-size: 24px;'>⤙</span> <span style='font-size: 24px;'>⤚</span> {row[receptor_col]}"
    else:
        return f"{row[ligand_col]} \u2192 {row[receptor_col]}"

Zebrafish_gene_pair1 = Zebrafish_gene_pair.copy() 
Zebrafish_gene_pair1.loc[:, "Zebrafish LR Pair"] = Zebrafish_gene_pair1.apply(format_lr_pair, axis=1)
Zebrafish_columns = [col for col in Zebrafish_gene_pair1.columns if "ZFIN" in col]
# Reorder the DataFrame
new_order = ["Zebrafish LR Pair", ligand_col, receptor_col] + Zebrafish_columns + human_columns
Zebrafish_gene_pair1 = Zebrafish_gene_pair1[new_order]
Zebrafish_gene_pair1 = Zebrafish_gene_pair1.reset_index(drop=True)  

In [65]:
gene_pair.columns

Index(['Human LR Pair', 'Ligand', 'Receptor', 'Interaction Source',
       'PMID support', 'Ligand HGNC ID', 'Ligand location', 'Receptor HGNC ID',
       'Receptor location', 'Perplexity', 'Ligand name', 'Ligand MGI ID',
       'Ligand RGD ID', 'Mouse Ligand', 'Rat Ligand', 'Ligand ZFIN ID',
       'Ligand ZFIN ID', 'Zebrafish Ligand', 'Zebrafish Ligand name',
       'Receptor name', 'Receptor MGI ID', 'Receptor RGD ID', 'Mouse Receptor',
       'Rat Receptor', 'Ligand ZFIN ID', 'Ligand ZFIN ID',
       'Zebrafish Receptor', 'Zebrafish Receptor name', 'HGNC L R',
       'sanity check', 'curator', 'secondary source?'],
      dtype='object')