# Python Notebook

In [None]:
def add_row(change):
    global gene_pair
    # Add a new row at the top with None values
    new_row = {col: None for col in gene_pair.columns}
    gene_pair = pd.DataFrame([new_row] + gene_pair.to_dict(orient="records"))
    update_table()

# Function to remove the last row of the dataframe
def remove_row(change):
    global gene_pair
    if len(gene_pair) > 0:
        gene_pair = gene_pair[:-1]  # Remove the last row
        update_table()

In [1]:
## Function to prepare datatables (cleaning and hyperlinking, adding tool tips, etc) input for the database qmds
import sys, os
sys.path.append(os.path.abspath("src"))  
from itables import init_notebook_mode
import pandas as pd
from itables import show
from itables import options
from IPython.display import HTML, display
import numpy as np
import fetchGSheet 
import warnings

# Suppress SettingWithCopyWarning
warnings.simplefilter("ignore", category=UserWarning)


# Select only the relevant columns from pop_up_info

pop_up_info = fetchGSheet.pop_up_info.rename(columns={"Mouse genome informatics (MGI) ID": "MGI ID","Rat genome database (RGD) ID": "RGD ID"})

pop_up_info_lim = pop_up_info[["Approved symbol", "Approved name", "MGI ID", "RGD ID"]]
pop_up_info_lim = pop_up_info_lim.drop_duplicates(subset="Approved symbol", keep="first")

# Drop columns where all values are NA in gene_pair
gene_pair = fetchGSheet.gene_pair.dropna(axis=1, how='all')

# Rename columns for better clarity
gene_pair = gene_pair.rename(columns={
    "Ligand receptor pair": "LR Pair",
    "Ligand gene symbol": "Ligand",
    "Receptor gene symbol": "Receptor",
    "Perplexity link": "Perplexity"
})

# Merge gene_pair with pop_up_info_lim for Ligand(L)
gene_pair = gene_pair.merge(pop_up_info_lim, how='left', left_on='Ligand', right_on='Approved symbol')

gene_pair = gene_pair.rename(columns={"Approved name": "Ligand name", 
                                     "MGI ID": "Ligand MGI ID",
                                     "RGD ID": "Ligand RGD ID"},
                            )

# Add MGI name
MGI_info = pd.read_csv("data/MGI_ID_biomart.csv")
gene_pair = gene_pair.merge(MGI_info, how='left', left_on='Ligand MGI ID', right_on='MGI ID')

# Add RGD name
RGD_info = pd.read_csv("data/RGD_ID_biomart.csv")
RGD_info['RGD ID'] = "RGD:" + RGD_info['RGD ID'].astype(str)
gene_pair = gene_pair.merge(RGD_info, how='left', left_on='Ligand RGD ID', right_on='RGD ID')

gene_pair = gene_pair.drop(columns=["RGD ID", "MGI ID"])

gene_pair = gene_pair.rename(columns={
                                     "MGI name": "Ligand MGI name", 
                                     "RGD name": "Ligand RGD name"}
                            )

gene_pair = gene_pair.merge(pop_up_info_lim, how='left', left_on='Receptor', right_on='Approved symbol')

gene_pair = gene_pair.rename(columns={"Approved name": "Receptor name",
                                      "MGI ID": "Receptor MGI ID",
                                      "RGD ID": "Receptor RGD ID"}
                            )

# Add MGI name
gene_pair = gene_pair.merge(MGI_info, how='left', left_on='Receptor MGI ID', right_on='MGI ID')
gene_pair = gene_pair.merge(RGD_info, how='left', left_on='Receptor RGD ID', right_on='RGD ID')
gene_pair = gene_pair.drop(columns=["RGD ID", "MGI ID"])

gene_pair = gene_pair.rename(columns={
                                     "MGI name": "Receptor MGI name", 
                                     "RGD name": "Receptor RGD name"}
                            )
gene_pair = gene_pair.drop(columns=["Approved symbol_x", "Approved symbol_y"])

# Drop columns where all values are NA in gene_pair
gene_pair = gene_pair.dropna(axis=1, how='all')

gene_pair = gene_pair.fillna(" ")
gene_pair = gene_pair[gene_pair['LR Pair'] != ' ']

if "PMID link" in gene_pair.columns:
    gene_pair = gene_pair.drop(columns=["PMID link"])

# Add
first_columns=['LR Pair','Source', 'Ligand','Receptor', 'Perplexity']
end_columns=['HGNC L R','sanity check', 'curator','secondary source?']
gene_pair = gene_pair[first_columns + [col for col in gene_pair.columns if col not in first_columns + end_columns] + end_columns]

# number of unique vars

lrPairsCount = len(gene_pair["LR Pair"].unique())

ligandCount = len(gene_pair["Ligand"].unique())

receptorCount = len(gene_pair["Receptor"].unique())

# Mouse Orthologue
MouseLigandCount = len(gene_pair["Ligand MGI ID"].unique())

MouseReceptorCount = len(gene_pair["Receptor MGI ID"].unique())

# Rat Orthologue
RatLigandCount = len(gene_pair["Ligand RGD ID"].unique())

RatReceptorCount = len(gene_pair["Receptor RGD ID"].unique())

gene_pair["PMID support"] = [value.replace(" ", "") for value in gene_pair["PMID support"]]

source = np.array(gene_pair["PMID support"].unique())
source = source.astype(str)
source = ",".join(sorted(set(filter(lambda x: x.lower() != 'nan', source))))

# Split the string into individual elements, filter out empty strings, and get unique values
source = sorted(
    set(filter(lambda x: x.strip() and x.strip().lower() != 'nan', source.split(',')))
)
source = [value.replace(" ", "") for value in source]
sourceCount = len(source)

# for creating PMIDs
gene_pair00 = gene_pair[['LR Pair', 'PMID support']]

# create URLs for the HGNC IDs

# ligand
gene_pair["Ligand HGNC ID"] = [
    '<a href="https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/{}" target="_blank">{}</a>'.format(ligand, ligand)
    for ligand in gene_pair["Ligand HGNC ID"]
]

# receptor
gene_pair["Receptor HGNC ID"] = [
    '<a href="https://www.genenames.org/data/gene-symbol-report/#!/hgnc_id/{}" target="_blank">{}</a>'.format(receptor, receptor)
    for receptor in gene_pair["Receptor HGNC ID"]
]

# Perplexity
gene_pair["Perplexity"] = [
    '<a href="{}" target="_blank"> <img src="https://img.icons8.com/?size=35&id=0NbBuNOxUwps&format=png&color=000000" alt="Perplexity AI" /></a>'.format(url)
    for url in gene_pair["Perplexity"]
]

# Function to generate hyperlinks for the "PMID support" column
# Function to generate hyperlinks for the "PMID support" column
def generate_links_with_doi(df, gene_column, pmid_column):
    def create_link(gene, sources):
        # Replace spaces with "——" in the gene name for the link
        gene_name = gene.replace(" ", "——")
        
        if len(sources) == 1:
            source = sources[0]
            if source.startswith("https://www.biorxiv.org/content/"):
                # If the value starts with "https://doi.org/", use it as the hyperlink
                return f'<a href="{source}" target="_blank">BioRxiv preprint</a>'
            else:
                # If it's a single PMID, hyperlink the PMID text
                return f'<a href="https://comp.med.yokohama-cu.ac.jp/collab/connectomeDB/pubmed/{gene_name}_pmid_details.html">{source}</a>'
        else:
            # If multiple PMIDs, show the count and hyperlink to the page
            return f'<a href="https://comp.med.yokohama-cu.ac.jp/collab/connectomeDB/pubmed/{gene_name}_pmid_details.html" target="_blank">{len(sources)} PMIDs</a>'

    # Process each row to generate the "PMID support" column
    df["PMID support"] = [
        create_link(
            gene=row[gene_column], 
            sources=[s.strip() for s in row[pmid_column].split(',') if s.strip()]
        )
        for _, row in df.iterrows()
    ]
    return df


# Generate the links for the "PMID support" column
gene_pair = generate_links_with_doi(gene_pair, gene_column="LR Pair", pmid_column="PMID support")

gene_pair["Ligand MGI ID"] = [
        f'<a href="https://www.informatics.jax.org/marker/{mouseOrth}" target="_blank">{mouseOrth}</a>' 
        if pd.notna(mouseOrth) and mouseOrth.strip() else "" 
        for mouseOrth in gene_pair["Ligand MGI ID"]
    ]

gene_pair["Receptor MGI ID"] = [
        f'<a href="https://www.informatics.jax.org/marker/{mouseOrth}" target="_blank">{mouseOrth}</a>' 
        if pd.notna(mouseOrth) and mouseOrth.strip() else "" 
        for mouseOrth in gene_pair["Receptor MGI ID"]
    ]

gene_pair["Ligand RGD ID"] = [
        f'<a href="https://rgd.mcw.edu/rgdweb/report/gene/main.html?id={ratOrth.replace("RGD:", "")}" target="_blank">{ratOrth}</a>' 
        if pd.notna(ratOrth) and ratOrth.strip() else "" 
        for ratOrth in gene_pair["Ligand RGD ID"]
    ]

gene_pair["Receptor RGD ID"] = [
        f'<a href="https://rgd.mcw.edu/rgdweb/report/gene/main.html?id={ratOrth.replace("RGD:", "")}" target="_blank">{ratOrth}</a>' 
        if pd.notna(ratOrth) and ratOrth.strip() else "" 
        for ratOrth in gene_pair["Receptor RGD ID"]
    ]

mouse_columns = [col for col in gene_pair.columns if "MGI" in col]
rat_columns = [col for col in gene_pair.columns if "RGD" in col]

gene_pair0 = gene_pair[['LR Pair', 'Ligand', 'Receptor', 'Perplexity', 'PMID support',
       'Ligand HGNC ID', 'Ligand location', 'Receptor HGNC ID',
       'Receptor location', 'Ligand name', 'Receptor name'] + mouse_columns + rat_columns]


# gene symbol
gene_pair["Ligand"] = [
    f'<span title="{ligand_name}">{ligand_symbol}</span>'
    for ligand_name, ligand_symbol in zip(gene_pair["Ligand name"], gene_pair["Ligand"])
]

# gene symbol
gene_pair["Receptor"] = [
    f'<span title="{receptor_name}">{receptor_symbol}</span>'
    for receptor_name, receptor_symbol in zip(gene_pair["Receptor name"], gene_pair["Receptor"])
]

def replace_spaces(row):
    if row['Ligand location'] == 'secreted':
        return row['LR Pair'].replace(" ", " ○ <span style='font-size: 30px;'>⤚</span> ")
    elif row['Ligand location'] == 'plasma membrane':
        return row['LR Pair'].replace(" ", " <span style='font-size: 30px;'>⤙</span> <span style='font-size: 30px;'>⤚</span> ")
    else:
        return row['LR Pair'].replace(" ", " \u2192 ")

# Apply the function to the 'LR Pair' column
gene_pair['LR Pair'] = gene_pair.apply(replace_spaces, axis=1)

gene_pair = gene_pair.drop(columns=["Ligand name", "Receptor name"])


# Create the links to the HTML cards
gene_pair["LR Pair"] = [
    f'<a href="https://comp.med.yokohama-cu.ac.jp/collab/connectomeDB/cards/{lrPairOrig}.html">{lrPair}</a>'
    for lrPairOrig, lrPair in zip(gene_pair0["LR Pair"], gene_pair["LR Pair"])
]


# Add tooltips to the column headers
gene_pair.columns = [
    f'<span title="Ligand Receptor Pair">{col}</span>' if col == "LR Pair" else
    f'<span title="Click the logo below to run Perplexity on the LR pair">{col}&nbsp;</span>' if col == "Perplexity" else
    f'<span title="Hover on symbols below to show gene names">{col}&nbsp;&nbsp;&nbsp;</span>' if col in ["Ligand", "Receptor"] else
    f'<span title="Click on HGNC IDs below for more details">{col}&nbsp;&nbsp;</span>' if col in ["Ligand HGNC ID", "Receptor HGNC ID"] else
    f'<span title="Click on the Pubmed IDs (PMID) below for more details">{col}</span>' if col == "PMID support" else
    f'<span title="Click on the Rat Genome Database(RGD) IDs below for more details">{col}</span>' if col in ["Ligand RGD ID", "Receptor RGD ID"] else
    f'<span title="Click on the Mouse Genome Informatics(MGI) IDs below for more details">{col}</span>' if col in ["Ligand MGI ID", "Receptor MGI ID"] else
    f'<span title="Location is defined as subcellular location">{col}</span>' if col in ["Ligand location", "Receptor location"] else
    f'<span title="Double-click header of {col} to ensure all values are shown">{col}&nbsp;</span>'
    for col in gene_pair.columns
]

gene_pair = gene_pair.reset_index(drop=True)  # Remove the index

## Limit to those with either Mouse Ligand or Receptor
# Find columns with "Mouse" in the name
mouse_columns = [col for col in gene_pair.columns if "MGI" in col]
# Filter rows where all "Mouse" columns are not " "
mouse_gene_pair = gene_pair[(gene_pair[mouse_columns].map(str.strip) != "").all(axis=1)]
# Dynamically identify columns containing "Ligand" and "Receptor" in their names 
# since it is now in span format
        
ligand_col = [col for col in mouse_gene_pair.columns if "Ligand MGI name" in col][0]
receptor_col = [col for col in mouse_gene_pair.columns if "Receptor MGI name" in col][0]
ligand_location = [col for col in mouse_gene_pair.columns if "Ligand location" in col][0]
receptor_location = [col for col in mouse_gene_pair.columns if "Receptor location" in col][0]

# Combine columns into "Mouse LR Pair" with appropriate replacements
def format_lr_pair(row):
    if row[ligand_location] == 'secreted':
        return f"{row[ligand_col]} ○ <span style='font-size: 30px;'>⤚</span> {row[receptor_col]}"
    elif row[receptor_location] == 'plasma membrane':
        return f"{row[ligand_col]} <span style='font-size: 30px;'>⤙</span> <span style='font-size: 30px;'>⤚</span> {row[receptor_col]}"
    else:
        return f"{row[ligand_col]} \u2192 {row[receptor_col]}"


# Apply the function row-wise and assign to the new column using .loc
mouse_gene_pair1 = mouse_gene_pair.copy() 
mouse_gene_pair1.loc[:, "Mouse LR Pair"] = mouse_gene_pair1.apply(format_lr_pair, axis=1)
# Reorder the DataFrame
new_order = ["Mouse LR Pair"] + mouse_columns + [col for col in mouse_gene_pair1.columns if col not in mouse_columns]
mouse_gene_pair = mouse_gene_pair1[new_order]
mouse_gene_pair = mouse_gene_pair1.reset_index(drop=True)  

## Limit to those with either Rat Ligand or Receptor
rat_columns = [col for col in gene_pair.columns if "RGD" in col]
# Filter rows where all "Rat" columns are not " "
rat_gene_pair = gene_pair[(gene_pair[rat_columns].map(str.strip) != "").all(axis=1)]
# Dynamically identify columns containing "Ligand" and "Receptor" in their names 
# since it is now in span format
ligand_col = [col for col in rat_gene_pair.columns if "Ligand RGD name" in col][0]
receptor_col = [col for col in rat_gene_pair.columns if "Receptor RGD name" in col][0]
ligand_location = [col for col in mouse_gene_pair.columns if "Ligand location" in col][0]
receptor_location = [col for col in mouse_gene_pair.columns if "Receptor location" in col][0]
# Combine columns into "Mouse LR Pair" with appropriate replacements
def format_lr_pair(row):
    if row[ligand_location] == 'secreted':
        return f"{row[ligand_col]} ○ <span style='font-size: 30px;'>⤚</span> {row[receptor_col]}"
    elif row[receptor_location] == 'plasma membrane':
        return f"{row[ligand_col]} <span style='font-size: 30px;'>⤙</span> <span style='font-size: 30px;'>⤚</span> {row[receptor_col]}"
    else:
        return f"{row[ligand_col]} \u2192 {row[receptor_col]}"

rat_gene_pair1 = rat_gene_pair.copy() 
rat_gene_pair1.loc[:, "Rat LR Pair"] = rat_gene_pair1.apply(format_lr_pair, axis=1)

# Reorder the DataFrame
new_order = ["Rat LR Pair"] + rat_columns + [col for col in rat_gene_pair1.columns if col not in rat_columns]
rat_gene_pair1 = rat_gene_pair1[new_order]
rat_gene_pair1 = rat_gene_pair1.reset_index(drop=True)  

In [2]:
rat_gene_pair1

Unnamed: 0,Rat LR Pair,"<span title=""Click on the Rat Genome Database(RGD) IDs below for more details"">Ligand RGD ID</span>","<span title=""Double-click header of Ligand RGD name to ensure all values are shown"">Ligand RGD name&nbsp;</span>","<span title=""Click on the Rat Genome Database(RGD) IDs below for more details"">Receptor RGD ID</span>","<span title=""Double-click header of Receptor RGD name to ensure all values are shown"">Receptor RGD name&nbsp;</span>","<span title=""Ligand Receptor Pair"">LR Pair</span>","<span title=""Double-click header of Source to ensure all values are shown"">Source&nbsp;</span>","<span title=""Hover on symbols below to show gene names"">Ligand&nbsp;&nbsp;&nbsp;</span>","<span title=""Hover on symbols below to show gene names"">Receptor&nbsp;&nbsp;&nbsp;</span>","<span title=""Click the logo below to run Perplexity on the LR pair"">Perplexity&nbsp;</span>",...,"<span title=""Location is defined as subcellular location"">Receptor location</span>","<span title=""Click on the Mouse Genome Informatics(MGI) IDs below for more details"">Ligand MGI ID</span>","<span title=""Double-click header of Ligand MGI name to ensure all values are shown"">Ligand MGI name&nbsp;</span>","<span title=""Click on the Mouse Genome Informatics(MGI) IDs below for more details"">Receptor MGI ID</span>","<span title=""Double-click header of Receptor MGI name to ensure all values are shown"">Receptor MGI name&nbsp;</span>","<span title=""Double-click header of HGNC L R to ensure all values are shown"">HGNC L R&nbsp;</span>","<span title=""Double-click header of sanity check to ensure all values are shown"">sanity check&nbsp;</span>","<span title=""Double-click header of curator to ensure all values are shown"">curator&nbsp;</span>","<span title=""Double-click header of secondary source? to ensure all values are shown"">secondary source?&nbsp;</span>",Rat LR Pair.1
0,Defb5 ○ <span style='font-size: 30px;'>⤚</span...,"<a href=""https://rgd.mcw.edu/rgdweb/report/gen...",Defb5,"<a href=""https://rgd.mcw.edu/rgdweb/report/gen...",Ccr6,"<a href=""https://comp.med.yokohama-cu.ac.jp/co...",Ramilowski_2015_Literature_supported,"<span title=""defensin beta 4A"">DEFB4A</span>","<span title=""C-C motif chemokine receptor 6"">C...","<a href=""https://www.perplexity.ai/search?q=Do...",...,plasma membrane,"<a href=""https://www.informatics.jax.org/marke...",Defb3,"<a href=""https://www.informatics.jax.org/marke...",Ccr6,HGNC:2767 HGNC:1607,ok,Al,,Defb5 ○ <span style='font-size: 30px;'>⤚</span...
1,Il17a ○ <span style='font-size: 30px;'>⤚</span...,"<a href=""https://rgd.mcw.edu/rgdweb/report/gen...",Il17a,"<a href=""https://rgd.mcw.edu/rgdweb/report/gen...",Il17rc,"<a href=""https://comp.med.yokohama-cu.ac.jp/co...",Ramilowski_2015_Literature_supported,"<span title=""interleukin 17A"">IL17A</span>","<span title=""interleukin 17 receptor C"">IL17RC...","<a href=""https://www.perplexity.ai/search?q=Do...",...,plasma membrane,"<a href=""https://www.informatics.jax.org/marke...",Il17a,"<a href=""https://www.informatics.jax.org/marke...",Il17rc,HGNC:5981 HGNC:18358,,,,Il17a ○ <span style='font-size: 30px;'>⤚</span...
2,Npnt ○ <span style='font-size: 30px;'>⤚</span>...,"<a href=""https://rgd.mcw.edu/rgdweb/report/gen...",Npnt,"<a href=""https://rgd.mcw.edu/rgdweb/report/gen...",Itgb1,"<a href=""https://comp.med.yokohama-cu.ac.jp/co...",Ramilowski_2015_Literature_supported,"<span title=""nephronectin"">NPNT</span>","<span title=""integrin subunit beta 1"">ITGB1</s...","<a href=""https://www.perplexity.ai/search?q=Do...",...,plasma membrane,"<a href=""https://www.informatics.jax.org/marke...",Npnt,"<a href=""https://www.informatics.jax.org/marke...",Itgb1,HGNC:27405 HGNC:6153,,,CellphoneDB,Npnt ○ <span style='font-size: 30px;'>⤚</span>...
3,Rph3a ○ <span style='font-size: 30px;'>⤚</span...,"<a href=""https://rgd.mcw.edu/rgdweb/report/gen...",Rph3a,"<a href=""https://rgd.mcw.edu/rgdweb/report/gen...",Nrxn1,"<a href=""https://comp.med.yokohama-cu.ac.jp/co...",Ramilowski_2015_Literature_supported,"<span title=""rabphilin 3A"">RPH3A</span>","<span title=""neurexin 1"">NRXN1</span>","<a href=""https://www.perplexity.ai/search?q=Do...",...,plasma membrane,"<a href=""https://www.informatics.jax.org/marke...",Rph3a,"<a href=""https://www.informatics.jax.org/marke...",Nrxn1,HGNC:17056 HGNC:8008,,,CellphoneDB,Rph3a ○ <span style='font-size: 30px;'>⤚</span...
4,Defb5 ○ <span style='font-size: 30px;'>⤚</span...,"<a href=""https://rgd.mcw.edu/rgdweb/report/gen...",Defb5,"<a href=""https://rgd.mcw.edu/rgdweb/report/gen...",Tlr4,"<a href=""https://comp.med.yokohama-cu.ac.jp/co...",Ramilowski_2015_Literature_supported,"<span title=""defensin beta 4A"">DEFB4A</span>","<span title=""toll like receptor 4"">TLR4</span>","<a href=""https://www.perplexity.ai/search?q=Do...",...,plasma membrane,"<a href=""https://www.informatics.jax.org/marke...",Defb3,"<a href=""https://www.informatics.jax.org/marke...",Tlr4,HGNC:2767 HGNC:11850,,,,Defb5 ○ <span style='font-size: 30px;'>⤚</span...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2144,Lrfn4 <span style='font-size: 30px;'>⤙</span> ...,"<a href=""https://rgd.mcw.edu/rgdweb/report/gen...",Lrfn4,"<a href=""https://rgd.mcw.edu/rgdweb/report/gen...",Ptprf,"<a href=""https://comp.med.yokohama-cu.ac.jp/co...",ConnectomeDB2025 (this publication),"<span title=""leucine rich repeat and fibronect...","<span title=""protein tyrosine phosphatase rece...","<a href=""https://www.perplexity.ai/search?q=Do...",...,plasma membrane,"<a href=""https://www.informatics.jax.org/marke...",Lrfn4,"<a href=""https://www.informatics.jax.org/marke...",Ptprf,HGNC:28456 HGNC:9670,,,,Lrfn4 <span style='font-size: 30px;'>⤙</span> ...
2145,Lrfn4 <span style='font-size: 30px;'>⤙</span> ...,"<a href=""https://rgd.mcw.edu/rgdweb/report/gen...",Lrfn4,"<a href=""https://rgd.mcw.edu/rgdweb/report/gen...",Ptprs,"<a href=""https://comp.med.yokohama-cu.ac.jp/co...",ConnectomeDB2025 (this publication),"<span title=""leucine rich repeat and fibronect...","<span title=""protein tyrosine phosphatase rece...","<a href=""https://www.perplexity.ai/search?q=Do...",...,plasma membrane,"<a href=""https://www.informatics.jax.org/marke...",Lrfn4,"<a href=""https://www.informatics.jax.org/marke...",Ptprs,HGNC:28456 HGNC:9681,,,,Lrfn4 <span style='font-size: 30px;'>⤙</span> ...
2146,Lrfn5 <span style='font-size: 30px;'>⤙</span> ...,"<a href=""https://rgd.mcw.edu/rgdweb/report/gen...",Lrfn5,"<a href=""https://rgd.mcw.edu/rgdweb/report/gen...",Ptprd,"<a href=""https://comp.med.yokohama-cu.ac.jp/co...",ConnectomeDB2025 (this publication),"<span title=""leucine rich repeat and fibronect...","<span title=""protein tyrosine phosphatase rece...","<a href=""https://www.perplexity.ai/search?q=Do...",...,plasma membrane,"<a href=""https://www.informatics.jax.org/marke...",Lrfn5,"<a href=""https://www.informatics.jax.org/marke...",Ptprd,HGNC:20360 HGNC:9668,,,,Lrfn5 <span style='font-size: 30px;'>⤙</span> ...
2147,Lrfn5 <span style='font-size: 30px;'>⤙</span> ...,"<a href=""https://rgd.mcw.edu/rgdweb/report/gen...",Lrfn5,"<a href=""https://rgd.mcw.edu/rgdweb/report/gen...",Ptprf,"<a href=""https://comp.med.yokohama-cu.ac.jp/co...",ConnectomeDB2025 (this publication),"<span title=""leucine rich repeat and fibronect...","<span title=""protein tyrosine phosphatase rece...","<a href=""https://www.perplexity.ai/search?q=Do...",...,plasma membrane,"<a href=""https://www.informatics.jax.org/marke...",Lrfn5,"<a href=""https://www.informatics.jax.org/marke...",Ptprf,HGNC:20360 HGNC:9670,,,,Lrfn5 <span style='font-size: 30px;'>⤙</span> ...


In [28]:
## Function to convert MGI/RGD IDs/Other Species to gene symbols via biomart
import sys
from biomart import BiomartServer
sys.path.append(os.path.abspath("src"))  
import fetchGSheet
from createDataTable import pop_up_info_lim

# rat: RGD, rnorvegicus_gene_ensembl, rdg_symbol
# mouse: MGI, mmusculus_gene_ensembl, external_gene_name

ID= "RGD" 
dataset= "rnorvegicus_gene_ensembl"
name = "rgd_symbol"

'external_gene_name'
# Connect to BioMart server
server = BiomartServer("http://www.ensembl.org/biomart")

# Access the mouse dataset
mouse_dataset = server.datasets[dataset]

# MGI IDs
MGI_ID=pop_up_info_lim[ID+' ID']
#print(len(MGI_ID))
MGI_ID= MGI_ID.unique()
MGI_ID=[id for id in MGI_ID if id.startswith(ID+":")]
if ID == "RGD":
    MGI_ID = [value.replace("RGD:", "") for value in MGI_ID]
# Connect to the BioMart server
server = BiomartServer("http://www.ensembl.org/biomart")
mouse_dataset = server.datasets[dataset]

# Function to query in chunks
def query_in_chunks(mgi_ids, chunk_size=100):
    gene_mapping = {}
    for i in range(0, len(mgi_ids), chunk_size):
        chunk = mgi_ids[i:i + chunk_size]
        try:
            response = mouse_dataset.search({
                'filters': {ID.lower()+'_id': chunk},
                'attributes': [ID.lower()+'_id', name]
            })
            # Parse the response and add to gene_mapping
            for line in response.iter_lines(decode_unicode=True):
                mgi_id, gene_name = line.split("\t")
                gene_mapping[mgi_id] = gene_name
        except Exception as e:
            print(f"Error processing chunk {i // chunk_size + 1}: {e}")
    return gene_mapping

# Query in chunks
gene_mapping = query_in_chunks(MGI_ID, chunk_size=100)
print(gene_mapping)
# Display results
for mgi, gene_name in gene_mapping.items():
    print(f"{mgi}: {gene_name}")
# Convert dictionary to DataFrame and save without index
pd.DataFrame.from_dict(gene_mapping, orient='index', columns=[ID+' name']) \
    .reset_index() \
    .rename(columns={'index': ID+' ID'}) \
    .to_csv("data/"+ID+"_ID_biomart.csv", index=False)

{'1303086': 'Abcb7', '1303134': 'Abca7', '1303145': 'Abhd12b', '1303164': 'Abhd16a', '1303237': 'Abhd5', '1304586': 'Abca12', '1304598': 'Abhd15', '1304681': 'Abhd11', '1304832': 'Aars1', '1304942': 'Abcf2', '1305246': 'Abhd17b', '1305301': 'Abce1', '1305520': 'Aak1', '1305693': 'Abhd8', '1305840': 'Abcg4', '1305895': 'Abca13', '1305931': 'Abca9', '1306281': 'Aamp', '1306696': 'Abcc10', '1307069': 'Abca8', '1307174': 'Abca3', '1307273': 'Abcd4', '1307333': 'Abca8a', '1307655': 'Abcb8', '1308084': 'Abhd10', '1308210': 'Abhd17c', '1308317': 'Abhd13', '1308701': 'Aaas', '1308998': 'Abca6', '1309445': 'Abca4', '1309721': 'Abhd14a', '1309726': 'Abhd16b', '1310468': 'Abcf3', '1310617': 'Aars2', '1310655': 'Abhd3', '1310811': 'Aass', '1310988': 'Abhd1', '1311066': 'Aar2', '1311135': 'Aasdh', '1311222': 'Abcb10', '1311360': 'Aasdhppt', '1311389': 'Abhd2', '1311794': 'Abi3', '1311858': 'Abhd4', '1359323': 'Abhd6', '1359649': 'Abhd14b', '1359682': 'Abhd17a', '1561459': 'Aamdc', '1561650': 'Aarsd

In [24]:
print(mouse_dataset.filters) 

{'chromosome_name': 'Chromosome/scaffold name' (type: text, values: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, JACYVU010000238.1, JACYVU010000315.1, JACYVU010000319.1, JACYVU010000493.1, JACYVU010000587.1, JACYVU010000589.1, JACYVU010000619.1, JACYVU010000634.1, JACYVU010000642.1, JACYVU010000653.1, JACYVU010000665.1, JACYVU010000706.1, JACYVU010000731.1, JACYVU010000732.1, JACYVU010000738.1, JACYVU010000744.1, JACYVU010000754.1, MT, MU150189.1, MU150193.1, MU150196.1, MU150200.1, MU150203.1, MU150220.1, MU150222.1, MU150223.1, X, Y]), 'start': 'Start' (type: text, values: []), 'end': 'End' (type: text, values: []), 'band_start': 'Band Start' (type: text, values: []), 'band_end': 'Band End' (type: text, values: []), 'name_2': 'Name 2033' (type: list, values: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, X]), 'qtl_region': 'Qtl region' (type: drop_down_basic_filter, values: []), 'strand': 'Strand' (type: text, values: []), 'chromoso

In [None]:
import pandas as pd

# Convert dictionary to DataFrame and save without index
pd.DataFrame.from_dict(gene_mapping, orient='index', columns=['MGI Name']) \
    .reset_index() \
    .rename(columns={'index': 'MGI ID'}) \
    .to_csv("data/MGI_ID_biomart.csv", index=False)


In [15]:
server = BiomartServer( "http://uswest.ensembl.org/biomart" )
  
# if you are behind a proxy
import os
server = BiomartServer("http://www.ensembl.org/biomart")
server.show_databases()

HTTPError: 503 Server Error: Service Temporarily Unavailable for url: http://uswest.ensembl.org/biomart/martservice

In [None]:
len(gene_mapping)

In [None]:
## Function to convert MGI/RGD IDs/Other Species to gene symbols via biomart
import os
import sys
from biomart import BiomartServer
import pandas as pd

# Add source directory to the path
sys.path.append(os.path.abspath("src"))
from createDataTable import pop_up_info_lim

# Function to dynamically query BioMart
def fetch_gene_mappings(server_url, dataset_name, id_type, id_list, id_filter, attributes, chunk_size=100):
    """
    Fetch gene mappings from BioMart based on the provided IDs.

    Args:
        server_url (str): URL of the BioMart server.
        dataset_name (str): Dataset name (e.g., 'mmusculus_gene_ensembl' for mouse, 'rnorvegicus_gene_ensembl' for rat).
        id_type (str): Type of IDs ('MGI' or 'RGD').
        id_list (list): List of IDs to query.
        id_filter (str): BioMart filter for the ID type (e.g., 'mgi_id', 'rgd_id').
        attributes (list): List of attributes to fetch (e.g., ['mgi_id', 'external_gene_name']).
        chunk_size (int): Number of IDs to query in each chunk.

    Returns:
        dict: Mapping of input IDs to external gene names.
    """
    # Connect to the BioMart server
    server = BiomartServer(server_url)
    dataset = server.datasets[dataset_name]

    # Filter unique and valid IDs
    id_list = [id.strip() for id in id_list if id.startswith(id_type)]
    id_list = list(set(id_list))

    # Query in chunks
    gene_mapping = {}
    for i in range(0, len(id_list), chunk_size):
        chunk = id_list[i:i + chunk_size]
        try:
            response = dataset.search({
                'filters': {id_filter: chunk},
                'attributes': attributes
            })
            # Parse the response and add to gene_mapping
            for line in response.iter_lines(decode_unicode=True):
                gene_id, gene_name = line.split("\t")
                gene_mapping[gene_id] = gene_name
        except Exception as e:
            print(f"Error processing chunk {i // chunk_size + 1}: {e}")
    return gene_mapping

# Fetch MGI to gene name mappings for mouse
mgi_ids = pop_up_info_lim['MGI ID'].unique()
gene_mapping_mouse = fetch_gene_mappings(
    server_url="http://www.ensembl.org/biomart",
    dataset_name='mmusculus_gene_ensembl',
    id_type="MGI",
    id_list=mgi_ids,
    id_filter='mgi_id',
    attributes=['mgi_id', 'external_gene_name'],
    chunk_size=100
)
pd.DataFrame.from_dict(gene_mapping_mouse, orient='index', columns=['Gene Name']).to_csv("data/MGI_ID_biomart.csv")


# Fetch RGD to gene name mappings for rat
rgd_ids = pop_up_info_lim['RGD ID'].unique()
gene_mapping_rat = fetch_gene_mappings(
    server_url="http://www.ensembl.org/biomart",
    dataset_name='rnorvegicus_gene_ensembl',
    id_type="RGD",
    id_list=rgd_ids,
    id_filter='rgd_id',
    attributes=['rgd_id', 'external_gene_name'],
    chunk_size=100
)
pd.DataFrame.from_dict(gene_mapping_rat, orient='index', columns=['Gene Name']).to_csv("data/RGD_ID_biomart.csv")

# Print the results
# print("Mouse gene mappings:")
# for mgi, gene_name in gene_mapping_mouse.items():
#     print(f"{mgi}: {gene_name}")

# print("\nRat gene mappings:")
# for rgd, gene_name in gene_mapping_rat.items():
#     print(f"{rgd}: {gene_name}")


In [None]:
## Function to create Ligand-Receptor pair cards

import os
import jinja2
import sys
import pandas as pd
import numpy as np

sys.path.append(os.path.abspath("src"))  
import fetchGSheet
from createDataTable import gene_pair0

# Paths
TEMPLATE_PATH = 'HTML/cardTemplate.html'
OUTPUT_DIR = 'data/cards/'

def load_template(template_path):
    """Load Jinja2 template from a file."""
    with open(template_path, 'r') as file:
        return jinja2.Template(file.read())

def prepare_dataframes(gene_pair0):
    """Prepare interaction, ligand, and receptor dataframes."""
    DBlength = len(gene_pair0)
    gene_pair0["Interaction ID"] = [f"CDB{str(i).zfill(4)}" for i in range(1, DBlength + 1)]
    gene_pair0["Interaction Type"] = [
        f'{ligandLocation} {ligand} binds with {receptor} in {receptorLocation}'
        for ligand, ligandLocation, receptor, receptorLocation in zip(
            gene_pair0["Ligand"], gene_pair0["Ligand location"],
            gene_pair0["Receptor"], gene_pair0["Receptor location"]
        )
    ]
    interaction_card = gene_pair0[["Interaction ID", "LR Pair", "Interaction Type", "Perplexity", "PMID support"]]
    interaction_card["Perplexity"] = interaction_card["Perplexity"].str.replace('size=35', 'size=80')

    pop_up_info_lim = fetchGSheet.pop_up_info[
        ["Approved symbol", "Alias symbol", "Previous symbol", "Date symbol changed"]
    ].drop_duplicates(subset="Approved symbol", keep="first")
    
    ligand_card = gene_pair0[["LR Pair", "Ligand", "Ligand name", "Ligand HGNC ID", "Ligand MGI ID", "Ligand RGD ID", "Ligand location"]].merge(
        pop_up_info_lim, how='left', left_on='Ligand', right_on='Approved symbol'
    ).drop_duplicates(subset='LR Pair', keep="first").drop(columns=["Ligand", "Approved symbol"])

    ligand_card_1 = ligand_card[["LR Pair", "Alias symbol", "Date symbol changed", "Ligand name"]] 
    ligand_card_2 = ligand_card[["LR Pair", "Ligand HGNC ID", "Ligand MGI ID", "Ligand RGD ID", "Ligand location"]] 
    
    receptor_card = gene_pair0[["LR Pair", "Receptor", "Receptor name", "Receptor HGNC ID", "Receptor MGI ID", "Receptor RGD ID", "Receptor location"]].merge(
        pop_up_info_lim, how='left', left_on='Receptor', right_on='Approved symbol'
    ).drop_duplicates(subset='LR Pair', keep="first").drop(columns=["Receptor", "Approved symbol"])
    
    receptor_card_1 = receptor_card[["LR Pair", "Alias symbol", "Date symbol changed", "Receptor name"]] 
    receptor_card_2 = receptor_card[["LR Pair", "Receptor HGNC ID", "Receptor MGI ID", "Receptor RGD ID", "Receptor location"]] 

    return interaction_card, ligand_card_1, ligand_card_2, receptor_card_1, receptor_card_2

def generate_html_files(template, interaction_card, ligand_card_1, receptor_card_1, ligand_card_2, receptor_card_2, output_dir):
    """Generate HTML files for each LR Pair."""
    column_values = interaction_card["LR Pair"].dropna().unique()
    os.makedirs(output_dir, exist_ok=True)
    
    for value in column_values:
        value1, value2 = value.split()
        row0 = interaction_card[interaction_card['LR Pair'] == value]
        row1 = ligand_card_1[ligand_card_1['LR Pair'] == value]
        row2 = receptor_card_1[receptor_card_1['LR Pair'] == value]
        row3 = ligand_card_2[ligand_card_2['LR Pair'] == value]
        row4 = receptor_card_2[receptor_card_2['LR Pair'] == value]
        

        table0_data = row0.drop('LR Pair', axis=1).to_dict(orient='records')[0] if not row0.empty else {}
        table1_data = row1.drop('LR Pair', axis=1).to_dict(orient='records')[0] if not row1.empty else {}
        table2_data = row2.drop('LR Pair', axis=1).to_dict(orient='records')[0] if not row2.empty else {}
        table3_data = row3.drop('LR Pair', axis=1).to_dict(orient='records')[0] if not row3.empty else {}
        table4_data = row4.drop('LR Pair', axis=1).to_dict(orient='records')[0] if not row4.empty else {}

        rendered_content = template.render(
            value1=value1,
            value2=value2,
            table0_data=table0_data,
            table1_data=table1_data,
            table2_data=table2_data,
            table3_data=table3_data,
            table4_data=table4_data
        )
        
        output_file = os.path.join(output_dir, f"{value1} {value2}.html")
        with open(output_file, 'w') as file:
            file.write(rendered_content)

# Main execution
if __name__ == "__main__":
    template = load_template(TEMPLATE_PATH)
    interaction_card, ligand_card_1, receptor_card_1, ligand_card_2, receptor_card_2 = prepare_dataframes(gene_pair0)
    generate_html_files(template, interaction_card, ligand_card_1, receptor_card_1, ligand_card_2, receptor_card_2, OUTPUT_DIR)


In [None]:
rat_gene_pair

In [None]:
## Function to prepare datatables (cleaning and hyperlinking, adding tool tips, etc) input for the database qmds
import sys, os
sys.path.append(os.path.abspath("src"))  
from itables import init_notebook_mode
import pandas as pd
from itables import show
from itables import options
from IPython.display import HTML, display
import numpy as np
import fetchGSheet 

# Select only the relevant columns from pop_up_info

pop_up_info = fetchGSheet.pop_up_info.rename(columns={"Mouse genome informatics (MGI) ID": "MGI ID","Rat genome database (RGD) ID": "RGD ID"})

pop_up_info_lim = pop_up_info[["Approved symbol", "Approved name", "MGI ID", "RGD ID"]]
pop_up_info_lim = pop_up_info_lim.drop_duplicates(subset="Approved symbol", keep="first")

# Drop columns where all values are NA in gene_pair
gene_pair = fetchGSheet.gene_pair.dropna(axis=1, how='all')

# Rename columns for better clarity
gene_pair = gene_pair.rename(columns={
    "Ligand receptor pair": "LR Pair",
    "Ligand gene symbol": "Ligand",
    "Receptor gene symbol": "Receptor",
    "Perplexity link": "Perplexity"
})

# Merge gene_pair with pop_up_info_lim for Ligand(L)
gene_pair = gene_pair.merge(pop_up_info_lim, how='left', left_on='Ligand', right_on='Approved symbol')

gene_pair = gene_pair.rename(columns={"Approved name": "Ligand name", 
                                     "MGI ID": "Ligand MGI ID",
                                     "RGD ID": "Ligand RGD ID"},
                            )

# Add MGI name
MGI_info = pd.read_csv("data/MGI_ID_biomart.csv")
gene_pair = gene_pair.merge(MGI_info, how='left', left_on='Ligand MGI ID', right_on='MGI ID')
gene_pair = gene_pair.drop(columns=["MGI ID"])

gene_pair = gene_pair.rename(columns={
                                     "MGI name": "Ligand MGI name"}
                            )

gene_pair = gene_pair.merge(pop_up_info_lim, how='left', left_on='Receptor', right_on='Approved symbol')

gene_pair = gene_pair.rename(columns={"Approved name": "Receptor name",
                                      "MGI ID": "Receptor MGI ID",
                                      "RGD ID": "Receptor RGD ID"}
                            )

# Add MGI name
gene_pair = gene_pair.merge(MGI_info, how='left', left_on='Receptor MGI ID', right_on='MGI ID')
gene_pair = gene_pair.drop(columns=["MGI ID"])
gene_pair = gene_pair.rename(columns={
                                     "MGI name": "Receptor MGI name"}
                            )
gene_pair.columns

In [None]:
gene_pair.columns

In [None]:
gene_pair

In [None]:
MGI_info = pd.read_csv("data/MGI_ID_biomart.csv")
MGI_info