In [8]:
import scanpy as sc
import pandas as pd

# Query BioMart (via scanpy) for the "mmusculus" annotation table, requesting
# the Ensembl gene ID and the external gene name for every gene.
gene_ensembl = sc.queries.biomart_annotations(
    "mmusculus",
    ["ensembl_gene_id", "external_gene_name"],
    host="www.ensembl.org",
)

# Gene symbols to look up in the annotation table. The order is unchanged;
# each row simply groups symbols that share a prefix.
mouse_gene_names = [
    # C1*
    "C1d", "C1qa", "C1qb", "C1qc", "C1qbp", "C1r", "C1rl", "C1s",
    # C2* - C4*
    "C2", "C3", "C3ar1", "C4a", "C4b", "C4bp", "C4bpb",
    # Hc, C5*
    "Hc", "C5ar1", "C5ar2",
    # C6 - C9
    "C6", "C7", "C8a", "C8b", "C8g", "C9",
    # Calr, Cd*
    "Calr", "Cd46", "Cd55", "Cd59a", "Cd93",
    # Cf*
    "Cfb", "Cfd", "Cfh", "Cfhr1", "Cfhr2", "Cfhr3", "Cfhr4", "Cfhr5", "Cfi",
    # Cl*, Co*, Cp*, Cs*
    "Clu", "Colec10", "Colec11", "Cpb2", "Cpn1", "Cpn2", "Csmd1",
    # Cfp, Cr*
    "Cfp", "Cr1", "Cr2",
    # Fcn*
    "Fcn1", "Fcn2", "Fcn3",
    # Itg*
    "Itgam", "Itgamx", "Itgax", "Itgb2",
    # Masp*, Mbl2
    "Masp1", "Masp2", "Masp3", "Mbl2",
    # Remaining symbols
    "Serping1", "Susd4", "Vsig4", "Vcp",
]

# Map the requested gene symbols to Ensembl gene IDs using the annotation
# table `gene_ensembl` (columns "ensembl_gene_id" and "external_gene_name").
mouse_gene_names = set(mouse_gene_names)
gene_ensembl_names = set(gene_ensembl["external_gene_name"].values)

# Symbols that appear both in the request list and in the annotation table
intersection = mouse_gene_names & gene_ensembl_names

# Requested symbols without any annotation row would otherwise disappear
# from the result without a trace, so report them explicitly.
missing_gene_names = sorted(mouse_gene_names - gene_ensembl_names)
if missing_gene_names:
    print(f"Gene names not found in the annotation table: {missing_gene_names}")

# Keep only the matching rows. Sorting on both columns makes the row order
# deterministic even when one symbol is listed with several Ensembl IDs.
filtered_gene_ensembl = (
    gene_ensembl[gene_ensembl["external_gene_name"].isin(intersection)]
    .sort_values(by=["external_gene_name", "ensembl_gene_id"])
    .reset_index(drop=True)
)

# dict() keeps only the last ID seen for a repeated key, so flag every symbol
# that maps to more than one Ensembl ID instead of collapsing it silently.
has_multiple_ids = filtered_gene_ensembl["external_gene_name"].duplicated(keep=False)
ambiguous_gene_names = sorted(
    set(filtered_gene_ensembl.loc[has_multiple_ids, "external_gene_name"])
)
if ambiguous_gene_names:
    print(
        "Gene names with multiple Ensembl IDs (last ID in sort order kept): "
        f"{ambiguous_gene_names}"
    )

# Dictionary with gene names as keys and Ensembl gene IDs as values
gene_dict = dict(
    zip(
        filtered_gene_ensembl["external_gene_name"],
        filtered_gene_ensembl["ensembl_gene_id"],
    )
)

# Print the dictionary
print(gene_dict)

{'C1d': 'ENSMUSG00000000581', 'C1qa': 'ENSMUSG00000036887', 'C1qb': 'ENSMUSG00000036905', 'C1qbp': 'ENSMUSG00000018446', 'C1qc': 'ENSMUSG00000036896', 'C1rl': 'ENSMUSG00000038527', 'C2': 'ENSMUSG00000024371', 'C3': 'ENSMUSG00000024164', 'C3ar1': 'ENSMUSG00000040552', 'C4a': 'ENSMUSG00000015451', 'C4b': 'ENSMUSG00000073418', 'C4bp': 'ENSMUSG00000026405', 'C5ar1': 'ENSMUSG00000049130', 'C5ar2': 'ENSMUSG00000074361', 'C6': 'ENSMUSG00000022181', 'C7': 'ENSMUSG00000079105', 'C8a': 'ENSMUSG00000035031', 'C8b': 'ENSMUSG00000029656', 'C8g': 'ENSMUSG00000015083', 'C9': 'ENSMUSG00000022149', 'Calr': 'ENSMUSG00000003814', 'Cd46': 'ENSMUSG00000016493', 'Cd55': 'ENSMUSG00000026399', 'Cd59a': 'ENSMUSG00000032679', 'Cd93': 'ENSMUSG00000027435', 'Cfb': 'ENSMUSG00000090231', 'Cfd': 'ENSMUSG00000061780', 'Cfh': 'ENSMUSG00000026365', 'Cfhr1': 'ENSMUSG00000057037', 'Cfhr2': 'ENSMUSG00000033898', 'Cfhr3': 'ENSMUSG00000090623', 'Cfhr4': 'ENSMUSG00000070594', 'Cfi': 'ENSMUSG00000058952', 'Cfp': 'ENSMUSG00000