# Import packages

In [2]:
import scanpy as sc
import anndata
import numpy as np
import gc
import pandas as pd 
pd.set_option('display.max_columns', None)
from biomart import BiomartServer
from datetime import date
import os
gc.isenabled()

True

# Query the server

In [2]:
server = BiomartServer("http://www.ensembl.org/biomart")

In [3]:
ensembl = server.datasets['hsapiens_gene_ensembl']

In [4]:
# Attributes requested from the human gene dataset: the human gene ID and name,
# followed by the mouse homolog ID, orthology type and mouse gene name.
columns = [
    'ensembl_gene_id',
    'external_gene_name',
    'mmusculus_homolog_ensembl_gene',
    'mmusculus_homolog_orthology_type',
    'mmusculus_homolog_associated_gene_name',
]

In [5]:
# Query the human gene dataset for the attributes listed in `columns`.
# NOTE(review): 'mart_instance' is passed through to the biomart client as-is —
# confirm it is still required by the installed biomart version.
response = ensembl.search({'attributes' : columns,
                           'mart_instance' : 'ensembl'})

In [6]:
# Load the tab-separated query result into a DataFrame. The data is read without a
# header row (header=None), so the column names are taken from `columns`.
# NOTE(review): reading from response.url presumably issues the request a second time — TODO confirm.
mapping_df = pd.read_csv(response.url, sep='\t', header=None, names=columns)

In [7]:
mapping_df

Unnamed: 0,ensembl_gene_id,external_gene_name,mmusculus_homolog_ensembl_gene,mmusculus_homolog_orthology_type,mmusculus_homolog_associated_gene_name
0,ENSG00000210049,MT-TF,,,
1,ENSG00000211459,MT-RNR1,,,
2,ENSG00000210077,MT-TV,,,
3,ENSG00000210082,MT-RNR2,,,
4,ENSG00000209082,MT-TL1,,,
...,...,...,...,...,...
75771,ENSG00000162437,RAVER2,ENSMUSG00000035275,ortholog_one2one,Raver2
75772,ENSG00000122432,SPATA1,ENSMUSG00000028188,ortholog_one2one,Spata1
75773,ENSG00000284882,,,,
75774,ENSG00000289881,,,,


In [8]:
# Attributes for the second query: human Ensembl gene ID and its Entrez (NCBI) gene ID.
columns2 = [
    'ensembl_gene_id',
    'entrezgene_id',
]

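A separate query is needed because `entrezgene_id` is on a different BioMart attribute page than the homolog attributes, and attributes from different pages cannot be requested in the same query.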

In [9]:
# Second query on the same dataset, this time for the attributes in `columns2`
# (Ensembl gene ID and Entrez gene ID).
response2 = ensembl.search({'attributes' : columns2,
                           'mart_instance' : 'ensembl'})

In [10]:
# Load the Ensembl -> Entrez result; the data is read without a header row,
# so the column names are taken from `columns2`.
ncbi_id = pd.read_csv(response2.url, sep='\t', header=None, names=columns2)

# Import human and mouse data

In [3]:
os.path.realpath('ensembl_query.ipynb')

'/nfs/research/irene/anaelle/CrossSpeciesIC/expiMap/object_human_mouse/ensembl_query.ipynb'

In [None]:
# Project root directory. It can be overridden without editing the notebook by
# setting the CROSS_SPECIES_IC_DIR environment variable; when the variable is
# not set, the original hard-coded location is used.
path_project = os.environ.get('CROSS_SPECIES_IC_DIR',
                              '/nfs/research/irene/anaelle/CrossSpeciesIC/') #change this path to your directory
print(path_project)
# Folder holding this notebook and its companion scripts, derived from the project root.
path_scripts = os.path.join(path_project, 'expiMap', 'object_human_mouse') #change this path
print(path_scripts)
# Folder holding the input .h5ad files and the CSV written at the end of the notebook.
path_data = os.path.join(path_project, 'data')
print(path_data)

In [14]:
# Mouse dataset, read from the data directory.
mouse_h5ad_path = os.path.join(path_data, 'mouse_raw_counts_from_cellxgene.h5ad')
mouse_data = sc.read_h5ad(mouse_h5ad_path)

In [15]:
# Human dataset, read from the data directory.
human_h5ad_path = os.path.join(path_data, 'hdata2023-06-05.h5ad')
human_data = sc.read_h5ad(human_h5ad_path)



# Get the human genes

In [118]:
# Keep only the Ensembl -> Entrez rows whose Entrez ID appears in the human dataset's
# `var.entrez_id` column.
entrez_in_human_data = ncbi_id['entrezgene_id'].isin(human_data.var['entrez_id'])
human_ncbi_id = ncbi_id.loc[entrez_in_human_data]

In [119]:
human_ncbi_id

Unnamed: 0,ensembl_gene_id,entrezgene_id
5,ENSG00000198888,4535.0
9,ENSG00000198763,4536.0
15,ENSG00000198804,4512.0
18,ENSG00000198712,4513.0
20,ENSG00000228253,4509.0
...,...,...
75101,ENSG00000198216,777.0
75102,ENSG00000179930,127665.0
75112,ENSG00000264470,100616338.0
75113,ENSG00000162437,55225.0


In [109]:
# Restrict the human -> mouse homolog table to the human genes retained in `human_ncbi_id`.
is_retained_human_gene = mapping_df['ensembl_gene_id'].isin(human_ncbi_id['ensembl_gene_id'])
human_mouse_df = mapping_df.loc[is_retained_human_gene]

In [110]:
human_mouse_df

Unnamed: 0,ensembl_gene_id,external_gene_name,mmusculus_homolog_ensembl_gene,mmusculus_homolog_orthology_type,mmusculus_homolog_associated_gene_name
5,ENSG00000198888,MT-ND1,ENSMUSG00000064341,ortholog_one2one,mt-Nd1
9,ENSG00000198763,MT-ND2,ENSMUSG00000064345,ortholog_one2one,mt-Nd2
15,ENSG00000198804,MT-CO1,ENSMUSG00000064351,ortholog_one2one,mt-Co1
18,ENSG00000198712,MT-CO2,ENSMUSG00000064354,ortholog_one2one,mt-Co2
20,ENSG00000228253,MT-ATP8,ENSMUSG00000064356,ortholog_one2one,mt-Atp8
...,...,...,...,...,...
75759,ENSG00000198216,CACNA1E,ENSMUSG00000004110,ortholog_one2one,Cacna1e
75760,ENSG00000179930,ZNF648,ENSMUSG00000066797,ortholog_one2one,Zfp648
75770,ENSG00000264470,MIR4794,,,
75771,ENSG00000162437,RAVER2,ENSMUSG00000035275,ortholog_one2one,Raver2


# Get the mouse genes

In [24]:
mouse_data.var.index

Index(['ENSMUSG00000029422', 'ENSMUSG00000114536', 'ENSMUSG00000049036',
       'ENSMUSG00000029577', 'ENSMUSG00000040746', 'ENSMUSG00000020590',
       'ENSMUSG00000096963', 'ENSMUSG00000030263', 'ENSMUSG00000038914',
       'ENSMUSG00000098447',
       ...
       'ENSMUSG00000101706', 'ENSMUSG00000073052', 'ENSMUSG00000113410',
       'ENSMUSG00000102368', 'ENSMUSG00000021033', 'ENSMUSG00000100416',
       'ENSMUSG00000096169', 'ENSMUSG00000030657', 'ENSMUSG00000037924',
       'ENSMUSG00000040693'],
      dtype='object', name='feature_id', length=30639)

In [113]:
# Keep only the rows whose mouse homolog is one of the genes indexed in the mouse dataset.
homolog_in_mouse_data = human_mouse_df['mmusculus_homolog_ensembl_gene'].isin(mouse_data.var.index)
human_mouse_df = human_mouse_df.loc[homolog_in_mouse_data]

In [114]:
human_mouse_df

Unnamed: 0,ensembl_gene_id,external_gene_name,mmusculus_homolog_ensembl_gene,mmusculus_homolog_orthology_type,mmusculus_homolog_associated_gene_name
5,ENSG00000198888,MT-ND1,ENSMUSG00000064341,ortholog_one2one,mt-Nd1
9,ENSG00000198763,MT-ND2,ENSMUSG00000064345,ortholog_one2one,mt-Nd2
15,ENSG00000198804,MT-CO1,ENSMUSG00000064351,ortholog_one2one,mt-Co1
18,ENSG00000198712,MT-CO2,ENSMUSG00000064354,ortholog_one2one,mt-Co2
20,ENSG00000228253,MT-ATP8,ENSMUSG00000064356,ortholog_one2one,mt-Atp8
...,...,...,...,...,...
75758,ENSG00000187017,ESPN,ENSMUSG00000028943,ortholog_one2one,Espn
75759,ENSG00000198216,CACNA1E,ENSMUSG00000004110,ortholog_one2one,Cacna1e
75760,ENSG00000179930,ZNF648,ENSMUSG00000066797,ortholog_one2one,Zfp648
75771,ENSG00000162437,RAVER2,ENSMUSG00000035275,ortholog_one2one,Raver2


In [115]:
human_mouse_df.ensembl_gene_id.duplicated().sum()

4583

In [116]:
human_mouse_df.mmusculus_homolog_ensembl_gene.duplicated().sum()

3673

# Add the entrezgene_id

In [120]:
human_ncbi_id

Unnamed: 0,ensembl_gene_id,entrezgene_id
5,ENSG00000198888,4535.0
9,ENSG00000198763,4536.0
15,ENSG00000198804,4512.0
18,ENSG00000198712,4513.0
20,ENSG00000228253,4509.0
...,...,...
75101,ENSG00000198216,777.0
75102,ENSG00000179930,127665.0
75112,ENSG00000264470,100616338.0
75113,ENSG00000162437,55225.0


In [121]:
human_ncbi_id.ensembl_gene_id.duplicated().sum()

290

## Check duplicated Ensembl IDs

In [122]:
human_ncbi_id[human_ncbi_id.ensembl_gene_id.duplicated()]

Unnamed: 0,ensembl_gene_id,entrezgene_id
724,ENSG00000267635,100507608.0
905,ENSG00000275517,653247.0
987,ENSG00000273772,6607.0
1049,ENSG00000276910,728340.0
1058,ENSG00000274166,647042.0
...,...,...
74887,ENSG00000290825,100287102.0
74888,ENSG00000290825,100287596.0
74889,ENSG00000290825,102725121.0
74942,ENSG00000291158,388692.0


In [123]:
# Every row whose Ensembl gene ID occurs more than once (all occurrences, including the first).
has_repeated_ensembl_id = human_ncbi_id['ensembl_gene_id'].duplicated(keep=False)
duplicates = human_ncbi_id[has_repeated_ensembl_id]

In [124]:
duplicates

Unnamed: 0,ensembl_gene_id,entrezgene_id
723,ENSG00000267635,100505724.0
724,ENSG00000267635,100507608.0
904,ENSG00000275517,5542.0
905,ENSG00000275517,653247.0
986,ENSG00000273772,6606.0
...,...,...
74889,ENSG00000290825,102725121.0
74941,ENSG00000291158,57234.0
74942,ENSG00000291158,388692.0
74975,ENSG00000158747,4681.0


In [125]:
# Map each repeated Ensembl gene ID to the list of Entrez IDs attached to it,
# in the order the rows appear in `duplicates`.
human_duplicates = {}
for ensembl_id, entrez_id in zip(duplicates['ensembl_gene_id'], duplicates['entrezgene_id']):
    human_duplicates.setdefault(ensembl_id, []).append(entrez_id)

In [126]:
len(human_duplicates.keys())

266

In [127]:
human_duplicates

{'ENSG00000267635': [100505724.0, 100507608.0],
 'ENSG00000275517': [5542.0, 653247.0],
 'ENSG00000273772': [6606.0, 6607.0],
 'ENSG00000276910': [2966.0, 728340.0],
 'ENSG00000274166': [643707.0, 647042.0, 102724117.0],
 'ENSG00000277429': [8293.0, 728492.0],
 'ENSG00000229071': [80863.0, 100507547.0],
 'ENSG00000239927': [79897.0, 202658.0],
 'ENSG00000241863': [79897.0, 202658.0],
 'ENSG00000274729': [245908.0, 504180.0],
 'ENSG00000274540': [100131608.0, 100133251.0],
 'ENSG00000275444': [1673.0, 100289462.0],
 'ENSG00000281040': [441317.0, 441327.0],
 'ENSG00000277530': [245910.0, 503614.0],
 'ENSG00000276299': [441873.0, 645359.0],
 'ENSG00000263247': [5554.0, 11272.0],
 'ENSG00000282475': [440243.0, 645202.0],
 'ENSG00000275045': [2966.0, 728340.0],
 'ENSG00000243009': [79897.0, 202658.0],
 'ENSG00000278632': [414059.0, 101060321.0],
 'ENSG00000275748': [51326.0, 100506084.0],
 'ENSG00000275003': [1652.0, 100037417.0],
 'ENSG00000278232': [1394.0, 104909134.0],
 'ENSG00000276725

## Keep only the first Entrez ID for each Ensembl gene

In [128]:
# Collect every Entrez ID except the first one listed for each repeated Ensembl gene.
genes_removed = [
    extra_entrez_id
    for entrez_ids in human_duplicates.values()
    for extra_entrez_id in entrez_ids[1:]
]

In [129]:
len(genes_removed) #Same length as the duplicates in human_ncbi_id df

290

In [130]:
# Sanity check after dropping every row whose Entrez ID is listed in genes_removed.
# NOTE(review): DataFrame.duplicated() without `subset` compares entire rows
# (ensembl_gene_id and entrezgene_id together), so a False here does not by itself
# show that ensembl_gene_id is unique — TODO confirm this is the intended check.
human_ncbi_id[~human_ncbi_id.entrezgene_id.isin(genes_removed)].duplicated().any()

False

In [135]:
# Keep exactly one row per Ensembl gene: the first Entrez ID listed for it.
# The de-duplication is done on ensembl_gene_id directly instead of filtering with
# `entrezgene_id.isin(genes_removed)`: that filter removes *every* row carrying a
# removed Entrez ID, so an Ensembl gene whose first (or only) Entrez ID happens to be
# a discarded extra ID of another Ensembl gene would silently disappear from the table.
human_genes = human_ncbi_id.drop_duplicates(subset='ensembl_gene_id', keep='first')

In [137]:
human_genes.ensembl_gene_id.duplicated().any()

False

In [215]:
human_genes.loc[human_genes.ensembl_gene_id == 'ENSG00000198888'].entrezgene_id.iloc[0]

4535.0

In [217]:
'ENSG00000198804' in human_genes.ensembl_gene_id.values

True

In [205]:
human_genes

Unnamed: 0,ensembl_gene_id,entrezgene_id
5,ENSG00000198888,4535.0
9,ENSG00000198763,4536.0
15,ENSG00000198804,4512.0
18,ENSG00000198712,4513.0
20,ENSG00000228253,4509.0
...,...,...
75101,ENSG00000198216,777.0
75102,ENSG00000179930,127665.0
75112,ENSG00000264470,100616338.0
75113,ENSG00000162437,55225.0


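## Check entrez_id duplicates

Note: the cells in this section use `human_mouse_homolog_genes`, which is only created in the "Create new dataframe" section below. On a fresh kernel, run that section first; the same checks are repeated afterwards under "Check entrez duplicates".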

In [249]:
human_mouse_homolog_genes[human_mouse_homolog_genes.human_entrezgene_id.duplicated()]

Unnamed: 0,human_ensembl_gene_id,human_entrezgene_id,human_external_gene_name,mouse_homolog_ensembl_gene,orthology_type,mouse_homolog_gene_name
16,ENSG00000198692,9086.0,EIF1AY,ENSMUSG00000113201,ortholog_one2many,Eif1ad13
17,ENSG00000198692,9086.0,EIF1AY,ENSMUSG00000079031,ortholog_one2many,Eif1ad2
18,ENSG00000198692,9086.0,EIF1AY,ENSMUSG00000079029,ortholog_one2many,Eif1ad7
19,ENSG00000198692,9086.0,EIF1AY,ENSMUSG00000093847,ortholog_one2many,Eif1ad15
20,ENSG00000198692,9086.0,EIF1AY,ENSMUSG00000113971,ortholog_one2many,Eif1ad4
...,...,...,...,...,...,...
21880,ENSG00000143549,7170.0,TPM3,ENSMUSG00000058126,ortholog_one2many,Tpm3-rs7
21931,ENSG00000241128,388761.0,OR14A2,ENSMUSG00000055610,ortholog_one2many,Or14a260
21932,ENSG00000241128,388761.0,OR14A2,ENSMUSG00000062042,ortholog_one2many,Or14a256
21933,ENSG00000241128,388761.0,OR14A2,ENSMUSG00000055571,ortholog_one2many,Or14a259


In [250]:
# Every row whose human Entrez ID occurs more than once (all occurrences, including the first).
shares_entrez_id = human_mouse_homolog_genes['human_entrezgene_id'].duplicated(keep=False)
entrez_dup = human_mouse_homolog_genes[shares_entrez_id]

In [251]:
entrez_dup

Unnamed: 0,human_ensembl_gene_id,human_entrezgene_id,human_external_gene_name,mouse_homolog_ensembl_gene,orthology_type,mouse_homolog_gene_name
13,ENSG00000277196,5625.0,PRODH,ENSMUSG00000003526,ortholog_one2many,Prodh
15,ENSG00000198692,9086.0,EIF1AY,ENSMUSG00000057561,ortholog_one2many,Eif1a
16,ENSG00000198692,9086.0,EIF1AY,ENSMUSG00000113201,ortholog_one2many,Eif1ad13
17,ENSG00000198692,9086.0,EIF1AY,ENSMUSG00000079031,ortholog_one2many,Eif1ad2
18,ENSG00000198692,9086.0,EIF1AY,ENSMUSG00000079029,ortholog_one2many,Eif1ad7
...,...,...,...,...,...,...
21930,ENSG00000241128,388761.0,OR14A2,ENSMUSG00000062878,ortholog_one2many,Or14a257
21931,ENSG00000241128,388761.0,OR14A2,ENSMUSG00000055610,ortholog_one2many,Or14a260
21932,ENSG00000241128,388761.0,OR14A2,ENSMUSG00000062042,ortholog_one2many,Or14a256
21933,ENSG00000241128,388761.0,OR14A2,ENSMUSG00000055571,ortholog_one2many,Or14a259


In [260]:
# Map each repeated Entrez ID to the distinct human Ensembl gene IDs that carry it,
# in order of first appearance.
entrezid = {}
for entrez_id, ensembl_id in zip(entrez_dup['human_entrezgene_id'], entrez_dup['human_ensembl_gene_id']):
    ensembl_ids = entrezid.setdefault(entrez_id, [])
    if ensembl_id not in ensembl_ids:
        ensembl_ids.append(ensembl_id)

In [261]:
len(entrezid.keys())

759

## Create new dataframe

In [206]:
human_mouse_df.head(20)

Unnamed: 0,ensembl_gene_id,external_gene_name,mmusculus_homolog_ensembl_gene,mmusculus_homolog_orthology_type,mmusculus_homolog_associated_gene_name
5,ENSG00000198888,MT-ND1,ENSMUSG00000064341,ortholog_one2one,mt-Nd1
9,ENSG00000198763,MT-ND2,ENSMUSG00000064345,ortholog_one2one,mt-Nd2
15,ENSG00000198804,MT-CO1,ENSMUSG00000064351,ortholog_one2one,mt-Co1
18,ENSG00000198712,MT-CO2,ENSMUSG00000064354,ortholog_one2one,mt-Co2
20,ENSG00000228253,MT-ATP8,ENSMUSG00000064356,ortholog_one2one,mt-Atp8
21,ENSG00000198899,MT-ATP6,ENSMUSG00000064357,ortholog_one2one,mt-Atp6
22,ENSG00000198938,MT-CO3,ENSMUSG00000064358,ortholog_one2one,mt-Co3
24,ENSG00000198840,MT-ND3,ENSMUSG00000064360,ortholog_one2one,mt-Nd3
26,ENSG00000212907,MT-ND4L,ENSMUSG00000065947,ortholog_one2one,mt-Nd4l
27,ENSG00000198886,MT-ND4,ENSMUSG00000064363,ortholog_one2one,mt-Nd4


In [183]:
human_mouse_df.loc[human_mouse_df['ensembl_gene_id']=='ENSG00000198888']

Unnamed: 0,ensembl_gene_id,external_gene_name,mmusculus_homolog_ensembl_gene,mmusculus_homolog_orthology_type,mmusculus_homolog_associated_gene_name
5,ENSG00000198888,MT-ND1,ENSMUSG00000064341,ortholog_one2one,mt-Nd1


In [228]:
human_mouse_df.loc[human_mouse_df['ensembl_gene_id']=='ENSG00000198888'].external_gene_name

5    MT-ND1
Name: external_gene_name, dtype: object

In [220]:
human_mouse_df.mmusculus_homolog_ensembl_gene.hasnans

False

In [226]:
def get_ensembl_gene_info_df(homolog_df=None, entrez_df=None):
    """Attach one Entrez ID to every human -> mouse homolog row.

    Rows of the homolog table whose ``ensembl_gene_id`` has no entry in the
    Entrez table are dropped. When the Entrez table lists an Ensembl gene
    several times, the first listed Entrez ID is used.

    Parameters
    ----------
    homolog_df : pandas.DataFrame, optional
        Homolog table with the columns ``ensembl_gene_id``,
        ``external_gene_name``, ``mmusculus_homolog_ensembl_gene``,
        ``mmusculus_homolog_orthology_type`` and
        ``mmusculus_homolog_associated_gene_name``.
        Defaults to the notebook-level ``human_mouse_df``.
    entrez_df : pandas.DataFrame, optional
        Table with the columns ``ensembl_gene_id`` and ``entrezgene_id``.
        Defaults to the notebook-level ``human_genes``.

    Returns
    -------
    list of dict
        One record per retained homolog row, in the row order of
        ``homolog_df``, with the keys ``human_ensembl_gene_id``,
        ``human_entrezgene_id``, ``human_external_gene_name``,
        ``mouse_homolog_ensembl_gene``, ``orthology_type`` and
        ``mouse_homolog_gene_name``. Pass the list to ``pd.DataFrame``.
    """
    if homolog_df is None:
        homolog_df = human_mouse_df
    if entrez_df is None:
        entrez_df = human_genes

    # Build the Ensembl -> Entrez lookup once, instead of scanning the whole
    # Entrez table for every homolog row.
    entrez_by_ensembl = (
        entrez_df
        .dropna(subset=['ensembl_gene_id'])
        .drop_duplicates(subset='ensembl_gene_id', keep='first')
        .set_index('ensembl_gene_id')['entrezgene_id']
    )

    # Only homolog rows with a known Entrez ID are kept; row order is preserved.
    matched = homolog_df[homolog_df['ensembl_gene_id'].isin(entrez_by_ensembl.index)]

    # .to_numpy() discards the original row labels so the output is positionally aligned.
    gene_info = pd.DataFrame({
        'human_ensembl_gene_id': matched['ensembl_gene_id'].to_numpy(),
        'human_entrezgene_id': matched['ensembl_gene_id'].map(entrez_by_ensembl).to_numpy(),
        'human_external_gene_name': matched['external_gene_name'].to_numpy(),
        'mouse_homolog_ensembl_gene': matched['mmusculus_homolog_ensembl_gene'].to_numpy(),
        'orthology_type': matched['mmusculus_homolog_orthology_type'].to_numpy(),
        'mouse_homolog_gene_name': matched['mmusculus_homolog_associated_gene_name'].to_numpy(),
    })
    return gene_info.to_dict('records')

In [227]:
rows = get_ensembl_gene_info_df()

In [229]:
human_mouse_homolog_genes = pd.DataFrame(rows)

In [230]:
human_mouse_homolog_genes

Unnamed: 0,human_ensembl_gene_id,human_entrezgene_id,human_external_gene_name,mouse_homolog_ensembl_gene,orthology_type,mouse_homolog_gene_name
0,ENSG00000198888,4535.0,MT-ND1,ENSMUSG00000064341,ortholog_one2one,mt-Nd1
1,ENSG00000198763,4536.0,MT-ND2,ENSMUSG00000064345,ortholog_one2one,mt-Nd2
2,ENSG00000198804,4512.0,MT-CO1,ENSMUSG00000064351,ortholog_one2one,mt-Co1
3,ENSG00000198712,4513.0,MT-CO2,ENSMUSG00000064354,ortholog_one2one,mt-Co2
4,ENSG00000228253,4509.0,MT-ATP8,ENSMUSG00000064356,ortholog_one2one,mt-Atp8
...,...,...,...,...,...,...
21936,ENSG00000187017,83715.0,ESPN,ENSMUSG00000028943,ortholog_one2one,Espn
21937,ENSG00000198216,777.0,CACNA1E,ENSMUSG00000004110,ortholog_one2one,Cacna1e
21938,ENSG00000179930,127665.0,ZNF648,ENSMUSG00000066797,ortholog_one2one,Zfp648
21939,ENSG00000162437,55225.0,RAVER2,ENSMUSG00000035275,ortholog_one2one,Raver2


In [231]:
human_mouse_homolog_genes.human_ensembl_gene_id.duplicated().any()

True

In [234]:
human_mouse_homolog_genes.mouse_homolog_ensembl_gene.duplicated().any()

True

In [232]:
human_mouse_homolog_genes.human_entrezgene_id.hasnans

False

In [233]:
human_mouse_homolog_genes.mouse_homolog_ensembl_gene.hasnans

False

In [235]:
human_mouse_homolog_genes.orthology_type.hasnans

False

In [239]:
human_mouse_homolog_genes.orthology_type.unique()

array(['ortholog_one2one', 'ortholog_one2many', 'ortholog_many2many'],
      dtype=object)

## Check entrez duplicates

In [249]:
human_mouse_homolog_genes[human_mouse_homolog_genes.human_entrezgene_id.duplicated()]

Unnamed: 0,human_ensembl_gene_id,human_entrezgene_id,human_external_gene_name,mouse_homolog_ensembl_gene,orthology_type,mouse_homolog_gene_name
16,ENSG00000198692,9086.0,EIF1AY,ENSMUSG00000113201,ortholog_one2many,Eif1ad13
17,ENSG00000198692,9086.0,EIF1AY,ENSMUSG00000079031,ortholog_one2many,Eif1ad2
18,ENSG00000198692,9086.0,EIF1AY,ENSMUSG00000079029,ortholog_one2many,Eif1ad7
19,ENSG00000198692,9086.0,EIF1AY,ENSMUSG00000093847,ortholog_one2many,Eif1ad15
20,ENSG00000198692,9086.0,EIF1AY,ENSMUSG00000113971,ortholog_one2many,Eif1ad4
...,...,...,...,...,...,...
21880,ENSG00000143549,7170.0,TPM3,ENSMUSG00000058126,ortholog_one2many,Tpm3-rs7
21931,ENSG00000241128,388761.0,OR14A2,ENSMUSG00000055610,ortholog_one2many,Or14a260
21932,ENSG00000241128,388761.0,OR14A2,ENSMUSG00000062042,ortholog_one2many,Or14a256
21933,ENSG00000241128,388761.0,OR14A2,ENSMUSG00000055571,ortholog_one2many,Or14a259


In [250]:
# Every row whose human Entrez ID occurs more than once (all occurrences, including the first).
shares_entrez_id = human_mouse_homolog_genes['human_entrezgene_id'].duplicated(keep=False)
entrez_dup = human_mouse_homolog_genes[shares_entrez_id]

In [251]:
entrez_dup

Unnamed: 0,human_ensembl_gene_id,human_entrezgene_id,human_external_gene_name,mouse_homolog_ensembl_gene,orthology_type,mouse_homolog_gene_name
13,ENSG00000277196,5625.0,PRODH,ENSMUSG00000003526,ortholog_one2many,Prodh
15,ENSG00000198692,9086.0,EIF1AY,ENSMUSG00000057561,ortholog_one2many,Eif1a
16,ENSG00000198692,9086.0,EIF1AY,ENSMUSG00000113201,ortholog_one2many,Eif1ad13
17,ENSG00000198692,9086.0,EIF1AY,ENSMUSG00000079031,ortholog_one2many,Eif1ad2
18,ENSG00000198692,9086.0,EIF1AY,ENSMUSG00000079029,ortholog_one2many,Eif1ad7
...,...,...,...,...,...,...
21930,ENSG00000241128,388761.0,OR14A2,ENSMUSG00000062878,ortholog_one2many,Or14a257
21931,ENSG00000241128,388761.0,OR14A2,ENSMUSG00000055610,ortholog_one2many,Or14a260
21932,ENSG00000241128,388761.0,OR14A2,ENSMUSG00000062042,ortholog_one2many,Or14a256
21933,ENSG00000241128,388761.0,OR14A2,ENSMUSG00000055571,ortholog_one2many,Or14a259


In [260]:
# Map each repeated Entrez ID to the distinct human Ensembl gene IDs that carry it,
# in order of first appearance.
entrezid = {}
for entrez_id, ensembl_id in zip(entrez_dup['human_entrezgene_id'], entrez_dup['human_ensembl_gene_id']):
    ensembl_ids = entrezid.setdefault(entrez_id, [])
    if ensembl_id not in ensembl_ids:
        ensembl_ids.append(ensembl_id)

In [261]:
len(entrezid.keys())

759

In [262]:
# Entrez IDs carried by more than one distinct human Ensembl gene. Entrez IDs that are
# repeated only because a single gene has several mouse homolog rows are left out.
real_duplicates = {
    entrez_id: ensembl_ids
    for entrez_id, ensembl_ids in entrezid.items()
    if len(ensembl_ids) > 1
}

In [263]:
real_duplicates

{5625.0: ['ENSG00000277196', 'ENSG00000100033'],
 54471.0: ['ENSG00000100335', 'ENSG00000285025'],
 8195.0: ['ENSG00000125863', 'ENSG00000285508', 'ENSG00000285723'],
 6606.0: ['ENSG00000205571', 'ENSG00000172062'],
 29057.0: ['ENSG00000268350', 'ENSG00000179304'],
 1159.0: ['ENSG00000237289', 'ENSG00000223572'],
 552900.0: ['ENSG00000169627', 'ENSG00000183336'],
 79008.0: ['ENSG00000132207', 'ENSG00000181625'],
 3963.0: ['ENSG00000178934', 'ENSG00000205076'],
 1673.0: ['ENSG00000177257', 'ENSG00000171711'],
 55894.0: ['ENSG00000177243', 'ENSG00000176797'],
 245908.0: ['ENSG00000186599', 'ENSG00000186562'],
 245910.0: ['ENSG00000198129', 'ENSG00000186572'],
 10156.0: ['ENSG00000105808', 'ENSG00000170667'],
 643862.0: ['ENSG00000262461',
  'ENSG00000275976',
  'ENSG00000186645',
  'ENSG00000274570',
  'ENSG00000273520'],
 641776.0: ['ENSG00000286038', 'ENSG00000286014', 'ENSG00000286137'],
 51326.0: ['ENSG00000185829', 'ENSG00000228696']}

In [264]:
human_mouse_homolog_genes

Unnamed: 0,human_ensembl_gene_id,human_entrezgene_id,human_external_gene_name,mouse_homolog_ensembl_gene,orthology_type,mouse_homolog_gene_name
0,ENSG00000198888,4535.0,MT-ND1,ENSMUSG00000064341,ortholog_one2one,mt-Nd1
1,ENSG00000198763,4536.0,MT-ND2,ENSMUSG00000064345,ortholog_one2one,mt-Nd2
2,ENSG00000198804,4512.0,MT-CO1,ENSMUSG00000064351,ortholog_one2one,mt-Co1
3,ENSG00000198712,4513.0,MT-CO2,ENSMUSG00000064354,ortholog_one2one,mt-Co2
4,ENSG00000228253,4509.0,MT-ATP8,ENSMUSG00000064356,ortholog_one2one,mt-Atp8
...,...,...,...,...,...,...
21936,ENSG00000187017,83715.0,ESPN,ENSMUSG00000028943,ortholog_one2one,Espn
21937,ENSG00000198216,777.0,CACNA1E,ENSMUSG00000004110,ortholog_one2one,Cacna1e
21938,ENSG00000179930,127665.0,ZNF648,ENSMUSG00000066797,ortholog_one2one,Zfp648
21939,ENSG00000162437,55225.0,RAVER2,ENSMUSG00000035275,ortholog_one2one,Raver2


# Write the results

In [241]:
# Save the human -> mouse homolog table as CSV in the data directory.
# to_csv is called without index=False, so the integer row index is written as the
# first (unnamed) column of the file.
human_mouse_homolog_genes.to_csv(os.path.join(path_data, 'homolog_human_mouse_ensembl_gene.csv'))