In [78]:
# OPTIONAL: Load the "autoreload" eX_orig[alias]tension so that code can change
%load_ext autoreload

# OPTIONAL: always reload modules so that as you change code in src, it gets loaded
%autoreload 2

%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [79]:
import biomart
import os
import re
import matplotlib

import pandas as pd
import pickle as pkl
import numpy as np
import collections as cx
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from goatools.go_search import GoSearch
from scipy import stats

from src.models.outliers import impute_zero_values
from src.visualization.plots import psuedocontrolcomparisonplot
from src.config import get_interim_dir, get_experiment_artifacts
from src.config import get_hussmann_supplementary_xlsx, HOME_PATH, FILTER_COUNT, METHOD, PROFILE_TYPE


In [80]:
FILTER_COUNT, METHOD, PROFILE_TYPE

(700, 'robust_cov', 'paired-replicates')

# GO Analysis

Used the following notebooks as examples:
- https://github.com/tanghaibao/goatools/blob/main/notebooks/goea_nbt3102.ipynb
- https://github.com/tanghaibao/goatools/blob/main/notebooks/cell_cycle.ipynb

See more examples here:
- https://github.com/tanghaibao/goatools

### Download ontologies and associations

In [81]:
# Get http://geneontology.org/ontology/go-basic.obo
from goatools.base import download_go_basic_obo
obo_fname = download_go_basic_obo(obo="artifacts/go-basic.obo")


# Get ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/gene2go.gz
from goatools.base import download_ncbi_associations
fin_gene2go = download_ncbi_associations(gene2go="artifacts/gene2go")

  EXISTS: artifacts/go-basic.obo
  EXISTS: artifacts/gene2go


### Load ontologies and associations

In [82]:
from goatools.obo_parser import GODag

obodag = GODag("artifacts/go-basic.obo")

artifacts/go-basic.obo: fmt(1.2) rel(2023-01-01) 46,739 Terms


In [83]:
from __future__ import print_function
from goatools.anno.genetogo_reader import Gene2GoReader

# Read NCBI's gene2go. Store annotations in a list of namedtuples
objanno = Gene2GoReader(fin_gene2go, taxids=[10090])

# Get namespace2association where:
#    namespace is:
#        BP: biological_process               
#        MF: molecular_function
#        CC: cellular_component
#    assocation is a dict:
#        key: NCBI GeneID
#        value: A set of GO IDs associated with that gene
ns2assoc = objanno.get_ns2assc()

for nspc, id2gos in ns2assoc.items():
    print("{NS} {N:,} annotated mouse genes".format(NS=nspc, N=len(id2gos)))

HMS:0:00:05.282768 429,754 annotations, 29,773 genes, 19,208 GOs, 1 taxids READ: artifacts/gene2go 
MF 17,372 annotated mouse genes
CC 19,353 annotated mouse genes
BP 18,599 annotated mouse genes


### Load Background Gene Set

In [84]:
results_df = pd.read_pickle(get_experiment_artifacts() + "augmented_outlier_results.pkl")
results_df

Sample,Global,Global,Global,Global,Global,T1,T1,T1,T1,T2,T2,T2,T2,T3,T3,T3,T3,Gene Sets,Gene Sets,Gene Sets
Measure,Max Score,Mean Consistency,Mean Score,Mean Std CLR Consistency,Mean Std Consistency,CLR Consistency,Consistency,distances,pvalues,CLR Consistency,Consistency,distances,pvalues,CLR Consistency,Consistency,distances,pvalues,Adamson,GeneSubset2,GeneSubsetSD30
Gene,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2
0610009B22Rik,-0.202316,-0.263600,-0.487511,-0.864817,-0.918179,-0.849354,-0.897694,0.169394,0.999409,-0.074988,0.184059,2.782549,0.733467,0.141639,-0.077164,6.205324,0.286750,False,False,False
0610010K14Rik,-0.506267,0.404967,-0.554160,0.147015,0.639911,0.689295,0.672534,1.420544,0.922053,-0.149701,-0.019421,2.793915,0.731721,-0.117280,0.561789,2.871384,0.719807,False,False,False
0610030E20Rik,-0.096154,0.068685,-0.334093,-0.104114,-0.108152,0.366741,0.528662,8.092794,0.151195,-0.132531,-0.043404,3.102720,0.684153,-0.118037,-0.279202,3.765686,0.583620,False,False,False
0610040J01Rik,0.585708,0.337484,0.246902,-0.339103,0.485153,0.380555,0.580776,12.609331,0.027328,-0.650751,-0.067379,6.834351,0.233254,0.090293,0.499054,14.848909,0.011028,False,False,False
1110004F10Rik,0.243536,-0.194733,0.024213,-0.119202,-0.727883,-0.216158,-0.155832,9.343101,0.096137,0.086380,-0.054858,6.242655,0.283318,0.283791,-0.373509,11.095744,0.049514,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Znhit1,0.521094,0.083298,0.292026,1.047208,-0.055802,,,,,0.612933,0.664153,13.463578,0.019401,0.474388,-0.497556,9.115033,0.104563,True,False,False
Zranb2,0.953777,0.065813,0.670830,0.916843,-0.120845,,,,,0.351070,-0.176781,12.086810,0.033617,0.631090,0.308406,18.886149,0.002018,False,False,False
Zrsr1,0.251496,-0.220415,-0.017179,-0.678197,-0.742873,,,,,-0.582349,0.044216,5.123631,0.400979,0.157301,-0.485046,11.183051,0.047869,False,False,False
Zup1,0.072598,-0.140818,-0.145972,-0.724366,-0.572357,,,,,-0.206005,-0.095072,8.828292,0.116112,-0.281189,-0.186564,4.425913,0.489857,False,False,False


In [85]:
GeneID2nt_mus = results_df.index.to_list()
GeneID2nt_mus[:5]

['0610009B22Rik',
 '0610010K14Rik',
 '0610030E20Rik',
 '0610040J01Rik',
 '1110004F10Rik']

In [86]:
def get_ensembl_mappings():
    filename = "artifacts/gene_symbol_2_ids.pkl"
    if os.path.exists(filename):
        return pkl.load(open(filename, 'rb'))


    # Set up connection to server                                               
    server = biomart.BiomartServer('http://www.ensembl.org/biomart')         
    mart = server.datasets['mmusculus_gene_ensembl']                            
                                                                                
    # List the types of data we want                                            
    attributes = ['ensembl_transcript_id', 'mgi_symbol', 
                  'ensembl_gene_id', 'entrezgene_id']
                                                                                
    # Get the mapping between the attributes                                    
    response = mart.search({'attributes': attributes})                          
    data = response.raw.data.decode('ascii')                                    
                                                                                
    gene_symbol_to_ensemble = {}                                                  
    # Store the data in a dict                                                  
    for line in data.splitlines():                                              
        line = line.split('\t')                                                 
        # The entries are in the same order as in the `attributes` variable
        transcript_id = line[0]                                                 
        gene_symbol = line[1]                                                   
        ensembl_gene = line[2]                                                  
        entrezgene_id = line[3] 
                                                                                
        # Some of these keys may be an empty string. If you want, you can 
        # avoid having a '' key in your dict by ensuring the 
        # transcript/gene/peptide ids have a nonzero length before
        # adding them to the dict
        gene_symbol_to_ensemble[gene_symbol] = {
            "transcript_id": transcript_id,
            "ensembl_gene": ensembl_gene,
            "entrezgene_id": entrezgene_id,
        }

    pkl.dump(gene_symbol_to_ensemble, open(filename, 'wb'))
                                                                                
    return gene_symbol_to_ensemble

mappings = get_ensembl_mappings()

In [87]:
# [GeneID2nt_mus]
background_gene_ids = []
for g in GeneID2nt_mus:
    if g in mappings:
        try:
            background_gene_ids.append(int(mappings[g]['entrezgene_id']))
        except:
            continue

len(background_gene_ids)

18000

In [88]:
background_gene_ids[:5]

[66050, 104457, 68364, 76261, 56372]

In [89]:
geneid2symbol = {}
for g in GeneID2nt_mus:
    if g in mappings:
        try:
            geneid2symbol[int(mappings[g]['entrezgene_id'])] = g
        except:
            continue


### Initialise a GOEA object

In [90]:
from goatools.goea.go_enrichment_ns import GOEnrichmentStudyNS

goeaobj = GOEnrichmentStudyNS(
        background_gene_ids, # List of mouse genes
        ns2assoc, # geneid/GO associations
        obodag, # Ontologies
        propagate_counts = False,
        alpha = 0.05, # default significance cut-off
        methods = ['fdr_bh']) # defult multipletest correction method


Load BP Ontology Enrichment Analysis ...
 86% 15,471 of 18,000 population items found in association

Load CC Ontology Enrichment Analysis ...
 91% 16,334 of 18,000 population items found in association

Load MF Ontology Enrichment Analysis ...
 82% 14,838 of 18,000 population items found in association


### Lets study genes which are outlying

In [91]:
outliers = results_df.sort_values(("Global", "Mean Score"), ascending=False).index.to_list()[:500]

In [92]:
outliers[:10]

['Atp6v1g1',
 'Metap2',
 'H2ac18',
 'Xrcc5',
 'H2ax',
 'Qars',
 'Mad2l2',
 'Atp2a2',
 'Xrcc6',
 'Krtap4-8']

In [93]:
# [outliers]
geneids_study = []
for g in outliers:
    if g in mappings:
        try:
            geneids_study.append(int(mappings[g]['entrezgene_id']))
        except:
            continue

len(geneids_study)

496

In [94]:
goea_results_all = goeaobj.run_study(geneids_study)
goea_results_sig = [r for r in goea_results_all if r.p_fdr_bh < 0.05]


Runing BP Ontology Analysis: current study set of 496 IDs.
 89%    440 of    496 study items found in association
100%    496 of    496 study items found in population(18000)
Calculating 12,632 uncorrected p-values using fisher_scipy_stats
  12,632 terms are associated with 15,470 of 18,000 population items
   1,629 terms are associated with    440 of    496 study items
  METHOD fdr_bh:
      71 GO terms found significant (< 0.05=alpha) ( 65 enriched +   6 purified): statsmodels fdr_bh
     265 study items associated with significant GO IDs (enriched)
      18 study items associated with significant GO IDs (purified)

Runing CC Ontology Analysis: current study set of 496 IDs.
 94%    464 of    496 study items found in association
100%    496 of    496 study items found in population(18000)
Calculating 1,786 uncorrected p-values using fisher_scipy_stats
   1,786 terms are associated with 16,333 of 18,000 population items
     488 terms are associated with    464 of    496 study items
 

In [95]:
print('{N} of {M:,} results were significant'.format(
    N=len(goea_results_sig),
    M=len(goea_results_all)))

223 of 18,964 results were significant


In [96]:
print('Significant results: {E} enriched, {P} purified'.format(
    E=sum(1 for r in goea_results_sig if r.enrichment=='e'),
    P=sum(1 for r in goea_results_sig if r.enrichment=='p')))

Significant results: 204 enriched, 19 purified


In [97]:
ctr = cx.Counter([r.NS for r in goea_results_sig])
print('Significant results[{TOTAL}] = {BP} BP + {MF} MF + {CC} CC'.format(
    TOTAL=len(goea_results_sig),
    BP=ctr['BP'],  # biological_process
    MF=ctr['MF'],  # molecular_function
    CC=ctr['CC'])) # cellular_component

Significant results[223] = 71 BP + 37 MF + 115 CC


In [98]:
from goatools.godag_plot import plot_gos, plot_results, plot_goid2goobj

plot_results(get_experiment_artifacts() + "/outliers_{NS}.png", goea_results_sig, 
# We can further configure the plot...
    id2symbol=geneid2symbol, # Print study gene Symbols, not Entrez GeneIDs
    study_items=15, # Only only 6 gene Symbols max on GO terms
    items_p_line=3, # Print 3 genes per line
    )

   71 usr 299 GOs  WROTE: /Users/colm/repos/MUSICian/notebooks/exploratory/outlier_detection/artifacts/paired-replicates.robust_cov.700//outliers_BP.png
  115 usr 178 GOs  WROTE: /Users/colm/repos/MUSICian/notebooks/exploratory/outlier_detection/artifacts/paired-replicates.robust_cov.700//outliers_CC.png
   37 usr 101 GOs  WROTE: /Users/colm/repos/MUSICian/notebooks/exploratory/outlier_detection/artifacts/paired-replicates.robust_cov.700//outliers_MF.png


In [99]:
goeaobj.wr_xlsx("{}/outliers.xlsx".format(get_experiment_artifacts()), goea_results_sig)
goeaobj.wr_txt("{}/outliers.txt".format(get_experiment_artifacts()), goea_results_sig)

    223 items WROTE: /Users/colm/repos/MUSICian/notebooks/exploratory/outlier_detection/artifacts/paired-replicates.robust_cov.700//outliers.xlsx
    223 GOEA results for   450 study items. WROTE: /Users/colm/repos/MUSICian/notebooks/exploratory/outlier_detection/artifacts/paired-replicates.robust_cov.700//outliers.txt


### Print all GO Terms related to repair

In [100]:
go2geneids_mus = objanno.get_id2gos(namespace='BP', go2geneids=True)
print("{N:} GO terms associated with mouse NCBI Entrez GeneIDs".format(N=len(go2geneids_mus)))

12788 IDs in loaded association branch, BP
12788 GO terms associated with mouse NCBI Entrez GeneIDs


In [101]:
srchhelp = GoSearch("artifacts/go-basic.obo", go2items=go2geneids_mus)

artifacts/go-basic.obo: fmt(1.2) rel(2023-01-01) 46,739 Terms; optional_attrs(comment def relationship synonym xref)


In [102]:
# Compile search pattern for 'cell cycle'
dsb_repair_all = re.compile(r'double-strand break repair|interstrand cross-link repair', flags=re.IGNORECASE)

In [103]:
# Find ALL GOs and GeneIDs associated with 'double-strand break'.

# Details of search are written to a log file
fout_allgos = "artifacts/DSB_repair_gos_mouse.txt"
with open(fout_allgos, "w") as log:
    # Search for 'double-strand break' in GO terms
    gos = srchhelp.get_matching_gos(dsb_repair_all, prt=log)
    gos.remove("GO:0006281") # remove DNA repair GO Term, as this is a blanket term for all DNA repair genes
    # gos_all = srchhelp.add_children_gos(gos)
    # Get Entrez GeneIDs for double-strand break GOs
    dsb_repair_geneids = srchhelp.get_items(gos)
print("{N} mouse NCBI Entrez GeneIDs related to 'double-strand break' found.".format(N=len(dsb_repair_geneids)))

307 mouse NCBI Entrez GeneIDs related to 'double-strand break' found.


In [104]:
dsb_genes = []
for geneid in dsb_repair_geneids:
    if geneid in geneid2symbol:
        dsb_genes.append(geneid2symbol[geneid])
len(dsb_genes)

295

In [105]:
"Fanca" in dsb_genes

True

In [106]:
# Compile search pattern for 'cell cycle'
repair_all = re.compile(r'repair', flags=re.IGNORECASE)
repair_not = re.compile(r'membrane', flags=re.IGNORECASE)
# Find ALL GOs and GeneIDs associated with 'double-strand break'.

# Details of search are written to a log file
fout_allgos = "artifacts/repair_gos_mouse.txt" 
with open(fout_allgos, "w") as log:
    # Search for 'cell cycle' in GO terms
    gos_repair_all = srchhelp.get_matching_gos(repair_all, prt=log)
    # Find any GOs matching 'cell cycle-independent' (e.g., "lysosome")
    gos_no_repair = srchhelp.get_matching_gos(repair_not, gos=gos_repair_all, prt=log)
    # Remove GO terms that are not "cell cycle" GOs
    gos = gos_repair_all.difference(gos_no_repair)
    # Add children GOs of cell cycle GOs
    # gos_all = srchhelp.add_children_gos(gos)
    # Get Entrez GeneIDs for cell cycle GOs
    repair_geneids = srchhelp.get_items(gos)
print("{N} mouse NCBI Entrez GeneIDs related to 'repair' found.".format(N=len(repair_geneids)))

954 mouse NCBI Entrez GeneIDs related to 'repair' found.


In [107]:
repair_genes = []
for geneid in repair_geneids:
    if geneid in geneid2symbol:
        repair_genes.append(geneid2symbol[geneid])
len(repair_genes)

899

In [108]:
# Compile search pattern for 'cell cycle'
ddr_all = re.compile(r'DNA damage response', flags=re.IGNORECASE)
# Find ALL GOs and GeneIDs associated with 'double-strand break'.

# Details of search are written to a log file
fout_allgos = "artifacts/DDR_gos_mouse.txt"
with open(fout_allgos, "w") as log:
    # Search for 'double-strand break' in GO terms
    gos = srchhelp.get_matching_gos(ddr_all, prt=log)
    # gos_all = srchhelp.add_children_gos(gos)
    # Get Entrez GeneIDs for double-strand break GOs
    ddr_geneids = srchhelp.get_items(gos)
print("{N} mouse NCBI Entrez GeneIDs related to 'double-strand break' found.".format(N=len(ddr_geneids)))

69 mouse NCBI Entrez GeneIDs related to 'double-strand break' found.


In [109]:
ddr_genes = []
for geneid in ddr_geneids:
    if geneid in geneid2symbol:
        ddr_genes.append(geneid2symbol[geneid])
len(ddr_genes)

66

In [110]:
len(gos)

10

In [111]:
full_outliers_df = pd.read_pickle(get_experiment_artifacts() + "augmented_outlier_results.pkl")
full_outliers_df[("Global", "isGODSBRepair")] = np.isin(full_outliers_df.index.to_list(), dsb_genes)
full_outliers_df[("Global", "isGORepair")] = np.isin(full_outliers_df.index.to_list(), repair_genes)
full_outliers_df[("Global", "isGODDR")] = np.isin(full_outliers_df.index.to_list(), ddr_genes)
outliers_df = full_outliers_df.sort_values(("Global", "Mean Score")).iloc[:500, :]

In [112]:
outliers_df

Sample,Global,Global,Global,Global,Global,T1,T1,T1,T1,T2,...,T3,T3,T3,T3,Gene Sets,Gene Sets,Gene Sets,Global,Global,Global
Measure,Max Score,Mean Consistency,Mean Score,Mean Std CLR Consistency,Mean Std Consistency,CLR Consistency,Consistency,distances,pvalues,CLR Consistency,...,CLR Consistency,Consistency,distances,pvalues,Adamson,GeneSubset2,GeneSubsetSD30,isGODSBRepair,isGORepair,isGODDR
Gene,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
Gnas,-0.682165,-0.365017,-0.724782,-1.152150,-1.097203,-0.087964,-0.011177,0.967983,0.965107,-0.678992,...,-0.468922,-0.854803,0.204672,0.999063,False,False,False,False,False,False
Cdc14a,-0.688188,-0.658062,-0.713284,-1.605206,-1.781324,-0.249295,-0.485407,0.894752,0.970600,-0.737263,...,-0.847485,-0.691329,0.523468,0.991239,False,False,False,False,False,False
Espn,-0.679413,0.370946,-0.709745,0.806143,0.558242,0.341924,0.310200,0.726430,0.981487,0.812719,...,0.161350,0.130807,0.972209,0.964776,False,False,False,False,False,False
Speg,-0.670757,-0.091595,-0.693301,0.005035,-0.489151,0.445218,0.125287,1.099064,0.954187,0.246237,...,-0.455303,-0.127308,1.067146,0.956970,False,False,False,False,False,False
Tmcc2,-0.644603,-0.595784,-0.681887,-1.581179,-1.646458,-0.466684,-0.471128,1.424668,0.921596,-0.883009,...,-0.417541,-0.383930,0.992192,0.963193,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Ucn,-0.489273,-0.392723,-0.567435,-1.030664,-1.199954,-0.474781,-0.736003,1.959461,0.854726,-0.170891,...,-0.404211,-0.266168,1.707091,0.888005,False,False,False,False,False,False
Id4,-0.407423,-0.027436,-0.567366,-0.295128,-0.326538,-0.085838,0.396985,1.581146,0.903520,-0.025999,...,0.015124,-0.467283,1.152882,0.949317,False,False,False,False,False,False
Pepd,-0.340463,-0.437536,-0.567363,-0.551726,-1.286981,-0.539444,-0.573926,0.792931,0.977482,0.426140,...,-0.304368,-0.650044,1.129592,0.951451,False,False,False,False,False,False
Ccdc178,-0.504646,-0.352856,-0.567157,-0.762499,-1.089405,-0.169156,-0.504584,2.537201,0.770882,0.211746,...,-0.782181,-0.814613,2.889173,0.717067,False,False,False,False,False,False


In [113]:
print("{}/{} Genes marked is DSB repair in GO".format(outliers_df[("Global", "isGODSBRepair")].sum(), len(dsb_genes)))

1/295 Genes marked is DSB repair in GO


In [114]:
# Plot subset starting from these significant GO terms
goid_subset = [
    'GO:0000724', # BP double-strand break repair via HR
    'GO:0006303', # BP double-strand break repair via NHEJ
    'GO:0036297', # BP intra-strand cross-link repair
    'GO:0006302', # BP double-strand break repair
    'GO:0006281', # BP DNA repair
    'GO:0000727', # BP double-strand break repair via break induced replication
]
plot_gos(get_experiment_artifacts() + "/top_outliers_subset_BP.pdf", 
    goid_subset, # Source GO ids
    obodag, 
    id2symbol=geneid2symbol, # Print study gene Symbols, not Entrez GeneIDs
    study_items=15, # Only only 6 gene Symbols max on GO terms
    items_p_line=3, # Print 3 genes per line
    goea_results=goea_results_all) # Use pvals for coloring

    6 usr  29 GOs  WROTE: /Users/colm/repos/MUSICian/notebooks/exploratory/outlier_detection/artifacts/paired-replicates.robust_cov.700//top_outliers_subset_BP.pdf


In [115]:
full_outliers_df.to_pickle("{}/augmented_with_GO_outlier_results.pkl".format(get_experiment_artifacts()))
full_outliers_df.shape

(18078, 23)

In [116]:
full_outliers_df

Sample,Global,Global,Global,Global,Global,T1,T1,T1,T1,T2,...,T3,T3,T3,T3,Gene Sets,Gene Sets,Gene Sets,Global,Global,Global
Measure,Max Score,Mean Consistency,Mean Score,Mean Std CLR Consistency,Mean Std Consistency,CLR Consistency,Consistency,distances,pvalues,CLR Consistency,...,CLR Consistency,Consistency,distances,pvalues,Adamson,GeneSubset2,GeneSubsetSD30,isGODSBRepair,isGORepair,isGODDR
Gene,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
0610009B22Rik,-0.202316,-0.263600,-0.487511,-0.864817,-0.918179,-0.849354,-0.897694,0.169394,0.999409,-0.074988,...,0.141639,-0.077164,6.205324,0.286750,False,False,False,False,False,False
0610010K14Rik,-0.506267,0.404967,-0.554160,0.147015,0.639911,0.689295,0.672534,1.420544,0.922053,-0.149701,...,-0.117280,0.561789,2.871384,0.719807,False,False,False,False,False,False
0610030E20Rik,-0.096154,0.068685,-0.334093,-0.104114,-0.108152,0.366741,0.528662,8.092794,0.151195,-0.132531,...,-0.118037,-0.279202,3.765686,0.583620,False,False,False,False,False,False
0610040J01Rik,0.585708,0.337484,0.246902,-0.339103,0.485153,0.380555,0.580776,12.609331,0.027328,-0.650751,...,0.090293,0.499054,14.848909,0.011028,False,False,False,False,False,False
1110004F10Rik,0.243536,-0.194733,0.024213,-0.119202,-0.727883,-0.216158,-0.155832,9.343101,0.096137,0.086380,...,0.283791,-0.373509,11.095744,0.049514,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Znhit1,0.521094,0.083298,0.292026,1.047208,-0.055802,,,,,0.612933,...,0.474388,-0.497556,9.115033,0.104563,True,False,False,False,True,True
Zranb2,0.953777,0.065813,0.670830,0.916843,-0.120845,,,,,0.351070,...,0.631090,0.308406,18.886149,0.002018,False,False,False,False,False,False
Zrsr1,0.251496,-0.220415,-0.017179,-0.678197,-0.742873,,,,,-0.582349,...,0.157301,-0.485046,11.183051,0.047869,False,False,False,False,False,False
Zup1,0.072598,-0.140818,-0.145972,-0.724366,-0.572357,,,,,-0.206005,...,-0.281189,-0.186564,4.425913,0.489857,False,False,False,False,False,False
