Notebook to select genes and species
====================================

### Import yspecies utility classes and enums ###

In [1]:
from typing import *
from yspecies import *
from yspecies.enums import *
from yspecies.dataset import *
from yspecies.misc import *

## Parameters cell ##
Parameters are overiddent by papermill when run inside DVC stages

In [2]:
# Parameters
drop_species = ["Bos_grunniens", "Ornithorhynchus_anatinus", "Erinaceus_europaeus"]
min_samples_per_species = 2
tissues = ["Heart", "Liver", "Kidney", "Brain", "Lung", "Skin", "Blood"]
drop_samples = ["SRR032135","SRR5928355","SRR5928357","SRR5928359","SRR306394","SRR306397","SRR306400" ,"SRR8309416","SRR8309417","SRR9066909","SRR5767265","SRR5767266","SRR5767268","SRR5767269","SRR5767270","SRR867587" ,"SRR908037" ,"SRR908038" ]

orthology =  Orthology.one2oneplus_directed.name
reference_species = "Homo_sapiens"
animal_class = AnimalClass.mammals.name
na_threshold = 0.1

reading parameters

In [3]:
ortho = Orthology[orthology]
cl = AnimalClass[animal_class]

#### Load dependencies ####

In [4]:
from dataclasses import dataclass
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [5]:
#settings
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.float_format', lambda x: '%.3f' % x)
import pprint
pp = pprint.PrettyPrinter(indent=4)

#### Load pathes ####

In [6]:
from pathlib import Path
locations: Locations = Locations("./") if Path("./data").exists() else Locations("../")

## Load gene tables ##

In [7]:
load_path = locations.input.genes.by_class  / cl.name.capitalize() / (ortho.name + ".tsv")
load_path

PosixPath('../data/input/genes/by_animal_class/Mammalia/one2oneplus_directed.tsv')

### Load species and samples ###

In [8]:
species = load_table(locations.input.species)
species_in_class = species[species.animal_class.eq(cl.name)]
print("Selected "+ cl.name + " class with " + str(species_in_class.shape[0]) + " species" )
species_in_class.head(3)

Selected Mammalia class with 48 species


Unnamed: 0,species,common_name,animal_class,order,family,lifespan,ensembl_url,mass_g,metabolic_rate,temperature,temperature_kelvin,gestation_days,taxon,female_maturity_days,male_maturity_days,litters_per_year,inter_birth_interval,birth_weight_g,weaning_weight_g
10,Homo_sapiens,Human,Mammalia,Primates,Hominidae,122.5,https://www.ensembl.org/Homo_sapiens,70000.0,82.78,37.0,310.15,280.0,taxon#9606,1.0,5110.0,0.3,639.0,3312.5,
11,Loxodonta_africana,African bush elephant,Mammalia,Proboscidea,Elephantidae,65.0,https://www.ensembl.org/Loxodonta_africana,,,36.2,309.35,670.0,taxon#9785,1.0,3650.0,0.2,1707.0,105000.0,
12,Gorilla_gorilla,Gorilla,Mammalia,Primates,Hominidae,60.1,https://www.ensembl.org/Gorilla_gorilla,,,35.5,308.65,256.0,taxon#9595,1.0,4015.0,0.3,1397.0,2061.4,


In [9]:
samples = load_table(locations.input.samples)
samples_ext = samples.merge(species, on="species", how="inner")
samples_in_class = samples_ext[samples_ext["animal_class"] ==  cl.name]
selected_samples = samples_in_class[~samples_in_class.run.isin(drop_samples) & ~samples_in_class.run.isin(drop_samples)]
samples_in_class.head(3)

Unnamed: 0,bioproject,series,run,species,tissue,sample_name,characteristics,sequencer,age,sex,tumor,source,study,study_title,salmon_version,library_layout,library_selection,library_strategy,lib_type,bootstrap,protocol,common_name,animal_class,order,family,lifespan,ensembl_url,mass_g,metabolic_rate,temperature,temperature_kelvin,gestation_days,taxon,female_maturity_days,male_maturity_days,litters_per_year,inter_birth_interval,birth_weight_g,weaning_weight_g
0,PRJNA247712,PRJNA247712,SRR1287653,Ailuropoda_melanoleuca,Blood,SL01,no;Model organism or animal;19;female;blood;SL...,Illumina_HiSeq_2000,19,female,no,blood,https://trace.ncbi.nlm.nih.gov/Traces/sra/?stu...,The giant panda blood Transcriptome,1.1.0,PAIRED,PCR,RNA-Seq,A,96,,Giant panda,Mammalia,Carnivora,Ursidae,36.8,https://www.ensembl.org/Ailuropoda_melanoleuca,,,,,48.0,taxon#9646,1.5,2192.0,0.7,548.0,110.0,
1,PRJNA247712,PRJNA247712,SRR1287654,Ailuropoda_melanoleuca,Blood,XB01,no;Model organism or animal;12;male;blood;XB01...,Illumina_HiSeq_2000,12,male,no,blood,https://trace.ncbi.nlm.nih.gov/Traces/sra/?stu...,The giant panda blood Transcriptome,1.1.0,PAIRED,PCR,RNA-Seq,A,96,,Giant panda,Mammalia,Carnivora,Ursidae,36.8,https://www.ensembl.org/Ailuropoda_melanoleuca,,,,,48.0,taxon#9646,1.5,2192.0,0.7,548.0,110.0,
2,PRJNA247712,PRJNA247712,SRR1287655,Ailuropoda_melanoleuca,Blood,XB02,no;Model organism or animal;6;female;blood;XB0...,Illumina_HiSeq_2000,6,female,no,blood,https://trace.ncbi.nlm.nih.gov/Traces/sra/?stu...,The giant panda blood Transcriptome,1.1.0,PAIRED,PCR,RNA-Seq,A,128,,Giant panda,Mammalia,Carnivora,Ursidae,36.8,https://www.ensembl.org/Ailuropoda_melanoleuca,,,,,48.0,taxon#9646,1.5,2192.0,0.7,548.0,110.0,


In [10]:
by_tissue_count_initial = samples_in_class.groupby(["tissue","species"]).run.count().sort_values(ascending=False) #initial (before dropping species and tissues)
by_tissue_count_initial

tissue       species                   
Liver        Rattus_norvegicus             21
Brain        Mus_musculus                  16
Liver        Mus_musculus                  15
Brain        Rattus_norvegicus             15
Liver        Mesocricetus_auratus          14
Brain        Cavia_porcellus               14
             Homo_sapiens                  12
Liver        Monodelphis_domestica         11
             Microcebus_murinus            10
Brain        Cavia_aperea                  10
Kidney       Rattus_norvegicus              9
Brain        Ornithorhynchus_anatinus       9
Liver        Mus_caroli                     8
Brain        Pan_paniscus                   7
Heart        Monodelphis_domestica          7
Kidney       Monodelphis_domestica          7
Heart        Ornithorhynchus_anatinus       7
Brain        Monodelphis_domestica          7
Kidney       Ornithorhynchus_anatinus       7
Blood        Tursiops_truncatus             6
Lung         Monodelphis_domestica      

### Check issues with species annotations ###
Some species may need annotations (for instance lifespan is not known)

In [11]:
samples_anti = samples.merge(species, on="species", how='outer', indicator=True)
samples_species = samples_anti[['_merge','run', 'species', 'tissue', 'sample_name',
       'characteristics', 'sequencer', 'age', 'sex',"species"]].sort_values(by="_merge")
need_annotation = samples_species[samples_species["_merge"]=="left_only"]
print(need_annotation.shape)
need_annotation

(2, 10)


Unnamed: 0,_merge,run,species,tissue,sample_name,characteristics,sequencer,age,sex,species.1
413,left_only,SRR1552212,Notamacropus_eugenii,Liver,meu_daughter,no;Missing;1656;Model organism or animal;12 we...,Illumina_HiSeq_2000,12 weeks,female,Notamacropus_eugenii
412,left_only,SRR1041778,Notamacropus_eugenii,Liver,GSM1278059,no;individual 1;liver;liver;male,Illumina_HiSeq_2000,,,Notamacropus_eugenii


### Select samples ###

In [12]:
def filter_samples(samples: pd.DataFrame, tissues: List[str], drop_species: List[str], drop_samples: List[str]) -> Tuple[pd.DataFrame,pd.DataFrame]:
    if (tissues == ["all"] or tissues == [] or tissues is None):
        filter = ~samples.run.isin(drop_samples)  & ~samples.species.isin(drop_species)
    else:
        filter = ~samples.run.isin(drop_samples) & ~samples.species.isin(drop_species) & samples.tissue.isin(tissues)
    return (samples[filter].set_index("run"),samples[~filter].set_index("run") )

In [13]:
#tissues = ["Heart", "Liver", "Kidney", "Brain", "Lung"]
(samples_filtered, samples_dropped) = filter_samples(samples_in_class, tissues, drop_species, drop_samples)
if (not min_samples_per_species is None) and min_samples_per_species > 1:    
    by_species_count = samples_in_class.groupby(["species"]).run.count().sort_values(ascending=False)   
    to_drop = by_species_count[by_species_count <= min_samples_per_species].index.to_list()
    selected_samples = samples_filtered[~samples_filtered.species.isin(to_drop)]    
    extra_dropped = samples_filtered[samples_filtered.species.isin(to_drop)]
    samples_dropped = pd.concat([samples_dropped, extra_dropped])
else:
    selected_samples = samples_filtered
tab([["initial","selected", "dropped"],[samples_in_class.shape,samples_filtered.shape,samples_dropped.shape]])
selected_samples.head(3)

0,1,2
initial,selected,dropped
"(560, 39)","(466, 38)","(101, 38)"


Unnamed: 0_level_0,bioproject,series,species,tissue,sample_name,characteristics,sequencer,age,sex,tumor,source,study,study_title,salmon_version,library_layout,library_selection,library_strategy,lib_type,bootstrap,protocol,common_name,animal_class,order,family,lifespan,ensembl_url,mass_g,metabolic_rate,temperature,temperature_kelvin,gestation_days,taxon,female_maturity_days,male_maturity_days,litters_per_year,inter_birth_interval,birth_weight_g,weaning_weight_g
run,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1
SRR1287653,PRJNA247712,PRJNA247712,Ailuropoda_melanoleuca,Blood,SL01,no;Model organism or animal;19;female;blood;SL...,Illumina_HiSeq_2000,19,female,no,blood,https://trace.ncbi.nlm.nih.gov/Traces/sra/?stu...,The giant panda blood Transcriptome,1.1.0,PAIRED,PCR,RNA-Seq,A,96,,Giant panda,Mammalia,Carnivora,Ursidae,36.8,https://www.ensembl.org/Ailuropoda_melanoleuca,,,,,48.0,taxon#9646,1.5,2192.0,0.7,548.0,110.0,
SRR1287654,PRJNA247712,PRJNA247712,Ailuropoda_melanoleuca,Blood,XB01,no;Model organism or animal;12;male;blood;XB01...,Illumina_HiSeq_2000,12,male,no,blood,https://trace.ncbi.nlm.nih.gov/Traces/sra/?stu...,The giant panda blood Transcriptome,1.1.0,PAIRED,PCR,RNA-Seq,A,96,,Giant panda,Mammalia,Carnivora,Ursidae,36.8,https://www.ensembl.org/Ailuropoda_melanoleuca,,,,,48.0,taxon#9646,1.5,2192.0,0.7,548.0,110.0,
SRR1287655,PRJNA247712,PRJNA247712,Ailuropoda_melanoleuca,Blood,XB02,no;Model organism or animal;6;female;blood;XB0...,Illumina_HiSeq_2000,6,female,no,blood,https://trace.ncbi.nlm.nih.gov/Traces/sra/?stu...,The giant panda blood Transcriptome,1.1.0,PAIRED,PCR,RNA-Seq,A,128,,Giant panda,Mammalia,Carnivora,Ursidae,36.8,https://www.ensembl.org/Ailuropoda_melanoleuca,,,,,48.0,taxon#9646,1.5,2192.0,0.7,548.0,110.0,


In [14]:
species_in_samples = selected_samples.species.drop_duplicates()#.to_list()
species_in_samples.shape
selected_species = species.set_index("species").loc[species_in_samples]
dropped_species = species_in_class[~species_in_class.species.isin(species_in_samples)].set_index("species")
print("droped species"+str(dropped_species.index.to_list()))

tab([["species_in_samples", "selected_species"],[str(species_in_samples.shape), str(selected_species.shape)]])


droped species['Pongo_pygmaeus', 'Lynx_canadensis', 'Bos_grunniens', 'Ornithorhynchus_anatinus', 'Dasypus_novemcinctus', 'Chinchilla_lanigera', 'Sarcophilus_harrisii', 'Erinaceus_europaeus']


0,1
species_in_samples,selected_species
"(40,)","(40, 18)"


In [15]:
species_in_class[~species_in_class.species.isin(species_in_samples)]

Unnamed: 0,species,common_name,animal_class,order,family,lifespan,ensembl_url,mass_g,metabolic_rate,temperature,temperature_kelvin,gestation_days,taxon,female_maturity_days,male_maturity_days,litters_per_year,inter_birth_interval,birth_weight_g,weaning_weight_g
14,Pongo_pygmaeus,Orangutan,Mammalia,Primates,Hominidae,59.0,https://www.ensembl.org/Pongo_abelii,,,,,249.0,taxon#9602,1.0,2555.0,0.2,1414.0,1736.5,
28,Lynx_canadensis,Canada lynx,Mammalia,Carnivora,Felidae,26.8,https://www.ensembl.org/Lynx_canadensis,,,,,63.0,taxon#61383,3.5,573.0,1.0,365.0,204.0,
29,Bos_grunniens,Yak,Mammalia,Artiodactyla,Bovidae,26.3,https://www.ensembl.org/Bos_grunniens,,,38.0,311.15,274.0,taxon#30521,1.0,738.0,0.8,,18000.0,
34,Ornithorhynchus_anatinus,Duck-billed platypus,Mammalia,Monotremata,Ornithorhynchidae,22.6,https://www.ensembl.org/Ornithorhynchus_anatinus,1030.3,1.931,34.0,307.15,17.0,taxon#9258,2.0,548.0,,365.0,,
35,Dasypus_novemcinctus,Nine-banded armadillo,Mammalia,Cingulata,Dasypodidae,22.3,https://www.ensembl.org/Dasypus_novemcinctus,3413.7,4.655,34.5,307.65,133.0,taxon#9361,4.0,365.0,1.0,,66.0,
43,Chinchilla_lanigera,Long-tailed chinchilla,Mammalia,Rodentia,Chinchillidae,17.2,https://www.ensembl.org/Chinchilla_lanigera,436.7,1.31,34.7,307.85,111.0,taxon#34839,2.0,240.0,2.0,,35.0,
44,Sarcophilus_harrisii,Tasmanian devil,Mammalia,Dasyuromorphia,Dasyuridae,13.0,https://www.ensembl.org/Sarcophilus_harrisii,6126.8,8.664,35.8,308.95,26.0,taxon#9305,3.0,730.0,1.0,365.0,0.024,
46,Erinaceus_europaeus,Western European hedgehog,Mammalia,Erinaceomorpha,Erinaceidae,11.7,https://www.ensembl.org/Erinaceus_europaeus,1213.5,2.434,34.0,307.15,30.0,taxon#9365,1.5,253.0,1.6,,14.7,


Count how many samples do we have by tissue and species

In [16]:
def samples_in(*cols):
    spc = by_species_count.set_index("species")
    return sum([spc.loc[c].run for c in cols])


In [17]:
by_species_tissue_count = selected_samples.sort_values(["species", "tissue"], ascending=False).groupby(["tissue", "common_name", "species"]).size()
by_species_tissue_count 

tissue  common_name                     species                   
Blood   African bush elephant           Loxodonta_africana             4
        Black snub-nosed monkey         Rhinopithecus_bieti            1
        Bottlenosed dolphin             Tursiops_truncatus             6
        Domestic cattle                 Bos_taurus                     1
        Giant panda                     Ailuropoda_melanoleuca         3
        Horse                           Equus_caballus                 1
        Rhesus monkey                   Macaca_mulatta                 4
        Wild boar                       Sus_scrofa                     6
Brain   American black bear             Ursus_americanus               2
        Black snub-nosed monkey         Rhinopithecus_bieti            1
        Brazilian guinea pig            Cavia_aperea                  10
        Chimpanzee                      Pan_troglodytes                2
        Domestic cat                    Felis_catus      

# Genes #

In [18]:
genes = load_table(locations.input.genes.by_class  / cl.name.capitalize() / (ortho.name + ".tsv"), reference_species, "str")
print(genes.shape)
genes.head()

(21924, 47)


Unnamed: 0_level_0,Loxodonta_africana,Gorilla_gorilla,Pan_troglodytes,Pongo_pygmaeus,Equus_caballus,Pan_paniscus,Tursiops_truncatus,Macaca_mulatta,Macaca_fascicularis,Macaca_nemestrina,Ailuropoda_melanoleuca,Ursus_americanus,Heterocephalus_glaber,Rhinolophus_ferrumequinum,Vombatus_ursinus,Felis_catus,Sus_scrofa,Lynx_canadensis,Bos_grunniens,Canis_lupus_familiaris,Rhinopithecus_bieti,Ovis_aries,Callithrix_jacchus,Ornithorhynchus_anatinus,Dasypus_novemcinctus,Phascolarctos_cinereus,Capra_hircus,Suricata_suricatta,Bos_taurus,Aotus_nancymaae,Otolemur_garnettii,Microcebus_murinus,Chinchilla_lanigera,Sarcophilus_harrisii,Cavia_porcellus,Erinaceus_europaeus,Tupaia_belangeri,Oryctolagus_cuniculus,Ictidomys_tridecemlineatus,Meriones_unguiculatus,Cavia_aperea,Monodelphis_domestica,Mus_musculus,Mesocricetus_auratus,Rattus_norvegicus,Mus_spicilegus,Mus_caroli
Homo_sapiens,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1
ENSG00000242265,,ENSGGOG00000040792,ENSPTRG00000023648,,ENSECAG00000001603,ENSPPAG00000030053,ENSTTRG00000003114,ENSMMUG00000032524,ENSMFAG00000045560,ENSMNEG00000037924,ENSAMEG00000019304,,,ENSRFEG00010011916,ENSVURG00010024800,ENSFCAG00000041002,ENSSSCG00000036049,ENSLCNG00005003562,ENSBGRG00000010269,ENSCAFG00030017055;ENSCAFG00040013027;ENSCAFG0...,ENSRBIG00000013492,ENSOARG00000002475,ENSCJAG00000033086,,ENSDNOG00000036759,ENSPCIG00000028966,,ENSSSUG00005000112,ENSBTAG00000053735,ENSANAG00000021637,ENSOGAG00000031169,ENSMICG00000001469,ENSCLAG00000017695,,,,,ENSOCUG00000027535,ENSSTOG00000022801,ENSMUGG00000022866;ENSMUGG00000022860,ENSCAPG00000010493,,ENSMUSG00000092035,ENSMAUG00000018656,,ENSMSIG00000000323,MGP_CAROLIEiJ_G0028078
ENSG00000139990,ENSLAFG00000023301,ENSGGOG00000009358,ENSPTRG00000006478,ENSPPYG00000005936,ENSECAG00000010524,ENSPPAG00000042013,ENSTTRG00000013941,ENSMMUG00000019346,ENSMFAG00000003413,ENSMNEG00000028207,ENSAMEG00000014443,ENSUAMG00000010770,ENSHGLG00100003336;ENSHGLG00100017958;ENSHGLG0...,ENSRFEG00010004811,ENSVURG00010023351,ENSFCAG00000029516,ENSSSCG00000039701,ENSLCNG00005004208,ENSBGRG00000007044,ENSCAFG00030015021;ENSCAFG00040013486;ENSCAFG0...,ENSRBIG00000044703,ENSOARG00000021180,ENSCJAG00000042954,ENSOANG00000012620,ENSDNOG00000018872,ENSPCIG00000010372,ENSCHIG00000021848,ENSSSUG00005006529,ENSBTAG00000048519,ENSANAG00000030404,ENSOGAG00000008452,ENSMICG00000046667,ENSCLAG00000006604,ENSSHAG00000000665,ENSCPOG00000032392,ENSEEUG00000011616,,ENSOCUG00000010281,ENSSTOG00000026551,ENSMUGG00000023917,ENSCAPG00000014053,ENSMODG00000010219,ENSMUSG00000049106,ENSMAUG00000004350,ENSRNOG00000004556,ENSMSIG00000015740,MGP_CAROLIEiJ_G0017895
ENSG00000073921,ENSLAFG00000000754,ENSGGOG00000009116,ENSPTRG00000004141,ENSPPYG00000003744,ENSECAG00000019264,ENSPPAG00000036162,ENSTTRG00000001554,ENSMMUG00000019618,ENSMFAG00000002978,ENSMNEG00000029285,ENSAMEG00000008778,ENSUAMG00000018059,ENSHGLG00100018440,ENSRFEG00010012989,ENSVURG00010006226,ENSFCAG00000029123,ENSSSCG00000014913,ENSLCNG00005007055,ENSBGRG00000024679,ENSCAFG00030009174;ENSCAFG00000004486;ENSCAFG0...,ENSRBIG00000028102,ENSOARG00000004654,ENSCJAG00000014262,ENSOANG00000013205,ENSDNOG00000038474,ENSPCIG00000018862,ENSCHIG00000015634,ENSSSUG00005014072,ENSBTAG00000001657,ENSANAG00000019901,ENSOGAG00000013950,ENSMICG00000017261,ENSCLAG00000014073,ENSSHAG00000004629,ENSCPOG00000020758,ENSEEUG00000013932,ENSTBEG00000000600,ENSOCUG00000023809,ENSSTOG00000008689,ENSMUGG00000002616,ENSCAPG00000000355,ENSMODG00000004297,ENSMUSG00000039361,ENSMAUG00000011448,ENSRNOG00000018322,ENSMSIG00000002381,MGP_CAROLIEiJ_G0029981
ENSG00000139687,ENSLAFG00000013579,ENSGGOG00000016440,ENSPTRG00000005864,ENSPPYG00000005357,ENSECAG00000014575,ENSPPAG00000002165,ENSTTRG00000004232,ENSMMUG00000007891,ENSMFAG00000004544,ENSMNEG00000030265,ENSAMEG00000003640,ENSUAMG00000011956,ENSHGLG00100008595,ENSRFEG00010000766,ENSVURG00010023466,ENSFCAG00000024529,ENSSSCG00000009401,ENSLCNG00005000165,ENSBGRG00000020100,ENSCAFG00040011728;ENSCAFG00000004436;ENSCAFG0...,ENSRBIG00000033901,ENSOARG00000008246,ENSCJAG00000009076,ENSOANG00000031580;ENSOANG00000015555,ENSDNOG00000013345,ENSPCIG00000029307,ENSCHIG00000004374,ENSSSUG00005003247,ENSBTAG00000006640,ENSANAG00000022056,ENSOGAG00000007461,ENSMICG00000001747,ENSCLAG00000006971,ENSSHAG00000014024,ENSCPOG00000008683,ENSEEUG00000000076,ENSTBEG00000014454,ENSOCUG00000016404,ENSSTOG00000000927,ENSMUGG00000019124,ENSCAPG00000014795,ENSMODG00000011168,ENSMUSG00000022105,ENSMAUG00000020865,ENSRNOG00000016029,ENSMSIG00000007265,MGP_CAROLIEiJ_G0019499
ENSG00000119977,ENSLAFG00000001639,ENSGGOG00000034939,ENSPTRG00000002786,ENSPPYG00000002510,ENSECAG00000016594,ENSPPAG00000043730,ENSTTRG00000003261,ENSMMUG00000003103,ENSMFAG00000001575,ENSMNEG00000037026,ENSAMEG00000015948,ENSUAMG00000021674,ENSHGLG00100006578,ENSRFEG00010015913,ENSVURG00010001079,ENSFCAG00000023675,ENSSSCG00000010496,ENSLCNG00005009552,ENSBGRG00000025629,ENSCAFG00000008406;ENSCAFG00040023697;ENSCAFG0...,ENSRBIG00000034323,ENSOARG00000006497,ENSCJAG00000016206,ENSOANG00000019944,ENSDNOG00000017850,ENSPCIG00000020525,ENSCHIG00000021683,ENSSSUG00005007325,ENSBTAG00000011841,ENSANAG00000030005,ENSOGAG00000003757,ENSMICG00000015970,ENSCLAG00000002407,,ENSCPOG00000021974,ENSEEUG00000002616,ENSTBEG00000010577,ENSOCUG00000005964,ENSSTOG00000007961,ENSMUGG00000008987,,ENSMODG00000004976,ENSMUSG00000025008,ENSMAUG00000021808,ENSRNOG00000047112;ENSRNOG00000060994,ENSMSIG00000005758,MGP_CAROLIEiJ_G0022941


In [19]:
def gc(df: pd.DataFrame, species: pd.DataFrame = None, index_field: str = None) -> pd.DataFrame:
    frame = df.notna().sum().sort_values(ascending=False).to_frame("genes")
    result = frame.reset_index().rename( columns= {"index": "species"})
    if species is None:
        return result
    else:
        field = "common_name" if index_field is None else index_field        
        return result.merge(species, on="species", how="inner").set_index(field)

### Missing value analysis ###

In [20]:
def show_missing(df: pd.DataFrame, title: str = "missing plot", ax = None):
     return sns.heatmap(df.notnull(), cbar=False,ax=ax).set_title(title)      
  
def compare_missing(one: pd.DataFrame, two: pd.DataFrame, one_title: str, two_title: str):
    print("Missing values comparison:")
    print("before: "+str(one.shape) +" || after: "+str(two.shape))    
    print("samples lost:"+str(samples_in(drop_species)))    
    print("Figures:")
    fig, axs = plt.subplots(1,2,figsize=(20,12))
    plt.subplots_adjust(left=0.125, bottom=0.3, right=0.9, top=1.1, wspace=0.3, hspace=0.8)        
    sns.heatmap(one.notnull(), cbar=False,ax=axs[0]).set_title(one_title)    
    sns.heatmap(two.notnull(), cbar=False,ax=axs[1]).set_title(two_title)


In [21]:
#todo: fix issues with charts

### Genes with deleted species and too many NA ###

In [22]:
filtered_genes = genes.drop(columns = dropped_species.index).dropna(thresh=0.001)
filtered_genes.shape

(21882, 39)

Computing the number of not_na_values

In [23]:
not_na_number = (1-na_threshold) * filtered_genes.shape[1]
not_na_number

35.1

In [25]:
selected_genes = genes.drop(columns = dropped_species.index).dropna(thresh=not_na_number)
print("before = "+str(genes.dropna(thresh=1).shape))
print("after = "+str(selected_genes.shape))
selected_genes.head()

before = (21924, 47)
after = (12243, 39)


Unnamed: 0_level_0,Loxodonta_africana,Gorilla_gorilla,Pan_troglodytes,Equus_caballus,Pan_paniscus,Tursiops_truncatus,Macaca_mulatta,Macaca_fascicularis,Macaca_nemestrina,Ailuropoda_melanoleuca,Ursus_americanus,Heterocephalus_glaber,Rhinolophus_ferrumequinum,Vombatus_ursinus,Felis_catus,Sus_scrofa,Canis_lupus_familiaris,Rhinopithecus_bieti,Ovis_aries,Callithrix_jacchus,Phascolarctos_cinereus,Capra_hircus,Suricata_suricatta,Bos_taurus,Aotus_nancymaae,Otolemur_garnettii,Microcebus_murinus,Cavia_porcellus,Tupaia_belangeri,Oryctolagus_cuniculus,Ictidomys_tridecemlineatus,Meriones_unguiculatus,Cavia_aperea,Monodelphis_domestica,Mus_musculus,Mesocricetus_auratus,Rattus_norvegicus,Mus_spicilegus,Mus_caroli
Homo_sapiens,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1
ENSG00000139990,ENSLAFG00000023301,ENSGGOG00000009358,ENSPTRG00000006478,ENSECAG00000010524,ENSPPAG00000042013,ENSTTRG00000013941,ENSMMUG00000019346,ENSMFAG00000003413,ENSMNEG00000028207,ENSAMEG00000014443,ENSUAMG00000010770,ENSHGLG00100003336;ENSHGLG00100017958;ENSHGLG0...,ENSRFEG00010004811,ENSVURG00010023351,ENSFCAG00000029516,ENSSSCG00000039701,ENSCAFG00030015021;ENSCAFG00040013486;ENSCAFG0...,ENSRBIG00000044703,ENSOARG00000021180,ENSCJAG00000042954,ENSPCIG00000010372,ENSCHIG00000021848,ENSSSUG00005006529,ENSBTAG00000048519,ENSANAG00000030404,ENSOGAG00000008452,ENSMICG00000046667,ENSCPOG00000032392,,ENSOCUG00000010281,ENSSTOG00000026551,ENSMUGG00000023917,ENSCAPG00000014053,ENSMODG00000010219,ENSMUSG00000049106,ENSMAUG00000004350,ENSRNOG00000004556,ENSMSIG00000015740,MGP_CAROLIEiJ_G0017895
ENSG00000073921,ENSLAFG00000000754,ENSGGOG00000009116,ENSPTRG00000004141,ENSECAG00000019264,ENSPPAG00000036162,ENSTTRG00000001554,ENSMMUG00000019618,ENSMFAG00000002978,ENSMNEG00000029285,ENSAMEG00000008778,ENSUAMG00000018059,ENSHGLG00100018440,ENSRFEG00010012989,ENSVURG00010006226,ENSFCAG00000029123,ENSSSCG00000014913,ENSCAFG00030009174;ENSCAFG00000004486;ENSCAFG0...,ENSRBIG00000028102,ENSOARG00000004654,ENSCJAG00000014262,ENSPCIG00000018862,ENSCHIG00000015634,ENSSSUG00005014072,ENSBTAG00000001657,ENSANAG00000019901,ENSOGAG00000013950,ENSMICG00000017261,ENSCPOG00000020758,ENSTBEG00000000600,ENSOCUG00000023809,ENSSTOG00000008689,ENSMUGG00000002616,ENSCAPG00000000355,ENSMODG00000004297,ENSMUSG00000039361,ENSMAUG00000011448,ENSRNOG00000018322,ENSMSIG00000002381,MGP_CAROLIEiJ_G0029981
ENSG00000139687,ENSLAFG00000013579,ENSGGOG00000016440,ENSPTRG00000005864,ENSECAG00000014575,ENSPPAG00000002165,ENSTTRG00000004232,ENSMMUG00000007891,ENSMFAG00000004544,ENSMNEG00000030265,ENSAMEG00000003640,ENSUAMG00000011956,ENSHGLG00100008595,ENSRFEG00010000766,ENSVURG00010023466,ENSFCAG00000024529,ENSSSCG00000009401,ENSCAFG00040011728;ENSCAFG00000004436;ENSCAFG0...,ENSRBIG00000033901,ENSOARG00000008246,ENSCJAG00000009076,ENSPCIG00000029307,ENSCHIG00000004374,ENSSSUG00005003247,ENSBTAG00000006640,ENSANAG00000022056,ENSOGAG00000007461,ENSMICG00000001747,ENSCPOG00000008683,ENSTBEG00000014454,ENSOCUG00000016404,ENSSTOG00000000927,ENSMUGG00000019124,ENSCAPG00000014795,ENSMODG00000011168,ENSMUSG00000022105,ENSMAUG00000020865,ENSRNOG00000016029,ENSMSIG00000007265,MGP_CAROLIEiJ_G0019499
ENSG00000119977,ENSLAFG00000001639,ENSGGOG00000034939,ENSPTRG00000002786,ENSECAG00000016594,ENSPPAG00000043730,ENSTTRG00000003261,ENSMMUG00000003103,ENSMFAG00000001575,ENSMNEG00000037026,ENSAMEG00000015948,ENSUAMG00000021674,ENSHGLG00100006578,ENSRFEG00010015913,ENSVURG00010001079,ENSFCAG00000023675,ENSSSCG00000010496,ENSCAFG00000008406;ENSCAFG00040023697;ENSCAFG0...,ENSRBIG00000034323,ENSOARG00000006497,ENSCJAG00000016206,ENSPCIG00000020525,ENSCHIG00000021683,ENSSSUG00005007325,ENSBTAG00000011841,ENSANAG00000030005,ENSOGAG00000003757,ENSMICG00000015970,ENSCPOG00000021974,ENSTBEG00000010577,ENSOCUG00000005964,ENSSTOG00000007961,ENSMUGG00000008987,,ENSMODG00000004976,ENSMUSG00000025008,ENSMAUG00000021808,ENSRNOG00000047112;ENSRNOG00000060994,ENSMSIG00000005758,MGP_CAROLIEiJ_G0022941
ENSG00000242866,ENSLAFG00000031348,ENSGGOG00000004658,ENSPTRG00000006998;ENSPTRG00000048910,ENSECAG00000009715,ENSPPAG00000038965,ENSTTRG00000005846,ENSMMUG00000021840,ENSMFAG00000045628,ENSMNEG00000011427,ENSAMEG00000015280,ENSUAMG00000014982,ENSHGLG00100003594,ENSRFEG00010004480,ENSVURG00010005917,ENSFCAG00000006605,ENSSSCG00000004702,ENSCAFG00030016976;ENSCAFG00040019489;ENSCAFG0...,ENSRBIG00000043785,ENSOARG00000010634,ENSCJAG00000002654,ENSPCIG00000018293,ENSCHIG00000022061,ENSSSUG00005021029,ENSBTAG00000007503,ENSANAG00000032033,ENSOGAG00000006099,ENSMICG00000010153,ENSCPOG00000014222,,ENSOCUG00000003112,ENSSTOG00000006829,ENSMUGG00000019615,ENSCAPG00000014834,ENSMODG00000017770,ENSMUSG00000033498,ENSMAUG00000018007,ENSRNOG00000014845,ENSMSIG00000028866,MGP_CAROLIEiJ_G0024190


# Gene Expressions #

In [26]:
expressions = load_table(locations.input.expressions.by_class / (cl.name + ".tsv"), "reference_gene")
expressions.head(3)

Unnamed: 0_level_0,SRR1521445,SRR306838,SRR306839,SRR306840,SRR306841,SRR306842,SRR306843,SRR306844,SRR306845,SRR306847,SRR3715877,SRR5008362,SRR5120939,SRR5120940,SRR5885325,SRR5961830,SRR5961875,SRR5961972,SRR787277,SRR8702484,SRR8991100,SRR6307195,SRR6307196,SRR6307197,SRR6307204,SRR306800,SRR306801,SRR306802,SRR306803,SRR306804,SRR306805,SRR306806,SRR306807,SRR306808,SRR306809,SRR306810,SRR649365,DRR031591,SRR032135,SRR1510173,SRR1758916,SRR1758921,SRR1758922,SRR2040586,SRR306791,SRR306793,SRR095666,SRR3403827,SRR3403828,SRR4039470,SRR4039471,SRR4039473,SRR636850,SRR636900,SRR636945,SRR306826,SRR306827,SRR306828,SRR306829,SRR306831,SRR306832,SRR306833,SRR306834,SRR306835,SRR306836,SRR8750397,SRR8750398,SRR8750399,SRR3195085,SRR3195086,SRR3195096,SRR3195103,SRR3195112,SRR3195113,SRR5080320,SRR5080324,SRR5080392,SRR5080564,SRR1047652,SRR223518,SRR223519,SRR223520,SRR223521,SRR223522,SRR299126,SRR3197665,SRR5990543,SRR6007051,SRR6073386,SRR6073415,SRR1758941,SRR1758942,SRR1758943,SRR1758954,SRR1758957,SRR223512,SRR223513,SRR223514,SRR223515,SRR223516,SRR223517,SRR1759005,SRR1759006,SRR1759007,SRR1759011,SRR1759012,SRR1287653,SRR1287654,SRR1287655,SRR2308103,SRR636887,SRR636888,SRR636932,SRR636933,SRR636977,SRR636978,ERR1331676,SRR2124226,SRR306394,SRR306395,SRR306396,SRR306397,SRR306398,SRR306399,SRR306400,SRR306401,SRR306402,SRR306403,SRR306404,SRR306406,SRR1048140,SRR1048142,SRR2754983,SRR2757329,ERR1331678,ERR1331679,ERR2716205,ERR2716206,ERR2716207,ERR2716208,ERR2716211,ERR2716213,ERR2716214,SRR1334837,SRR636854,SRR636855,SRR636856,SRR636904,SRR636905,SRR636906,SRR636948,SRR636949,SRR636950,SRR3160008,SRR3160018,SRR3160041,SRR3160052,SRR3160061,SRR3194634,SRR3194682,SRR3194698,SRR3194730,SRR3194765,SRR3194791,SRR653846,SRR9720682,SRR1747117,SRR1747118,SRR1747119,SRR1747120,SRR1747121,SRR361433,SRR361434,SRR361435,SRR361436,SRR5118366,SRR5190423,SRR5190425,SRR5190427,SRR5190428,SRR5190430,SRR5190431,SRR5190489,SRR5190491,SRR5190493,SRR5190507,SRR5190508,SRR5190509,SRR636842,SRR636843,SRR636892,SRR636893,SRR636937,SRR636938,SRR1300759,SRR1300763,SRR1300765,SRR1300766,SRR1300767,SRR1300768,ERR2074874,ERR2075072,ERR2075082,ERR2075792,ERR2075986,ERR489282,ERR489283,SRR1013904,SRR1758977,SRR1758978,SRR1758979,SRR1758981,SRR1758982,SRR1758983,SRR1758984,SRR1758985,SRR3109717,SRR3109718,SRR5190444,SRR5190446,SRR5928355,SRR5928357,SRR5928359,SRR765910,SRR866213,SRR306724,SRR306726,SRR306728,SRR306729,SRR306730,SRR306731,SRR306732,SRR306733,SRR306735,SRR306737,SRR5412224,SRR5412225,SRR5412226,SRR5412227,SRR5412228,SRR5412229,SRR5412230,SRR5412231,SRR5412232,SRR5412233,SRR5412234,SRR5412235,SRR5412236,SRR553592,SRR553593,SRR553594,SRR553595,SRR649381,SRR6206908,SRR6206918,SRR1205138,SRR1205218,SRR1205222,SRR1205223,SRR1205998,SRR8708135,SRR1822406,SRR3109726,SRR3109728,SRR489494,SRR5190441,SRR5190453,SRR9024741,SRR9024746,SRR9024747,SRR9024749,SRR9024753,SRR9024755,ERR1331716,SRR1981979,SRR1981981,SRR1981987,SRR1981988,SRR3109705,SRR3109709,SRR3109710,SRR3419167,SRR4444968,SRR4444969,SRR4444970,SRR4444971,SRR636839,SRR636840,SRR636934,SRR636935,SRR924544,ERR1331694,SRR1758992,SRR1758995,SRR1758996,SRR1758997,SRR1758998,SRR4444954,SRR4444955,SRR4444956,SRR4444957,SRR4444958,SRR4444959,SRR4444960,SRR4444961,SRR4249993,ERR1331704,ERR1331711,ERR1331683,ERR1331712,ERR1331715,ERR162228,ERR162229,ERR162240,ERR162246,ERR162251,ERR162267,ERR266373,ERR266376,ERR266380,ERR266381,ERR266394,SRR1200908,SRR1200909,SRR1200911,SRR636846,SRR636847,SRR636848,SRR636896,SRR636897,SRR636898,SRR636941,SRR636942,SRR636943,SRR636852,SRR636853,SRR636902,SRR636903,SRR636946,SRR636947,ERR1331667,ERR1331707,ERR1331708,ERR1331709,SRR6261040,SRR6261041,SRR6261042,SRR6261043,ERR1331671,ERR266370,SRR1786019,SRR1789057,SRR1789059,SRR1789326,SRR1789331,SRR6293952,SRR6293953,SRR6293961,SRR6293972,SRR6293973,SRR6293975,SRR636865,SRR636866,SRR636867,SRR636913,SRR636914,SRR636915,SRR636958,SRR636959,SRR636960,SRR8309416,SRR8309417,SRR9066909,SRR9066910,SRR9066919,SRR9066929,SRR9066930,SRR9066931,SRR9066934,ERR162213,ERR162217,ERR162222,ERR162245,ERR162261,ERR266353,ERR266372,ERR266378,ERR266393,ERR266398,ERR1331668,ERR1331710,ERR1331725,SRR306742,SRR306744,SRR306747,SRR306749,SRR306751,SRR306752,SRR306753,SRR306754,SRR449433,SRR449434,SRR449435,SRR449436,SRR5412205,SRR5412206,SRR5412207,SRR5412209,SRR5412210,SRR5412211,SRR5412212,SRR5412213,SRR5412215,SRR5412216,SRR5412218,SRR5952126,SRR5952127,SRR5952128,SRR5952129,SRR5952130,SRR6206899,SRR6206904,SRR6206909,SRR6206914,SRR867587,SRR908037,SRR908038,ERR2004718,ERR2004719,ERR2004720,SRR1653996,SRR1654032,SRR2925196,SRR2925197,SRR2925198,SRR2925199,SRR2925200,SRR2925201,SRR2925202,SRR2925203,SRR2925204,SRR2925205,SRR2925206,SRR2925207,SRR2925208,SRR2925209,SRR2925210,SRR2925212,SRR2925244,SRR2925245,SRR2925247,SRR2925248,SRR2925249,SRR2925251,SRR2925252,SRR2925253,SRR2925254,SRR2925255,SRR2925258,SRR2925259,SRR2925260,SRR2925267,SRR5115667,SRR5115668,SRR5115669,SRR5115678,SRR1549160,SRR1549162,SRR3468363,SRR3468366,SRR3468367,SRR3468368,SRR3468369,SRR3468372,SRR3468373,SRR3468374,SRR3468375,SRR636857,SRR636858,SRR636859,SRR636907,SRR636908,SRR636909,SRR636951,SRR636952,SRR636953,SRR1041772,SRR1170173,SRR1284264,SRR1284265,SRR1284266,SRR1284267,SRR1284274,SRR1284275,SRR1873516,SRR3144810,SRR3144811,SRR3144812,SRR3144813,SRR3144814,SRR3144815,SRR3144816,SRR3144817,SRR3144818,SRR3144819,SRR3144820,SRR3144827,SRR3144828,SRR3144830,SRR3144831,SRR3144832,SRR5291530,SRR5291531,SRR5291532,SRR5335843,SRR5335844,SRR5520660,SRR5520661,SRR5520662,SRR5520663,SRR5520664,SRR5520665,SRR5520667,SRR5520668,SRR5520669,SRR5767265,SRR5767266,SRR5767268,SRR5767269,SRR5767270,SRR594419,SRR594421,SRR594422,SRR594424,SRR594428,SRR594430,SRR594431,SRR594433,SRR594437,SRR594439,SRR594440,SRR594442,SRR954817,ERR1101653,ERR1101654,ERR1101655,ERR1990031,ERR1990032,ERR1990033,ERR1990034,ERR1990035,ERR1990036,ERR1990037,ERR1990038,ERR3350029,ERR3350030,ERR476402,ERR476404,ERR476406,ERR476408
reference_gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1,Unnamed: 249_level_1,Unnamed: 250_level_1,Unnamed: 251_level_1,Unnamed: 252_level_1,Unnamed: 253_level_1,Unnamed: 254_level_1,Unnamed: 255_level_1,Unnamed: 256_level_1,Unnamed: 257_level_1,Unnamed: 258_level_1,Unnamed: 259_level_1,Unnamed: 260_level_1,Unnamed: 261_level_1,Unnamed: 262_level_1,Unnamed: 263_level_1,Unnamed: 264_level_1,Unnamed: 265_level_1,Unnamed: 266_level_1,Unnamed: 267_level_1,Unnamed: 268_level_1,Unnamed: 269_level_1,Unnamed: 270_level_1,Unnamed: 271_level_1,Unnamed: 272_level_1,Unnamed: 273_level_1,Unnamed: 274_level_1,Unnamed: 275_level_1,Unnamed: 276_level_1,Unnamed: 277_level_1,Unnamed: 278_level_1,Unnamed: 279_level_1,Unnamed: 280_level_1,Unnamed: 281_level_1,Unnamed: 282_level_1,Unnamed: 283_level_1,Unnamed: 284_level_1,Unnamed: 285_level_1,Unnamed: 286_level_1,Unnamed: 287_level_1,Unnamed: 288_level_1,Unnamed: 289_level_1,Unnamed: 290_level_1,Unnamed: 291_level_1,Unnamed: 292_level_1,Unnamed: 293_level_1,Unnamed: 294_level_1,Unnamed: 295_level_1,Unnamed: 296_level_1,Unnamed: 297_level_1,Unnamed: 298_level_1,Unnamed: 299_level_1,Unnamed: 300_level_1,Unnamed: 301_level_1,Unnamed: 302_level_1,Unnamed: 303_level_1,Unnamed: 304_level_1,Unnamed: 305_level_1,Unnamed: 306_level_1,Unnamed: 307_level_1,Unnamed: 308_level_1,Unnamed: 309_level_1,Unnamed: 310_level_1,Unnamed: 311_level_1,Unnamed: 312_level_1,Unnamed: 313_level_1,Unnamed: 314_level_1,Unnamed: 315_level_1,Unnamed: 316_level_1,Unnamed: 317_level_1,Unnamed: 318_level_1,Unnamed: 319_level_1,Unnamed: 320_level_1,Unnamed: 321_level_1,Unnamed: 322_level_1,Unnamed: 323_level_1,Unnamed: 324_level_1,Unnamed: 325_level_1,Unnamed: 326_level_1,Unnamed: 327_level_1,Unnamed: 328_level_1,Unnamed: 329_level_1,Unnamed: 330_level_1,Unnamed: 331_level_1,Unnamed: 332_level_1,Unnamed: 333_level_1,Unnamed: 334_level_1,Unnamed: 335_level_1,Unnamed: 336_level_1,Unnamed: 337_level_1,Unnamed: 338_level_1,Unnamed: 339_level_1,Unnamed: 340_level_1,Unnamed: 341_level_1,Unnamed: 342_level_1,Unnamed: 343_level_1,Unnamed: 344_level_1,Unnamed: 345_level_1,Unnamed: 346_level_1,Unnamed: 347_level_1,Unnamed: 348_level_1,Unnamed: 349_level_1,Unnamed: 350_level_1,Unnamed: 351_level_1,Unnamed: 352_level_1,Unnamed: 353_level_1,Unnamed: 354_level_1,Unnamed: 355_level_1,Unnamed: 356_level_1,Unnamed: 357_level_1,Unnamed: 358_level_1,Unnamed: 359_level_1,Unnamed: 360_level_1,Unnamed: 361_level_1,Unnamed: 362_level_1,Unnamed: 363_level_1,Unnamed: 364_level_1,Unnamed: 365_level_1,Unnamed: 366_level_1,Unnamed: 367_level_1,Unnamed: 368_level_1,Unnamed: 369_level_1,Unnamed: 370_level_1,Unnamed: 371_level_1,Unnamed: 372_level_1,Unnamed: 373_level_1,Unnamed: 374_level_1,Unnamed: 375_level_1,Unnamed: 376_level_1,Unnamed: 377_level_1,Unnamed: 378_level_1,Unnamed: 379_level_1,Unnamed: 380_level_1,Unnamed: 381_level_1,Unnamed: 382_level_1,Unnamed: 383_level_1,Unnamed: 384_level_1,Unnamed: 385_level_1,Unnamed: 386_level_1,Unnamed: 387_level_1,Unnamed: 388_level_1,Unnamed: 389_level_1,Unnamed: 390_level_1,Unnamed: 391_level_1,Unnamed: 392_level_1,Unnamed: 393_level_1,Unnamed: 394_level_1,Unnamed: 395_level_1,Unnamed: 396_level_1,Unnamed: 397_level_1,Unnamed: 398_level_1,Unnamed: 399_level_1,Unnamed: 400_level_1,Unnamed: 401_level_1,Unnamed: 402_level_1,Unnamed: 403_level_1,Unnamed: 404_level_1,Unnamed: 405_level_1,Unnamed: 406_level_1,Unnamed: 407_level_1,Unnamed: 408_level_1,Unnamed: 409_level_1,Unnamed: 410_level_1,Unnamed: 411_level_1,Unnamed: 412_level_1,Unnamed: 413_level_1,Unnamed: 414_level_1,Unnamed: 415_level_1,Unnamed: 416_level_1,Unnamed: 417_level_1,Unnamed: 418_level_1,Unnamed: 419_level_1,Unnamed: 420_level_1,Unnamed: 421_level_1,Unnamed: 422_level_1,Unnamed: 423_level_1,Unnamed: 424_level_1,Unnamed: 425_level_1,Unnamed: 426_level_1,Unnamed: 427_level_1,Unnamed: 428_level_1,Unnamed: 429_level_1,Unnamed: 430_level_1,Unnamed: 431_level_1,Unnamed: 432_level_1,Unnamed: 433_level_1,Unnamed: 434_level_1,Unnamed: 435_level_1,Unnamed: 436_level_1,Unnamed: 437_level_1,Unnamed: 438_level_1,Unnamed: 439_level_1,Unnamed: 440_level_1,Unnamed: 441_level_1,Unnamed: 442_level_1,Unnamed: 443_level_1,Unnamed: 444_level_1,Unnamed: 445_level_1,Unnamed: 446_level_1,Unnamed: 447_level_1,Unnamed: 448_level_1,Unnamed: 449_level_1,Unnamed: 450_level_1,Unnamed: 451_level_1,Unnamed: 452_level_1,Unnamed: 453_level_1,Unnamed: 454_level_1,Unnamed: 455_level_1,Unnamed: 456_level_1,Unnamed: 457_level_1,Unnamed: 458_level_1,Unnamed: 459_level_1,Unnamed: 460_level_1,Unnamed: 461_level_1,Unnamed: 462_level_1,Unnamed: 463_level_1,Unnamed: 464_level_1,Unnamed: 465_level_1,Unnamed: 466_level_1,Unnamed: 467_level_1,Unnamed: 468_level_1,Unnamed: 469_level_1,Unnamed: 470_level_1,Unnamed: 471_level_1,Unnamed: 472_level_1,Unnamed: 473_level_1,Unnamed: 474_level_1,Unnamed: 475_level_1,Unnamed: 476_level_1,Unnamed: 477_level_1,Unnamed: 478_level_1,Unnamed: 479_level_1,Unnamed: 480_level_1,Unnamed: 481_level_1,Unnamed: 482_level_1,Unnamed: 483_level_1,Unnamed: 484_level_1,Unnamed: 485_level_1,Unnamed: 486_level_1,Unnamed: 487_level_1,Unnamed: 488_level_1,Unnamed: 489_level_1,Unnamed: 490_level_1,Unnamed: 491_level_1,Unnamed: 492_level_1,Unnamed: 493_level_1,Unnamed: 494_level_1,Unnamed: 495_level_1,Unnamed: 496_level_1,Unnamed: 497_level_1,Unnamed: 498_level_1,Unnamed: 499_level_1,Unnamed: 500_level_1,Unnamed: 501_level_1,Unnamed: 502_level_1,Unnamed: 503_level_1,Unnamed: 504_level_1,Unnamed: 505_level_1,Unnamed: 506_level_1,Unnamed: 507_level_1,Unnamed: 508_level_1,Unnamed: 509_level_1,Unnamed: 510_level_1,Unnamed: 511_level_1,Unnamed: 512_level_1,Unnamed: 513_level_1,Unnamed: 514_level_1,Unnamed: 515_level_1,Unnamed: 516_level_1,Unnamed: 517_level_1,Unnamed: 518_level_1,Unnamed: 519_level_1,Unnamed: 520_level_1,Unnamed: 521_level_1,Unnamed: 522_level_1,Unnamed: 523_level_1,Unnamed: 524_level_1,Unnamed: 525_level_1,Unnamed: 526_level_1,Unnamed: 527_level_1,Unnamed: 528_level_1,Unnamed: 529_level_1,Unnamed: 530_level_1,Unnamed: 531_level_1,Unnamed: 532_level_1,Unnamed: 533_level_1,Unnamed: 534_level_1,Unnamed: 535_level_1,Unnamed: 536_level_1,Unnamed: 537_level_1,Unnamed: 538_level_1,Unnamed: 539_level_1,Unnamed: 540_level_1,Unnamed: 541_level_1,Unnamed: 542_level_1,Unnamed: 543_level_1,Unnamed: 544_level_1,Unnamed: 545_level_1,Unnamed: 546_level_1,Unnamed: 547_level_1,Unnamed: 548_level_1,Unnamed: 549_level_1,Unnamed: 550_level_1,Unnamed: 551_level_1,Unnamed: 552_level_1,Unnamed: 553_level_1,Unnamed: 554_level_1,Unnamed: 555_level_1,Unnamed: 556_level_1,Unnamed: 557_level_1,Unnamed: 558_level_1,Unnamed: 559_level_1,Unnamed: 560_level_1
ENSG00000242265,66.674,64.092,53.658,70.99,84.953,50.449,68.668,135.634,204.1,0.0,16.024,0.923,32.867,21.619,6.444,100.856,117.872,162.944,36.204,1.001,128.581,,,,,15.12,22.624,46.506,9.765,0.092,0.0,2.071,0.902,0.039,0.107,15.002,11.004,0.089,0.0,1.043,13.49,1.334,5.131,0.0,,,0.0,13.879,35.508,0.068,0.117,0.25,0.0,4.817,51.349,32.909,8.555,9.194,27.621,0.154,0.071,0.922,0.964,0.299,0.322,32.446,17.277,5.095,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,6.754,0.0,0.0,5.232,0.0,0.0,11.191,96.321,0.037,0.249,0.0,0.0,14.492,0.593,5.251,0.865,12.169,25.395,0.592,12.607,3.157,38.468,0.86,107.366,305.91,42.682,0.344,4.874,0.0,0.05,0.0,0.639,,,,,,,,,,,,,,,,,,,,,7.323,101.961,0.154,0.221,0.091,0.097,0.0,0.0,0.075,0.0,0.082,0.0,0.0,1.496,0.269,0.197,0.45,1.249,0.956,2.067,119.011,126.105,143.272,1.278,2.625,1.646,17.47,5.302,0.009,0.0,0.0,0.007,0.013,0.0,2.265,0.0,,,,,,,,,,,,,,,,,,,,,,,0.331,0.409,0.307,0.134,150.683,112.717,0.078,3.548,0.099,49.275,0.818,2.48,1.009,30.427,1.606,41.765,1.122,17.114,15.677,0.0,38.499,522.386,54.366,2.202,5.098,6.733,0.272,8.031,28.476,54.005,24.741,45.605,3.118,127.074,8.207,5.302,0.098,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,2.884,0.0,0.0,65.654,0.067,0.039,0.028,,,,,,,8.81,0.287,0.0,0.973,0.071,0.083,3.722,64.098,0.027,3.178,5.87,3.952,30.139,48.949,0.639,0.191,0.191,0.0,0.124,5.975,2.151,85.455,87.521,0.335,0.934,111.997,3.209,2.983,0.305,0.989,0.088,0.029,0.03,0.0,0.071,0.162,0.029,0.0,3.009,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.258,62.537,0.0,0.295,0.271,0.095,2.919,94.217,69.874,54.744,102.203,88.91,89.661,0.135,0.0,0.044,0.83,0.74,1.512,49.853,54.56,96.156,66.496,64.09,0.0,0.0,0.455,4.057,40.991,0.0,0.039,122.379,82.811,98.328,98.949,57.826,34.315,39.192,33.452,36.656,33.026,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.165,0.141,0.174,0.0,0.0,11.413,20.959,22.785,10.771,13.219,15.534,6.583,15.873,5.291,24.071,6.867,26.148,16.607,22.408,10.248,11.773,0.828,0.497,0.489,0.367,0.172,0.299,0.578,0.516,0.215,0.32,0.238,0.281,0.14,0.276,0.0,0.021,0.0,0.031,0.273,0.299,0.0,0.0,0.0,0.0,0.011,0.013,0.026,0.0,0.036,0.201,0.056,0.14,0.413,0.478,0.345,56.092,28.293,52.553,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,30.901,29.825,0.235,7.197,0.017,0.385,0.997,9.513,0.076,1.386,2.32,0.0,0.0,0.019,0.03,0.019,0.041
ENSG00000139990,11.921,43.003,12.512,16.837,12.673,15.001,11.918,18.714,16.423,0.0,45.73,4.765,37.904,33.205,10.578,26.655,20.683,29.373,26.026,5.864,1.152,23.392,23.248,28.392,26.727,7.052,15.276,15.281,9.368,2.469,1.886,14.138,5.804,7.661,4.824,98.129,7.107,1.988,0.0,4.027,24.841,15.498,18.4,10.297,12.11,20.537,10.952,23.269,26.777,6.364,6.454,8.405,10.978,22.965,25.05,18.743,10.771,7.164,14.552,4.156,2.091,4.9,8.332,7.049,3.267,16.954,12.996,9.255,2.446,5.964,2.875,5.564,5.885,12.364,3.566,4.078,3.568,5.91,38.762,42.647,19.092,39.266,55.38,37.023,0.273,45.235,23.621,31.644,30.583,36.029,39.724,31.108,37.643,23.543,49.901,16.239,15.198,29.072,19.94,125.389,18.734,45.506,35.629,28.389,17.291,39.833,19.259,12.205,16.477,3.719,4.896,4.806,19.723,22.371,13.76,18.697,22.199,13.579,12.146,7.287,7.132,22.201,5.848,5.946,19.834,10.207,10.591,5.689,7.854,11.135,11.238,28.047,7.145,10.9,15.487,19.797,17.528,17.668,14.706,14.579,14.611,21.431,22.212,1.922,20.733,27.188,22.578,15.754,13.462,30.296,20.972,23.783,19.943,11.099,15.289,29.002,28.343,37.124,19.688,7.573,25.361,12.725,17.866,14.089,18.822,253.946,,,,,,,,,,,,,,,,,,,,,,,9.928,10.159,9.062,9.272,24.311,24.072,3.332,13.486,8.367,19.088,17.993,19.302,36.475,22.43,30.609,20.89,10.722,31.561,32.062,6.106,39.066,49.285,41.836,19.332,25.339,32.399,11.106,42.087,33.45,29.927,15.906,12.038,13.074,23.773,22.05,17.798,12.172,1.764,1.216,2.864,5.317,2.901,0.977,6.238,3.732,1.49,1.678,5.33,3.643,0.83,1.753,20.631,1.447,4.231,3.313,5.525,5.003,2.44,2.281,5.557,7.411,2.077,1.294,2.284,3.895,10.313,27.267,16.205,32.735,23.816,5.6,12.788,17.194,19.063,28.446,26.078,2.948,27.423,12.859,21.023,15.032,2.525,49.993,5.557,15.878,9.758,37.047,28.72,38.545,29.252,16.938,20.577,16.663,14.872,4.708,4.615,6.911,7.273,17.34,15.485,28.729,26.575,26.383,16.81,46.75,28.762,27.492,10.117,51.906,4.934,4.364,3.659,3.574,3.977,3.634,3.655,2.763,14.536,8.219,9.852,10.486,8.695,10.957,69.35,67.121,64.839,62.611,63.301,58.243,17.722,13.43,18.045,13.184,13.515,26.721,26.773,21.741,10.123,8.305,7.02,7.363,8.518,9.09,15.83,16.134,18.797,10.662,13.597,20.201,24.525,47.116,56.076,,,,,,,,,11.112,26.446,2.214,9.046,5.697,4.684,9.298,37.075,38.622,39.558,38.782,34.394,37.391,10.827,13.081,12.302,15.975,13.568,16.749,35.273,36.926,43.829,23.445,23.833,5.015,4.935,14.551,9.824,19.86,6.311,6.223,73.312,62.895,62.065,77.96,18.399,22.165,23.291,25.537,26.628,17.15,18.846,17.251,16.317,15.608,19.598,11.023,2.695,12.8,1.21,6.971,5.772,8.794,8.594,1.797,1.047,6.604,19.106,4.873,4.663,3.52,7.822,5.925,7.445,6.456,7.159,6.546,36.887,38.866,40.815,26.422,25.159,15.461,5.075,7.473,5.578,26.046,15.319,10.631,11.262,7.941,10.994,0.0,0.0,22.621,30.664,29.665,20.489,21.168,29.393,19.17,27.798,12.964,29.092,11.643,24.595,20.882,29.139,17.775,18.972,7.525,4.784,6.387,9.587,8.78,8.423,7.657,6.966,6.668,7.074,6.93,9.11,7.423,7.439,14.193,10.044,13.821,8.878,8.263,8.746,11.76,9.434,11.035,9.504,3.352,9.382,5.439,9.914,9.049,24.597,21.918,24.968,15.743,16.833,11.384,27.184,28.367,33.584,3.615,21.448,5.917,5.533,5.396,5.349,5.322,4.275,24.486,19.06,18.509,20.721,20.917,19.406,20.444,18.214,19.794,21.06,21.269,22.023,7.536,7.075,4.889,5.684,6.263,13.477,15.855,15.4,25.274,26.04,6.155,6.054,4.346,6.035,5.125,4.272,3.498,5.163,6.363,16.603,9.823,11.895,18.874,12.968,28.112,7.768,9.448,19.153,28.562,10.476,11.675,21.649,23.836,6.58,10.298,19.465,6.669,38.1,40.705,6.174,20.72,11.594,27.991,18.069,23.46,12.864,32.671,20.878,6.269,5.975,7.414,7.35,5.803,5.279
ENSG00000073921,78.535,91.182,104.142,99.109,84.034,232.882,78.732,47.023,61.764,22.763,229.144,51.601,124.517,113.369,95.72,465.005,645.336,532.382,575.293,23.87,0.0,340.223,236.314,194.281,370.384,32.379,71.118,32.029,32.897,4.728,5.893,92.48,60.521,22.375,21.91,59.973,67.606,2.997,0.0,14.89,205.81,66.837,122.353,79.189,25.836,44.37,239.687,289.437,436.165,15.125,18.11,25.638,47.785,176.94,92.277,51.01,27.972,33.881,31.3,22.647,9.635,24.996,39.021,29.294,12.482,47.844,50.182,28.162,30.842,48.948,58.819,61.369,57.332,70.825,8.516,13.15,13.831,25.792,18.893,108.472,30.528,80.578,113.139,51.18,0.422,265.529,167.817,104.618,141.915,107.983,361.418,151.816,253.669,90.281,259.595,79.435,126.233,212.658,75.753,176.343,133.73,229.952,134.24,248.087,145.012,307.94,79.624,49.298,67.619,12.938,16.568,17.936,215.582,231.286,106.101,82.01,183.124,77.832,92.387,64.075,81.755,173.151,56.632,65.578,86.341,44.61,44.711,32.679,83.181,58.51,5.91,125.795,30.691,51.947,218.828,190.846,160.478,161.97,191.658,193.972,118.054,128.077,127.765,57.138,80.292,65.076,61.83,91.716,78.796,106.247,82.601,84.257,79.435,38.773,46.574,174.916,254.806,183.282,111.69,62.87,163.297,100.378,132.454,117.52,217.14,126.973,,,,,,,,,,,,,,,,,,,,,,,50.013,54.577,126.172,65.911,156.413,88.584,37.404,172.563,27.849,68.078,261.368,172.549,180.502,224.726,200.595,250.468,29.102,213.279,217.763,115.397,147.356,142.256,163.067,93.36,184.394,274.518,80.011,229.943,290.748,287.137,174.886,126.343,78.635,105.662,83.681,132.684,18.583,18.298,8.47,19.744,28.125,8.992,8.816,27.483,26.313,17.69,9.54,25.089,26.997,5.733,13.628,27.301,8.275,39.561,20.765,36.644,34.186,22.021,13.832,23.036,16.642,6.373,10.426,27.408,17.53,84.626,102.135,28.929,80.167,65.005,18.252,80.183,87.657,112.568,285.654,317.63,29.883,226.465,160.853,41.149,63.451,14.048,56.879,17.864,43.717,175.983,98.851,153.515,88.565,171.742,133.212,167.49,155.011,45.169,24.31,23.101,15.414,14.25,102.132,103.386,114.247,114.111,135.985,234.597,202.061,316.168,331.203,79.885,432.63,65.453,63.867,16.039,13.7,30.169,28.26,14.506,14.125,37.943,125.226,121.366,253.896,168.672,211.211,56.365,45.259,128.915,146.834,140.577,29.884,41.539,43.532,49.222,36.669,40.724,155.171,155.435,37.737,123.727,89.877,76.818,109.329,92.287,105.757,68.359,55.137,67.271,41.31,44.132,104.976,117.517,48.355,88.146,176.429,164.884,98.194,105.492,87.375,34.545,83.273,11.513,219.23,84.282,8.633,98.411,14.654,13.543,96.612,97.129,106.484,112.262,106.945,103.679,100.734,33.781,32.006,33.283,138.074,102.757,122.033,71.137,70.743,86.266,68.344,73.392,15.684,68.441,58.108,39.012,85.027,28.177,54.562,13.12,95.295,0.0,0.0,59.738,41.64,55.298,51.849,80.856,27.251,82.732,95.897,97.047,46.35,37.643,24.724,13.424,68.641,33.397,26.158,26.308,19.86,28.825,9.47,7.742,42.928,60.803,18.364,17.699,22.795,47.569,43.062,37.684,33.064,38.125,32.434,168.373,166.686,211.708,138.312,118.414,63.428,30.681,70.273,32.829,14.729,12.648,10.557,90.033,25.424,138.98,0.0,0.426,77.776,91.227,85.148,71.603,82.778,82.95,67.832,84.075,52.983,87.262,54.227,86.399,77.808,87.66,72.858,68.789,46.927,25.019,49.913,44.899,65.124,51.89,32.227,43.722,45.902,57.248,49.625,53.438,83.177,83.562,70.674,72.736,78.876,84.794,59.245,53.756,45.323,44.887,44.498,32.944,18.938,45.952,28.16,45.765,43.779,69.997,63.382,64.728,130.744,126.445,91.654,82.522,72.958,94.51,139.912,331.497,68.103,57.818,56.667,56.358,59.945,56.241,238.639,94.275,105.024,99.052,103.306,98.303,85.587,102.137,92.932,105.978,90.313,89.244,34.949,29.375,35.603,35.544,23.817,99.389,102.852,132.483,128.054,174.202,56.191,56.888,44.002,43.675,50.741,19.159,40.388,48.472,58.862,441.514,363.276,409.131,430.749,357.049,175.583,31.362,253.473,130.131,161.274,58.04,231.554,180.195,165.729,43.953,214.495,177.03,40.353,137.726,131.451,49.241,113.324,104.432,153.457,362.355,129.32,96.708,172.908,406.723,35.755,28.073,31.914,36.706,27.978,30.673


## Filtering expressions ##

In [27]:
expressions_filtered = expressions.loc[selected_genes.index].drop(columns=samples_dropped.index)
print(expressions_filtered.shape)
assert expressions_filtered.shape[0]==selected_genes.shape[0], "for all selected genes we should have expression values!"
assert expressions_filtered.shape[1]==selected_samples.shape[0], "for all selected samples we should have expression values!"
expressions_filtered.head(3)

(12243, 459)


Unnamed: 0_level_0,SRR1521445,SRR306838,SRR306839,SRR306840,SRR306841,SRR306842,SRR306843,SRR306844,SRR306845,SRR306847,SRR3715877,SRR5008362,SRR5120939,SRR5120940,SRR5885325,SRR5961830,SRR5961875,SRR5961972,SRR787277,SRR8702484,SRR8991100,SRR6307195,SRR6307196,SRR6307197,SRR6307204,SRR306800,SRR306801,SRR306802,SRR306803,SRR306804,SRR306805,SRR306806,SRR306807,SRR306808,SRR306809,SRR649365,DRR031591,SRR1510173,SRR1758916,SRR1758921,SRR1758922,SRR2040586,SRR095666,SRR3403827,SRR3403828,SRR636850,SRR636900,SRR636945,SRR306826,SRR306827,SRR306828,SRR306829,SRR306831,SRR306832,SRR306833,SRR306834,SRR306835,SRR306836,SRR8750397,SRR8750398,SRR8750399,SRR3195085,SRR3195086,SRR3195096,SRR3195103,SRR3195112,SRR3195113,SRR5080320,SRR5080324,SRR5080392,SRR5080564,SRR1047652,SRR223518,SRR223519,SRR223520,SRR223521,SRR299126,SRR5990543,SRR6007051,SRR6073386,SRR6073415,SRR1758941,SRR1758942,SRR1758943,SRR1758954,SRR1758957,SRR223512,SRR223514,SRR223515,SRR1759005,SRR1759006,SRR1759007,SRR1759011,SRR1759012,SRR1287653,SRR1287654,SRR1287655,SRR2308103,SRR636887,SRR636888,SRR636932,SRR636933,SRR636977,SRR636978,ERR1331676,SRR2124226,SRR306395,SRR306396,SRR306398,SRR306399,SRR306401,SRR306402,SRR306404,SRR306406,SRR1048140,SRR1048142,SRR2754983,SRR2757329,ERR1331678,ERR1331679,ERR2716205,ERR2716206,ERR2716207,ERR2716208,ERR2716213,ERR2716214,SRR1334837,SRR636854,SRR636855,SRR636856,SRR636904,SRR636905,SRR636906,SRR636948,SRR636949,SRR636950,SRR3160008,SRR3160018,SRR3160041,SRR3160052,SRR3160061,SRR3194634,SRR3194682,SRR3194698,SRR3194730,SRR3194765,SRR3194791,SRR653846,SRR636842,SRR636843,SRR636892,SRR636893,SRR636937,SRR636938,SRR1300759,SRR1300763,SRR1300765,SRR1300766,SRR1300767,SRR1300768,ERR2074874,ERR2075072,ERR2075082,ERR2075792,ERR2075986,ERR489282,ERR489283,SRR1013904,SRR1758977,SRR1758978,SRR1758979,SRR1758981,SRR1758982,SRR1758983,SRR1758984,SRR1758985,SRR3109717,SRR3109718,SRR5190444,SRR5190446,SRR765910,SRR1205138,SRR1205218,SRR1205222,SRR1205223,SRR1205998,SRR8708135,SRR1822406,SRR3109726,SRR3109728,SRR489494,SRR5190441,SRR5190453,SRR9024741,SRR9024746,SRR9024747,SRR9024753,SRR9024755,ERR1331716,SRR1981979,SRR1981981,SRR1981987,SRR1981988,SRR3109705,SRR3109709,SRR3109710,SRR4444968,SRR4444969,SRR4444970,SRR4444971,SRR636839,SRR636840,SRR636934,SRR636935,SRR924544,ERR1331694,SRR1758992,SRR1758995,SRR1758996,SRR1758997,SRR1758998,SRR4444954,SRR4444955,SRR4444956,SRR4444957,SRR4444958,SRR4444959,SRR4444960,SRR4444961,ERR1331683,ERR1331712,ERR1331715,ERR162228,ERR162229,ERR162240,ERR162246,ERR162251,ERR162267,ERR266373,ERR266376,ERR266380,ERR266381,ERR266394,SRR1200908,SRR1200909,SRR636846,SRR636847,SRR636848,SRR636896,SRR636897,SRR636898,SRR636941,SRR636942,SRR636943,ERR1331667,ERR1331707,ERR1331708,ERR1331709,SRR6261040,SRR6261041,SRR6261042,ERR1331671,ERR266370,SRR1786019,SRR1789057,SRR1789059,SRR1789326,SRR1789331,SRR6293952,SRR6293953,SRR6293961,SRR6293972,SRR6293973,SRR6293975,SRR636865,SRR636866,SRR636867,SRR636913,SRR636914,SRR636915,SRR636958,SRR636959,SRR636960,SRR9066910,SRR9066919,SRR9066929,SRR9066931,SRR9066934,ERR162213,ERR162217,ERR162222,ERR162245,ERR162261,ERR266353,ERR266372,ERR266378,ERR266393,ERR266398,ERR1331668,ERR1331710,ERR1331725,SRR306742,SRR306744,SRR306747,SRR306749,SRR306751,SRR306752,SRR306753,SRR306754,SRR449433,SRR449434,SRR449435,SRR449436,SRR5412205,SRR5412206,SRR5412207,SRR5412209,SRR5412210,SRR5412211,SRR5412212,SRR5412213,SRR5412215,SRR5412216,SRR5412218,SRR5952126,SRR5952127,SRR5952128,SRR5952129,SRR5952130,SRR6206899,SRR6206904,SRR6206909,SRR6206914,ERR2004718,ERR2004719,ERR2004720,SRR1653996,SRR1654032,SRR2925196,SRR2925197,SRR2925198,SRR2925199,SRR2925200,SRR2925201,SRR2925202,SRR2925203,SRR2925204,SRR2925205,SRR2925206,SRR2925207,SRR2925208,SRR2925209,SRR2925210,SRR2925212,SRR2925244,SRR2925245,SRR2925247,SRR2925248,SRR2925249,SRR2925251,SRR2925252,SRR2925253,SRR2925254,SRR2925255,SRR2925258,SRR2925259,SRR2925260,SRR2925267,SRR5115667,SRR5115668,SRR5115669,SRR5115678,SRR1549160,SRR1549162,SRR3468363,SRR3468366,SRR3468367,SRR3468368,SRR3468369,SRR3468372,SRR3468373,SRR3468374,SRR3468375,SRR636857,SRR636858,SRR636859,SRR636907,SRR636908,SRR636909,SRR636951,SRR636952,SRR636953,SRR1041772,SRR1170173,SRR1284264,SRR1284265,SRR1284266,SRR1284267,SRR1284274,SRR1284275,SRR1873516,SRR3144810,SRR3144811,SRR3144812,SRR3144813,SRR3144814,SRR3144815,SRR3144816,SRR3144817,SRR3144818,SRR3144819,SRR3144820,SRR3144827,SRR3144828,SRR3144830,SRR3144831,SRR3144832,SRR5291530,SRR5291531,SRR5291532,SRR5520660,SRR5520661,SRR5520662,SRR5520663,SRR5520664,SRR5520665,SRR5520667,SRR5520668,SRR5520669,SRR594419,SRR594421,SRR594422,SRR594424,SRR594428,SRR594430,SRR594431,SRR594433,SRR594437,SRR594439,SRR594440,SRR594442,SRR954817,ERR1101653,ERR1101654,ERR1101655,ERR1990031,ERR1990032,ERR1990033,ERR1990034,ERR1990035,ERR1990036,ERR1990037,ERR1990038,ERR3350029,ERR3350030,ERR476402,ERR476404,ERR476406,ERR476408
Homo_sapiens,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1,Unnamed: 249_level_1,Unnamed: 250_level_1,Unnamed: 251_level_1,Unnamed: 252_level_1,Unnamed: 253_level_1,Unnamed: 254_level_1,Unnamed: 255_level_1,Unnamed: 256_level_1,Unnamed: 257_level_1,Unnamed: 258_level_1,Unnamed: 259_level_1,Unnamed: 260_level_1,Unnamed: 261_level_1,Unnamed: 262_level_1,Unnamed: 263_level_1,Unnamed: 264_level_1,Unnamed: 265_level_1,Unnamed: 266_level_1,Unnamed: 267_level_1,Unnamed: 268_level_1,Unnamed: 269_level_1,Unnamed: 270_level_1,Unnamed: 271_level_1,Unnamed: 272_level_1,Unnamed: 273_level_1,Unnamed: 274_level_1,Unnamed: 275_level_1,Unnamed: 276_level_1,Unnamed: 277_level_1,Unnamed: 278_level_1,Unnamed: 279_level_1,Unnamed: 280_level_1,Unnamed: 281_level_1,Unnamed: 282_level_1,Unnamed: 283_level_1,Unnamed: 284_level_1,Unnamed: 285_level_1,Unnamed: 286_level_1,Unnamed: 287_level_1,Unnamed: 288_level_1,Unnamed: 289_level_1,Unnamed: 290_level_1,Unnamed: 291_level_1,Unnamed: 292_level_1,Unnamed: 293_level_1,Unnamed: 294_level_1,Unnamed: 295_level_1,Unnamed: 296_level_1,Unnamed: 297_level_1,Unnamed: 298_level_1,Unnamed: 299_level_1,Unnamed: 300_level_1,Unnamed: 301_level_1,Unnamed: 302_level_1,Unnamed: 303_level_1,Unnamed: 304_level_1,Unnamed: 305_level_1,Unnamed: 306_level_1,Unnamed: 307_level_1,Unnamed: 308_level_1,Unnamed: 309_level_1,Unnamed: 310_level_1,Unnamed: 311_level_1,Unnamed: 312_level_1,Unnamed: 313_level_1,Unnamed: 314_level_1,Unnamed: 315_level_1,Unnamed: 316_level_1,Unnamed: 317_level_1,Unnamed: 318_level_1,Unnamed: 319_level_1,Unnamed: 320_level_1,Unnamed: 321_level_1,Unnamed: 322_level_1,Unnamed: 323_level_1,Unnamed: 324_level_1,Unnamed: 325_level_1,Unnamed: 326_level_1,Unnamed: 327_level_1,Unnamed: 328_level_1,Unnamed: 329_level_1,Unnamed: 330_level_1,Unnamed: 331_level_1,Unnamed: 332_level_1,Unnamed: 333_level_1,Unnamed: 334_level_1,Unnamed: 335_level_1,Unnamed: 336_level_1,Unnamed: 337_level_1,Unnamed: 338_level_1,Unnamed: 339_level_1,Unnamed: 340_level_1,Unnamed: 341_level_1,Unnamed: 342_level_1,Unnamed: 343_level_1,Unnamed: 344_level_1,Unnamed: 345_level_1,Unnamed: 346_level_1,Unnamed: 347_level_1,Unnamed: 348_level_1,Unnamed: 349_level_1,Unnamed: 350_level_1,Unnamed: 351_level_1,Unnamed: 352_level_1,Unnamed: 353_level_1,Unnamed: 354_level_1,Unnamed: 355_level_1,Unnamed: 356_level_1,Unnamed: 357_level_1,Unnamed: 358_level_1,Unnamed: 359_level_1,Unnamed: 360_level_1,Unnamed: 361_level_1,Unnamed: 362_level_1,Unnamed: 363_level_1,Unnamed: 364_level_1,Unnamed: 365_level_1,Unnamed: 366_level_1,Unnamed: 367_level_1,Unnamed: 368_level_1,Unnamed: 369_level_1,Unnamed: 370_level_1,Unnamed: 371_level_1,Unnamed: 372_level_1,Unnamed: 373_level_1,Unnamed: 374_level_1,Unnamed: 375_level_1,Unnamed: 376_level_1,Unnamed: 377_level_1,Unnamed: 378_level_1,Unnamed: 379_level_1,Unnamed: 380_level_1,Unnamed: 381_level_1,Unnamed: 382_level_1,Unnamed: 383_level_1,Unnamed: 384_level_1,Unnamed: 385_level_1,Unnamed: 386_level_1,Unnamed: 387_level_1,Unnamed: 388_level_1,Unnamed: 389_level_1,Unnamed: 390_level_1,Unnamed: 391_level_1,Unnamed: 392_level_1,Unnamed: 393_level_1,Unnamed: 394_level_1,Unnamed: 395_level_1,Unnamed: 396_level_1,Unnamed: 397_level_1,Unnamed: 398_level_1,Unnamed: 399_level_1,Unnamed: 400_level_1,Unnamed: 401_level_1,Unnamed: 402_level_1,Unnamed: 403_level_1,Unnamed: 404_level_1,Unnamed: 405_level_1,Unnamed: 406_level_1,Unnamed: 407_level_1,Unnamed: 408_level_1,Unnamed: 409_level_1,Unnamed: 410_level_1,Unnamed: 411_level_1,Unnamed: 412_level_1,Unnamed: 413_level_1,Unnamed: 414_level_1,Unnamed: 415_level_1,Unnamed: 416_level_1,Unnamed: 417_level_1,Unnamed: 418_level_1,Unnamed: 419_level_1,Unnamed: 420_level_1,Unnamed: 421_level_1,Unnamed: 422_level_1,Unnamed: 423_level_1,Unnamed: 424_level_1,Unnamed: 425_level_1,Unnamed: 426_level_1,Unnamed: 427_level_1,Unnamed: 428_level_1,Unnamed: 429_level_1,Unnamed: 430_level_1,Unnamed: 431_level_1,Unnamed: 432_level_1,Unnamed: 433_level_1,Unnamed: 434_level_1,Unnamed: 435_level_1,Unnamed: 436_level_1,Unnamed: 437_level_1,Unnamed: 438_level_1,Unnamed: 439_level_1,Unnamed: 440_level_1,Unnamed: 441_level_1,Unnamed: 442_level_1,Unnamed: 443_level_1,Unnamed: 444_level_1,Unnamed: 445_level_1,Unnamed: 446_level_1,Unnamed: 447_level_1,Unnamed: 448_level_1,Unnamed: 449_level_1,Unnamed: 450_level_1,Unnamed: 451_level_1,Unnamed: 452_level_1,Unnamed: 453_level_1,Unnamed: 454_level_1,Unnamed: 455_level_1,Unnamed: 456_level_1,Unnamed: 457_level_1,Unnamed: 458_level_1,Unnamed: 459_level_1
ENSG00000139990,11.921,43.003,12.512,16.837,12.673,15.001,11.918,18.714,16.423,0.0,45.73,4.765,37.904,33.205,10.578,26.655,20.683,29.373,26.026,5.864,1.152,23.392,23.248,28.392,26.727,7.052,15.276,15.281,9.368,2.469,1.886,14.138,5.804,7.661,4.824,7.107,1.988,4.027,24.841,15.498,18.4,10.297,10.952,23.269,26.777,10.978,22.965,25.05,18.743,10.771,7.164,14.552,4.156,2.091,4.9,8.332,7.049,3.267,16.954,12.996,9.255,2.446,5.964,2.875,5.564,5.885,12.364,3.566,4.078,3.568,5.91,38.762,42.647,19.092,39.266,55.38,0.273,23.621,31.644,30.583,36.029,39.724,31.108,37.643,23.543,49.901,16.239,29.072,19.94,45.506,35.629,28.389,17.291,39.833,19.259,12.205,16.477,3.719,4.896,4.806,19.723,22.371,13.76,18.697,22.199,13.579,7.287,7.132,5.848,5.946,10.207,10.591,7.854,11.135,11.238,28.047,7.145,10.9,15.487,19.797,17.528,17.668,14.706,14.579,21.431,22.212,1.922,20.733,27.188,22.578,15.754,13.462,30.296,20.972,23.783,19.943,11.099,15.289,29.002,28.343,37.124,19.688,7.573,25.361,12.725,17.866,14.089,18.822,9.928,10.159,9.062,9.272,24.311,24.072,3.332,13.486,8.367,19.088,17.993,19.302,36.475,22.43,30.609,20.89,10.722,31.561,32.062,6.106,39.066,49.285,41.836,19.332,25.339,32.399,11.106,42.087,33.45,29.927,15.906,12.038,17.798,16.205,32.735,23.816,5.6,12.788,17.194,19.063,28.446,26.078,2.948,27.423,12.859,21.023,15.032,2.525,5.557,15.878,9.758,37.047,28.72,38.545,29.252,16.938,20.577,16.663,4.708,4.615,6.911,7.273,17.34,15.485,28.729,26.575,26.383,16.81,46.75,28.762,27.492,10.117,51.906,4.934,4.364,3.659,3.574,3.977,3.634,3.655,2.763,10.486,8.695,10.957,69.35,67.121,64.839,62.611,63.301,58.243,17.722,13.43,18.045,13.184,13.515,26.721,26.773,10.123,8.305,7.02,7.363,8.518,9.09,15.83,16.134,18.797,,,,,,,,11.112,26.446,2.214,9.046,5.697,4.684,9.298,37.075,38.622,39.558,38.782,34.394,37.391,10.827,13.081,12.302,15.975,13.568,16.749,35.273,36.926,43.829,4.935,14.551,9.824,6.311,6.223,73.312,62.895,62.065,77.96,18.399,22.165,23.291,25.537,26.628,17.15,18.846,17.251,16.317,15.608,19.598,11.023,2.695,12.8,1.21,6.971,5.772,8.794,8.594,1.797,1.047,6.604,19.106,4.873,4.663,3.52,7.822,5.925,7.445,6.456,7.159,6.546,36.887,38.866,40.815,26.422,25.159,15.461,5.075,7.473,5.578,11.262,7.941,10.994,0.0,0.0,22.621,30.664,29.665,20.489,21.168,29.393,19.17,27.798,12.964,29.092,11.643,24.595,20.882,29.139,17.775,18.972,7.525,4.784,6.387,9.587,8.78,8.423,7.657,6.966,6.668,7.074,6.93,9.11,7.423,7.439,14.193,10.044,13.821,8.878,8.263,8.746,11.76,9.434,11.035,9.504,3.352,9.382,5.439,9.914,9.049,24.597,21.918,24.968,15.743,16.833,11.384,27.184,28.367,33.584,3.615,21.448,5.917,5.533,5.396,5.349,5.322,4.275,24.486,19.06,18.509,20.721,20.917,19.406,20.444,18.214,19.794,21.06,21.269,22.023,7.536,7.075,4.889,5.684,6.263,13.477,15.855,15.4,6.155,6.054,4.346,6.035,5.125,4.272,3.498,5.163,6.363,28.112,7.768,9.448,19.153,28.562,10.476,11.675,21.649,23.836,6.58,10.298,19.465,6.669,38.1,40.705,6.174,20.72,11.594,27.991,18.069,23.46,12.864,32.671,20.878,6.269,5.975,7.414,7.35,5.803,5.279
ENSG00000073921,78.535,91.182,104.142,99.109,84.034,232.882,78.732,47.023,61.764,22.763,229.144,51.601,124.517,113.369,95.72,465.005,645.336,532.382,575.293,23.87,0.0,340.223,236.314,194.281,370.384,32.379,71.118,32.029,32.897,4.728,5.893,92.48,60.521,22.375,21.91,67.606,2.997,14.89,205.81,66.837,122.353,79.189,239.687,289.437,436.165,47.785,176.94,92.277,51.01,27.972,33.881,31.3,22.647,9.635,24.996,39.021,29.294,12.482,47.844,50.182,28.162,30.842,48.948,58.819,61.369,57.332,70.825,8.516,13.15,13.831,25.792,18.893,108.472,30.528,80.578,113.139,0.422,167.817,104.618,141.915,107.983,361.418,151.816,253.669,90.281,259.595,79.435,212.658,75.753,229.952,134.24,248.087,145.012,307.94,79.624,49.298,67.619,12.938,16.568,17.936,215.582,231.286,106.101,82.01,183.124,77.832,64.075,81.755,56.632,65.578,44.61,44.711,83.181,58.51,5.91,125.795,30.691,51.947,218.828,190.846,160.478,161.97,191.658,193.972,128.077,127.765,57.138,80.292,65.076,61.83,91.716,78.796,106.247,82.601,84.257,79.435,38.773,46.574,174.916,254.806,183.282,111.69,62.87,163.297,100.378,132.454,117.52,217.14,50.013,54.577,126.172,65.911,156.413,88.584,37.404,172.563,27.849,68.078,261.368,172.549,180.502,224.726,200.595,250.468,29.102,213.279,217.763,115.397,147.356,142.256,163.067,93.36,184.394,274.518,80.011,229.943,290.748,287.137,174.886,126.343,132.684,28.929,80.167,65.005,18.252,80.183,87.657,112.568,285.654,317.63,29.883,226.465,160.853,41.149,63.451,14.048,17.864,43.717,175.983,98.851,153.515,88.565,171.742,133.212,167.49,155.011,24.31,23.101,15.414,14.25,102.132,103.386,114.247,114.111,135.985,234.597,202.061,316.168,331.203,79.885,432.63,65.453,63.867,16.039,13.7,30.169,28.26,14.506,14.125,253.896,168.672,211.211,56.365,45.259,128.915,146.834,140.577,29.884,41.539,43.532,49.222,36.669,40.724,155.171,155.435,123.727,89.877,76.818,109.329,92.287,105.757,68.359,55.137,67.271,176.429,164.884,98.194,105.492,87.375,34.545,83.273,219.23,84.282,8.633,98.411,14.654,13.543,96.612,97.129,106.484,112.262,106.945,103.679,100.734,33.781,32.006,33.283,138.074,102.757,122.033,71.137,70.743,86.266,68.441,58.108,39.012,28.177,54.562,13.12,95.295,0.0,0.0,59.738,41.64,55.298,51.849,80.856,27.251,82.732,95.897,97.047,46.35,37.643,24.724,13.424,68.641,33.397,26.158,26.308,19.86,28.825,9.47,7.742,42.928,60.803,18.364,17.699,22.795,47.569,43.062,37.684,33.064,38.125,32.434,168.373,166.686,211.708,138.312,118.414,63.428,30.681,70.273,32.829,90.033,25.424,138.98,0.0,0.426,77.776,91.227,85.148,71.603,82.778,82.95,67.832,84.075,52.983,87.262,54.227,86.399,77.808,87.66,72.858,68.789,46.927,25.019,49.913,44.899,65.124,51.89,32.227,43.722,45.902,57.248,49.625,53.438,83.177,83.562,70.674,72.736,78.876,84.794,59.245,53.756,45.323,44.887,44.498,32.944,18.938,45.952,28.16,45.765,43.779,69.997,63.382,64.728,130.744,126.445,91.654,82.522,72.958,94.51,139.912,331.497,68.103,57.818,56.667,56.358,59.945,56.241,238.639,94.275,105.024,99.052,103.306,98.303,85.587,102.137,92.932,105.978,90.313,89.244,34.949,29.375,35.603,35.544,23.817,99.389,102.852,132.483,56.191,56.888,44.002,43.675,50.741,19.159,40.388,48.472,58.862,175.583,31.362,253.473,130.131,161.274,58.04,231.554,180.195,165.729,43.953,214.495,177.03,40.353,137.726,131.451,49.241,113.324,104.432,153.457,362.355,129.32,96.708,172.908,406.723,35.755,28.073,31.914,36.706,27.978,30.673
ENSG00000139687,10.85,416.822,5.173,14.01,12.452,11.376,1.167,13.521,15.757,0.0,49.916,8.809,27.603,21.872,7.969,55.082,56.189,54.231,104.4,2.812,0.0,45.602,83.83,99.998,56.112,4.236,8.574,6.533,4.779,0.734,0.972,7.171,4.367,2.895,1.864,4.442,1.869,1.364,26.547,16.24,17.415,5.313,3.673,26.441,47.205,3.856,12.415,8.557,5.487,4.613,4.62,6.622,2.1,1.358,1.843,2.175,1.388,0.819,6.671,4.571,2.507,3.919,1.794,3.045,4.015,3.307,10.292,3.478,3.603,2.543,3.139,1.334,18.888,5.993,12.691,13.645,1.304,40.614,17.881,5.978,18.394,52.842,36.911,66.033,22.579,59.967,10.861,23.135,7.335,59.388,40.835,39.116,22.927,62.398,15.847,8.482,9.444,0.16,0.06,0.069,8.393,11.463,4.688,1.194,12.098,4.414,3.835,4.661,8.33,7.639,6.36,6.949,7.551,11.347,0.443,14.493,3.795,5.111,15.546,16.075,16.789,18.769,6.482,6.698,19.986,20.596,6.321,2.751,2.519,2.612,4.41,3.527,4.808,9.169,9.014,10.0,7.14,9.542,17.665,10.468,25.456,11.902,3.381,15.766,6.536,9.878,7.432,8.631,10.255,10.327,8.671,8.763,40.31,24.83,2.319,7.603,3.513,10.065,6.078,11.859,24.536,3.302,7.653,4.751,1.579,25.756,21.891,10.817,30.005,38.233,30.85,32.692,38.531,35.298,17.729,57.734,12.058,9.548,3.274,1.79,11.208,1.66,11.128,14.554,4.06,4.333,10.997,9.371,5.643,6.925,2.111,7.242,3.362,2.384,9.484,0.094,0.01,1.334,16.714,24.036,21.955,50.138,23.319,3.147,5.152,4.005,0.752,1.005,0.135,0.301,2.362,3.139,19.074,24.386,14.098,38.906,22.131,21.532,23.265,5.811,36.771,4.195,4.395,0.764,0.577,1.366,1.16,0.664,0.806,11.348,11.465,10.759,6.11,6.52,30.512,26.697,34.939,5.489,14.07,13.563,16.808,11.949,12.022,21.168,21.219,7.186,6.08,5.353,10.875,11.076,10.927,19.243,16.892,23.817,25.618,25.291,22.266,21.614,10.775,10.234,24.884,8.797,7.853,3.051,4.488,2.062,5.518,8.879,14.434,13.595,15.926,13.858,11.981,13.23,2.741,2.974,2.103,20.719,17.428,19.265,15.711,14.239,17.812,10.159,13.145,6.811,2.129,9.364,8.659,49.268,5.579,13.993,34.17,16.669,20.088,19.882,21.687,12.466,12.658,12.702,15.101,8.616,4.45,3.873,4.352,7.231,1.955,3.153,1.944,2.633,4.457,1.81,1.211,0.383,4.218,3.978,4.449,6.848,1.937,2.065,0.989,3.486,3.894,3.534,23.281,25.139,29.311,16.949,18.693,6.606,6.067,5.916,4.684,12.819,6.792,12.057,0.0,0.336,18.407,16.443,18.088,15.743,17.336,14.92,15.938,18.351,10.589,18.288,12.572,13.941,15.085,15.454,19.506,12.986,9.915,7.157,7.608,9.438,9.771,9.599,6.829,8.474,5.97,11.239,9.217,15.039,39.962,34.916,7.068,6.728,6.491,4.473,9.955,8.27,15.018,13.972,14.684,11.312,7.721,12.364,9.657,11.01,11.988,24.49,29.406,23.873,20.942,22.814,19.91,18.02,16.511,18.757,17.925,31.594,1.978,1.96,2.05,1.956,2.44,1.882,27.798,9.641,8.328,9.969,9.258,9.062,9.219,8.085,7.797,9.117,9.911,7.79,4.447,3.661,2.824,4.568,3.468,18.801,20.097,20.394,2.974,2.431,1.991,2.055,2.516,1.599,3.255,2.899,2.228,17.209,1.396,16.395,17.343,14.433,8.18,18.228,27.14,15.405,3.928,11.493,28.44,3.528,27.923,27.856,3.674,45.193,5.586,25.057,66.914,26.466,7.875,29.112,73.049,2.399,1.28,2.525,4.138,2.591,1.88


In [28]:
exp = expressions_filtered.T
exp.index.names = ["run"]
print(exp.shape)
exp.columns

(459, 12243)


Index(['ENSG00000139990', 'ENSG00000073921', 'ENSG00000139687',
       'ENSG00000119977', 'ENSG00000242866', 'ENSG00000135506',
       'ENSG00000162426', 'ENSG00000165995', 'ENSG00000073756',
       'ENSG00000138050',
       ...
       'ENSG00000204889', 'ENSG00000275023', 'ENSG00000275489',
       'ENSG00000278259', 'ENSG00000199158', 'ENSG00000207008',
       'ENSG00000207605', 'ENSG00000207691', 'ENSG00000207726',
       'ENSG00000208892'],
      dtype='object', name='Homo_sapiens', length=12243)

In [29]:
selected_expressions = exp
selected_expressions[selected_expressions.columns[0:10]].tail(5)

Homo_sapiens,ENSG00000139990,ENSG00000073921,ENSG00000139687,ENSG00000119977,ENSG00000242866,ENSG00000135506,ENSG00000162426,ENSG00000165995,ENSG00000073756,ENSG00000138050
run,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
ERR3350030,5.975,28.073,1.28,0.858,0.0,87.18,0.0,0.0,0.0,2.087
ERR476402,7.414,31.914,2.525,0.632,0.0,80.971,0.0,0.399,0.0,2.039
ERR476404,7.35,36.706,4.138,0.715,0.0,68.186,0.0,0.0,0.0,3.011
ERR476406,5.803,27.978,2.591,1.354,0.0,88.41,0.0,0.0,0.0,3.003
ERR476408,5.279,30.673,1.88,0.409,0.0,100.924,0.0,0.106,0.0,1.854


In [30]:
assert selected_genes.index.size == exp.shape[1], "expressions should have all genes we selected!"
assert selected_samples.shape[0] == exp.shape[0], "all expressions should have samples metadata"

# Writing selected species, samples, genes, expressions #

In [31]:
selected_species.to_csv(locations.interim.species, sep = "\t", index = True)
selected_genes.to_csv(locations.interim.genes, sep = "\t", index = True)
selected_samples.to_csv(locations.interim.samples, sep="\t", index = True)
selected_expressions.to_csv(locations.interim.expressions, sep="\t", index = True)

