In [1]:
from typing import *
from yspecies import *
from yspecies.enums import *
from yspecies.dataset import *
from yspecies.misc import *

In [2]:
from dataclasses import dataclass
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [3]:
# Notebook display settings.
import pprint

# Never truncate rendered DataFrames: show every row and every column.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
# Render floating point values with exactly three decimals.
pd.set_option("display.float_format", lambda value: "%.3f" % value)

# Shared pretty-printer (4-space indent) for nested Python objects.
pp = pprint.PrettyPrinter(indent=4)

In [4]:
from pathlib import Path

# Root the project locations at the current directory when it contains a
# `data` folder, otherwise at the parent directory.
locations: Locations = Locations("./" if Path("./data").exists() else "../")

In [5]:
# Shortcuts to the project locations used by the cells below.
external = locations.output.external
shap_results = external  # second name bound to the same `external` location object
intersections = locations.output.intersections
interim = locations.interim

In [6]:
# Species metadata: read the tab-separated selection table and keep only the
# species name, its common name and its lifespan.
species = pd.read_csv(
    interim.dir / "selected_species.tsv",
    sep="\t",
)[["species", "common_name", "lifespan"]]
species.head(10)

Unnamed: 0,species,common_name,lifespan
0,Ailuropoda_melanoleuca,Giant panda,36.8
1,Aotus_nancymaae,Nancy Ma's night monkey,20.0
2,Bos_taurus,Domestic cattle,20.0
3,Callithrix_jacchus,White-tufted-ear marmoset,22.8
4,Canis_lupus_familiaris,Domestic dog,24.0
5,Capra_hircus,Domestic goat,20.8
6,Cavia_aperea,Brazilian guinea pig,6.0
7,Cavia_porcellus,Guinea pig,12.0
8,Equus_caballus,Horse,57.0
9,Felis_catus,Domestic cat,30.0


In [12]:
def _ranked_intersection(filename, ascending):
    """Load one intersection table ordered by its ``MLS_kendall_tau`` column.

    Parameters
    ----------
    filename : str
        Name of a tab-separated file inside ``intersections``.
    ascending : bool
        Sort direction applied to the ``MLS_kendall_tau`` column.

    Returns
    -------
    pd.DataFrame
        Frame indexed by ``Homo_sapiens`` (the renamed ``reference_gene``
        column) with a single ``symbol`` column, in sorted order.
    """
    table = pd.read_csv(intersections / filename, sep="\t")
    ranked = table.sort_values("MLS_kendall_tau", ascending=ascending)
    return (
        ranked[["reference_gene", "symbol"]]
        .rename(columns={"reference_gene": "Homo_sapiens"})
        .set_index("Homo_sapiens")
    )


# "pro" genes are listed from the highest MLS_kendall_tau downwards,
# "anti" genes from the lowest upwards.
i_pro = _ranked_intersection("pro_intersections_5_and_7.tsv", ascending=False)
i_anti = _ranked_intersection("anti_intersections_5_and_7.tsv", ascending=True)
i_anti.head(10)

Unnamed: 0_level_0,symbol
Homo_sapiens,Unnamed: 1_level_1
ENSG00000107815,TWNK
ENSG00000089234,BRAP
ENSG00000198663,C6orf89
ENSG00000214827,MTCP1
ENSG00000149577,SIDT2
ENSG00000152580,IGSF10
ENSG00000164879,CA3
ENSG00000121897,LIAS
ENSG00000204231,RXRB
ENSG00000111832,RWDD1


In [8]:
# Ortholog tables stored under the external output folder ("for_proteins").
# Each tab-separated file has a `Homo_sapiens` column followed by one column
# per species.
pro = pd.read_csv(
    external.dir / "for_proteins" / "pro.tsv",
    sep="\t",
)
anti = pd.read_csv(
    external.dir / "for_proteins" / "anti.tsv",
    sep="\t",
)
pro.head(10)

Unnamed: 0,Homo_sapiens,Loxodonta_africana,Gorilla_gorilla,Pan_troglodytes,Pongo_pygmaeus,Equus_caballus,Pan_paniscus,Tursiops_truncatus,Macaca_mulatta,Macaca_fascicularis,Macaca_nemestrina,Ailuropoda_melanoleuca,Ursus_americanus,Heterocephalus_glaber,Rhinolophus_ferrumequinum,Vombatus_ursinus,Felis_catus,Sus_scrofa,Lynx_canadensis,Bos_grunniens,Canis_lupus_familiaris,Rhinopithecus_bieti,Ovis_aries,Callithrix_jacchus,Ornithorhynchus_anatinus,Dasypus_novemcinctus,Phascolarctos_cinereus,Capra_hircus,Suricata_suricatta,Bos_taurus,Aotus_nancymaae,Otolemur_garnettii,Microcebus_murinus,Chinchilla_lanigera,Sarcophilus_harrisii,Cavia_porcellus,Erinaceus_europaeus,Tupaia_belangeri,Oryctolagus_cuniculus,Ictidomys_tridecemlineatus,Meriones_unguiculatus,Cavia_aperea,Monodelphis_domestica,Mus_musculus,Mesocricetus_auratus,Rattus_norvegicus,Mus_spicilegus,Mus_caroli
0,ENSG00000133256,ENSLAFG00000009245,ENSGGOG00000005730,ENSPTRG00000046026,ENSPPYG00000014504,ENSECAG00000017337,ENSPPAG00000037791,ENSTTRG00000002928,ENSMMUG00000003282,ENSMFAG00000039102,ENSMNEG00000045444,ENSAMEG00000016646,ENSUAMG00000015877,ENSHGLG00100013558,ENSRFEG00010013462,ENSVURG00010014792;ENSVURG00010019541,ENSFCAG00000008224,ENSSSCG00000036645,ENSLCNG00005001203,ENSBGRG00000020982,ENSCAFG00030021135;ENSCAFG00000016782;ENSCAFG0...,ENSRBIG00000034185,ENSOARG00000016949,ENSCJAG00000001990,ENSOANG00000006518,ENSDNOG00000002553,ENSPCIG00000014391,ENSCHIG00000020394,ENSSSUG00005006261,ENSBTAG00000017480,ENSANAG00000029619,ENSOGAG00000012260,ENSMICG00000010725,ENSCLAG00000012451,ENSSHAG00000012453,ENSCPOG00000022569,ENSEEUG00000009058,ENSTBEG00000005887,ENSOCUG00000038876,ENSSTOG00000020412,ENSMUGG00000019544,ENSCAPG00000012847,ENSMODG00000005968,ENSMUSG00000029491,ENSMAUG00000018594,ENSRNOG00000000065,ENSMSIG00000020631,MGP_CAROLIEiJ_G0027518
1,ENSG00000160323,ENSLAFG00000011278,ENSGGOG00000012541,ENSPTRG00000022942,ENSPPYG00000019736,ENSECAG00000008699,ENSPPAG00000030531,ENSTTRG00000007685,ENSMMUG00000000937,ENSMFAG00000000040,ENSMNEG00000034336,ENSAMEG00000011605,ENSUAMG00000018673,ENSHGLG00100015138,ENSRFEG00010016293,ENSVURG00010012105,ENSFCAG00000026549,ENSSSCG00000021241,ENSLCNG00005021964,ENSBGRG00000003687,ENSCAFG00000019776;ENSCAFG00040001104;ENSCAFG0...,ENSRBIG00000040768,ENSOARG00000003336,ENSCJAG00000009791,ENSOANG00000002455,ENSDNOG00000048133,ENSPCIG00000017381,ENSCHIG00000025823,ENSSSUG00005023329,ENSBTAG00000015093,ENSANAG00000032285,ENSOGAG00000014043,ENSMICG00000011799,ENSCLAG00000007542,ENSSHAG00000006104,ENSCPOG00000010285,ENSEEUG00000012163,ENSTBEG00000004466,ENSOCUG00000039392,ENSSTOG00000003765,ENSMUGG00000004109,ENSCAPG00000007044,ENSMODG00000039571,ENSMUSG00000014852,ENSMAUG00000002369,ENSRNOG00000005780,ENSMSIG00000019355,MGP_CAROLIEiJ_G0023333
2,ENSG00000129187,ENSLAFG00000018439,ENSGGOG00000016412,ENSPTRG00000016624,ENSPPYG00000015223,ENSECAG00000017516;ENSECAG00000000947,ENSPPAG00000022425,ENSTTRG00000001460,ENSMMUG00000000284,ENSMFAG00000034653,ENSMNEG00000033220,ENSAMEG00000008118,ENSUAMG00000013234,ENSHGLG00100018129,ENSRFEG00010017093,ENSVURG00010007106,ENSFCAG00000044388,ENSSSCG00000015775,ENSLCNG00005001180,ENSBGRG00000007650,ENSCAFG00000031175;ENSCAFG00030011853;ENSCAFG0...,ENSRBIG00000044833,ENSOARG00000006992,ENSCJAG00000005526,ENSOANG00000006238,ENSDNOG00000000721,ENSPCIG00000013744,ENSCHIG00000010700,ENSSSUG00005000048,ENSBTAG00000020282,ENSANAG00000026089,ENSOGAG00000000583,ENSMICG00000006929,ENSCLAG00000012757,ENSSHAG00000015218;ENSSHAG00000003799,ENSCPOG00000040207,ENSEEUG00000006654,ENSTBEG00000015910,ENSOCUG00000004708,ENSSTOG00000005462,ENSMUGG00000025677,ENSCAPG00000005870,ENSMODG00000004124,ENSMUSG00000031562,ENSMAUG00000015286,ENSRNOG00000013215,ENSMSIG00000030915,MGP_CAROLIEiJ_G0031048
3,ENSG00000069275,ENSLAFG00000021663,ENSGGOG00000039124,ENSPTRG00000048501,ENSPPYG00000000299,ENSECAG00000010336,ENSPPAG00000031082,ENSTTRG00000010469,ENSMMUG00000055835,ENSMFAG00000041134,ENSMNEG00000012609,ENSAMEG00000012950,ENSUAMG00000022802,ENSHGLG00100001378,ENSRFEG00010012324,ENSVURG00010012794,ENSFCAG00000006726,ENSSSCG00000022398,ENSLCNG00005016351,,ENSCAFG00000010168;ENSCAFG00040018276,ENSRBIG00000032021,ENSOARG00000004262,ENSCJAG00000010948,ENSOANG00000002148,ENSDNOG00000037376;ENSDNOG00000043734;ENSDNOG0...,ENSPCIG00000030124,ENSCHIG00000006508;ENSCHIG00000015711,ENSSSUG00005003023,ENSBTAG00000008001,ENSANAG00000026439,ENSOGAG00000010466,ENSMICG00000035443,ENSCLAG00000017127,ENSSHAG00000015999,ENSCPOG00000003127,ENSEEUG00000002767,ENSTBEG00000015641,ENSOCUG00000014895,ENSSTOG00000009264,ENSMUGG00000021169,ENSCAPG00000013700,ENSMODG00000015623,ENSMUSG00000026434,ENSMAUG00000020322,ENSRNOG00000047287,ENSMSIG00000024170,MGP_CAROLIEiJ_G0014577
4,ENSG00000006282,ENSLAFG00000022872,ENSGGOG00000003379,ENSPTRG00000009404,ENSPPYG00000008278,ENSECAG00000008281,ENSPPAG00000031622,ENSTTRG00000000215,ENSMMUG00000017252,ENSMFAG00000044525,ENSMNEG00000037398,ENSAMEG00000007546,ENSUAMG00000010010,ENSHGLG00100006048,ENSRFEG00010020941,ENSVURG00010022794;ENSVURG00010022770,ENSFCAG00000012338,ENSSSCG00000017565,ENSLCNG00005022222,ENSBGRG00000007324,ENSCAFG00040025834;ENSCAFG00030019974;ENSCAFG0...,ENSRBIG00000041592,ENSOARG00000003861,ENSCJAG00000018727,ENSOANG00000006951;ENSOANG00000005899;ENSOANG0...,ENSDNOG00000032944,ENSPCIG00000030204,ENSCHIG00000013775,ENSSSUG00005022901,ENSBTAG00000004155,ENSANAG00000027831,ENSOGAG00000009482,ENSMICG00000009762,ENSCLAG00000014759,ENSSHAG00000005339,ENSCPOG00000024264,ENSEEUG00000007417,ENSTBEG00000001851,ENSOCUG00000002725,ENSSTOG00000004098,ENSMUGG00000000966,ENSCAPG00000003748,ENSMODG00000020908,ENSMUSG00000020867,ENSMAUG00000013601,ENSRNOG00000003273,ENSMSIG00000014363,MGP_CAROLIEiJ_G0016981
5,ENSG00000157343,ENSLAFG00000032578,ENSGGOG00000015515,ENSPTRG00000018089,ENSPPYG00000016533,ENSECAG00000011493,ENSPPAG00000005870,ENSTTRG00000014321,ENSMMUG00000059809,ENSMFAG00000038125,ENSMNEG00000041244,ENSAMEG00000000561,ENSUAMG00000000997,ENSHGLG00100004605,ENSRFEG00010020075,ENSVURG00010011083,ENSFCAG00000032771,ENSSSCG00000001550,ENSLCNG00005005456,ENSBGRG00000014897,ENSCAFG00040016468;ENSCAFG00030014026;ENSCAFG0...,ENSRBIG00000029208,ENSOARG00000012346,ENSCJAG00000015745,ENSOANG00000015681,ENSDNOG00000035258,ENSPCIG00000030892,ENSCHIG00000003988,ENSSSUG00005001998,ENSBTAG00000010877,ENSANAG00000038298,ENSOGAG00000025864,ENSMICG00000035092,ENSCLAG00000012362,ENSSHAG00000012640,ENSCPOG00000022443,ENSEEUG00000004958,ENSTBEG00000003413,ENSOCUG00000005519,ENSSTOG00000013713,ENSMUGG00000019797,ENSCAPG00000003747,ENSMODG00000013752,ENSMUSG00000024223,ENSMAUG00000020880,ENSRNOG00000000508,ENSMSIG00000008938,MGP_CAROLIEiJ_G0021322
6,ENSG00000107551,ENSLAFG00000012041,ENSGGOG00000012833,ENSPTRG00000002452,ENSPPYG00000002223,ENSECAG00000007066,ENSPPAG00000029784,ENSTTRG00000011719,ENSMMUG00000017489,ENSMFAG00000031803,ENSMNEG00000032496,ENSAMEG00000001110,ENSUAMG00000026742,ENSHGLG00100003631,ENSRFEG00010021862,ENSVURG00010019959,ENSFCAG00000003482,ENSSSCG00000029662,ENSLCNG00005012201,ENSBGRG00000018526,ENSCAFG00030019738;ENSCAFG00040025568;ENSCAFG0...,ENSRBIG00000040175,ENSOARG00000002866,ENSCJAG00000014680,ENSOANG00000007637,ENSDNOG00000018539,ENSPCIG00000017259,ENSCHIG00000019714,ENSSSUG00005003918,ENSBTAG00000002669,ENSANAG00000027310,ENSOGAG00000002357,ENSMICG00000007181,ENSCLAG00000001632,ENSSHAG00000005030,ENSCPOG00000032526,ENSEEUG00000004054,ENSTBEG00000011122,ENSOCUG00000010260,ENSSTOG00000012353,ENSMUGG00000021605,ENSCAPG00000016927,ENSMODG00000010000,ENSMUSG00000042129,ENSMAUG00000010012,ENSRNOG00000013526,ENSMSIG00000027994,MGP_CAROLIEiJ_G0028777
7,ENSG00000166436,ENSLAFG00000000423,ENSGGOG00000016600,ENSPTRG00000050097,ENSPPYG00000003506,ENSECAG00000021487,ENSPPAG00000037497,ENSTTRG00000004599,ENSMMUG00000021827,ENSMFAG00000002122,ENSMNEG00000040307,ENSAMEG00000004220,ENSUAMG00000026019,ENSHGLG00100005248,ENSRFEG00010003471,ENSVURG00010005315,ENSFCAG00000008406,ENSSSCG00000014567,ENSLCNG00005008366,ENSBGRG00000007026,ENSCAFG00030010325;ENSCAFG00000006919;ENSCAFG0...,ENSRBIG00000029540,ENSOARG00000014647,ENSCJAG00000011782,ENSOANG00000006570,ENSDNOG00000019758,ENSPCIG00000005815,ENSCHIG00000013414,ENSSSUG00005013450,ENSBTAG00000020890,ENSANAG00000036957,ENSOGAG00000011436,ENSMICG00000002287,ENSCLAG00000016612,ENSSHAG00000017621,ENSCPOG00000007602,ENSEEUG00000001614,ENSTBEG00000001707,ENSOCUG00000005433,ENSSTOG00000001206,ENSMUGG00000020187,ENSCAPG00000009033,ENSMODG00000007442,ENSMUSG00000031026,ENSMAUG00000020247,ENSRNOG00000014373,ENSMSIG00000027247,MGP_CAROLIEiJ_G0030308
8,ENSG00000169189,ENSLAFG00000018542,ENSGGOG00000006501,ENSPTRG00000007912,ENSPPYG00000007212,ENSECAG00000024006,ENSPPAG00000043297,ENSTTRG00000008983,ENSMMUG00000015832,ENSMFAG00000041758,ENSMNEG00000031272,ENSAMEG00000004050,ENSUAMG00000019553,ENSHGLG00100002681,ENSRFEG00010017834,ENSVURG00010007211,ENSFCAG00000007733,ENSSSCG00000036275,ENSLCNG00005016574,ENSBGRG00000018739,ENSCAFG00000017462;ENSCAFG00030023301;ENSCAFG0...,ENSRBIG00000030369,ENSOARG00000018797,ENSCJAG00000020515,ENSOANG00000010987,ENSDNOG00000013271,ENSPCIG00000020451,ENSCHIG00000019879,ENSSSUG00005020060,ENSBTAG00000002176,ENSANAG00000030899,ENSOGAG00000004831,ENSMICG00000009548,ENSCLAG00000013740,ENSSHAG00000009856,ENSCPOG00000002283,ENSEEUG00000003359,ENSTBEG00000011415,ENSOCUG00000001500,ENSSTOG00000026576,ENSMUGG00000008963,ENSCAPG00000005451,ENSMODG00000015463,ENSMUSG00000030750,ENSMAUG00000011302,ENSRNOG00000015218,ENSMSIG00000017987,MGP_CAROLIEiJ_G0030433
9,ENSG00000183696,ENSLAFG00000000594,ENSGGOG00000028074,ENSPTRG00000045643;ENSPTRG00000019177,ENSPPYG00000017568,ENSECAG00000008193,ENSPPAG00000034370,ENSTTRG00000012047,ENSMMUG00000007037,ENSMFAG00000003183,ENSMNEG00000031273,ENSAMEG00000004732,ENSUAMG00000014876,ENSHGLG00100008123,ENSRFEG00010012811,ENSVURG00010014378,ENSFCAG00000011892,ENSSSCG00000030388,ENSLCNG00005004248,ENSBGRG00000021024,ENSCAFG00040015238;ENSCAFG00030009353;ENSCAFG0...,ENSRBIG00000038996,ENSOARG00000002061;ENSOARG00000016081,ENSCJAG00000014682,ENSOANG00000003089,ENSDNOG00000010163,ENSPCIG00000013786;ENSPCIG00000029427,ENSCHIG00000014663,ENSSSUG00005001092,ENSBTAG00000008428,ENSANAG00000021741,ENSOGAG00000016469,ENSMICG00000015020,ENSCLAG00000010724,ENSSHAG00000005832,ENSCPOG00000025236,ENSEEUG00000004386,ENSTBEG00000010143,ENSOCUG00000014902,ENSSTOG00000005502,ENSMUGG00000004256,ENSCAPG00000011085,ENSMODG00000009417,ENSMUSG00000020407,ENSMAUG00000009149,ENSRNOG00000004972,ENSMSIG00000010602,MGP_CAROLIEiJ_G0016091


In [14]:
def _genes_by_species(selected, orthologs):
    """Pivot an ortholog table so that rows are species and columns are gene symbols.

    Parameters
    ----------
    selected : pd.DataFrame
        Frame indexed by ``Homo_sapiens`` with a ``symbol`` column
        (``i_pro`` / ``i_anti``).
    orthologs : pd.DataFrame
        Frame with a ``Homo_sapiens`` column; every column label of this frame
        ends up as a value of the resulting ``species`` column.

    Returns
    -------
    pd.DataFrame
        One row per column of the merged table (including ``Homo_sapiens``
        itself), a ``species`` column holding those labels, and one column per
        gene symbol. Only genes present in both inputs are kept (inner merge).
    """
    merged = selected.reset_index().merge(orthologs, on="Homo_sapiens")
    # After transposing, the former column labels form the row index;
    # reset_index() exposes them as an "index" column, renamed to "species".
    return (
        merged.set_index("symbol")
        .T
        .reset_index()
        .rename(columns={"index": "species"})
    )


# Use the `pro` / `anti` ortholog tables already loaded from
# `external.dir / "for_proteins"` instead of re-reading them from a
# machine-specific absolute path.
# NOTE(review): assumes /data/species/pro.tsv and /data/species/anti.tsv were
# copies of those same tables -- confirm before relying on the result.
pro_t = _genes_by_species(i_pro, pro)
anti_t = _genes_by_species(i_anti, anti)
anti_t

symbol,species,TWNK,BRAP,C6orf89,MTCP1,SIDT2,IGSF10,CA3,LIAS,RXRB,RWDD1,SMPDL3A,ACTC1,COX5B,CDADC1,CTPS2,ADPRM,ABHD16A
0,Homo_sapiens,ENSG00000107815,ENSG00000089234,ENSG00000198663,ENSG00000214827,ENSG00000149577,ENSG00000152580,ENSG00000164879,ENSG00000121897,ENSG00000204231,ENSG00000111832,ENSG00000172594,ENSG00000159251,ENSG00000135940,ENSG00000102543,ENSG00000047230,ENSG00000170222,ENSG00000204427
1,Loxodonta_africana,ENSLAFG00000015859,ENSLAFG00000006701,ENSLAFG00000000208,ENSLAFG00000028512,ENSLAFG00000030954,ENSLAFG00000000038,ENSLAFG00000004404,ENSLAFG00000012023,ENSLAFG00000002141,ENSLAFG00000027334,ENSLAFG00000010744,ENSLAFG00000021518,ENSLAFG00000009386,ENSLAFG00000012861,ENSLAFG00000013533,ENSLAFG00000008416,ENSLAFG00000009495
2,Gorilla_gorilla,ENSGGOG00000015421,ENSGGOG00000007087,ENSGGOG00000013941,ENSGGOG00000037091,ENSGGOG00000014318,ENSGGOG00000002875,ENSGGOG00000010673,ENSGGOG00000014977,ENSGGOG00000009823,ENSGGOG00000026889,ENSGGOG00000011523,ENSGGOG00000023984,ENSGGOG00000001142,ENSGGOG00000005192,ENSGGOG00000006951,ENSGGOG00000009051,ENSGGOG00000009970
3,Pan_troglodytes,ENSPTRG00000029751,ENSPTRG00000005461,ENSPTRG00000024272,ENSPTRG00000022457,ENSPTRG00000004319,ENSPTRG00000015539,ENSPTRG00000052102,ENSPTRG00000015994,ENSPTRG00000018034,ENSPTRG00000018534,ENSPTRG00000018565,ENSPTRG00000006890,ENSPTRG00000012257;ENSPTRG00000048245,ENSPTRG00000005871,ENSPTRG00000045235;ENSPTRG00000021698,ENSPTRG00000008775,ENSPTRG00000017977
4,Pongo_pygmaeus,ENSPPYG00000002576,ENSPPYG00000004964,ENSPPYG00000016555,ENSPPYG00000020898,ENSPPYG00000003908,ENSPPYG00000014216,ENSPPYG00000018717,ENSPPYG00000014676,ENSPPYG00000017239,ENSPPYG00000016953,ENSPPYG00000016980,ENSPPYG00000006318,ENSPPYG00000012078,ENSPPYG00000005365,ENSPPYG00000020154,ENSPPYG00000007985,ENSPPYG00000016450
5,Equus_caballus,ENSECAG00000017734,ENSECAG00000007549,ENSECAG00000024122,ENSECAG00000043072,ENSECAG00000021466,ENSECAG00000023161,ENSECAG00000016228,ENSECAG00000015796,ENSECAG00000023041,ENSECAG00000006587,ENSECAG00000023408,ENSECAG00000013022,ENSECAG00000007968,ENSECAG00000020688,ENSECAG00000017669,ENSECAG00000025154,ENSECAG00000013375
6,Pan_paniscus,ENSPPAG00000035055,ENSPPAG00000042331,ENSPPAG00000042318,ENSPPAG00000029273,ENSPPAG00000029761,ENSPPAG00000033391,ENSPPAG00000043034,ENSPPAG00000040519,ENSPPAG00000038590,ENSPPAG00000035047,ENSPPAG00000013616,ENSPPAG00000029795,ENSPPAG00000041629,ENSPPAG00000028450,ENSPPAG00000031030,ENSPPAG00000043053,ENSPPAG00000040009
7,Tursiops_truncatus,ENSTTRG00000015112,ENSTTRG00000010873,ENSTTRG00000002299,ENSTTRG00000008159,ENSTTRG00000000973,ENSTTRG00000003206,ENSTTRG00000005803,ENSTTRG00000014293,ENSTTRG00000004291,ENSTTRG00000016549,ENSTTRG00000000252,ENSTTRG00000014799,ENSTTRG00000013490,ENSTTRG00000004052,ENSTTRG00000013328,ENSTTRG00000000069,ENSTTRG00000014105
8,Macaca_mulatta,ENSMMUG00000064033,ENSMMUG00000017107,ENSMMUG00000005004,ENSMMUG00000010251,ENSMMUG00000002053,ENSMMUG00000001107,ENSMMUG00000014890,ENSMMUG00000013851,ENSMMUG00000003858,ENSMMUG00000007415,ENSMMUG00000054098,ENSMMUG00000007027,ENSMMUG00000050877,ENSMMUG00000013470,ENSMMUG00000004797,ENSMMUG00000057593,ENSMMUG00000015686
9,Macaca_fascicularis,ENSMFAG00000032824,ENSMFAG00000036173,ENSMFAG00000042966,ENSMFAG00000001317,ENSMFAG00000032882,ENSMFAG00000002055,ENSMFAG00000000412,ENSMFAG00000040861,ENSMFAG00000003583,ENSMFAG00000042598,ENSMFAG00000043472,ENSMFAG00000039604,ENSMFAG00000034597;ENSMFAG00000036411;ENSMFAG0...,ENSMFAG00000032976,ENSMFAG00000009688,ENSMFAG00000042103,ENSMFAG00000000198


In [10]:
# Combine the species metadata with the per-species ortholog ids of the pro genes.
# The outer join keeps species that appear in only one of the two frames.
# Sorting happens AFTER the merge: the row order produced by an outer merge is
# not guaranteed to follow the left frame (recent pandas orders outer-join keys
# lexicographically), so sorting `species` beforehand would not reliably give a
# longest-lived-first table. Rows without a lifespan are placed last.
# `pro_t` already carries a `species` column, so no renaming is needed.
pro_genes = (
    species
    .merge(pro_t, on="species", how="outer")
    .sort_values("lifespan", ascending=False, kind="mergesort")  # stable: ties keep merge order
    .set_index("species")
)
pro_genes

Unnamed: 0_level_0,common_name,lifespan,METTL5,TRIM66,ADAMTS13,DCTD,NUCKS1,SPATA20,ARMC12,CLDN16,NAALADL1,RASSF4,PDE6B,MYOM1,TMEM182,NSMCE1,POLE,UPP1,SUMO1,CPVL,MOV10L1,MBD4,AC020929.1
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
Homo_sapiens,Human,122.5,ENSG00000138382,ENSG00000166436,ENSG00000160323,ENSG00000129187,ENSG00000069275,ENSG00000006282,ENSG00000157343,ENSG00000113946,ENSG00000168060,ENSG00000107551,ENSG00000133256,ENSG00000101605,ENSG00000170417,ENSG00000169189,ENSG00000177084,ENSG00000183696,ENSG00000116030,ENSG00000106066,ENSG00000073146,ENSG00000129071,ENSG00000160460
Loxodonta_africana,African bush elephant,65.0,ENSLAFG00000018044,ENSLAFG00000000423,ENSLAFG00000011278,ENSLAFG00000018439,ENSLAFG00000021663,ENSLAFG00000022872,ENSLAFG00000032578,ENSLAFG00000008037,ENSLAFG00000001142,ENSLAFG00000012041,ENSLAFG00000009245,ENSLAFG00000014655,ENSLAFG00000011215,ENSLAFG00000018542,ENSLAFG00000010802,ENSLAFG00000000594,ENSLAFG00000022855,ENSLAFG00000002811,ENSLAFG00000014159,ENSLAFG00000010609,ENSLAFG00000020815
Gorilla_gorilla,Gorilla,60.1,ENSGGOG00000022484,ENSGGOG00000016600,ENSGGOG00000012541,ENSGGOG00000016412,ENSGGOG00000039124,ENSGGOG00000003379,ENSGGOG00000015515,ENSGGOG00000025366,ENSGGOG00000002695,ENSGGOG00000012833,ENSGGOG00000005730,ENSGGOG00000044057,ENSGGOG00000037042,ENSGGOG00000006501,ENSGGOG00000010005,ENSGGOG00000028074,ENSGGOG00000026439,ENSGGOG00000009483,ENSGGOG00000010110,ENSGGOG00000010698,ENSGGOG00000004627
Pan_troglodytes,Chimpanzee,59.4,ENSPTRG00000012621,ENSPTRG00000050097,ENSPTRG00000022942,ENSPTRG00000016624,ENSPTRG00000048501,ENSPTRG00000009404,ENSPTRG00000018089,ENSPTRG00000015739,ENSPTRG00000003859,ENSPTRG00000002452,ENSPTRG00000046026,ENSPTRG00000009834,ENSPTRG00000012308,ENSPTRG00000007912,ENSPTRG00000045365,ENSPTRG00000045643;ENSPTRG00000019177,ENSPTRG00000012817,ENSPTRG00000019029,ENSPTRG00000044273,ENSPTRG00000015377,ENSPTRG00000011000
Equus_caballus,Horse,57.0,ENSECAG00000004756,ENSECAG00000021487,ENSECAG00000008699,ENSECAG00000017516;ENSECAG00000000947,ENSECAG00000010336,ENSECAG00000008281,ENSECAG00000011493,ENSECAG00000021850,ENSECAG00000018798,ENSECAG00000007066,ENSECAG00000017337,ENSECAG00000013468,ENSECAG00000028489,ENSECAG00000024006,ENSECAG00000001653,ENSECAG00000008193,ENSECAG00000024693,ENSECAG00000024795,ENSECAG00000011063,ENSECAG00000022106,ENSECAG00000042883
Pan_paniscus,Pygmy chimpanzee or bonobo,55.0,ENSPPAG00000031600,ENSPPAG00000037497,ENSPPAG00000030531,ENSPPAG00000022425,ENSPPAG00000031082,ENSPPAG00000031622,ENSPPAG00000005870,ENSPPAG00000031653,ENSPPAG00000043800,ENSPPAG00000029784,ENSPPAG00000037791,ENSPPAG00000037926,ENSPPAG00000038490,ENSPPAG00000043297,ENSPPAG00000034043,ENSPPAG00000034370,ENSPPAG00000039520,ENSPPAG00000039137,ENSPPAG00000029459,ENSPPAG00000036528,ENSPPAG00000042534
Tursiops_truncatus,Bottlenosed dolphin,51.6,ENSTTRG00000002154,ENSTTRG00000004599,ENSTTRG00000007685,ENSTTRG00000001460,ENSTTRG00000010469,ENSTTRG00000000215,ENSTTRG00000014321,ENSTTRG00000011535,ENSTTRG00000000326,ENSTTRG00000011719,ENSTTRG00000002928,ENSTTRG00000008310,ENSTTRG00000011185,ENSTTRG00000008983,ENSTTRG00000010629,ENSTTRG00000012047,ENSTTRG00000000642,ENSTTRG00000014785,ENSTTRG00000000879,ENSTTRG00000011265,ENSTTRG00000007143
Macaca_mulatta,Rhesus monkey,40.0,ENSMMUG00000023054,ENSMMUG00000021827,ENSMMUG00000000937,ENSMMUG00000000284,ENSMMUG00000055835,ENSMMUG00000017252,ENSMMUG00000059809,ENSMMUG00000018671,ENSMMUG00000012315,ENSMMUG00000017489,ENSMMUG00000003282,ENSMMUG00000019892,ENSMMUG00000004169,ENSMMUG00000015832,ENSMMUG00000015463,ENSMMUG00000007037,ENSMMUG00000005240,ENSMMUG00000020184,ENSMMUG00000009120,ENSMMUG00000012723,ENSMMUG00000020796;ENSMMUG00000051535
Macaca_fascicularis,Long-tailed macaque,39.0,ENSMFAG00000046280,ENSMFAG00000002122,ENSMFAG00000000040,ENSMFAG00000034653,ENSMFAG00000041134,ENSMFAG00000044525,ENSMFAG00000038125,ENSMFAG00000043262,ENSMFAG00000035435,ENSMFAG00000031803,ENSMFAG00000039102,ENSMFAG00000041420,ENSMFAG00000036897,ENSMFAG00000041758,ENSMFAG00000039401,ENSMFAG00000003183,ENSMFAG00000030831,ENSMFAG00000000559,ENSMFAG00000035651,ENSMFAG00000002266,ENSMFAG00000034571
Macaca_nemestrina,Pigtail macaque,37.6,ENSMNEG00000037870,ENSMNEG00000040307,ENSMNEG00000034336,ENSMNEG00000033220,ENSMNEG00000012609,ENSMNEG00000037398,ENSMNEG00000041244,ENSMNEG00000030636,ENSMNEG00000028607,ENSMNEG00000032496,ENSMNEG00000045444,ENSMNEG00000029047,ENSMNEG00000000448,ENSMNEG00000031272,ENSMNEG00000035160,ENSMNEG00000031273,ENSMNEG00000014372,ENSMNEG00000031185,ENSMNEG00000036671,ENSMNEG00000029752,ENSMNEG00000006099


In [15]:
# Combine the species metadata with the per-species ortholog ids of the anti genes.
# The outer join keeps species that appear in only one of the two frames.
# Sorting happens AFTER the merge: the row order produced by an outer merge is
# not guaranteed to follow the left frame (recent pandas orders outer-join keys
# lexicographically), so sorting `species` beforehand would not reliably give a
# longest-lived-first table. Rows without a lifespan are placed last.
# `anti_t` already carries a `species` column, so no renaming is needed.
anti_genes = (
    species
    .merge(anti_t, on="species", how="outer")
    .sort_values("lifespan", ascending=False, kind="mergesort")  # stable: ties keep merge order
    .set_index("species")
)
anti_genes

Unnamed: 0_level_0,common_name,lifespan,TWNK,BRAP,C6orf89,MTCP1,SIDT2,IGSF10,CA3,LIAS,RXRB,RWDD1,SMPDL3A,ACTC1,COX5B,CDADC1,CTPS2,ADPRM,ABHD16A
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Homo_sapiens,Human,122.5,ENSG00000107815,ENSG00000089234,ENSG00000198663,ENSG00000214827,ENSG00000149577,ENSG00000152580,ENSG00000164879,ENSG00000121897,ENSG00000204231,ENSG00000111832,ENSG00000172594,ENSG00000159251,ENSG00000135940,ENSG00000102543,ENSG00000047230,ENSG00000170222,ENSG00000204427
Loxodonta_africana,African bush elephant,65.0,ENSLAFG00000015859,ENSLAFG00000006701,ENSLAFG00000000208,ENSLAFG00000028512,ENSLAFG00000030954,ENSLAFG00000000038,ENSLAFG00000004404,ENSLAFG00000012023,ENSLAFG00000002141,ENSLAFG00000027334,ENSLAFG00000010744,ENSLAFG00000021518,ENSLAFG00000009386,ENSLAFG00000012861,ENSLAFG00000013533,ENSLAFG00000008416,ENSLAFG00000009495
Gorilla_gorilla,Gorilla,60.1,ENSGGOG00000015421,ENSGGOG00000007087,ENSGGOG00000013941,ENSGGOG00000037091,ENSGGOG00000014318,ENSGGOG00000002875,ENSGGOG00000010673,ENSGGOG00000014977,ENSGGOG00000009823,ENSGGOG00000026889,ENSGGOG00000011523,ENSGGOG00000023984,ENSGGOG00000001142,ENSGGOG00000005192,ENSGGOG00000006951,ENSGGOG00000009051,ENSGGOG00000009970
Pan_troglodytes,Chimpanzee,59.4,ENSPTRG00000029751,ENSPTRG00000005461,ENSPTRG00000024272,ENSPTRG00000022457,ENSPTRG00000004319,ENSPTRG00000015539,ENSPTRG00000052102,ENSPTRG00000015994,ENSPTRG00000018034,ENSPTRG00000018534,ENSPTRG00000018565,ENSPTRG00000006890,ENSPTRG00000012257;ENSPTRG00000048245,ENSPTRG00000005871,ENSPTRG00000045235;ENSPTRG00000021698,ENSPTRG00000008775,ENSPTRG00000017977
Equus_caballus,Horse,57.0,ENSECAG00000017734,ENSECAG00000007549,ENSECAG00000024122,ENSECAG00000043072,ENSECAG00000021466,ENSECAG00000023161,ENSECAG00000016228,ENSECAG00000015796,ENSECAG00000023041,ENSECAG00000006587,ENSECAG00000023408,ENSECAG00000013022,ENSECAG00000007968,ENSECAG00000020688,ENSECAG00000017669,ENSECAG00000025154,ENSECAG00000013375
Pan_paniscus,Pygmy chimpanzee or bonobo,55.0,ENSPPAG00000035055,ENSPPAG00000042331,ENSPPAG00000042318,ENSPPAG00000029273,ENSPPAG00000029761,ENSPPAG00000033391,ENSPPAG00000043034,ENSPPAG00000040519,ENSPPAG00000038590,ENSPPAG00000035047,ENSPPAG00000013616,ENSPPAG00000029795,ENSPPAG00000041629,ENSPPAG00000028450,ENSPPAG00000031030,ENSPPAG00000043053,ENSPPAG00000040009
Tursiops_truncatus,Bottlenosed dolphin,51.6,ENSTTRG00000015112,ENSTTRG00000010873,ENSTTRG00000002299,ENSTTRG00000008159,ENSTTRG00000000973,ENSTTRG00000003206,ENSTTRG00000005803,ENSTTRG00000014293,ENSTTRG00000004291,ENSTTRG00000016549,ENSTTRG00000000252,ENSTTRG00000014799,ENSTTRG00000013490,ENSTTRG00000004052,ENSTTRG00000013328,ENSTTRG00000000069,ENSTTRG00000014105
Macaca_mulatta,Rhesus monkey,40.0,ENSMMUG00000064033,ENSMMUG00000017107,ENSMMUG00000005004,ENSMMUG00000010251,ENSMMUG00000002053,ENSMMUG00000001107,ENSMMUG00000014890,ENSMMUG00000013851,ENSMMUG00000003858,ENSMMUG00000007415,ENSMMUG00000054098,ENSMMUG00000007027,ENSMMUG00000050877,ENSMMUG00000013470,ENSMMUG00000004797,ENSMMUG00000057593,ENSMMUG00000015686
Macaca_fascicularis,Long-tailed macaque,39.0,ENSMFAG00000032824,ENSMFAG00000036173,ENSMFAG00000042966,ENSMFAG00000001317,ENSMFAG00000032882,ENSMFAG00000002055,ENSMFAG00000000412,ENSMFAG00000040861,ENSMFAG00000003583,ENSMFAG00000042598,ENSMFAG00000043472,ENSMFAG00000039604,ENSMFAG00000034597;ENSMFAG00000036411;ENSMFAG0...,ENSMFAG00000032976,ENSMFAG00000009688,ENSMFAG00000042103,ENSMFAG00000000198
Macaca_nemestrina,Pigtail macaque,37.6,ENSMNEG00000041560,ENSMNEG00000039413,ENSMNEG00000029104,ENSMNEG00000041016,ENSMNEG00000032672,ENSMNEG00000041478,ENSMNEG00000030535,ENSMNEG00000005114,ENSMNEG00000034252,ENSMNEG00000045094,ENSMNEG00000034155,ENSMNEG00000028712,ENSMNEG00000031925;ENSMNEG00000034168,ENSMNEG00000018746,ENSMNEG00000036991,ENSMNEG00000032238,ENSMNEG00000034721


In [79]:
# Persist both tables as tab-separated files, keeping the `species` index as
# the first column.
# NOTE(review): the destination is a hard-coded absolute path; consider
# deriving it from `locations` so the notebook runs on other machines.
pro_genes.to_csv(
    "/data/species/pro_genes.tsv",
    index=True,
    sep="\t",
)
anti_genes.to_csv(
    "/data/species/anti_genes.tsv",
    index=True,
    sep="\t",
)
