In [2]:
from typing import *
from yspecies import *
from yspecies.enums import *
from yspecies.dataset import *
from yspecies.misc import *

In [3]:
from dataclasses import dataclass
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [4]:
# Notebook display settings.
import pprint

# Never truncate rendered frames: show every row and every column.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
# Render floats with exactly three decimal places.
pd.set_option('display.float_format', lambda value: '%.3f' % value)

# Shared pretty-printer (4-space indent) for nested Python structures.
pp = pprint.PrettyPrinter(indent=4)

### Parameters ###
Will be overridden by comparisons.yaml

In [5]:
#Parameters
# Kendall tau cut-off used to select genes: `pro` keeps rows with tau >= threshold and
# `anti` is called with -threshold to keep rows with tau <= -threshold.
threshold = 0.3


### Load paths ###

In [6]:
from pathlib import Path

# Use the current directory as the root when it contains a `data` folder, otherwise its parent.
locations: Locations = Locations("./" if Path("./data").exists() else "../")

In [7]:
# Folder holding externally produced results; `shap_results` is an alias of the same path.
external = locations.output / "external"
shap_results = external
# One sub-folder per kind of result.
shap, linear, causality = (external / folder for folder in ("shap", "linear", "causality"))

### Load linear ###

In [8]:
def lag_linear(df: pd.DataFrame, sign: int)->pd.DataFrame:
    """
    Summarise, per gene symbol, the organs whose linear model has the requested lifespan sign.

    :param df: linear-model table with the columns "symbol", "organ", "maxlifespan_sign",
        "maxlifespan_adjpval" and "maxlifespan_r2_adj"
    :param sign: value of "maxlifespan_sign" to keep (the wrappers below pass 1 or -1)
    :return: one row per symbol with the columns "symbol" and "organ"; "organ" is a ", "-joined
        list of "<organ> (<adjusted r2 rounded to 4 digits>)" labels ordered by decreasing
        adjusted r2. An empty frame with the same two columns is returned when nothing matches.
    """
    columns = ["symbol", "organ", "maxlifespan_adjpval", "maxlifespan_r2_adj"]
    selected = (
        df.loc[df["maxlifespan_sign"] == sign, columns]
        .sort_values(by="maxlifespan_r2_adj", ascending=False)
        .drop_duplicates()
    )
    if selected.empty:
        # Nothing to label or group; keep the output schema stable for downstream merges.
        return selected[["symbol", "organ"]].reset_index(drop=True)
    # Build the labels positionally and attach them to a fresh frame, instead of a row-wise
    # apply that is assigned back onto a filtered slice.
    labels = [
        organ + " (" + str(round(r2, 4)) + ")"
        for organ, r2 in zip(selected["organ"], selected["maxlifespan_r2_adj"])
    ]
    labelled = selected[["symbol"]].assign(organ=labels)
    return labelled.groupby("symbol", as_index=False).agg({'organ': ', '.join})


def pro_linear(df: pd.DataFrame)->pd.DataFrame:
    """Per-symbol organ summary for rows whose "maxlifespan_sign" equals 1."""
    return lag_linear(df, sign=1)
   
def anti_linear(df: pd.DataFrame)->pd.DataFrame:
    """Per-symbol organ summary for rows whose "maxlifespan_sign" equals -1."""
    return lag_linear(df, sign=-1)


In [9]:
# Linear-model results per organ, indexed by the reference gene id.
linear_organ = load_table(linear / 'with_organ_indicator_columns.tsv').set_index("reference_gene")
# Per-symbol organ summaries for each sign of the lifespan association.
pro_linear_organ = pro_linear(linear_organ)
anti_linear_organ = anti_linear(linear_organ)
pro_linear_organ.head(10)


Unnamed: 0,symbol,organ
0,A4GALT,"Lung (0.6232), Heart (0.5311)"
1,AAAS,Liver (0.3573)
2,AASS,"Lung (0.4628), Heart (0.322)"
3,ABCB10,Lung (0.4792)
4,ABCC10,Lung (0.4181)
5,ABHD3,Lung (0.5483)
6,ABLIM3,Liver (0.5392)
7,AC129492.1,Liver (0.3644)
8,ACADSB,Lung (0.3026)
9,ACAP1,"Brain (0.4236), Heart (0.3024)"


In [10]:
#linear_blk = load_table(linear /'Significant in B,L,K.tsv').set_index("reference_gene")
#linear_models = load_table(linear / 'linear_models_on_species_vars.tsv').set_index("reference_gene")
#linear_all = load_table(linear / 'Significant in all organs.tsv').set_index("reference_gene")
#linear_pro = linear_all[linear_all["maxlifespan_sign"]==1].sort_values(by="maxlifespan_r2_adj", ascending = False)
#linear_anti = linear_all[linear_all["maxlifespan_sign"]==-1].sort_values(by="maxlifespan_r2_adj", ascending = True)

### Load shap ###

In [11]:
def pro_genes(df: pd.DataFrame, threshold:float = 0.3) -> pd.DataFrame:
    """
    Keep the rows whose Kendall tau to maximum lifespan is at least ``threshold``.

    :param df: frame with a "kendall_tau_to_max_lifespan" column
    :param threshold: inclusive lower bound for the tau value
    :return: the matching rows, highest tau first
    """
    tau = "kendall_tau_to_max_lifespan"
    passes = df[tau] >= threshold
    return df.loc[passes].sort_values(by=tau, ascending=False)

def lifehistory(row, extra: list, threshold: float):
    """
    Describe which of the ``extra`` correlations of a row are at least as strong as the threshold.

    :param row: mapping from column name to numeric value (a DataFrame row in this notebook)
    :param extra: column names to inspect, in output order
    :param threshold: cut-off; only its absolute value is used
    :return: ", "-joined "<column> (<value rounded to 4 digits>)" labels with the
        "kendall_tau_to_" prefix removed, or "" when no column reaches the cut-off
    """
    cutoff = abs(threshold)
    labels = []
    for col in extra:
        value = row[col]
        if abs(value) >= cutoff:
            label = col + " (" + str(round(value, 4)) + ")"
            labels.append(label.replace("kendall_tau_to_", ""))
    return ', '.join(labels)

def pro(df: pd.DataFrame, threshold:float = 0.3) -> pd.DataFrame:
    """
    Select genes positively correlated with maximum lifespan and annotate their life-history links.

    :param df: frame with the columns 'ids', 'name', 'gain_score_to_max_lifespan',
        'kendall_tau_to_max_lifespan' and the four other "kendall_tau_to_*" columns listed in ``extra``
    :param threshold: inclusive lower bound on 'kendall_tau_to_max_lifespan'; its absolute value
        is also the cut-off for reporting a life-history correlation
    :return: new frame with the ``main`` columns plus a "life_history" text column,
        ordered by decreasing 'kendall_tau_to_max_lifespan'
    """
    main = ['ids','name', 'gain_score_to_max_lifespan', 'kendall_tau_to_max_lifespan']
    extra = ["kendall_tau_to_gestation_days", "kendall_tau_to_mass_g", "kendall_tau_to_temperature_celsius", "kendall_tau_to_metabolic_rate"]
    selected = pro_genes(df, threshold)
    # Annotate only the selected rows and attach the labels through assign(), so nothing is
    # written into a slice of the caller's frame and an empty selection still works.
    life_history = [lifehistory(row, extra, threshold) for _, row in selected.iterrows()]
    return selected[main].assign(life_history=life_history)

def anti_genes(df: pd.DataFrame, threshold:float = -0.3) -> pd.DataFrame:
    """
    Keep the rows whose Kendall tau to maximum lifespan is at most ``threshold``.

    :param df: frame with a "kendall_tau_to_max_lifespan" column
    :param threshold: inclusive upper bound for the tau value
    :return: the matching rows, lowest tau first
    """
    tau = "kendall_tau_to_max_lifespan"
    passes = df[tau] <= threshold
    return df.loc[passes].sort_values(by=tau, ascending=True)

def anti(df: pd.DataFrame, threshold:float = -0.3) -> pd.DataFrame:
    """
    Select genes negatively correlated with maximum lifespan and annotate their life-history links.

    :param df: frame with the columns 'ids', 'name', 'gain_score_to_max_lifespan',
        'kendall_tau_to_max_lifespan' and the four other "kendall_tau_to_*" columns listed in ``extra``
    :param threshold: inclusive upper bound on 'kendall_tau_to_max_lifespan'; its absolute value
        is also the cut-off for reporting a life-history correlation
    :return: new frame with the ``main`` columns plus a "life_history_kendal_tau" text column
        (this name differs from the "life_history" column produced by ``pro``),
        ordered by increasing 'kendall_tau_to_max_lifespan'
    """
    main = ['ids','name', 'gain_score_to_max_lifespan', 'kendall_tau_to_max_lifespan']
    extra = ["kendall_tau_to_gestation_days", "kendall_tau_to_mass_g", "kendall_tau_to_temperature_celsius", "kendall_tau_to_metabolic_rate"]
    selected = anti_genes(df, threshold)
    # Annotate only the selected rows and attach the labels through assign(), so nothing is
    # written into a slice of the caller's frame and an empty selection still works.
    life_history = [lifehistory(row, extra, threshold) for _, row in selected.iterrows()]
    return selected[main].assign(life_history_kendal_tau=life_history)

In [12]:
# SHAP result tables, one per tissue set / species selection; the first CSV column is the index.
anton_5_5, eugen_5_5, anton_7_4, anton_7_5 = (
    pd.read_csv(shap / file_name, index_col=0)
    for file_name in (
        "5_tissues_anton_species_5_bootstraps.csv",
        "5_tissues_data_11_06_eugene_species.csv",
        "7_tissues_anton_species_4_bootstraps.csv",
        "7_tissues_anton_species_5_bootstraps.csv",
    )
)
# Table used as the reference selection in the cells below.
current = anton_5_5
pro(current, threshold)

Unnamed: 0,ids,name,gain_score_to_max_lifespan,kendall_tau_to_max_lifespan,life_history
89,ENSG00000069275,NUCKS1,2750.898,0.715,gestation_days (0.5525)
14,ENSG00000160323,ADAMTS13,4504.608,0.693,
39,ENSG00000129187,DCTD,83059.823,0.64,gestation_days (0.7391)
53,ENSG00000146215,CRIP3,1580.386,0.639,
6,ENSG00000157343,ARMC12,14611.846,0.637,gestation_days (0.6541)
75,ENSG00000116030,SUMO1,1250.186,0.62,"gestation_days (0.7219), temperature_celsius (..."
65,ENSG00000148175,STOM,1011.608,0.607,
43,ENSG00000006282,SPATA20,67496.807,0.593,"gestation_days (0.6627), mass_g (0.5261), meta..."
45,ENSG00000073146,MOV10L1,4217.064,0.584,
25,ENSG00000168060,NAALADL1,4438.045,0.58,


In [13]:
# Negatively correlated genes of the reference table, using the mirrored cut-off.
anti(current, threshold=-threshold)

Unnamed: 0,ids,name,gain_score_to_max_lifespan,kendall_tau_to_max_lifespan,life_history_kendal_tau
9,ENSG00000198663,C6orf89,7371.253,-0.746,gestation_days (-0.682)
41,ENSG00000107815,TWNK,29538.657,-0.717,
92,ENSG00000147123,NDUFB11,7288.644,-0.703,
79,ENSG00000085840,ORC1,17409.994,-0.589,
1,ENSG00000149577,SIDT2,17588.014,-0.54,
40,ENSG00000185271,KLHL33,5888.169,-0.525,
0,ENSG00000214827,MTCP1,36295.636,-0.484,
10,ENSG00000132646,PCNA,319.946,-0.439,
3,ENSG00000152580,IGSF10,5499.678,-0.397,


### Causality ###

### Intersection ###

In [14]:
causal_anton_5_5 = pd.read_csv(causality / "causal_relations_5_tissues_species_5.csv")
# Distinct source nodes ("node1") of every "is father of" relation, exposed under the column "name".
fathers = (
    causal_anton_5_5.loc[causal_anton_5_5["predicate"] == "is father of", ["node1"]]
    .drop_duplicates()
    .rename(columns={"node1": "name"})
)
fathers

Unnamed: 0,name
0,PICALM
8,SPATA20
14,NUCKS1
18,HNRNPM
28,DCTD
34,LIMD2
36,NDUFB11
37,STOM
39,SIDT2
41,ADAMTS13


### Causal merging of pro and anti longevity genes ###

In [15]:
# Flag pro-longevity genes that also appear as a "father" in the causal relations.
# The selection uses the notebook-level `threshold` parameter, matching the `pro(current, threshold)` call above.
pro_causal_merge = (
    pro(current, threshold)
    .merge(fathers, on="name", how="left", indicator=True)
    .sort_values(by="_merge", ascending=False)
)
# After a left merge "_merge" is "both" exactly for the genes found in `fathers`.
pro_causal_merge["_merge"] = pro_causal_merge["_merge"] == "both"
pro_causal = (
    pro_causal_merge
    .rename(columns={"_merge": "causal", "ids": "reference_gene", "name": "symbol"})
    .set_index("reference_gene")
    .sort_values(by="kendall_tau_to_max_lifespan", ascending=False)
)
pro_causal

Unnamed: 0_level_0,symbol,gain_score_to_max_lifespan,kendall_tau_to_max_lifespan,life_history,causal
reference_gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ENSG00000069275,NUCKS1,2750.898,0.715,gestation_days (0.5525),True
ENSG00000160323,ADAMTS13,4504.608,0.693,,True
ENSG00000129187,DCTD,83059.823,0.64,gestation_days (0.7391),True
ENSG00000146215,CRIP3,1580.386,0.639,,False
ENSG00000157343,ARMC12,14611.846,0.637,gestation_days (0.6541),False
ENSG00000116030,SUMO1,1250.186,0.62,"gestation_days (0.7219), temperature_celsius (...",False
ENSG00000148175,STOM,1011.608,0.607,,True
ENSG00000006282,SPATA20,67496.807,0.593,"gestation_days (0.6627), mass_g (0.5261), meta...",True
ENSG00000073146,MOV10L1,4217.064,0.584,,False
ENSG00000168060,NAALADL1,4438.045,0.58,,False


In [16]:
# Flag anti-longevity genes that also appear as a "father" in the causal relations.
# The selection uses the notebook-level `threshold` parameter, matching the `anti(current, -threshold)` call above.
anti_causal_merge = (
    anti(current, -threshold)
    .merge(fathers, on="name", how="left", indicator=True)
    .sort_values(by="_merge", ascending=False)
)
# After a left merge "_merge" is "both" exactly for the genes found in `fathers`.
anti_causal_merge["_merge"] = anti_causal_merge["_merge"] == "both"
anti_causal = (
    anti_causal_merge
    .rename(columns={"_merge": "causal", "ids": "reference_gene", "name": "symbol"})
    .set_index("reference_gene")
    .sort_values(by="kendall_tau_to_max_lifespan", ascending=False)
)
anti_causal

Unnamed: 0_level_0,symbol,gain_score_to_max_lifespan,kendall_tau_to_max_lifespan,life_history_kendal_tau,causal
reference_gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ENSG00000152580,IGSF10,5499.678,-0.397,,False
ENSG00000132646,PCNA,319.946,-0.439,,False
ENSG00000214827,MTCP1,36295.636,-0.484,,True
ENSG00000185271,KLHL33,5888.169,-0.525,,True
ENSG00000149577,SIDT2,17588.014,-0.54,,True
ENSG00000085840,ORC1,17409.994,-0.589,,False
ENSG00000147123,NDUFB11,7288.644,-0.703,,True
ENSG00000107815,TWNK,29538.657,-0.717,,False
ENSG00000198663,C6orf89,7371.253,-0.746,gestation_days (-0.682),True


### Join with linear models ###

In [17]:
def shap_with_linear(shap_causal: pd.DataFrame, linear_organ: pd.DataFrame):
    """
    Left-join the per-symbol organ summary of the linear models onto a SHAP/causality table.

    :param shap_causal: frame indexed by "reference_gene" that has a "symbol" column
    :param linear_organ: frame with the columns "symbol" and "organ"
    :return: joined frame indexed by "reference_gene", with the score columns renamed
        (see ``renamed``) and the rows ordered by decreasing "MLS_kendall_tau"
    """
    renamed = {
        "organ": "organs (r^2) in linear models",
        "life_history": "life_history_kendal_tau",
        "kendall_tau_to_max_lifespan": "MLS_kendall_tau",
        "gain_score_to_max_lifespan": "MLS_gain_score",
    }
    joined = shap_causal.reset_index().merge(linear_organ, on="symbol", how="left")
    ranked = joined.rename(columns=renamed).sort_values(by="MLS_kendall_tau", ascending=False)
    return ranked.set_index("reference_gene")

In [18]:
# Pro-longevity genes with causality flags, joined with their linear-model organ summary.
pro_all = shap_with_linear(shap_causal=pro_causal, linear_organ=pro_linear_organ)
pro_all

Unnamed: 0_level_0,symbol,MLS_gain_score,MLS_kendall_tau,life_history_kendal_tau,causal,organs (r^2) in linear models
reference_gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ENSG00000069275,NUCKS1,2750.898,0.715,gestation_days (0.5525),True,Liver (0.4727)
ENSG00000160323,ADAMTS13,4504.608,0.693,,True,Kidney (0.3314)
ENSG00000129187,DCTD,83059.823,0.64,gestation_days (0.7391),True,"Liver (0.6644), Brain (0.3519), Heart (0.341),..."
ENSG00000146215,CRIP3,1580.386,0.639,,False,
ENSG00000157343,ARMC12,14611.846,0.637,gestation_days (0.6541),False,
ENSG00000116030,SUMO1,1250.186,0.62,"gestation_days (0.7219), temperature_celsius (...",False,
ENSG00000148175,STOM,1011.608,0.607,,True,"Liver (0.5172), Brain (0.4164), Heart (0.3429)"
ENSG00000006282,SPATA20,67496.807,0.593,"gestation_days (0.6627), mass_g (0.5261), meta...",True,"Heart (0.6114), Lung (0.5763), Brain (0.5311),..."
ENSG00000073146,MOV10L1,4217.064,0.584,,False,Brain (0.3554)
ENSG00000168060,NAALADL1,4438.045,0.58,,False,Lung (0.3785)


In [19]:
# Anti-longevity genes with causality flags, joined with their linear-model organ summary
# and re-ordered so the lowest tau comes first.
anti_all = shap_with_linear(shap_causal=anti_causal, linear_organ=anti_linear_organ).sort_values(by="MLS_kendall_tau", ascending=True)
anti_all

Unnamed: 0_level_0,symbol,MLS_gain_score,MLS_kendall_tau,life_history_kendal_tau,causal,organs (r^2) in linear models
reference_gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ENSG00000198663,C6orf89,7371.253,-0.746,gestation_days (-0.682),True,"Liver (0.3951), Kidney (0.3338)"
ENSG00000107815,TWNK,29538.657,-0.717,,False,
ENSG00000147123,NDUFB11,7288.644,-0.703,,True,
ENSG00000085840,ORC1,17409.994,-0.589,,False,
ENSG00000149577,SIDT2,17588.014,-0.54,,True,Liver (0.4537)
ENSG00000185271,KLHL33,5888.169,-0.525,,True,
ENSG00000214827,MTCP1,36295.636,-0.484,,True,"Brain (0.4052), Kidney (0.3005)"
ENSG00000132646,PCNA,319.946,-0.439,,False,Brain (0.3307)
ENSG00000152580,IGSF10,5499.678,-0.397,,False,


In [20]:
# Pro-longevity selection for the 7-tissue / 4-bootstrap table, using the notebook-level
# `threshold` parameter rather than the function default.
pro(anton_7_4, threshold)

Unnamed: 0,ids,name,gain_score_to_max_lifespan,kendall_tau_to_max_lifespan,life_history
14,ENSG00000051620,HEBP2,3409.465,0.745,gestation_days (0.6874)
66,ENSG00000135617,PRADC1,248.13,0.728,gestation_days (0.5362)
15,ENSG00000160323,ADAMTS13,792.952,0.706,
48,ENSG00000129187,DCTD,55370.239,0.701,gestation_days (0.724)
18,ENSG00000139344,AMDHD1,567.738,0.685,gestation_days (0.4453)
68,ENSG00000117697,NSL1,30778.72,0.672,gestation_days (0.7108)
86,ENSG00000120129,DUSP1,1283.946,0.661,
41,ENSG00000166436,TRIM66,271.514,0.644,
119,ENSG00000069275,NUCKS1,4716.805,0.626,gestation_days (0.5242)
71,ENSG00000146215,CRIP3,558.849,0.61,


In [21]:
# Output folder for the intersection tables; both tables are written tab-separated with their index.
intersections = locations.output / "intersections"
pro_all.to_csv(intersections / "pro_intersections.tsv", sep="\t", index=True)
anti_all.to_csv(intersections / "anti_intersections.tsv", sep="\t", index=True)

In [22]:
# Outer-join the reference pro-longevity table with the selection from the 7-tissue /
# 4-bootstrap run; "_merge" records which side(s) each gene came from.
# The selection uses the notebook-level `threshold` parameter rather than the function default.
pro_all_ext = (
    pro_all
    .merge(
        pro(anton_7_4, threshold).rename(columns={"ids": "reference_gene", "name": "symbol"}),
        on=["reference_gene", "symbol"], how="outer", indicator=True,
    )
    .sort_values(by="_merge", ascending=False)
)
pro_all_ext.to_csv(intersections / "pro_7_4_intersections.tsv", sep="\t", index=False)
pro_all_ext

Unnamed: 0,reference_gene,symbol,MLS_gain_score,MLS_kendall_tau,life_history_kendal_tau,causal,organs (r^2) in linear models,gain_score_to_max_lifespan,kendall_tau_to_max_lifespan,life_history,_merge
0,ENSG00000069275,NUCKS1,2750.898,0.715,gestation_days (0.5525),True,Liver (0.4727),4716.805,0.626,gestation_days (0.5242),both
12,ENSG00000107551,RASSF4,39677.401,0.542,,False,"Brain (0.5112), Kidney (0.3926), Heart (0.3676...",26066.176,0.54,,both
20,ENSG00000083896,YTHDC1,190349.72,0.316,gestation_days (0.6478),False,,240204.36,0.422,gestation_days (0.5441),both
18,ENSG00000106066,CPVL,894.426,0.391,,False,Liver (0.3138),1027.814,0.506,,both
16,ENSG00000134308,YWHAQ,1079.746,0.438,,False,,2071.263,0.539,,both
1,ENSG00000160323,ADAMTS13,4504.608,0.693,,True,Kidney (0.3314),792.952,0.706,,both
14,ENSG00000133256,PDE6B,25037.261,0.515,,False,,19078.165,0.427,,both
13,ENSG00000166436,TRIM66,398.831,0.533,,True,Liver (0.4859),271.514,0.644,,both
15,ENSG00000099783,HNRNPM,477.721,0.458,,True,,2100.155,0.354,gestation_days (0.403),both
7,ENSG00000006282,SPATA20,67496.807,0.593,"gestation_days (0.6627), mass_g (0.5261), meta...",True,"Heart (0.6114), Lung (0.5763), Brain (0.5311),...",3156.56,0.593,gestation_days (0.6402),both


In [129]:
# Outer-join the reference anti-longevity table with the selection from the 7-tissue /
# 5-bootstrap run; "_merge" records which side(s) each gene came from.
# Both sides carry a "life_history_kendal_tau" column, so the merge suffixes them with _x / _y.
# The selection uses the notebook-level `threshold` parameter rather than the function default.
anti_all_ext_7_5 = (
    anti_all
    .merge(
        anti(anton_7_5, -threshold).rename(columns={"ids": "reference_gene", "name": "symbol"}),
        on=["reference_gene", "symbol"], how="outer", indicator=True,
    )
    .sort_values(by="_merge", ascending=False)
)
anti_all_ext_7_5.to_csv(intersections / "anti_7_5_intersections.tsv", sep="\t", index=False)
anti_all_ext_7_5

Unnamed: 0,reference_gene,symbol,MLS_gain_score,MLS_kendall_tau,life_history_kendal_tau_x,causal,organs (r^2) in linear models,gain_score_to_max_lifespan,kendall_tau_to_max_lifespan,life_history_kendal_tau_y,_merge
0,ENSG00000198663,C6orf89,7371.253,-0.746,gestation_days (-0.682),True,"Liver (0.3951), Kidney (0.3338)",14943.378,-0.687,"gestation_days (-0.6524), metabolic_rate (-0.682)",both
2,ENSG00000147123,NDUFB11,7288.644,-0.703,,True,,29831.91,-0.59,"gestation_days (-0.5214), temperature_celsius ...",both
4,ENSG00000149577,SIDT2,17588.014,-0.54,,True,Liver (0.4537),19962.663,-0.517,gestation_days (-0.5088),both
6,ENSG00000214827,MTCP1,36295.636,-0.484,,True,"Brain (0.4052), Kidney (0.3005)",13345.507,-0.708,,both
1,ENSG00000107815,TWNK,29538.657,-0.717,,False,,21700.948,-0.553,,both
12,ENSG00000171132,PRKCE,,,,,,1054.806,-0.536,,right_only
17,ENSG00000172594,SMPDL3A,,,,,,5948.801,-0.378,,right_only
16,ENSG00000107537,PHYH,,,,,,557.328,-0.38,,right_only
15,ENSG00000100478,AP4S1,,,,,,2550.805,-0.389,,right_only
14,ENSG00000107949,BCCIP,,,,,,830.966,-0.484,,right_only


## Genage annotations ##

In [121]:
# GenAge annotation folder.
# NOTE(review): `locations.input.input` repeats `.input` - confirm against the Locations class.
genage_dir = locations.input.input / "annotations" / "genage"

genage_conversions = pd.read_csv(genage_dir / "genage_conversion.tsv", sep="\t")
# Keep only the annotation columns used below and drop repeated records.
genage = genage_conversions.loc[
    :, ["Ensembl", "Gene Symbol", "Organism", "Lifespan Effect", "Longevity Influence", "Method"]
].drop_duplicates()
genage.head(10)

Unnamed: 0,Ensembl,Gene Symbol,Organism,Lifespan Effect,Longevity Influence,Method
0,YPL174C,NIP100,Saccharomyces cerevisiae,decrease,fitness,Deletion
1,YER091C,MET6,Saccharomyces cerevisiae,decrease,fitness,Deletion
2,YDR108W,TRS85,Saccharomyces cerevisiae,decrease,fitness,Deletion
4,YMR135C,GID8,Saccharomyces cerevisiae,decrease,fitness,Deletion
6,YCR024C-A,PMP1,Saccharomyces cerevisiae,decrease,fitness,Deletion
7,YDR181C,SAS4,Saccharomyces cerevisiae,increase,anti,Deletion
8,YJL210W,PEX2,Saccharomyces cerevisiae,decrease,fitness,Deletion
9,YNL079C,TPM1,Saccharomyces cerevisiae,decrease,fitness,Deletion
10,YJR127C,RSF2,Saccharomyces cerevisiae,decrease,fitness,Deletion
11,YMR307W,GAS1,Saccharomyces cerevisiae,decrease,fitness,Deletion


In [122]:
# Shape of the de-duplicated "Ensembl" column, i.e. how many distinct identifiers GenAge lists.
genage["Ensembl"].drop_duplicates().shape

(2090,)

In [123]:
# Ortholog table: one column of gene ids per species; the human column becomes "reference_gene".
genage_orthologs_path = genage_dir / "genage_orthologs" / "all.tsv"
genage_genes = (
    pd.read_csv(genage_orthologs_path, sep="\t")
    .rename(columns={"Homo_sapiens": "reference_gene"})
)
genage_genes.head(10)

Unnamed: 0,reference_gene,Caenorhabditis_elegans,Drosophila_melanogaster,Mus_musculus,Mesocricetus_auratus,Danio_rerio
0,ENSG00000242265,,,ENSMUSG00000092035,ENSMAUG00000018656,ENSDARG00000109342
1,ENSG00000139990,WBGene00011242,FBgn0250755,ENSMUSG00000049106,ENSMAUG00000004350,ENSDARG00000060320
2,ENSG00000073921,WBGene00006751,FBgn0086372,ENSMUSG00000039361,ENSMAUG00000011448,ENSDARG00000012866;ENSDARG00000014137
3,ENSG00000139687,WBGene00003020,,ENSMUSG00000022105,ENSMAUG00000020865,ENSDARG00000006782
4,ENSG00000119977,WBGene00017120,,ENSMUSG00000025008,ENSMAUG00000021808,
5,ENSG00000145592,,,,ENSMAUG00000000193,ENSDARG00000034291
6,ENSG00000242866,,,ENSMUSG00000033498,ENSMAUG00000018007,ENSDARG00000105391
7,ENSG00000135506,WBGene00018611,,ENSMUSG00000040462,ENSMAUG00000019379,ENSDARG00000020301
8,ENSG00000150687,,,ENSMUSG00000039405,ENSMAUG00000010049,
9,ENSG00000162426,,FBgn0035968;FBgn0267429,ENSMUSG00000039838,ENSMAUG00000021292,ENSDARG00000004302


In [124]:
# Map GenAge model-organism records onto human reference genes through the ortholog table.
# The empty `genage.head(0)` frame goes first so the result keeps GenAge's column order,
# with "reference_gene" appended last.
humanized_parts = [genage.head(0)]
for c in genage_genes.columns[1:]:
    organism = c.replace("_", " ")  # ortholog columns use "_" where the "Organism" values use spaces
    orthologs = genage_genes[["reference_gene", c]].rename(columns={c: "Ensembl"}).dropna()
    # A cell can hold several ";"-separated ortholog ids: split them and emit one row per id.
    # astype(str) keeps the .str accessor usable when a species column has no ids at all.
    orthologs = orthologs.assign(Ensembl=orthologs["Ensembl"].astype(str).str.split(";")).explode("Ensembl")
    humanized_parts.append(orthologs.merge(genage[genage["Organism"] == organism], on="Ensembl", how="inner"))
# Concatenate once instead of growing the frame inside the loop.
genage_humanized = pd.concat(humanized_parts).drop_duplicates()
genage_humanized.head(10)

Unnamed: 0,Ensembl,Gene Symbol,Organism,Lifespan Effect,Longevity Influence,Method,reference_gene
0,WBGene00006619,try-1,Caenorhabditis elegans,increase,anti,RNA interference,ENSG00000095917
1,WBGene00006619,try-1,Caenorhabditis elegans,increase,anti,RNA interference,ENSG00000116176
2,WBGene00006619,try-1,Caenorhabditis elegans,increase,anti,RNA interference,ENSG00000172236
3,WBGene00006619,try-1,Caenorhabditis elegans,increase,anti,RNA interference,ENSG00000197253
4,WBGene00004481,rps-12,Caenorhabditis elegans,increase,anti,Post-developmental RNA interference,ENSG00000112306
5,WBGene00004789,sgk-1,Caenorhabditis elegans,increase,anti,Deletion,ENSG00000101049
6,WBGene00004789,sgk-1,Caenorhabditis elegans,increase,anti,Deletion,ENSG00000104205
7,WBGene00003476,mtm-3,Caenorhabditis elegans,decrease,pro,RNA interference,ENSG00000087053
8,WBGene00003476,mtm-3,Caenorhabditis elegans,decrease,pro,RNA interference,ENSG00000063601
9,WBGene00003476,mtm-3,Caenorhabditis elegans,decrease,pro,RNA interference,ENSG00000003987


In [133]:
# `pro_all_ext_7_5` is not created by any earlier cell of this notebook, so build it here the
# same way `anti_all_ext_7_5` is built: outer-join the reference pro-longevity table with the
# selection from the 7-tissue / 5-bootstrap run.
pro_all_ext_7_5 = (
    pro_all
    .merge(
        pro(anton_7_5, threshold).rename(columns={"ids": "reference_gene", "name": "symbol"}),
        on=["reference_gene", "symbol"], how="outer", indicator=True,
    )
    .sort_values(by="_merge", ascending=False)
)
# Keep the genes that have a GenAge record through a model-organism ortholog.
pro_genage = pro_all_ext_7_5.merge(genage_humanized, on="reference_gene")
pro_genage

Unnamed: 0,reference_gene,symbol,MLS_gain_score,MLS_kendall_tau,life_history_kendal_tau,causal,organs (r^2) in linear models,gain_score_to_max_lifespan,kendall_tau_to_max_lifespan,life_history,_merge,Ensembl,Gene Symbol,Organism,Lifespan Effect,Longevity Influence,Method
0,ENSG00000134308,YWHAQ,1079.746,0.438,,False,,2245.225,0.353,,both,WBGene00001502,ftt-2,Caenorhabditis elegans,decrease,pro,RNA interference
1,ENSG00000134308,YWHAQ,1079.746,0.438,,False,,2245.225,0.353,,both,WBGene00001502,ftt-2,Caenorhabditis elegans,increase,pro,Overexpression
2,ENSG00000134308,YWHAQ,1079.746,0.438,,False,,2245.225,0.353,,both,WBGene00003920,par-5,Caenorhabditis elegans,increase,pro,Overexpression
3,ENSG00000134308,YWHAQ,1079.746,0.438,,False,,2245.225,0.353,,both,WBGene00003920,par-5,Caenorhabditis elegans,decrease,pro,RNA interference
4,ENSG00000162994,CLHC1,,,,,,13795.645,0.63,,right_only,WBGene00011867,chc-1,Caenorhabditis elegans,decrease,pro,RNA interference


In [134]:
# Anti-longevity genes that have a GenAge record through a model-organism ortholog.
anti_genage = anti_all_ext_7_5.merge(genage_humanized, how="inner", on="reference_gene")
anti_genage

Unnamed: 0,reference_gene,symbol,MLS_gain_score,MLS_kendall_tau,life_history_kendal_tau_x,causal,organs (r^2) in linear models,gain_score_to_max_lifespan,kendall_tau_to_max_lifespan,life_history_kendal_tau_y,_merge,Ensembl,Gene Symbol,Organism,Lifespan Effect,Longevity Influence,Method
0,ENSG00000171132,PRKCE,,,,,,1054.806,-0.536,,right_only,WBGene00006599,tpa-1,Caenorhabditis elegans,increase,anti,RNA interference


In [101]:
# Human GenAge table; the merges below join it on its "symbol" column.
genage_human = pd.read_csv(genage_dir / "genage_human.csv")
genage_human.shape

(307, 6)

In [139]:
# Human GenAge genes that also appear in the extended pro-longevity table (matched by symbol).
pro_human_genage = genage_human.merge(pro_all_ext_7_5, how="inner", on="symbol")
pro_human_genage

Unnamed: 0,GenAge ID,symbol,name,entrez gene id,uniprot,why,reference_gene,MLS_gain_score,MLS_kendall_tau,life_history_kendal_tau,causal,organs (r^2) in linear models,gain_score_to_max_lifespan,kendall_tau_to_max_lifespan,life_history,_merge
0,50,FOS,FBJ murine osteosarcoma viral oncogene homolog,2353,FOS_HUMAN,putative,ENSG00000170345,,,,,,2475.703,0.495,,right_only
1,211,SUMO1,small ubiquitin-like modifier 1,7341,SUMO1_HUMAN,upstream,ENSG00000116030,1250.186,0.62,"gestation_days (0.7219), temperature_celsius (...",False,,13977.012,0.582,,both


In [140]:
# Human GenAge genes that also appear in the extended anti-longevity table (matched by symbol).
anti_human_genage = genage_human.merge(anti_all_ext_7_5, how="inner", on="symbol")
anti_human_genage

Unnamed: 0,GenAge ID,symbol,name,entrez gene id,uniprot,why,reference_gene,MLS_gain_score,MLS_kendall_tau,life_history_kendal_tau_x,causal,organs (r^2) in linear models,gain_score_to_max_lifespan,kendall_tau_to_max_lifespan,life_history_kendal_tau_y,_merge
0,113,PCNA,proliferating cell nuclear antigen,5111,PCNA_HUMAN,functional,ENSG00000132646,319.946,-0.439,,False,Brain (0.3307),,,,left_only


In [141]:
# Persist the GenAge intersections as tab-separated files without the row index.
# Model-organism (ortholog based) matches:
pro_genage.to_csv(intersections / "genage_pro_model_intersections.tsv", sep="\t", index=False)
anti_genage.to_csv(intersections / "genage_anti_model_intersections.tsv", sep="\t", index=False)
# Human GenAge matches:
pro_human_genage.to_csv(intersections / "genage_pro_human_intersections.tsv", sep="\t", index=False)
anti_human_genage.to_csv(intersections / "genage_anti_human_intersections.tsv", sep="\t", index=False)