In [2]:
from typing import *
from yspecies import *
from yspecies.enums import *
from yspecies.dataset import *
from yspecies.misc import *

In [3]:
from dataclasses import dataclass
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [4]:
#settings
# Display options: no limit on shown columns/rows, floats printed with three decimals
import pprint

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.float_format', '{:.3f}'.format)
pp = pprint.PrettyPrinter(indent=4)

### Parameters ###
Will be overridden by comparisons.yaml

In [5]:
#Parameters
# Kendall tau cut-off: passed as-is to pro(...) and negated for anti(...) in the cells below
threshold = 0.3


### Load paths ###

In [6]:
from pathlib import Path
# The project root is the current directory when it contains ./data, otherwise the parent directory
locations: Locations = Locations("./" if Path("./data").exists() else "../")

In [7]:
# Folders read from and written to by the rest of the notebook
external = locations.output.external
shap_results = external  # second name bound to the same object
intersections = locations.output.intersections
causality = external.dir / "causality"
shap = external.shap
linear = external.linear

### Load linear ###

In [7]:
def lag_linear(df: pd.DataFrame, sign: int)->pd.DataFrame:
    """
    Summarise, per gene symbol, the organs whose linear model has the requested sign.

    :param df: table with at least the columns "maxlifespan_sign", "symbol", "organ",
        "maxlifespan_adjpval" and "maxlifespan_r2_adj"
    :param sign: value of "maxlifespan_sign" to keep (the notebook passes 1 and -1)
    :return: one row per symbol with columns "symbol" and "organ"; "organ" is a ", "-joined
        list of "<organ> (<adjusted r2 rounded to 4 digits>)" labels ordered by descending
        adjusted r2. An empty frame with the same two columns when no row matches.
    """
    columns = ["symbol", "organ", "maxlifespan_adjpval", "maxlifespan_r2_adj"]
    selected = (
        df.loc[df["maxlifespan_sign"] == sign, columns]
        .sort_values(by="maxlifespan_r2_adj", ascending=False)
        .drop_duplicates()
    )
    if selected.empty:
        # Nothing to label or group; return the result schema explicitly
        # (a row-wise apply on an empty frame does not yield a single column)
        return pd.DataFrame({"symbol": [], "organ": []}, dtype=object)
    labels = [
        organ + " (" + str(round(r2, 4)) + ")"
        for organ, r2 in zip(selected["organ"], selected["maxlifespan_r2_adj"])
    ]
    labelled = selected[["symbol"]].assign(organ=labels)
    return labelled.groupby("symbol", as_index=False).agg({'organ': ', '.join})


def pro_linear(df: pd.DataFrame)->pd.DataFrame:
    """Per-symbol organ summary for rows whose "maxlifespan_sign" equals 1 (see lag_linear)."""
    positive_sign = 1
    return lag_linear(df, positive_sign)
   
def anti_linear(df: pd.DataFrame)->pd.DataFrame:
    """Per-symbol organ summary for rows whose "maxlifespan_sign" equals -1 (see lag_linear)."""
    negative_sign = -1
    return lag_linear(df, negative_sign)


In [8]:
linear_organ = load_table(linear / 'with_organ_indicator_columns.tsv').set_index("reference_gene")
anti_linear_organ = anti_linear(linear_organ)
pro_linear_organ = pro_linear(linear_organ)
pro_linear_organ.head(10)


Unnamed: 0,symbol,organ
0,A4GALT,"Lung (0.6232), Heart (0.5311)"
1,AAAS,Liver (0.3573)
2,AASS,"Lung (0.4628), Heart (0.322)"
3,ABCB10,Lung (0.4792)
4,ABCC10,Lung (0.4181)
5,ABHD3,Lung (0.5483)
6,ABLIM3,Liver (0.5392)
7,AC129492.1,Liver (0.3644)
8,ACADSB,Lung (0.3026)
9,ACAP1,"Brain (0.4236), Heart (0.3024)"


In [9]:
#linear_blk = load_table(linear /'Significant in B,L,K.tsv').set_index("reference_gene")
#linear_models = load_table(linear / 'linear_models_on_species_vars.tsv').set_index("reference_gene")
#linear_all = load_table(linear / 'Significant in all organs.tsv').set_index("reference_gene")
#linear_pro = linear_all[linear_all["maxlifespan_sign"]==1].sort_values(by="maxlifespan_r2_adj", ascending = False)
#linear_anti = linear_all[linear_all["maxlifespan_sign"]==-1].sort_values(by="maxlifespan_r2_adj", ascending = True)

### Load shap ###

In [90]:
def pro_genes(df: pd.DataFrame, threshold:float = 0.3) -> pd.DataFrame:
    """
    Keep rows whose "kendall_tau_to_max_lifespan" is at least `threshold`.

    :param df: table holding a "kendall_tau_to_max_lifespan" column
    :param threshold: inclusive lower bound for the tau value
    :return: the matching rows, ordered from the highest tau to the lowest
    """
    tau_column = "kendall_tau_to_max_lifespan"
    passes = df[tau_column] >= threshold
    kept = df[passes]
    return kept.sort_values(by=tau_column, ascending=False)

def lifehistory(row, extra: list, threshold: float):
    """
    Build a ", "-joined description of the columns in `extra` whose absolute value in `row`
    reaches the absolute value of `threshold`.

    :param row: mapping-like object indexed by column name (a DataFrame row in this notebook)
    :param extra: column names to inspect, in output order
    :param threshold: cut-off; only its absolute value is used
    :return: labels of the form "<column> (<value rounded to 4 digits>)" with the
        "kendall_tau_to_" prefix removed, or "" when no column qualifies
    """
    cutoff = abs(threshold)
    labels = []
    for column in extra:
        value = row[column]
        if abs(value) >= cutoff:
            label = column + " (" + str(round(value, 4)) + ")"
            labels.append(label.replace("kendall_tau_to_", ""))
    return ', '.join(labels)

def with_lifehistory(genes: pd.DataFrame, threshold: Optional[float] = None):
    """
    Reduce a gene table to its main columns and add a "life_history" description column.

    :param genes: table with the columns 'ids', 'name', 'gain_score_to_max_lifespan',
        'kendall_tau_to_max_lifespan' and the four "kendall_tau_to_*" life-history columns
    :param threshold: cut-off handed to `lifehistory` (only its absolute value matters there).
        When omitted, the notebook-level `threshold` variable is used, as before;
        0.3 is used if that variable is not defined.
    :return: frame with 'ids' renamed to "reference_gene", 'name' renamed to "symbol",
        plus the "life_history" column
    """
    if threshold is None:
        # Backward-compatible default: previously the global was read implicitly
        threshold = globals().get("threshold", 0.3)
    main = ['ids','name', 'gain_score_to_max_lifespan', 'kendall_tau_to_max_lifespan']
    extra = ["kendall_tau_to_gestation_days", "kendall_tau_to_mass_g", "kendall_tau_to_temperature_celsius", "kendall_tau_to_metabolic_rate"]
    selected_genes = genes[main].rename(columns = {"ids": "reference_gene", "name": "symbol"})
    # A list (rather than a row-wise apply) also works when `genes` has no rows
    selected_genes["life_history"] = [lifehistory(row, extra, threshold) for _, row in genes.iterrows()]
    return selected_genes

def pro(df: pd.DataFrame, threshold:float = 0.3) -> pd.DataFrame:
    """
    Select genes with pro_genes(df, threshold) and format them with with_lifehistory.

    :param df: gene table accepted by pro_genes
    :param threshold: cut-off forwarded to pro_genes
    :return: the frame produced by with_lifehistory for the selected genes
    """
    selected = pro_genes(df, threshold)
    return with_lifehistory(selected)
    
def anti_genes(df: pd.DataFrame, threshold:float = -0.3) -> pd.DataFrame:
    """
    Keep rows whose "kendall_tau_to_max_lifespan" is at most `threshold`.

    :param df: table holding a "kendall_tau_to_max_lifespan" column
    :param threshold: inclusive upper bound for the tau value
    :return: the matching rows, ordered from the lowest tau to the highest
    """
    tau_column = "kendall_tau_to_max_lifespan"
    passes = df[tau_column] <= threshold
    kept = df[passes]
    return kept.sort_values(by=tau_column, ascending=True)

def anti(df: pd.DataFrame, threshold:float = -0.3) -> pd.DataFrame:
    """
    Select genes with anti_genes(df, threshold) and format them with with_lifehistory.

    :param df: gene table accepted by anti_genes
    :param threshold: cut-off forwarded to anti_genes
    :return: the frame produced by with_lifehistory for the selected genes
    """
    selected = anti_genes(df, threshold)
    return with_lifehistory(selected)

In [124]:
# SHAP selection tables: the older 5-tissue table is read with its first column as index,
# the two validation tables are read without an index column
anton_5_5 = pd.read_csv(shap / "5_tissues_anton_species_5_bootstraps.csv", index_col=0)
#anton_7_4 = pd.read_csv(shap / "7_tissues_anton_species_4_bootstraps.csv", index_col=0)
anton_5_5_updated = pd.read_csv(shap / "5_tissues_species_validation.csv")
anton_7_5_updated = pd.read_csv(shap / "7_tissues_species_validation.csv")

# `current` / `current_7` name the 5- and 7-tissue tables used by the following cells
current, current_7 = anton_5_5_updated, anton_7_5_updated
pro(current, threshold=threshold)

Unnamed: 0,reference_gene,symbol,gain_score_to_max_lifespan,kendall_tau_to_max_lifespan,life_history
28,ENSG00000138382,METTL5,567.972,0.719,
62,ENSG00000166436,TRIM66,1093.345,0.696,
0,ENSG00000160323,ADAMTS13,1240.185,0.659,
18,ENSG00000129187,DCTD,71669.584,0.656,gestation_days (0.7374)
37,ENSG00000069275,NUCKS1,577.205,0.65,
58,ENSG00000006282,SPATA20,93405.346,0.636,gestation_days (0.6903)
3,ENSG00000157343,ARMC12,19732.875,0.628,gestation_days (0.6295)
14,ENSG00000113946,CLDN16,12825.009,0.616,gestation_days (0.5818)
33,ENSG00000168060,NAALADL1,12657.496,0.614,
52,ENSG00000107551,RASSF4,16217.082,0.6,


In [125]:
# Same selection in the negative direction: the cut-off is negated before being passed on
anti(current, -threshold)

Unnamed: 0,reference_gene,symbol,gain_score_to_max_lifespan,kendall_tau_to_max_lifespan,life_history
65,ENSG00000107815,TWNK,19014.891,-0.715,
6,ENSG00000089234,BRAP,1359.8,-0.701,
8,ENSG00000198663,C6orf89,15864.829,-0.681,gestation_days (-0.7211)
31,ENSG00000214827,MTCP1,9390.777,-0.603,
56,ENSG00000149577,SIDT2,12894.13,-0.534,
26,ENSG00000152580,IGSF10,4300.925,-0.517,
9,ENSG00000164879,CA3,18519.766,-0.499,


# Causality #

### Intersection ###

In [126]:
# Unique "node1" values of every "is father of" relation, exposed under the column name "symbol"
causal_anton_5_5 = pd.read_csv(causality / "causal_relations_5_tissues_new_set_Rodrigo.csv")
causal_current = causal_anton_5_5
fathers = (
    causal_current
    .loc[lambda relations: relations["predicate"] == "is father of", ["node1"]]
    .drop_duplicates()
    .rename(columns={"node1": "symbol"})
)
fathers

Unnamed: 0,symbol
0,SPATA20
2,NUCKS1
5,BRAP
10,RASSF4
11,TWNK
17,DCTD
23,PDE6B
29,METTL5
34,IGSF10
37,ARMC12


In [127]:
# Flag which selected genes also occur in `fathers` (left merge + indicator -> boolean "causal").
# The configured `threshold` is passed explicitly so the selection follows the notebook parameter
# instead of the function default.
pro_causal_merge = (
    pro(current, threshold)
    .merge(fathers, on="symbol", how="left", indicator=True)
    .sort_values(by="_merge", ascending=False)
)
pro_causal_merge["_merge"] = pro_causal_merge["_merge"] == "both"
pro_causal = (
    pro_causal_merge
    .rename(columns={"_merge": "causal"})
    .set_index("reference_gene")
    .sort_values(by="kendall_tau_to_max_lifespan", ascending=False)
)
pro_causal

Unnamed: 0_level_0,symbol,gain_score_to_max_lifespan,kendall_tau_to_max_lifespan,life_history,causal
reference_gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ENSG00000138382,METTL5,567.972,0.719,,True
ENSG00000166436,TRIM66,1093.345,0.696,,True
ENSG00000160323,ADAMTS13,1240.185,0.659,,False
ENSG00000129187,DCTD,71669.584,0.656,gestation_days (0.7374),True
ENSG00000069275,NUCKS1,577.205,0.65,,True
ENSG00000006282,SPATA20,93405.346,0.636,gestation_days (0.6903),True
ENSG00000157343,ARMC12,19732.875,0.628,gestation_days (0.6295),True
ENSG00000113946,CLDN16,12825.009,0.616,gestation_days (0.5818),False
ENSG00000168060,NAALADL1,12657.496,0.614,,True
ENSG00000107551,RASSF4,16217.082,0.6,,True


In [128]:
# Flag which selected genes also occur in `fathers` (left merge + indicator -> boolean "causal").
# The negated configured `threshold` is passed explicitly so the selection follows the notebook
# parameter instead of the function default.
anti_causal_merge = (
    anti(current, -threshold)
    .merge(fathers, on="symbol", how="left", indicator=True)
    .sort_values(by="_merge", ascending=False)
)
anti_causal_merge["_merge"] = anti_causal_merge["_merge"] == "both"
anti_causal = (
    anti_causal_merge
    .rename(columns={"_merge": "causal"})
    .set_index("reference_gene")
    .sort_values(by="kendall_tau_to_max_lifespan", ascending=False)
)
anti_causal

Unnamed: 0_level_0,symbol,gain_score_to_max_lifespan,kendall_tau_to_max_lifespan,life_history,causal
reference_gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ENSG00000164879,CA3,18519.766,-0.499,,False
ENSG00000152580,IGSF10,4300.925,-0.517,,True
ENSG00000149577,SIDT2,12894.13,-0.534,,False
ENSG00000214827,MTCP1,9390.777,-0.603,,True
ENSG00000198663,C6orf89,15864.829,-0.681,gestation_days (-0.7211),True
ENSG00000089234,BRAP,1359.8,-0.701,,True
ENSG00000107815,TWNK,19014.891,-0.715,,True


# comparison with old results #

In [129]:
# Outer-join the current selection with the one from the older table; "_OLD" marks the older values.
# The configured `threshold` is passed explicitly so both selections use the notebook parameter.
old = pro(anton_5_5, threshold).rename(columns={
    "gain_score_to_max_lifespan": "gain_score_to_max_lifespan_OLD",
    "kendall_tau_to_max_lifespan": "kendall_tau_to_max_lifespan_OLD",
    "life_history": "life_history_OLD",
})
novel = pro_causal.reset_index()
new_old_comparison = (
    novel.merge(old, on=["reference_gene", "symbol"], how="outer", indicator=True)
    .sort_values(by="_merge", ascending=False)
)
# "_merge" is categorical: rename its categories rather than calling .replace on a categorical column
new_old_comparison["_merge"] = new_old_comparison["_merge"].cat.rename_categories(
    {"right_only": "old_results", "left_only": "new_results"}
)
new_old_comparison

Unnamed: 0,reference_gene,symbol,gain_score_to_max_lifespan,kendall_tau_to_max_lifespan,life_history,causal,gain_score_to_max_lifespan_OLD,kendall_tau_to_max_lifespan_OLD,life_history_OLD,_merge
10,ENSG00000133256,PDE6B,133800.141,0.545,gestation_days (0.6518),True,25037.261,0.515,,both
2,ENSG00000160323,ADAMTS13,1240.185,0.659,,False,4504.608,0.693,,both
3,ENSG00000129187,DCTD,71669.584,0.656,gestation_days (0.7374),True,83059.823,0.64,gestation_days (0.7391),both
4,ENSG00000069275,NUCKS1,577.205,0.65,,True,2750.898,0.715,gestation_days (0.5525),both
5,ENSG00000006282,SPATA20,93405.346,0.636,gestation_days (0.6903),True,67496.807,0.593,"gestation_days (0.6627), mass_g (0.5261), meta...",both
6,ENSG00000157343,ARMC12,19732.875,0.628,gestation_days (0.6295),True,14611.846,0.637,gestation_days (0.6541),both
8,ENSG00000168060,NAALADL1,12657.496,0.614,,True,4438.045,0.58,,both
9,ENSG00000107551,RASSF4,16217.082,0.6,,True,39677.401,0.542,,both
12,ENSG00000170417,TMEM182,3185.571,0.535,,True,623.907,0.573,,both
1,ENSG00000166436,TRIM66,1093.345,0.696,,True,398.831,0.533,,both


In [130]:
# Save the comparison as a tab-separated file; the row index is not written
new_old_comparison.to_csv(intersections / "new_old_comparison.tsv", sep = "\t", index = False)

# Join with linear models #

In [132]:
def shap_with_linear(shap_causal: pd.DataFrame, linear_organ: pd.DataFrame):
    """
    Left-join a SHAP gene table with the per-symbol linear-model organ summary.

    :param shap_causal: gene table indexed by "reference_gene" that has a "symbol" column
    :param linear_organ: table with "symbol" and "organ" columns
    :return: the joined table with presentation column names, ordered by descending
        "MLS_kendall_tau" and indexed by "reference_gene"
    """
    presentation_names = {
        "organ": "organs (r^2) in linear models",
        "life_history": "life_history_kendal_tau",
        "kendall_tau_to_max_lifespan": "MLS_kendall_tau",
        "gain_score_to_max_lifespan": "MLS_gain_score",
    }
    joined = shap_causal.reset_index().merge(linear_organ, on="symbol", how="left")
    renamed = joined.rename(columns=presentation_names)
    ordered = renamed.sort_values(by="MLS_kendall_tau", ascending=False)
    return ordered.set_index("reference_gene")

In [133]:
# Pro table with the causal flag, joined with the sign = 1 organ summary from the linear models
pro_all =  shap_with_linear(pro_causal, pro_linear_organ)
pro_all

Unnamed: 0_level_0,symbol,MLS_gain_score,MLS_kendall_tau,life_history_kendal_tau,causal,organs (r^2) in linear models
reference_gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ENSG00000138382,METTL5,567.972,0.719,,True,
ENSG00000166436,TRIM66,1093.345,0.696,,True,Liver (0.4859)
ENSG00000160323,ADAMTS13,1240.185,0.659,,False,Kidney (0.3314)
ENSG00000129187,DCTD,71669.584,0.656,gestation_days (0.7374),True,"Liver (0.6644), Brain (0.3519), Heart (0.341),..."
ENSG00000069275,NUCKS1,577.205,0.65,,True,Liver (0.4727)
ENSG00000006282,SPATA20,93405.346,0.636,gestation_days (0.6903),True,"Heart (0.6114), Lung (0.5763), Brain (0.5311),..."
ENSG00000157343,ARMC12,19732.875,0.628,gestation_days (0.6295),True,
ENSG00000113946,CLDN16,12825.009,0.616,gestation_days (0.5818),False,
ENSG00000168060,NAALADL1,12657.496,0.614,,True,Lung (0.3785)
ENSG00000107551,RASSF4,16217.082,0.6,,True,"Brain (0.5112), Kidney (0.3926), Heart (0.3676..."


In [134]:
# Anti table joined with the sign = -1 organ summary, re-sorted ascending so the lowest tau comes first
anti_all =  shap_with_linear(anti_causal, anti_linear_organ).sort_values("MLS_kendall_tau")
anti_all

Unnamed: 0_level_0,symbol,MLS_gain_score,MLS_kendall_tau,life_history_kendal_tau,causal,organs (r^2) in linear models
reference_gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ENSG00000107815,TWNK,19014.891,-0.715,,True,
ENSG00000089234,BRAP,1359.8,-0.701,,True,Heart (0.304)
ENSG00000198663,C6orf89,15864.829,-0.681,gestation_days (-0.7211),True,"Liver (0.3951), Kidney (0.3338)"
ENSG00000214827,MTCP1,9390.777,-0.603,,True,"Brain (0.4052), Kidney (0.3005)"
ENSG00000149577,SIDT2,12894.13,-0.534,,False,Liver (0.4537)
ENSG00000152580,IGSF10,4300.925,-0.517,,True,
ENSG00000164879,CA3,18519.766,-0.499,,False,Liver (0.4193)


In [135]:
# `intersections` is assigned again from the same `locations.output.intersections` attribute
intersections = locations.output.intersections
# index = True keeps the "reference_gene" index as the first column of each file
pro_all.to_csv(intersections / "pro_intersections.tsv", sep = "\t", index = True)
anti_all.to_csv(intersections / "anti_intersections.tsv", sep = "\t", index = True)

In [136]:
# Outer-join the 5-tissue table with the 7-tissue selection and label where each gene was found.
# The configured `threshold` is passed explicitly so the 7-tissue selection follows the notebook parameter.
pro_all_ext = (
    pro_all.merge(pro(current_7, threshold), on=["reference_gene", "symbol"], how="outer", indicator=True)
    .sort_values(by="_merge", ascending=False)
)
# "_merge" is categorical: rename its categories rather than calling .replace on a categorical column
pro_all_ext["_merge"] = pro_all_ext["_merge"].cat.rename_categories(
    {"left_only": "five_tissues_model_only", "right_only": "seven_tissues_model_only"}
)
pro_all_ext.to_csv(intersections / "pro_intersections_5_and_7.tsv", sep = "\t", index = False)
pro_all_ext

Unnamed: 0,reference_gene,symbol,MLS_gain_score,MLS_kendall_tau,life_history_kendal_tau,causal,organs (r^2) in linear models,gain_score_to_max_lifespan,kendall_tau_to_max_lifespan,life_history,_merge
10,ENSG00000133256,PDE6B,133800.141,0.545,gestation_days (0.6518),True,,128276.065,0.504,gestation_days (0.6405),both
2,ENSG00000160323,ADAMTS13,1240.185,0.659,,False,Kidney (0.3314),1573.559,0.743,,both
3,ENSG00000129187,DCTD,71669.584,0.656,gestation_days (0.7374),True,"Liver (0.6644), Brain (0.3519), Heart (0.341),...",65586.3,0.703,,both
4,ENSG00000069275,NUCKS1,577.205,0.65,,True,Liver (0.4727),5379.475,0.74,,both
5,ENSG00000006282,SPATA20,93405.346,0.636,gestation_days (0.6903),True,"Heart (0.6114), Lung (0.5763), Brain (0.5311),...",13535.91,0.613,gestation_days (0.6727),both
6,ENSG00000157343,ARMC12,19732.875,0.628,gestation_days (0.6295),True,,17594.637,0.709,gestation_days (0.6132),both
9,ENSG00000107551,RASSF4,16217.082,0.6,,True,"Brain (0.5112), Kidney (0.3926), Heart (0.3676...",21821.285,0.632,,both
1,ENSG00000166436,TRIM66,1093.345,0.696,,True,Liver (0.4859),360.945,0.545,,both
13,ENSG00000169189,NSMCE1,2700.554,0.465,gestation_days (0.6451),False,Liver (0.4798),13737.792,0.743,gestation_days (0.7144),both
15,ENSG00000183696,UPP1,,,,,,8118.256,0.66,,seven_tissues_model_only


In [139]:
# Outer-join the 5-tissue table with the 7-tissue selection and label where each gene was found.
# The negated configured `threshold` is passed explicitly so the 7-tissue selection follows the notebook parameter.
anti_all_ext = (
    anti_all.merge(anti(current_7, -threshold), on=["reference_gene", "symbol"], how="outer", indicator=True)
    .sort_values(by="_merge", ascending=False)
)
# "_merge" is categorical: rename its categories rather than calling .replace on a categorical column
anti_all_ext["_merge"] = anti_all_ext["_merge"].cat.rename_categories(
    {"left_only": "five_tissues_model_only", "right_only": "seven_tissues_model_only"}
)
anti_all_ext.to_csv(intersections / "anti_intersections_5_and_7.tsv", sep = "\t", index = False)
anti_all_ext

Unnamed: 0,reference_gene,symbol,MLS_gain_score,MLS_kendall_tau,life_history_kendal_tau,causal,organs (r^2) in linear models,gain_score_to_max_lifespan,kendall_tau_to_max_lifespan,life_history,_merge
0,ENSG00000107815,TWNK,19014.891,-0.715,,True,,22810.264,-0.67,,both
2,ENSG00000198663,C6orf89,15864.829,-0.681,gestation_days (-0.7211),True,"Liver (0.3951), Kidney (0.3338)",26612.642,-0.782,"gestation_days (-0.7173), temperature_celsius ...",both
3,ENSG00000214827,MTCP1,9390.777,-0.603,,True,"Brain (0.4052), Kidney (0.3005)",7219.915,-0.618,,both
4,ENSG00000149577,SIDT2,12894.13,-0.534,,False,Liver (0.4537),14385.231,-0.576,,both
6,ENSG00000164879,CA3,18519.766,-0.499,,False,Liver (0.4193),19963.007,-0.513,,both
11,ENSG00000121897,LIAS,,,,,,55686.324,-0.586,,seven_tissues_model_only
15,ENSG00000204231,RXRB,,,,,,3657.395,-0.413,,seven_tissues_model_only
14,ENSG00000111832,RWDD1,,,,,,546.879,-0.452,,seven_tissues_model_only
13,ENSG00000172594,SMPDL3A,,,,,,4650.269,-0.543,,seven_tissues_model_only
12,ENSG00000159251,ACTC1,,,,,,82.838,-0.55,,seven_tissues_model_only


# Genage annotations #

In [141]:
genage_folder = locations.input.annotations.genage

# Tab-separated GenAge conversion table, reduced to the columns used below and de-duplicated
genage_conversions = pd.read_csv(genage_folder.conversion, sep="\t")
genage = (
    genage_conversions
    .loc[:, ["Ensembl", "Gene Symbol", "Organism", "Lifespan Effect", "Longevity Influence", "Method"]]
    .drop_duplicates()
)
genage.head(10)

Unnamed: 0,Ensembl,Gene Symbol,Organism,Lifespan Effect,Longevity Influence,Method
0,YPL174C,NIP100,Saccharomyces cerevisiae,decrease,fitness,Deletion
1,YER091C,MET6,Saccharomyces cerevisiae,decrease,fitness,Deletion
2,YDR108W,TRS85,Saccharomyces cerevisiae,decrease,fitness,Deletion
4,YMR135C,GID8,Saccharomyces cerevisiae,decrease,fitness,Deletion
6,YCR024C-A,PMP1,Saccharomyces cerevisiae,decrease,fitness,Deletion
7,YDR181C,SAS4,Saccharomyces cerevisiae,increase,anti,Deletion
8,YJL210W,PEX2,Saccharomyces cerevisiae,decrease,fitness,Deletion
9,YNL079C,TPM1,Saccharomyces cerevisiae,decrease,fitness,Deletion
10,YJR127C,RSF2,Saccharomyces cerevisiae,decrease,fitness,Deletion
11,YMR307W,GAS1,Saccharomyces cerevisiae,decrease,fitness,Deletion


In [142]:
# Shape of the de-duplicated "Ensembl" column, i.e. the number of distinct values it holds
genage["Ensembl"].drop_duplicates().shape

(2090,)

In [143]:
# Ortholog table; the "Homo_sapiens" column becomes the "reference_gene" join key
genage_genes = (
    pd.read_csv(genage_folder.orthologs.dir / "all.tsv", sep="\t")
    .rename(columns={"Homo_sapiens": "reference_gene"})
)
genage_genes.head(10)

Unnamed: 0,reference_gene,Caenorhabditis_elegans,Drosophila_melanogaster,Mus_musculus,Mesocricetus_auratus,Danio_rerio
0,ENSG00000242265,,,ENSMUSG00000092035,ENSMAUG00000018656,ENSDARG00000109342
1,ENSG00000139990,WBGene00011242,FBgn0250755,ENSMUSG00000049106,ENSMAUG00000004350,ENSDARG00000060320
2,ENSG00000073921,WBGene00006751,FBgn0086372,ENSMUSG00000039361,ENSMAUG00000011448,ENSDARG00000012866;ENSDARG00000014137
3,ENSG00000139687,WBGene00003020,,ENSMUSG00000022105,ENSMAUG00000020865,ENSDARG00000006782
4,ENSG00000119977,WBGene00017120,,ENSMUSG00000025008,ENSMAUG00000021808,
5,ENSG00000145592,,,,ENSMAUG00000000193,ENSDARG00000034291
6,ENSG00000242866,,,ENSMUSG00000033498,ENSMAUG00000018007,ENSDARG00000105391
7,ENSG00000135506,WBGene00018611,,ENSMUSG00000040462,ENSMAUG00000019379,ENSDARG00000020301
8,ENSG00000150687,,,ENSMUSG00000039405,ENSMAUG00000010049,
9,ENSG00000162426,,FBgn0035968;FBgn0267429,ENSMUSG00000039838,ENSMAUG00000021292,ENSDARG00000004302


In [144]:
# Attach GenAge records to human reference genes through the ortholog table.
# Per-organism frames are collected in a list and concatenated once instead of growing
# a DataFrame inside the loop.
genage_parts = [genage.head(0)]  # empty seed keeps the GenAge columns first in the result
for c in genage_genes.columns[1:]:
    col = c.replace("_", " ")  # ortholog column names use "_" where GenAge "Organism" uses a space
    selected_genes = genage_genes[["reference_gene", c]].rename(columns={c: "Ensembl"}).dropna()
    # A cell may hold several ids separated by ";" - split them so explode() yields one id per row
    selected_genes["Ensembl"] = selected_genes["Ensembl"].astype(str).str.split(";")
    genage_org = genage[genage["Organism"] == col]
    merged = selected_genes.explode("Ensembl").merge(genage_org, on="Ensembl", how="inner")
    genage_parts.append(merged)
genage_humanized = pd.concat(genage_parts).drop_duplicates()
genage_humanized.head(10)

Unnamed: 0,Ensembl,Gene Symbol,Organism,Lifespan Effect,Longevity Influence,Method,reference_gene
0,WBGene00006619,try-1,Caenorhabditis elegans,increase,anti,RNA interference,ENSG00000095917
1,WBGene00006619,try-1,Caenorhabditis elegans,increase,anti,RNA interference,ENSG00000116176
2,WBGene00006619,try-1,Caenorhabditis elegans,increase,anti,RNA interference,ENSG00000172236
3,WBGene00006619,try-1,Caenorhabditis elegans,increase,anti,RNA interference,ENSG00000197253
4,WBGene00004481,rps-12,Caenorhabditis elegans,increase,anti,Post-developmental RNA interference,ENSG00000112306
5,WBGene00004789,sgk-1,Caenorhabditis elegans,increase,anti,Deletion,ENSG00000101049
6,WBGene00004789,sgk-1,Caenorhabditis elegans,increase,anti,Deletion,ENSG00000104205
7,WBGene00003476,mtm-3,Caenorhabditis elegans,decrease,pro,RNA interference,ENSG00000087053
8,WBGene00003476,mtm-3,Caenorhabditis elegans,decrease,pro,RNA interference,ENSG00000063601
9,WBGene00003476,mtm-3,Caenorhabditis elegans,decrease,pro,RNA interference,ENSG00000003987


In [153]:
# Inner joins on "reference_gene": only genes that have a humanized GenAge record remain
pro_genage = pd.merge(pro_all_ext, genage_humanized, on="reference_gene", how="inner")
anti_genage = pd.merge(anti_all_ext, genage_humanized, on="reference_gene", how="inner")
anti_genage.head(5)

Unnamed: 0,reference_gene,symbol,MLS_gain_score,MLS_kendall_tau,life_history_kendal_tau,causal,organs (r^2) in linear models,gain_score_to_max_lifespan,kendall_tau_to_max_lifespan,life_history,_merge,Ensembl,Gene Symbol,Organism,Lifespan Effect,Longevity Influence,Method


### Add genage human info ###

In [154]:
# Tab-separated GenAge human table; its shape is shown as a quick check of what was loaded
genage_human = pd.read_csv(genage_folder.human, sep='\t')
genage_human.shape

(307, 6)

In [155]:
# Inner join on "symbol" between the GenAge human table and the combined pro table
pro_human_genage = pd.merge(genage_human, pro_all_ext, on="symbol", how="inner")
pro_human_genage

Unnamed: 0,GenAge ID,symbol,name,entrez gene id,uniprot,why,reference_gene,MLS_gain_score,MLS_kendall_tau,life_history_kendal_tau,causal,organs (r^2) in linear models,gain_score_to_max_lifespan,kendall_tau_to_max_lifespan,life_history,_merge
0,211,SUMO1,small ubiquitin-like modifier 1,7341,SUMO1_HUMAN,upstream,ENSG00000116030,,,,,,8916.589,0.538,,seven_tissues_model_only


In [156]:
# Inner join on "symbol" between the GenAge human table and the combined anti table
anti_human_genage = pd.merge(genage_human, anti_all_ext, on="symbol", how="inner")
anti_human_genage

Unnamed: 0,GenAge ID,symbol,name,entrez gene id,uniprot,why,reference_gene,MLS_gain_score,MLS_kendall_tau,life_history_kendal_tau,causal,organs (r^2) in linear models,gain_score_to_max_lifespan,kendall_tau_to_max_lifespan,life_history,_merge


### Saving genage results ###

In [32]:
# Write the four GenAge intersection tables as tab-separated files; the row index is not written
pro_genage.to_csv(intersections / "genage_pro_model_intersections.tsv", sep = "\t", index = False)
anti_genage.to_csv(intersections / "genage_anti_model_intersections.tsv", sep = "\t", index = False)
pro_human_genage.to_csv(intersections / "genage_pro_human_intersections.tsv", sep = "\t", index = False)
anti_human_genage.to_csv(intersections / "genage_anti_human_intersections.tsv", sep = "\t", index = False)