In [43]:
'''
Target list source: https://www.yeastgenome.org/locus/S000004466/regulation
Goal: Take the list of targets for ScYap1 and identify those with oxidative stress annotations,
then use the identified genes to compare with C.glabrata targets
'''

import pandas as pd

# The first 8 lines of the download are skipped before the tab-separated table begins.
targ = pd.read_csv('../data/yeast_genome_cerevisiae_YAP1_targets.txt',
                   delimiter='\t', skiprows=8)

targ_anno = targ.loc[:, ['Target Systematic Name', 'Happens During', 'Happens During.1']]

keyword = 'ox'  # Find this phrase in any annotations
# NOTE(review): this is a plain, case-sensitive substring match, so 'ox' also
# hits words such as "toxic" or "hypoxia" -- confirm that breadth is intended.

# Missing annotations become empty strings so they can never match the keyword
# (stringifying NaN would yield 'nan', which some keywords would match).
during_first = targ_anno['Happens During'].fillna('').astype(str)
during_second = targ_anno['Happens During.1'].fillna('').astype(str)
is_ox_row = (during_first.str.contains(keyword, regex=False)
             | during_second.str.contains(keyword, regex=False))

target_names = targ_anno['Target Systematic Name']

# genes in target list related to oxidative stress (one entry per matching row)
ox_targ = target_names[is_ox_row].tolist()

# A gene can occupy several rows of the table, so collapse to unique systematic
# names (in order of first appearance) before counting; otherwise the reported
# number of targets is really a number of rows.
sc_yap1_targets = (
    target_names[target_names.isin(ox_targ)]
    .dropna()
    .drop_duplicates()
    .tolist()
)
print(len(sc_yap1_targets), 'targets found for S.cerevisiae')

109 targets found for S.cerevisiae


In [57]:
'''
Relational list source: http://www.candidagenome.org/download/homology/orthologs/
Goal: Take the relational list of orthologues across different yeast species and find which
S.cerevisiae genes correspond to which C.glabrata genes
'''

SC_COL = 'Saccharomyces cerevisiae S288C'
CG_COL = 'Candida glabrata CBS138'

rel = pd.read_csv('../data/All_Species_Orthologs_from_CGOB.txt', delimiter='\t')

# Work with unique, non-missing S.cerevisiae identifiers: the target list may
# repeat a gene, and a NaN in it would match every blank S.cerevisiae row of
# the ortholog table.
sc_unique = {gene for gene in sc_yap1_targets if pd.notna(gene)}

conversion = rel.loc[rel[SC_COL].isin(sc_unique), [SC_COL, CG_COL]]

# Rows whose C.glabrata field is missing carry no ortholog.
# NOTE(review): if the file marks absent orthologs with a placeholder string
# instead of an empty field, those rows are not removed here -- confirm.
with_ortholog = conversion.dropna(subset=[CG_COL])
cg_genes = with_ortholog[CG_COL].drop_duplicates().tolist()

# Both counts are taken over unique S.cerevisiae targets, so together they
# always add up to the number of distinct targets.
sc_with_ortholog = set(with_ortholog[SC_COL])
print(len(sc_with_ortholog), 'targets have orthologs in C.glabrata')
print(len(sc_unique - sc_with_ortholog), 'targets in S.cerevisiae had no ortholog in C.glabrata')


70 targets have orthologs in C.glabrata
39 targets in S.cerevisiae had no ortholog in C.glabrata


In [68]:
'''
Goal: Intersect the C.glabrata orthologs of the S.cerevisiae targets with the C.glabrata
targets obtained from the meta-analysis
'''

# Only the first column of the file is used as the list of targets.
# NOTE(review): read_csv treats the first line as a header by default, so that
# line is not part of cg_targs -- confirm the file really starts with a header.
meta_table = pd.read_csv('../data/glabrata_target_genes.txt')
cg_targs = meta_table.iloc[:, 0].tolist()
print(len(cg_targs), 'targets for C.glabrata based on meta-analysis')

# Keep the orthologs (in their existing order) that also appear in the
# meta-analysis list.
meta_lookup = frozenset(cg_targs)
overlap_targs = [ortholog for ortholog in cg_genes if ortholog in meta_lookup]
print(len(overlap_targs), 'targets overlapped between sets')
print(overlap_targs)

206 targets for C.glabrata based on meta-analysis
3 targets overlapped between sets
['CAGL0F05137g', 'CAGL0L04664g', 'CAGL0G01540g']
