In [27]:
from pathlib import Path
import pandas as pd

ORGANISMS = ["human", "mouse"]
MIRNAS = ["mir197", "mir769"]
PROJECTDIR = Path('.').resolve().parents[2]

rnaseq_path = '{}/milestones/data/rnaseq/{}_{}_differential_genes.tsv'
target_path = f'{PROJECTDIR}/milestones/data/targetscan/unweighted_TargetScanHuman_human_mouse_mir197_mir769.tsv'
doapr_path = '../results/sigdown_and_predictedTargets_tsH.tsv'


## Find down-regulated genes that are also predicted as targets

* do not filter for context++ score threshold yet, this will be done in later steps

In [29]:
def filter_targets_for_condition(df, organism, mirna):
    df = df[
        (df.organism == organism) & (df.mirna == mirna)
    ]
    fdf = df.copy().drop(columns=['organism', 'mirna'])
    fdf = fdf.rename(columns={'target_gene': 'gene', 'Total context++ score': 'TShuman_tc++s'})
    if organism == 'mouse':
        fdf.gene = fdf.gene.str.capitalize()
    return fdf

def downregulated_and_predicted(target_path):
    predicted_targets = pd.read_csv(target_path, sep='\t')
    
    col = []
    for organism in ORGANISMS:
        for mirna in MIRNAS:
            condition_specific_targets = filter_targets_for_condition(predicted_targets, organism, mirna)
            # display(condition_specific_targets)
            sigdown = pd.read_csv(rnaseq_path.format(PROJECTDIR, organism, mirna), sep='\t')
            # display(sigdown)
            merged = sigdown.merge(condition_specific_targets)
            col.append(merged)
    down_and_predicted = pd.concat(col)
    return down_and_predicted
        
        
doapr = downregulated_and_predicted(target_path)
display(doapr)
doapr.to_csv(doapr_path, sep='\t', index=False)

Unnamed: 0,orthomap,organism,mirna,gene,log2FoldChange,pvalue,baseMean,bMctrl,bMtreat,padj,TShuman_tc++s
0,ENSG00000157933|ENSMUSG00000029050,human,mir197,SKI,-1.186566,6.449984e-62,847.105189,1177.457942,516.752436,1.251297e-59,-0.020
1,ENSG00000151012|ENSMUSG00000027737,human,mir197,SLC7A11,1.366058,2.488821e-56,684.048915,381.960001,986.137828,2.414156e-54,-0.010
2,ENSG00000163683|ENSMUSG00000037822,human,mir197,SMIM14,1.136504,1.427720e-29,488.205959,304.899428,671.512490,3.956825e-28,-0.240
3,ENSG00000084092|ENSMUSG00000036285,human,mir197,NOA1,-1.166855,1.209226e-21,243.011588,336.640897,149.382278,2.345899e-20,-0.054
4,ENSG00000124120|ENSMUSG00000017679,human,mir197,TTPAL,-1.198943,6.888736e-20,205.249691,286.337016,124.162366,1.214922e-18,-0.790
...,...,...,...,...,...,...,...,...,...,...,...
50,ENSG00000166924|ENSMUSG00000045348,mouse,mir769,Nyap1,-0.542431,2.847110e-04,112.589647,130.212652,89.092307,3.182065e-04,-0.020
51,ENSG00000137312|ENSMUSG00000059714,mouse,mir769,Flot1,-0.532856,3.063969e-04,115.236629,132.967070,91.596042,3.407731e-04,-0.330
52,ENSG00000095383|ENSMUSG00000039813,mouse,mir769,Tbc1d2,-0.569863,3.249519e-04,108.697999,126.533729,84.917027,3.571001e-04,-0.260
53,ENSG00000105948|ENSMUSG00000056832,mouse,mir769,Ttc26,-0.521078,6.609669e-04,129.838634,149.375598,103.789348,6.944721e-04,-0.270
