## Imports

In [10]:
import pandas as pd
from pathlib import Path
from rodent_loss_src.rnaseq import parse_rnaseq, read_oma

## Input

In [11]:
# Project root: three directory levels above the resolved current working
# directory, so the value depends on where the notebook kernel was started.
PROJECTDIR = Path('.').resolve().parents[2]

# Relative to the working directory; not used in the cells visible here.
lost_proteins_path = '../data/symbol_filterlist_05.list'
# Tab-separated miRTarBase target table, read by read_mirtarbase_targets().
mirtarbase_path = f'{PROJECTDIR}/analyses/compensation_integration/results/lost_human_targets_strongMTIonly_True.tsv'
# Presumably the input for read_human_doapr(); not used in the cells visible here.
doapr_path = f'{PROJECTDIR}/milestones/data/doapr/sigdown_and_predictedTargets_tsM_tsH.tsv'

# Template with two positional '{}' placeholders; read_rnaseq_data() fills them
# with the project directory and a miRNA key from MIRNAS, in that order.
rnaseq_path = '{}/milestones/data/rnaseq/human_{}_differential_genes.tsv'
# Not used in the cells visible here.
omapath = f'{PROJECTDIR}/external_data/human_mouse_omapairwise.txt'

# Lower-case miRNA keys. They are substituted into rnaseq_path and turned into
# family names ('mir197' -> 'Mir-197') when matching miRTarBase rows.
MIRNAS = ['mir197', 'mir769']

## Load data

In [16]:
def read_human_doapr(path, targetscan_cutoff=-0.2):
    """Load the tab-separated DOAPR table and keep the human Mir-197/Mir-769 rows.

    The names 'Mir-197-3p' and 'Mir-769-5p' in the ``mirna`` column are
    normalized to 'Mir-197' and 'Mir-769'. A row is kept when all of the
    following hold:

    * its (normalized) ``mirna`` is 'Mir-197' or 'Mir-769',
    * its ``organism`` is 'human',
    * its ``TShuman_tc++s`` value is less than or equal to ``targetscan_cutoff``.

    Rows with a missing ``mirna`` value are dropped instead of raising an
    ``AttributeError`` during name normalization.

    Parameters
    ----------
    path : str, path-like or file-like
        Anything accepted by ``pandas.read_csv``; must contain the columns
        ``mirna``, ``organism`` and ``TShuman_tc++s``.
    targetscan_cutoff : float, default -0.2
        Inclusive upper bound applied to ``TShuman_tc++s``
        (presumably a TargetScan score -- TODO confirm).

    Returns
    -------
    pandas.DataFrame
        The filtered rows with their original index labels.
    """
    mature_to_family = {'Mir-197-3p': 'Mir-197', 'Mir-769-5p': 'Mir-769'}

    df = pd.read_csv(path, sep='\t')
    # Exact-match mapping: NaN and unrelated names pass through untouched and
    # are removed by the membership filter below.
    df['mirna'] = df['mirna'].replace(mature_to_family)

    keep = (
        df['mirna'].isin(list(mature_to_family.values()))
        & (df['organism'] == 'human')
        & (df['TShuman_tc++s'] <= targetscan_cutoff)
    )
    return df[keep]


def read_mirtarbase_targets(path, mirnas=None):
    """Collect the unique miRTarBase target genes for each miRNA.

    The tab-separated file at ``path`` must provide the columns
    ``mirgene_fam`` and ``Target Gene``. Each miRNA key is converted to its
    family name by replacing 'mir' with 'Mir-' (e.g. 'mir197' -> 'Mir-197'),
    and the rows whose ``mirgene_fam`` equals that name are selected.

    Parameters
    ----------
    path : str, path-like or file-like
        Anything accepted by ``pandas.read_csv``.
    mirnas : iterable of str, optional
        miRNA keys to look up. Defaults to the notebook-level ``MIRNAS``
        list, so the lookup always follows the configured miRNAs instead of
        a second hard-coded family list.

    Returns
    -------
    dict[str, list]
        Maps every requested miRNA key to its unique target genes in order
        of first appearance; the list is empty when nothing matches.
    """
    if mirnas is None:
        mirnas = MIRNAS

    df = pd.read_csv(path, sep='\t').rename(columns={'mirgene_fam': 'mirna'})

    targets = {}
    for mirna in mirnas:
        family = mirna.replace('mir', 'Mir-')
        genes = df.loc[df['mirna'] == family, 'Target Gene']
        # Drop missing gene names: a NaN in the list would make a later
        # ``isin`` lookup match rows whose gene symbol is missing.
        targets[mirna] = list(genes.dropna().unique())
    return targets


def read_rnaseq_data(path, mirnas=None, projectdir=None):
    """Read one tab-separated RNA-seq table per miRNA.

    Parameters
    ----------
    path : str
        ``str.format`` template with two positional placeholders; the first
        receives the project directory, the second the miRNA key.
    mirnas : iterable of str, optional
        miRNA keys to load. Defaults to the notebook-level ``MIRNAS`` list.
    projectdir : str or path-like, optional
        Value substituted into the first placeholder. Defaults to the
        notebook-level ``PROJECTDIR``.

    Returns
    -------
    dict[str, pandas.DataFrame]
        Maps each miRNA key to the table read from its formatted path, in
        the order the keys were given.
    """
    if mirnas is None:
        mirnas = MIRNAS
    if projectdir is None:
        projectdir = PROJECTDIR

    return {
        mirna: pd.read_csv(path.format(projectdir, mirna), sep='\t')
        for mirna in mirnas
    }
    
# Load both inputs, keyed by the lower-case miRNA names.
mirna2mirtarbase_targets = read_mirtarbase_targets(mirtarbase_path)
mirna2rnaseq = read_rnaseq_data(rnaseq_path)
print(mirna2mirtarbase_targets)

# Preview the RNA-seq table of the first miRNA only (nothing is shown if the
# mapping is empty).
for mirna in list(mirna2rnaseq)[:1]:
    rnaseqdf = mirna2rnaseq[mirna]
    display(rnaseqdf)


{'mir197': ['FOXO3', 'TUSC2', 'NSUN5', 'CD82', 'BMF', 'PMAIP1', 'MTHFD1', 'FOXJ2', 'MAPK1', 'IGFBP5', 'RAN'], 'mir769': ['GSK3B']}


Unnamed: 0,orthomap,organism,mirna,gene,log2FoldChange,pvalue,baseMean,bMctrl,bMtreat,padj
0,ENSG00000157933|ENSMUSG00000029050,human,mir197,SKI,-1.186566,6.449984e-62,847.105189,1177.457942,516.752436,1.251297e-59
1,ENSG00000151012|ENSMUSG00000027737,human,mir197,SLC7A11,1.366058,2.488821e-56,684.048915,381.960001,986.137828,2.414156e-54
2,ENSG00000086758|ENSMUSG00000025261,human,mir197,HUWE1,-0.657545,1.404866e-41,3933.364672,4814.752950,3051.976394,9.084800e-40
3,ENSG00000180694|ENSMUSG00000043252,human,mir197,TMEM64,1.240293,4.685725e-35,399.292229,237.094468,561.489991,2.272577e-33
4,ENSG00000168615|ENSMUSG00000031555,human,mir197,ADAM9,1.018874,1.513984e-33,691.420705,456.596503,926.244907,5.874256e-32
...,...,...,...,...,...,...,...,...,...,...
189,ENSG00000023697|ENSMUSG00000030225,human,mir197,DERA,-0.517819,7.564422e-03,93.242527,109.976259,76.508796,7.723673e-03
190,ENSG00000179542|ENSMUSG00000046699,human,mir197,SLITRK4,-0.526373,8.585381e-03,87.363510,103.306930,71.420089,8.720229e-03
191,ENSG00000140044|ENSMUSG00000034271,human,mir197,JDP2,0.599764,9.516980e-03,239.610237,190.304484,288.915989,9.616115e-03
192,ENSG00000075651|ENSMUSG00000027695,human,mir197,PLD1,-0.538208,1.115808e-02,89.500527,106.188377,72.812678,1.121590e-02


## Find expression of miRTarBase targets in RNAseq data

In [23]:
# For every miRNA, show the rows of its RNA-seq table whose gene is one of the
# miRTarBase targets of that miRNA.
for mirna in mirna2rnaseq:
    rnaseqdf = mirna2rnaseq[mirna]
    known_targets = mirna2mirtarbase_targets[mirna]
    print(known_targets)
    is_known_target = rnaseqdf.gene.isin(known_targets)
    overlap = rnaseqdf[is_known_target]
    display(overlap)

['FOXO3', 'TUSC2', 'NSUN5', 'CD82', 'BMF', 'PMAIP1', 'MTHFD1', 'FOXJ2', 'MAPK1', 'IGFBP5', 'RAN']


Unnamed: 0,orthomap,organism,mirna,gene,log2FoldChange,pvalue,baseMean,bMctrl,bMtreat,padj


['GSK3B']


Unnamed: 0,orthomap,organism,mirna,gene,log2FoldChange,pvalue,baseMean,bMctrl,bMtreat,padj
