## Imports

In [11]:
import pandas as pd
from pathlib import Path
from rodent_loss_src.doapr import read_and_filter_doapr

## Input

In [12]:
# Project root, taken as three directory levels above the current working
# directory (parents[2]); the result therefore depends on where the kernel runs.
PROJECTDIR = Path('.').resolve().parents[2]

# List of gene symbols, one per line; resolved relative to the working
# directory rather than PROJECTDIR.
lost_proteins_path = '../data/symbol_filterlist_05.list'
# Tab-separated miRTarBase target table (read with pd.read_csv further down).
mirtarbase_path = f'{PROJECTDIR}/analyses/compensation_integration/results/lost_human_targets_strongMTIonly_True.tsv'
# Input for read_and_filter_doapr (primary-target table).
doapr_path = f'{PROJECTDIR}/milestones/data/doapr/sigdown_and_predictedTargets_tsM_tsH.tsv'

# NOTE(review): not referenced by any cell visible in this section — confirm it is still needed.
lostfampath = f'{PROJECTDIR}/external_data/all_rodentlost_mirfams.list'

## Load data


In [19]:
def read_list(path):
    """Read a one-entry-per-line text file into a list of strings.

    Parameters
    ----------
    path : str or path-like
        File to read.

    Returns
    -------
    list of str
        Each line with surrounding whitespace removed, in file order.
        Blank and whitespace-only lines are skipped.
    """
    with open(path) as fh:
        # Filter on the stripped value: a raw line always carries its newline
        # and is therefore truthy even when it is visually empty.
        stripped = (line.strip() for line in fh)
        return [entry for entry in stripped if entry]
    
    
def human_primary_targets(path):
    """Collect the human DoAPR tables for mir197 and mir769 into one frame.

    For each of the two miRNAs, `read_and_filter_doapr(path, 'human', <mirna>)`
    is called and the result is tagged with a `mirna` column holding the
    family-style label ('Mir-197', 'Mir-769'). The per-miRNA frames are then
    concatenated in that order.

    Parameters
    ----------
    path : str
        Location of the DoAPR table passed through to `read_and_filter_doapr`.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of the tagged per-miRNA frames.
    """
    tagged_frames = []
    for family in ('mir197', 'mir769'):
        targets = read_and_filter_doapr(path, 'human', family)
        # 'mir197' -> 'Mir-197'
        targets['mirna'] = family.replace('mir', 'Mir-')
        tagged_frames.append(targets)
    return pd.concat(tagged_frames)
    
    
    
# Load the three inputs: the gene-symbol list, the miRTarBase table (TSV)
# and the combined DoAPR primary-target table.
lost_proteins = read_list(lost_proteins_path)
mirtar_targets = pd.read_csv(mirtarbase_path, sep='\t')
doapr = human_primary_targets(doapr_path)

# Preview the first rows of both tables.
display(mirtar_targets.head(), doapr.head())


Unnamed: 0,miRTarBase ID,miRNA,Species (miRNA),Target Gene,Target Gene (Entrez ID),Species (Target Gene),Experiments,Support Type,References (PMID),mirbase_fam,mirgene_fam
0,MIRT000473,hsa-miR-1296-5p,Homo sapiens,MCM2,4171,Homo sapiens,qRT-PCR//Western blot,Functional MTI,20332239,mir-1296,Mir-1296
1,MIRT001979,hsa-miR-510-5p,Homo sapiens,HTR3E,285242,Homo sapiens,Luciferase reporter assay,Functional MTI,18614545,mir-510,Mir-506
2,MIRT003549,hsa-miR-489-3p,Homo sapiens,PTPN11,5781,Homo sapiens,Luciferase reporter assay//Western blot,Functional MTI,20700123,mir-489,Mir-489
3,MIRT003549,hsa-miR-489-3p,Homo sapiens,PTPN11,5781,Homo sapiens,Luciferase reporter assay,Functional MTI,26918448,mir-489,Mir-489
4,MIRT004198,hsa-miR-197-3p,Homo sapiens,FOXO3,2309,Homo sapiens,Microarray//qRT-PCR//Western blot,Functional MTI,26540468,mir-197,Mir-197


Unnamed: 0_level_0,TShuman_tc++s,mirna
gene,Unnamed: 1_level_1,Unnamed: 2_level_1
TTPAL,-0.79,Mir-197
PARP2,-0.49,Mir-197
STK38L,-0.328,Mir-197
ATP6V1A,-0.415,Mir-197
MAPK8,-0.32,Mir-197


## miRTarBase targets of the 15 lost miRNAs in the set of 127 lost protein-coding genes

In [32]:
# Keep only the miRTarBase rows whose 'Target Gene' is in the lost-protein list.
lost_mirtar_targets = mirtar_targets.loc[
    mirtar_targets['Target Gene'].isin(lost_proteins)
]
display(lost_mirtar_targets)


Unnamed: 0,miRTarBase ID,miRNA,Species (miRNA),Target Gene,Target Gene (Entrez ID),Species (Target Gene),Experiments,Support Type,References (PMID),mirbase_fam,mirgene_fam


## Primary targets (DoAPR) in the set of lost protein-coding genes

In [42]:
# Genes that are both in the index of `doapr` and in the lost-protein list.
# Intersect with the `lost_proteins` list itself: the quoted form
# set(['lost_proteins']) is a one-element set holding that literal string,
# which makes the overlap empty regardless of the data.
overlap = set(doapr.index).intersection(lost_proteins)
print(overlap)

set()


In [35]:
# Show the full list of gene symbols loaded from lost_proteins_path.
display(lost_proteins)

['COA1',
 'STK17A',
 'MAGEF1',
 'IL26',
 'RBM23',
 'ZNF304',
 'ZNF74',
 'PXT1',
 'OR2S2',
 'SPOCD1',
 'RNF175',
 'RPA4',
 'USP26',
 'TRIM4',
 'CALML6',
 'CSKMT',
 'ADIRF',
 'STX10',
 'ZNF548',
 'CCDC102B',
 'ZNF256',
 'MCCD1',
 'HSD11B1L',
 'CLECL1',
 'TRIM52',
 'HTR1E',
 'ZNF613',
 'POM121L2',
 'HTR3C',
 'SCD5',
 'GRK7',
 'RAX2',
 'ZNF792',
 'CD1B',
 'APRG1',
 'ADGRE3',
 'TSPAN19',
 'DIRAS3',
 'ACTL8',
 'ZACN',
 'SLC2A4RG',
 'HERC5',
 'CAPN14',
 'S100A12',
 'MCF2L2',
 'KRBA2',
 'LUZP4',
 'GJA9',
 'NBPF6',
 'ZNF599',
 'ZNF181',
 'ZNF621',
 'SLC25A2',
 'RBP5',
 'P2RY11',
 'RHEX',
 'MCHR2',
 'GYG2',
 'PI3',
 'PRR13',
 'TTC31',
 'DEFB127',
 'ZNF75D',
 'ZNF891',
 'BEX5',
 'ZNF684',
 'ZNF175',
 'DUXA',
 'ZNF214',
 'MAP1LC3C',
 'TCEAL4',
 'GSTA4',
 'CCL14',
 'ZFP57',
 'TCN1',
 'ZFP42',
 'BCAS4',
 'CXCL8',
 'HBM',
 'ZNF350',
 'TCEAL2',
 'ZNF713',
 'ZNF200',
 'ADA2',
 'ZNF620',
 'ZNF576',
 'RTP5',
 'PAAF1',
 'ZBED8',
 'SLC2A11',
 'CCNP',
 'IFIT5',
 'NAT16',
 'ZNF75A',
 'NBPF4',
 'ZNF79',
 'PEA