In [13]:
import pandas as pd
import numpy as np
import scipy.stats as st
import seaborn as sns
import sys
import os
import gseapy as gp
import matplotlib.pyplot as plt
import swan_vis as swan
import yaml
from snakemake.io import expand
import itertools

# Put the directory two levels above the current working directory on the
# import path so the `proc_revisions` package imported below can be found.
p = os.path.dirname(os.path.dirname(os.getcwd()))
sys.path.append(p)

from proc_revisions.utils import *
from proc_revisions.plotting import *

In [29]:
# Directory holding config.yml; also used as the prefix for the
# config-derived paths built later in the notebook.
od = '../../proc_revisions/'
# NOTE(review): `od` already ends in '/', so this path contains a double slash
config_file = f'{od}/config.yml'
# safe_load parses the YAML into plain Python objects, bound to `config`
with open(config_file) as f:
    config = yaml.safe_load(f)

In [30]:
# ---- human paths: config patterns expanded for species='human', prefixed with `od`
ab = od+expand(config['lr']['talon']['fusion_fix']['ab'], species='human')[0]
filt_ab = od+expand(config['lr']['cerberus']['filt_ab'], species='human')[0]
read_annot = od+expand(config['lr']['talon']['full_annot'], species='human')[0]
t_metadata = od+expand(config['ref']['cerberus']['new_gtf_t_info'], species='human')[0]
lib_meta = od+expand(config['lr']['meta'], species='human')[0]
swan_file = od+expand(config['lr']['swan']['sg'], species='human')[0]
cerberus_h5 = od+expand(config['lr']['cerberus']['ca_triplets'], species='human', obs_col='sample')[0]
cerb_t_metadata = od+expand(config['lr']['cerberus']['gtf_t_info'], species='human')[0]
major_isos = od+expand(config['lr']['analysis']['major_isos'], species='human', obs_col='sample')[0]
pi_tpm_table = od+expand(config['lr']['mane']['pi_tpm']['triplet'], species='human', obs_col='sample')[0]
pp_summary = od+expand(config['lr']['protein_pred']['summary'], species='human')[0]

ref_t_metadata = od+expand(config['ref']['new_gtf_t_info'], species='human')[0]
ref_g_metadata = od+expand(config['ref']['new_gtf_g_info'], species='human')[0]

# NOTE(review): unlike the other paths this one is prefixed with '../' rather
# than `od` -- confirm that is intended
sr_ab = '../'+config['sr']['ab']


# ---- analysis parameters
ver = 'v40_cerberus'
min_tpm = 1
gene_subset = 'protein_coding'
obs_col = 'sample'
go_gene_subset = 'protein_coding'

# ---- mouse paths: same config patterns expanded for species='mouse', `m_` prefixed
m_ab = od+expand(config['lr']['talon']['fusion_fix']['ab'], species='mouse')[0]
m_filt_ab = od+expand(config['lr']['cerberus']['filt_ab'], species='mouse')[0]
m_read_annot = od+expand(config['lr']['talon']['full_annot'], species='mouse')[0]
m_t_metadata = od+expand(config['ref']['cerberus']['new_gtf_t_info'], species='mouse')[0]
m_lib_meta = od+expand(config['lr']['meta'], species='mouse')[0]
m_swan_file = od+expand(config['lr']['swan']['sg'], species='mouse')[0]
m_cerberus_h5 = od+expand(config['lr']['cerberus']['ca_triplets'], species='mouse', obs_col='sample')[0]
# Bound to `m_cerb_t_metadata` (like every other mouse path) so that it no
# longer overwrites the human `cerb_t_metadata` assigned above.
m_cerb_t_metadata = od+expand(config['lr']['cerberus']['gtf_t_info'], species='mouse')[0]
m_major_isos = od+expand(config['lr']['analysis']['major_isos'], species='mouse', obs_col='sample')[0]
m_pi_tpm_table = od+expand(config['lr']['mane']['pi_tpm']['triplet'], species='mouse', obs_col='sample')[0]
m_pp_summary = od+expand(config['lr']['protein_pred']['summary'], species='mouse')[0]


m_ref_t_metadata = od+expand(config['ref']['new_gtf_t_info'], species='mouse')[0]
m_ref_g_metadata = od+expand(config['ref']['new_gtf_g_info'], species='mouse')[0]


# ---- cross-species settings
orth_table = '../../proc_revisions/ref/biomart_human_to_mouse.tsv'
mouse_ver = 'vM25_cerberus'

In [31]:
# Load the mouse Swan graph from the path resolved from the config (`m_swan_file`)
sg = swan.read(m_swan_file)

Read in graph from ../../proc_revisions/data/mouse/lr/swan/swan_graph.p


In [32]:
def get_tc_mouse_samples(config):
    """
    Get the mouse samples that are part of the timecourse

    Parameters:
        config (dict): Parsed config. `config['lr']['meta']` is expanded
            with species='mouse' to get the path of the tab-separated
            library metadata table. The path is used as-is, so it is
            resolved relative to the current working directory.

    Returns:
        s (list of str): Unique values of the `sample` column for rows
            whose `general_tissue_cell_type` and `age` are both in the
            timecourse lists below, in order of first appearance
    """
    m_lib_meta = expand(config['lr']['meta'], species='mouse')[0]
    df = pd.read_csv(m_lib_meta, sep='\t')
    tc_tissues = ['muscle', 'hippocampus', 'cortex', 'adrenal gland', 'heart']
    tc_times = ['18-20mo', '2mo', '4d', '25d', '14d', '36d', '10d']

    # Filter the table that was just read. The previous version filtered a
    # notebook-global `meta` instead, which is undefined on a fresh kernel.
    is_tc = (df.general_tissue_cell_type.isin(tc_tissues))&\
            (df.age.isin(tc_times))
    s = df.loc[is_tc, 'sample'].unique().tolist()
    return s

def get_du_tc_cfg_entries(samples=None):
    """
    Get the cfg entries for running du tests

    Parameters:
        samples (list of str or None): Sample names; the text before the
            first '_' in each name is used to group samples. When None
            (the default, matching the original no-argument call), the
            notebook-level list `s` is used.

    Returns:
        files (list of str): `config['lr']['analysis']['du']` expanded for
            every pair of samples sharing the same prefix, for
            species='mouse', obs_col='sample' and each feature in `feats`
    """
    if samples is None:
        # backward-compatible fallback on the notebook global; raises
        # NameError if `s` has not been defined yet, as before
        samples = s

    obs_col = 'sample'
    feats = ['tss', 'tes', 'ic', 'iso']

    # all unordered sample pairs whose names share the prefix before the first '_'
    combos = [c for c in itertools.combinations(samples, 2)
              if c[0].split('_')[0]==c[1].split('_')[0]]
    obs_cond1 = [c[0] for c in combos]
    obs_cond2 = [c[1] for c in combos]

    # inner expand: zip the two conditions pairwise and leave the other
    # wildcards in place; outer expand: fill in the remaining wildcards
    files = expand(expand(config['lr']['analysis']['du'],
                  zip,
                  obs_cond1=obs_cond1,
                  obs_cond2=obs_cond2,
                  allow_missing=True),
                  obs_col=obs_col,
                  species='mouse',
                  feat=feats)
    return files

In [33]:
# Display the list of DU output file paths built from the config pattern
get_du_tc_cfg_entries()

['data/mouse/lr/du/gastroc_18-20mo_vs_gastroc_25d_sample_du_tss.tsv',
 'data/mouse/lr/du/gastroc_18-20mo_vs_gastroc_25d_sample_du_tes.tsv',
 'data/mouse/lr/du/gastroc_18-20mo_vs_gastroc_25d_sample_du_ic.tsv',
 'data/mouse/lr/du/gastroc_18-20mo_vs_gastroc_25d_sample_du_iso.tsv',
 'data/mouse/lr/du/gastroc_18-20mo_vs_gastroc_4d_sample_du_tss.tsv',
 'data/mouse/lr/du/gastroc_18-20mo_vs_gastroc_4d_sample_du_tes.tsv',
 'data/mouse/lr/du/gastroc_18-20mo_vs_gastroc_4d_sample_du_ic.tsv',
 'data/mouse/lr/du/gastroc_18-20mo_vs_gastroc_4d_sample_du_iso.tsv',
 'data/mouse/lr/du/gastroc_18-20mo_vs_gastroc_14d_sample_du_tss.tsv',
 'data/mouse/lr/du/gastroc_18-20mo_vs_gastroc_14d_sample_du_tes.tsv',
 'data/mouse/lr/du/gastroc_18-20mo_vs_gastroc_14d_sample_du_ic.tsv',
 'data/mouse/lr/du/gastroc_18-20mo_vs_gastroc_14d_sample_du_iso.tsv',
 'data/mouse/lr/du/gastroc_18-20mo_vs_gastroc_2mo_sample_du_tss.tsv',
 'data/mouse/lr/du/gastroc_18-20mo_vs_gastroc_2mo_sample_du_tes.tsv',
 'data/mouse/lr/du/gastroc_

In [11]:
def get_tc_mouse_samples():
    """
    Get the mouse samples that are part of the timecourse

    Reads the notebook-level `config` and `od`: `config['lr']['meta']` is
    expanded with species='mouse' and prefixed with `od` to get the path
    of the tab-separated library metadata table.

    Returns:
        s (list of str): Unique values of the `sample` column for rows
            whose `general_tissue_cell_type` and `age` are both in the
            timecourse lists below, in order of first appearance
    """
    m_lib_meta = od+expand(config['lr']['meta'], species='mouse')[0]
    df = pd.read_csv(m_lib_meta, sep='\t')
    tc_tissues = ['muscle', 'hippocampus', 'cortex', 'adrenal gland', 'heart']
    tc_times = ['18-20mo', '2mo', '4d', '25d', '14d', '36d', '10d']

    # Filter the table that was just read. The previous version filtered a
    # notebook-global `meta` instead, which is undefined on a fresh kernel.
    is_tc = (df.general_tissue_cell_type.isin(tc_tissues))&\
            (df.age.isin(tc_times))
    s = df.loc[is_tc, 'sample'].unique().tolist()
    return s

# Disabled inline draft of the same tissue/age filter, which took the
# metadata from `sg.adata.obs` instead of calling the function:
# meta = sg.adata.obs.copy(deep=True)
# meta.head()
# tc_tissues = ['muscle', 'hippocampus', 'cortex', 'adrenal gland', 'heart']
# tc_times = ['18-20mo', '2mo', '4d', '25d', '14d', '36d', '10d']
# meta.head()
# s = meta.loc[(meta.general_tissue_cell_type.isin(tc_tissues))&\
#          (meta.age.isin(tc_times))]

# `s` is kept at notebook level; the pairwise-combination code reads it
s = get_tc_mouse_samples()
print(s)
    

['gastroc_18-20mo', 'adrenal_2mo', 'adrenal_18-20mo', 'adrenal_4d', 'gastroc_25d', 'gastroc_4d', 'adrenal_25d', 'adrenal_14d', 'heart_18-20mo', 'gastroc_14d', 'gastroc_2mo', 'adrenal_36d', 'gastroc_10d', 'gastroc_36d', 'hippocampus_2mo', 'hippocampus_10d', 'hippocampus_18-20mo', 'cortex_2mo', 'heart_14d', 'cortex_14d', 'heart_2mo', 'hippocampus_14d', 'cortex_18-20mo', 'adrenal_10d']


In [15]:
# Every unordered pair of samples whose names share the same prefix
# (the text before the first '_'), in itertools.combinations order.
combos = [
    (first, second)
    for first, second in itertools.combinations(s, 2)
    if first.split('_')[0] == second.split('_')[0]
]
combos
# obs_cond1 = [c[0] for c in combos]
# obs_cond2 = [c[1] for c in combos]

[('gastroc_18-20mo', 'gastroc_25d'),
 ('gastroc_18-20mo', 'gastroc_4d'),
 ('gastroc_18-20mo', 'gastroc_14d'),
 ('gastroc_18-20mo', 'gastroc_2mo'),
 ('gastroc_18-20mo', 'gastroc_10d'),
 ('gastroc_18-20mo', 'gastroc_36d'),
 ('adrenal_2mo', 'adrenal_18-20mo'),
 ('adrenal_2mo', 'adrenal_4d'),
 ('adrenal_2mo', 'adrenal_25d'),
 ('adrenal_2mo', 'adrenal_14d'),
 ('adrenal_2mo', 'adrenal_36d'),
 ('adrenal_2mo', 'adrenal_10d'),
 ('adrenal_18-20mo', 'adrenal_4d'),
 ('adrenal_18-20mo', 'adrenal_25d'),
 ('adrenal_18-20mo', 'adrenal_14d'),
 ('adrenal_18-20mo', 'adrenal_36d'),
 ('adrenal_18-20mo', 'adrenal_10d'),
 ('adrenal_4d', 'adrenal_25d'),
 ('adrenal_4d', 'adrenal_14d'),
 ('adrenal_4d', 'adrenal_36d'),
 ('adrenal_4d', 'adrenal_10d'),
 ('gastroc_25d', 'gastroc_4d'),
 ('gastroc_25d', 'gastroc_14d'),
 ('gastroc_25d', 'gastroc_2mo'),
 ('gastroc_25d', 'gastroc_10d'),
 ('gastroc_25d', 'gastroc_36d'),
 ('gastroc_4d', 'gastroc_14d'),
 ('gastroc_4d', 'gastroc_2mo'),
 ('gastroc_4d', 'gastroc_10d'),
 ('gas