In [4]:
import pandas as pd
import numpy as np
import scipy.stats as st
import seaborn as sns
import sys
import os
import gseapy as gp
import pdb
import copy
from scipy import sparse
import anndata
import cerberus

# Put the directory three levels above the current working directory on the
# import path; presumably this is the repo root that holds the `scripts`
# package imported just below -- TODO confirm.
p = os.path.dirname(os.path.dirname(os.path.dirname(os.getcwd())))
sys.path.append(p)

from scripts.utils import *
from scripts.plotting import *

In [5]:
# Notebook configuration: input file locations and analysis settings.
# All paths are relative to the notebook's working directory.

# cerberus annotation files; `h5_annot` is the one loaded with cerberus.read
h5 = '../cerberus_annot.h5'
h5_annot = 'cerberus_annot_triplets.h5'

# abundance tables (TALON output and the cerberus-filtered version,
# judging by the file names -- TODO confirm)
ab = '../../talon/human_talon_abundance.tsv'
filt_ab = '../cerberus_filtered_abundance.tsv'

# presumably the metadata column to group on and the minimum TPM
# expression threshold -- TODO confirm against the cells that use them
obs_col = 'sample'
min_tpm = 1

major_set = '../swan/isos_sample_gene_90.tsv'

# NOTE(review): looks like the samples/tissues that have a mouse
# counterpart ("mm") -- confirm
mm_tissues = ['adrenal gland', 'heart',
              'muscle', 'brain', 'pgp1_excite_neuron',
              'pgp1_astro', 'h9_osteocyte',
              'h1', 'wtc11']

## Compare the centroid of all observed (obs) protein-coding genes to the centroid in each tissue

In [6]:
# Load the cerberus annotation from the triplets h5 file; later cells read
# and extend its `triplets` table.
ca = cerberus.read(h5_annot)

In [11]:
# Add each gene's biotype category to the triplets table so that triplets
# can be subset on it (e.g. protein_coding only).
gene_df, _, _ = get_gtf_info(how='gene', ver='v40_cerberus', add_stable_gid=True)

# Keep only the join key and the biotype. Chaining (instead of an in-place
# rename on a column slice) avoids pandas' SettingWithCopyWarning, and
# dropping exact duplicate (gid, biotype) rows keeps the left merge below
# from multiplying triplet rows.
gene_df = (
    gene_df[['gid_stable', 'biotype_category']]
    .rename(columns={'gid_stable': 'gid'})
    .drop_duplicates()
)

# Drop any biotype column left over from a previous run of this cell first;
# otherwise a re-run would produce biotype_category_x / biotype_category_y
# and break later subsetting on 'biotype_category'.
ca.triplets = (
    ca.triplets
    .drop(columns='biotype_category', errors='ignore')
    .merge(gene_df, how='left', on='gid')
)

In [12]:
def compute_centroid(ca, gene=None, subset=None):
    """
    Average the simplex coordinates over a selection of triplets.

    Parameters:
        ca: Object exposing a `triplets` DataFrame (in this notebook,
            the value returned by `cerberus.read`)
        gene (str): Gene ID or name. When given, the triplets are first
            restricted with `cerberus.subset_df_on_gene`
        subset (dict of str): Filter handed to `cerberus.subset_df`,
            e.g. {'source': 'obs_det'}

    Returns:
        centroid (list of float): Column means in the order
            [tss_ratio, spl_ratio, tes_ratio]
    """
    coord_cols = ['tss_ratio', 'spl_ratio', 'tes_ratio']

    # operate on a deep copy rather than on the annotation's own table
    triplets = ca.triplets.copy(deep=True)

    # optional filters: gene first, then the generic subset
    if gene:
        triplets, _ = cerberus.subset_df_on_gene(triplets, gene)
    if subset:
        triplets = cerberus.subset_df(triplets, subset)

    # the result is expected to carry the three coordinate columns
    simplex = cerberus.compute_simplex_coords(triplets, 'splicing_ratio')

    return simplex[coord_cols].mean().tolist()

In [13]:
# Centroid over triplets whose source is 'obs_det' and whose gene biotype
# category is 'protein_coding'; the result is ordered [tss, spl, tes] ratio.
# NOTE(review): this cell is a bare assignment, so the list recorded below it
# may come from an earlier version of the cell -- confirm on a fresh run.
c = compute_centroid(ca, subset = {'source': 'obs_det', 'biotype_category': 'protein_coding'})

[0.34586555392142465, 0.25119837661919536, 0.40293606945938]