In [1]:
import pandas as pd
import numpy as np
import scipy.stats as st
import seaborn as sns
import sys
import os
import gseapy as gp
import matplotlib.pyplot as plt
import swan_vis as swan
import yaml
from snakemake.io import expand

p = os.path.dirname(os.path.dirname(os.getcwd()))
sys.path.append(p)

from scripts.utils import *
from scripts.mane_utils import *
from scripts.plotting import *

In [2]:
config_file = '../snakemake/config.yml'
with open(config_file) as f:
    config = yaml.safe_load(f)
    
    

In [3]:
swan_file = '../'+expand(config['data']['sg'], species='human')[0]


In [4]:
sg = swan.read(swan_file)

Read in graph from ../data/human/swan_graph.p


In [5]:
sg.edge_adata

AnnData object with n_obs × n_vars = 138 × 606402
    obs: 'dataset', 'total_counts'
    var: 'edge_id'
    layers: 'counts', 'tpm'

In [12]:
genes = ['BIN1', 'MAPT', 'PKM', 'TPM1']

In [38]:
# get edges associated with transcripts from given input genes
# limit to just exons
t_df = sg.t_df.loc[sg.t_df.gname.isin(genes)]
e_df = swan.pivot_path_list(t_df, 'path')
e_df = e_df.loc[e_df.edge_id.isin(sg.edge_df.loc[sg.edge_df.edge_type=='exon'].index.tolist())]
eids = e_df.edge_id.unique().tolist()
e_df = e_df.merge(sg.t_df[['gname', 'gid']],
                  how='left', 
                  left_index=True, 
                  right_index=True)
e_df.head()

Unnamed: 0_level_0,edge_id,gname,gid
tid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"ENSG00000067225[1,1,1]",298641,PKM,ENSG00000067225
"ENSG00000067225[1,1,1]",298622,PKM,ENSG00000067225
"ENSG00000067225[1,1,1]",298624,PKM,ENSG00000067225
"ENSG00000067225[1,1,1]",298626,PKM,ENSG00000067225
"ENSG00000067225[1,1,1]",298628,PKM,ENSG00000067225


In [39]:
# df of tid <-> edge id mapping to find exons 
# that are compatible with one another
fname = 'pilot_tid_eid_map.tsv'
e_df.to_csv(fname, sep='\t')

In [32]:
type(eids[0])
sg.edge_adata.var.dtypes
print(len(sg.edge_adata.var.index))
print(len(sg.edge_df.index))

606402
1023930


In [33]:
# subset edge adata by the input genes
edge_adata = sg.edge_adata.copy()
# print(len(eids))
eids = sg.edge_adata.var.loc[sg.edge_adata.var.edge_id.isin(eids)].index.tolist()
# print(len(edge_var.index))
edge_adata = edge_adata[:, eids]

In [35]:
df = swan.calc_tpm(edge_adata)

In [40]:
# tpm of each exon in each library
fname = 'pilot_exon_tpm.tsv'
df.to_csv(fname, sep='\t')

In [42]:
for g in genes:
    sg.gen_report(g,
                  f'figures/',
                  novelty=True,
                  cmap='viridis',
                  indicate_novel=True,
                  transcript_col='tname',
                  include_unexpressed=True,
                  datasets={'dataset': ['h9_neural_crest_1_1']})
    sg.gen_report(g,
                  f'figures/',
                  novelty=True,
                  cmap='magma',
                  transcript_col='tname',
                  display_numbers=True,
                  browser=True,
                  layer='pi',
                  include_unexpressed=True,
                  datasets={'dataset': ['h9_neural_crest_1_1']})

  t_df['sum'] = np.log2(t_df+1).sum(axis=1)



Plotting transcripts for ENSG00000136717
Saving transcript path graph for ENSG00000136717[1,4,1] as figures/_novel_ENSG00000136717[1,4,1]_path.png
Saving transcript path graph for ENSG00000136717[1,2,1] as figures/_novel_ENSG00000136717[1,2,1]_path.png
Saving transcript path graph for ENSG00000136717[1,3,1] as figures/_novel_ENSG00000136717[1,3,1]_path.png
Saving transcript path graph for ENSG00000136717[2,4,1] as figures/_novel_ENSG00000136717[2,4,1]_path.png
Saving transcript path graph for ENSG00000136717[1,18,1] as figures/_novel_ENSG00000136717[1,18,1]_path.png
Saving transcript path graph for ENSG00000136717[2,2,1] as figures/_novel_ENSG00000136717[2,2,1]_path.png
Saving transcript path graph for ENSG00000136717[2,3,1] as figures/_novel_ENSG00000136717[2,3,1]_path.png
Saving transcript path graph for ENSG00000136717[1,19,1] as figures/_novel_ENSG00000136717[1,19,1]_path.png
Saving transcript path graph for ENSG00000136717[5,17,1] as figures/_novel_ENSG00000136717[5,17,1]_path.pn

  norm_val = (entry[col]-self.g_min)/(self.g_max-self.g_min)



Plotting transcripts for ENSG00000186868
Saving transcript path graph for ENSG00000186868[1,8,3] as figures/_browser_ENSG00000186868[1,8,3]_path.png
Saving transcript path graph for ENSG00000186868[1,8,2] as figures/_browser_ENSG00000186868[1,8,2]_path.png
Saving transcript path graph for ENSG00000186868[1,7,3] as figures/_browser_ENSG00000186868[1,7,3]_path.png
Saving transcript path graph for ENSG00000186868[1,22,3] as figures/_browser_ENSG00000186868[1,22,3]_path.png
Saving transcript path graph for ENSG00000186868[6,22,1] as figures/_browser_ENSG00000186868[6,22,1]_path.png
Saving transcript path graph for ENSG00000186868[1,21,3] as figures/_browser_ENSG00000186868[1,21,3]_path.png
Saving transcript path graph for ENSG00000186868[4,14,7] as figures/_browser_ENSG00000186868[4,14,7]_path.png
Saving transcript path graph for ENSG00000186868[1,7,11] as figures/_browser_ENSG00000186868[1,7,11]_path.png
Saving transcript path graph for ENSG00000186868[6,21,1] as figures/_browser_ENSG000