In [28]:
import swan_vis as swan
import scanpy as sc
import glob
import pandas as pd

In [3]:
def get_lr_cluster_colors():
    """Return the color assigned to each cluster label and the label order.

    Returns:
        tuple: ``(c_dict, order)`` where ``c_dict`` maps the cluster labels
        ``'1'`` through ``'7'`` to hex color strings and ``order`` lists the
        labels in ascending order.
    """
    # (cluster label, hex color) pairs, listed in display order
    palette = [
        ('1', '#FFD92F'),  # yellow
        ('2', '#E5C494'),  # beige
        ('3', '#66C2A5'),  # teal
        ('4', '#FC8D62'),  # salmon
        ('5', '#8DA0CB'),  # purple
        ('6', '#E78AC3'),  # pink
        ('7', '#A6D854'),  # green
    ]

    c_dict = dict(palette)
    order = [label for label, _ in palette]

    return c_dict, order

def get_sample_colors(samples=None):
    """Return the color for each sample and the sample order.

    Args:
        samples: Optional container of sample names. When given and
            non-empty, only the samples it contains are kept in both
            return values. When omitted or empty, every sample is returned.

    Returns:
        tuple: ``(c_dict, order)`` where ``c_dict`` maps sample name to a
        hex color string and ``order`` lists the retained sample names in
        their fixed order.
    """
    palette = {
        'MB_cells': '#cb79a7',   # pink
        'MB_nuclei': '#57b4e9',  # blue
        'MT_nuclei': '#019f73',  # green
    }
    full_order = ['MB_cells', 'MB_nuclei', 'MT_nuclei']

    # No filter requested: hand back the complete palette
    if not samples:
        return palette, full_order

    c_dict = {name: color for name, color in palette.items() if name in samples}
    order = [name for name in full_order if name in samples]

    return c_dict, order

def get_condition_colors():
    """Return the color for each condition and the condition order.

    Returns:
        tuple: ``(c_dict, order)`` where ``c_dict`` maps the condition
        labels ``'MB'``, ``'MNC'`` and ``'MT'`` to hex color strings and
        ``order`` lists those labels in that order.
    """
    # (condition label, hex color) pairs, listed in display order
    palette = (
        ('MB', '#cb79a7'),   # pink
        ('MNC', '#019f73'),  # green
        ('MT', '#066b4b'),   # dark green
    )

    order = [label for label, _ in palette]
    c_dict = {label: color for label, color in palette}

    return c_dict, order

In [2]:
# Input files: the reference annotation and three transcriptome GTFs
annot = 'gencode.vM21.primary_assembly.annotation_UCSC_names.gtf'
gtf1 = 'sc_mb.gtf'
gtf2 = 'sn_mb.gtf'
gtf3 = 'sn_mt.gtf'

# Create the SwanGraph and load the annotation before any transcriptome
sg = swan.SwanGraph(sc=True)
sg.add_annotation(annot, verbose=True)

# Add the transcriptomes one after another, in the order gtf1, gtf2, gtf3
for gtf in (gtf1, gtf2, gtf3):
    sg.add_transcriptome(gtf, verbose=True)


Adding annotation to the SwanGraph


Processing transcripts: 100%|█████████▉| 141800/141862 [01:02<00:00, 2265.44it/s]


Reindexing and sorting entries on genomic location...


Reindexing vertices: 100%|█████████▉| 754000/754537 [00:00<00:00, 993838.12it/s] 
Reindexing edges:  97%|█████████▋| 730000/754537 [00:00<00:00, 793620.52it/s] 



Annotation added to the SwanGraph


In [35]:
# Save the SwanGraph under the prefix 'swan' (the cell output reports the file as swan.p)
sg.save_graph('swan')

Saving graph as swan.p


In [114]:
# Concatenate and format the h5ad files.
# glob.glob returns matches in arbitrary order, so sort the file list to make
# the concatenation order (and anything derived from it) reproducible.
h5ad_files = sorted(glob.glob('*h5ad'))
if not h5ad_files:
    # Fail loudly rather than leaving `adata` undefined (or stale from an
    # earlier run of the kernel) when no input is found.
    raise FileNotFoundError("No files matching '*h5ad' found in the working directory")

adata = None
for f in h5ad_files:
    # Label every cell with the part of the file name before the first '.'
    exp = f.split('.')[0]
    temp = sc.read(f)
    temp.obs['exp'] = exp
    # The first file seeds `adata`; later files are appended one at a time,
    # as before.
    adata = temp if adata is None else adata.concatenate(temp)

In [115]:
# Assemble the abundance table: one row per entry of `tids`, one column per
# entry of `bcs`, values taken from `X`.
# NOTE(review): `tids`, `bcs` and `X` are not defined in any visible cell of
# this notebook; presumably they were derived from `adata` (transcript IDs,
# cell barcodes, expression matrix) in a cell that was deleted. Confirm and
# define them explicitly so the notebook survives Restart & Run All.
df = pd.DataFrame(index=tids, columns=bcs, data=X)

In [116]:
# Write the abundance table as a tab-separated file (the index is written as the first column)
df.to_csv('c2c12_abundance.tsv', sep='\t')

In [117]:
# Build the per-cell metadata table from the concatenated AnnData.
# reset_index() turns the obs index into a regular column, which is read
# below as `merged_bc`.
meta = adata.obs.copy(deep=True).reset_index()
# Join key: the part of merged_bc before the first '-'
meta['merged_bc_2'] = meta.merged_bc.str.split('-', expand=True)[0]

# NOTE(review): this rebinds `adata` to the processed object, so re-running
# this cell builds `meta` from the processed obs table instead of the
# concatenated one. The path is also an absolute, machine-specific location;
# consider a configurable data directory.
adata = sc.read('/Users/fairliereese/mortazavi_lab/data/c2c12_paper_2020/sc_pacbio/201218/scanpy/transcript_processed.h5ad')
# rename() without inplace returns a new frame; the previous in-place rename
# acted on a column selection of adata.obs, which can trigger pandas'
# SettingWithCopyWarning.
temp = adata.obs[['merged_bc', 'leiden']].rename(columns={'merged_bc': 'merged_bc_2'})

# Attach the leiden cluster of each cell; the left join keeps every row of meta
meta = meta.merge(temp, how='left', on='merged_bc_2')
meta = meta.rename(columns={'merged_bc': 'dataset'}).drop(columns='merged_bc_2')

# reformat leiden: add 1 and store as strings so the labels match the
# '1'-'7' keys of the cell type map below
meta['leiden'] = (meta.leiden.astype(int) + 1).astype(str)

# add celltype
m = {'1': 'MB', '2': 'MB', '3': 'MB',
     '4': 'MNC', '5': 'MNC',
     '6': 'MT', '7': 'MT'}
meta['cell_type'] = meta.leiden.map(m)

# index=False: the positional integer index carries no information and would
# otherwise be written as an unnamed first column of the metadata file.
meta.to_csv('c2c12_metadata.tsv', sep='\t', index=False)


This is where adjacency matrices should go now.

This is where adjacency matrices should go now.


In [37]:
# now add abundance file and metadata to SwanGraph
# `ab` is the tab-separated abundance table written by df.to_csv earlier in this notebook
ab = 'c2c12_abundance.tsv'
sg.add_abundance(ab)


Adding abundance for datasets ACTATGCAACACGACCGTGCTAGC-0-0, GAGTTAGCTCTTCACATTCCGATC-0-0, CTGGCATAACAAGCTACTTTGGTC-0-0, ATCATTCCGACAGTGCAGCGAAAC-0-0, ATTGGCTCGAGCTGAACTTTGGTC-0-0... (and 459 more) to SwanGraph




In [119]:
# Attach the per-cell metadata file to the SwanGraph, overwriting existing metadata.
# Note: this rebinds `meta` from the metadata DataFrame built earlier to the
# path of the file that DataFrame was written to.
meta = 'c2c12_metadata.tsv'
sg.add_metadata(meta, overwrite=True)

AnnData expects .obs.index to contain strings, but got values like:
    [0, 1, 2, 3, 4]

    Inferred to be: integer

  value_idx = self._prep_dim_index(value.index, attr)


In [120]:
# Save the SwanGraph again under the same prefix, now including the added abundance and metadata
sg.save_graph('swan')

Saving graph as swan.p


In [1]:
# Reload the saved SwanGraph from swan.p rather than rebuilding it.
# The In [1] execution count suggests this cell was the first one run in a
# fresh kernel, which would explain the repeated import.
import swan_vis as swan
sg = swan.read('swan.p')

Read in graph from swan.p


In [4]:
# add colors
# Each metadata column is paired with the helper that supplies its palette:
# sample, then cell type, then cluster.
color_sources = [
    ('sample', get_sample_colors),
    ('cell_type', get_condition_colors),
    ('leiden', get_lr_cluster_colors),
]

for obs_col, get_colors in color_sources:
    c_dict, order = get_colors()
    sg.set_metadata_colors(obs_col, c_dict)

In [8]:
# get switching events b/w each of the
# different cell types
#
# Writes one row per (test kind, cell type pair) to die_comparisons.tsv with
# the columns: test, first cell type, second cell type, output file name.
# The DIE test itself is disabled in this notebook: the call
#   sg.die_gene_test(kind=test, obs_col='cell_type',
#                    obs_conditions=[c1, c2], verbose=True)
# and the write of its result to `fname` were commented out, so only the
# comparison manifest is produced here.
tests = ['iso', 'tss', 'tes']
cell_types = sg.adata.obs.cell_type.unique()

# The context manager closes the file even if the loop raises part-way.
with open('die_comparisons.tsv', 'w') as ofile:
    # Visit each unordered pair exactly once: c2 only ranges over the cell
    # types that come after c1, so no bookkeeping of tested pairs is needed.
    for i, c1 in enumerate(cell_types):
        for c2 in cell_types[i + 1:]:
            for test in tests:
                fname = '{}_{}_{}_die.tsv'.format(c1, c2, test)
                ofile.write('{}\t{}\t{}\t{}\n'.format(test, c1, c2, fname))

In [None]:
# Save the SwanGraph under the prefix 'swan'. The prefix must be the string
# 'swan', as in the earlier save cells, not the imported `swan` module.
sg.save_graph('swan')