## Install Swan

In [None]:
!git clone git@github.com:mortazavilab/swan_vis.git
!cd swan_vis
!pip install .

Alternatively....

In [None]:
!pip install swan_vis

## Already run 
The following section was already run so you don't have to wait for results, but you're welcome to give it a try!

In [None]:
# data download
!wget http://crick.bio.uci.edu/freese/210830_ucd_worksho/data.tgz
!tar -xf data.tgz

In [1]:
import swan_vis as swan

In [2]:
# construct SwanGraph
annot_gtf = 'ref/gencode.vM21.annotation.gtf'
data_gtf = 'data/all_talon_observedOnly.gtf'
ab_file = 'data/all_talon_abundance_filtered.tsv'
meta = 'data/metadata.tsv'

In [6]:
sg = swan.SwanGraph()
sg.add_annotation(annot_gtf)
sg.add_transcriptome(data_gtf)
sg.add_abundance(ab_file)
sg.add_metadata(meta)

In [5]:
sg.save_graph('swan')

Saving graph as swan.p


In [3]:
# compare 0 and 72 hr time point 
obs_col = 'time_point'
obs_conditions = ['0hr', '72hr']
_ = sg.de_gene_test(obs_col, obs_conditions=obs_conditions)
_ = sg.de_transcript_test(obs_col, obs_conditions=obs_conditions)
_ = sg.die_gene_test(obs_col, obs_conditions=obs_conditions)

In [None]:
# find novel exon-skipping and intron retention events
_ = sg.find_es_genes()
_ = sg.find_ir_genes()

In [None]:
# save graph again so we have access to all the results 
sg.save_graph('swan')

## Your turn!

Now we'll take a look at some of the results from Swan and make some cool plots. If you want to do anything off-book or are confused about anything please take a look at the [documentation](https://freese.gitbook.io/swan/). 

In [None]:
# download the SwanGraph
!wget http://crick.bio.uci.edu/freese/210830_ucd_workshop/swan.p

In [2]:
# load the SwanGraph
sg = swan.read('swan.p')

Read in graph from swan.p


In [None]:
# TODO - start with IR / ES stuff that visualizing the smol graphs is useful for

In [32]:
# add some colors to represent the 2 different time points
green = '#019f73'
pink = '#cb79a7'
cmap = {'0hr': pink, '72hr': green}
sg.set_metadata_colors('time_point', cmap)

In [9]:
# results from differential gene expression test
obs_col = 'time_point'
obs_conditions = ['0hr', '72hr']
de_genes = sg.get_de_genes(obs_col, obs_conditions=obs_conditions,
                           q=0.05, log2fc=1)

In [10]:
de_genes.head()

Unnamed: 0,gid,pval,qval,log2fc,mean,zero_mean,grad,coef_mle,coef_sd,ll,gname
19866,ENSMUSG00000070733.13,0.0,0.0,283.913085,0.561909,False,0.500002,283.913085,2.222759e-162,-16.217098,Fryl
18848,ENSMUSG00000065852.1,0.0,0.0,283.913085,0.407021,False,0.5,283.913085,2.222759e-162,-15.435992,Gm22353
18852,ENSMUSG00000065862.1,0.0,0.0,283.913085,0.407021,False,0.5,283.913085,2.222759e-162,-15.435992,Gm24029
19949,ENSMUSG00000071042.12,0.0,0.0,283.913085,0.20351,False,0.40929,283.913085,2.222759e-162,-11.215066,Rasgrp3
7466,ENSMUSG00000029699.13,0.0,0.0,283.913085,0.578117,False,1.0,283.913085,2.222759e-162,-28.840319,Ssc4d


In [11]:
de_genes.loc[de_genes.gname.str.contains('Myo')]

Unnamed: 0,gid,pval,qval,log2fc,mean,zero_mean,grad,coef_mle,coef_sd,ll,gname
19415,ENSMUSG00000068697.7,0.0,0.0,283.913085,4.689763,False,1.0,283.913085,2.222759e-162,-38.567663,Myoz1
6503,ENSMUSG00000028116.13,0.0,0.0,283.913085,28.415941,False,0.999999,283.913085,2.222759e-162,-45.865588,Myoz2
20253,ENSMUSG00000072720.9,0.0,0.0,283.913085,5.47139,False,1.0,283.913085,2.222759e-162,-39.200583,Myo18b
10601,ENSMUSG00000037139.15,0.0,0.0,283.913085,16.414171,False,0.999999,283.913085,2.222759e-162,-43.658559,Myom3
8369,ENSMUSG00000031461.4,0.0,0.0,283.913085,351.204727,False,0.999999,283.913085,2.222759e-162,-55.932105,Myom2
4263,ENSMUSG00000024049.15,0.0,0.0,283.913085,20.664499,False,0.999999,283.913085,2.222759e-162,-44.587521,Myom1
4505,ENSMUSG00000024471.12,0.0,0.0,283.913085,10.071095,False,0.999999,283.913085,2.222759e-162,-41.596729,Myot
5560,ENSMUSG00000026459.5,0.0,0.0,283.913085,2162.365112,False,0.999999,283.913085,2.222759e-162,-63.212547,Myog
3582,ENSMUSG00000022272.17,0.00066,0.004051,1.475328,8.793978,False,0.68904,1.475328,0.4331994,-3.089287,Myo10


In [12]:
len(de_genes.index)

5392

In [15]:
# results from differential transcript expression test
de_transcripts = sg.get_de_transcripts(obs_col, obs_conditions=obs_conditions,
                           q=0.05, log2fc=1)

In [16]:
de_transcripts.head()

Unnamed: 0,tid,pval,qval,log2fc,mean,zero_mean,grad,coef_mle,coef_sd,ll,gid,gname
90372,ENSMUST00000184224.1,0.0,0.0,283.913085,0.187303,False,0.374605,283.913085,2.222759e-162,-10.301695,ENSMUSG00000098716.1,Gm28033
90689,ENSMUST00000184591.1,0.0,0.0,283.913085,2.312467,False,0.999999,283.913085,2.222759e-162,-35.526327,ENSMUSG00000033182.12,Kbtbd12
90287,ENSMUST00000184135.1,0.0,0.0,283.913085,0.20351,False,0.40929,283.913085,2.222759e-162,-11.215066,ENSMUSG00000098761.1,Gm18821
16680,ENSMUST00000079773.13,0.0,0.0,283.913085,13.458757,False,0.999999,283.913085,2.222759e-162,-42.859372,ENSMUSG00000003345.16,Csnk1g2
45913,ENSMUST00000132080.1,0.0,0.0,283.913085,0.187303,False,0.374605,283.913085,2.222759e-162,-10.301695,ENSMUSG00000013858.14,Tmem259


In [21]:
de_transcripts.loc[de_transcripts.gname.str.contains('Myo')].head()

Unnamed: 0,tid,pval,qval,log2fc,mean,zero_mean,grad,coef_mle,coef_sd,ll,gid,gname
89607,ENSMUST00000183377.1,0.0,0.0,283.913085,0.20351,False,0.40929,283.913085,2.222759e-162,-11.215066,ENSMUSG00000033577.18,Myo6
49005,ENSMUST00000135375.3,0.0,0.0,283.913085,1.563254,False,1.000001,283.913085,2.222759e-162,-33.923097,ENSMUSG00000000631.20,Myo18a
86409,ENSMUST00000179759.2,0.0,0.0,283.913085,20.477194,False,0.999999,283.913085,2.222759e-162,-44.551066,ENSMUSG00000024049.15,Myom1
45155,ENSMUST00000131251.1,0.0,0.0,283.913085,0.390814,False,0.781626,283.913085,2.222759e-162,-21.519703,ENSMUSG00000031461.4,Myom2
58333,ENSMUST00000145304.7,0.0,0.0,283.913085,10.364664,False,1.0,283.913085,2.222759e-162,-41.799408,ENSMUSG00000037139.15,Myom3


In [19]:
de_transcripts.loc[de_transcripts.gname == 'Pkm']

Unnamed: 0,tid,pval,qval,log2fc,mean,zero_mean,grad,coef_mle,coef_sd,ll,gid,gname
146970,TALONT000524508,0.0,0.0,283.913085,4.50246,False,0.999999,283.913085,2.222759e-162,-38.394349,ENSMUSG00000032294.17,Pkm
146957,TALONT000523760,2e-06,2e-05,2.987293,13.641756,False,0.3522,2.987293,0.6332595,-4.011946,ENSMUSG00000032294.17,Pkm
146953,TALONT000523686,0.006228,0.042511,2.183307,4.312392,False,3.8e-05,2.183307,0.7981339,-6.115639,ENSMUSG00000032294.17,Pkm
146951,TALONT000523682,2.2e-05,0.000176,1.360462,14.967567,False,1.263405,1.360462,0.3206211,0.0,ENSMUSG00000032294.17,Pkm


In [20]:
de_transcripts.loc[de_transcripts.gname == 'Tpm2']

Unnamed: 0,tid,pval,qval,log2fc,mean,zero_mean,grad,coef_mle,coef_sd,ll,gid,gname
145514,TALONT000415087,0.0,0.0,283.913085,10.503345,False,1.0,283.913085,2.222759e-162,-41.851966,ENSMUSG00000028464.16,Tpm2
145516,TALONT000415089,0.0,0.0,283.913085,6.155773,False,1.0,283.913085,2.222759e-162,-39.562596,ENSMUSG00000028464.16,Tpm2
145517,TALONT000415177,0.0,0.0,283.913085,2.141371,False,1.000004,283.913085,2.222759e-162,-35.287652,ENSMUSG00000028464.16,Tpm2
26461,ENSMUST00000107913.9,0.0,0.0,3.570377,673.755588,False,5.430085,3.570377,0.1180513,0.0,ENSMUSG00000028464.16,Tpm2
145515,TALONT000415088,0.003188,0.022426,3.218529,5.673465,False,0.091297,3.218529,1.091389,0.0,ENSMUSG00000028464.16,Tpm2


In [22]:
len(de_transcripts.index)

12232

In [27]:
# results from differential transcript expression test
die_genes = sg.get_die_genes(obs_col=obs_col, obs_conditions=obs_conditions,
                             p=0.05, dpi=10)

In [28]:
die_genes.head()

Unnamed: 0,gid,p_val,dpi,adj_p_val
3,ENSMUSG00000061689.15,1.4436049999999998e-19,67.565826,3.4240320000000002e-18
5,ENSMUSG00000020152.7,6.042619e-11,19.780212,8.726454e-10
11,ENSMUSG00000000326.13,0.0009814141,28.23403,0.005577975
17,ENSMUSG00000000420.15,1.257904e-29,37.49585,4.475362000000001e-28
18,ENSMUSG00000000440.12,0.009107806,44.185776,0.04001738


In [37]:
# add gene names, which I should make automatic in future releases!
gnames = sg.t_df[['gname', 'gid']].drop_duplicates()
die_genes = die_genes.merge(gnames, on='gid', how='left')

In [38]:
die_genes = die_genes.sort_values(by='dpi', ascending=False)
die_genes.head()

Unnamed: 0,gid,p_val,dpi,adj_p_val,gname
281,ENSMUSG00000026414.13,5.312136e-237,98.948153,2.645927e-234,Tnnt2
161,ENSMUSG00000020836.15,1.7873329999999998e-64,97.932594,1.718034e-62,Coro6
151,ENSMUSG00000020439.17,5.141485e-75,97.710327,5.523568e-73,Smtn
946,ENSMUSG00000039542.16,7.076545e-128,96.70433,1.436015e-125,Ncam1
496,ENSMUSG00000036918.16,3.520068e-06,95.447243,3.115743e-05,Ttc7


## Make some visualizations of the analysis results

In [29]:
!mkdir figures

In [34]:
sg.gen_report('Dlgap4',
              'figures/dlgap4',
              metadata_cols=['time_point'],
              cmap='viridis',
              transcript_name=True)


Plotting transcripts for ENSMUSG00000061689.15
Saving transcript path graph for ENSMUST00000109566.8 as figures/dlgap4_ENSMUST00000109566.8_path.png
Saving transcript path graph for ENSMUST00000099145.5 as figures/dlgap4_ENSMUST00000099145.5_path.png
Saving transcript path graph for ENSMUST00000127944.8 as figures/dlgap4_ENSMUST00000127944.8_path.png
Saving transcript path graph for ENSMUST00000131157.8 as figures/dlgap4_ENSMUST00000131157.8_path.png
Generating report for ENSMUSG00000061689.15


In [35]:
sg.gen_report('Dlgap4',
              'figures/dlgap4',
              metadata_cols=['time_point'],
              cmap='viridis',
              transcript_name=True, 
              browser=True)


Plotting transcripts for ENSMUSG00000061689.15
Saving transcript path graph for ENSMUST00000109566.8 as figures/dlgap4_browser_ENSMUST00000109566.8_path.png
Saving transcript path graph for ENSMUST00000099145.5 as figures/dlgap4_browser_ENSMUST00000099145.5_path.png
Saving transcript path graph for ENSMUST00000127944.8 as figures/dlgap4_browser_ENSMUST00000127944.8_path.png
Saving transcript path graph for ENSMUST00000131157.8 as figures/dlgap4_browser_ENSMUST00000131157.8_path.png
Generating report for ENSMUSG00000061689.15


In [36]:
sg.gen_report('Dlgap4',
              'figures/dlgap4',
              metadata_cols=['time_point'],
              cmap='magma',
              transcript_name=True, 
              layer='pi',
              display_numbers=True)


Plotting transcripts for ENSMUSG00000061689.15
Saving transcript path graph for ENSMUST00000109566.8 as figures/dlgap4_ENSMUST00000109566.8_path.png
Saving transcript path graph for ENSMUST00000099145.5 as figures/dlgap4_ENSMUST00000099145.5_path.png
Saving transcript path graph for ENSMUST00000127944.8 as figures/dlgap4_ENSMUST00000127944.8_path.png
Saving transcript path graph for ENSMUST00000131157.8 as figures/dlgap4_ENSMUST00000131157.8_path.png
Generating report for ENSMUSG00000061689.15


In [40]:
sg.gen_report('Tnnt2',
              'figures/tnnt2',
              metadata_cols=['time_point'],
              cmap='viridis',
              transcript_name=True,
              novelty=True, 
              indicate_novel=True)


Plotting transcripts for ENSMUSG00000026414.13
Saving transcript path graph for TALONT000394811 as figures/tnnt2_novel_TALONT000394811_path.png
Saving transcript path graph for TALONT000394812 as figures/tnnt2_novel_TALONT000394812_path.png
Saving transcript path graph for TALONT000396016 as figures/tnnt2_novel_TALONT000396016_path.png
Saving transcript path graph for TALONT000396098 as figures/tnnt2_novel_TALONT000396098_path.png
Saving transcript path graph for TALONT000394818 as figures/tnnt2_novel_TALONT000394818_path.png
Saving transcript path graph for TALONT000394815 as figures/tnnt2_novel_TALONT000394815_path.png
Saving transcript path graph for TALONT000394848 as figures/tnnt2_novel_TALONT000394848_path.png
Saving transcript path graph for TALONT000394894 as figures/tnnt2_novel_TALONT000394894_path.png
Saving transcript path graph for ENSMUST00000191055.1 as figures/tnnt2_novel_ENSMUST00000191055.1_path.png
Saving transcript path graph for TALONT000394893 as figures/tnnt2_nov

In [41]:
sg.gen_report('Tnnt2',
              'figures/tnnt2',
              metadata_cols=['time_point'],
              cmap='magma',
              transcript_name=True,
              layer='pi',
              display_numbers=True,
              novelty=True, 
              browser=True)


Plotting transcripts for ENSMUSG00000026414.13
Saving transcript path graph for TALONT000394811 as figures/tnnt2_browser_TALONT000394811_path.png
Saving transcript path graph for TALONT000394812 as figures/tnnt2_browser_TALONT000394812_path.png
Saving transcript path graph for TALONT000396016 as figures/tnnt2_browser_TALONT000396016_path.png
Saving transcript path graph for TALONT000396098 as figures/tnnt2_browser_TALONT000396098_path.png
Saving transcript path graph for TALONT000394818 as figures/tnnt2_browser_TALONT000394818_path.png
Saving transcript path graph for TALONT000394815 as figures/tnnt2_browser_TALONT000394815_path.png
Saving transcript path graph for TALONT000394848 as figures/tnnt2_browser_TALONT000394848_path.png
Saving transcript path graph for TALONT000394894 as figures/tnnt2_browser_TALONT000394894_path.png
Saving transcript path graph for ENSMUST00000191055.1 as figures/tnnt2_browser_ENSMUST00000191055.1_path.png
Saving transcript path graph for TALONT000394893 as

In [43]:
sg.gen_report('Coro6',
              'figures/coro6',
              metadata_cols=['time_point'],
              cmap='viridis',
              transcript_name=True,
              novelty=True, 
              indicate_novel=True)


Plotting transcripts for ENSMUSG00000020836.15
Saving transcript path graph for TALONT000544061 as figures/coro6_novel_TALONT000544061_path.png
Saving transcript path graph for ENSMUST00000102493.7 as figures/coro6_novel_ENSMUST00000102493.7_path.png
Saving transcript path graph for TALONT000544046 as figures/coro6_novel_TALONT000544046_path.png
Saving transcript path graph for TALONT000544055 as figures/coro6_novel_TALONT000544055_path.png
Saving transcript path graph for TALONT000544048 as figures/coro6_novel_TALONT000544048_path.png
Saving transcript path graph for TALONT000544058 as figures/coro6_novel_TALONT000544058_path.png
Saving transcript path graph for TALONT000544052 as figures/coro6_novel_TALONT000544052_path.png
Saving transcript path graph for ENSMUST00000108391.8 as figures/coro6_novel_ENSMUST00000108391.8_path.png
Generating report for ENSMUSG00000020836.15


In [44]:
sg.gen_report('Coro6',
              'figures/coro6',
              metadata_cols=['time_point'],
              cmap='magma',
              transcript_name=True,
              novelty=True, 
              layer='pi', 
              browser=True)


Plotting transcripts for ENSMUSG00000020836.15
Saving transcript path graph for TALONT000544061 as figures/coro6_browser_TALONT000544061_path.png
Saving transcript path graph for ENSMUST00000102493.7 as figures/coro6_browser_ENSMUST00000102493.7_path.png
Saving transcript path graph for TALONT000544046 as figures/coro6_browser_TALONT000544046_path.png
Saving transcript path graph for TALONT000544055 as figures/coro6_browser_TALONT000544055_path.png
Saving transcript path graph for TALONT000544048 as figures/coro6_browser_TALONT000544048_path.png
Saving transcript path graph for TALONT000544058 as figures/coro6_browser_TALONT000544058_path.png
Saving transcript path graph for TALONT000544052 as figures/coro6_browser_TALONT000544052_path.png
Saving transcript path graph for ENSMUST00000108391.8 as figures/coro6_browser_ENSMUST00000108391.8_path.png
Generating report for ENSMUSG00000020836.15


In [45]:
die_genes.head()

Unnamed: 0,gid,p_val,dpi,adj_p_val,gname
281,ENSMUSG00000026414.13,5.312136e-237,98.948153,2.645927e-234,Tnnt2
161,ENSMUSG00000020836.15,1.7873329999999998e-64,97.932594,1.718034e-62,Coro6
151,ENSMUSG00000020439.17,5.141485e-75,97.710327,5.523568e-73,Smtn
946,ENSMUSG00000039542.16,7.076545e-128,96.70433,1.436015e-125,Ncam1
496,ENSMUSG00000036918.16,3.520068e-06,95.447243,3.115743e-05,Ttc7


In [46]:
sg.gen_report('Smtn',
              'figures/smtn',
              metadata_cols=['time_point'],
              cmap='magma',
              transcript_name=True,
              novelty=True, 
              layer='pi', 
              browser=True)


Plotting transcripts for ENSMUSG00000020439.17
Saving transcript path graph for ENSMUST00000020721.14 as figures/smtn_browser_ENSMUST00000020721.14_path.png
Saving transcript path graph for ENSMUST00000020718.9 as figures/smtn_browser_ENSMUST00000020718.9_path.png
Saving transcript path graph for TALONT000451717 as figures/smtn_browser_TALONT000451717_path.png
Saving transcript path graph for TALONT000451783 as figures/smtn_browser_TALONT000451783_path.png
Saving transcript path graph for TALONT000451804 as figures/smtn_browser_TALONT000451804_path.png
Saving transcript path graph for ENSMUST00000170588.7 as figures/smtn_browser_ENSMUST00000170588.7_path.png
Generating report for ENSMUSG00000020439.17
