## 02: Expression Plot

#### Create an RNA isoform structure plot with an RNA isoform expression plot beside it

In [7]:
import RNApysoforms as RNApy
import plotly.offline as py
## Initialise plotly's offline notebook mode before py.iplot() is used to show the figure.
## NOTE(review): per the plotly docs, connected=True loads plotly.js from a CDN instead of
## embedding it in the notebook, so rendering needs network access — confirm for your plotly version.
py.init_notebook_mode(connected=True)

In [8]:
## Paths to your ENSEMBL GTF file and counts matrix file.
## Both files live in the same directory, so the directory is spelled out once:
## point TEST_DATA_DIR at your own data folder to reuse this notebook.
TEST_DATA_DIR = "../dash_apps/RNApysoforms/tests/test_data"
ensembl_gtf_path = f"{TEST_DATA_DIR}/Homo_sapiens_chr21_and_Y.GRCh38.110.gtf"
counts_matrix_path = f"{TEST_DATA_DIR}/counts_matrix_chr21_and_Y.tsv"

In [9]:
## Load the ENSEMBL GTF annotation and the counts (expression) matrix
annotation = RNApy.read_ensembl_gtf(ensembl_gtf_path)
counts_matrix = RNApy.read_expression_matrix(
    expression_matrix_path=counts_matrix_path
)

In [10]:
## Restrict both the annotation and the counts matrix to a single gene (SOD1).
sod1_annotation, sod1_counts_matrix = RNApy.gene_filtering(
    annotation=annotation,
    expression_matrix=counts_matrix,
    target_gene="SOD1",
)

## Preview the first rows of the filtered counts matrix
sod1_counts_matrix.head()

transcript_id,gene_id,sample_id,counts
str,str,str,f64
"""ENST00000476106""","""ENSG00000142168""","""sample_1""",0.0
"""ENST00000476106""","""ENSG00000142168""","""sample_4""",0.0
"""ENST00000476106""","""ENSG00000142168""","""sample_7""",0.0
"""ENST00000476106""","""ENSG00000142168""","""sample_2""",0.0
"""ENST00000476106""","""ENSG00000142168""","""sample_6""",0.0


In [11]:
"""
Rescale introns. There is no need to run the "to_intron" function first: shorten_gaps()
already does this by default if introns aren't already included in the annotation.
"""
## NOTE(review): this rebinds `sod1_annotation` to the output of shorten_gaps(), so
## re-running the cell passes the already-processed table back in — confirm that is
## harmless, or re-run the gene filtering cell first.
sod1_annotation = RNApy.shorten_gaps(sod1_annotation)

## Preview the result; the output now includes "intron" rows and the
## rescaled_start / rescaled_end columns used for plotting.
sod1_annotation.head()

gene_id,gene_name,transcript_id,transcript_name,transcript_biotype,seqnames,strand,type,start,end,exon_number,rescaled_start,rescaled_end
str,str,str,str,str,str,str,str,i64,i64,i64,i64,i64
"""ENSG00000142168""","""SOD1""","""ENST00000270142""","""SOD1-201""","""protein_coding""","""21""","""+""","""exon""",31659693,31659841,1,29,177
"""ENSG00000142168""","""SOD1""","""ENST00000270142""","""SOD1-201""","""protein_coding""","""21""","""+""","""CDS""",31659770,31659841,1,106,177
"""ENSG00000142168""","""SOD1""","""ENST00000270142""","""SOD1-201""","""protein_coding""","""21""","""+""","""intron""",31659841,31663790,1,177,1431
"""ENSG00000142168""","""SOD1""","""ENST00000270142""","""SOD1-201""","""protein_coding""","""21""","""+""","""CDS""",31663790,31663886,2,1431,1527
"""ENSG00000142168""","""SOD1""","""ENST00000270142""","""SOD1-201""","""protein_coding""","""21""","""+""","""exon""",31663790,31663886,2,1431,1527


In [12]:
"""
Create traces for plotting. The expression plots will come out in
the order of the columns passed to the `expression_columns` parameter.
This is important if you are passing multiple expression columns,
such as CPM and relative abundance.
"""
traces = RNApy.make_traces(
    annotation=sod1_annotation,
    expression_matrix=sod1_counts_matrix,
    # x positions come from the rescaled columns produced by shorten_gaps()
    x_start="rescaled_start",
    x_end="rescaled_end",
    y="transcript_id",
    annotation_hue="transcript_biotype",
    # hover text uses the original (unscaled) start/end columns
    hover_start="start",
    hover_end="end",
    expression_columns=["counts"],
)

"""
Put the traces into the figure. The order of `subplot_titles` is important.
The first plot will always be "Transcript Structure" if an annotation was passed
to `make_traces()`. After that, the order of the expression plots is determined
by the `expression_columns` parameter passed to the `make_traces()` function.
"""
fig = RNApy.make_plot(
    traces=traces,
    subplot_titles=["Transcript Structure", "Counts"],
    width=1200,
    height=500,
)

## Render the figure inline in the notebook
py.iplot(fig, filename="s2_plot")