## Plot specific transcripts

### Hand-pick which transcripts to plot and plot them in a specific order

In [3]:
import RNApysoforms as RNApy
import polars as pl
import plotly.offline as py
## Initialise plotly's offline notebook mode before any figure is shown with `py.iplot`.
## NOTE(review): `connected=True` presumably loads plotly.js from a CDN instead of
## embedding it in the notebook -- confirm against the plotly documentation.
py.init_notebook_mode(connected=True)

In [4]:
## Input files: ENSEMBL GTF annotation, counts matrix, and sample metadata
ensembl_gtf_path = "../dash_apps/RNApysoforms/tests/test_data/Homo_sapiens_chr21_and_Y.GRCh38.110.gtf"
counts_matrix_path = "../dash_apps/RNApysoforms/tests/test_data/counts_matrix_chr21_and_Y.tsv"
metadata_path = "../dash_apps/RNApysoforms/tests/test_data/sample_metadata.tsv"


## Load the annotation from the GTF file
annotation = RNApy.read_ensembl_gtf(ensembl_gtf_path)

## Load the counts matrix together with the sample metadata, requesting
## CPM normalization and relative abundance
counts_matrix = RNApy.read_expression_matrix(
    expression_matrix_path=counts_matrix_path,
    metadata_path=metadata_path,
    cpm_normalization=True,
    relative_abundance=True,
)


## Restrict both tables to the APP gene; no expression-based isoform filter
## is applied, transcripts are only ordered by the "counts" column
app_annotation, app_expresison_matrix = RNApy.gene_filtering(
    annotation=annotation,
    expression_matrix=counts_matrix,
    target_gene="APP",
    order_by_expression=True,
    order_by_expression_column="counts",
)

## Rescale introns (shorten the gaps in the annotation)
app_annotation = RNApy.shorten_gaps(app_annotation)


"""
Filter only the desired transcripts and keep only them in `app_annotation`.
`make_traces()` only plots transcripts present in both the annotation and the
expression matrix when both are passed (it does give a warning when that happens),
so only the transcripts you kept in the annotation will be plotted.
"""
## Hand-picked transcripts, listed in the order they should be plotted
transcript_to_keep = ["ENST00000348990", "ENST00000707133"]
app_annotation = app_annotation.filter(pl.col("transcript_id").is_in(transcript_to_keep))


## Order transcripts based on `transcript_to_keep` order.
## Each transcript ID is mapped to its position in the list. `replace` keeps the
## string dtype of the column, so the positions are written as strings and then
## cast to Int64: sorting the raw strings would be lexical ("10" before "2") and
## would scramble the order as soon as more than 10 transcripts are selected.
transcript_rank = {
    transcript_id: str(rank) for rank, transcript_id in enumerate(transcript_to_keep)
}
app_annotation = (
    app_annotation
    .with_columns(
        pl.col("transcript_id")
        .cast(pl.Utf8)
        .replace(transcript_rank)
        .cast(pl.Int64)  # numeric key -> numeric (not lexical) sort
        .alias("sort_key")
    )
    ## Descending: the first listed transcript ends up last in the frame.
    ## NOTE(review): presumably because the plot draws the y axis bottom-up -- confirm.
    .sort("sort_key", descending=True)
    .drop("sort_key")
)


"""
Create traces for plotting, make sure to set the
`order_transcripts_by_expression_matrix` to False so
that the order of the annotation is the one that determines
the order in which the transcripts are plotted.
"""
## Expression panels to draw and the matching subplot titles
## (first title belongs to the transcript structure panel)
expression_columns = ["counts", "CPM", "relative_abundance"]
subplot_titles = ["Transcript Structure", "Counts", "CPM", "Relative Abundance"]

## Build the traces from the hand-ordered annotation and the APP expression matrix
traces = RNApy.make_traces(
    annotation=app_annotation,
    expression_matrix=app_expresison_matrix,
    x_start="rescaled_start",
    x_end="rescaled_end",
    y="transcript_id",
    annotation_hue="transcript_biotype",
    hover_start="start",
    hover_end="end",
    expression_columns=expression_columns,
    expression_hue="AD status",
    marker_size=3,
    arrow_size=7,
    ## Order by annotation order instead of expression matrix
    order_transcripts_by_expression_matrix=False,
)

## Assemble the traces into a single figure
fig = RNApy.make_plot(
    traces=traces,
    subplot_titles=subplot_titles,
    width=1200,
    height=500,
    boxgap=0.1,
    boxgroupgap=0.5,
)

## Render the figure in the notebook
py.iplot(fig, filename="s5_plot")



18 transcript(s) are present in the expression matrix but missing in the annotation. Missing transcripts: ENST00000346798, ENST00000354192, ENST00000357903, ENST00000358918, ENST00000359726, ENST00000415997, ENST00000439274, ENST00000440126, ENST00000448850, ENST00000462267, ENST00000463070, ENST00000464867, ENST00000466453, ENST00000474136, ENST00000491395, ENST00000548570, ENST00000707132, ENST00000707134. Only transcripts present in both will be used for making traces.

