<a href="https://colab.research.google.com/github/arana3uic/RNAvelocity/blob/collab_notebooks/RNAvelocity_HCI003GM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Analysis Contributions**

> **Description:** Trial run of notebook script for testing.  
> **Data Address:** Shared Documents > Current Lab Members > Lana > (1)Data > SEURAT sc-RNA seq > RNAvelocityHCI003GM  
> **Data Owner:** Svetlana Semina    
> **Script Developed By:** Svetlana Semina    
> **Notebook Generated By:** Amisha Rana   

In [None]:
#This will install the required Python library packages needed for this tutorial
!pip install numpy==1.23 pandas==1.5.3 matplotlib==3.5.0 scanpy==1.9.1 igraph==0.9.8 scvelo==0.2.4 loompy==3.0.6 anndata==0.8.0

In [None]:
#This will create a new directory called "input-files", then download and extract several input data files needed for this tutorial, then display the list of files now available.

!mkdir input-files
!curl -o input-files/filtered_feature_bc_matrix.tar.gz https://uic365-my.sharepoint.com/personal/semina_uic_edu/_layouts/15/onedrive.aspx?login_hint=semina%40uic%2Eedu&id=%2Fsites%2FO365%2DFrasorLab%2FShared%20Documents%2FCurrent%20Lab%20Members%2FLana%2F%281%29Data%2FSEURAT%20sc%2DRNA%20seq%2FRNAvelocityHCI003GM&listurl=https%3A%2F%2Fuic365%2Esharepoint%2Ecom%2Fsites%2FO365%2DFrasorLab%2FShared%20Documents&viewid=ef1c37e1%2D5e85%2D4d27%2Db503%2Db0cef016c7c4&view=0/filtered_feature_bc_matrix.tar.gz

!curl -o input-files/possorted_genome_bam_CE8EQ.loom https://uic365-my.sharepoint.com/personal/semina_uic_edu/_layouts/15/onedrive.aspx?login_hint=semina%40uic%2Eedu&id=%2Fsites%2FO365%2DFrasorLab%2FShared%20Documents%2FCurrent%20Lab%20Members%2FLana%2F%281%29Data%2FSEURAT%20sc%2DRNA%20seq%2FRNAvelocityHCI003GM&listurl=https%3A%2F%2Fuic365%2Esharepoint%2Ecom%2Fsites%2FO365%2DFrasorLab%2FShared%20Documents&viewid=ef1c37e1%2D5e85%2D4d27%2Db503%2Db0cef016c7c4&view=0/possorted_genome_bam_CE8EQ.loom

!curl -o input-files/HCI0003GM_clusters.csv https://uic365-my.sharepoint.com/personal/semina_uic_edu/_layouts/15/onedrive.aspx?login_hint=semina%40uic%2Eedu&id=%2Fsites%2FO365%2DFrasorLab%2FShared%20Documents%2FCurrent%20Lab%20Members%2FLana%2F%281%29Data%2FSEURAT%20sc%2DRNA%20seq%2FRNAvelocityHCI003GM&listurl=https%3A%2F%2Fuic365%2Esharepoint%2Ecom%2Fsites%2FO365%2DFrasorLab%2FShared%20Documents&viewid=ef1c37e1%2D5e85%2D4d27%2Db503%2Db0cef016c7c4&view=0/HCI0003GM_clusters.csv.csv

!curl -o input-files/HCI0003GM_UMAP_coordinates.csv https://uic365-my.sharepoint.com/personal/semina_uic_edu/_layouts/15/onedrive.aspx?login_hint=semina%40uic%2Eedu&id=%2Fsites%2FO365%2DFrasorLab%2FShared%20Documents%2FCurrent%20Lab%20Members%2FLana%2F%281%29Data%2FSEURAT%20sc%2DRNA%20seq%2FRNAvelocityHCI003GM&listurl=https%3A%2F%2Fuic365%2Esharepoint%2Ecom%2Fsites%2FO365%2DFrasorLab%2FShared%20Documents&viewid=ef1c37e1%2D5e85%2D4d27%2Db503%2Db0cef016c7c4&view=0/HCI0003GM_UMAP_coordinate.csv

!tar -xvzf input-files/filtered_feature_bc_matrix.tar.gz -C input-files/

!ls -lah input-files

In [3]:
# First, import required packages in the current session.

import numpy as np
import pandas as pd
import matplotlib.pyplot as pl
import scanpy as sc
import igraph
import scvelo as scv
import loompy as lmp
import anndata

import warnings
# Suppress every Python warning (all categories, all modules) for the rest of
# this kernel session.
# NOTE(review): this is a blanket filter, so it also hides deprecation and other
# warnings raised by the analysis libraries - consider narrowing it.
warnings.filterwarnings('ignore')

In [4]:
# Load the Seurat exports (cluster assignments and UMAP coordinates), attach them
# to the cellranger count matrix, merge in the velocyto output, run the scVelo
# dynamical model and draw the velocity stream plot on the Seurat UMAP.
#
# scVelo only calculates velocity; the anchors used for visualization (the UMAP
# projection and the barcode -> cluster assignments) come from Seurat.

# Name of the obs column that holds the cluster assignment. Defined once so the
# column that is written and the column the plot colors by cannot drift apart.
CLUSTER_KEY = 'RNA_snn_res.0.2'

# Barcode -> cluster assignments (barcodes are read as the index).
Clusters = pd.read_csv("./input-files/HCI0003GM_clusters.csv", delimiter=',', index_col=0)

# Barcodes of the clustered cells; used below to subset the count matrix.
HCI003GM_BCs = Clusters.index

# UMAP coordinates exported from Seurat (barcodes are read as the index).
UMAP = pd.read_csv("./input-files/HCI0003GM_UMAP_coordinate.csv", delimiter=',', index_col=0)

# Path to the cellranger output.
Path10x = './input-files/filtered_feature_bc_matrix/'

# Read the filtered feature-barcode matrix produced by cellranger count.
HCI003GM = sc.read_10x_mtx(Path10x, var_names='gene_symbols', cache=True)
# display() is used because only the last bare expression of a cell is rendered.
display(HCI003GM)

# These are the barcodes (n_obs); this is the number set in --force-cells.
display(HCI003GM.obs)

# These are the genes (n_vars).
display(HCI003GM.var_names)

HCI003GM_df = HCI003GM.to_df()
display(HCI003GM_df.head())

# Keep only the cells that were clustered in Seurat. The explicit .copy() turns
# the subset from a view into a real object, so the assignments below do not
# trigger an implicit copy.
HCI003GM = HCI003GM[HCI003GM_BCs].copy()

# Add the cluster assignment (first column of the export), aligned on barcode.
# Stored as categorical labels - cluster IDs are names, not magnitudes - so the
# plot colors cells per cluster.
HCI003GM.obs[CLUSTER_KEY] = Clusters.iloc[:, 0].astype(str).astype('category')

# Add the Seurat UMAP. Rows are selected by barcode in the AnnData's own order
# so each coordinate lands on the right cell even if the two CSV exports are
# ordered differently (raises KeyError if a barcode is missing from the UMAP file).
# It is stored under "X_umap" because basis="umap" in the plotting call below is
# looked up as obsm["X_umap"].
UMAP_Numpy = UMAP.loc[HCI003GM.obs_names].to_numpy()
HCI003GM.obsm["X_umap"] = UMAP_Numpy

# Next, read the velocyto output and merge it with the cellranger counts.
VelHCI003GM = scv.read('./input-files/possorted_genome_bam_CE8EQ.loom', cache=True)

# You might get this warning here, but no need to worry:
# "Variable names are not unique. To make them unique, call .var_names_make_unique."
HCI003GM_merged = scv.utils.merge(HCI003GM, VelHCI003GM)
display(HCI003GM_merged)

# Next, process the dataset and obtain latent time values for each cell.
# Standard scvelo processing to run the dynamical mode.
scv.pp.filter_and_normalize(HCI003GM_merged, min_shared_counts=30, n_top_genes=2000)
scv.pp.moments(HCI003GM_merged, n_pcs=30, n_neighbors=30)

scv.tl.recover_dynamics(HCI003GM_merged)
scv.tl.velocity(HCI003GM_merged, mode='dynamical')
scv.tl.velocity_graph(HCI003GM_merged)
scv.tl.recover_latent_time(HCI003GM_merged)

display(HCI003GM_merged)

# Velocity stream plot on the Seurat UMAP, colored by cluster.
scv.pl.velocity_embedding_stream(
    HCI003GM_merged,
    basis="umap",
    color=CLUSTER_KEY,
    title='HCI003GM',
    fontsize=20,
    legend_fontsize=20,
    min_mass=2,
    save='scVelo-umap-cluster.png',
)



FileNotFoundError: ignored