# NanoCount python API

### Import the package

In [1]:
from NanoCount.NanoCount import NanoCount
from NanoCount.common import jhelp, head

### Running NanoCount 

#### Basic command

In [6]:
# Estimate transcript abundances and write them to a TSV file
NanoCount(
    alignment_file="./data/aligned_reads.bam",
    count_file="./output/tx_counts.tsv",
)
# Preview the first lines of the file that was just written
head("./output/tx_counts.tsv")

[01;34m## Checking options and input files ##[0m
[01;34m## Initialise Nanocount ##[0m
[32m	Parse Bam file and filter low quality hits[0m
[32m	Generate initial read/transcript compatibility index[0m
[01;34m## Start EM abundance estimate ##[0m
	Progress: 3.00 rounds [00:00, 4.59 rounds/s]
[32m	Exit EM loop after 3 rounds[0m
[32m	Convergence value: 0.0019235550878568812[0m
[01;34m## Summarize data ##[0m
[32m	Convert results to dataframe[0m
[32m	Compute estimated counts and TPM[0m
[32m	Write file[0m


transcript_name raw                   est_count          tpm                
YHR174W_mRNA    0.5953263925577137    52155.35459919618  595326.3925577137  
YGR192C_mRNA    0.02051266206449663   1797.073298146421  20512.66206449663  
YLR110C_mRNA    0.009451191671993415  827.9999999999991  9451.191671993416  
YOL086C_mRNA    0.00827549995434206   724.9999999999992  8275.49995434206   
YKL060C_mRNA    0.00659757099808236   577.9999999999994  6597.57099808236   
YPR080W_mRNA    0.005222125833257232  457.49999999999955 5222.1258332572315 
YBR118W_mRNA    0.005222125833257232  457.49999999999955 5222.1258332572315 
YLR044C_mRNA    0.0050680303168660346 443.99999999999955 5068.030316866035  
YKL152C_mRNA    0.004166286183910141  364.99999999999966 4166.2861839101415 



#### Basic command without file writing and Dataframe output

In interactive mode, it is also possible to skip writing the results to a file and instead access the data directly as a pandas DataFrame through the `count_df` attribute.

In [7]:
# No count_file is given: keep the object and read the results from it
nc = NanoCount(
    alignment_file="./data/aligned_reads.bam",
)
# The estimates are exposed as a pandas DataFrame
display(nc.count_df)

[01;34m## Checking options and input files ##[0m
[01;34m## Initialise Nanocount ##[0m
[32m	Parse Bam file and filter low quality hits[0m
[32m	Generate initial read/transcript compatibility index[0m
[01;34m## Start EM abundance estimate ##[0m
	Progress: 3.00 rounds [00:00, 4.68 rounds/s]
[32m	Exit EM loop after 3 rounds[0m
[32m	Convergence value: 0.0019235550878568812[0m
[01;34m## Summarize data ##[0m
[32m	Convert results to dataframe[0m
[32m	Compute estimated counts and TPM[0m


Unnamed: 0_level_0,raw,est_count,tpm
transcript_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
YHR174W_mRNA,5.953264e-01,52155.354599,595326.392558
YGR192C_mRNA,2.051266e-02,1797.073298,20512.662064
YLR110C_mRNA,9.451192e-03,828.000000,9451.191672
YOL086C_mRNA,8.275500e-03,725.000000,8275.499954
YKL060C_mRNA,6.597571e-03,578.000000,6597.570998
...,...,...,...
YHR218W_mRNA,5.474187e-08,0.004796,0.054742
YBL111C_mRNA,5.474187e-08,0.004796,0.054742
YOR248W_mRNA,3.727178e-08,0.003265,0.037272
YOR309C_mRNA,3.377066e-08,0.002959,0.033771


#### Adding extra transcripts information

The `extra_tx_info` option adds a column with the transcript lengths and also includes all the zero-coverage transcripts in the results. This requires an alignment file with a valid BAM/SAM header.

In [9]:
# extra_tx_info adds the transcript_length column and the zero-coverage transcripts
nc = NanoCount(
    alignment_file="./data/aligned_reads.bam",
    extra_tx_info=True,
)
display(nc.count_df)

[01;34m## Checking options and input files ##[0m
[01;34m## Initialise Nanocount ##[0m
[32m	Parse Bam file and filter low quality hits[0m
[32m	Generate initial read/transcript compatibility index[0m
[01;34m## Start EM abundance estimate ##[0m
	Progress: 3.00 rounds [00:00, 4.34 rounds/s]
[32m	Exit EM loop after 3 rounds[0m
[32m	Convergence value: 0.0019235550878568812[0m
[01;34m## Summarize data ##[0m
[32m	Convert results to dataframe[0m
[32m	Compute estimated counts and TPM[0m


Unnamed: 0,raw,est_count,tpm,transcript_length
YHR174W_mRNA,0.595326,52155.354599,595326.392558,1314
YGR192C_mRNA,0.020513,1797.073298,20512.662064,999
YLR110C_mRNA,0.009451,828.000000,9451.191672,402
YOL086C_mRNA,0.008275,725.000000,8275.499954,1047
YKL060C_mRNA,0.006598,578.000000,6597.570998,1080
...,...,...,...,...
YPR200C_mRNA,0.000000,0.000000,0.000000,393
YPR201W_mRNA,0.000000,0.000000,0.000000,1215
YPR202W_mRNA,0.000000,0.000000,0.000000,717
YPR203W_mRNA,0.000000,0.000000,0.000000,309


#### Relaxing the equivalence threshold

The default value is 0.9, meaning that a secondary alignment is kept only if its alignment score is at least 90% of the score of the primary alignment. This value can be lowered to allow more secondary alignments to be included in the uncertainty calculation.
Lowering the value below 0.75 might not be relevant and will considerably increase the computation time.

In [2]:
# Same run as the basic command, with a lower equivalence threshold (default 0.9)
NanoCount(
    alignment_file="./data/aligned_reads.bam",
    count_file="./output/tx_counts.tsv",
    equivalent_threshold=0.8,
    extra_tx_info=True,
)
# Preview the first lines of the file that was just written
head("./output/tx_counts.tsv")

[01;34m## Checking options and input files ##[0m
[01;34m## Initialise Nanocount ##[0m
[32m	Parse Bam file and filter low quality hits[0m
[32m	Generate initial read/transcript compatibility index[0m
[01;34m## Start EM abundance estimate ##[0m
	Progress: 17.0 rounds [00:03, 4.44 rounds/s]
[32m	Exit EM loop after 17 rounds[0m
[32m	Convergence value: 0.004988077857734706[0m
[01;34m## Summarize data ##[0m
[32m	Convert results to dataframe[0m
[32m	Compute estimated counts and TPM[0m
[32m	Write file[0m


transcript_name raw                  est_count          tpm                transcript_length 
YHR174W_mRNA    0.5847481080119605   51228.61224671184  584748.1080119605  1314              
YGR192C_mRNA    0.015286737423038144 1339.2404921575258 15286.737423038145 999               
YGR254W_mRNA    0.011624369387633806 1018.3877533118225 11624.369387633806 1314              
YLR110C_mRNA    0.00945119167199341  827.9999999999986  9451.19167199341   402               
YJR009C_mRNA    0.009088112600958011 796.1913687447295  9088.112600958011  999               
YOL086C_mRNA    0.008275499954342055 724.9999999999987  8275.499954342056  1047              
YKL060C_mRNA    0.006597570998082356 577.9999999999991  6597.570998082356  1080              
YBR118W_mRNA    0.005222125833257228 457.49999999999926 5222.125833257228  1377              
YPR080W_mRNA    0.005222125833257228 457.49999999999926 5222.125833257228  1377              



### NanoCount help

In [12]:
# Render the NanoCount signature and the description of each option
jhelp(NanoCount)

**NanoCount** (alignment_file, count_file, min_read_length, min_query_fraction_aligned, equivalent_threshold, scoring_value, convergence_target, max_em_rounds, extra_tx_info, verbose, quiet)

Estimate abundance of transcripts using an EM

---

* **alignment_file** (required) [str]

BAM or SAM file containing aligned ONT dRNA-Seq reads including secondary and supplementary alignment

* **count_file** (default: "") [str]

Output file path where to write estimated counts (TSV format)

* **min_read_length** (default: 50) [int]

Minimal length of the read to be considered valid

* **min_query_fraction_aligned** (default: 0.5) [float]

Minimal fraction of the primary hit query aligned to consider the read valid

* **equivalent_threshold** (default: 0.9) [float]

Fraction of the alignment score or the alignment length of secondary hits compared to the primary hit to be considered valid hits

* **scoring_value** (default: alignment_score) [str]

Value to use for score thresholding of secondary hits either "alignment_score" or "alignment_length"

* **convergence_target** (default: 0.005) [float]

Convergence target value of the cummulative difference between abundance values of successive EM round to trigger the end of the EM loop.

* **max_em_rounds** (default: 100) [int]

Maximum number of EM rounds before triggering stop

* **extra_tx_info** (default: False) [bool]

Add transcripts length and zero coverage transcripts to the output file (required valid bam/sam header)

* **verbose** (default: False) [bool]

Increase verbosity for QC and debugging

* **quiet** (default: False) [bool]

Reduce verbosity

