# NanoCount command line usage

### Activate virtual environment

In [1]:
conda activate NanoCount

(NanoCount) 

: 1

### Running NanoCount

In [3]:
NanoCount --help

usage: NanoCount [-h] [--version] -i ALIGNMENT_FILE [-o COUNT_FILE]
                 [-l MIN_READ_LENGTH] [-f MIN_QUERY_FRACTION_ALIGNED]
                 [-t EQUIVALENT_THRESHOLD] [-s SCORING_VALUE]
                 [-c CONVERGENCE_TARGET] [-e MAX_EM_ROUNDS] [-x]
                 [-p PRIMARY_SCORE] [-v] [-q]

EM based transcripts abundance estimation from nanopore reads mapped to a
transcriptome with minimap2

optional arguments:
  -h, --help            show this help message and exit
  --version             show program's version number and exit

Input/Output options:
  -i ALIGNMENT_FILE, --alignment_file ALIGNMENT_FILE
                        BAM or SAM file containing aligned ONT dRNA-Seq reads
                        including secondary and supplementary alignment
                        (required) [str]
  -o COUNT_FILE, --count_file COUNT_FILE
                        Output file path where to write estimated counts (TSV
                        format) (default: None) [str]

Misc 

: 1

#### Basic command

In [4]:
NanoCount -i ./data/aligned_reads.bam -o ./output/tx_counts.tsv
head ./output/tx_counts.tsv

[01;34m## Checking options and input files ##[0m
[01;34m## Initialise Nanocount ##[0m
[32m	Parse Bam file and filter low quality hits[0m
[32m	Generate initial read/transcript compatibility index[0m
[01;34m## Start EM abundance estimate ##[0m
	Progress: 2.00 rounds [00:00, 14.8 rounds/s]
[32m	Exit EM loop after 2 rounds[0m
[32m	Convergence value: 0.0026556625233718663[0m
[01;34m## Summarize data ##[0m
[32m	Convert results to dataframe[0m
[32m	Compute estimated counts and TPM[0m
[32m	Write file[0m
(NanoCount) transcript_name	raw	est_count	tpm
YHR174W_mRNA	0.037525253303454735	921.3950696130275	37525.25330345474
YLR110C_mRNA	0.032926045534605486	808.466122056703	32926.04553460549
YKL060C_mRNA	0.023539952757188228	577.9999999999998	23539.952757188228
YKL152C_mRNA	0.014865195080231321	364.99999999999983	14865.19508023132
YCR012W_mRNA	0.014539382585322141	356.9999999999999	14539.38258532214
YDR050C_mRNA	0.014539382585322141	356.9999999999999	14539.38258532214
YOR369C_mR

: 1

#### Adding extra transcripts information

The `--extra_tx_info` option adds a column with the transcript lengths and also includes all the zero-coverage transcripts in the results.

In [7]:
NanoCount -i ./data/aligned_reads.bam -o ./output/tx_counts.tsv --extra_tx_info
head ./output/tx_counts.tsv

[01;34m## Checking options and input files ##[0m
[01;34m## Initialise Nanocount ##[0m
[32m	Parse Bam file and filter low quality hits[0m
[32m	Generate initial read/transcript compatibility index[0m
[01;34m## Start EM abundance estimate ##[0m
	Progress: 2.00 rounds [00:00, 14.8 rounds/s]
[32m	Exit EM loop after 2 rounds[0m
[32m	Convergence value: 0.0026556625233718663[0m
[01;34m## Summarize data ##[0m
[32m	Convert results to dataframe[0m
[32m	Compute estimated counts and TPM[0m
[32m	Write file[0m
(NanoCount) transcript_name	raw	est_count	tpm	transcript_length
YHR174W_mRNA	0.037525253303454735	921.3950696130275	37525.25330345474	1314
YLR110C_mRNA	0.032926045534605486	808.466122056703	32926.04553460549	402
YKL060C_mRNA	0.023539952757188228	577.9999999999998	23539.952757188228	1080
YKL152C_mRNA	0.014865195080231321	364.99999999999983	14865.19508023132	744
YCR012W_mRNA	0.014539382585322141	356.9999999999999	14539.38258532214	1251
YDR050C_mRNA	0.014539382585322141	356.9

: 1

#### Relaxing the equivalence threshold

The default value of `--equivalent_threshold` is 0.9 (90% of the alignment score of the primary alignment), but this value can be lowered to allow more secondary alignments to be included in the uncertainty calculation.
Lowering the value below 0.75 might not be relevant and will considerably increase the computation time.

In [8]:
NanoCount -i ./data/aligned_reads.bam -o ./output/tx_counts.tsv --equivalent_threshold 0.8
head ./output/tx_counts.tsv

[01;34m## Checking options and input files ##[0m
[01;34m## Initialise Nanocount ##[0m
[32m	Parse Bam file and filter low quality hits[0m
[32m	Generate initial read/transcript compatibility index[0m
[01;34m## Start EM abundance estimate ##[0m
	Progress: 2.00 rounds [00:00, 14.8 rounds/s]
[32m	Exit EM loop after 2 rounds[0m
[32m	Convergence value: 0.0026556625233718663[0m
[01;34m## Summarize data ##[0m
[32m	Convert results to dataframe[0m
[32m	Compute estimated counts and TPM[0m
[32m	Write file[0m
(NanoCount) transcript_name	raw	est_count	tpm
YHR174W_mRNA	0.037525253303454735	921.3950696130275	37525.25330345474
YLR110C_mRNA	0.032926045534605486	808.466122056703	32926.04553460549
YKL060C_mRNA	0.023539952757188228	577.9999999999998	23539.952757188228
YKL152C_mRNA	0.014865195080231321	364.99999999999983	14865.19508023132
YCR012W_mRNA	0.014539382585322141	356.9999999999999	14539.38258532214
YDR050C_mRNA	0.014539382585322141	356.9999999999999	14539.38258532214
YOR369C_mR

: 1

#### Verbose mode

The `--verbose` option prints additional information for QC and debugging.

In [9]:
NanoCount -i ./data/aligned_reads.bam -o ./output/tx_counts.tsv --equivalent_threshold 0.8  --verbose

[01;34m## Checking options and input files ##[0m
[37m	[DEBUG]: Options summary[0m
[37m	[DEBUG]: 	Package name: NanoCount[0m
[37m	[DEBUG]: 	Package version: 0.2.2[0m
[37m	[DEBUG]: 	Timestamp: 2020-06-08 15:34:39.567165[0m
[37m	[DEBUG]: 	quiet: False[0m
[37m	[DEBUG]: 	verbose: True[0m
[37m	[DEBUG]: 	primary_score: [0m
[37m	[DEBUG]: 	extra_tx_info: False[0m
[37m	[DEBUG]: 	max_em_rounds: 100[0m
[37m	[DEBUG]: 	convergence_target: 0.005[0m
[37m	[DEBUG]: 	scoring_value: alignment_score[0m
[37m	[DEBUG]: 	equivalent_threshold: 0.8[0m
[37m	[DEBUG]: 	min_query_fraction_aligned: 0.5[0m
[37m	[DEBUG]: 	min_read_length: 50[0m
[37m	[DEBUG]: 	count_file: ./output/tx_counts.tsv[0m
[37m	[DEBUG]: 	alignment_file: ./data/aligned_reads.bam[0m
[01;34m## Initialise Nanocount ##[0m
[32m	Parse Bam file and filter low quality hits[0m
[37m	[DEBUG]: Summary of reads parsed in input bam file[0m
[37m	[DEBUG]: 	Mapped hits: 156,984[0m
[37m	[DEBUG]: 	Unmapped hits: 9,545[0m


: 1