# Meth_Comp API usage

## Import module

In [1]:
# Import main module 
from pycoMeth.Meth_Comp import Meth_Comp

# optionally import jupyter helper functions
from pycoMeth.common import head, jhelp

## Getting help

In [23]:
# Display the signature and the per-argument documentation of Meth_Comp
jhelp(Meth_Comp)

**Meth_Comp** (aggregate_fn_list, ref_fasta_fn, output_tsv_fn, output_bed_fn, max_missing, min_diff_llr, sample_id, verbose, quiet, progress, kwargs)

Compare methylation values for each CpG position or interval between n samples and perform a statistical test to evaluate if the positions are significantly different. For 2 samples a Mann-Whitney test is performed, otherwise multiple samples are compared with a Kruskal-Wallis test. p-values are adjusted for multiple tests using the Benjamini & Hochberg procedure for controlling the false discovery rate.

---

* **aggregate_fn_list** (required) [list(str)]

A list of output tsv files corresponding to several samples to compare generated by either CpG_Aggregate or Interval_Aggregate.

* **ref_fasta_fn** (required) [str]

Reference file used for alignment in Fasta format (ideally already indexed with samtools faidx)

* **output_tsv_fn** (default: None) [str]

Path to write a more extensive result report in TSV format (At least 1 output file is required)

* **output_bed_fn** (default: None) [str]

Path to write a summary result file in BED format (At least 1 output file is required)

* **max_missing** (default: 0) [int]

Max number of missing samples to perform the test

* **min_diff_llr** (default: 2) [int]

Minimal llr boundary for negative and positive median llr. The test is only performed if at least one sample has a median llr above (methylated) and 1 sample has a median llr below (unmethylated)

* **sample_id** (default: "") [str]

Sample ID to be used for the BED track header

* **verbose** (default: False) [bool]

* **quiet** (default: False) [bool]

* **progress** (default: False) [bool]

* **kwargs**



## Example usage

#### Usage with CpG Aggregate output

In [4]:
# Compare the four yeast samples, starting from their per-sample
# aggregate TSV files, and write both a BED and a TSV report.
ff = Meth_Comp(
    aggregate_fn_list=[
        "./data/Yeast_sample_1_CpG.tsv.gz",
        "./data/Yeast_sample_2_CpG.tsv.gz",
        "./data/Yeast_sample_3_CpG.tsv.gz",
        "./data/Yeast_sample_4_CpG.tsv.gz",
    ],
    ref_fasta_fn="./data/yeast.fa",
    output_bed_fn="./results/CpG_Yeast.bed",
    output_tsv_fn="./results/CpG_Yeast.tsv.gz",
    sample_id_list=["S1", "S2", "S3", "S4"],
    max_missing=1,  # a site is still tested when one sample is missing
    min_diff_llr=1,
    progress=True,
)

# Preview the beginning of each result file
head("./results/CpG_Yeast.tsv.gz")
head("./results/CpG_Yeast.bed")

## Checking options and input files ##
## Parsing files ##
	Reading input files header and checking consistancy between headers
	Starting asynchronous file parsing
37.1M bytes [00:10, 3.64M bytes/s]                       
	Adjust pvalues
	Writing output file
100%|██████████| 4.78k/4.78k [00:00<00:00, 16.4k sites/s]
	Results summary
		Sites with insufficient samples: 121,015
		Valid sites: 4,779
		Sites with insufficient effect size: 119,021
		Sites with significant pvalue (< 0.01): 219


chromosome start end   n_samples pvalue              statistic          adj_pvalue          neg_med pos_med ambiguous_med labels           med_llr_list         raw_llr_list                                                 
I          542   543   3         0.18009231214795254 3.428571428571427  0.3730650887538211  2       1       0             ["S1","S2","S4"] [2.365,-2.8,-1.67]   [[2.14,2.59],[-2.03,-3.57],[0.47,-3.81]]                     
I          3367  3368  3         0.133468619474501   4.027777777777779  0.3730650887538211  1       1       1             ["S1","S3","S4"] [-1.22,1.425,-0.15]  [[-1.22,0.38,-1.79],[0.43,2.42],[0.06,-0.15,-1.75]]          
I          3516  3517  3         0.20816079512556984 3.1388888888888893 0.3895932997767732  2       1       0             ["S1","S3","S4"] [-1.7,1.56,-4.52]    [[-2.26,-1.7,3.09],[2.54,0.58],[-1.06,-4.52,-7.95]]          
I          3674  3675  3         0.23587708298570004 2.8888888888888893 0.40783523139966066 2       1       0   

#### Usage with Interval Aggregate output

In [7]:
# Compare the four medaka samples, starting from their per-sample
# aggregate TSV files, and write both a BED and a TSV report.
ff = Meth_Comp(
    aggregate_fn_list=[
        "./data/Medaka_sample_1_CGI.tsv.gz",
        "./data/Medaka_sample_2_CGI.tsv.gz",
        "./data/Medaka_sample_3_CGI.tsv.gz",
        "./data/Medaka_sample_4_CGI.tsv.gz",
    ],
    ref_fasta_fn="./data/medaka.fa",
    output_bed_fn="./results/CGI_Medaka.bed",
    output_tsv_fn="./results/CGI_Medaka.tsv.gz",
    sample_id_list=["S1", "S2", "S3", "S4"],
    max_missing=1,  # a site is still tested when one sample is missing
    min_diff_llr=2,
    progress=True,
)

# Preview the beginning of each result file
head("./results/CGI_Medaka.tsv.gz")
head("./results/CGI_Medaka.bed")

## Checking options and input files ##
## Parsing files ##
	Reading input files header and checking consistancy between headers
	Starting asynchronous file parsing
91.7M bytes [00:11, 7.69M bytes/s]                       
	Adjust pvalues
	Writing output file
100%|██████████| 706/706 [00:00<00:00, 9.11k sites/s]
	Results summary
		Sites with insufficient samples: 16,759
		Sites with insufficient effect size: 242,957
		Valid sites: 706
		Sites with significant pvalue (< 0.01): 659
		Sites with significant FDR adj pvalue (< 0.01): 656


chromosome start   end     n_samples pvalue                 statistic          adj_pvalue             neg_med pos_med ambiguous_med labels                med_llr_list                 raw_llr_list                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     