# Meth_Comp API usage

## Import module

In [2]:
# Import main module 
from pycoMeth.Meth_Comp import Meth_Comp

# Optionally import Jupyter helper functions
from pycoMeth.common import head, jhelp

## Getting help

In [3]:
# Show the signature and the per-parameter documentation of Meth_Comp
jhelp(Meth_Comp)

**Meth_Comp** (aggregate_fn_list, ref_fasta_fn, output_tsv_fn, output_bed_fn, max_missing, min_diff_llr, sample_id_list, pvalue_adj_method, pvalue_threshold, verbose, quiet, progress, kwargs)

Compare methylation values for each CpG position or interval between n samples and perform a statistical test to evaluate if the positions are significantly different. For 2 samples a Mann-Whitney test is performed, otherwise multiple samples are compared with a Kruskal-Wallis test. pValues are adjusted for multiple tests using the Benjamini & Hochberg procedure for controlling the false discovery rate.

---

* **aggregate_fn_list** (required) [list(str)]

A list of output tsv files corresponding to several samples to compare generated by either CpG_Aggregate or Interval_Aggregate. (can be gzipped)

* **ref_fasta_fn** (required) [str]

Reference file used for alignment in Fasta format (ideally already indexed with samtools faidx)

* **output_tsv_fn** (default: None) [str]

Path to write a more extensive result report in TSV format (At least 1 output file is required) (can be gzipped)

* **output_bed_fn** (default: None) [str]

Path to write a summary result file in BED format (At least 1 output file is required) (can be gzipped)

* **max_missing** (default: 0) [int]

Max number of missing samples to perform the test

* **min_diff_llr** (default: 2) [float]

Minimal llr boundary for negative and positive median llr. The test is only performed if at least one sample has a median llr above (methylated) and 1 sample has a median llr below (unmethylated)

* **sample_id_list** (default: None) [list(str)]

list of sample ids to annotate results in tsv file

* **pvalue_adj_method** (default: fdr_bh) [str]

Method to use for pValue multiple test adjustment

* **pvalue_threshold** (default: 0.01) [float]

Alpha parameter (family-wise error rate) for pValue adjustment

* **verbose** (default: False) [bool]

* **quiet** (default: False) [bool]

* **progress** (default: False) [bool]

* **kwargs**



## Example usage

#### Usage with CpG Aggregate output

In [38]:
# Compare the four yeast samples using per-CpG aggregated input files
ff = Meth_Comp(
    # Reference and per-sample input files, labelled S1..S4 in the results
    ref_fasta_fn="./data/yeast.fa",
    aggregate_fn_list=[
        "./data/Yeast_sample_1_CpG.tsv.gz",
        "./data/Yeast_sample_2_CpG.tsv.gz",
        "./data/Yeast_sample_3_CpG.tsv.gz",
        "./data/Yeast_sample_4_CpG.tsv.gz",
    ],
    sample_id_list=["S1", "S2", "S3", "S4"],
    # Filtering: tolerate one missing sample, no minimal llr boundary
    max_missing=1,
    min_diff_llr=0,
    # Result files (extensive TSV report and BED summary)
    output_tsv_fn="./results/CpG_Yeast.tsv.gz",
    output_bed_fn="./results/CpG_Yeast.bed",
    progress=True,
)

# Preview the first lines of both result files
head("./results/CpG_Yeast.tsv.gz")
head("./results/CpG_Yeast.bed")

## Checking options and input files ##
## Parsing files ##
	Reading input files header and checking consistancy between headers
	Starting asynchronous file parsing
37.1M bytes [00:17, 2.09M bytes/s]                       
	Adjust pvalues
	Writing output file
100%|██████████| 19.2k/19.2k [00:01<00:00, 11.8k sites/s]
	Results summary
		Sites with insufficient samples: 121,015
		Sites with insufficient effect size: 104,566
		Valid sites: 19,234
		Sites with non-significant adjusted pvalue: 19,234
		Sites with non-significant pvalue: 19,229
		Sites with significant pvalue: 5


chromosome start end  n_samples pvalue              adj_pvalue          neg_med pos_med ambiguous_med labels           med_llr_list         raw_llr_list                                                          
I          542   543  3         0.18009231214795254 0.48089622821792577 2       1       0             ["S1","S2","S4"] [2.365,-2.8,-1.67]   [[2.14,2.59],[-2.03,-3.57],[0.47,-3.81]]                              
I          1755  1756 3         0.5020322546363214  0.6505901437764703  2       1       0             ["S1","S2","S4"] [0.56,-2.515,-1.76]  [[0.56,0.85,-0.96,-6.89,1.52],[-4.2,-0.83],[-1.76,-2.91,-0.04]]       
I          2563  2564 3         0.599677279263108   0.7093599501443185  2       1       0             ["S1","S2","S4"] [0.385,-1.875,-0.93] [[2.01,0.75,-5.17,0.02],[2.53,-6.28],[-1.09,-0.28,-1.18,-0.34,-0.93]] 
I          3148  3149 3         0.133468619474501   0.48089622821792577 2       1       0             ["S1","S3","S4"] [-1.3,0.14,-2.13]    [[-1.3,-3.75,-0.

#### Usage with Interval Aggregate output

In [40]:
# Compare the four medaka samples using interval (CGI) aggregated input files
ff = Meth_Comp(
    # Reference and per-sample input files, labelled S1..S4 in the results
    ref_fasta_fn="./data/medaka.fa",
    aggregate_fn_list=[
        "./data/Medaka_sample_1_CGI.tsv.gz",
        "./data/Medaka_sample_2_CGI.tsv.gz",
        "./data/Medaka_sample_3_CGI.tsv.gz",
        "./data/Medaka_sample_4_CGI.tsv.gz",
    ],
    sample_id_list=["S1", "S2", "S3", "S4"],
    # Filtering: tolerate one missing sample, minimal llr boundary of 1
    max_missing=1,
    min_diff_llr=1,
    # Result files (extensive TSV report and BED summary)
    output_tsv_fn="./results/CGI_Medaka.tsv.gz",
    output_bed_fn="./results/CGI_Medaka.bed",
    progress=True,
)

# Preview the first lines of both result files
head("./results/CGI_Medaka.tsv.gz")
head("./results/CGI_Medaka.bed")

## Checking options and input files ##
## Parsing files ##
	Reading input files header and checking consistancy between headers
	Starting asynchronous file parsing
91.7M bytes [00:13, 6.80M bytes/s]                       
	Adjust pvalues
	Writing output file
100%|██████████| 3.48k/3.48k [00:00<00:00, 6.70k sites/s]
	Results summary
		Sites with insufficient effect size: 240,179
		Sites with insufficient samples: 16,759
		Valid sites: 3,484
		Sites with non-significant adjusted pvalue: 2,115
		Sites with non-significant pvalue: 1,829
		Sites with significant pvalue: 1,655
		Sites with significant adjusted pvalue: 1,369


chromosome start   end     n_samples pvalue                 adj_pvalue             neg_med pos_med ambiguous_med labels                med_llr_list                 raw_llr_list                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        