# Interval_Aggregate API usage

## Import module

In [2]:
# Import main module 
from pycoMeth.Interval_Aggregate import Interval_Aggregate

# Optionally import Jupyter helper functions
from pycoMeth.common import head, jhelp
import sys

## Getting help

In [3]:
# Render the documentation of Interval_Aggregate in the notebook
jhelp(Interval_Aggregate)

**Interval_Aggregate** (cpg_aggregate_fn, ref_fasta_fn, interval_bed_fn, output_bed_fn, output_tsv_fn, interval_size, min_cpg_per_interval, sample_id, min_llr, verbose, quiet, progress, kwargs)

Aggregate data for all CpG sites falling in each interval. Intervals are either defined by a BED file, if one is provided, or generated as sliding windows.

---

* **cpg_aggregate_fn** (required) [str]

Output tsv file generated by CpG_Aggregate

* **ref_fasta_fn** (required) [str]

Reference file used for alignment in Fasta format (ideally already indexed with samtools faidx)

* **interval_bed_fn** (default: None) [str]

SORTED BED file containing annotations to use as intervals in which CpG data are aggregated (Optional)

* **output_bed_fn** (default: None) [str]

Path to write a summary result file in BED format (At least 1 output file is required in CLI mode)

* **output_tsv_fn** (default: None) [str]

Path to write a more extensive result report in TSV format (At least 1 output file is required in CLI mode)

* **interval_size** (default: 1000) [int]

Size of the sliding window in which to aggregate CpG sites data from if no BED file is provided

* **min_cpg_per_interval** (default: 5) [int]

Minimal number of CpG sites per interval.

* **sample_id** (default: "") [str]

Sample ID to be used for the BED track header

* **min_llr** (default: 2) [float]

Minimal log likelihood ratio to consider a site significantly methylated or unmethylated in output BED file

* **verbose** (default: False) [bool]

* **quiet** (default: False) [bool]

* **progress** (default: False) [bool]

* **kwargs**



## Example usage

### Default usage with sliding windows

In [3]:
# No interval BED file is given here, so CpG data are aggregated in
# sliding windows of `interval_size` bases.
ff = Interval_Aggregate(
    # Inputs
    cpg_aggregate_fn="./data/CpG_Aggregate_sample_1.tsv",
    ref_fasta_fn="./data/ref.fa",
    # Outputs
    output_tsv_fn="./results/Interval_Aggregate_sample_1.tsv",
    output_bed_fn="./results/Interval_Aggregate_sample_1.bed",
    # Options: 500-base windows holding at least 3 CpG sites
    sample_id="sample_1",
    interval_size=500,
    min_cpg_per_interval=3,
    progress=True,
)

# Preview the extensive TSV report first, then the BED summary
head("./results/Interval_Aggregate_sample_1.tsv")
head("./results/Interval_Aggregate_sample_1.bed")

## Checking options and input files ##
## Parsing CpG_aggregate file ##
	Input file: 100%|██████████| 5.82M/5.82M [00:02<00:00, 2.69M bytes/s]
	Results summary
		Lines parsed: 89,392
		Total number of intervals: 24,319
		Empty intervals skipped: 14,390
		Valid intervals written: 9,195
		Low CpG intervals skipped: 734


chromosome start end  num_motifs median_llr llr_list                                                                                       
I          500   1000 12         -3.35      [-1.14,-3.54,-7.24,-4.3,0.56,-0.65,-4.37,-3.78,-0.27,-1.32,-3.35]                              
I          1000  1500 22         -3.65      [-2.48,-5.035,-4.16,-3.315,-3.295,-1.69,-9.885,-7.95,-3.65,-3.0,-2.83,-8.36,-8.56]             
I          1500  2000 19         -3.4       [-5.71,-6.05,-0.925,-7.165,-3.975,0.56,-1.78,-1.86,-3.67,-3.4,-5.53,-1.06,-1.79,-1.94,-6.22]   
I          2000  2500 15         -4.272     [-5.24,-3.07,-4.33,-19.055,-7.55,-1.255,-2.565,-4.215,-5.515,-3.48]                            
I          2500  3000 19         -2.5       [-0.705,0.385,-6.685,-10.175,-4.27,-2.3,-2.5,-2.52,-1.425,-4.255,-2.73,0.35,-9.31,-1.05,-0.99] 
I          3000  3500 9          -1.4       [-2.34,-2.19,-1.4,-1.0,0.29,-1.3,-6.9,-1.22,-1.92]                                             
I          3500  400

### Usage with a CpG Islands annotation Bed file

In [4]:
# CpG data are aggregated within the intervals listed in the (sorted)
# annotation BED file passed as `interval_bed_fn`.
ff = Interval_Aggregate(
    # Inputs
    cpg_aggregate_fn="./data/CpG_Aggregate_sample_1.tsv",
    ref_fasta_fn="./data/ref.fa",
    interval_bed_fn="./data/Yeast_CGI.bed",
    # Outputs
    output_tsv_fn="./results/CGI_Aggregate_sample_1.tsv",
    output_bed_fn="./results/CGI_Aggregate_sample_1.bed",
    # Options: an interval needs at least 1 CpG site
    sample_id="sample_1",
    min_cpg_per_interval=1,
    progress=True,
)

# Preview the extensive TSV report first, then the BED summary
head("./results/CGI_Aggregate_sample_1.tsv")
head("./results/CGI_Aggregate_sample_1.bed")

## Checking options and input files ##
## Parsing CpG_aggregate file ##
	Input file: 100%|█████████▉| 5.81M/5.82M [00:00<00:00, 6.16M bytes/s]
	Results summary
		Lines parsed: 89,235
		Total number of intervals: 2,041
		Empty intervals skipped: 1,323
		Valid intervals written: 718


chromosome start end   num_motifs median_llr llr_list                                                                                                                
I          1804  2170  14         -3.67      [-3.67,-3.4,-5.53,-1.06,-1.79,-1.94,-6.22,-5.24,-3.07,-4.33,-19.055]                                                    
I          31835 32949 10         -5.65      [-2.925,-6.055,-1.785,-5.65,-6.83,-1.695,-12.32]                                                                        
I          33497 34371 19         -3.295     [-4.38,-3.32,-1.29,-3.27,-5.89,-8.96,-6.88,-2.22,-3.605,-1.07,-6.465,-1.735,0.15,-0.96]                                 
I          44730 44988 9          -3.2       [-2.37,-4.9,-1.63,-1.69,-8.09,-4.03]                                                                                    
I          47889 48187 13         -4.55      [-4.55,-9.41,-3.37,-4.66,-3.24,-4.66,-4.535]                                                                            
I   

### Example with multiple files

In [3]:
# Run the same sliding-window aggregation for samples 1 to 4; input and
# output paths are derived from the sample number.
for i in range(1, 5):
    # Emit the sample banner (no trailing newline) and flush it straight away
    print(f"##### SAMPLE {i} #####", end="", flush=True)

    ff = Interval_Aggregate(
        # Inputs
        cpg_aggregate_fn=f"./data/CpG_Aggregate_sample_{i}.tsv",
        ref_fasta_fn="./data/ref.fa",
        # Outputs
        output_tsv_fn=f"./results/Interval_Aggregate_sample_{i}.tsv",
        output_bed_fn=f"./results/Interval_Aggregate_sample_{i}.bed",
        # Options: 500-base windows with at least 3 CpG sites, LLR threshold of 1
        sample_id=f"sample_{i}",
        interval_size=500,
        min_cpg_per_interval=3,
        min_llr=1,
        quiet=True,
    )

##### SAMPLE 1 #####

## Checking options and input files ##
## Parsing CpG_aggregate file ##


##### SAMPLE 2 #####

## Checking options and input files ##
## Parsing CpG_aggregate file ##


##### SAMPLE 3 #####

## Checking options and input files ##
## Parsing CpG_aggregate file ##


##### SAMPLE 4 #####

## Checking options and input files ##
## Parsing CpG_aggregate file ##
