# 0 - Set up

First, import the necessary modules from illuminator, and set the logger verbosity to the level that suits you.

In [None]:
from illuminator.annotations import Annotations, ArrayType, GenomeVersion
from illuminator.samples import Samples, read_samples
from illuminator.visualizations import plot_betas, betas_dendrogram, betas_mds
from illuminator.utils import set_logger

# set the verbosity level, can be DEBUG, INFO, WARNING, ERROR
set_logger('INFO')

# 1 - Read .idat files

### 1.1 - Define and read annotation
First, define the array type of your data, and the genome version, to read the associated information files (manifest, masks, genome information...)

In [None]:
# define these two parameters to match your data; they are the only values
# you need to edit in this cell
array_type = ArrayType.HUMAN_EPIC_V2
genome_version = GenomeVersion.HG38

# read the annotation for the array type and genome version chosen above
# (pass the variables rather than repeating the enum values, so that editing
# the parameters actually changes which annotation is read)
annos = Annotations(array_type, genome_version)

### 1.2 - Read samples .idat

Now set the paths to your data, and to the sample sheet if you have one. Uncomment the version you want to use.

For more parameters and information, run `read_samples?`

In [None]:
# path to the folder containing the .idat files (they can be in sub-folders, this is the root directory)
# NOTE: this is an example path - replace it with the location of your own data
datadir = '/home/elsa/Documents/data/methylation/EPIC/full methylome FSHD'

# Option 1 : no sample sheet provided : it will be generated automatically
# my_samples = read_samples(datadir, annotation=annos)  

# Option 2 (the one active in this cell) : with sample sheet name - works if the sample sheet is in the root directory
sample_sheet_name = 'samplesheet.csv'
my_samples = read_samples(datadir, sample_sheet_name=sample_sheet_name, annotation=annos)  

# Option 3 : with sample sheet path - if the sample sheet is located elsewhere
# sample_sheet_path = '/home/user/Documents/samplesheet.csv'
# my_samples = read_samples(datadir, sample_sheet_path=sample_sheet_path, annotation=annos)  

### 1.3 - Save illuminator samples (optional)

As reading lots of samples can take a couple of minutes, you might want to save the Samples object to a file for later use. Here is how.

In [None]:
# save the Samples object as read from the .idat files (before any preprocessing) under the name 'raw_samples'
my_samples.save('raw_samples')

### 1.4 - Load illuminator samples (optional)

Later on, here is how to load the saved samples:

In [None]:
# reload the Samples object saved in step 1.3 (use the same name that was passed to save)
my_samples = Samples.load('raw_samples')

# 2 - Calculate and Plot Beta values

Once your samples are loaded, you can already calculate and plot the beta values of the raw data :

In [None]:
# calculate the beta values from the raw (not yet preprocessed) data, then plot them
my_samples.calculate_betas()
plot_betas(my_samples)

# 3 - Preprocessing

Here is the usual preprocessing pipeline for human samples. Note that each step modifies the Samples object directly, so it's useful to save the raw samples first (step 1.3) if you want to try different preprocessing methods.

In [None]:
# preprocessing steps, applied in this order; each call modifies my_samples directly
my_samples.apply_quality_mask()
my_samples.infer_type1_channel()
my_samples.dye_bias_correction_nl()
my_samples.poobah()
my_samples.noob_background_correction()
# recalculate the beta values, now from the preprocessed data
my_samples.calculate_betas()
# optional : uncomment to save the preprocessed samples for later use
# my_samples.save('preprocessed_samples')  

Now let's see how preprocessing changed our beta values!

In [None]:
# NOTE(review): calculate_betas() is already called at the end of the preprocessing cell,
# so this second call looks redundant - confirm before removing it
my_samples.calculate_betas()
plot_betas(my_samples)

# 4 - Data insights

In [None]:
# presumably a multidimensional scaling (MDS) plot of the samples' beta values - run `betas_mds?` to confirm
betas_mds(my_samples)

In [None]:
# NOTE(review): unlike plot_betas and betas_mds, which receive my_samples, this function
# is given my_samples.betas() - confirm this is the argument it expects
betas_dendrogram(my_samples.betas())