# Postprocessing Template (Tutorial)

Before running this script, extract the example data into the `data/raw` folder in the analysis directory of this repository.

In [None]:
import sys
sys.path.append("..") # Add the parent directory to the import path so the project's `src` package can be imported
print(sys.executable) # Show the interpreter path to verify the expected conda environment is active
print(sys.version) # Show the Python version in use

# Load postprocessing modules (this import must stay after the sys.path change above)
from src import data_access, pipelines, reporting, utils

## Configuration

Configuration variables are set in the file: `config.yml`.

In this example, the processed data and results are written to the `template_example` folder.

In [None]:
# Load the run settings from config.yml into `cfg`, which every later step receives.
# NOTE(review): print_config=True presumably echoes the loaded settings — confirm in utils.
cfg = utils.load_config(
    "config.yml",
    print_config=True,
)

## Initialize directories

Ensure each folder is created to store the processed data and results.

In [None]:
# Create the folders for processed data and results, based on cfg, before anything is written.
utils.init_directories(cfg)

## Check raw data

First, we can check for valid files in the raw data directory.

In [None]:
# Check the raw data directory (located via cfg) for valid files; the result is reused by later cells.
raw_data_check = data_access.check_files_in_directory(cfg)
raw_data_check  # bare expression so the notebook renders the check result

We can automatically flag any records whose length or sample rate falls outside the configured limits.

In [None]:
# Flag records in raw_data_check whose length or sample rate is outside the limits set in cfg.
data_access.check_records_ts(raw_data_check, cfg)

The configuration file has set the `minimum_record_length` value to 6000 seconds, so in this example, we can exclude W004 from the analysis.

In [None]:
# Record IDs to leave out of the analysis (W004 is excluded in this example, see text above).
exclude_raw_files = ['W004']

# Remove those rows in place; errors='ignore' skips IDs that are already absent,
# so this cell can be re-run safely.
raw_data_check.drop(
    index=exclude_raw_files,
    errors='ignore',
    inplace=True,
)
raw_data_check

## Read raw data

Now all of the raw data can be read and combined into a single dataframe.

In [None]:
# Read the raw data for every record still listed in raw_data_check and combine it into one dataframe.
# NOTE(review): save_data=True presumably also writes the combined data to disk — confirm in data_access.
raw_data_full = data_access.get_raw_watch_data(raw_data_check, cfg, save_data=True)

In [None]:
# Preview the first rows of the combined raw data.
raw_data_full.head()

### Trim raw data to time period

Optionally, data outside of a specified time window can be removed from the dataframe.

In [None]:
# Remove data outside the specified time window.
# NOTE(review): the window is presumably defined in cfg, and save_data=True presumably persists the result — confirm.
raw_data_trimmed = data_access.trim_raw_watch_data(raw_data_full, cfg, save_data=True)

In [None]:
# Display the trimmed raw data.
raw_data_trimmed

## Visualization

In [None]:
# Plot the trimmed raw data; plot settings are taken from cfg inside reporting.
reporting.plot_raw_data(raw_data_trimmed, cfg)

TODO

In [None]:
# Plot the raw data per watch; with no `value` argument, heartRate is plotted by default.
reporting.plot_raw_individual_watches(raw_data_trimmed, cfg)

In [None]:
# Same per-watch plot, but for the ppgRaw signal instead of the default heartRate.
reporting.plot_raw_individual_watches(raw_data_trimmed, cfg, value='ppgRaw')

## Heart rate processing

### Resampling
The raw data must be resampled at a constant rate for all watches.

In [None]:
# Resample the heart-rate data so all watches share one constant sample rate.
# NOTE(review): the variable name suggests a 1000 ms interval; the actual rate presumably comes from cfg — confirm.
resampled_data_HR_1000ms = pipelines.resample_HR(raw_data_trimmed, cfg, save_data=True)

In [None]:
# Display the resampled heart-rate data.
resampled_data_HR_1000ms

## PPG processing

In [None]:
# resample_PPG() apparently needs the index as regular columns (TODO: fix in resample_PPG()).
# Pass a reset *copy* instead of resetting in place: raw_data_trimmed stays unchanged, so
# this cell can be re-run without inserting a spurious "index" column (second run) or
# raising ValueError (third run), and earlier cells still see the frame they were written for.
resampled_data_PPG_25ms = pipelines.resample_PPG(
    raw_data_trimmed.reset_index(), cfg, save_data=True
)

In [None]:
# Display the resampled PPG data.
resampled_data_PPG_25ms

## Peak finding

In [None]:
# Run peak finding on the resampled PPG data, using settings from cfg.
PPG_peaks = pipelines.PPG_find_peaks(resampled_data_PPG_25ms, cfg)

In [None]:
# Display the peak-finding result.
PPG_peaks

# More

## Read processed data