# About
Use this notebook to extract ECG data from the ASCERTAIN_Raw dataset. ASCERTAIN has 58 participants, each watching 36 movie clips. This notebook generates one CSV file for each person/movie-clip combination, named like so:
    person_#_clip_#.csv

The CSV will have 3 columns:
    TIMESTAMP, CHANNEL1, CHANNEL2

ASCERTAIN provides a start time for each ECG recording, so TIMESTAMP is the absolute time at which each sample was taken, expressed in seconds since the Unix epoch.

NOTE: This will generate 3.2GB of CSV files.

# Setup
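Before using this notebook, you must obtain the ASCERTAIN dataset and extract the contents of ASCERTAIN_Raw/ECGData.zip. Set PATH_TO_ASCERTAIN_RAW_ECG in the first code cell to the directory that contains the extracted `ECGData` folder, and set PATH_TO_OUTPUT_ECGCSV to the directory where the CSV files should be written.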

In [10]:
import os
from datetime import datetime, timedelta

import numpy as np
import scipy.io

# Directory holding the extracted contents of ASCERTAIN_Raw's ECGData.zip.
# The .mat files are read from <this path>/ECGData/Movie_P<NN>/ECG_Clip<n>.mat.
PATH_TO_ASCERTAIN_RAW_ECG = '/home/timcsf/AER_Datasets/ascertain/ASCERTAIN_Raw/ecg'

# Directory that receives one CSV file per person/clip recording.
PATH_TO_OUTPUT_ECGCSV = './processed'

# Participants are numbered 1..PARTICIPANT_COUNT and clips 1..MOVIECLIP_COUNT.
PARTICIPANT_COUNT = 58
MOVIECLIP_COUNT = 36

In [11]:
def convert_to_epoch(timestamp, starttime):
    """Return the epoch time of the instant `timestamp` milliseconds after `starttime`.

    Args:
        timestamp: Offset from `starttime`, in milliseconds.
        starttime: `datetime` the offset is measured from. A naive value is
            interpreted as local time by `datetime.timestamp()`.

    Returns:
        float: Seconds since the Unix epoch for `starttime` plus the offset.
    """
    offset = timedelta(milliseconds=timestamp)
    return (starttime + offset).timestamp()

def process(matlab_file, person, clip):
    """Write one recording's timestamps and two selected ECG columns to a CSV file.

    Args:
        matlab_file: Mapping (as returned by `scipy.io.loadmat`) that holds the
            `'timeECG'` and `'Data_ECG'` arrays of a single recording.
        person: Participant identifier; used only to build the output file name.
        clip: Movie-clip identifier; used only to build the output file name.

    Returns:
        None. The result is written to
        `{PATH_TO_OUTPUT_ECGCSV}/person_{person}_clip_{clip}.csv` as three
        comma-separated columns without a header row: the sample time in
        seconds since the Unix epoch, followed by the two selected channels.
    """
    # The first row of 'timeECG' is read as
    # [year, month, day, hour, minute, seconds]; seconds may be fractional.
    starttime_arr = matlab_file['timeECG'][0]
    year, month, day, hour, minute = (int(v) for v in starttime_arr[:5])

    # Add the seconds through a timedelta so that the fractional part ends up
    # as microseconds. Passing `1000 * fraction` as the 7th positional argument
    # of datetime() (which is *microseconds*) shrank it by a factor of 1000.
    starttime = datetime(year, month, day, hour, minute) + timedelta(seconds=float(starttime_arr[5]))

    # Element-wise wrapper so the scalar converter can be applied to a column.
    timeconverter = np.vectorize(lambda ts: convert_to_epoch(ts, starttime))

    ecg_data = matlab_file['Data_ECG']

    # Column 0 is used as the per-sample offset in milliseconds. Recordings
    # with fewer than 6 columns take their two channels from columns 1 and 2,
    # wider recordings from columns 4 and 5.
    # NOTE(review): presumably the left-arm / right-arm leads - confirm
    # against the dataset documentation.
    narrow_layout = ecg_data.shape[1] < 6
    leftArmIdx = 1 if narrow_layout else 4
    rightArmIdx = 2 if narrow_layout else 5
    ecg = ecg_data[:, [0, leftArmIdx, rightArmIdx]]

    # The vectorized converter already maps over the whole column, so no
    # apply_along_axis wrapper is needed; reshape to a column for stacking.
    timestamps = timeconverter(ecg[:, 0]).reshape(-1, 1)

    result = np.append(timestamps, ecg[:, [1, 2]], axis=1)

    np.savetxt(f'{PATH_TO_OUTPUT_ECGCSV}/person_{person}_clip_{clip}.csv', result, delimiter=',')


In [12]:
# Make sure the output directory exists before the first file is written;
# np.savetxt opens the target path directly and fails if the directory is missing.
os.makedirs(PATH_TO_OUTPUT_ECGCSV, exist_ok=True)

for person in range(1, PARTICIPANT_COUNT + 1):
    # Participant folders use a zero-padded, two-digit id (e.g. Movie_P01).
    p = f'{person:02d}'
    for clip in range(1, MOVIECLIP_COUNT + 1):
        print(f'Processing person {p}, clip {clip}')
        matfile_path = f'{PATH_TO_ASCERTAIN_RAW_ECG}/ECGData/Movie_P{p}/ECG_Clip{clip}.mat'
        matfile = scipy.io.loadmat(matfile_path)
        process(matfile, person, clip)

Processing person 01, clip 1
Processing person 01, clip 2
Processing person 01, clip 3
Processing person 01, clip 4
Processing person 01, clip 5
Processing person 01, clip 6
Processing person 01, clip 7
Processing person 01, clip 8
Processing person 01, clip 9
Processing person 01, clip 10
Processing person 01, clip 11
Processing person 01, clip 12
Processing person 01, clip 13
Processing person 01, clip 14
Processing person 01, clip 15
Processing person 01, clip 16
Processing person 01, clip 17
Processing person 01, clip 18
Processing person 01, clip 19
Processing person 01, clip 20
Processing person 01, clip 21
Processing person 01, clip 22
Processing person 01, clip 23
Processing person 01, clip 24
Processing person 01, clip 25
Processing person 01, clip 26
Processing person 01, clip 27
Processing person 01, clip 28
Processing person 01, clip 29
Processing person 01, clip 30
Processing person 01, clip 31
Processing person 01, clip 32
Processing person 01, clip 33
Processing person 0