# Getting started notebook for Naturalistic Data

This notebook uses the dbs-in-the-wild repo and its functionality to load and access naturalistic EMA and ACC data.

## 0. Import packages


In [None]:
# import packages
import pandas as pd
import numpy as np
import sys
import importlib
import matplotlib.pyplot as plt


In [None]:
# Report the interpreter and library versions in use, for reproducibility.
print(f'Python sys {sys.version}')
print(f'pandas {pd.__version__}')
print(f'numpy {np.__version__}')
# Further packages can be reported the same way once they are imported:
# print('mne_bids', mne_bids.__version__)
# print('mne', mne.__version__)
# print('sci-py', scipy.__version__)
# print('sci-kit learn', sk.__version__)
# print('matplotlib', plt_version)

# Previously recorded environments, kept as a string for reference.
"""
Python sys 3.11.5 | packaged by Anaconda, Inc. | (main, Sep 11 2023, 13:26:23) [MSC v.1916 64 bit (AMD64)]
pandas 2.1.1
numpy 1.26.0

from 16.09

Python sys 3.11.5 | packaged by Anaconda, Inc. | (main, Sep 11 2023, 13:26:23) [MSC v.1916 64 bit (AMD64)]
pandas 2.3.2
numpy 2.3.3
"""

Import custom functions

Read the README of dbs_in_the_wild for how to install the package with `pip install -e dbs_home`.


In [None]:
import dbs_home
from dbs_home.load_raw.main_load_raw import loadSubject 
import dbs_home.utils.helpers as home_helpers
import dbs_home.utils.ema_utils as home_ema_utils
import dbs_home.plot_data.plot_compliance as plot_home_compl

In [None]:
# from current repo
from utils import load_utils, load_data, prep_data
from plotting import plot_help



## 1. Explore available Home-Data



Import EMA home data

In [None]:
# Recording moments; each label is passed to select_sessions() as target_session.
MOMENTS = [
    'pre-op',
    'pre 3MFU',
    'post 3MFU',
]

# Subjects to skip entirely, e.g. ['hm25',].
sub_skip = []

# Single sessions to skip, as [subject, session] pairs.
# Entries must stay lists: they are matched with `[sub, ses] in ses_skip`.
ses_skip = [
    ['hm20', 'ses03'],  # data not available yet
]
# ses_skip = [['hm14', 'ses03']]

In [None]:
# Per recording moment, map each subject to its selected session:
# sessions_include[moment][study_id] -> value of the 'Session' column
sessions_include = {}

for rec_moment in MOMENTS:

    # session table returned by the helper for this moment, indexed by study_id
    sel_info = home_helpers.select_sessions(target_session=rec_moment)
    sel_info = sel_info.set_index(sel_info['study_id'])

    sel_sessions = {
        sub: ses for sub, ses in sel_info[['study_id', 'Session']].values
    }

    # store a copy so the per-moment entry does not alias sel_sessions
    sessions_include[rec_moment] = dict(sel_sessions)


In [None]:
# Show the recording moments first, then the full moment -> subject -> session mapping.
print(sessions_include.keys(), sessions_include, sep='\n')

## 2. Explore naturalistic EMAs

NOTE: `loadSubject` takes arguments (`incl_ACC`, `incl_EMA`) that set whether ACC and EMA data are loaded.

Preprocess EMA

- merge scores (done)
- invert negative-items (higher = clinically better) (done)
- mean-correct EMA (for now start with absolute values)
    - test different normalizations:
        - normalize with grand-mean per sub
        - normalize with session mean

load EMA data

In [None]:
# Re-import the local helper modules so edits made to them are picked up.
importlib.reload(load_data)
importlib.reload(prep_data)

# data[moment][subject] -> prepared EMA dataframe
data = {moment: {} for moment in MOMENTS}

for rec_moment, sub_sess in sessions_include.items():
    # rec_moment is one of the MOMENTS labels: 'pre-op', 'pre 3MFU', 'post 3MFU', etc
    for sub, ses in sub_sess.items():

        # honour the subject-level and session-level skip lists
        if sub in sub_skip or [sub, ses] in ses_skip:
            continue

        # EMA only; ACC loading is switched off here
        ses_class = loadSubject(sub=sub, ses=ses, incl_EMA=True, incl_ACC=False)

        # load the EMA table and pass it straight to the preparation step
        # NOTE(review): INVERT_NEG_ITEMS is False although the notes list the
        # inversion of negative items as done — confirm it happens elsewhere
        temp_df = prep_data.prepare_ema_df(
            home_ema_utils.load_ema_df(sub_ses_class=ses_class),
            ADD_MEANMOVE=True,
            INVERT_NEG_ITEMS=False,
        )

        data[rec_moment][sub] = temp_df


correct data

In [None]:
importlib.reload(prep_data)

# every subject present in at least one recording moment (np.unique sorts and de-duplicates)
allsubs = np.unique(
    [sub for subs_in_moment in data.values() for sub in subs_in_moment]
)

# corr_data[moment][subject] -> corrected EMA dataframe
corr_data = {moment: {} for moment in MOMENTS}

for sub in allsubs:

    # merge the subject's dataframes (merge_sub_ema_df), then apply mean_correct_ema_df
    subdf = prep_data.mean_correct_ema_df(
        prep_data.merge_sub_ema_df(datadict=data, sub=sub)
    )

    # split and place back as one dataframe per moment
    for moment in MOMENTS:
        in_moment = subdf['moment'] == moment
        corr_data[moment][sub] = subdf[in_moment].reset_index(drop=True)



In [None]:
importlib.reload(home_helpers)

# Select sessions with the defaults of select_sessions()
# NOTE(review): presumably the default target is the pre-operative session — confirm
sel_info = home_helpers.select_sessions()
sel_info = sel_info.set_index(sel_info['study_id'])
sel_sessions = {
    sub: ses for sub, ses in sel_info[['study_id', 'Session']].values
}
print(sel_sessions)

# One loaded session object per subject (EMA only, no ACC).
# NOTE(review): this rebinds `data`, replacing the per-moment EMA dataframes
# built earlier in this notebook — re-run the EMA loading cell if those are needed again
data = {
    sub: loadSubject(sub=sub, ses=ses, incl_EMA=True, incl_ACC=False)
    for sub, ses in sel_sessions.items()
}



## 3. Explore naturalistic ACC

Include a loading option that loads ACC data only for the EMA windows and stores these selected windows separately, to avoid loading the full ACC data.

Load ACC data, create SVM and filtered data within the dataclass

In [None]:
# Load one subject/session through the dbs_home repo, including ACC data.
sub_id, ses_id = 'hm24', 'ses01'

# days handed to loadSubject as day_selection
dev_day_selection = ['2025-07-17', '2025-07-18']

home_dat = loadSubject(
    sub=sub_id, ses=ses_id,
    day_selection=dev_day_selection,
    # modalities switched on
    incl_EMA=True, incl_ACC=True,
    # modalities switched off
    incl_STEPS=False, incl_EPHYS=False,
)

Check available EMAs

In [None]:

# first and last day stored on the loaded session object
first_day, last_day = home_dat.ses_firstday, home_dat.ses_lastday
print(first_day, last_day)
# days listed in home_dat.watch_days
print(home_dat.watch_days)

In [None]:
# NOTE(review): presumably draws the EMA completion for the loaded session
# (going by the function name) — confirm in dbs_home.plot_data.plot_compliance
plot_home_compl.plot_EMA_completion_perSession(home_dat)

Get Acc-Windows aligned to EMAs

In [None]:
from dbs_home.preprocessing import acc_preprocessing as acc_prep

In [None]:

# Build a dict holding, per watch day, the result of get_day_EMA_AccWindows
# (ACC data per EMA window).
acc_ema_combis = {}

for str_day in home_dat.watch_days:
    print(str_day)  # show which day is being processed
    # all other parameters are left at their defaults — check them in acc_preprocessing
    acc_ema_combis[str_day] = acc_prep.get_day_EMA_AccWindows(
        subSesClass=home_dat,
        str_day=str_day,
    )
    


        

In [None]:
# Check the dict by plotting all windows of one day in the same plot.

# Day to inspect: taken from the loaded day selection instead of repeating a
# hard-coded date string for every lookup, so the plot follows the selection.
plot_day = dev_day_selection[0]
day_windows = acc_ema_combis[plot_day]

fig, ax = plt.subplots(1, 1)

# second y-axis sharing the x-axis, used for the EMA answers
ax_ema = ax.twinx()

for ema, win_times, win_svm in zip(
    day_windows['ema'],
    day_windows['acc_times'],
    day_windows['acc_svm'],
):
    # one line per EMA window
    ax.plot(win_times, win_svm)

    # scatter "move well"-value (item 'Q6') at the first timestamp of the window
    ax_ema.scatter(win_times[0], ema['Q6'], color='gray', s=50, alpha=.5)

ax.set_ylabel('ACC SVM (vector-g)')
ax_ema.set_ylabel('EMA answer (Likert-scale)')

plt.show()

#### Get submovement timings and indices

will be added soon