In [1]:
import pandas as pd
import numpy as np

In [2]:
# Root directory of the UK Biobank data; the input CSV path below is built from it.
data_path = '/scratch/c.c21013066/data/ukbiobank'

In [9]:
# Only the column names are needed here, so parse just the header row
# (nrows=0) instead of loading the whole CSV into memory.
# NOTE: `features` is reassigned by the hand-written feature list in the next
# cell, so this value only serves as a reference for the available columns.
features = pd.read_csv(
    f'{data_path}/phenotypes/accelerometer/matched_all_HCnoOsteo.csv',
    nrows=0,
).columns

In [4]:
# Activity classes for which the per-class features are generated.
classes = ['sleep', 'light', 'sedentary', 'MVPA', 'imputed']

# Columns that are not expanded per activity class.
day_count_columns = [
    'covered_days',
    'complete_days_starting_10h',
    'complete_days_starting_0h',
    'complete_days_starting_7h',
]

# One template per feature family. Each template is expanded once for every
# entry in `classes`; families are emitted in the order listed here.
feature_templates = [
    'mean_{cl}_hours_perday',
    'std_{cl}_hours_perday',
    'mean_{cl}_hours_per24h',
    'std_{cl}_hours_per24h',
    'mean_movement_during_{cl}',
    'std_movement_during_{cl}',
    'mean_max_{cl}_hours_consecutive_perday',
    'mean_max_{cl}_hours_consecutive_per24h',
    'max_{cl}_hours_consecutive',
    'mean_N_{cl}_intervals_per24h',
    'mean_N_{cl}_intervals_perday',
    'mean_N_{cl}_intervals_22-10',
    'mean_N_{cl}_intervals_10-22',
    'mean_N_{cl}_intervals_07-23',
    'mean_N_{cl}_intervals_23-07',
]

# Flat 1-D array of all feature names: day-count columns first, then each
# family expanded over the activity classes.
features = np.hstack(
    day_count_columns
    + [[template.format(cl=cl) for cl in classes] for template in feature_templates]
)

In [5]:
# Empty table with one row per feature name; `name` and `description` start
# out as missing values and are filled in by later cells.
table = pd.DataFrame(index=features, columns=['name', 'description'])

In [6]:
# Add one boolean column per activity, flagging the features whose name
# contains that activity label. Column order follows this tuple.
activity_labels = ('sleep', 'MVPA', 'light', 'sedentary', 'imputed')
for label in activity_labels:
    table[label] = table.index.str.contains(label)

In [3]:
# Maps a regex over feature names to the textual description of how that
# feature family is derived. The capture group stands for the activity label.
# Keys are raw strings so that `\w` reaches the regex engine without relying
# on Python passing an invalid string escape through unchanged (which emits a
# DeprecationWarning/SyntaxWarning on recent Python versions).
dicto = {
        r'mean_([\w]+)_hours_per24h':'for each 24h cycle (starting 10am) get all epochs of that day and count how many epochs labelled as activity and convert to hours, get mean over days',
        r'std_([\w]+)_hours_per24h':'for each 24h cycle (starting 10am)  get all epochs of that day and count how many epochs labelled as activity and convert to hours, get std over days',
        r'mean_movement_during_([\w]+)':'get all epochs classified as activity and compute mean of acceleration',
        r'std_movement_during_([\w]+)':'get all epochs classified as activity and compute std of acceleration',
        r'mean_max_([\w]+)_hours_consecutive_per24h': 'identify uninterrupted sequences of activity, for each 24h cycle (starting 10am) get maximum of uninterrupted sequence length in hours, compute mean over cycles',
        r'max_([\w]+)_hours_consecutive': 'get overall longest sequence of uninterrupted activity in hours',
        r'mean_N_([\w]+)_intervals_per24h':'identify when a new sequence of activity starts, for each 24h cycle (starting 10am) count how often a new sequence of activity starts, compute mean over cycles',
        r'mean_N_([\w]+)_intervals_10-22': 'identify when a new sequence of activity starts, for each 12h cycle (starting 10am till 10pm) count how often a new sequence of activity starts, compute mean over cycles',
        r'mean_N_([\w]+)_intervals_22-10': 'identify when a new sequence of activity starts, for each 12h cycle (starting 10pm till 10am) count how often a new sequence of activity starts, compute mean over cycles',
        r'mean_N_([\w]+)_intervals_07-23': 'identify when a new sequence of activity starts, for each 8h cycle (here combining two 8h intervals starting 7am till 11pm) count how often a new sequence of activity starts, compute mean over cycles',
        # The 23-07 window is the 8h cycle from 11pm to 7am (the text previously said 8am).
        r'mean_N_([\w]+)_intervals_23-07': 'identify when a new sequence of activity starts, for each 8h cycle (starting 11pm till 7am) count how often a new sequence of activity starts, compute mean over cycles'
        }

In [7]:
# For every regex pattern, write its description into the rows whose feature
# name matches the pattern from the start of the string.
for pattern, description in dicto.items():
    print(pattern)
    matches_pattern = table.index.str.match(pattern)
    table.loc[matches_pattern, 'description'] = description

mean_([\w]+)_hours_per24h
std_([\w]+)_hours_per24h
mean_movement_during_([\w]+)
std_movement_during_([\w]+)
mean_max_([\w]+)_hours_consecutive_per24h
max_([\w]+)_hours_consecutive
mean_N_([\w]+)_intervals_per24h
mean_N_([\w]+)_intervals_10-22
mean_N_([\w]+)_intervals_22-10
mean_N_([\w]+)_intervals_07-23
mean_N_([\w]+)_intervals_23-07


In [78]:
# Display names for a hand-picked subset of sleep features, keyed by the
# exact feature name used as the table index.
name_dict = {
    'mean_sleep_hours_per24h': 'mean sleep [h]',
    'std_movement_during_sleep': 'std movement during sleep [milligal]',
    'mean_max_sleep_hours_consecutive_per24h': 'mean maximum consecutive sleep [h]',
    'mean_N_sleep_intervals_22-10': 'mean wake-ups nighttime [N]',
    'mean_N_sleep_intervals_10-22': 'mean naps daytime [N]',
}
# Write each display name into the `name` column of its feature row.
for feature, display_name in name_dict.items():
    print(feature)
    table.loc[feature, 'name'] = display_name

mean_sleep_hours_per24h
std_movement_during_sleep
mean_max_sleep_hours_consecutive_per24h
mean_N_sleep_intervals_22-10
mean_N_sleep_intervals_10-22


In [80]:
# Save the full table (all rows, including the `name` column). The path is
# built from `data_path` so the output location stays in sync with the input.
# NOTE(review): a later cell writes a filtered version of the table to this
# same file — confirm which of the two exports is the intended final one.
table.to_csv(f'{data_path}/phenotypes/accelerometer/derived_features_description.csv')

In [8]:
# Show the assembled description table as the cell output.
table

Unnamed: 0,name,description,sleep,MVPA,light,sedentary,imputed
covered_days,,,False,False,False,False,False
complete_days_starting_10h,,,False,False,False,False,False
complete_days_starting_0h,,,False,False,False,False,False
complete_days_starting_7h,,,False,False,False,False,False
mean_sleep_hours_perday,,,True,False,False,False,False
...,...,...,...,...,...,...,...
mean_N_sleep_intervals_23-07,,identify when a new sequence of activity start...,True,False,False,False,False
mean_N_light_intervals_23-07,,identify when a new sequence of activity start...,False,False,True,False,False
mean_N_sedentary_intervals_23-07,,identify when a new sequence of activity start...,False,False,False,True,False
mean_N_MVPA_intervals_23-07,,identify when a new sequence of activity start...,False,True,False,False,False


In [12]:
# Export only the features that received a description, keeping the
# description and the per-activity flags (the `name` column is dropped).
# A single .loc call selects rows and columns together instead of chaining
# two indexing operations; the path reuses `data_path` rather than repeating
# the absolute directory.
# NOTE(review): this targets the same file as the earlier full-table export —
# confirm which of the two versions should end up on disk.
has_description = table['description'].notna()
table.loc[
    has_description,
    ['description', 'sleep', 'MVPA', 'light', 'sedentary', 'imputed'],
].to_csv(f'{data_path}/phenotypes/accelerometer/derived_features_description.csv')