<a href="https://colab.research.google.com/github/Yiyuan80/MP/blob/main/Indice_calculation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive so the files under
# /content/drive/MyDrive/mesa used by the cells below can be read and written.
import google.colab
google.colab.drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Uncomment the next line to install tsfel if it is not already available in this runtime.
# !pip install tsfel

In [None]:
import os
import pandas as pd
import numpy as np
import datetime
import math
import time
import itertools
from matplotlib import pyplot as plt
from sklearn import preprocessing
import tsfel

In [None]:
# Build id_list from filtered_id.txt: one entry per line, with surrounding
# whitespace removed. The entries are later handed to pd.read_csv as file paths.
id_list = []
with open("/content/drive/MyDrive/mesa/filtered_id.txt") as f:
  id_list.extend(line.strip() for line in f)

In [None]:
def time_series(filenames):
  """Extract tsfel time-series features from every participant's recording.

  Each file is read with ``pd.read_csv`` and cleaned as follows before the
  'activity' column is passed to tsfel:

  * days with more than 30 rows whose 'interval' is 'EXCLUDED' are dropped;
  * day 1 and the last day listed in 'daybymidnight' are dropped;
  * remaining NaN values are forward-filled.

  The combined result is also written to
  '/content/drive/MyDrive/mesa/time_series_features.csv'.

  Args:
    filenames: iterable of CSV paths. Each file must provide the columns
      'daybymidnight', 'interval', 'activity' and 'mesaid'.

  Returns:
    pandas.DataFrame holding the tsfel features of every file, with the
    participant's 'mesaid' inserted as the first column. The row index is
    whatever tsfel returned for each file (it is not reset). An empty
    DataFrame is returned when ``filenames`` is empty.
  """

  # Retrieves a pre-defined feature configuration file to extract all available features
  cfg = tsfel.get_features_by_domain()

  # Per-file feature frames are collected here and concatenated once at the
  # end; concatenating inside the loop re-copies all earlier rows every time.
  feature_frames = []

  for file in filenames:

    print('Parsing:',file)

    df = pd.read_csv(file)
    last_day = df['daybymidnight'].unique()[-1]

    # Count the 'EXCLUDED' rows of every day; days with more than 30 of them
    # are dropped entirely.
    excluded_rows = df.loc[df['interval']=='EXCLUDED']
    excluded_per_day = excluded_rows.groupby(['daybymidnight']).count()
    days_to_drop = list(excluded_per_day.loc[excluded_per_day['interval']>30].index)
    days_to_drop.append(1)
    days_to_drop.append(last_day) # exclude first and last day

    df_excluded = df[~df['daybymidnight'].isin(days_to_drop)]
    # DataFrame.ffill() is the supported spelling of the deprecated
    # fillna(method='ffill'); it forward-fills every column of the kept rows.
    df_imputed = df_excluded.ffill()

    # Extract features
    X = tsfel.time_series_features_extractor(cfg, df_imputed[['activity']])
    # Insert id
    X.insert(0,'mesaid',df['mesaid'].unique()[0])
    feature_frames.append(X)

  # Combine features of all the ids (pd.concat rejects an empty list, so
  # fall back to an empty frame when there was nothing to parse).
  features = pd.concat(feature_frames) if feature_frames else pd.DataFrame([])

  # save to csv
  features.to_csv('/content/drive/MyDrive/mesa/time_series_features.csv')
  return features

In [None]:
# Extract features for every file listed in id_list; time_series() also saves
# the combined table to time_series_features.csv on Drive.
time_series(id_list)

In [None]:
def sleep_reg_index(sleep, epoch = 2880):
  """Calculate the sleep regularity index (SRI).

  The series is cut into consecutive days of ``epoch`` samples. Every day is
  compared, sample by sample, with the day that follows it, and the fraction
  of equal samples is rescaled from [0, 1] to [-100, 100].

  Args:
    sleep: 1-D array-like of per-epoch states whose length is a multiple of
      ``epoch``. Only equality between samples matters, so either coding
      (1 = asleep or 1 = awake) gives the same score. NaN never equals NaN,
      so missing samples count as mismatches.
    epoch: number of samples per day (2880 corresponds to 30-second epochs).

  Returns:
    The SRI as a float: 100 when all adjacent days are identical, -100 when
    they never agree. NaN when fewer than two days are supplied, because
    there is no pair of days to compare.

  Raises:
    ValueError: if the length of ``sleep`` is not a multiple of ``epoch``.
  """
  # Use `epoch` for the day length everywhere; the reshape previously used a
  # hard-coded 2880, which made the parameter unusable for other values.
  sleep_arr = np.asarray(sleep).reshape(-1, epoch)
  n_pairs = sleep_arr.shape[0] - 1
  if n_pairs < 1:
    return np.nan
  # Row i of `matches` flags where day i and day i+1 are in the same state.
  matches = sleep_arr[:-1, :] == sleep_arr[1:, :]
  return np.sum(matches)*200 / (n_pairs * epoch) - 100


In [None]:
def mdpt(sleep,start=0):
    '''Circular mean of the sleep epochs over a 2880-epoch day.

    Every epoch of the day is mapped to an angle on the unit circle, the
    sine and cosine components are summed with `sleep` as weights (NaN
    entries are ignored), and the angle of the resulting vector is converted
    back to an epoch position.

    Note that sleep==1 -> sleep, sleep==0 -> wake.

    `sleep` must hold a whole number of 2880-epoch days laid out one after
    another. `start` is doubled before being added as an offset, and the
    result is wrapped into [0, 2880).'''

    # Column-major reshape: each column is one day, each row one epoch of day.
    per_day = np.reshape(sleep,(2880,-1),order='F')

    phase = np.arange(2880)*2*np.pi/2880

    sin_total = np.nansum(np.sin(phase)[:, np.newaxis]*per_day)
    cos_total = np.nansum(np.cos(phase)[:, np.newaxis]*per_day)

    # arctan2 yields an angle in (-pi, pi]; 1440/pi rescales it to epochs.
    centre = 1440*np.arctan2(sin_total,cos_total)/np.pi

    return (centre+start*2)%2880

In [None]:
def wake_aso(data, days):
  """Calculate waso, which is in minutes.

  Counts the rows whose 'interval' is 'REST-S' and whose 'wake' equals 1,
  then divides by twice the number of entries in `days` (two epochs per
  minute), giving the average per day.
  """
  awake_during_sleep = data.loc[(data['interval']=='REST-S') & (data['wake']==1)]
  epochs_per_minute_over_days = len(days)*2
  return len(awake_during_sleep)/epochs_per_minute_over_days

In [None]:
def se(data):
  """Calculate sleep efficiency.

  Returns the number of 'REST-S' rows divided by the number of rows that are
  either 'REST-S' or 'REST' (time in bed), based on the 'interval' column.
  """
  intervals = data['interval']
  asleep_mask = intervals == 'REST-S'
  in_bed_mask = asleep_mask | (intervals == 'REST')  # time in bed
  return int(asleep_mask.sum()) / int(in_bed_mask.sum())

In [None]:
def mean_sd_activity(activity):
  """Return the (mean, standard deviation) of the activity counts.

  Missing values are passed through np.nan_to_num first, so NaN entries are
  counted as 0 in both statistics (NumPy also maps infinities to large
  finite numbers).
  """
  filled = np.nan_to_num(activity)
  average = np.mean(filled)
  spread = np.std(filled)
  return average, spread

In [None]:
def sleep_indices(filenames):
  """Calculate sleep indices of each participant in mesa data.

  The first day and last generally do not start or end at noon or midnight.
  Therefore, we exclude data of first day and last day. Days with more than
  30 rows marked 'EXCLUDED' in 'interval' are dropped as well, and remaining
  NaN values are forward-filled.

  The resulting table is also written to
  '/content/drive/MyDrive/mesa/mesa_indices.csv'.

  Args:
    filenames: iterable of CSV paths. Each file must provide the columns
      'mesaid', 'daybymidnight', 'interval', 'wake' and 'activity', and every
      retained day is expected to contain exactly 2880 epochs.

  Returns:
    pandas.DataFrame with one row per file and the columns
      mesaid         - first 'mesaid' value of the cleaned recording
      total_sleep    - average minutes per retained day with wake == 0
      SRI            - value of sleep_reg_index() on the wake series
      sleep_midpoint - value of mdpt() on the sleep series (1 - wake)
      waso           - value of wake_aso() (minutes per day)
      mean_activity  - mean of 'activity' with NaN counted as 0
      sd_activity    - standard deviation of 'activity' with NaN counted as 0
  """

  columns = ['mesaid', 'total_sleep', 'SRI', 'sleep_midpoint', 'waso',
             'mean_activity', 'sd_activity']
  records = []

  for file in filenames:

    print('parsing:',file)
    # load data
    df = pd.read_csv(file) # load actigraphy
    last_day = df['daybymidnight'].unique()[-1]

    # Count the 'EXCLUDED' rows of every day; days with more than 30 of them
    # are dropped entirely.
    excluded_rows = df.loc[df['interval']=='EXCLUDED']
    excluded_per_day = excluded_rows.groupby(['daybymidnight']).count()
    days_to_drop = list(excluded_per_day.loc[excluded_per_day['interval']>30].index)
    days_to_drop.append(1)
    days_to_drop.append(last_day) # exclude first and last day

    df_excluded = df[~df['daybymidnight'].isin(days_to_drop)]
    # DataFrame.ffill() is the supported spelling of the deprecated
    # fillna(method='ffill'); it forward-fills every column of the kept rows.
    df_imputed = df_excluded.ffill()
    days = df_excluded['daybymidnight'].unique()

    # 'wake' is 1 while awake and 0 while asleep (total sleep below is
    # computed as the complement of its sum).
    wake = df_imputed['wake'].to_numpy()
    # Activity statistics use the non-imputed values; NaN is turned into 0
    # inside mean_sd_activity.
    activity = df_excluded['activity'].to_numpy()

    # Total sleep time: share of epochs asleep, scaled to minutes per day
    aver_total_sleep = (1-df_imputed['wake'].sum()/(len(days)*2880))*1440

    # activity mean and sd
    mean_act, sd_act = mean_sd_activity(activity)

    records.append({
        'mesaid': df_imputed['mesaid'].iloc[0],
        'total_sleep': aver_total_sleep,
        # SRI only tests equality between days, so the wake coding is fine.
        'SRI': sleep_reg_index(wake, epoch = 2880),
        # mdpt() expects 1 = asleep, so the wake indicator must be inverted;
        # passing `wake` directly would give the midpoint of wake time.
        'sleep_midpoint': mdpt(1 - wake, start=0),
        # Wake after sleep onset
        'waso': wake_aso(df_imputed, days),
        'mean_activity': mean_act,
        'sd_activity': sd_act,
    })

  # Passing `columns` keeps the header intact even when no file was parsed.
  indices = pd.DataFrame(records, columns=columns)
  indices.to_csv('/content/drive/MyDrive/mesa/mesa_indices.csv')
  return indices

In [None]:
# Compute the sleep indices for every file listed in id_list; sleep_indices()
# also saves the table to mesa_indices.csv on Drive.
sleep_indices(id_list)