In [1]:
from google.colab import drive
drive.mount("/content/drive")
import os
print(os.getcwd())
print(os.listdir())

Mounted at /content/drive
/content
['.config', 'drive', 'sample_data']


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

dexcom = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/ResearchProject/datafiles/002/Dexcom_002.csv")
HR = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/ResearchProject/datafiles/002/HR_002.csv")
acc = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/ResearchProject/datafiles/002/ACC_002.csv")
#bvp = pd.read_csv(".//datafiles//002//BVP_002.csv")
#eda = pd.read_csv(".//datafiles//002//EDA_002.csv")


In [4]:
def csvGrabber(file):
  dataframes = []
  for filename in os.listdir(file):
    if filename.endswith('.csv'):
        file_path = os.path.join(directory, filename)
        # Read the CSV file into a DataFrame
        df = pd.read_csv(file_path)
        # Append the DataFrame to the list
        dataframes.append(df)

In [5]:
# Grabs only time and glucose level data and renames time to "datetime"
# Used for dexcom
def glucoseGrabber (data):
  start_index = data["Timestamp (YYYY-MM-DDThh:mm:ss)"].first_valid_index()
  relevent = data.iloc[start_index:]
  return relevent[["Timestamp (YYYY-MM-DDThh:mm:ss)", "Glucose Value (mg/dL)"]].rename(columns={"Timestamp (YYYY-MM-DDThh:mm:ss)": "datetime"})

In [6]:
# Returns interpolated glucose data of 5 minute intervals
# Used for dexcom
def glucoseInterpolater(data):
  glucose = glucoseGrabber(data)
  del data
  glucose.loc[:, "datetime"] = pd.to_datetime(glucose["datetime"])
  glucose.set_index("datetime", inplace=True)
  ds_glucose = glucose.resample("5min").mean()
  return ds_glucose

In [7]:
# Resamples data to 5 minute intervals according to the mean
# Used for HR (BVP, EDA)
def Resampler(data):
  data.loc[:, "datetime"] = pd.to_datetime(data["datetime"])
  ds_data = data.resample("5min", on="datetime").mean()
  del data
  return ds_data

In [8]:
# Resamples tri-axial accelerometry and calculates the magnitude of each vector
# Used for ACC
def magnitudeGrabber(acc):
  # Returns the magnitude of a 3D vector
  def magnitude(x, y, z):
    return (x**2 + y**2 + z**2)**(1/2)

  # Calculates magnitude of vectors
  ds_acc = Resampler(acc) #resamples to 5 min intervals
  del acc
  ds_acc["magnitude"] = ds_acc.apply(lambda row: magnitude(row[' acc_x'], row[' acc_y'], row[' acc_z']), axis=1)
  ds_acc["magnitude"] = np.sqrt(ds_acc[' acc_x']**2 + ds_acc[' acc_y']**2 + ds_acc[' acc_z']**2)
  return ds_acc[["magnitude"]]

In [9]:
# Lines data according to glucose start and end time
def combine(dexcom, hr, acc):
  ds_glucose = glucoseInterpolater(dexcom)
  ds_hr = Resampler(hr)
  #ds_bvp = Resampler(bvp)
  #ds_eda = Resampler(eda)
  mag = magnitudeGrabber(acc)
  joined = ds_glucose.join(ds_hr).join(mag).reset_index(drop=False)
  return joined

In [10]:
data = combine(dexcom, HR, acc)

In [11]:
data

Unnamed: 0,datetime,Glucose Value (mg/dL),hr,magnitude
0,2020-02-21 11:05:00,186.0,89.619200,57.048610
1,2020-02-21 11:10:00,181.0,88.518300,58.645851
2,2020-02-21 11:15:00,175.0,87.909533,63.562111
3,2020-02-21 11:20:00,168.0,88.003800,63.822572
4,2020-02-21 11:25:00,163.0,86.766700,58.733364
...,...,...,...,...
2282,2020-02-29 09:15:00,177.0,85.025900,64.068738
2283,2020-02-29 09:20:00,171.0,83.188767,62.683183
2284,2020-02-29 09:25:00,166.0,84.574767,64.563053
2285,2020-02-29 09:30:00,163.0,86.143967,59.304552


In [12]:
data.rename(columns={' hr' : 'hr'})

Unnamed: 0,datetime,Glucose Value (mg/dL),hr,magnitude
0,2020-02-21 11:05:00,186.0,89.619200,57.048610
1,2020-02-21 11:10:00,181.0,88.518300,58.645851
2,2020-02-21 11:15:00,175.0,87.909533,63.562111
3,2020-02-21 11:20:00,168.0,88.003800,63.822572
4,2020-02-21 11:25:00,163.0,86.766700,58.733364
...,...,...,...,...
2282,2020-02-29 09:15:00,177.0,85.025900,64.068738
2283,2020-02-29 09:20:00,171.0,83.188767,62.683183
2284,2020-02-29 09:25:00,166.0,84.574767,64.563053
2285,2020-02-29 09:30:00,163.0,86.143967,59.304552


In [13]:
data.columns

Index(['datetime', 'Glucose Value (mg/dL)', ' hr', 'magnitude'], dtype='object')

In [14]:
# Determining Activity Bouts
def act_bout(dataset):
  hr_mean = dataset[' hr'].mean()
  hr_std = dataset[' hr'].std()
  mag_mean = dataset['magnitude'].mean()
  mag_std = dataset['magnitude'].std()

  hr_high = []
  mag_high = []
  for i in dataset.index:
    if dataset.loc[i, ' hr'] > hr_mean:
      hr_high.append(i)
    if dataset.loc[i, 'magnitude'] > mag_mean:
      mag_high.append(i)

  act_bouts = []
  for time in hr_high:
    if time in mag_high:
      act_bouts.append(time)
  return act_bouts

In [15]:
activity = act_bout(data)
act_time = data['datetime'].iloc[activity]
act_time

1      2020-02-21 11:10:00
2      2020-02-21 11:15:00
3      2020-02-21 11:20:00
4      2020-02-21 11:25:00
5      2020-02-21 11:30:00
               ...        
2281   2020-02-29 09:10:00
2282   2020-02-29 09:15:00
2283   2020-02-29 09:20:00
2284   2020-02-29 09:25:00
2285   2020-02-29 09:30:00
Name: datetime, Length: 444, dtype: datetime64[ns]

In [16]:
from google.colab import files

data.to_csv('data.csv')
files.download('data.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [17]:
def acc_dev(mag):
  transformed_mag = []
  for val in mag:
    transformed_mag.append(abs(64 - val))
  return transformed_mag

data['transformed_magnitude'] = data.apply(lambda row: acc_dev(row['magnitude']), axis=1)

def act_bout(dataset):

  hr_q3 = dataset[' hr'].quantile(q=0.75)
  mag_q3 = dataset['transformed_magnitude'].quantile(q=0.75)

  hr_high = []
  mag_high = []
  for i in dataset.index:
    if dataset.loc[i, ' hr'] > hr_q3:
      hr_high.append(i)
    if dataset.loc[i, 'transformed_magnitude'] > mag_q3:
      mag_high.append(i)

  act_bouts = []
  for time in hr_high:
    if time in mag_high:
      act_bouts.append(time)
  return act_bouts

TypeError: 'float' object is not iterable

In [18]:
def slopes(timeseries):
  n = len(timeseries)
  slopes = []
  for i in range(1, n):
    slope = (timeseries[i] - timeseries[i-1])
    slopes.append(slope)
  return slopes


def slope_act_bout(dataset):
  hr_slope = pd.DataFrame(slopes(dataset[' hr']))
  mag_slope = pd.DataFrame(slopes(dataset['magnitude']))

  hr_q3 = hr_slope.quantile(q=0.75)
  mag_q3 = mag_slope.quantile(q=0.75)

  hr_slope.rename(columns={0: 'slope'}, inplace=True)
  mag_slope.rename(columns={0: 'slope'}, inplace=True)

  # for i in hr_slope.index:
  #   print(hr_slope.at[i, 'slope'] > hr_q3)

  hr_high = []
  mag_high = []
  for i in hr_slope.index:
    if hr_slope['slope'].loc(i) > hr_q3:
      hr_high.append(i)
    if mag_slope['slope'].loc(i) > mag_q3:
      mag_high.append(i)

  act_bouts = []
  for time in hr_high:
    if time in mag_high:
      act_bouts.append(time)
  return act_bouts

slope_act_bout(data)

TypeError: '<' not supported between instances of 'float' and '_LocIndexer'