This notebook explores the anglez variable to see whether it can be transformed into a measure that indicates some sort of activity.

In [2]:
# Start by importing packages we'll need
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

In [3]:
# Read the single-participant series (id=0a418b57) from its parquet partition and
# derive two timestamp columns in one chain:
#   dt     - time_of_day converted with pd.to_datetime
#   dt_mod - dt shifted by relative_date_PCIAT days, so each row's date reflects
#            its relative day (e.g. -9.0 moves the timestamp nine days earlier)
data0a418b57 = (
    pd.read_parquet('series_train.parquet/id=0a418b57/part-0.parquet')
    .assign(dt=lambda frame: pd.to_datetime(frame['time_of_day']))
    .assign(
        dt_mod=lambda frame: frame['dt']
        + pd.to_timedelta(frame['relative_date_PCIAT'], unit='D')
    )
)

In [4]:
# Preview the first five rows, including the derived dt and dt_mod columns
data0a418b57.head(n=5)

Unnamed: 0,step,X,Y,Z,enmo,anglez,non-wear_flag,light,battery_voltage,time_of_day,weekday,quarter,relative_date_PCIAT,dt,dt_mod
0,0,-0.075242,-0.256743,-0.973791,0.038081,-72.952141,0.0,5.0,4202.0,51250000000000,2,4,-9.0,1970-01-01 14:14:10,1969-12-23 14:14:10
1,1,-0.265893,-0.270508,-0.76547,0.07743,-52.84922,0.0,0.5,4185.333496,51255000000000,2,4,-9.0,1970-01-01 14:14:15,1969-12-23 14:14:15
2,2,0.334517,-0.548602,-0.588596,0.039162,-44.118084,0.0,11.5,4185.5,51260000000000,2,4,-9.0,1970-01-01 14:14:20,1969-12-23 14:14:20
3,3,0.000193,-0.021069,-0.999681,0.00145,-88.759613,0.0,0.0,4185.666504,51265000000000,2,4,-9.0,1970-01-01 14:14:25,1969-12-23 14:14:25
4,4,-0.000685,-0.020681,-0.997677,0.000491,-88.756958,0.0,8.5,4185.833496,51270000000000,2,4,-9.0,1970-01-01 14:14:30,1969-12-23 14:14:30


Let's start by making a graph of the anglez values over the first full day (-8) and the first five full days

In [6]:
# Scatter the raw anglez readings against dt_mod, restricted to the rows
# whose relative_date_PCIAT equals -8
fig = px.scatter(
    data_frame=data0a418b57.loc[data0a418b57['relative_date_PCIAT'].eq(-8)],
    x='dt_mod',
    y='anglez',
)
fig.show()

In [16]:
# Show the first five rows of the frame again before smoothing anglez
data0a418b57.head(n=5)

Unnamed: 0,step,X,Y,Z,enmo,anglez,non-wear_flag,light,battery_voltage,time_of_day,weekday,quarter,relative_date_PCIAT,dt,dt_mod
0,0,-0.075242,-0.256743,-0.973791,0.038081,-72.952141,0.0,5.0,4202.0,51250000000000,2,4,-9.0,1970-01-01 14:14:10,1969-12-23 14:14:10
1,1,-0.265893,-0.270508,-0.76547,0.07743,-52.84922,0.0,0.5,4185.333496,51255000000000,2,4,-9.0,1970-01-01 14:14:15,1969-12-23 14:14:15
2,2,0.334517,-0.548602,-0.588596,0.039162,-44.118084,0.0,11.5,4185.5,51260000000000,2,4,-9.0,1970-01-01 14:14:20,1969-12-23 14:14:20
3,3,0.000193,-0.021069,-0.999681,0.00145,-88.759613,0.0,0.0,4185.666504,51265000000000,2,4,-9.0,1970-01-01 14:14:25,1969-12-23 14:14:25
4,4,-0.000685,-0.020681,-0.997677,0.000491,-88.756958,0.0,8.5,4185.833496,51270000000000,2,4,-9.0,1970-01-01 14:14:30,1969-12-23 14:14:30


That's a lot of variation. Maybe take a 10-minute rolling average as a sort of low-pass filter to smooth out the short-term fluctuations?

In [33]:
# Smooth anglez with a 10-minute, time-based rolling mean keyed on dt_mod.
# Only the two columns involved are passed to rolling(); the result keeps the
# frame's original index, so the assignment aligns row-for-row.
data0a418b57['anglez_rolling'] = (
    data0a418b57[['dt_mod', 'anglez']]
    .rolling(window='10min', on='dt_mod')
    .mean()['anglez']
)

In [34]:
# Scatter the smoothed anglez_rolling values against dt_mod, restricted to the
# rows whose relative_date_PCIAT equals -8
fig = px.scatter(
    data_frame=data0a418b57.loc[data0a418b57['relative_date_PCIAT'].eq(-8)],
    x='dt_mod',
    y='anglez_rolling',
)
fig.show()

In [35]:
# Scatter the smoothed anglez_rolling values against dt_mod for every row whose
# relative_date_PCIAT lies in the closed range [-8, -3]
fig = px.scatter(
    data_frame=data0a418b57.loc[data0a418b57['relative_date_PCIAT'].between(-8, -3)],
    x='dt_mod',
    y='anglez_rolling',
)
fig.show()