# Importing the libraries

In [105]:
import pandas as pd
import numpy as np
from datetime import datetime
from pandas.tseries.frequencies import to_offset
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Process the activity data by first reading it, ensuring its cleanliness, and then resampling it as needed.

In [106]:
def read_and_process(file_path):
    df = pd.read_csv(file_path)
    df['Timestamp'] = df['startTimeInSeconds'].apply(
        lambda x: datetime.utcfromtimestamp(x)
    )
    df = df[['Timestamp', 'steps', 'activityType', 'intensity', 'maxMotionIntensity']]  # Selecting only 'timestamp' and 'steps' columns
    return df
file_paths = [
    '01-10-2023---31-10-2023.csv',
    '31-10-2023---30-11-2023.csv',
    '30-11-2023---30-12-2023.csv',
    '30-12-2023---19-01-2024.csv'
]
df_list = [read_and_process(file_path) for file_path in file_paths]
df_activity = pd.concat(df_list)
df_activity.drop_duplicates(inplace=True)
df_activity.set_index("Timestamp", inplace=True)
df_activity.rename(columns={'steps': 'Activity'}, inplace=True)
df_activity.sort_index(inplace=True)
df_activity

Unnamed: 0_level_0,Activity,activityType,intensity,maxMotionIntensity
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-10-01 04:45:00,0,SEDENTARY,SEDENTARY,2.0
2023-10-01 05:00:00,0,SEDENTARY,SEDENTARY,1.0
2023-10-01 05:15:00,0,SEDENTARY,SEDENTARY,3.0
2023-10-01 05:30:00,0,SEDENTARY,SEDENTARY,4.0
2023-10-01 05:45:00,0,SEDENTARY,SEDENTARY,3.0
...,...,...,...,...
2024-01-18 22:45:00,0,SEDENTARY,SEDENTARY,0.0
2024-01-18 23:00:00,0,SEDENTARY,SEDENTARY,3.0
2024-01-18 23:15:00,0,SEDENTARY,SEDENTARY,0.0
2024-01-18 23:30:00,0,SEDENTARY,SEDENTARY,0.0


In [107]:
df_activity['Activity'] = df_activity['Activity'] * df_activity['maxMotionIntensity']
df_activity = df_activity.groupby(df_activity.index).mean()
df_activity.drop(columns= 'maxMotionIntensity', inplace = True)
df_activity

Unnamed: 0_level_0,Activity
Timestamp,Unnamed: 1_level_1
2023-10-01 04:45:00,0.0
2023-10-01 05:00:00,0.0
2023-10-01 05:15:00,0.0
2023-10-01 05:30:00,0.0
2023-10-01 05:45:00,0.0
...,...
2024-01-18 22:45:00,0.0
2024-01-18 23:00:00,0.0
2024-01-18 23:15:00,0.0
2024-01-18 23:30:00,0.0


# Process the glucose data by first reading it, ensuring its cleanliness, and then resampling it as needed

In [108]:
df_glucose = pd.read_csv('BG.csv', skiprows=1)
df_glucose = df_glucose.rename(columns={'CGM Glucose Value (mmol/L)': 'Glucose'})
df_glucose['Timestamp'] = pd.to_datetime(df_glucose['Timestamp'], dayfirst=True)
df_glucose = df_glucose.set_index('Timestamp').sort_index()
df_glucose = df_glucose[df_glucose['Serial number'].str.contains('Dexcom g6 \(1f2432f\)')]
df_glucose = df_glucose[['Glucose']]
df_glucose.index = df_glucose.index.round('5T')
df_glucose

Unnamed: 0_level_0,Glucose
Timestamp,Unnamed: 1_level_1
2023-12-24 10:30:00,8.3
2023-12-24 10:35:00,8.0
2023-12-24 10:40:00,7.7
2023-12-24 10:45:00,7.4
2023-12-24 10:50:00,7.0
...,...
2024-02-07 06:35:00,5.4
2024-02-07 06:40:00,5.6
2024-02-07 06:45:00,5.5
2024-02-07 06:50:00,5.4


In [109]:
fig = make_subplots(rows=1, cols=2, subplot_titles=('Activity', 'Glucose'))
fig.add_trace(go.Scatter(x=df_activity.index, y=df_activity['Activity'], 
                         mode='lines+markers', marker=dict(symbol='x', size=7.5), 
                         line=dict(color='royalblue'), name='Activity'), row=1, col=1)
fig.add_trace(go.Scatter(x=df_glucose.index, y=df_glucose['Glucose'], 
                         mode='lines+markers', marker=dict(symbol='circle', size=7.5), 
                         line=dict(color='firebrick'), name='Glucose'), row=1, col=2)
fig.update_layout(title='Plots')
fig.show()
display(df_activity)
display(df_glucose)

Unnamed: 0_level_0,Activity
Timestamp,Unnamed: 1_level_1
2023-10-01 04:45:00,0.0
2023-10-01 05:00:00,0.0
2023-10-01 05:15:00,0.0
2023-10-01 05:30:00,0.0
2023-10-01 05:45:00,0.0
...,...
2024-01-18 22:45:00,0.0
2024-01-18 23:00:00,0.0
2024-01-18 23:15:00,0.0
2024-01-18 23:30:00,0.0


Unnamed: 0_level_0,Glucose
Timestamp,Unnamed: 1_level_1
2023-12-24 10:30:00,8.3
2023-12-24 10:35:00,8.0
2023-12-24 10:40:00,7.7
2023-12-24 10:45:00,7.4
2023-12-24 10:50:00,7.0
...,...
2024-02-07 06:35:00,5.4
2024-02-07 06:40:00,5.6
2024-02-07 06:45:00,5.5
2024-02-07 06:50:00,5.4


# Merge Activity and Glucose Data from 6 AM to 12 AM the following day

In [110]:
actgluco = pd.merge(df_activity, df_glucose, left_index=True, right_index=True, how='inner')
actgluco.dropna(inplace=True)
actgluco = actgluco[(actgluco.index.hour >= 6) & (actgluco.index.hour <= 23)]
actgluco = actgluco.drop_duplicates()
actgluco.reset_index(inplace = True)
actgluco

Unnamed: 0,Timestamp,Activity,Glucose
0,2023-12-24 10:30:00,0.000000,8.3
1,2023-12-24 10:45:00,19.333333,7.4
2,2023-12-24 11:00:00,1805.000000,6.7
3,2023-12-24 11:15:00,450.000000,6.8
4,2023-12-24 11:30:00,0.000000,7.1
...,...,...,...
902,2024-01-18 16:00:00,13.000000,9.4
903,2024-01-18 17:45:00,280.000000,11.8
904,2024-01-18 18:30:00,49.333333,9.9
905,2024-01-18 22:15:00,64.500000,6.6


In [111]:
fig = go.Figure()

for day, group in actgluco.groupby(actgluco.Timestamp.dt.date):
    fig.add_trace(go.Scatter(x=group.Timestamp, y=group['Activity'], mode='lines+markers', name=f'Activity - {day}',
                             yaxis='y1', marker=dict(symbol='x', size=7.5), line=dict(color='royalblue')))

for day, group in actgluco.groupby(actgluco.Timestamp.dt.date):
    fig.add_trace(go.Scatter(x=group.Timestamp, y=group['Glucose'], mode='lines+markers', name=f'Glucose - {day}',
                             yaxis='y2', marker=dict(symbol='circle', size=7.5), line=dict(color='firebrick')))

fig.add_shape(type="line", x0=min(actgluco.Timestamp), y0=actgluco['Activity'].mean(), x1=max(actgluco.Timestamp), y1=actgluco['Activity'].mean(),
              line=dict(color="royalblue", width=1, dash='dash'), yref='y1')
fig.add_shape(type="line", x0=min(actgluco.Timestamp), y0=actgluco['Glucose'].mean(), x1=max(actgluco.Timestamp), y1=actgluco['Glucose'].mean(),
              line=dict(color="firebrick", width=1, dash='dash'), yref='y2')

fig.update_layout(
    yaxis=dict(title='Activity', side='left', showgrid=False),
    yaxis2=dict(title='Glucose', overlaying='y', side='right', showgrid=False),
    legend=dict(x=1.1, y=1, font=dict(size=10))
)

fig.update_xaxes(title='Timestamp')
fig.show()

In [112]:
Total_days = (actgluco['Timestamp'].max() - actgluco['Timestamp'].min()).days + 1
if Total_days <= 10:
    print("Sorry, the data is not sufficent to predict accurately")

# Calculating Glucose Change in the next intervals

In [113]:
activityglucose = actgluco
activityglucose['nextGlucose'] = activityglucose['Glucose'].shift(-1)
activityglucose['GlucoseChange'] = activityglucose['Glucose'] - activityglucose['nextGlucose']
activityglucose = activityglucose[:-1]
activityglucose

Unnamed: 0,Timestamp,Activity,Glucose,nextGlucose,GlucoseChange
0,2023-12-24 10:30:00,0.000000,8.3,7.4,0.9
1,2023-12-24 10:45:00,19.333333,7.4,6.7,0.7
2,2023-12-24 11:00:00,1805.000000,6.7,6.8,-0.1
3,2023-12-24 11:15:00,450.000000,6.8,7.1,-0.3
4,2023-12-24 11:30:00,0.000000,7.1,9.8,-2.7
...,...,...,...,...,...
901,2024-01-18 13:30:00,17.000000,7.4,9.4,-2.0
902,2024-01-18 16:00:00,13.000000,9.4,11.8,-2.4
903,2024-01-18 17:45:00,280.000000,11.8,9.9,1.9
904,2024-01-18 18:30:00,49.333333,9.9,6.6,3.3


# Remvoing rows where time is inconsistent

In [114]:
mean_activity = activityglucose['Activity'].mean()

result_df = activityglucose[(activityglucose['Timestamp'].shift(-1) - activityglucose['Timestamp']).dt.total_seconds() / 60 == 15]
result_df

Unnamed: 0,Timestamp,Activity,Glucose,nextGlucose,GlucoseChange
0,2023-12-24 10:30:00,0.000000,8.3,7.4,0.9
1,2023-12-24 10:45:00,19.333333,7.4,6.7,0.7
2,2023-12-24 11:00:00,1805.000000,6.7,6.8,-0.1
3,2023-12-24 11:15:00,450.000000,6.8,7.1,-0.3
4,2023-12-24 11:30:00,0.000000,7.1,9.8,-2.7
...,...,...,...,...,...
893,2024-01-18 09:45:00,223.333333,8.8,8.7,0.1
894,2024-01-18 10:00:00,294.000000,8.7,8.7,0.0
895,2024-01-18 10:15:00,2047.500000,8.7,8.4,0.3
898,2024-01-18 12:30:00,83.333333,7.2,6.9,0.3


# Finding different levels of Activity and filtering rows where Glucose Drops after Activity in the next interval

In [115]:
filtered_df = result_df[result_df['Activity'] >= mean_activity]
x0 = filtered_df.Activity.min()
x1 = filtered_df.Activity.quantile(0.25)
x2 = filtered_df.Activity.quantile(0.5)
x3 = filtered_df.Activity.quantile(0.75)
x4 = filtered_df.Activity.max()
filtered_df.describe()

Unnamed: 0,Activity,Glucose,nextGlucose,GlucoseChange
count,199.0,199.0,199.0,199.0
mean,1108.880653,8.040201,7.923116,0.117085
std,1000.67323,2.300723,2.197397,1.067287
min,306.0,3.6,3.6,-4.3
25%,498.666667,6.25,6.25,-0.35
50%,712.5,7.6,7.7,0.1
75%,1361.0,9.8,9.45,0.7
max,5584.0,15.0,14.9,3.0


In [116]:
filter_result_df = filtered_df[filtered_df['Glucose'] > filtered_df['nextGlucose']]
filter_result_df

Unnamed: 0,Timestamp,Activity,Glucose,nextGlucose,GlucoseChange
6,2023-12-24 12:00:00,3344.000000,10.7,9.2,1.5
7,2023-12-24 12:15:00,3278.000000,9.2,6.7,2.5
8,2023-12-24 12:30:00,795.000000,6.7,5.3,1.4
13,2023-12-24 13:45:00,468.666667,10.8,9.9,0.9
67,2023-12-25 09:30:00,500.000000,7.3,6.3,1.0
...,...,...,...,...,...
841,2024-01-16 17:45:00,517.000000,7.2,6.9,0.3
848,2024-01-17 07:45:00,368.000000,8.2,7.7,0.5
850,2024-01-17 08:30:00,309.333333,7.2,6.8,0.4
871,2024-01-17 18:15:00,861.000000,5.3,4.7,0.6


In [117]:
filtered_data_lows = filter_result_df[(filter_result_df['Activity'] >= x0) & (filter_result_df['Activity'] <= x1)]
filtered_data_mediums = filter_result_df[(filter_result_df['Activity'] >= x1) & (filter_result_df['Activity'] <= x2)]
filtered_data_highs = filter_result_df[(filter_result_df['Activity'] >= x2) & (filter_result_df['Activity'] <= x3)]
filtered_data_very_highs = filter_result_df[(filter_result_df['Activity'] >= x3) & (filter_result_df['Activity'] <= x4)]
filtered_data_lows

Unnamed: 0,Timestamp,Activity,Glucose,nextGlucose,GlucoseChange
13,2023-12-24 13:45:00,468.666667,10.8,9.9,0.9
69,2023-12-25 10:00:00,404.0,5.3,5.1,0.2
76,2023-12-25 12:00:00,454.0,7.2,5.9,1.3
137,2023-12-26 13:15:00,350.0,6.2,5.1,1.1
191,2023-12-27 12:00:00,317.0,6.2,5.8,0.4
196,2023-12-27 13:30:00,376.0,13.2,12.0,1.2
207,2023-12-27 16:15:00,353.0,7.8,7.3,0.5
254,2023-12-28 16:00:00,328.0,7.0,6.7,0.3
316,2023-12-30 11:30:00,330.0,6.4,6.3,0.1
373,2023-12-31 16:45:00,306.0,4.7,3.6,1.1


# Find mean and variance for different levels of activity

In [145]:
def mean_variance(filtered_data):
    filtered_data['Date'] = pd.to_datetime(filtered_data['Timestamp']).dt.date
    unique_days = sorted(filtered_data['Date'].unique())
    lower_bound = 0
    upper_bound = 0
    all_results = []
    for start_day in range(len(unique_days) - 9):
        dates = []
        activities = []
        means = []
        variances = []
        current_window = unique_days[start_day:start_day + 10]
        cumulative_filtered_data = filtered_data[filtered_data['Date'] >= unique_days[start_day]]
        for current_date in current_window:
            cumulative_activity = cumulative_filtered_data[cumulative_filtered_data['Date'] <= current_date]['Activity']
            cumulative_data = cumulative_filtered_data[cumulative_filtered_data['Date'] <= current_date]['GlucoseChange']
            activities.append(cumulative_activity.mean() if len(cumulative_activity) > 0 else 0)
            means.append(cumulative_data.mean() if len(cumulative_data) > 0 else 0)
            variances.append(cumulative_data.var() if len(cumulative_data) > 1 else 0)
            dates.append(current_date)
        result_df = pd.DataFrame({
            'Date': dates,
            'Activity': activities,
            'Mean': means,
            'Variance': variances
        })
        all_results.append(result_df)
        mean_last = result_df.Mean.iloc[-1]
        variance_last = result_df.Variance.iloc[-1]
        if start_day > 0:
            if lower_bound <= mean_last <= upper_bound:
                print(True)
            else:
                print(False)
            print("Lower Bound:",lower_bound, "Mean:", mean_last, "Upper Bound:", upper_bound,"\n")
        lower_bound = mean_last - variance_last
        upper_bound = mean_last + variance_last
    final_result_df = pd.concat(all_results).reset_index(drop=True)
    final_result_df['Date'] = pd.to_datetime(final_result_df['Date'])
    return final_result_df
result_df_lows = mean_variance(filtered_data_lows)
result_df_lows

True
Lower Bound: 0.4790441176470591 Mean: 0.6105263157894737 Upper Bound: 0.8268382352941176 

True
Lower Bound: 0.44175438596491245 Mean: 0.6166666666666667 Upper Bound: 0.7792982456140349 

True
Lower Bound: 0.4669607843137257 Mean: 0.6421052631578948 Upper Bound: 0.7663725490196077 

True
Lower Bound: 0.47842105263157925 Mean: 0.6222222222222222 Upper Bound: 0.8057894736842104 

True
Lower Bound: 0.4709803921568628 Mean: 0.6210526315789473 Upper Bound: 0.7734640522875816 





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,Date,Activity,Mean,Variance
0,2023-12-24,468.666667,0.9,0.0
1,2023-12-25,442.222222,0.8,0.31
2,2023-12-26,419.166667,0.875,0.229167
3,2023-12-27,388.952381,0.8,0.186667
4,2023-12-28,381.333333,0.7375,0.19125
5,2023-12-30,375.62963,0.666667,0.2125
6,2023-12-31,368.666667,0.71,0.207667
7,2024-01-04,377.875,0.716667,0.225152
8,2024-01-06,377.8,0.653333,0.195524
9,2024-01-07,375.647059,0.652941,0.173897


In [119]:
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=result_df_low['Date'],
    y=result_df_low['Activity'],
    mode='lines+markers',
    name='Quantile 0 - Lows',
    marker=dict(color='orange')
))

fig.add_trace(go.Scatter(
    x=result_df_medium['Date'],
    y=result_df_medium['Activity'],
    mode='lines+markers',
    name='Quantile 1 - Mediums',
    marker=dict(color='green')
))

fig.add_trace(go.Scatter(
    x=result_df_high['Date'],
    y=result_df_high['Activity'],
    mode='lines+markers',
    name='Quantile 2 - Highs',
    marker=dict(color='red')
))

fig.add_trace(go.Scatter(
    x=result_df_very_high['Date'],
    y=result_df_very_high['Activity'],
    mode='lines+markers',
    name='Quantile 3 - Very Highs',
    marker=dict(color='blue')
))

fig.update_layout(
    title='Cumulative Mean of Activity Change Over Days',
    xaxis_title='Cumulative Days',
    yaxis_title='Activity',
    legend_title='Activity Levels'
)

fig.show()

In [120]:
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=result_df_low['Date'],
    y=result_df_low['Mean'],
    mode='lines+markers',
    name='Quantile 0 - Lows',
    marker=dict(color='orange')
))

fig.add_trace(go.Scatter(
    x=result_df_medium['Date'],
    y=result_df_medium['Mean'],
    mode='lines+markers',
    name='Quantile 1 - Mediums',
    marker=dict(color='green')
))

fig.add_trace(go.Scatter(
    x=result_df_high['Date'],
    y=result_df_high['Mean'],
    mode='lines+markers',
    name='Quantile 2 - Highs',
    marker=dict(color='red')
))

fig.add_trace(go.Scatter(
    x=result_df_very_high['Date'],
    y=result_df_very_high['Mean'],
    mode='lines+markers',
    name='Quantile 3 - Very Highs',
    marker=dict(color='blue')
))

fig.update_layout(
    title='Cumulative Mean of Glucose Change Over Days',
    xaxis_title='Cumulative Days',
    yaxis_title='Mean',
    legend_title='Glucose Levels'
)

fig.show()