In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
import torch
from torch.utils.data import TensorDataset, DataLoader
import os 

In [21]:
# Read the exported Oura trends CSV into the working frame.
df = pd.read_csv('/Users/dmitrykorzhov/Desktop/Root/projects/oura_ai/backend/oura_2024-04-16_2024-06-16_trends.csv')

# Parse the 'date' column, then tag every row with its ISO week number;
# 'week' is the grouping key used by the per-week importance analysis.
df = df.assign(date=pd.to_datetime(df['date']))
df = df.assign(week=df['date'].dt.isocalendar().week)

In [29]:
def compute_weekly_importances(df, target):
    """Fit one random forest per week and collect its feature importances.

    For every distinct, non-missing value of ``df['week']`` the rows of that
    week are selected, the numeric (float/int) columns other than ``target``,
    ``date``, ``Bedtime Start`` and ``Bedtime End`` are mean-imputed, the
    target is mean-imputed, and a ``RandomForestRegressor`` is fitted on the
    training part of a fixed-seed 80/20 split.

    Weeks that cannot be modelled are skipped instead of aborting the whole
    run: fewer than two rows (the split needs at least two), a target with no
    observed value, or no usable numeric feature column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``week``, ``date``, ``Bedtime Start``,
        ``Bedtime End`` and ``target`` (a missing one raises ``KeyError``).
    target : str
        Name of the column to predict.

    Returns
    -------
    pandas.DataFrame
        Long-format frame with columns ``feature``, ``importance`` and
        ``week``. The per-week frames are concatenated without resetting the
        index, so index labels repeat across weeks. Empty (but with those
        three columns) when no week could be modelled.
    """
    imputer_X = SimpleImputer(strategy='mean')
    imputer_y = SimpleImputer(strategy='mean')
    non_feature_columns = [target, 'date', 'Bedtime Start', 'Bedtime End']

    weekly_importances = []

    # dropna(): a missing week label cannot be matched with `==` below.
    for week in df['week'].dropna().unique():
        df_week = df[df['week'] == week]

        y_raw = df_week[target]
        # train_test_split needs >= 2 rows, and an all-missing target has no
        # mean to impute with.
        if len(df_week) < 2 or y_raw.notna().sum() == 0:
            continue

        X_week = df_week.drop(columns=non_feature_columns)
        X_week = X_week.select_dtypes(include=[float, int])
        # SimpleImputer discards columns that are entirely NaN, which would
        # make the imputed array narrower than X_week.columns and break the
        # DataFrame construction below. Drop them up front instead.
        X_week = X_week.dropna(axis=1, how='all')
        if X_week.shape[1] == 0:
            continue
        X_week = pd.DataFrame(imputer_X.fit_transform(X_week), columns=X_week.columns)

        y_week = pd.Series(imputer_y.fit_transform(y_raw.values.reshape(-1, 1)).ravel())

        # Only the training part is used; the split is kept so the model sees
        # the same rows as before.
        X_train, _, y_train, _ = train_test_split(X_week, y_week, test_size=0.2, random_state=42)

        model = RandomForestRegressor(n_estimators=100, random_state=42)
        model.fit(X_train, y_train)

        weekly_importances.append(
            pd.DataFrame({
                'feature': X_week.columns,
                'importance': model.feature_importances_,
                'week': week,
            })
        )

    # pd.concat raises on an empty list; return an empty, well-formed frame.
    if not weekly_importances:
        return pd.DataFrame(columns=['feature', 'importance', 'week'])

    return pd.concat(weekly_importances)


def analyze_feature_importance(importance_df):
    """Summarise per-week feature importances.

    Parameters
    ----------
    importance_df : pandas.DataFrame
        Long-format frame with columns ``feature``, ``importance`` and
        ``week`` (one row per feature per week).

    Returns
    -------
    dict
        ``all_time_top_10``
            Series indexed by feature: the 10 highest mean importances
            across all weeks.
        ``last_week_top_10``
            Rows of ``importance_df`` for the largest week value, limited to
            the 10 highest importances.
        ``three_week_consistent``
            Set of features that were in the weekly top 10 for at least one
            run of three adjacent weeks (adjacent in sorted week order).
        ``dropped_features``
            Sorted list of the ``three_week_consistent`` features that are
            not in the last week's top 10.

    Raises
    ------
    ValueError
        If ``importance_df`` contains no weeks.
    """
    # Sort the weeks: unique() returns them in order of appearance, and the
    # sliding three-week window below is only meaningful in week order.
    weeks = sorted(importance_df['week'].unique())
    if not weeks:
        raise ValueError("importance_df contains no weeks to analyze")
    last_week = weeks[-1]

    all_time_top_10 = (
        importance_df.groupby('feature')['importance']
        .mean()
        .sort_values(ascending=False)
        .head(10)
    )

    # Top-10 rows per week, computed once and reused for both the last-week
    # table and the per-week feature lists.
    top_10_frames = {
        week: importance_df[importance_df['week'] == week]
        .sort_values(by='importance', ascending=False)
        .head(10)
        for week in weeks
    }
    last_week_top_10 = top_10_frames[last_week]
    top_10_weekly = {week: frame['feature'].tolist() for week, frame in top_10_frames.items()}

    three_week_consistent = set()
    # zip over shifted copies yields every window of three adjacent weeks and
    # nothing at all when fewer than three weeks are present.
    for week1, week2, week3 in zip(weeks, weeks[1:], weeks[2:]):
        three_week_consistent.update(
            set(top_10_weekly[week1]) & set(top_10_weekly[week2]) & set(top_10_weekly[week3])
        )

    last_week_features = set(top_10_weekly[last_week])
    # sorted(): iterating a set has no stable order, so sort for reproducible output.
    dropped_features = sorted(
        feature for feature in three_week_consistent if feature not in last_week_features
    )

    return {
        'all_time_top_10': all_time_top_10,
        'last_week_top_10': last_week_top_10,
        'three_week_consistent': three_week_consistent,
        'dropped_features': dropped_features
    }

In [30]:
# Target columns to explain; one weekly-importance table is computed per target.
metrics = ['Total Sleep Score', 'Readiness Score', 'Recovery Index Score', 'Average HRV']
importance_dfs = dict(
    zip(metrics, (compute_weekly_importances(df, target_name) for target_name in metrics))
)

In [31]:
# Summarise each target's weekly importances, keyed by the same metric name.
analysis_results = {
    name: analyze_feature_importance(weekly_frame)
    for name, weekly_frame in importance_dfs.items()
}

In [33]:
# Print a plain-text report: one heading plus value per summary, for every metric.
for metric in analysis_results:
    results = analysis_results[metric]
    print(f"\nMetric: {metric}")
    for heading, key in (
        ("\nAll time top 10 features:", 'all_time_top_10'),
        ("\nLast week top 10 features:", 'last_week_top_10'),
        ("\nFeatures in top 10 for at least three weeks:", 'three_week_consistent'),
        ("\nFeatures that were in top 10 for three weeks but not in the last week:", 'dropped_features'),
    ):
        print(heading)
        print(results[key])


Metric: Total Sleep Score

All time top 10 features:
feature
Sleep Efficiency Score         0.040380
Previous Day Activity Score    0.036514
Stay Active Score              0.033468
Restless Sleep                 0.032936
Sleep Score                    0.032320
Readiness Score                0.032234
Activity Balance Score         0.031609
Inactive Time                  0.031161
Sleep Latency                  0.030438
Sleep Timing                   0.030080
Name: importance, dtype: float64

Last week top 10 features:
                        feature  importance  week
24            Stay Active Score    0.069571    24
34                    Rest Time    0.051801    24
23               Activity Score    0.047807    24
44  Previous Day Activity Score    0.042683    24
6             Sleep Timin Score    0.038981    24
4             Restfulness Score    0.038901    24
45       Activity Balance Score    0.038523    24
16                 Sleep Timing    0.038432    24
32  Equivalent Walking Dist

In [36]:
import requests


In [87]:
# Fetch the daily_stress collection from the Oura v2 API for a fixed date window.
url = 'https://api.ouraring.com/v2/usercollection/daily_stress'
# NOTE: `params` is module-level state; get_data() in a later cell reads it as well.
params = {
    'start_date': '2021-11-01',
    'end_date': '2024-12-01'
}
# The token is read from the environment so it never lands in the notebook.
headers = {
    'Authorization': f'Bearer {os.getenv("OURA_TOKEN")}'
}
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, headers=headers, params=params, timeout=30)
# Surface HTTP errors (e.g. 401 for a missing or invalid token) here instead of
# as an opaque KeyError on 'data' below.
response.raise_for_status()
data = response.json()['data']

In [88]:
def get_data(api, query_params=None):
    """Download one Oura v2 ``usercollection`` endpoint and flatten its records.

    Parameters
    ----------
    api : str
        Endpoint name appended to ``/v2/usercollection/``, e.g. ``'daily_sleep'``.
    query_params : dict, optional
        Query parameters for the request. When omitted, the module-level
        ``params`` dict is used, which is what this function always did before.

    Returns
    -------
    list of dict
        One dict per record from every page of the response. If a record has
        a ``contributors`` mapping, its entries are merged into the record's
        top level and the ``contributors`` key itself is removed.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status (e.g. an invalid token).
    """
    if query_params is None:
        # Backward-compatible default: the `params` dict defined at notebook level.
        query_params = params

    url = f'https://api.ouraring.com/v2/usercollection/{api}'
    headers = {
        'Authorization': f'Bearer {os.getenv("OURA_TOKEN")}'
    }

    rows = []
    # Work on a copy so adding the paging token never mutates the caller's dict.
    page_params = dict(query_params)

    while True:
        response = requests.get(url, headers=headers, params=page_params, timeout=30)
        response.raise_for_status()
        payload = response.json()

        for item in payload['data']:
            row = item.copy()
            # pop() removes the nested mapping whether or not it is populated;
            # a null/empty value simply contributes no extra keys.
            contributors = row.pop('contributors', None)
            if contributors:
                row.update(contributors)
            rows.append(row)

        # Long date ranges are paginated: keep requesting until no token is returned.
        next_token = payload.get('next_token')
        if not next_token:
            break
        page_params['next_token'] = next_token

    return rows

In [89]:
from datetime import datetime
import pandas as pd

def group_data(data):
    """Group timestamped samples by calendar day.

    Parameters
    ----------
    data : list of dict
        Records with at least the keys ``timestamp`` (ISO-8601 string),
        ``bpm`` and ``source``. The input records are not modified.

    Returns
    -------
    list of dict
        One dict per day, ordered by day, with keys ``day`` (a
        ``datetime.date`` taken from the timestamp as written, without
        timezone conversion) and ``bpm``, ``source``, ``timestamp`` (lists of
        that day's values in input order). Empty input returns ``[]``.
    """
    if not data:
        # An empty frame has no 'day' column to group on.
        return []

    def _to_day(timestamp):
        # fromisoformat() accepts any UTC offset; older Python versions reject
        # a trailing 'Z', so rewrite it as an explicit offset first.
        if timestamp.endswith('Z'):
            timestamp = timestamp[:-1] + '+00:00'
        return datetime.fromisoformat(timestamp).date()

    # Build the frame first and derive 'day' on it, so the caller's dicts are
    # left untouched.
    df = pd.DataFrame(data)
    df['day'] = [_to_day(timestamp) for timestamp in df['timestamp']]

    # Group by 'day' and aggregate 'bpm', 'source' and 'timestamp' into lists
    grouped = df.groupby('day').agg({'bpm': list, 'source': list, 'timestamp': list}).reset_index()

    # Convert the grouped DataFrame back into a list of dictionaries (JSON-like structure)
    return grouped.to_dict('records')

In [90]:
# Fetch the daily_sleep collection; as the cell's last expression, the returned
# list of flattened records is displayed as the cell output.
get_data('daily_sleep')

[{'id': 'efe54725-ccde-4004-a36c-f91eee7b398e',
  'day': '2023-11-22',
  'score': 65,
  'timestamp': '2023-11-22T00:00:00+00:00',
  'deep_sleep': 96,
  'efficiency': 83,
  'latency': 91,
  'rem_sleep': 65,
  'restfulness': 53,
  'timing': 3,
  'total_sleep': 67},
 {'id': 'f1a504b5-8688-4969-bdbe-a6702c98a685',
  'day': '2023-11-23',
  'score': 56,
  'timestamp': '2023-11-23T00:00:00+00:00',
  'deep_sleep': 77,
  'efficiency': 83,
  'latency': 86,
  'rem_sleep': 46,
  'restfulness': 51,
  'timing': 13,
  'total_sleep': 51},
 {'id': '3bf2a18d-f8f4-4532-8cf8-d09a41cb3e38',
  'day': '2023-11-24',
  'score': 49,
  'timestamp': '2023-11-24T00:00:00+00:00',
  'deep_sleep': 49,
  'efficiency': 74,
  'latency': 78,
  'rem_sleep': 43,
  'restfulness': 33,
  'timing': 18,
  'total_sleep': 50},
 {'id': 'e060e8db-8efa-48fd-a8b2-911037aaf0cb',
  'day': '2023-11-25',
  'score': 51,
  'timestamp': '2023-11-25T00:00:00+00:00',
  'deep_sleep': 95,
  'efficiency': 53,
  'latency': 89,
  'rem_sleep': 53,
