# Beiwe Sample Dataset Accelerometer EDA Pt. 2

Prototype a helper function that loads and summarizes an hour's worth of accelerometer data.


In [35]:
%load_ext autoreload
%autoreload 2 

import pandas as pd
import numpy as np

# TODO: move to its own script
def summarize_hourly_file(file_path, sampling_rate_hz=10, gap_threshold_ms=1000):
    """Summarize a single hourly accelerometer CSV file.

    The CSV must provide a ``timestamp`` column (parsed as epoch
    milliseconds) and ``x``, ``y``, ``z`` columns.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file, passed straight to ``pandas.read_csv``.
    sampling_rate_hz : float, default 10
        Nominal sampling rate used to convert the row count into minutes of
        actual sampling.
    gap_threshold_ms : float, default 1000
        Two consecutive samples further apart than this many milliseconds
        are counted as the boundary between two bursts.

    Returns
    -------
    dict
        ``n_rows``         number of samples in the file
        ``start_time``     earliest sample time (tz-aware UTC), ``NaT`` if empty
        ``end_time``       latest sample time (tz-aware UTC), ``NaT`` if empty
        ``duration_min``   minutes between first and last sample
        ``sampling_min``   ``n_rows / sampling_rate_hz / 60``
        ``duty_cycle``     ``sampling_min / duration_min``; NaN when the
                           duration is zero (empty or single-sample file)
        ``n_bursts``       number of gaps above the threshold, plus one
                           (0 for an empty file)
        ``mean_magnitude`` mean of ``sqrt(x**2 + y**2 + z**2)``

    Raises
    ------
    ValueError
        If ``sampling_rate_hz`` is not positive.
    """
    if sampling_rate_hz <= 0:
        raise ValueError("sampling_rate_hz must be positive")

    df = pd.read_csv(file_path)
    n_rows = len(df)

    # A header-only file has no first/last sample to index; report an
    # explicit "nothing recorded" summary instead of raising IndexError.
    if n_rows == 0:
        return {
            'n_rows': 0,
            'start_time': pd.NaT,
            'end_time': pd.NaT,
            'duration_min': 0.0,
            'sampling_min': 0.0,
            'duty_cycle': float('nan'),
            'n_bursts': 0,
            'mean_magnitude': float('nan'),
        }

    # Work on time-ordered timestamps so that out-of-order rows cannot yield
    # a negative duration or hide/invent gaps in the burst count.
    timestamps_ms = df['timestamp'].sort_values(ignore_index=True)
    datetimes_utc = pd.to_datetime(timestamps_ms, unit='ms', utc=True)
    start_time = datetimes_utc.iloc[0]
    end_time = datetimes_utc.iloc[-1]

    # Wall-clock span covered by the file, in minutes
    duration_min = (end_time - start_time).total_seconds() / 60

    # Time actually spent sampling, assuming the nominal rate
    sampling_min = n_rows / sampling_rate_hz / 60

    # Fraction of the covered span spent sampling; undefined when the span
    # is zero (e.g. a single sample), so return NaN rather than dividing by 0.
    duty_cycle = sampling_min / duration_min if duration_min > 0 else float('nan')

    # Every gap above the threshold separates two bursts
    n_bursts = (timestamps_ms.diff() > gap_threshold_ms).sum() + 1

    # Euclidean norm of the three acceleration axes, averaged over the file
    magnitude = np.sqrt(df['x']**2 + df['y']**2 + df['z']**2)
    mean_magnitude = magnitude.mean()

    return {
        'n_rows': n_rows,
        'start_time': start_time,
        'end_time': end_time,
        'duration_min': duration_min,
        'sampling_min': sampling_min,
        'duty_cycle': duty_cycle,
        'n_bursts': n_bursts,
        'mean_magnitude': mean_magnitude,
    }

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Check that `summarize_hourly_file()` works on the hr_9 CSV (the 09:00 UTC hourly file).

In [36]:
# Directory holding this participant's hourly accelerometer CSVs
base_path = "/n/home01/egraff/sample_imputation/data/raw/3si9xdvl/accelerometer/"
# Hourly file whose name starts at 09:00 UTC on 2022-03-26
test_file_path = f"{base_path}2022-03-26 09_00_00+00_00.csv"

# Run the summary helper once and show the resulting dict
result = summarize_hourly_file(file_path=test_file_path)
print(result)

{'n_rows': 18090, 'start_time': Timestamp('2022-03-26 09:00:38.792000+0000', tz='UTC'), 'end_time': Timestamp('2022-03-26 09:59:39.055000+0000', tz='UTC'), 'duration_min': 59.00438333333333, 'sampling_min': 30.15, 'duty_cycle': 0.5109789865894144, 'n_bursts': np.int64(30), 'mean_magnitude': np.float64(1.0069258994313353)}


Now try on different files 