In [None]:
import pandas as pd

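`extract_device_data` takes a device name (`oura`, `mz3`, or `empatica`), a start time, an end time, and an output file name. It resamples that device's readings into 3-minute bins (mean, max, min, std) and writes the bins that fall within the time range to the output CSV.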

In [2]:
def extract_device_data(device_name, start_time, end_time, output_file):
    """Summarise one device's readings in 3-minute bins and save a time slice to CSV.

    The device's CSV is read from the current working directory, its timestamp
    column is renamed to ``Timestamp``, and the device's value column is
    resampled into 3-minute bins with mean, max, min and std. Bins whose label
    lies between ``start_time`` and ``end_time`` (both inclusive) are written
    to ``output_file`` together with a ``date`` column.

    Parameters
    ----------
    device_name : str
        One of ``'oura'``, ``'mz3'`` or ``'empatica'``.
    start_time, end_time : str or datetime-like
        Bounds of the label-based slice applied to the resampled index.
    output_file : str or path-like
        Destination CSV path.

    Returns
    -------
    None
        The result is written to ``output_file``. For an unknown device name a
        message is printed and nothing is read or written.
    """
    # Per-device source file, timestamp column, and value column to summarise.
    sources = {
        'oura': ('oura.csv', 'timestamp', 'bpm'),
        'mz3': ('mz3.csv', 'Time', 'hr'),
        'empatica': ('empatica.csv', 'minute', 'entry_count'),
    }

    # Validate before touching the filesystem so a typo is reported immediately
    # instead of after (or instead of) reading data files.
    if device_name not in sources:
        print("Invalid device name. Choose from: oura, mz3, empatica")
        return

    csv_path, time_col, col_name = sources[device_name]

    # Read only the requested device's file: the other devices' CSVs are not
    # needed and must not be able to break this request.
    df = pd.read_csv(csv_path, parse_dates=[time_col]).rename(
        columns={time_col: 'Timestamp'}
    )

    # Drop timezone info if present so the index can be sliced with naive
    # start/end values; tz-naive data passes through unchanged.
    df['Timestamp'] = df['Timestamp'].dt.tz_localize(None)
    df = df.set_index('Timestamp')

    # Resample the full series first and slice afterwards, so every bin inside
    # the requested window is computed from all of its readings.
    df_resampled = df.resample('3min').agg({col_name: ['mean', 'max', 'min', 'std']})
    df_resampled.columns = ['mean', 'max', 'min', 'std']

    # Filter by the specified time range
    filtered_data = df_resampled.loc[start_time:end_time].reset_index()
    filtered_data['date'] = filtered_data['Timestamp'].dt.date

    # Save to CSV
    filtered_data.to_csv(output_file, index=False)
    print(f"Extracted data saved to {output_file}")

# Example usage in Google Colab.
# `device` selects the data source: 'oura', 'mz3', or 'empatica'.
device = 'oura'
start_time, end_time = '2024-01-22 05:36:00', '2024-01-22 06:06:00'
output_file = 'output.csv'

extract_device_data(
    device_name=device,
    start_time=start_time,
    end_time=end_time,
    output_file=output_file,
)

NameError: name 'pd' is not defined