# 3. Initial EDF Data Exploration

This notebook focuses on loading and exploring the CPAP data from the EDF files.

## 3.1 Setup and Data Loading

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys
import os

# Make the parent of the current working directory importable so that the
# `src` package resolves (assumes the notebook is run from 'notebooks/').
module_path = os.path.abspath(os.pardir)
already_on_path = module_path in sys.path
if not already_on_path:
    sys.path.append(module_path)

from src.data_loader import load_edf_data, resample_data

# Notebook-wide display defaults: wide figures and up to 100 printed rows
plt.rcParams.update({'figure.figsize': (15, 5)})
pd.options.display.max_rows = 100

In [None]:
# Specify path to your data file (relative to the notebook's working directory)
edf_data_filepath = '../data/2025/20250617_023551_SA2.edf' # Replace with actual path

# Load data using the data_loader module; a None result is treated below
# as a failed load.
raw_df = load_edf_data(edf_data_filepath)

if raw_df is not None:
    print("Data loaded successfully:")
    raw_df.info()
    # Only a cell's final top-level expression is auto-rendered; a bare
    # `raw_df.head()` inside this `if` body would be silently discarded.
    # `display` is provided by the IPython kernel without an import.
    display(raw_df.head())
else:
    print("Failed to load data.")

## 3.2 Data Resampling

In [None]:
SAMPLING_FREQ_HZ = 25 # Target sampling frequency

# Reset before branching so that re-running this cell after a failed load
# cannot leave a stale frame from an earlier execution for later cells.
df_resampled = None

if raw_df is not None:
    df_resampled = resample_data(raw_df, target_freq_hz=SAMPLING_FREQ_HZ)
    if df_resampled is not None:
        print(f"\nResampled data to {SAMPLING_FREQ_HZ} Hz:")
        df_resampled.info()
        # A bare `.head()` inside a nested block is not auto-rendered by the
        # notebook, so show the preview explicitly. `display` is provided by
        # the IPython kernel without an import.
        display(df_resampled.head())
    else:
        print("Resampling failed.")
else:
    print("Skipping preprocessing as data loading failed.")

## 3.3 Data Visualization

In [None]:
if 'df_resampled' in locals() and df_resampled is not None:
    # (legend label, column name) for each stacked panel, top to bottom.
    panels = [('Pulse', 'Pulse.1s'), ('SpO2', 'SpO2.1s')]

    # Guard against recordings that lack one of the expected signals instead
    # of failing with a KeyError halfway through drawing the figure.
    missing_columns = [column for _, column in panels if column not in df_resampled]
    if missing_columns:
        print(f"Skipping visualization; missing expected columns: {missing_columns}")
    else:
        # Plot to verify: two stacked panels sharing the x axis so that
        # zooming/panning keeps both signals aligned.
        fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(15, 8))
        for ax, (label, column) in zip(axes, panels):
            df_resampled[column].plot(label=label, ax=ax)
            ax.set_title(f'{label} Data')
            ax.legend()
        fig.tight_layout()
        plt.show()
else:
    print("Skipping visualization as resampling failed or was skipped.")