# Part 3: Advanced Analysis

In this notebook, we will implement advanced analysis techniques for physiological time series data.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import signal
import pywt
import os

# Set plotting style
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*' and
# 3.8 removed the old names, so use whichever name this installation provides.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
sns.set_context('notebook')

## 1. Time Domain Feature Extraction

Implement the `extract_time_domain_features` function to calculate time-domain features from physiological signals.

In [None]:
def extract_time_domain_features(data, window_size=60):
    """Summarise each physiological signal with time-domain statistics.

    Parameters:
    -----------
    data : pandas.DataFrame
        Must provide a 'timestamp' column (used only to order the rows) and the
        signal columns 'heart_rate', 'eda' and 'temperature'. The frame is
        copied before sorting, so the caller's object is left untouched.
    window_size : int, optional
        Window handed to ``Series.rolling`` (default: 60). An integer window
        counts rows, so it corresponds to seconds only for 1 Hz data.

    Returns:
    --------
    dict
        ``{signal_name: {feature_name: value}}``. For every signal the inner
        dict holds, in this order: 'mean', 'std', 'min', 'max', 'median',
        'skewness', 'kurtosis', 'rolling_mean', 'rolling_std', 'mean_change'
        and 'std_change'.
    """
    # scipy.stats is not imported at notebook level, so bring it in here
    from scipy import stats

    # Work on a time-ordered copy so diffs and rolling windows follow the recording
    ordered = data.copy().sort_values('timestamp')

    def _summarise(series):
        """Compute the full feature set for one signal column."""
        observed = series.dropna()  # skew/kurtosis are computed on non-missing samples only
        windows = series.rolling(window=window_size, min_periods=1)
        steps = series.diff()  # sample-to-sample change

        return {
            # Basic statistics
            'mean': series.mean(),
            'std': series.std(),
            'min': series.min(),
            'max': series.max(),
            'median': series.median(),
            'skewness': stats.skew(observed),
            'kurtosis': stats.kurtosis(observed),
            # Rolling statistics, averaged over all window positions
            'rolling_mean': windows.mean().mean(),
            'rolling_std': windows.std().mean(),
            # Rate of change
            'mean_change': steps.mean(),
            'std_change': steps.std(),
        }

    return {
        name: _summarise(ordered[name])
        for name in ('heart_rate', 'eda', 'temperature')
    }

## 2. Frequency Analysis

Implement the `analyze_frequency_components` function to perform frequency-domain analysis.

In [None]:
def analyze_frequency_components(data, sampling_rate, window_size=60):
    """Perform frequency-domain analysis using Welch's method.

    Parameters:
    -----------
    data : pandas.DataFrame
        Input data with the signal columns 'heart_rate', 'eda' and
        'temperature'. Missing values are dropped per column before analysis.
        The frame is only read, never modified.
    sampling_rate : float
        Sampling rate of the signals in Hz. Must be positive and finite.
    window_size : int, optional
        Length of each Welch segment in seconds (default: 60). It is converted
        to a whole number of samples and clipped to the signal length.

    Returns:
    --------
    dict
        ``{signal_name: {'frequencies': ndarray, 'power': ndarray,
        'band_powers': dict}}``. ``band_powers`` holds the integrated power in
        the 'vlf', 'lf' and 'hf' bands; for 'heart_rate' it also holds
        'lf_hf_ratio', which is NaN when the HF power is zero (for example
        when the Nyquist frequency lies below the HF band).

    Raises:
    -------
    ValueError
        If ``sampling_rate`` is not a positive, finite number.
    """
    # Import required libraries
    import numpy as np
    from scipy import signal

    if not np.isfinite(sampling_rate) or sampling_rate <= 0:
        raise ValueError(
            f"sampling_rate must be a positive, finite number, got {sampling_rate!r}"
        )

    # NumPy 2.0 renamed trapz to trapezoid and deprecated the old name;
    # use whichever the installed version provides.
    integrate = getattr(np, 'trapezoid', None) or np.trapz

    # Welch segment length must be an integer number of samples (at least 1)
    requested_nperseg = max(1, int(round(window_size * sampling_rate)))

    # Define frequency bands (in Hz)
    bands = {
        'vlf': (0.003, 0.04),  # Very low frequency
        'lf': (0.04, 0.15),    # Low frequency
        'hf': (0.15, 0.4)      # High frequency
    }

    # Initialize results dictionary
    results = {}

    # Analyze each signal
    for col in ['heart_rate', 'eda', 'temperature']:
        # Get signal data
        signal_data = data[col].dropna().to_numpy()

        if signal_data.size == 0:
            # Nothing to analyse: report an empty spectrum instead of failing
            frequencies = np.array([], dtype=float)
            power = np.array([], dtype=float)
        else:
            # Clip the segment to the available samples so short recordings
            # are analysed as a single segment without a SciPy warning
            nperseg = min(requested_nperseg, signal_data.size)
            frequencies, power = signal.welch(signal_data, fs=sampling_rate, nperseg=nperseg)

        # Calculate power in each frequency band
        band_powers = {}
        for band_name, (low_freq, high_freq) in bands.items():
            in_band = (frequencies >= low_freq) & (frequencies <= high_freq)
            # Integrating fewer than two points yields 0.0
            band_powers[band_name] = integrate(power[in_band], frequencies[in_band])

        # Calculate LF/HF ratio for heart rate
        if col == 'heart_rate':
            hf_power = band_powers['hf']
            # The ratio is undefined without HF power; report NaN rather than inf
            band_powers['lf_hf_ratio'] = (
                band_powers['lf'] / hf_power if hf_power > 0 else np.nan
            )

        # Store results
        results[col] = {
            'frequencies': frequencies,
            'power': power,
            'band_powers': band_powers
        }

    return results

## 3. Time-Frequency Analysis

Implement the `analyze_time_frequency_features` function to perform wavelet-based time-frequency analysis.

In [None]:
def analyze_time_frequency_features(data, sampling_rate, window_size=60):
    """Perform time-frequency analysis using wavelet transforms.

    Each signal is analysed twice: a 4-level discrete wavelet decomposition
    ('db4') provides per-level energies, and a continuous wavelet transform
    ('morl') provides a scale/frequency map.

    Parameters:
    -----------
    data : pandas.DataFrame
        Input data with the signal columns 'heart_rate', 'eda' and
        'temperature'. Missing values are dropped per column before analysis.
        The frame is only read, never modified.
    sampling_rate : float
        Sampling rate of the signals in Hz. Must be positive and finite.
    window_size : int, optional
        Duration in seconds that bounds the largest CWT scale (default: 60).
        Scales run from 1 up to, but excluding,
        ``min(n_samples, round(window_size * sampling_rate))``.

    Returns:
    --------
    dict
        ``{signal_name: features}`` where ``features`` contains:
        'wavelet_coeffs' (list of DWT coefficient arrays),
        'energy_levels' (sum of squared coefficients per array),
        'energy_ratio' (each energy divided by the total; all zeros when the
        total energy is zero),
        'cwt_coeffs' and 'cwt_freqs' (CWT output, frequencies in Hz),
        'dominant_scales' (the scale with the largest mean absolute CWT
        coefficient).

    Raises:
    -------
    ValueError
        If ``sampling_rate`` is not a positive, finite number, or if a signal
        has too few valid samples (or the window is too short) to build at
        least one CWT scale.
    """
    # Import required libraries
    import numpy as np
    import pywt

    if not np.isfinite(sampling_rate) or sampling_rate <= 0:
        raise ValueError(
            f"sampling_rate must be a positive, finite number, got {sampling_rate!r}"
        )

    # Wavelet parameters
    dwt_wavelet = 'db4'   # Daubechies 4 for the discrete decomposition
    # pywt.cwt is documented for the families in pywt.wavelist(kind='continuous');
    # 'db4' is a discrete wavelet, so the CWT uses the Morlet wavelet instead.
    cwt_wavelet = 'morl'
    level = 4             # Decomposition level

    # Window length expressed in whole samples; exclusive upper bound for the scales
    window_samples = int(round(window_size * sampling_rate))

    # Initialize results dictionary
    results = {}

    # Analyze each signal
    for col in ['heart_rate', 'eda', 'temperature']:
        # Get signal data
        signal_data = data[col].dropna().to_numpy(dtype=float)

        max_scale = min(signal_data.size, window_samples)
        if max_scale < 2:
            # np.arange(1, max_scale) would be empty and argmax below would fail
            raise ValueError(
                f"'{col}' needs at least 2 valid samples and "
                f"window_size * sampling_rate >= 2 for wavelet analysis "
                f"(got {signal_data.size} samples, window of {window_samples} samples)"
            )

        # Perform wavelet decomposition
        coeffs = pywt.wavedec(signal_data, dwt_wavelet, level=level)

        # Calculate energy at each level
        energies = [np.sum(np.square(c)) for c in coeffs]
        total_energy = sum(energies)
        if total_energy > 0:
            energy_ratio = [e / total_energy for e in energies]
        else:
            # All-zero signal: avoid 0/0 and report an empty energy distribution
            energy_ratio = [0.0 for _ in energies]

        # Continuous wavelet transform; sampling_period makes the returned
        # frequencies come out in Hz instead of cycles per sample
        scales = np.arange(1, max_scale)
        coefficients, frequencies = pywt.cwt(
            signal_data, scales, cwt_wavelet, sampling_period=1.0 / sampling_rate
        )

        # Calculate wavelet-based features
        features = {
            'wavelet_coeffs': coeffs,
            'energy_levels': energies,
            'energy_ratio': energy_ratio,
            'cwt_coeffs': coefficients,
            'cwt_freqs': frequencies,
            'dominant_scales': scales[np.argmax(np.mean(np.abs(coefficients), axis=1))]
        }

        # Store results
        results[col] = features

    return results

## Example Usage

Here's how to use the implemented functions:

In [None]:
# Load preprocessed data
from part1_exploration import load_data, preprocess_data

# Load and preprocess data
data = load_data()
processed_data = preprocess_data(data)

# Calculate sampling rate (assuming regular sampling)
# Intervals are kept as fractional seconds: casting them to whole seconds would
# truncate any sub-second sampling period to 0 and make the rate infinite.
timestamps = pd.to_datetime(processed_data['timestamp'])
sample_intervals = timestamps.diff().dt.total_seconds()
# The median of the positive steps is not skewed by recording gaps, nor by the
# backward jumps that would appear if several recordings are stacked in one frame.
sampling_rate = 1.0 / sample_intervals[sample_intervals > 0].median()

# Extract time-domain features
time_features = extract_time_domain_features(processed_data)
print("\nTime-domain features:")
print(time_features['heart_rate'])

# Analyze frequency components
freq_features = analyze_frequency_components(processed_data, sampling_rate)
print("\nFrequency-domain features:")
print(freq_features['heart_rate']['band_powers'])

# Analyze time-frequency features
tf_features = analyze_time_frequency_features(processed_data, sampling_rate)
print("\nTime-frequency features:")
print("Energy ratios:", tf_features['heart_rate']['energy_ratio'])