# Part 2: Time Series Modeling

In this notebook, we will implement functions for time series feature extraction and ARIMA modeling.

In [None]:
# Set plotting style
import matplotlib.pyplot as plt
import seaborn as sns

# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*'
# and deprecated the old names, which newer releases no longer accept.
# Pick whichever name this installation actually provides.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
sns.set_context('notebook')

## 1. Time Series Feature Extraction

Implement the `extract_time_series_features` function to extract rolling-window features (mean, standard deviation, minimum, maximum, and lag-1 autocorrelation) for each signal.

In [None]:
def extract_time_series_features(data, window_size=60):
    """Extract rolling window features from time series data.

    Rolling statistics are computed separately for every
    (subject_id, session) combination, so a window never mixes samples
    that belong to different subjects or sessions.

    Parameters:
    -----------
    data : pandas.DataFrame
        Input data with columns: ['timestamp', 'heart_rate', 'eda', 'temperature', 'subject_id', 'session']
    window_size : int, optional
        Number of consecutive samples (rows) in each rolling window
        (default: 60). This equals a duration in seconds only if the data
        are sampled once per second.

    Returns:
    --------
    pandas.DataFrame
        One row per input row, ordered by timestamp and carrying the input's
        index labels. For each of 'heart_rate', 'eda' and 'temperature' it
        holds the columns '<signal>_mean', '<signal>_std', '<signal>_min',
        '<signal>_max' and '<signal>_autocorr' (lag-1 autocorrelation),
        followed by 'timestamp', 'subject_id' and 'session'.

    Raises:
    -------
    KeyError
        If any of the required columns is missing from `data`.
    """
    # Import required libraries
    import numpy as np
    import pandas as pd

    signal_cols = ['heart_rate', 'eda', 'temperature']
    group_cols = ['subject_id', 'session']

    # Fail early with a readable message instead of a KeyError deep inside pandas
    required_cols = ['timestamp'] + signal_cols + group_cols
    missing_cols = [c for c in required_cols if c not in data.columns]
    if missing_cols:
        raise KeyError(f"Missing required columns: {missing_cols}")

    # sort_values returns a new frame, so the caller's data is never modified
    df = data.sort_values('timestamp')

    def _rolling_stat(stat_name):
        # Build a per-group function computing one rolling statistic
        def _apply(signal):
            window = signal.rolling(window=window_size, min_periods=1)
            return getattr(window, stat_name)()
        return _apply

    def _lag1_autocorr(window):
        # Autocorrelation is undefined for fewer than two samples
        return window.autocorr(lag=1) if len(window) > 1 else np.nan

    def _rolling_autocorr(signal):
        return signal.rolling(window=window_size, min_periods=2).apply(_lag1_autocorr)

    # Group by recording so windows do not cross subject/session boundaries.
    # sort=False keeps group order irrelevant; dropna=False keeps rows whose
    # subject/session is missing instead of silently discarding them.
    grouped = df.groupby(group_cols, sort=False, dropna=False)

    # transform() returns results aligned with df's rows, so the output keeps
    # the timestamp ordering and index labels of the sorted frame.
    features = pd.DataFrame(index=df.index)
    for col in signal_cols:
        signal_by_recording = grouped[col]

        # Basic statistics
        for stat_name in ('mean', 'std', 'min', 'max'):
            features[f'{col}_{stat_name}'] = signal_by_recording.transform(_rolling_stat(stat_name))

        # Autocorrelation at lag 1
        features[f'{col}_autocorr'] = signal_by_recording.transform(_rolling_autocorr)

    # Add metadata
    features['timestamp'] = df['timestamp']
    features['subject_id'] = df['subject_id']
    features['session'] = df['session']

    return features

## 2. ARIMA Modeling

Implement the `build_arima_model` function to fit and evaluate ARIMA models.

In [None]:
def build_arima_model(series, order=(1,1,1), output_dir='plots'):
    """Build and evaluate an ARIMA model for time series data.

    Fits the model and writes two PNG figures into `output_dir`:
    'arima_diagnostics.png' (original vs. fitted values, and residuals over
    time) and 'arima_residuals.png' (residual histogram, Q-Q plot, ACF and
    PACF).

    Parameters:
    -----------
    series : pandas.Series
        Input time series data
    order : tuple, optional
        ARIMA model order (p,d,q) (default: (1,1,1))
    output_dir : str, optional
        Directory to save diagnostic plots (default: 'plots'); created if it
        does not exist

    Returns:
    --------
    statsmodels.tsa.arima.model.ARIMAResults
        Fitted ARIMA model
    """
    # Import required libraries
    import os
    import matplotlib.pyplot as plt
    import seaborn as sns
    from statsmodels.tsa.arima.model import ARIMA
    from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
    from scipy import stats

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Fit ARIMA model
    model = ARIMA(series, order=order)
    results = model.fit()
    residuals = results.resid

    # Create diagnostic plots
    fig, axes = plt.subplots(2, 1, figsize=(12, 8))
    try:
        # Plot 1: Original vs Fitted
        axes[0].plot(series.index, series.values, 'b-', label='Original')
        axes[0].plot(series.index, results.fittedvalues, 'r--', label='Fitted')
        axes[0].set_title('Original vs Fitted Values')
        axes[0].legend()
        axes[0].grid(True)

        # Plot 2: Residuals
        axes[1].plot(series.index, residuals, 'g-')
        axes[1].axhline(y=0, color='r', linestyle='--')
        axes[1].set_title('Model Residuals')
        axes[1].grid(True)

        # Save diagnostic plots
        fig.tight_layout()
        fig.savefig(os.path.join(output_dir, 'arima_diagnostics.png'), dpi=300, bbox_inches='tight')
    finally:
        # Always release the figure, even if plotting or saving fails
        plt.close(fig)

    # The PACF can only be computed for lags below half the sample size, so
    # cap the requested 40 lags for short series instead of raising.
    max_lags = min(40, len(residuals) // 2 - 1)

    # Create residual analysis plots
    fig, axes = plt.subplots(2, 2, figsize=(12, 8))
    try:
        # Histogram of residuals
        sns.histplot(residuals, kde=True, ax=axes[0,0])
        axes[0,0].set_title('Residuals Distribution')

        # Q-Q plot
        stats.probplot(residuals, dist="norm", plot=axes[0,1])
        axes[0,1].set_title('Q-Q Plot')

        if max_lags >= 1:
            # ACF plot
            plot_acf(residuals, ax=axes[1,0], lags=max_lags)
            # PACF plot
            plot_pacf(residuals, ax=axes[1,1], lags=max_lags)
        else:
            # Too few residuals for any correlogram; leave a note in the panels
            for ax in (axes[1,0], axes[1,1]):
                ax.text(0.5, 0.5, 'Too few observations', ha='center',
                        va='center', transform=ax.transAxes)
        axes[1,0].set_title('ACF of Residuals')
        axes[1,1].set_title('PACF of Residuals')

        # Save residual analysis plots
        fig.tight_layout()
        fig.savefig(os.path.join(output_dir, 'arima_residuals.png'), dpi=300, bbox_inches='tight')
    finally:
        plt.close(fig)

    return results

## Example Usage

Here's how to use the implemented functions:

In [None]:
# Bring in the loading and preprocessing helpers from Part 1
from part1_exploration import load_data, preprocess_data

# Read the raw data, then run it through preprocessing
data = load_data()
processed_data = preprocess_data(data)

# Compute rolling-window features for the full dataset
features = extract_time_series_features(processed_data)
print("Extracted features shape:", features.shape)

# Model heart rate for a single recording: keep only the rows that share the
# subject and session of the first row, indexed by timestamp
subject_data = processed_data.loc[
    (processed_data['subject_id'] == processed_data['subject_id'].iloc[0]) &
    (processed_data['session'] == processed_data['session'].iloc[0])
].set_index('timestamp')

# Fit the ARIMA model (diagnostic plots are saved by the function) and report it
model = build_arima_model(subject_data['heart_rate'])
print("\nARIMA model summary:")
print(model.summary())