# Part 2: Time Series Modeling

In this notebook, you will implement functions to extract features from time series data and build ARIMA models.

In [7]:
# Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.arima.model import ARIMA
from pathlib import Path
import os

# Apply the 'seaborn-v0_8' style sheet to every figure created after this point
plt.style.use('seaborn-v0_8')
# IPython magic: render matplotlib figures inline in the cell output
%matplotlib inline

## 1. Feature Extraction

Implement the `extract_time_series_features` function to calculate rolling window features.

In [None]:
def extract_time_series_features(data, window_size=60):
    """Extract rolling window features from time series data.

    For each of the ``heart_rate``, ``eda`` and ``temperature`` columns the
    trailing-window mean, standard deviation, minimum, maximum and lag-1
    autocorrelation are computed. Only those three columns are read, and the
    input frame is never modified, so the call is safe to repeat on the same
    object.

    Parameters
    ----------
    data : pd.DataFrame
        Preprocessed physiological data containing the ``heart_rate``,
        ``eda`` and ``temperature`` columns.
    window_size : int
        Size of the rolling window, expressed as a number of consecutive
        rows (samples). This corresponds to seconds only when the data is
        sampled at 1 Hz, which is not checked here.

    Returns
    -------
    pd.DataFrame
        DataFrame sharing the index of ``data`` with the columns
        ``<signal>_mean``, ``<signal>_std``, ``<signal>_min``,
        ``<signal>_max`` and ``<signal>_autocorr_lag1`` for each signal,
        in that order.

    Raises
    ------
    KeyError
        If any of the required signal columns is missing from ``data``.
    """
    signals = ['heart_rate', 'eda', 'temperature']

    # Fail early with a message that names every missing column at once.
    missing = [name for name in signals if name not in data.columns]
    if missing:
        raise KeyError(f"data is missing required signal column(s): {missing}")

    # Function to calculate autocorrelation at lag 1
    def autocorrelation(window, lag=1):
        return window.autocorr(lag)

    # Collect the features in a separate mapping instead of writing new
    # columns into the caller's frame.
    feature_series = {}
    for signal in signals:
        # min_periods=1 yields a value from the very first row; statistics
        # that need more samples (std, autocorrelation) are NaN until enough
        # rows are available.
        rolling = data[signal].rolling(window=window_size, min_periods=1)

        feature_series[f'{signal}_mean'] = rolling.mean()
        feature_series[f'{signal}_std'] = rolling.std()
        feature_series[f'{signal}_min'] = rolling.min()
        feature_series[f'{signal}_max'] = rolling.max()
        # raw=False hands each window over as a Series so .autocorr() exists.
        feature_series[f'{signal}_autocorr_lag1'] = rolling.apply(autocorrelation, raw=False)

    return pd.DataFrame(feature_series, index=data.index)

In [None]:
# Read the processed S1 CSV, then derive its rolling features with a 60-row window.
data = pd.read_csv(filepath_or_buffer='data/processed/S1_processed.csv')
features_df = extract_time_series_features(data=data, window_size=60)


## 2. ARIMA Modeling

Implement the `build_arima_model` function to fit ARIMA models and generate diagnostic plots.

In [None]:
def _drop_leading(values, count):
    """Return ``values`` without its first ``count`` observations (positional)."""
    if hasattr(values, 'iloc'):
        return values.iloc[count:]
    return values[count:]


def _save_line_plot(lines, title, ylabel, path):
    """Draw ``(values, label, color)`` lines on one figure and save it to ``path``."""
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        for values, label, color in lines:
            ax.plot(values, label=label, color=color)
        ax.set_title(title)
        ax.set_xlabel('Time')
        ax.set_ylabel(ylabel)
        ax.legend()
        fig.tight_layout()
        fig.savefig(path)
    finally:
        # Release the figure even when drawing or saving fails, so repeated
        # calls do not accumulate open figures in the kernel.
        plt.close(fig)


def build_arima_model(series, order=(1,1,1), output_dir='plots', plot_name='arima'):
    """Fit an ARIMA model to the time series and generate diagnostic plots.

    Two PNG files are written to ``output_dir``:
    ``<plot_name>_arima_fit.png`` (actual vs. fitted values) and
    ``<plot_name>_arima_residuals.png`` (residuals over time).

    Parameters
    ----------
    series : pd.Series
        Time series data to model
    order : tuple
        (p,d,q) order of the ARIMA model
    output_dir : str
        Directory to save diagnostic plots; created if it does not exist
    plot_name : str
        Prefix used for the plot titles and the saved file names

    Returns
    -------
    statsmodels.tsa.arima.model.ARIMAResults
        Fitted ARIMA model
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Fit the ARIMA model
    fitted_model = ARIMA(series, order=order).fit()

    # With d > 0 the first d in-sample predictions come from the model's
    # initial state rather than from data (fitted value near 0, residual near
    # the raw observation). Leaving them in stretches the y-axis and hides the
    # real fit, so they are dropped from the plots only; the returned results
    # object is untouched.
    d = order[1]
    fitted_values = _drop_leading(fitted_model.fittedvalues, d)
    residuals = _drop_leading(fitted_model.resid, d)

    # ARIMA fit plot (Actual vs. Predicted)
    _save_line_plot(
        [(series, 'Actual', 'blue'), (fitted_values, 'Fitted', 'red')],
        title=f'{plot_name}_ARIMA_fit',
        ylabel='Value',
        path=os.path.join(output_dir, f'{plot_name}_arima_fit.png'),
    )

    # Residuals plot (Residuals vs. Time)
    _save_line_plot(
        [(residuals, 'Residuals', 'green')],
        title=f'{plot_name}_ARIMA_residuals',
        ylabel='Residuals',
        path=os.path.join(output_dir, f'{plot_name}_arima_residuals.png'),
    )

    return fitted_model

In [13]:
# Model the heart-rate column with an ARIMA(1,1,1) and save its diagnostic plots.
series = data.loc[:, 'heart_rate']

fitted_model = build_arima_model(
    series,
    order=(1, 1, 1),
    output_dir='plots',
    plot_name='S1_Midterm1_heart_rate',
)
