# Part 2: Time Series Modeling

In this notebook, you will implement functions to extract features from time series data and build ARIMA models.

In [None]:
# Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.arima.model import ARIMA
from pathlib import Path
import os

# Set style for plots
plt.style.use('seaborn')
%matplotlib inline

: 

## 1. Feature Extraction

Implement the `extract_time_series_features` function to calculate rolling window features.

In [1]:
import os
import pandas as pd
import numpy as np
from scipy import stats, signal
import matplotlib.pyplot as plt

def extract_time_series_features(data, window_size=60):
    """Extract rolling window features from time series data.

    For every (subject_id, session) group the rows are ordered by timestamp
    and, for each of the signals heart_rate, eda and temperature, the rolling
    mean, standard deviation, minimum, maximum and lag-1 autocorrelation are
    computed.

    Parameters
    ----------
    data : pd.DataFrame
        Preprocessed physiological data. Uses the columns ``subject_id``,
        ``session``, ``timestamp``, ``heart_rate``, ``eda`` and
        ``temperature``.
    window_size : int
        Size of the rolling window, counted in consecutive samples. This
        corresponds to a duration in seconds only when the data are sampled
        once per second.

    Returns
    -------
    pd.DataFrame
        One row per input row with the columns ``subject_id``, ``session``,
        ``timestamp`` and, for each signal, ``<signal>_mean``,
        ``<signal>_std``, ``<signal>_min``, ``<signal>_max`` and
        ``<signal>_autocorr_lag1``. With an integer window, the first
        ``window_size - 1`` rows of each group hold NaN because the window is
        not yet full. An empty frame with these columns is returned when
        ``data`` contains no groups.
    """
    signals = ['heart_rate', 'eda', 'temperature']
    features = []

    for (subject, session), group in data.groupby(['subject_id', 'session']):
        group = group.set_index('timestamp').sort_index()
        window = group[signals].rolling(window=window_size)

        df_features = pd.DataFrame({
            'subject_id': subject,
            'session': session,
            'timestamp': group.index
        })

        # `.values` strips the timestamp index so each feature column is
        # assigned positionally onto df_features' default integer index.
        for signal_name in signals:
            signal_window = window[signal_name]
            df_features[f'{signal_name}_mean'] = signal_window.mean().values
            df_features[f'{signal_name}_std'] = signal_window.std().values
            df_features[f'{signal_name}_min'] = signal_window.min().values
            df_features[f'{signal_name}_max'] = signal_window.max().values
            # raw=False hands each window to the lambda as a Series, which is
            # required for Series.autocorr.
            df_features[f'{signal_name}_autocorr_lag1'] = signal_window.apply(
                lambda x: x.autocorr(lag=1), raw=False
            ).values

        features.append(df_features)

    if not features:
        # pd.concat raises on an empty list; return an empty frame that still
        # carries the full feature schema instead.
        feature_columns = ['subject_id', 'session', 'timestamp'] + [
            f'{signal_name}_{stat}'
            for signal_name in signals
            for stat in ('mean', 'std', 'min', 'max', 'autocorr_lag1')
        ]
        return pd.DataFrame(columns=feature_columns)

    return pd.concat(features).reset_index(drop=True)

## 2. ARIMA Modeling

Implement the `build_arima_model` function to fit ARIMA models and generate diagnostic plots.

In [4]:
import os
import pandas as pd
import numpy as np
from scipy import stats, signal
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA

def _save_time_plot(fig, ax, title, ylabel, path, legend=True):
    """Label a time-axis figure, write it to ``path`` and close it.

    The figure is closed even if labelling or saving fails, so a failed call
    does not leave an open figure behind.
    """
    try:
        ax.set_title(title)
        ax.set_xlabel('Time')
        ax.set_ylabel(ylabel)
        if legend:
            ax.legend()
        fig.tight_layout()
        fig.savefig(path)
    finally:
        plt.close(fig)


def build_arima_model(series, order=(1, 1, 1), subject_id='S1', session='Midterm 1', signal='heart_rate', output_dir='plots', forecast_steps=50):
    """Fit an ARIMA model to the time series and generate diagnostic plots.

    Three PNG files are written to ``output_dir``, each named
    ``{subject_id}_{session}_{signal}_arima_<kind>.png`` with ``<kind>`` being
    ``fit`` (series vs. fitted values), ``residuals`` or ``forecast``.

    Parameters
    ----------
    series : pd.Series
        Time series data to model. The forecast plot places the predicted
        points at one-second steps after ``series.index[-1]``, so it expects
        a datetime index sampled once per second.
    order : tuple
        (p,d,q) order of the ARIMA model
    subject_id : str
        Identifier of the subject
    session : str
        Name of the session
    signal : str
        Name of the physiological signal
    output_dir : str
        Directory to save diagnostic plots; created if it does not exist
    forecast_steps : int
        Number of future steps shown in the forecast plot

    Returns
    -------
    statsmodels.tsa.arima.model.ARIMAResults
        Fitted ARIMA model
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)
    file_prefix = f'{subject_id}_{session}_{signal}'
    label = f'{subject_id} {session} {signal}'

    # Fit ARIMA model
    model = ARIMA(series, order=order)
    model_fit = model.fit()

    # Plot model fit
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(series, label='Original')
    ax.plot(model_fit.fittedvalues, color='red', label='Fitted')
    _save_time_plot(
        fig, ax, f'ARIMA Model Fit - {label}', signal,
        os.path.join(output_dir, f'{file_prefix}_arima_fit.png'),
    )

    # Plot residuals
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(model_fit.resid)
    _save_time_plot(
        fig, ax, f'ARIMA Residuals - {label}', 'Residuals',
        os.path.join(output_dir, f'{file_prefix}_arima_residuals.png'),
        legend=False,
    )

    # Forecast plot
    forecast = model_fit.forecast(steps=forecast_steps)
    # The first generated timestamp equals the last observed one, so it is
    # dropped. A Timedelta is used for the step because the 'S' frequency
    # alias is deprecated in recent pandas releases.
    forecast_times = pd.date_range(
        series.index[-1], periods=forecast_steps + 1, freq=pd.Timedelta(seconds=1)
    )[1:]
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(series, label='Original')
    ax.plot(forecast_times, forecast, label='Forecast', color='green')
    _save_time_plot(
        fig, ax, f'ARIMA Forecast - {label}', signal,
        os.path.join(output_dir, f'{file_prefix}_arima_forecast.png'),
    )

    # Return the fitted results object (not the unfitted ARIMA specification),
    # as stated in the Returns section.
    return model_fit