# Part 2: Time Series Modeling

In this notebook, you will implement functions to extract features from time series data and build ARIMA models.

In [None]:
# Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.arima.model import ARIMA
from pathlib import Path
import os

# Set style for plots
plt.style.use('seaborn')
%matplotlib inline

## 1. Feature Extraction

Implement the `extract_time_series_features` function to calculate rolling-window features (mean, standard deviation, minimum, maximum, and lag-1 autocorrelation) for each available signal.

In [8]:
def _lag1_autocorr(window):
    """Return the Pearson correlation between a window and itself shifted by one sample.

    `window` is a 1-D NumPy array (rolling.apply is called with raw=True).
    NaN is returned when the correlation is undefined.
    """
    # Fewer than three samples leaves at most one (x[t], x[t+1]) pair, for
    # which a correlation coefficient cannot be computed.
    if len(window) < 3:
        return np.nan
    # A constant slice has zero variance, so np.corrcoef evaluates 0/0 and
    # yields NaN. The NaN is the desired result; only the warning is silenced.
    with np.errstate(divide='ignore', invalid='ignore'):
        return np.corrcoef(window[:-1], window[1:])[0, 1]


def extract_time_series_features(data, window_size=60):
    """Add rolling-window statistics for each physiological signal present in `data`.

    For every column among 'heart_rate', 'eda' and 'temperature' that exists in
    `data`, five columns are appended: `<signal>_mean`, `<signal>_std`,
    `<signal>_min`, `<signal>_max` and `<signal>_autocorr_lag1`, each computed
    over a rolling window of `window_size`.

    Parameters
    ----------
    data : pandas.DataFrame
        Input frame. It is not modified; the features are added to a copy.
    window_size : int or offset, default 60
        Window passed straight to `Series.rolling`.

    Returns
    -------
    pandas.DataFrame
        Copy of `data` with the feature columns appended. Rows that contain a
        NaN in *any* column are dropped: this removes the warm-up rows at the
        start of each rolling window, rows whose window has an undefined
        autocorrelation (e.g. a constant window), and rows that already had
        NaN in the input.
    """
    result_data = data.copy()

    # signals to extract features from
    signals = ['heart_rate', 'eda', 'temperature']

    for signal in signals:
        if signal not in data.columns:
            continue

        rolling = data[signal].rolling(window=window_size)

        # basic rolling statistics
        result_data[f'{signal}_mean'] = rolling.mean()
        result_data[f'{signal}_std'] = rolling.std()
        result_data[f'{signal}_min'] = rolling.min()
        result_data[f'{signal}_max'] = rolling.max()

        # Lag-1 autocorrelation within each window. raw=True hands the helper a
        # plain ndarray, which avoids building a Series per window and keeps
        # the [:-1] / [1:] slices positional whatever the frame's index is.
        result_data[f'{signal}_autocorr_lag1'] = rolling.apply(_lag1_autocorr, raw=True)

    # drop the rows that contain NaN values (see the Returns note above)
    result_data = result_data.dropna()

    return result_data

## 2. ARIMA Modeling

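Implement the `build_arima_model` function to fit an ARIMA model to a single time series, save diagnostic plots (model fit, residuals, and forecast), and write the model summary to a text file.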

In [9]:
def _forecast_time_index(observed_index, forecast_steps):
    """Return the `forecast_steps` timestamps that follow the end of `observed_index`."""
    try:
        step = pd.infer_freq(observed_index)
    except ValueError:
        # pandas needs at least three timestamps to infer a frequency.
        step = None
    # Fall back to one-second spacing when no regular frequency is found. A
    # Timedelta is used instead of the 'S' alias, which newer pandas deprecates.
    extended = pd.date_range(
        start=observed_index[-1],
        periods=forecast_steps + 1,
        freq=step or pd.Timedelta(seconds=1),
    )
    # The first entry is the last observed timestamp itself, so drop it.
    return extended[1:]


def _save_and_close(fig, path):
    """Write `fig` to `path` and close it, even if saving fails."""
    try:
        fig.savefig(path, dpi=300, bbox_inches='tight')
    finally:
        plt.close(fig)


def build_arima_model(series, subject_id=None, session=None, order=(1,1,1), output_dir='plots', forecast_steps=30):
    """Fit an ARIMA model to `series`, save diagnostic plots and return the fit.

    Three PNG files (model fit, residuals, forecast) and one text file with
    the model summary are written to `output_dir`. File names start with
    `<subject_id>_<session>_<series name>_arima` when both `subject_id` and
    `session` are truthy, otherwise with `<series name>_arima`.

    Parameters
    ----------
    series : pandas.Series
        Observations to model. The index is expected to be a DatetimeIndex;
        the series is sorted by its index before fitting.
    subject_id, session : optional
        Identifiers used only to build the output file names.
    order : tuple, default (1, 1, 1)
        The (p, d, q) order passed to `ARIMA`.
    output_dir : str, default 'plots'
        Directory for the generated files; created if it does not exist.
    forecast_steps : int, default 30
        Number of out-of-sample steps to forecast and plot.

    Returns
    -------
    The fitted results object returned by `ARIMA(...).fit()`.

    Raises
    ------
    ValueError
        If `series` is not a pandas Series.
    """
    # Validate first so that bad input does not leave an empty directory behind.
    if not isinstance(series, pd.Series):
        raise ValueError("Input must be a pandas Series with datetime index")

    # create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    series_name = series.name if series.name else 'value'

    # Sort on the index directly. This works whatever the index is called,
    # whereas a reset_index()/sort_values('timestamp') round trip only works
    # for an unnamed index or one already named 'timestamp'.
    data = series.rename(series_name).sort_index(kind='mergesort')

    # fit ARIMA model
    model = ARIMA(data, order=order)
    model_fit = model.fit()
    print(f"ARIMA{order} model summary:")
    print(model_fit.summary())

    # In-sample predictions. statsmodels.tsa.arima.model.ARIMA always predicts
    # on the original scale; `typ` belonged to the legacy ARIMA class and is
    # not a recognised keyword here.
    predictions = model_fit.predict()

    # Out-of-sample forecast: one call provides both the mean and the interval.
    forecast_result = model_fit.get_forecast(steps=forecast_steps)
    forecast_index = _forecast_time_index(data.index, forecast_steps)
    # Use plain arrays so the values are matched to forecast_index by position.
    # Re-indexing the statsmodels output by label yields all-NaN bounds whenever
    # its own forecast index differs from the one built here.
    forecasts = np.asarray(forecast_result.predicted_mean)
    confidence_bounds = np.asarray(forecast_result.conf_int())

    if subject_id and session:
        base_filename = f"{subject_id}_{session}_{series_name}_arima"
    else:
        base_filename = f"{series_name}_arima"

    # generate diagnostic plots
    # model fit plot
    fig_fit, ax_fit = plt.subplots(figsize=(12, 6))
    ax_fit.plot(data.index, data, 'b-', label='Observed')
    ax_fit.plot(predictions.index, predictions, 'r--', label='Fitted')
    ax_fit.set_title(f'ARIMA{order} Model Fit - {series_name}')
    ax_fit.set_xlabel('Time')
    ax_fit.set_ylabel(series_name)
    ax_fit.legend()
    ax_fit.grid(True)

    fit_path = f"{output_dir}/{base_filename}_fit.png"
    _save_and_close(fig_fit, fit_path)
    print(f"Fit plot saved to {fit_path}")

    # residuals plot
    fig_resid, ax_resid = plt.subplots(figsize=(12, 6))
    residuals = model_fit.resid
    ax_resid.plot(residuals.index, residuals, 'k-')
    ax_resid.axhline(y=0, color='r', linestyle='-')
    ax_resid.set_title(f'ARIMA{order} Residuals - {series_name}')
    ax_resid.set_xlabel('Time')
    ax_resid.set_ylabel('Residuals')
    ax_resid.grid(True)

    # histogram of residuals, drawn as an inset in the lower-right corner
    inset_ax = ax_resid.inset_axes([0.65, 0.05, 0.3, 0.3])
    inset_ax.hist(residuals, bins=20, color='skyblue', alpha=0.7)
    inset_ax.axvline(x=0, color='r', linestyle='-')
    inset_ax.set_title('Residual Distribution')

    resid_path = f"{output_dir}/{base_filename}_residuals.png"
    _save_and_close(fig_resid, resid_path)
    print(f"Residuals plot saved to {resid_path}")

    # Forecast plot
    fig_forecast, ax_forecast = plt.subplots(figsize=(12, 6))
    ax_forecast.plot(data.index, data, 'b-', label='Historical Data')
    ax_forecast.plot(predictions.index, predictions, 'r--', label='Fitted')
    ax_forecast.plot(forecast_index, forecasts, 'g--', label='Forecast')

    # shaded band between the lower (column 0) and upper (column 1) bounds
    ax_forecast.fill_between(
        forecast_index,
        confidence_bounds[:, 0],
        confidence_bounds[:, 1],
        alpha=0.2, color='green'
    )

    ax_forecast.set_title(f'ARIMA{order} Forecast - {series_name}')
    ax_forecast.set_xlabel('Time')
    ax_forecast.set_ylabel(series_name)
    ax_forecast.legend()
    ax_forecast.grid(True)

    forecast_path = f"{output_dir}/{base_filename}_forecast.png"
    _save_and_close(fig_forecast, forecast_path)
    print(f"Forecast plot saved to {forecast_path}")

    results_path = f"{output_dir}/{base_filename}_results.txt"
    with open(results_path, 'w') as f:
        f.write(str(model_fit.summary()))
    print(f"Model results saved to {results_path}")

    return model_fit