# Part 2: Time Series Modeling

In this notebook, you will implement functions to extract features from time series data and build ARIMA models.

In [1]:
# Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.arima.model import ARIMA
from pathlib import Path
import os

# Set style for plots
plt.style.use('seaborn-v0_8')
%matplotlib inline

## 1. Feature Extraction

Implement the `extract_time_series_features` function to calculate rolling window features.

In [6]:
import pandas as pd
import numpy as np

def extract_time_series_features(data, window_size=60):
    """
    Extract rolling window features from time series data.

    Parameters
    ----------
    data : pd.DataFrame
        Preprocessed physiological data (must include 'timestamp', 'heart_rate', 'eda', 'temperature')
    window_size : int
        Size of the rolling window in seconds

    Returns
    -------
    pd.DataFrame
        DataFrame containing extracted features for each signal
    """
    import pandas as pd
    import numpy as np

    # Ensure timestamp is datetime and set as index
    df = data.copy()
    if not pd.api.types.is_datetime64_any_dtype(df['timestamp']):
        df['timestamp'] = pd.to_datetime(df['timestamp'])
    df = df.set_index('timestamp').sort_index()

    features = []
    signals = ['heart_rate', 'eda', 'temperature']

    for signal in signals:
        if signal not in df.columns:
            continue
        # Rolling window
        roll = df[signal].rolling(f'{window_size}s')

        # Compute features
        feat = pd.DataFrame({
            f'{signal}_mean': roll.mean(),
            f'{signal}_std': roll.std(),
            f'{signal}_min': roll.min(),
            f'{signal}_max': roll.max(),
            f'{signal}_autocorr1': roll.apply(lambda x: x.autocorr(lag=1) if x.count() > 1 else np.nan)
        })
        features.append(feat)

    # Concatenate features for all signals
    features_df = pd.concat(features, axis=1)
    features_df = features_df.dropna().reset_index()

    return features_df

## 2. ARIMA Modeling

Implement the `build_arima_model` function to fit ARIMA models and generate diagnostic plots.

In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pywt
import os
import glob
from sklearn.metrics import mean_squared_error, mean_absolute_error
from statsmodels.tsa.arima.model import ARIMA


def build_arima_model(series, order=(1,1,1), output_dir='plots'):
    """
    Split series into train/test, fit ARIMA model, predict, compute RMSE/MAE, 
    and plot actual vs. predicted for the test set.
    
    Parameters
    ----------
    series : pd.Series
        Time series data to model
    order : tuple
        (p,d,q) order of the ARIMA model
    output_dir : str
        Directory to save diagnostic plots
        
    Returns
    -------
    statsmodels.tsa.arima.model.ARIMAResults
        Fitted ARIMA model (to match your original function signature)
    """
    
    os.makedirs(output_dir, exist_ok=True)
    
    # Split series into train/test (80/20 split)
    split_point = int(len(series) * 0.8)
    train_series = series[:split_point]
    test_series = series[split_point:]
    
    print(f"Train size: {len(train_series)}, Test size: {len(test_series)}")
    
    # Fit ARIMA model on training data
    model = ARIMA(train_series, order=order)
    fit = model.fit()
    
    # Make predictions on test set
    predictions = fit.get_forecast(steps=len(test_series))
    predicted_values = predictions.predicted_mean
    
    # Compute RMSE and MAE
    rmse = np.sqrt(mean_squared_error(test_series, predicted_values))
    mae = mean_absolute_error(test_series, predicted_values)
    
    print(f"RMSE: {rmse:.4f}")
    print(f"MAE: {mae:.4f}")
    
    # Model fit plot (training data)
    plt.figure(figsize=(12, 5))
    plt.plot(train_series, label='Training Data')
    plt.plot(fit.fittedvalues, label='Fitted Values', alpha=0.7)
    plt.title('ARIMA Model Fit on Training Data')
    plt.legend()
    fit_plot_path = os.path.join(output_dir, 'arima_fit.png')
    plt.savefig(fit_plot_path)
    plt.close()
    
    # Residuals plot
    plt.figure(figsize=(12, 4))
    plt.plot(fit.resid)
    plt.title('ARIMA Model Residuals')
    plt.xlabel('Time')
    plt.ylabel('Residuals')
    resid_plot_path = os.path.join(output_dir, 'arima_residuals.png')
    plt.savefig(resid_plot_path)
    plt.close()
    
    # Plot actual vs. predicted for test set
    plt.figure(figsize=(12, 6))
    
    # Plot full series for context
    plt.plot(series.index, series.values, label='Full Series', alpha=0.5, color='gray')
    
    # Highlight train/test split
    plt.plot(train_series.index, train_series.values, label='Training Data', color='blue')
    plt.plot(test_series.index, test_series.values, label='Actual Test Data', color='green', linewidth=2)
    plt.plot(test_series.index, predicted_values, label='Predicted Test Data', color='red', linewidth=2, linestyle='--')
    
    # Add confidence intervals for predictions
    conf_int = predictions.conf_int()
    plt.fill_between(test_series.index,
                     conf_int.iloc[:, 0],
                     conf_int.iloc[:, 1],
                     color='pink', alpha=0.3, label='95% Confidence Interval')
    
    plt.axvline(x=train_series.index[-1], color='black', linestyle=':', alpha=0.7, label='Train/Test Split')
    plt.title(f'ARIMA Actual vs. Predicted (Test Set)\nRMSE: {rmse:.4f}, MAE: {mae:.4f}')
    plt.xlabel('Time')
    plt.ylabel('Value')
    plt.legend()
    plt.grid(True, alpha=0.3)
    
    actual_vs_pred_path = os.path.join(output_dir, 'arima_actual_vs_predicted.png')
    plt.savefig(actual_vs_pred_path)
    plt.close()
    
    return model

In [4]:
import pandas as pd
import glob
import os
import statsmodels.api as sm
import numpy as np

# Load all processed CSVs and concatenate
files = glob.glob('data/processed/*_processed.csv')
dfs = [pd.read_csv(f) for f in files]
processed_data = pd.concat(dfs, ignore_index=True)

In [13]:
signals = ['heart_rate', 'eda', 'temperature']
subjects = processed_data['subject_id'].unique()
sessions = processed_data['session'].unique()

for signal in signals:
    for subject in subjects:
        for session in sessions:
            mask = (processed_data['subject_id'] == subject) & (processed_data['session'] == session)
            series = processed_data.loc[mask, signal].dropna()
            if len(series) < 10:  # skip if too little data
                continue
            print(f"Fitting ARIMA for {signal}, {subject}, {session}")
            # You can customize the filename to include subject/session/signal
            model = build_arima_model(
                series,
                order=(1,1,1),
                output_dir=f'plots/{subject}_{session}_{signal}'
            )

Fitting ARIMA for heart_rate, S1, Midterm 2
Fitting ARIMA for heart_rate, S5, Midterm 2


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Fitting ARIMA for heart_rate, S2, Midterm 2


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Fitting ARIMA for heart_rate, S6, Midterm 2


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Fitting ARIMA for heart_rate, S8, Midterm 2


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Fitting ARIMA for heart_rate, S9, Midterm 2


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Fitting ARIMA for heart_rate, S10, Midterm 2


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Fitting ARIMA for heart_rate, S7, Midterm 2


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'


Fitting ARIMA for heart_rate, S3, Midterm 2


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Fitting ARIMA for heart_rate, S4, Midterm 2


  return get_prediction_index(
  return get_prediction_index(


Fitting ARIMA for eda, S1, Midterm 2
Fitting ARIMA for eda, S5, Midterm 2


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Fitting ARIMA for eda, S2, Midterm 2


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Fitting ARIMA for eda, S6, Midterm 2


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Fitting ARIMA for eda, S8, Midterm 2


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Fitting ARIMA for eda, S9, Midterm 2


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'


Fitting ARIMA for eda, S10, Midterm 2


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Fitting ARIMA for eda, S7, Midterm 2


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Fitting ARIMA for eda, S3, Midterm 2


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Fitting ARIMA for eda, S4, Midterm 2


  return get_prediction_index(
  return get_prediction_index(
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


Fitting ARIMA for temperature, S1, Midterm 2
Fitting ARIMA for temperature, S5, Midterm 2


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Fitting ARIMA for temperature, S2, Midterm 2


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Fitting ARIMA for temperature, S6, Midterm 2


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Fitting ARIMA for temperature, S8, Midterm 2


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


Fitting ARIMA for temperature, S9, Midterm 2


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Fitting ARIMA for temperature, S10, Midterm 2


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Fitting ARIMA for temperature, S7, Midterm 2


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Fitting ARIMA for temperature, S3, Midterm 2


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Fitting ARIMA for temperature, S4, Midterm 2


  return get_prediction_index(
  return get_prediction_index(
