# Part 2: Time Series Modeling

In this notebook, you will implement functions to extract features from time series data and build ARIMA models.

In [3]:
# Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.arima.model import ARIMA
from pathlib import Path
import os

# Apply the 'seaborn-v0_8-darkgrid' matplotlib style sheet to every figure created below.
# NOTE(review): the 'seaborn-v0_8-*' style names presumably require a recent matplotlib
# release -- confirm against the course environment.
plt.style.use('seaborn-v0_8-darkgrid')
# IPython magic (not plain Python): render figures inline in the cell output.
%matplotlib inline

## 1. Feature Extraction

Implement the `extract_time_series_features` function to calculate rolling window features.

In [5]:
def extract_time_series_features(data, window_size=60):
    """Extract rolling window features from time series data.

    Rolling statistics are computed separately for every
    (subject_id, session) recording, so a window never mixes samples
    that belong to different subjects or sessions.

    Parameters
    ----------
    data : pd.DataFrame
        Preprocessed physiological data. Must contain the columns
        'timestamp', 'subject_id', 'session', 'heart_rate', 'eda' and
        'temperature'.
    window_size : int
        Number of consecutive samples in each rolling window. This is a
        duration in seconds only if every recording holds exactly one
        sample per second.

    Returns
    -------
    pd.DataFrame
        One row per input row, ordered by timestamp, holding 'timestamp',
        'subject_id', 'session' and, for every signal, the columns
        '<signal>_mean', '<signal>_std', '<signal>_min', '<signal>_max'
        and '<signal>_acf1' (lag-1 autocorrelation within the window).
        The first ``window_size - 1`` rows of each recording are NaN
        because their window is not yet full.
    """
    group_keys = ['subject_id', 'session']
    signals = ['heart_rate', 'eda', 'temperature']

    # A stable sort keeps the input order of rows that share a timestamp
    # (different subjects are recorded at the same instants).
    ordered = data.sort_values('timestamp', kind='mergesort')
    result = ordered[group_keys].copy()
    result.insert(0, 'timestamp', ordered['timestamp'])

    # Each entry maps a column suffix to the rolling statistic applied to
    # one recording's signal; dict order fixes the output column order.
    rolling_features = {
        'mean': lambda s: s.rolling(window_size).mean(),
        'std': lambda s: s.rolling(window_size).std(),
        'min': lambda s: s.rolling(window_size).min(),
        'max': lambda s: s.rolling(window_size).max(),
        # raw=False hands each window over as a Series so autocorr() exists.
        'acf1': lambda s: s.rolling(window_size).apply(
            lambda window: window.autocorr(lag=1), raw=False
        ),
    }

    # transform() returns values aligned with `ordered`'s index, so every
    # feature lands on the row whose window ends there, while each window
    # only ever sees rows of a single (subject_id, session) group.
    grouped = ordered.groupby(group_keys, sort=False)
    for col in signals:
        for suffix, compute in rolling_features.items():
            result[f'{col}_{suffix}'] = grouped[col].transform(compute)

    return result

In [7]:
import pandas as pd

# Read the processed recordings of subjects S1..S10, parsing the timestamp
# column into datetimes, and collect one DataFrame per subject.
data_list = [
    pd.read_csv(f'data/processed/S{subject}_processed.csv', parse_dates=['timestamp'])
    for subject in range(1, 11)
]

# Stack all subjects into a single frame with a fresh 0..n-1 index.
preprocessed_df = pd.concat(data_list, ignore_index=True)

# Rolling-window features for the combined data (default window size).
extracted_df = extract_time_series_features(preprocessed_df)
extracted_df

Unnamed: 0,timestamp,subject_id,session,heart_rate_mean,heart_rate_std,heart_rate_min,heart_rate_max,heart_rate_acf1,eda_mean,eda_std,eda_min,eda_max,eda_acf1,temperature_mean,temperature_std,temperature_min,temperature_max,temperature_acf1
198385,2018-10-13 12:55:31,S5,midterm_1,,,,,,,,,,,,,,,
198386,2018-10-13 12:55:32,S5,midterm_1,,,,,,,,,,,,,,,
198387,2018-10-13 12:55:33,S5,midterm_1,,,,,,,,,,,,,,,
198388,2018-10-13 12:55:34,S5,midterm_1,,,,,,,,,,,,,,,
413305,2018-10-13 12:55:35,S10,midterm_1,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120781,2018-12-05 23:39:11,S3,Final,103.219333,2.533562,95.82,105.87,0.975427,0.000085,0.000322,0.0,0.001281,-0.072727,23.032000,0.492420,22.81,26.23,0.445025
120782,2018-12-05 23:39:12,S3,Final,103.072167,2.755993,94.80,105.87,0.979171,0.000085,0.000322,0.0,0.001281,-0.072727,23.038000,0.492606,22.81,26.23,0.445179
120783,2018-12-05 23:39:13,S3,Final,102.932333,2.954091,94.80,105.87,0.981885,0.000085,0.000322,0.0,0.001281,-0.072727,23.044333,0.492590,22.81,26.23,0.445144
120784,2018-12-05 23:39:14,S3,Final,102.769333,3.170129,93.97,105.87,0.984288,0.000085,0.000322,0.0,0.001281,-0.072727,23.050667,0.492492,22.81,26.23,0.444919


## 2. ARIMA Modeling

Implement the `build_arima_model` function to fit ARIMA models and generate diagnostic plots.

In [1]:
def _future_index(index, steps):
    """Return `steps` x-axis labels that continue `index` past its last entry."""
    if len(index) > 0 and isinstance(index, pd.DatetimeIndex):
        step = index.freq
        if step is None:
            # No declared frequency: use the typical spacing between
            # observations, falling back to one second when it cannot be
            # determined (single observation or non-positive spacing).
            spacing = index.to_series().diff().median()
            usable = pd.notna(spacing) and spacing > pd.Timedelta(0)
            step = spacing if usable else pd.Timedelta(seconds=1)
        # Start one step AFTER the last observation so the first forecast
        # is not drawn on top of the final observed point.
        return pd.date_range(start=index[-1] + step, periods=steps, freq=step)
    if len(index) > 0 and pd.api.types.is_numeric_dtype(index):
        # Numeric labels: continue with unit spacing after the last label.
        last = index[-1]
        return [last + offset for offset in range(1, steps + 1)]
    # Any other index: fall back to positions following the observed data.
    return list(range(len(index), len(index) + steps))


def build_arima_model(series, order=(1,1,1), output_dir='plots'):
    """Fit an ARIMA model to the time series and generate diagnostic plots.

    Three stacked panels are drawn (observed vs. fitted values, residuals,
    and a 30-step forecast) and saved as 'arima_diagnostics.png' inside
    `output_dir`, replacing any earlier file of that name. The figure is
    closed after saving, so nothing is displayed inline.

    Parameters
    ----------
    series : pd.Series
        Time series data to model
    order : tuple
        (p,d,q) order of the ARIMA model
    output_dir : str
        Directory to save diagnostic plots; created if it does not exist

    Returns
    -------
    statsmodels.tsa.arima.model.ARIMAResults
        Fitted ARIMA model
    """
    forecast_steps = 30

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # 1. Fit ARIMA model
    model = ARIMA(series, order=order)
    fitted = model.fit()

    # 2. Generate diagnostic plots
    fig, axes = plt.subplots(3, 1, figsize=(12, 10))

    # Model fit plot
    axes[0].plot(series, label='Original')
    axes[0].plot(fitted.fittedvalues, color='red', label='Fitted')
    axes[0].set_title('ARIMA Model Fit')
    axes[0].legend()

    # Residuals plot
    axes[1].plot(fitted.resid, color='gray')
    axes[1].set_title('Residuals')

    # Forecast plot. The x-axis for the forecast is derived from the
    # observed index rather than taken from the forecast itself.
    # NOTE(review): presumably the forecast carries no usable date index
    # when the series index has no frequency -- confirm against statsmodels.
    forecast = fitted.forecast(steps=forecast_steps)
    future_index = _future_index(series.index, forecast_steps)
    axes[2].plot(series, label='Original')
    axes[2].plot(future_index, forecast, color='green', label='Forecast')
    axes[2].set_title(f'Forecast ({forecast_steps} steps)')
    axes[2].legend()

    # 3. Save plots to output directory. Work on this figure explicitly
    # instead of pyplot's "current figure" so other open figures are untouched.
    fig.tight_layout()
    plot_path = os.path.join(output_dir, 'arima_diagnostics.png')
    fig.savefig(plot_path)
    plt.close(fig)

    return fitted

In [7]:
import pandas as pd

# Load subject S3's processed recording; timestamps are parsed so they can
# serve as the series index.
df = pd.read_csv('data/processed/S3_processed.csv', parse_dates=['timestamp'])

# Heart-rate readings keyed by timestamp, with missing readings removed.
series = df.set_index('timestamp').loc[:, 'heart_rate'].dropna()

# Fit ARIMA(1, 1, 1); the diagnostic figure goes to the default output directory.
model = build_arima_model(series, order=(1, 1, 1))
model.summary()

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  future_index = pd.date_range(start=series.index[-1], periods=30, freq='S')


0,1,2,3
Dep. Variable:,heart_rate,No. Observations:,47931.0
Model:,"ARIMA(1, 1, 1)",Log Likelihood,-42922.508
Date:,"Wed, 30 Apr 2025",AIC,85851.016
Time:,04:56:39,BIC,85877.349
Sample:,0,HQIC,85859.281
,- 47931,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ar.L1,0.9613,0.000,2064.072,0.000,0.960,0.962
ma.L1,-0.8197,0.001,-767.595,0.000,-0.822,-0.818
sigma2,0.3510,0.000,1296.391,0.000,0.351,0.352

0,1,2,3
Ljung-Box (L1) (Q):,5634.84,Jarque-Bera (JB):,153588912.12
Prob(Q):,0.0,Prob(JB):,0.0
Heteroskedasticity (H):,0.62,Skew:,-2.16
Prob(H) (two-sided):,0.0,Kurtosis:,280.29
