# Part 2: Time Series Modeling

In this notebook, you will implement functions to extract features from time series data and build ARIMA models.

In [2]:
# Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.arima.model import ARIMA
from pathlib import Path
import os

# Set style for plots: apply seaborn's 'darkgrid' theme to figures created below
sns.set_theme(style='darkgrid')  
# IPython magic: render matplotlib figures inline in the notebook output
%matplotlib inline

## 1. Feature Extraction

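Implement the `extract_time_series_features` function to calculate rolling-window features (mean, standard deviation, min, max, and lag-1 autocorrelation) for each signal. **Note:** the code cell directly below contains `build_arima_model`; the `extract_time_series_features` implementation is in the code cell under Section 2.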

In [None]:
def build_arima_model(series, order=(1, 1, 1), output_dir='plots', label='arima'):
    """Fit an ARIMA model to a time series and save two diagnostic plots.

    Parameters
    ----------
    series : array-like or pandas.Series
        Observations to model. When a Series with a datetime index is passed,
        the index is used as the x-axis of the plots.
    order : tuple of int, default (1, 1, 1)
        The ``(p, d, q)`` order passed to ``ARIMA``.
    output_dir : str, default 'plots'
        Directory the PNG files are written to; created if it does not exist.
    label : str, default 'arima'
        Prefix for the output file names
        (``{label}_arima_fit.png`` and ``{label}_arima_residuals.png``).

    Returns
    -------
    The fitted results object returned by ``ARIMA(...).fit()``.

    Notes
    -----
    With differencing order ``d > 0`` the first ``d`` in-sample predictions are
    initialisation artifacts (typically close to 0), so the matching residuals
    are roughly the raw observations. Those points are left out of the plots so
    they do not dominate the axis scale; the returned results object is
    unchanged and still contains them.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Fit the model
    model_fit = ARIMA(series, order=order).fit()

    def _drop_burn_in(values, n_skip):
        # Positional slice that works for both pandas objects and ndarrays.
        return values.iloc[n_skip:] if hasattr(values, 'iloc') else values[n_skip:]

    burn_in = order[1]  # differencing order d
    fitted = _drop_burn_in(model_fit.fittedvalues, burn_in)
    residuals = _drop_burn_in(model_fit.resid, burn_in)

    # Plot 1: observed series against the in-sample one-step-ahead fit
    fig, ax = plt.subplots(figsize=(12, 4))
    ax.plot(series, label='Original', color='blue')
    ax.plot(fitted, label='Fitted', color='orange')
    ax.set_title('ARIMA Model Fit')
    ax.set_xlabel('Time')
    ax.set_ylabel('Value')
    ax.legend()
    fit_path = os.path.join(output_dir, f'{label}_arima_fit.png')
    fig.savefig(fit_path)
    plt.close(fig)  # release the figure so repeated calls do not accumulate memory

    # Plot 2: residuals around a zero reference line
    fig, ax = plt.subplots(figsize=(12, 4))
    ax.plot(residuals, color='purple')
    ax.set_title('ARIMA Model Residuals')
    ax.axhline(0, color='black', linestyle='--')
    ax.set_xlabel('Time')
    ax.set_ylabel('Residual')
    res_path = os.path.join(output_dir, f'{label}_arima_residuals.png')
    fig.savefig(res_path)
    plt.close(fig)

    print(f"Saved plots to:\n- {fit_path}\n- {res_path}")
    return model_fit

## 2. ARIMA Modeling

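Implement the `build_arima_model` function to fit ARIMA models and generate diagnostic plots. **Note:** the code cell directly below contains `extract_time_series_features`; the `build_arima_model` implementation is in the code cell under Section 1.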

In [None]:
def _lag1_autocorr(window):
    """Return the lag-1 autocorrelation of one rolling window (NaN if it has < 2 samples)."""
    if len(window) < 2:
        return np.nan
    return window.autocorr(lag=1)


def extract_time_series_features(data, window_size=60,
                                 signals=('heart_rate', 'eda', 'temperature')):
    """Extract rolling window features from time series data.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a datetime-like ``timestamp`` column and one column per
        entry in ``signals``. The input frame is not modified.
    window_size : int, default 60
        Length of the trailing window in seconds. The window is time-based
        (``'{window_size}s'``), so it covers the same time span regardless of
        the sampling rate.
    signals : sequence of str, default ('heart_rate', 'eda', 'temperature')
        Names of the columns to compute features for.

    Returns
    -------
    pandas.DataFrame
        One row per input row, sorted by ``timestamp``, with a ``timestamp``
        column followed by ``{signal}_mean``, ``{signal}_std``,
        ``{signal}_min``, ``{signal}_max`` and ``{signal}_autocorr`` for each
        signal.

    Raises
    ------
    KeyError
        If ``timestamp`` or any requested signal column is missing.
    """
    required = ['timestamp', *signals]
    missing = [col for col in required if col not in data.columns]
    if missing:
        raise KeyError(f"Missing required column(s): {missing}")

    # Time-based rolling windows need a sorted datetime index.
    data = data.sort_values('timestamp').set_index('timestamp')
    features = pd.DataFrame(index=data.index)

    for signal in signals:
        # One rolling object per signal so every feature, including the
        # autocorrelation, is computed over the same time-based window.
        rolling = data[signal].rolling(f'{window_size}s')

        features[f'{signal}_mean'] = rolling.mean()
        features[f'{signal}_std'] = rolling.std()
        features[f'{signal}_min'] = rolling.min()
        features[f'{signal}_max'] = rolling.max()

        # raw=False hands each window to the helper as a Series so that
        # Series.autocorr is available.
        features[f'{signal}_autocorr'] = rolling.apply(_lag1_autocorr, raw=False)

    return features.reset_index()


In [None]:
# Step 1: Load the preprocessed data
processed_data = pd.read_csv('data/processed/preprocessed_data.csv')
processed_data['timestamp'] = pd.to_datetime(processed_data['timestamp'])

# Step 2: Extract rolling features for a specific subject and session
subject_id = 'S1'
session = 'Midterm 1'
window_size = 60  # rolling window length in seconds

subject_mask = (
    (processed_data['subject_id'] == subject_id)
    & (processed_data['session'] == session)
)
subject_data = processed_data[subject_mask]

# Fail early with a clear message instead of an obscure error inside ARIMA.
if subject_data.empty:
    raise ValueError(f"No rows found for subject {subject_id!r}, session {session!r}")

features = extract_time_series_features(subject_data, window_size)

# Step 3: Fit ARIMA model to the heart rate time series
# Index by timestamp *before* dropping NaNs so values and timestamps stay
# aligned, and sort so the model sees the observations in time order.
series = (
    subject_data
    .sort_values('timestamp')
    .set_index('timestamp')['heart_rate']
    .dropna()
)

# Use the ARIMA model to analyze the heart rate time series
model_fit = build_arima_model(series, order=(1, 1, 1), output_dir='plots', label=f'{subject_id}_{session}_heart_rate')

print("Model and features extracted successfully.")


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Saved plots to:
- plots/S1_Midterm 1_heart_rate_arima_fit.png
- plots/S1_Midterm 1_heart_rate_arima_residuals.png
Model and features extracted successfully.
