# Part 2: Time Series Modeling

In this notebook, you will implement functions to extract features from time series data and build ARIMA models.

In [1]:
# Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.arima.model import ARIMA
from pathlib import Path
import os

# Set style for plots
plt.style.use('seaborn')
%matplotlib inline

OSError: 'seaborn' is not a valid package style, path of style file, URL of style file, or library style name (library styles are listed in `style.available`)

## 1. Feature Extraction

Implement the `extract_time_series_features` function to calculate rolling window features.

In [2]:
def extract_time_series_features(data, window_size=60):
    """Extract rolling window features from time series data.
    
    Parameters
    ----------
    data : pd.DataFrame
        Preprocessed physiological data
    window_size : int
        Size of the rolling window in seconds
        
    Returns
    -------
    pd.DataFrame
        DataFrame containing extracted features for each signal
    """
    # Your code here
    # 1. Calculate rolling window statistics
    # 2. Include mean, std, min, max, and autocorrelation
    
    def autocorr(x):
        return x.autocorr(lag=1) if len(x) > 1 else np.nan

    # Ensure timestamp is datetime and sorted
    data['timestamp'] = pd.to_datetime(data['timestamp'])
    data = data.sort_values('timestamp')

    results = data[['timestamp', 'subject_id', 'session']].copy()
    signals = ['heart_rate', 'eda', 'temperature']

    for col in signals:
        results[f'{col}_mean'] = data[col].rolling(window=window_size).mean()
        results[f'{col}_sd'] = data[col].rolling(window=window_size).std()
        results[f'{col}_min'] = data[col].rolling(window=window_size).min()
        results[f'{col}_max'] = data[col].rolling(window=window_size).max()
        results[f'{col}_autocor'] = data[col].rolling(window=window_size).apply(autocorr, raw=False)
        
    return results

# Load and combine all data
all_data = []
for i in range(1, 11):
    file_path = f'data/processed/S{i}_processed.csv'
    if os.path.exists(file_path):
        df = pd.read_csv(file_path)
        df['subject_id'] = f'S{i}'  # Ensure subject_id is consistent
        all_data.append(df)

combined_data = pd.concat(all_data, ignore_index=True)

features_df = extract_time_series_features(combined_data, window_size=60)
print("Features extracted.") #dam take so long

Features extracted.


## 2. ARIMA Modeling

Implement the `build_arima_model` function to fit ARIMA models and generate diagnostic plots.

In [6]:
test_series = features_df['heart_rate_mean'].dropna().iloc[:300]
test_series.name = 'Test_heart_rate'

build_arima_model(test_series)

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


<statsmodels.tsa.arima.model.ARIMAResultsWrapper at 0x726b692b95b0>

In [4]:
def build_arima_model(series, order=(1,1,1), output_dir='plots'):
    """Fit an ARIMA model to the time series and generate diagnostic plots.
    
    Parameters
    ----------
    series : pd.Series
        Time series data to model
    order : tuple
        (p,d,q) order of the ARIMA model
    output_dir : str
        Directory to save diagnostic plots
        
    Returns
    -------
    statsmodels.tsa.arima.model.ARIMAResults
        Fitted ARIMA model
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)
    
    # Your code here
    # 1. Fit ARIMA model
    # 2. Generate diagnostic plots:
    #    - Model fit plot
    #    - Residuals plot
    #    - Forecast plot
    # 3. Save plots to output directory
    
    series = series.dropna()
    name = series.name or "time_series"
    
    # Fit ARIMA 
    model = ARIMA(series, order=order)
    model_fit = model.fit()
    
    # Plot: Actual vs. Fitted
    plt.figure(figsize=(10, 4))
    plt.plot(series, label='Observed')
    plt.plot(model_fit.fittedvalues, label='Fitted', alpha=0.7)
    plt.title(f'{name} - ARIMA Fit')
    plt.legend()
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, f'{name}_arima_fit.png'))
    plt.close()

    # Plot: Residuals
    residuals = model_fit.resid
    plt.figure(figsize=(10, 4))
    plt.plot(residuals, label='Residuals')
    plt.axhline(0, color='gray', linestyle='--')
    plt.title(f'{name} - ARIMA Residuals')
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, f'{name}_arima_residuals.png'))
    plt.close()

    return model_fit

#series = features_df['heart_rate_mean'].dropna()
#series.name = 'S5_Midterm2_heart_rate' 
#model = build_arima_model(series)