# Part 2: Time Series Modeling

In this notebook, you will implement functions to extract features from time series data and build ARIMA models.

In [1]:
# Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.arima.model import ARIMA
from pathlib import Path
import os

# Apply matplotlib's 'seaborn-v0_8' style sheet to the figures created in this notebook
plt.style.use('seaborn-v0_8')
# IPython magic: render matplotlib figures inline in the notebook output
%matplotlib inline

def load_processed_data(input_dir='data/processed'):
    """Load every ``*_processed.csv`` file in a directory into one DataFrame.

    Parameters
    ----------
    input_dir : str
        Directory holding the per-subject CSV files. It is joined onto the
        current working directory, so a relative path is resolved against the
        working directory and an absolute path is used unchanged.

    Returns
    -------
    pd.DataFrame
        Row-wise concatenation of all matching files in sorted file-name
        order. Each file keeps the index read from its first CSV column and
        gains a ``subject_id`` column holding the file name with the
        ``_processed`` suffix removed. The frame is empty when the directory
        does not exist or contains no matching file.
    """
    input_dir_path = os.path.join(os.getcwd(), input_dir)
    print(f"Input directory path: {input_dir_path}")
    input_dir_path_obj = Path(input_dir_path)
    if not input_dir_path_obj.is_dir():
        print(f"Error: Input directory not found: {input_dir_path}")
        return pd.DataFrame()

    all_subject_data = []
    # glob() yields entries in arbitrary filesystem order; sorting makes the
    # row order of the combined frame reproducible across machines and runs.
    for file_path in sorted(input_dir_path_obj.glob('*_processed.csv')):
        subject_id = file_path.stem.replace('_processed', '')
        print(f"Loading data for subject: {subject_id}")
        subject_data = pd.read_csv(file_path, index_col=0)
        subject_data['subject_id'] = subject_id
        all_subject_data.append(subject_data)

    if not all_subject_data:
        return pd.DataFrame()
    # Keep each file's own index (ignore_index=False) so timestamps/row ids survive.
    return pd.concat(all_subject_data, ignore_index=False)

# Load every subject's processed CSV from the default 'data/processed' directory
data = load_processed_data()
# Optional sanity check (the combined frame is bound to `data`, not `loaded_data`):
# print(data.head())


Input directory path: /Users/macbook/Documents/ucsf_couses/ds223/assignments/4-it-s-about-time-Exynos-8890/data/processed
Loading data for subject: S1
Loading data for subject: S5
Loading data for subject: S2
Loading data for subject: S6
Loading data for subject: S8
Loading data for subject: S9
Loading data for subject: S10
Loading data for subject: S7
Loading data for subject: S3
Loading data for subject: S4


## 1. Feature Extraction

Implement the `extract_time_series_features` function to calculate rolling window features.

In [2]:
def extract_time_series_features(data, window_size=60, signals=None):
    """Extract rolling window features from time series data.

    Features are computed separately for every ``(subject_id, session)``
    group, so a window never mixes rows from different groups.

    Parameters
    ----------
    data : pd.DataFrame
        Preprocessed physiological data with ``subject_id`` and ``session``
        columns plus one column per signal.
    window_size : int
        Number of consecutive rows in each rolling window. This corresponds
        to a duration in seconds only if the data holds one row per second.
    signals : sequence of str, optional
        Signal columns to summarise. Defaults to
        ``['eda', 'heart_rate', 'temperature']``.

    Returns
    -------
    pd.DataFrame
        The group keys and the original index restored as columns, followed
        by ``<signal>_mean``, ``<signal>_std``, ``<signal>_min``,
        ``<signal>_max`` and ``<signal>_autocorr`` (lag 1) for each signal.
        Rows containing any NaN are dropped, which removes incomplete windows
        and windows whose lag-1 autocorrelation is undefined (for example a
        constant window). An empty frame is returned when ``data`` is empty
        or no signals are requested.
    """
    if signals is None:
        signals = ['eda', 'heart_rate', 'temperature']
    signals = list(signals)

    # The loader returns an empty frame when nothing could be read; grouping
    # that frame would fail with a KeyError on 'subject_id'.
    if data.empty or not signals:
        return pd.DataFrame()

    def _lag1_autocorr(window):
        # `window` is a pd.Series because rolling.apply defaults to raw=False.
        return window.autocorr(lag=1)

    data_grouped = data.groupby(['subject_id', 'session'])
    feature_columns = {}
    for signal in signals:
        rolling_signal = data_grouped[signal].rolling(window=window_size)
        feature_columns[f'{signal}_mean'] = rolling_signal.mean()
        feature_columns[f'{signal}_std'] = rolling_signal.std()
        feature_columns[f'{signal}_min'] = rolling_signal.min()
        feature_columns[f'{signal}_max'] = rolling_signal.max()
        feature_columns[f'{signal}_autocorr'] = rolling_signal.apply(_lag1_autocorr)

    # All series share the same (subject_id, session, original index) index,
    # so a single column-wise concat assembles the feature table.
    features = pd.concat(feature_columns, axis=1)
    # Turn the group keys back into columns, then drop rows with any NaN.
    return features.reset_index().dropna()

# Rolling-window features for every subject/session, using the default window_size
features = extract_time_series_features(data)
# The result was originally bound to the misspelled name `featrues`; keep that
# name as an alias so any cell that still refers to it keeps working.
featrues = features

## 2. ARIMA Modeling

Implement the `build_arima_model` function to fit ARIMA models and generate diagnostic plots.

In [None]:
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_squared_error
def build_arima_model(series, order=(1,1,1), output_dir='plots', forecast_steps=10):
    """Fit an ARIMA model to the time series and generate diagnostic plots.

    Three figures are written to ``output_dir``:

    * ``arima_diagnostic_plots.png`` - the statsmodels residual diagnostics
      panel for the fitted model,
    * ``arima_residuals_plot.png`` - the model residuals over time,
    * ``arima_forecast_plot.png`` - the observed series followed by the
      out-of-sample forecast and its confidence band (skipped when
      ``forecast_steps`` is ``None`` or not positive).

    The figures are left open so that an inline notebook backend still
    displays them.

    Parameters
    ----------
    series : pd.Series
        Time series data to model
    order : tuple
        (p,d,q) order of the ARIMA model
    output_dir : str
        Directory to save diagnostic plots
    forecast_steps : int, optional
        Number of steps to forecast beyond the end of ``series`` for the
        forecast plot.

    Returns
    -------
    statsmodels.tsa.arima.model.ARIMAResults
        Fitted ARIMA model

    Raises
    ------
    ValueError
        If ``series`` is ``None`` or contains no observations.
    """
    # Fail early with a clear message, e.g. when a subject/session filter
    # upstream matched no rows.
    if series is None or len(series) == 0:
        raise ValueError("series must contain at least one observation")

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # 1. Fit ARIMA model
    model_fit = ARIMA(series, order=order).fit()

    # 2. Diagnostics panel. Save through the returned figure handle instead of
    #    relying on whichever figure happens to be "current" in pyplot.
    diagnostics_fig = model_fit.plot_diagnostics(figsize=(15, 12))
    diagnostics_fig.savefig(os.path.join(output_dir, 'arima_diagnostic_plots.png'))

    # 3. Residuals plot
    residuals_fig, residuals_ax = plt.subplots(figsize=(10, 6))
    residuals_ax.plot(model_fit.resid)
    residuals_ax.set_title('Residuals of ARIMA Model')
    residuals_ax.set_xlabel('Time')
    residuals_ax.set_ylabel('Residuals')
    residuals_fig.savefig(os.path.join(output_dir, 'arima_residuals_plot.png'))

    # 4. Forecast plot. Everything is drawn against row position so the plot
    #    does not depend on the series having a datetime index.
    if forecast_steps is not None and forecast_steps > 0:
        forecast = model_fit.get_forecast(steps=forecast_steps)
        observed = np.asarray(series, dtype=float)
        predicted = np.asarray(forecast.predicted_mean, dtype=float)
        bounds = np.asarray(forecast.conf_int(), dtype=float)  # columns: lower, upper
        observed_x = np.arange(len(observed))
        forecast_x = np.arange(len(observed), len(observed) + len(predicted))

        forecast_fig, forecast_ax = plt.subplots(figsize=(10, 6))
        forecast_ax.plot(observed_x, observed, label='Observed')
        forecast_ax.plot(forecast_x, predicted, label='Forecast', color='red')
        forecast_ax.fill_between(forecast_x, bounds[:, 0], bounds[:, 1],
                                 color='pink', alpha=0.5)
        forecast_ax.set_title('ARIMA Forecast')
        forecast_ax.set_xlabel('Time step')
        forecast_ax.set_ylabel('Value')
        forecast_ax.legend()
        forecast_fig.savefig(os.path.join(output_dir, 'arima_forecast_plot.png'))

    return model_fit
    




In [None]:

# Pick one recording to model: the EDA signal of subject S1's 'Final' session
series_test = data.loc[
    (data['subject_id'] == 'S1') & (data['session'] == 'Final'),
    'eda',
]
# Preview the first rows to confirm the selection
print(series_test.head())
from statsmodels.tsa.stattools import adfuller
def check_stationarity(series):
    """Run the Augmented Dickey-Fuller test on ``series`` and print the outcome.

    Prints the test statistic, the p-value and each critical value taken from
    the ``adfuller`` result. Nothing is returned.
    """
    adf_output = adfuller(series)
    # Positions 0, 1 and 4 of the result hold the statistic, the p-value and
    # the mapping of critical values respectively.
    statistic = adf_output[0]
    p_value = adf_output[1]
    critical_values = adf_output[4]

    print(f'ADF Statistic: {statistic}')
    print(f'p-value: {p_value}')
    print('Critical Values:')
    for level, threshold in critical_values.items():
        print(f'   {level}: {threshold}')

# Print the ADF statistic, p-value and critical values for the selected series
check_stationarity(series_test)
# Fit an ARIMA model with the default order (1,1,1); plots are saved under the
# default 'plots' directory and the fitted results object is returned.
build_arima_model(series_test)