# Part 2: Time Series Modeling

In this notebook, you will implement functions to extract features from time series data and build ARIMA models.

In [22]:
# Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.arima.model import ARIMA
from pathlib import Path
import os

# Set style for plots
plt.style.use('seaborn')
%matplotlib inline

  plt.style.use('seaborn')


## 1. Feature Extraction

Implement the `extract_time_series_features` function to calculate rolling window features.

In [31]:
def extract_time_series_features(data, window_size=60):
    """
    Extract rolling window features from time series data.

    For every (subject_id, session) group, a trailing time-based window of
    ``window_size`` seconds is applied to each signal and the mean, standard
    deviation, minimum and maximum are computed at every sample. The input
    DataFrame is never modified.

    Parameters
    ----------
    data : pd.DataFrame
        Preprocessed physiological data (must include 'timestamp', 'subject_id', 'session', 'heart_rate', 'eda', 'temperature').
    window_size : int
        Size of the rolling window in seconds.

    Returns
    -------
    pd.DataFrame
        DataFrame containing extracted features for each signal, with columns
        'timestamp', '<signal>_<stat>' for each signal and statistic, then
        'subject_id' and 'session'. The '<signal>_std' values are NaN while a
        window holds a single sample. If there are no groups to process, an
        empty DataFrame with the same columns is returned.

    Raises
    ------
    KeyError
        If any of the required columns is missing from ``data``.
    """
    signal_columns = ['heart_rate', 'eda', 'temperature']
    stat_names = ['mean', 'std', 'min', 'max']
    group_columns = ['subject_id', 'session']
    required_columns = ['timestamp'] + group_columns + signal_columns

    # Fail early with a message that names every missing column.
    missing = [col for col in required_columns if col not in data.columns]
    if missing:
        raise KeyError(f"Missing required columns: {missing}")

    # Build a fresh frame (selection + assign) so the caller's DataFrame keeps
    # its original 'timestamp' dtype, row order and index.
    frame = (
        data[required_columns]
        .assign(timestamp=lambda df: pd.to_datetime(df['timestamp']))
        .sort_values(by=group_columns + ['timestamp'])
        .set_index('timestamp')
    )

    features = []
    for (subject_id, session), group in frame.groupby(group_columns):
        # Time-based window: each row aggregates the samples of the preceding
        # `window_size` seconds within the same subject/session only.
        rolled = group[signal_columns].rolling(f'{window_size}s', min_periods=1)
        feature_df = rolled.agg(stat_names)
        # Flatten the (signal, stat) MultiIndex into 'signal_stat' names.
        feature_df.columns = ['_'.join(col) for col in feature_df.columns]
        feature_df['subject_id'] = subject_id
        feature_df['session'] = session
        features.append(feature_df.reset_index())

    if not features:
        # pd.concat raises on an empty list; return an empty, correctly shaped frame.
        feature_columns = (
            ['timestamp']
            + [f'{signal}_{stat}' for signal in signal_columns for stat in stat_names]
            + group_columns
        )
        return pd.DataFrame(columns=feature_columns)

    return pd.concat(features, ignore_index=True)



In [None]:
# import pandas as pd
# from pathlib import Path

# data_dir = Path('processed_data')  # relative path; assumes the notebook runs from the repository root - adjust if not
# all_files = list(data_dir.glob("S*_processed.csv"))
# dataframes = [pd.read_csv(file) for file in all_files]
# preprocessed_data = pd.concat(dataframes, ignore_index=True)
# preprocessed_data['timestamp'] = pd.to_datetime(preprocessed_data['timestamp'])
# time_domain_df = extract_time_series_features(preprocessed_data, window_size=60)
# time_domain_df.head()


Unnamed: 0,timestamp,heart_rate_mean,heart_rate_std,heart_rate_min,heart_rate_max,eda_mean,eda_std,eda_min,eda_max,temperature_mean,temperature_std,temperature_min,temperature_max,subject_id,session
0,2018-12-05 16:29:07,116.0,,116.0,116.0,0.0,,0.0,0.0,21.89,,21.89,21.89,S1,Final
1,2018-12-05 16:29:08,99.25,23.688077,82.5,116.0,0.002563,0.003624,0.0,0.005125,21.89,0.0,21.89,21.89,S1,Final
2,2018-12-05 16:29:09,98.276667,16.834626,82.5,116.0,0.008542,0.010669,0.0,0.020501,21.89,0.0,21.89,21.89,S1,Final
3,2018-12-05 16:29:10,95.27,15.00322,82.5,116.0,0.011852,0.010942,0.0,0.021783,21.89,0.0,21.89,21.89,S1,Final
4,2018-12-05 16:29:11,95.936,13.078235,82.5,116.0,0.014095,0.01072,0.0,0.023064,21.89,0.0,21.89,21.89,S1,Final


## 2. ARIMA Modeling

Implement the `build_arima_model` function to fit ARIMA models and generate diagnostic plots.

In [33]:
def build_arima_model(series, order=(1,1,1), output_dir='plots'):
    """
    Build and fit an ARIMA model on a univariate time series.

    The fitted model's diagnostic plots are written to
    ``<output_dir>/arima_diagnostics.png``; the file name is fixed, so each
    call overwrites the plot saved by a previous call into the same directory.

    Args:
        series (pd.Series): The input time series data (e.g., heart_rate)
        order (tuple): ARIMA order (p,d,q)
        output_dir (str): Directory to save diagnostic plots (created if missing)

    Returns:
        model_fit (ARIMAResultsWrapper): Fitted ARIMA model object

    Raises:
        ValueError: If ``series`` is None or empty.
    """
    # Validate before touching the filesystem so bad input leaves no directory behind.
    if series is None or len(series) == 0:
        raise ValueError("series must contain at least one observation")

    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    model_fit = ARIMA(series, order=order).fit()

    fig = model_fit.plot_diagnostics(figsize=(10, 6))
    try:
        fig.savefig(output_path / 'arima_diagnostics.png')
    finally:
        # Always release the figure, even if saving fails, so repeated calls
        # do not accumulate open figures.
        plt.close(fig)

    return model_fit



In [34]:
# series = time_domain_df['heart_rate_mean'].dropna()
# model = build_arima_model(series, order=(1, 1, 1), output_dir='plots')



  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
