# Time Series Machine Learning

This notebook demonstrates:
- Converting time series to supervised learning
- Feature engineering (lags, rolling, calendar, Fourier)
- TimeSeriesSplit cross-validation
- Tree-based forecasting (GradientBoosting)
- Hold-out evaluation on the final 60 days (one-step-ahead forecasts)
- Time series classification

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import GradientBoostingRegressor, RandomForestClassifier
from sklearn.model_selection import TimeSeriesSplit, cross_val_score, train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, classification_report

# Seed NumPy's global RNG so the random draws made through np.random are
# reproducible on a fresh Restart & Run All.
SEED = 42
np.random.seed(SEED)
print('Libraries loaded')

## 1. Generate Time Series Data

In [None]:
# Synthetic daily sales = linear trend + weekly cycle + yearly cycle + Gaussian noise.
n_days = 730
dates = pd.date_range('2022-01-01', periods=n_days, freq='D')
day_idx = np.arange(n_days)  # 0-based day counter shared by both seasonal terms

trend = np.linspace(100, 150, n_days)                # level rises from 100 to 150
weekly = 15 * np.sin(2 * np.pi * day_idx / 7)        # 7-day period, amplitude 15
yearly = 30 * np.sin(2 * np.pi * day_idx / 365.25)   # 365.25-day period, amplitude 30
noise = np.random.normal(0, 5, n_days)               # zero-mean noise, std 5
sales = trend + weekly + yearly + noise

df = pd.DataFrame(dict(date=dates, sales=sales))

# Line plot of the full series: sales against date.
fig, ax = plt.subplots(figsize=(14, 4))
ax.plot(df['date'], df['sales'], linewidth=0.8)
ax.set_title('Daily Sales (2 years)')
ax.set_xlabel('Date')
ax.set_ylabel('Sales')
fig.tight_layout()
plt.show()

## 2. Feature Engineering

In [None]:
# NOTE: this cell adds columns to `df` and then drops rows, so it is not idempotent.
# Re-run the data-generation cell before re-running this one.

# Lag features: lag_k holds the sales value observed k days earlier.
# The 365-day lag is NaN for the first 365 rows, all of which the dropna at the
# end of this cell removes.
for lag in [1, 7, 14, 28, 365]:
    df[f'lag_{lag}'] = df['sales'].shift(lag)

# Why: shift(1) before rolling prevents target leakage — each row's rolling statistic
# must use only past data, not include the current day's actual sales value.
# The shifted series is the same for every window size, so it is built once.
sales_prev = df['sales'].shift(1)
for w in [7, 14, 30]:
    past_window = sales_prev.rolling(w)
    df[f'roll_mean_{w}'] = past_window.mean()
    df[f'roll_std_{w}'] = past_window.std()

# Calendar features (pandas dayofweek: Monday=0 ... Sunday=6, so 5 and 6 are the weekend)
df['dow'] = df['date'].dt.dayofweek
df['month'] = df['date'].dt.month
df['is_weekend'] = df['dow'].isin([5, 6]).astype(int)

# Why: Cyclical sin/cos encoding ensures the model treats day 6 (Sunday) and day 0
# (Monday) as neighbors, and December and January as adjacent months.
df['dow_sin'] = np.sin(2 * np.pi * df['dow'] / 7)
df['dow_cos'] = np.cos(2 * np.pi * df['dow'] / 7)
df['month_sin'] = np.sin(2 * np.pi * df['month'] / 12)
df['month_cos'] = np.cos(2 * np.pi * df['month'] / 12)

# Why: Multiple Fourier harmonics (k=1,2,3) capture both the fundamental yearly cycle
# and its sharper sub-patterns (e.g., holiday season peaks), providing richer seasonal
# representation than a single sine wave.
# `t` is the 0-based row position, computed before any rows are dropped.
t = np.arange(len(df))
for k in range(1, 4):
    angle = 2 * np.pi * k * t / 365.25
    df[f'year_sin_{k}'] = np.sin(angle)
    df[f'year_cos_{k}'] = np.cos(angle)

# Everything except the original 'date' and 'sales' columns counts as a feature.
print(f'Features created: {len(df.columns) - 2}')

# Drop the warm-up rows whose lag/rolling features are NaN and renumber from 0.
# Rebinding instead of inplace=True keeps the step explicit and chainable.
df = df.dropna().reset_index(drop=True)
print(f'Rows after dropping NaN: {len(df)}')

## 3. TimeSeriesSplit Cross-Validation

In [None]:
# Model inputs are every column except the timestamp and the target, in frame order.
feat_cols = df.columns.drop(['date', 'sales']).tolist()
X, y = df[feat_cols], df['sales']

# Why: TimeSeriesSplit keeps folds in chronological order, so each validation fold lies
# strictly after the data used to fit it. A shuffled KFold would train on observations
# from the future of the validation fold and report scores that are too optimistic.
tscv = TimeSeriesSplit(n_splits=5)
gb = GradientBoostingRegressor(
    n_estimators=200,
    max_depth=5,
    learning_rate=0.1,
    random_state=42,
)

# The scorer returns negated MAE (higher is better), hence the sign flip when printing.
scores = cross_val_score(gb, X, y, cv=tscv, scoring='neg_mean_absolute_error')
print(f'TimeSeriesSplit MAE: {-scores.mean():.2f} (+/- {scores.std():.2f})')

## 4. Train and Evaluate

In [None]:
# Chronological hold-out: the last TEST_DAYS rows are the test set and everything
# before them is training data. No shuffling, so the model is scored only on rows
# that come after all of its training rows.
TEST_DAYS = 60
split = len(X) - TEST_DAYS
X_train, X_test = X.iloc[:split], X.iloc[split:]
y_train, y_test = y.iloc[:split], y.iloc[split:]
dates_test = df['date'].iloc[split:]

gb.fit(X_train, y_train)
y_pred = gb.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
# MAPE divides by the actual values, so it is only meaningful while y_test is non-zero.
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
print(f'MAE: {mae:.2f}, RMSE: {rmse:.2f}, MAPE: {mape:.1f}%')

# Actual vs. predicted over the hold-out window, labelled like the raw-series plot
# so the figure stands on its own.
fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(dates_test, y_test, 'b-', label='Actual', lw=2)
ax.plot(dates_test, y_pred, 'r--', label='Predicted', lw=2)
ax.set_title(f'GB Forecast (MAE={mae:.2f})')
ax.set_xlabel('Date')
ax.set_ylabel('Sales')
ax.legend()
fig.tight_layout()
plt.show()

In [None]:
# Feature importance
imp = pd.Series(gb.feature_importances_, index=feat_cols).nlargest(15)
imp.sort_values().plot(kind='barh', figsize=(10, 6), color='steelblue')
plt.title('Top 15 Features'); plt.xlabel('Importance')
plt.tight_layout(); plt.show()

## 5. Time Series Classification

In [None]:
# Simulate `n_series` labelled series of `length` points each, one of three shapes.
n_series, length = 300, 100
time_idx = np.arange(length)  # shared time axis; built once instead of per series


def _simulate_series(label):
    """Return one synthetic series of `length` points for class `label`.

    0 -> random walk: cumulative sum of N(0, 1) steps
    1 -> linear trend with slope 0.5 plus N(0, 3^2) noise
    2 -> sine wave with period 20 and amplitude 10 plus N(0, 1) noise
    """
    if label == 0:
        return np.cumsum(np.random.normal(0, 1, length))
    if label == 1:
        return 0.5 * time_idx + np.random.normal(0, 3, length)
    return 10 * np.sin(2 * np.pi * time_idx / 20) + np.random.normal(0, 1, length)


# Draw the label first and the series second for every sample, so the global RNG
# is consumed in a fixed order (label, noise, label, noise, ...).
X_ts, y_ts = [], []
for _ in range(n_series):
    label = np.random.choice([0, 1, 2])
    X_ts.append(_simulate_series(label))
    y_ts.append(label)

# Stack into a (n_series, length) array of values and a (n_series,) array of labels.
X_ts = np.array(X_ts)
y_ts = np.array(y_ts)

# Why: this is the "feature-based" approach to time series classification. Each raw
# series, whatever its length, is collapsed into the same handful of summary
# statistics (level, spread, trend slope, lag-1 autocorrelation, mean-crossing rate,
# skewness, kurtosis), giving a fixed-size vector that a standard classifier accepts.
def _summarize(s):
    """Return the summary statistics of one series `s` (1-D array) as a dict."""
    as_series = pd.Series(s)              # pandas view for autocorr / skew / kurtosis
    side_of_mean = np.sign(s - s.mean())  # -1 below the mean, +1 above, 0 on it
    return {
        'mean': s.mean(),
        'std': s.std(),
        'trend': np.polyfit(range(len(s)), s, 1)[0],       # slope of a degree-1 fit
        'autocorr': as_series.autocorr(1),
        'crossing': np.mean(np.diff(side_of_mean) != 0),   # share of steps that switch side
        'skew': as_series.skew(),
        'kurtosis': as_series.kurtosis(),
    }


feats = pd.DataFrame([_summarize(s) for s in X_ts])

# 70/30 shuffled split. Shuffling is acceptable here because each row of `feats`
# summarises a separate, independently generated series, not a time step.
Xtr, Xte, ytr, yte = train_test_split(feats, y_ts, test_size=0.3, random_state=42)
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(Xtr, ytr)

# Display names in label order 0, 1, 2. Class 0 is generated as a cumulative sum of
# Gaussian steps, i.e. a random walk, which is not stationary, so it is reported as
# 'Random Walk' rather than 'Stationary'.
class_names = ['Random Walk', 'Trending', 'Seasonal']
print(classification_report(yte, clf.predict(Xte), target_names=class_names))