# TSIOT Forecasting Models Example

This notebook demonstrates how to use TSIOT-generated synthetic data to train and evaluate various forecasting models.

## Models Covered:
1. ARIMA
2. Prophet
3. LSTM
4. GRU
5. Transformer-based models (not implemented in this notebook; suggested under Next Steps)
6. Ensemble methods

In [None]:
# Install required packages
!pip install requests pandas numpy matplotlib seaborn scikit-learn statsmodels prophet tensorflow torch

In [None]:
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from statsmodels.tsa.arima.model import ARIMA
from prophet import Prophet
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, Dense, Dropout
import warnings
warnings.filterwarnings('ignore')

# Apply matplotlib's 'seaborn-v0_8' style sheet to every figure drawn below.
plt.style.use('seaborn-v0_8')
# Default figure size, used by any figure that does not pass its own `figsize`.
plt.rcParams['figure.figsize'] = (15, 8)

## 1. Generate and Prepare Data

In [None]:
# TSIOT API configuration
import os

# Read the endpoint and credential from the environment so that a real key never
# has to be written into (and committed with) the notebook. The defaults keep the
# original placeholder values, so the notebook still runs when nothing is exported.
TSIOT_BASE_URL = os.environ.get("TSIOT_BASE_URL", "http://localhost:8080")
API_KEY = os.environ.get("TSIOT_API_KEY", "your-api-key-here")

# Headers sent with every TSIOT request: JSON body and bearer-token authentication.
headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {API_KEY}"
}

# Generate time series data
def generate_forecast_data(timeout=10):
    """Request a synthetic time series from the TSIOT API.

    Posts a generation request to ``{TSIOT_BASE_URL}/api/v1/generate`` using the
    module-level ``headers``.

    Args:
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body when the server answers with HTTP 200, otherwise
        ``None`` (server unreachable, timeout, non-200 status or invalid JSON).
        Returning ``None`` lets the caller switch to its local fallback data.
    """
    data = {
        "type": "lstm",
        "length": 2000,
        "parameters": {
            "trend": 0.1,
            "seasonality": 24,
            "noise": 0.05,
            "complexity": "high"
        }
    }

    try:
        response = requests.post(
            f"{TSIOT_BASE_URL}/api/v1/generate",
            json=data,
            headers=headers,
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Connection refused, DNS failure, timeout, ...: without this handler the
        # exception would propagate and the caller's fallback would never run.
        print(f"⚠️ TSIOT API request failed: {exc}")
        return None

    if response.status_code != 200:
        return None

    try:
        return response.json()
    except ValueError:
        # A 200 response whose body is not valid JSON is treated like "no data".
        return None

# Generate data: prefer the TSIOT API, otherwise build an equivalent series locally
ts_data = generate_forecast_data()

if not ts_data:
    # Use synthetic data if API is not available
    print("⚠️ Using synthetic fallback data")
    np.random.seed(42)
    t = np.arange(2000)
    trend = 0.1 * t                               # linear upward drift
    seasonal = 10 * np.sin(2 * np.pi * t / 24)    # cycle with a period of 24 steps
    noise = np.random.normal(0, 2, 2000)
    values = 100 + trend + seasonal + noise
else:
    values = np.array(ts_data['values'])

# Both branches share the same hourly index and frame layout
timestamps = pd.date_range(start='2023-01-01', periods=len(values), freq='H')
df = pd.DataFrame({'timestamp': timestamps, 'value': values}).set_index('timestamp')

if ts_data:
    print(f"✅ Generated {len(df)} data points")

# Split data chronologically: first 80% for training, remainder for testing
train_size = int(0.8 * len(df))
train_data, test_data = df[:train_size], df[train_size:]

print(f"Training data: {len(train_data)} points")
print(f"Test data: {len(test_data)} points")

In [None]:
# Visualize the data: training and test segments on one axis, split marked in red
fig, ax = plt.subplots(figsize=(15, 6))
ax.plot(train_data.index, train_data['value'], label='Training Data', alpha=0.7)
ax.plot(test_data.index, test_data['value'], label='Test Data', alpha=0.7)
ax.axvline(x=train_data.index[-1], color='red', linestyle='--', alpha=0.5)
ax.set_title('Time Series Data Split')
ax.set_xlabel('Time')
ax.set_ylabel('Value')
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()

## 2. ARIMA Model

In [None]:
# ARIMA forecasting
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Check stationarity
def check_stationarity(timeseries):
    """Run `adfuller` on `timeseries` (NaNs dropped) and print the outcome.

    Prints the test statistic, the p-value and the critical values, followed by
    a verdict: the series is reported as stationary when the p-value is at most
    0.05 and as non-stationary otherwise. Nothing is returned.
    """
    outcome = adfuller(timeseries.dropna())
    adf_statistic, p_value, critical_values = outcome[0], outcome[1], outcome[4]

    print('ADF Statistic:', adf_statistic)
    print('p-value:', p_value)
    print('Critical Values:', critical_values)

    verdict = "✅ Data is stationary" if p_value <= 0.05 else "❌ Data is non-stationary"
    print(verdict)

check_stationarity(train_data['value'])

# First-order difference of the training series. It is computed regardless of the
# test result above and is only used for the correlation plots below.
train_diff = train_data['value'].diff().dropna()

# Plot ACF (left) and PACF (right) of the differenced series
fig, axes = plt.subplots(1, 2, figsize=(15, 5))
for draw_correlogram, panel in zip((plot_acf, plot_pacf), axes):
    draw_correlogram(train_diff, lags=40, ax=panel)
plt.tight_layout()
plt.show()

In [None]:
# Fit ARIMA model on the training series
print("Fitting ARIMA model...")
arima_model = ARIMA(train_data['value'], order=(2, 1, 2))
arima_fit = arima_model.fit()
print(arima_fit.summary())

# Forecast one value per test observation
arima_forecast = arima_fit.forecast(steps=len(test_data))
# Attach the test index positionally. Passing the forecast Series itself to
# pd.Series(..., index=...) would re-align by label and silently produce NaNs
# whenever the forecast's own index differs from the test index.
arima_forecast = pd.Series(np.asarray(arima_forecast), index=test_data.index)

# Calculate metrics
arima_mse = mean_squared_error(test_data['value'], arima_forecast)
arima_mae = mean_absolute_error(test_data['value'], arima_forecast)
arima_r2 = r2_score(test_data['value'], arima_forecast)

print(f"\nARIMA Performance:")
print(f"MSE: {arima_mse:.4f}")
print(f"MAE: {arima_mae:.4f}")
print(f"R²: {arima_r2:.4f}")

## 3. Prophet Model

In [None]:
# Prepare data for Prophet, which expects the columns 'ds' (time) and 'y' (value)
prophet_train = train_data.reset_index()
prophet_train.columns = ['ds', 'y']

# Fit Prophet model
print("Fitting Prophet model...")
prophet_options = dict(
    daily_seasonality=True,
    yearly_seasonality=False,
    changepoint_prior_scale=0.05,
)
prophet_model = Prophet(**prophet_options)
prophet_model.fit(prophet_train)

# Make predictions over the training period plus the test horizon
n_test = len(test_data)
future = prophet_model.make_future_dataframe(periods=n_test, freq='H')
prophet_forecast = prophet_model.predict(future)

# Extract test predictions: the last n_test rows, indexed by their timestamp
prophet_test_pred = prophet_forecast[['ds', 'yhat']].iloc[-n_test:].set_index('ds')

# Calculate metrics
prophet_mse, prophet_mae, prophet_r2 = (
    metric(test_data['value'], prophet_test_pred['yhat'])
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)

print(f"\nProphet Performance:")
print(f"MSE: {prophet_mse:.4f}")
print(f"MAE: {prophet_mae:.4f}")
print(f"R²: {prophet_r2:.4f}")

## 4. LSTM Model

In [None]:
# Prepare data for LSTM
def create_lstm_dataset(data, look_back=24):
    """Turn a sequence into sliding input windows and next-step targets.

    For every position ``end`` from ``look_back`` up to ``len(data) - 1`` the
    window ``data[end - look_back:end]`` becomes one input row and ``data[end]``
    its target.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays built from the windows and the targets.
        Both are empty when ``data`` has no more than ``look_back`` elements.
    """
    target_positions = range(look_back, len(data))
    windows = [data[end - look_back:end] for end in target_positions]
    targets = [data[end] for end in target_positions]
    return np.array(windows), np.array(targets)

# Number of past time steps fed to the recurrent models for each prediction.
# Defined before the split so the test slice below uses the same value.
look_back = 24

# Scale the data.
# The scaler is fit on the training rows only; fitting it on the full series
# would leak the test set's min/max into training. Test values may therefore
# fall outside [0, 1], which inverse_transform handles correctly.
scaler = MinMaxScaler()
scaler.fit(train_data[['value']])
scaled_data = scaler.transform(df[['value']])

# Split scaled data
scaled_train = scaled_data[:train_size]
# Start look_back steps early so the first test target has a full input window
scaled_test = scaled_data[train_size - look_back:]

# Create datasets
X_train, y_train = create_lstm_dataset(scaled_train.flatten(), look_back)
X_test, y_test = create_lstm_dataset(scaled_test.flatten(), look_back)

# Reshape for LSTM [samples, time steps, features]
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")

In [None]:
# Seed TensorFlow so weight initialisation and dropout are repeatable between runs
tf.random.set_seed(42)

# Build LSTM model: two stacked LSTM layers with dropout and a single-value output
lstm_model = Sequential([
    LSTM(50, activation='relu', return_sequences=True, input_shape=(look_back, 1)),
    Dropout(0.2),
    LSTM(50, activation='relu'),
    Dropout(0.2),
    Dense(1)
])

lstm_model.compile(optimizer='adam', loss='mse', metrics=['mae'])
# summary() prints the table itself and returns None, so it is not wrapped in print()
lstm_model.summary()

# Train model (the last 20% of the training windows are held out for validation)
print("\nTraining LSTM model...")
history = lstm_model.fit(
    X_train, y_train,
    epochs=50,
    batch_size=32,
    validation_split=0.2,
    verbose=0
)

# Make predictions and map them, and the targets, back to the original scale
lstm_predictions = lstm_model.predict(X_test)
lstm_predictions = scaler.inverse_transform(lstm_predictions)
y_test_inverse = scaler.inverse_transform(y_test.reshape(-1, 1))

# Calculate metrics
lstm_mse = mean_squared_error(y_test_inverse, lstm_predictions)
lstm_mae = mean_absolute_error(y_test_inverse, lstm_predictions)
lstm_r2 = r2_score(y_test_inverse, lstm_predictions)

print(f"\nLSTM Performance:")
print(f"MSE: {lstm_mse:.4f}")
print(f"MAE: {lstm_mae:.4f}")
print(f"R²: {lstm_r2:.4f}")

## 5. GRU Model

In [None]:
# Seed TensorFlow so weight initialisation and dropout are repeatable between runs
tf.random.set_seed(42)

# Build GRU model: same layout as the LSTM model, with GRU layers instead
gru_model = Sequential([
    GRU(50, activation='relu', return_sequences=True, input_shape=(look_back, 1)),
    Dropout(0.2),
    GRU(50, activation='relu'),
    Dropout(0.2),
    Dense(1)
])

gru_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Train model (the last 20% of the training windows are held out for validation)
print("Training GRU model...")
gru_history = gru_model.fit(
    X_train, y_train,
    epochs=50,
    batch_size=32,
    validation_split=0.2,
    verbose=0
)

# Make predictions and map them back to the original scale
gru_predictions = gru_model.predict(X_test)
gru_predictions = scaler.inverse_transform(gru_predictions)

# Calculate metrics against y_test_inverse, which the LSTM cell computed
gru_mse = mean_squared_error(y_test_inverse, gru_predictions)
gru_mae = mean_absolute_error(y_test_inverse, gru_predictions)
gru_r2 = r2_score(y_test_inverse, gru_predictions)

print(f"\nGRU Performance:")
print(f"MSE: {gru_mse:.4f}")
print(f"MAE: {gru_mae:.4f}")
print(f"R²: {gru_r2:.4f}")

## 6. Ensemble Model

In [None]:
# Create ensemble predictions
def create_ensemble(predictions_dict, weights=None):
    """Create weighted ensemble of predictions.

    Args:
        predictions_dict: Mapping of model name to its predictions (array-like;
            all entries must be broadcastable to the shape of the first one).
        weights: Optional mapping of model name to weight. When omitted, every
            model gets the same weight ``1 / len(predictions_dict)``.

    Returns:
        Float NumPy array holding the weighted sum of the predictions.

    Raises:
        ValueError: If ``predictions_dict`` is empty.
        KeyError: If ``weights`` lacks an entry for one of the models.
    """
    if not predictions_dict:
        raise ValueError("predictions_dict must contain at least one model")

    if weights is None:
        weights = {model: 1/len(predictions_dict) for model in predictions_dict}

    # Convert up front so lists and integer arrays work: the accumulator must be
    # float, otherwise the in-place addition of a float-weighted term fails.
    members = {model: np.asarray(pred, dtype=float) for model, pred in predictions_dict.items()}

    ensemble_pred = np.zeros_like(next(iter(members.values())))

    for model, pred in members.items():
        ensemble_pred += weights[model] * pred

    return ensemble_pred

# Align predictions to the shortest forecast so all arrays have the same length
min_length = min(len(arima_forecast), len(prophet_test_pred), len(lstm_predictions), len(gru_predictions))

predictions_dict = {
    'ARIMA': arima_forecast.values[:min_length],
    'Prophet': prophet_test_pred['yhat'].values[:min_length],
    'LSTM': lstm_predictions.flatten()[:min_length],
    'GRU': gru_predictions.flatten()[:min_length]
}

# Create simple average ensemble
ensemble_pred = create_ensemble(predictions_dict)

# Create weighted ensemble based on individual R² scores.
# Negative scores are clipped to 0 so such models get no weight.
# NOTE: the scores come from the test set, so the weighted ensemble's test
# metrics are optimistic; a separate validation split would avoid this.
r2_scores = {
    'ARIMA': max(0, arima_r2),
    'Prophet': max(0, prophet_r2),
    'LSTM': max(0, lstm_r2),
    'GRU': max(0, gru_r2)
}

# Normalize weights so they sum to 1
total_r2 = sum(r2_scores.values())
if total_r2 > 0:
    weights = {model: score/total_r2 for model, score in r2_scores.items()}
else:
    # Every model scored R² <= 0: use equal weights instead of dividing by zero
    weights = {model: 1/len(r2_scores) for model in r2_scores}

weighted_ensemble_pred = create_ensemble(predictions_dict, weights)

# Calculate ensemble metrics
test_values_aligned = test_data['value'].values[:min_length]

ensemble_mse = mean_squared_error(test_values_aligned, ensemble_pred)
ensemble_mae = mean_absolute_error(test_values_aligned, ensemble_pred)
ensemble_r2 = r2_score(test_values_aligned, ensemble_pred)

weighted_mse = mean_squared_error(test_values_aligned, weighted_ensemble_pred)
weighted_mae = mean_absolute_error(test_values_aligned, weighted_ensemble_pred)
weighted_r2 = r2_score(test_values_aligned, weighted_ensemble_pred)

print("Ensemble Weights:")
for model, weight in weights.items():
    print(f"{model}: {weight:.3f}")

print(f"\nSimple Ensemble Performance:")
print(f"MSE: {ensemble_mse:.4f}")
print(f"MAE: {ensemble_mae:.4f}")
print(f"R²: {ensemble_r2:.4f}")

print(f"\nWeighted Ensemble Performance:")
print(f"MSE: {weighted_mse:.4f}")
print(f"MAE: {weighted_mae:.4f}")
print(f"R²: {weighted_r2:.4f}")

## 7. Results Visualization

In [None]:
# Visualize all predictions: one panel per model, actual vs. forecast
fig, axes = plt.subplots(3, 2, figsize=(20, 15))

# Panel order: the four individual models, then the two ensembles.
# The individual forecasts are taken from predictions_dict (already aligned to
# min_length, in ARIMA/Prophet/LSTM/GRU order) rather than being sliced again.
models = ['ARIMA', 'Prophet', 'LSTM', 'GRU', 'Simple Ensemble', 'Weighted Ensemble']
predictions = [
    *predictions_dict.values(),
    ensemble_pred,
    weighted_ensemble_pred
]

test_index_aligned = test_data.index[:min_length]

for ax, model, pred in zip(axes.flat, models, predictions):
    ax.plot(test_index_aligned, test_values_aligned, label='Actual', alpha=0.7)
    ax.plot(test_index_aligned, pred, label=f'{model} Forecast', alpha=0.7)
    ax.set_title(f'{model} Forecasting Results')
    ax.set_xlabel('Time')
    ax.set_ylabel('Value')
    ax.legend()
    ax.grid(True, alpha=0.3)

    # Add metrics text in the top-left corner of the panel
    mse = mean_squared_error(test_values_aligned, pred)
    mae = mean_absolute_error(test_values_aligned, pred)
    r2 = r2_score(test_values_aligned, pred)

    metrics_text = f'MSE: {mse:.2f}\nMAE: {mae:.2f}\nR²: {r2:.3f}'
    ax.text(0.02, 0.98, metrics_text, transform=ax.transAxes, 
            bbox=dict(boxstyle='round', facecolor='white', alpha=0.8),
            verticalalignment='top')

plt.tight_layout()
plt.show()

In [None]:
# Performance comparison: collect every model's metrics in one table
performance_data = pd.DataFrame({
    'Model': ['ARIMA', 'Prophet', 'LSTM', 'GRU', 'Simple Ensemble', 'Weighted Ensemble'],
    'MSE': [arima_mse, prophet_mse, lstm_mse, gru_mse, ensemble_mse, weighted_mse],
    'MAE': [arima_mae, prophet_mae, lstm_mae, gru_mae, ensemble_mae, weighted_mae],
    'R²': [arima_r2, prophet_r2, lstm_r2, gru_r2, ensemble_r2, weighted_r2]
})

fig, axes = plt.subplots(1, 3, figsize=(18, 6))

# One bar chart per metric: (column, panel title, bar colour)
metric_panels = [
    ('MSE', 'Mean Squared Error Comparison', 'skyblue'),
    ('MAE', 'Mean Absolute Error Comparison', 'lightgreen'),
    ('R²', 'R² Score Comparison', 'salmon'),
]

for panel, (metric_name, panel_title, bar_color) in zip(axes, metric_panels):
    panel.bar(performance_data['Model'], performance_data[metric_name], color=bar_color)
    panel.set_title(panel_title)
    panel.set_ylabel(metric_name)
    panel.tick_params(axis='x', rotation=45)
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print("\n📊 Performance Summary:")
print(performance_data.round(4))

## 8. Forecast Future Values

In [None]:
# Forecast next 48 hours using the best model
best_model_idx = performance_data['R²'].idxmax()
best_model = performance_data.loc[best_model_idx, 'Model']
print(f"Best model based on R² score: {best_model}")

# Generate future forecast
forecast_horizon = 48


def forecast_arima(horizon):
    """Refit ARIMA(2, 1, 2) on the full series and forecast `horizon` steps."""
    full_arima_fit = ARIMA(df['value'], order=(2, 1, 2)).fit()
    return np.asarray(full_arima_fit.forecast(steps=horizon), dtype=float)


def forecast_prophet(horizon):
    """Refit Prophet on the full series and forecast `horizon` hourly steps."""
    full_frame = df.reset_index()
    full_frame.columns = ['ds', 'y']
    full_prophet = Prophet(
        daily_seasonality=True,
        yearly_seasonality=False,
        changepoint_prior_scale=0.05
    )
    full_prophet.fit(full_frame)
    # include_history=False: only the future rows are needed here
    future_frame = full_prophet.make_future_dataframe(
        periods=horizon, freq='H', include_history=False
    )
    return np.asarray(full_prophet.predict(future_frame)['yhat'], dtype=float)


def forecast_recurrent(network, horizon):
    """Roll a trained LSTM/GRU forward `horizon` steps, feeding predictions back in."""
    # Use the last look_back scaled observations as the starting window
    window = scaled_data[-look_back:].flatten()
    scaled_steps = []
    for _ in range(horizon):
        next_pred = network.predict(window.reshape(1, look_back, 1), verbose=0)[0, 0]
        scaled_steps.append(next_pred)
        # Slide the window: drop the oldest value, append the new prediction
        window = np.append(window[1:], next_pred)
    unscaled = scaler.inverse_transform(np.array(scaled_steps).reshape(-1, 1))
    return np.asarray(unscaled, dtype=float).flatten()


# One forecaster per individual model; keys match the names in performance_data
future_forecasters = {
    'ARIMA': lambda: forecast_arima(forecast_horizon),
    'Prophet': lambda: forecast_prophet(forecast_horizon),
    'LSTM': lambda: forecast_recurrent(lstm_model, forecast_horizon),
    'GRU': lambda: forecast_recurrent(gru_model, forecast_horizon),
}

if best_model in future_forecasters:
    future_forecast = future_forecasters[best_model]()
else:
    # An ensemble won: forecast with every member and combine the results the
    # same way as during evaluation (equal weights, or the R²-based `weights`).
    member_forecasts = {name: run() for name, run in future_forecasters.items()}
    member_weights = weights if best_model == 'Weighted Ensemble' else None
    future_forecast = create_ensemble(member_forecasts, member_weights)

# Create future dates, starting one hour after the last observation
future_dates = pd.date_range(start=df.index[-1] + pd.Timedelta(hours=1), 
                            periods=forecast_horizon, freq='H')

# Visualize future forecast
plt.figure(figsize=(15, 8))
plt.plot(df.index[-100:], df['value'].iloc[-100:], label='Historical Data', alpha=0.7)
plt.plot(future_dates, future_forecast, label=f'{best_model} Forecast', 
         color='red', linestyle='--', alpha=0.8)
plt.axvline(x=df.index[-1], color='green', linestyle=':', alpha=0.5, label='Forecast Start')
# NOTE: the shaded band is +/- one standard deviation of the forecast values
# themselves; it is a visual aid, not a statistical prediction interval.
plt.fill_between(future_dates, 
                 future_forecast - np.std(future_forecast),
                 future_forecast + np.std(future_forecast),
                 alpha=0.2, color='red', label='Uncertainty')
plt.title(f'Future Forecast using {best_model}')
plt.xlabel('Time')
plt.ylabel('Value')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

## 9. Summary and Recommendations

In [None]:
# Print a plain-text report: dataset facts, model ranking, insights, recommendations
print("\n" + "="*80)
print("📋 FORECASTING ANALYSIS SUMMARY")
print("="*80)

print(f"\n📊 Dataset Information:")
print(f"   Total data points: {len(df)}")
print(f"   Training samples: {len(train_data)}")
print(f"   Test samples: {len(test_data)}")
print(f"   Time range: {df.index.min().strftime('%Y-%m-%d')} to {df.index.max().strftime('%Y-%m-%d')}")

print(f"\n🏆 Model Rankings (by R² score):")
ranked_models = performance_data.sort_values('R²', ascending=False)
# Number the rows by their position in the sorted table. The index label that
# iterrows() yields is the row's position in the unsorted frame, not its rank.
for rank, (_, row) in enumerate(ranked_models.iterrows(), start=1):
    print(f"   {rank}. {row['Model']}: R²={row['R²']:.4f}, MSE={row['MSE']:.4f}, MAE={row['MAE']:.4f}")

print(f"\n💡 Key Insights:")
if weighted_r2 > max(arima_r2, prophet_r2, lstm_r2, gru_r2):
    print("   ✅ Ensemble methods outperform individual models")
else:
    print("   ⚠️ Individual models perform better than ensemble")

if lstm_r2 > arima_r2 and gru_r2 > arima_r2:
    print("   ✅ Deep learning models capture complex patterns better")
else:
    print("   ✅ Traditional models are competitive for this dataset")

print(f"\n🔧 Recommendations:")
print(f"   1. Use {best_model} for production forecasting (best R² score)")
print(f"   2. Consider ensemble methods for improved robustness")
print(f"   3. Retrain models periodically with new data")
print(f"   4. Monitor forecast accuracy and adjust parameters")
print(f"   5. Implement prediction intervals for uncertainty quantification")

print(f"\n📈 Future Improvements:")
print(f"   • Hyperparameter optimization for all models")
print(f"   • Feature engineering (external variables, holidays, etc.)")
print(f"   • Advanced architectures (Transformer, N-BEATS)")
print(f"   • Online learning for adaptive forecasting")
print(f"   • Multi-step ahead forecasting evaluation")

print("\n" + "="*80)

## 10. Export Results

In [None]:
# Export forecasts and results as timestamped CSV files
import os
from datetime import datetime

output_dir = "forecast_results"
os.makedirs(output_dir, exist_ok=True)

# Shared suffix so the files written by one run can be matched up later
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

# Test-period forecasts: one column per entry of `predictions`, in the same order
forecast_columns = ['arima', 'prophet', 'lstm', 'gru', 'ensemble', 'weighted_ensemble']
forecasts_df = pd.DataFrame({
    'timestamp': test_index_aligned,
    'actual': test_values_aligned,
    **dict(zip(forecast_columns, predictions)),
})

# Future forecast produced by the best model
future_df = pd.DataFrame({
    'timestamp': future_dates,
    'forecast': future_forecast
})

# Save performance metrics, forecasts and future forecast (in that order)
exports = {
    'model_performance': performance_data,
    'forecasts': forecasts_df,
    'future_forecast': future_df,
}
for stem, frame in exports.items():
    frame.to_csv(f"{output_dir}/{stem}_{timestamp}.csv", index=False)

print(f"✅ Results exported to {output_dir}/")
print(f"📁 Files created:")
for stem in exports:
    print(f"   - {stem}_{timestamp}.csv")

## Next Steps

This notebook demonstrated various forecasting techniques on TSIOT-generated data. Consider:

1. **Advanced Models**: Try Transformer-based models, N-BEATS, or DeepAR
2. **Feature Engineering**: Add external variables, calendar features, or domain-specific indicators
3. **Hyperparameter Tuning**: Use grid search or Bayesian optimization
4. **Production Deployment**: Integrate the best model into your application
5. **Real-time Forecasting**: Implement streaming predictions with Kafka integration

For more examples, visit the [TSIOT documentation](../../docs/).