# Machine Learning for Cost Forecasting

This notebook demonstrates advanced machine learning techniques for LLM cost prediction and forecasting.

## Learning Objectives
- Feature engineering for time series forecasting
- Training multiple forecasting models (ARIMA, Prophet, LSTM)
- Model evaluation and comparison
- Production deployment considerations

## Prerequisites
```bash
pip install pandas numpy matplotlib seaborn requests scikit-learn prophet statsmodels tensorflow keras
```

In [None]:
# Import libraries
import os
import pickle
import warnings
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
warnings.filterwarnings('ignore')

# ML libraries
from sklearn.model_selection import train_test_split, TimeSeriesSplit
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Statistical models
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Prophet
try:
    from prophet import Prophet
    PROPHET_AVAILABLE = True
except ImportError:
    print("Prophet not available. Install with: pip install prophet")
    PROPHET_AVAILABLE = False

# Deep Learning
try:
    import tensorflow as tf
    from tensorflow import keras
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import LSTM, Dense, Dropout
    from tensorflow.keras.callbacks import EarlyStopping
    TENSORFLOW_AVAILABLE = True
except ImportError:
    print("TensorFlow not available. Install with: pip install tensorflow")
    TENSORFLOW_AVAILABLE = False

# Apply the seaborn dark-grid matplotlib style to every figure created below.
plt.style.use('seaborn-v0_8-darkgrid')
# Render matplotlib figures inline, directly beneath the cell that draws them.
%matplotlib inline

# Set random seeds for reproducibility
np.random.seed(42)
# TensorFlow keeps its own random state; seed it only if the import succeeded.
if TENSORFLOW_AVAILABLE:
    tf.random.set_seed(42)

## 1. Load and Prepare Data

In [None]:
# API configuration.
# The key is read from the COST_API_KEY environment variable so that real
# credentials are never saved inside the notebook; the placeholder is only a
# fallback for local experimentation.
BASE_URL = 'http://localhost:3000/api'
API_KEY = os.environ.get('COST_API_KEY', 'your-api-key-here')
HEADERS = {
    'Authorization': f'Bearer {API_KEY}',
    'Content-Type': 'application/json'
}
REQUEST_TIMEOUT_SECONDS = 30  # fail fast instead of hanging the kernel forever

# Fetch 90 days of data for training
end_date = datetime.now()
start_date = end_date - timedelta(days=90)

params = {
    'start_date': start_date.isoformat(),
    'end_date': end_date.isoformat(),
    'limit': 10000
}

response = requests.get(
    f'{BASE_URL}/cost-tracking',
    headers=HEADERS,
    params=params,
    timeout=REQUEST_TIMEOUT_SECONDS,
)
# Surface HTTP failures (401, 500, ...) here instead of as a confusing
# KeyError when the error payload is indexed below.
response.raise_for_status()
cost_data = response.json()

df = pd.DataFrame(cost_data['data'])
if df.empty:
    raise ValueError('The cost-tracking API returned no records for the requested date range.')

df['timestamp'] = pd.to_datetime(df['timestamp'])
df['total_tokens'] = df['input_tokens'] + df['output_tokens']

# Aggregate to daily level. Named aggregation fixes the output column names
# at the point where they are computed, so no positional rename is needed.
daily_df = df.groupby(df['timestamp'].dt.date.rename('date')).agg(
    total_cost=('total_cost', 'sum'),
    request_count=('request_id', 'count'),
    total_tokens=('total_tokens', 'sum'),
    input_tokens=('input_tokens', 'sum'),
    output_tokens=('output_tokens', 'sum'),
)
daily_df.index = pd.to_datetime(daily_df.index)

# Days without any request are absent from the grouped result. Insert them as
# zero rows so the series is contiguous: the lag features created later shift
# by rows, which only means "n days ago" when no calendar day is missing.
daily_df = (
    daily_df
    .asfreq('D', fill_value=0)
    .rename_axis('date')
    .reset_index()
)

print(f"Loaded {len(daily_df)} days of data")
print(f"Date range: {daily_df['date'].min()} to {daily_df['date'].max()}")
daily_df.head()

## 2. Feature Engineering

In [None]:
# Calendar features derived from the date column
daily_df['day_of_week'] = daily_df['date'].dt.dayofweek
daily_df['day_of_month'] = daily_df['date'].dt.day
# isocalendar() yields a nullable UInt32 column; cast to a plain integer dtype
# so downstream numpy / scikit-learn code sees an ordinary numeric column.
daily_df['week_of_year'] = daily_df['date'].dt.isocalendar().week.astype(int)
daily_df['month'] = daily_df['date'].dt.month
daily_df['is_weekend'] = daily_df['day_of_week'].isin([5, 6]).astype(int)

# Lag features: value of the series n rows (days) earlier
for lag in [1, 3, 7, 14]:
    daily_df[f'cost_lag_{lag}'] = daily_df['total_cost'].shift(lag)
    daily_df[f'requests_lag_{lag}'] = daily_df['request_count'].shift(lag)

# Rolling / exponential statistics are computed on the series shifted by one
# day, so every window only covers days strictly before the current row.
# Without the shift each statistic would contain the current day's total_cost,
# i.e. the very value the models are asked to predict (target leakage).
previous_cost = daily_df['total_cost'].shift(1)

# Rolling statistics
for window in [7, 14, 30]:
    trailing = previous_cost.rolling(window=window, min_periods=1)
    daily_df[f'cost_ma_{window}'] = trailing.mean()
    daily_df[f'cost_std_{window}'] = trailing.std()

# Exponential moving average
daily_df['cost_ema_7'] = previous_cost.ewm(span=7, adjust=False).mean()
daily_df['cost_ema_14'] = previous_cost.ewm(span=14, adjust=False).mean()

# Trend feature: 0, 1, 2, ... in chronological row order
daily_df['trend'] = np.arange(len(daily_df))

print("Feature engineering complete!")
print(f"Total features: {len(daily_df.columns)}")
print(f"\nFeature list: {daily_df.columns.tolist()}")

In [None]:
# Four-panel overview of the engineered features
fig, axes = plt.subplots(2, 2, figsize=(16, 10))
ma_ax, lag_ax = axes[0]
weekday_ax, weekend_ax = axes[1]

# Top-left: raw daily cost against its 7-day moving average
ma_ax.plot(daily_df['date'], daily_df['total_cost'], label='Original', alpha=0.7)
ma_ax.plot(daily_df['date'], daily_df['cost_ma_7'], label='7-day MA', linewidth=2)
ma_ax.set_title('Cost with Moving Average', fontweight='bold')
ma_ax.legend()
ma_ax.grid(True, alpha=0.3)

# Top-right: today's cost against yesterday's cost
lag_ax.scatter(daily_df['cost_lag_1'], daily_df['total_cost'], alpha=0.6)
lag_ax.set_xlabel('Cost (t-1)')
lag_ax.set_ylabel('Cost (t)')
lag_ax.set_title('Cost Autocorrelation (lag=1)', fontweight='bold')
lag_ax.grid(True, alpha=0.3)

# Bottom-left: mean cost for each day of the week
mean_cost_by_weekday = daily_df.groupby('day_of_week')['total_cost'].mean()
mean_cost_by_weekday.plot(kind='bar', ax=weekday_ax, color='coral')
weekday_ax.set_title('Average Cost by Day of Week', fontweight='bold')
weekday_ax.set_xlabel('Day of Week (0=Monday)')
weekday_ax.set_ylabel('Average Cost')
weekday_ax.grid(True, alpha=0.3, axis='y')

# Bottom-right: mean cost on weekdays versus weekends
mean_cost_by_weekend_flag = daily_df.groupby('is_weekend')['total_cost'].mean()
mean_cost_by_weekend_flag.plot(kind='bar', ax=weekend_ax, color='seagreen')
weekend_ax.set_title('Average Cost: Weekday vs Weekend', fontweight='bold')
weekend_ax.set_xticklabels(['Weekday', 'Weekend'], rotation=0)
weekend_ax.set_ylabel('Average Cost')
weekend_ax.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.show()

## 3. Prepare Training and Test Sets

In [None]:
# The lag features leave NaNs in the earliest rows; drop those before modelling
df_clean = daily_df.dropna().copy()

# Chronological 80/20 split: the first 80% of rows train, the rest test
train_size = int(len(df_clean) * 0.8)
train_df = df_clean.iloc[:train_size]
test_df = df_clean.iloc[train_size:]

for split_label, split_df in (('Training set', train_df), ('Test set', test_df)):
    print(f"{split_label}: {len(split_df)} days ({split_df['date'].min()} to {split_df['date'].max()})")

# Show where the series is cut
fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(train_df['date'], train_df['total_cost'], label='Training Data', color='blue')
ax.plot(test_df['date'], test_df['total_cost'], label='Test Data', color='orange')
ax.axvline(x=train_df['date'].iloc[-1], color='red', linestyle='--', label='Train/Test Split')
ax.set_title('Train/Test Split', fontsize=14, fontweight='bold')
ax.set_xlabel('Date')
ax.set_ylabel('Total Cost')
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

## 4. Model 1: ARIMA

In [None]:
# ARIMA works on a univariate series: daily cost indexed by date
train_series, test_series = (
    split_df.set_index('date')['total_cost'] for split_df in (train_df, test_df)
)

# Fit ARIMA model
print("Training ARIMA model...")
arima_order = (5, 1, 2)  # (p, d, q)
arima_model = ARIMA(train_series, order=arima_order)
arima_fitted = arima_model.fit()

print("ARIMA model trained!")
print(arima_fitted.summary())

In [None]:
# Forecast one step for every day in the test window
n_test_days = len(test_df)
arima_predictions = arima_fitted.forecast(steps=n_test_days)

# Score the forecast against the held-out values
arima_mae = mean_absolute_error(test_series, arima_predictions)
arima_mse = mean_squared_error(test_series, arima_predictions)
arima_rmse = np.sqrt(arima_mse)
arima_r2 = r2_score(test_series, arima_predictions)

print("\n".join([
    "\nARIMA Performance:",
    f"  MAE: ${arima_mae:.2f}",
    f"  RMSE: ${arima_rmse:.2f}",
    f"  R²: {arima_r2:.4f}",
]))

# Plot training history, actual test values and the ARIMA forecast together
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(train_series.index, train_series, label='Training Data', alpha=0.7)
ax.plot(test_series.index, test_series, label='Actual Test Data', color='orange', linewidth=2)
ax.plot(test_series.index, arima_predictions, label='ARIMA Predictions',
        color='red', linestyle='--', linewidth=2)
ax.set_title('ARIMA Forecast', fontsize=14, fontweight='bold')
ax.set_xlabel('Date')
ax.set_ylabel('Total Cost ($)')
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

## 5. Model 2: Prophet

In [None]:
if not PROPHET_AVAILABLE:
    print("Prophet not available. Skipping Prophet model.")
    prophet_predictions = None
else:
    # Prophet expects the date in `ds` and the target in `y`; the weekend flag
    # rides along as an extra regressor column.
    prophet_train = (
        train_df[['date', 'total_cost', 'is_weekend']]
        .rename(columns={'date': 'ds', 'total_cost': 'y'})
    )

    # Weekly seasonality only; daily and yearly patterns are switched off
    print("Training Prophet model...")
    prophet_model = Prophet(
        daily_seasonality=False,
        weekly_seasonality=True,
        yearly_seasonality=False,
        changepoint_prior_scale=0.05
    )
    prophet_model.add_regressor('is_weekend')
    prophet_model.fit(prophet_train)
    print("Prophet model trained!")

    # Predict the test window, supplying the same regressor
    prophet_future = test_df[['date', 'is_weekend']].rename(columns={'date': 'ds'})
    prophet_forecast = prophet_model.predict(prophet_future)
    prophet_predictions = prophet_forecast['yhat'].values

    # Score the forecast against the held-out values
    prophet_mae = mean_absolute_error(test_series, prophet_predictions)
    prophet_mse = mean_squared_error(test_series, prophet_predictions)
    prophet_rmse = np.sqrt(prophet_mse)
    prophet_r2 = r2_score(test_series, prophet_predictions)

    print("\nProphet Performance:")
    print(f"  MAE: ${prophet_mae:.2f}")
    print(f"  RMSE: ${prophet_rmse:.2f}")
    print(f"  R²: {prophet_r2:.4f}")

In [None]:
if PROPHET_AVAILABLE:
    # Prophet's yhat_lower / yhat_upper cover `interval_width` of the
    # predictive distribution (0.80 unless the model overrides it). Read the
    # level from the fitted model so the legend always states the real one
    # instead of a hard-coded "95%".
    interval_label = f"{prophet_model.interval_width:.0%} Uncertainty Interval"

    # Visualize Prophet forecast
    plt.figure(figsize=(14, 6))
    plt.plot(train_series.index, train_series, label='Training Data', alpha=0.7)
    plt.plot(test_series.index, test_series, label='Actual Test Data', color='orange', linewidth=2)
    plt.plot(test_series.index, prophet_predictions, label='Prophet Predictions',
             color='green', linestyle='--', linewidth=2)

    # Shade the model's uncertainty band around the point forecast
    plt.fill_between(test_series.index,
                     prophet_forecast['yhat_lower'].values,
                     prophet_forecast['yhat_upper'].values,
                     alpha=0.2, color='green', label=interval_label)

    plt.title('Prophet Forecast', fontsize=14, fontweight='bold')
    plt.xlabel('Date')
    plt.ylabel('Total Cost ($)')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

## 6. Model 3: LSTM Neural Network

In [None]:
if not TENSORFLOW_AVAILABLE:
    print("TensorFlow not available. Skipping LSTM model.")
else:
    # Inputs fed to the network
    feature_cols = [
        'cost_lag_1', 'cost_lag_3', 'cost_lag_7',
        'cost_ma_7', 'cost_ma_14',
        'day_of_week', 'is_weekend', 'trend',
    ]

    X_train, X_test = (split_df[feature_cols].values for split_df in (train_df, test_df))
    y_train, y_test = (split_df['total_cost'].values for split_df in (train_df, test_df))

    # Fit the scalers on the training split only, then reuse them on the test split
    scaler_X = MinMaxScaler()
    scaler_y = MinMaxScaler()

    X_train_scaled = scaler_X.fit_transform(X_train)
    X_test_scaled = scaler_X.transform(X_test)
    y_train_scaled = scaler_y.fit_transform(y_train[:, np.newaxis])

    # LSTM layers take (samples, timesteps, features); each sample is one timestep
    X_train_lstm = X_train_scaled[:, np.newaxis, :]
    X_test_lstm = X_test_scaled[:, np.newaxis, :]

    print(f"Training data shape: {X_train_lstm.shape}")
    print(f"Test data shape: {X_test_lstm.shape}")

In [None]:
if TENSORFLOW_AVAILABLE:
    # Build LSTM model: two stacked LSTM layers with dropout, then a small
    # dense head that outputs one (scaled) cost value per sample.
    print("Building LSTM model...")
    lstm_model = Sequential([
        # Declare the input explicitly; passing `input_shape=` to the first
        # layer is deprecated in current Keras releases.
        keras.Input(shape=(1, len(feature_cols))),
        LSTM(50, activation='relu', return_sequences=True),
        Dropout(0.2),
        LSTM(50, activation='relu'),
        Dropout(0.2),
        Dense(25, activation='relu'),
        Dense(1)
    ])

    lstm_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

    # summary() prints the table itself and returns None, so wrapping it in
    # print() would append a stray "None" line to the output.
    lstm_model.summary()

    # Train model; stop once validation loss has not improved for 10 epochs
    # and roll back to the best weights seen.
    print("\nTraining LSTM model...")
    early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    history = lstm_model.fit(
        X_train_lstm, y_train_scaled,
        epochs=100,
        batch_size=8,
        validation_split=0.2,
        callbacks=[early_stop],
        verbose=0
    )

    print("LSTM model trained!")

In [None]:
if TENSORFLOW_AVAILABLE:
    # Predict in scaled space, then map the output back to dollars
    lstm_predictions_scaled = lstm_model.predict(X_test_lstm, verbose=0)
    lstm_predictions = scaler_y.inverse_transform(lstm_predictions_scaled).ravel()

    # Score the forecast against the held-out values
    lstm_mae = mean_absolute_error(y_test, lstm_predictions)
    lstm_mse = mean_squared_error(y_test, lstm_predictions)
    lstm_rmse = np.sqrt(lstm_mse)
    lstm_r2 = r2_score(y_test, lstm_predictions)

    print("\nLSTM Performance:")
    print(f"  MAE: ${lstm_mae:.2f}")
    print(f"  RMSE: ${lstm_rmse:.2f}")
    print(f"  R²: {lstm_r2:.4f}")

    # Training curves: one panel per tracked quantity, train vs validation
    fig, axes = plt.subplots(1, 2, figsize=(14, 4))
    for curve_ax, (history_key, curve_name) in zip(axes, [('loss', 'Loss'), ('mae', 'MAE')]):
        curve_ax.plot(history.history[history_key], label=f'Training {curve_name}')
        curve_ax.plot(history.history[f'val_{history_key}'], label=f'Validation {curve_name}')
        curve_ax.set_title(f'Model {curve_name}', fontweight='bold')
        curve_ax.set_xlabel('Epoch')
        curve_ax.set_ylabel(curve_name)
        curve_ax.legend()
        curve_ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # Forecast against the actual test values
    fig, ax = plt.subplots(figsize=(14, 6))
    ax.plot(train_series.index, train_series, label='Training Data', alpha=0.7)
    ax.plot(test_series.index, test_series, label='Actual Test Data', color='orange', linewidth=2)
    ax.plot(test_series.index, lstm_predictions, label='LSTM Predictions',
            color='purple', linestyle='--', linewidth=2)
    ax.set_title('LSTM Forecast', fontsize=14, fontweight='bold')
    ax.set_xlabel('Date')
    ax.set_ylabel('Total Cost ($)')
    ax.legend()
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    plt.show()

## 7. Model Comparison

In [None]:
# Collect one metrics row per model that was actually trained
metric_columns = ('Model', 'MAE', 'RMSE', 'R²')

results = [dict(zip(metric_columns, ('ARIMA', arima_mae, arima_rmse, arima_r2)))]

if PROPHET_AVAILABLE:
    results.append(dict(zip(metric_columns, ('Prophet', prophet_mae, prophet_rmse, prophet_r2))))

if TENSORFLOW_AVAILABLE:
    results.append(dict(zip(metric_columns, ('LSTM', lstm_mae, lstm_rmse, lstm_r2))))

results_df = pd.DataFrame(results).round(4)

# Print the table and name the winner for each criterion
banner = "=" * 70
best_by_mae = results_df.loc[results_df['MAE'].idxmin(), 'Model']
best_by_r2 = results_df.loc[results_df['R²'].idxmax(), 'Model']

print("\n" + banner)
print("MODEL COMPARISON")
print(banner)
print(results_df.to_string(index=False))
print("\nBest Model (by MAE): " + best_by_mae)
print("Best Model (by R²): " + best_by_r2)

In [None]:
# One bar chart per metric, side by side
fig, axes = plt.subplots(1, 3, figsize=(16, 5))

# (metric column, panel title, y-axis label, bar colour)
metric_panels = [
    ('MAE', 'Mean Absolute Error', 'MAE ($)', 'steelblue'),
    ('RMSE', 'Root Mean Squared Error', 'RMSE ($)', 'coral'),
    ('R²', 'R² Score', 'R²', 'seagreen'),
]

for panel_ax, (metric, panel_title, y_label, bar_color) in zip(axes, metric_panels):
    results_df.plot(x='Model', y=metric, kind='bar', ax=panel_ax, legend=False, color=bar_color)
    panel_ax.set_title(panel_title, fontweight='bold')
    panel_ax.set_ylabel(y_label)
    panel_ax.tick_params(axis='x', rotation=0)
    panel_ax.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.show()

In [None]:
# Overlay every available model's forecast on the actual series
model_forecasts = [('ARIMA', arima_predictions)]
if PROPHET_AVAILABLE:
    model_forecasts.append(('Prophet', prophet_predictions))
if TENSORFLOW_AVAILABLE:
    model_forecasts.append(('LSTM', lstm_predictions))

fig, ax = plt.subplots(figsize=(16, 7))

ax.plot(train_series.index, train_series, label='Training Data', alpha=0.5, color='gray')
ax.plot(test_series.index, test_series, label='Actual', color='black', linewidth=2.5)
for model_name, model_values in model_forecasts:
    ax.plot(test_series.index, model_values, label=model_name, linestyle='--', linewidth=2)

ax.set_title('All Model Predictions Comparison', fontsize=14, fontweight='bold')
ax.set_xlabel('Date', fontsize=12)
ax.set_ylabel('Total Cost ($)', fontsize=12)
ax.legend(fontsize=11)
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

## 8. Production Deployment

In [None]:
# Persist the trained models so they can be reloaded outside this notebook.
# `pickle` is imported in the notebook's import cell rather than inside the
# Prophet branch: the scaler dump in the TensorFlow branch needs it too, and
# previously raised NameError when TensorFlow was installed but Prophet was not.
# NOTE: only unpickle these files from locations you trust; loading a pickle
# executes arbitrary code.
if PROPHET_AVAILABLE:
    # Save Prophet model
    with open('prophet_cost_model.pkl', 'wb') as f:
        pickle.dump(prophet_model, f)
    print("Prophet model saved as 'prophet_cost_model.pkl'")

if TENSORFLOW_AVAILABLE:
    # Save LSTM model
    lstm_model.save('lstm_cost_model.h5')
    print("LSTM model saved as 'lstm_cost_model.h5'")

    # Save scalers; predictions cannot be mapped back to dollars without them
    with open('lstm_scalers.pkl', 'wb') as f:
        pickle.dump({'scaler_X': scaler_X, 'scaler_y': scaler_y}, f)
    print("LSTM scalers saved as 'lstm_scalers.pkl'")

In [None]:
# Create production prediction function
def predict_future_costs(days_ahead=30, model_type='prophet'):
    """
    Predict future daily costs with a trained model.

    The forecast starts on the day after the last date in the notebook-level
    `daily_df` and uses the notebook-level fitted `prophet_model`.

    Parameters:
    - days_ahead: Number of days to forecast (must be at least 1)
    - model_type: Model to use. Only 'prophet' is implemented; any other
      value (including 'arima' and 'lstm') is reported as unsupported.

    Returns:
    - DataFrame with columns date, predicted_cost, lower_bound, upper_bound,
      or None when the requested model is unsupported or Prophet is not
      installed.

    Raises:
    - ValueError: if days_ahead is smaller than 1.
    """
    if days_ahead < 1:
        raise ValueError(f"days_ahead must be at least 1, got {days_ahead}")

    if model_type != 'prophet' or not PROPHET_AVAILABLE:
        print(f"Model type '{model_type}' not available or not supported.")
        return None

    # Future calendar: one row per day, starting the day after the history ends
    first_forecast_day = daily_df['date'].max() + timedelta(days=1)
    future_dates = pd.date_range(start=first_forecast_day, periods=days_ahead, freq='D')

    # The model was trained with `is_weekend` as a regressor, so the same
    # column must be supplied for every future date (Saturday=5, Sunday=6).
    future_df = pd.DataFrame({
        'ds': future_dates,
        'is_weekend': (future_dates.dayofweek >= 5).astype(int)
    })

    # Make predictions
    forecast = prophet_model.predict(future_df)

    return pd.DataFrame({
        'date': forecast['ds'],
        'predicted_cost': forecast['yhat'],
        'lower_bound': forecast['yhat_lower'],
        'upper_bound': forecast['yhat_upper']
    })

# Example usage
print("\nProduction prediction function created!")
print("\nExample usage:")
print("  predictions = predict_future_costs(days_ahead=30, model_type='prophet')")

In [None]:
# Generate 30-day forecast
if PROPHET_AVAILABLE:
    future_predictions = predict_future_costs(days_ahead=30, model_type='prophet')

    if future_predictions is not None:
        print("\n30-Day Cost Forecast:")
        print(future_predictions.head(10))

        print(f"\nTotal Predicted Cost (30 days): ${future_predictions['predicted_cost'].sum():.2f}")
        print(f"Average Daily Cost: ${future_predictions['predicted_cost'].mean():.2f}")

        # The bounds come from Prophet's yhat_lower / yhat_upper, which cover
        # `interval_width` of the predictive distribution (0.80 unless the
        # model overrides it). Label the band with the model's actual level
        # instead of a hard-coded "95%".
        interval_label = f"{prophet_model.interval_width:.0%} Uncertainty Interval"

        # Visualize history followed by the forecast and its uncertainty band
        plt.figure(figsize=(14, 6))
        plt.plot(daily_df['date'], daily_df['total_cost'], label='Historical', alpha=0.7)
        plt.plot(future_predictions['date'], future_predictions['predicted_cost'],
                 label='Forecast', color='red', linewidth=2)
        plt.fill_between(future_predictions['date'],
                         future_predictions['lower_bound'],
                         future_predictions['upper_bound'],
                         alpha=0.2, color='red', label=interval_label)
        plt.title('30-Day Cost Forecast', fontsize=14, fontweight='bold')
        plt.xlabel('Date')
        plt.ylabel('Cost ($)')
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.show()

## 9. Export Results

In [None]:
# Export results (`os` is imported in the notebook's import cell)
output_dir = 'ml_forecasting_output'
os.makedirs(output_dir, exist_ok=True)

# Save model comparison
results_df.to_csv(f'{output_dir}/model_comparison.csv', index=False)
print(f"Saved: {output_dir}/model_comparison.csv")

# Save future predictions
if PROPHET_AVAILABLE and future_predictions is not None:
    future_predictions.to_csv(f'{output_dir}/30day_forecast.csv', index=False)
    print(f"Saved: {output_dir}/30day_forecast.csv")

# Save a feature reference table: the LSTM input names with a human-readable
# description each (these are descriptions, not importance scores).
# Descriptions are looked up by feature name, so editing `feature_cols`
# cannot silently misalign the two columns or break the export.
feature_descriptions = {
    'cost_lag_1': 'Cost 1 day ago',
    'cost_lag_3': 'Cost 3 days ago',
    'cost_lag_7': 'Cost 7 days ago',
    'cost_ma_7': '7-day moving average',
    'cost_ma_14': '14-day moving average',
    'day_of_week': 'Day of week (0-6)',
    'is_weekend': 'Weekend indicator',
    'trend': 'Linear trend',
}

if TENSORFLOW_AVAILABLE:
    feature_info = pd.DataFrame({
        'Feature': feature_cols,
        'Description': [feature_descriptions.get(name, '') for name in feature_cols]
    })
    feature_info.to_csv(f'{output_dir}/features.csv', index=False)
    print(f"Saved: {output_dir}/features.csv")
else:
    # `feature_cols` only exists when the LSTM section ran; keep an empty
    # table so the name is defined either way.
    feature_info = pd.DataFrame({'Feature': [], 'Description': []})

print(f"\nAll results exported to '{output_dir}/' directory")

## Summary

In this notebook, you learned:
- Feature engineering for time series forecasting
- Training and evaluating multiple forecasting models:
  - ARIMA for statistical time series forecasting
  - Prophet for robust forecasting with seasonality
  - LSTM neural networks for deep learning-based forecasting
- Model comparison and selection
- Production deployment considerations

## Best Practices for Production
1. **Model Retraining**: Retrain models regularly with new data
2. **Monitoring**: Track prediction accuracy and drift
3. **Ensemble Methods**: Consider combining multiple models
4. **Feature Updates**: Keep feature engineering pipeline current
5. **Version Control**: Track model versions and performance

## Next Steps
- Deploy the best model to production
- Set up automated retraining pipeline
- Implement monitoring and alerting
- Explore ensemble methods for improved accuracy