In [1]:
import pandas as pd
import math
from prophet import Prophet
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Read the data
path = r"C:\Users\yanzh\Desktop\code_and_data\4. Deep learning part\处理数据\2015-2019 total trips.csv"
data = pd.read_csv(path)
data['date'] = pd.to_datetime(data['date'])
data = data[['date', 'total_trips']]
data.columns = ['ds', 'y']

# Define sliding window parameters
num_windows = 5
window_size = math.ceil(len(data) // (num_windows - 1.2))
step_size = math.ceil(window_size * 0.7)

# Initialize lists to store data
windows = []
train_dataset = []
validation_dataset = []
start = 0

# Create sliding windows
while start + step_size <= len(data):
    end = start + window_size
    window_data = data[start:end]
    train_val_split = int(len(window_data) * 0.9)
    train_data = window_data[:train_val_split]
    validation_data = window_data[train_val_split:]

    windows.append(window_data)
    train_dataset.append(train_data)
    validation_dataset.append(validation_data)

    start += step_size

In [3]:
# Initialize lists to store evaluation metrics
total_mae_losses = []
total_mape_losses = []
total_rmse_losses = []
total_r2_scores = []

# Fit the Prophet model and calculate evaluation metrics
for i in range(num_windows):
    model = Prophet()
    model.fit(train_dataset[i])

    # Make predictions
    future = model.make_future_dataframe(periods=len(validation_dataset[i]), freq='H')
    forecast = model.predict(future)
    forecast_values = forecast['yhat'].iloc[-len(validation_dataset[i]):].values
    actual_values = validation_dataset[i]['y'].values

    mae_losses = []
    mape_losses = []
    rmse_losses = []
    all_predicted = []
    all_actual = []

    for idx, forecast_value in enumerate(forecast_values):
        actual_value = actual_values[idx]
        all_actual.append(actual_value)
        all_predicted.append(forecast_value)

        # Calculate MAE
        mae_loss = np.abs(actual_value - forecast_value)
        mae_losses.append(mae_loss)

        # Calculate MAPE
        mape_loss = np.abs((actual_value - forecast_value) / actual_value)
        mape_losses.append(mape_loss)

        # Calculate RMSE
        rmse_loss = np.square(actual_value - forecast_value)
        rmse_losses.append(rmse_loss)
    
    # Convert lists to numpy arrays for R-squared calculation
    all_predicted = np.array(all_predicted)
    all_actual = np.array(all_actual)

    # Calculate R-squared
    ss_res = np.sum((all_actual - all_predicted) ** 2)
    ss_tot = np.sum((all_actual - np.mean(all_actual)) ** 2)
    r2 = 1 - (ss_res / ss_tot)

    total_mae_losses.append(np.mean(mae_losses))
    total_mape_losses.append(np.mean(mape_losses) * 100)
    total_rmse_losses.append(np.sqrt(np.mean(rmse_losses)))
    total_r2_scores.append(r2)

# Calculate average of metrics across all windows
average_mae_loss = np.mean(total_mae_losses)
average_mape_loss = np.mean(total_mape_losses)
average_rmse_loss = np.mean(total_rmse_losses)
average_r2_score = np.mean(total_r2_scores)

print(f'Average MAE on Test Set: {average_mae_loss:.4f}')
print(f'Average MAPE on Test Set: {average_mape_loss:.4f}')
print(f'Average RMSE on Test Set: {average_rmse_loss:.4f}')
print(f'R^2 Score on Test Set: {average_r2_score:.4f}')

11:33:56 - cmdstanpy - INFO - Chain [1] start processing
11:33:57 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
11:33:58 - cmdstanpy - INFO - Chain [1] start processing
11:33:59 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
11:34:00 - cmdstanpy - INFO - Chain [1] start processing
11:34:01 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
11:34:02 - cmdstanpy - INFO - Chain [1] start processing
11:34:03 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
11:34:04 - cmdstanpy - INFO - Chain [1] start processing
11:34:05 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(


Average MAE on Test Set: 2366.5663
Average MAPE on Test Set: 54.3048
Average RMSE on Test Set: 3050.6197
R^2 Score on Test Set: 0.6477
