In [23]:
import pandas as pd
import matplotlib.pyplot as plt
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error
import numpy as np

# Import energy data
newData = pd.read_csv("Office_Garman.csv", index_col="timestamp", parse_dates=True)
newData = newData.drop_duplicates()
newData = newData.asfreq('H')
newData = newData.fillna(method='ffill')

# Import holiday data
holidaysdf = pd.read_csv("schedule9.csv", header=None, names=["ds", "holiday"])
holidaysdf["ds"] = pd.to_datetime(holidaysdf["ds"])
holidaysdf["holiday"] = holidaysdf["holiday"].astype(str)

# Import weather data
weatherdf = pd.read_csv("weather5.csv")
weatherdf["timestamp"] = pd.to_datetime(weatherdf["timestamp"])
weatherdf = weatherdf.set_index("timestamp")  # Set the index to timestamp

# Merge newData and weatherdf
merged_data = pd.merge(newData, weatherdf, left_index=True, right_index=True, how='outer')

# Merge merged_data and holidaysdf
merged_data = pd.merge(merged_data, holidaysdf, left_index=True, right_on='ds', how='left')

# Set the index back to DatetimeIndex
merged_data.set_index('ds', inplace=True)

# Split data into train and test sets
split_date = pd.Timestamp('2015-09-30')
train = merged_data.loc[merged_data.index <= split_date, ["Office_Garman"]]
train = train.rename(columns={"Office_Garman": "y"})
train.reset_index(inplace=True)
train.rename(columns={"timestamp": "ds"}, inplace=True)

test = merged_data.loc[merged_data.index > split_date]

# Prepare the Prophet model
model = Prophet()
for column in train.columns:
    if column != "y":  # Exclude the target variable
        model.add_regressor(column)

# Fit the Prophet model
model.fit(train)


# Generate predictions
future = model.make_future_dataframe(periods=len(test), freq='H')
future = pd.merge(future, test.reset_index(), on='ds', how='left')
predictions = model.predict(future)

# Plot actual data and predictions
plt.figure(figsize=(12, 6))
plt.plot(train.index, train["Office_Garman"], label='Train')
plt.plot(test.index, test["Office_Garman"], label='Test')
plt.plot(predictions["ds"], predictions["yhat"], label='Predictions')
plt.xlabel('Date')
plt.ylabel('Energy Usage')
plt.title('Prophet Model Predictions')
plt.legend()
plt.show()

# Plot zoomed-in version
zoom_start_date = pd.to_datetime('2015-09-15')
zoom_end_date = pd.to_datetime('2015-12-31')
zoomed_data = merged_data[(merged_data.index >= zoom_start_date) & (merged_data.index <= zoom_end_date)]

zoomed_predictions = predictions[(predictions["ds"] >= zoom_start_date) & (predictions["ds"] <= zoom_end_date)]

plt.figure(figsize=(12, 6))
plt.plot(zoomed_data.index, zoomed_data["Office_Garman"], label='Actual Data')
plt.plot(zoomed_predictions["ds"], zoomed_predictions["yhat"], label='Predictions')
plt.xlabel('Date')
plt.ylabel('Energy Usage')
plt.title('Zoomed-in Plot: Prophet Model Predictions')
plt.legend()
plt.show()

# Calculate evaluation metrics
mae = mean_absolute_error(test["Office_Garman"], predictions["yhat"].tail(len(test)))
mape = mean_absolute_percentage_error(test["Office_Garman"], predictions["yhat"].tail(len(test)))
rmse = np.sqrt(mean_squared_error(test["Office_Garman"], predictions["yhat"].tail(len(test))))


print("MAE:", mae)
print("MAPE:", mape)
print("RMSE:", rmse)


ValueError: Name 'ds' is reserved.