In [33]:
# Imports
from datetime import datetime
from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly
from sklearn.metrics import mean_absolute_error
from mango import Tuner, scheduler
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px


# Data Analysis

In [34]:
# Load data
data = pd.read_csv("incoming_packages.csv")
data.head(len(data))

Unnamed: 0,date,total_packages,cooled_packages,fire_packages,normal_packages
0,2020-01-01,1745,370,488,887
1,2020-01-02,1847,308,402,1137
2,2020-01-03,1842,314,394,1134
3,2020-01-04,664,131,181,352
4,2020-01-05,871,137,190,544
...,...,...,...,...,...
846,2022-04-26,2268,467,503,1298
847,2022-04-27,2466,432,641,1393
848,2022-04-28,2436,545,556,1335
849,2022-04-29,2302,368,547,1387


In [35]:
# Convert string dates into datetime objects
data_final = data.copy()
data_final['date'] = pd.to_datetime(data_final['date'])

In [36]:
# Function that gets data that can be used by prophet from the specified time range
def get_prophet_data( col:str, startdate:datetime, enddate:datetime ) -> pd.DataFrame:
    """ Get data that can be used by prophet from the specified time range. """
    prophet_data = pd.DataFrame()
    prophet_data["ds"] = data_final["date"]
    prophet_data["y"] = data_final[col]
    prophet_data = prophet_data[ (prophet_data['ds'] >= startdate) & (prophet_data['ds'] <= enddate)]

    return prophet_data

In [37]:
fig = px.line(data, x='date', y="total_packages")
fig.show()

Vi kan se en tydlig säsongsvariation. Under Juli så sjunker antalet paket en del, och vid November samt December är det en kraftig ökning. I November sker black friday, så det är sannerligen orsaken till den spiken. I December spikar det igen då det är julafton.

Det finns en trend som visar på att antalet paket stiger genom åren, då linjen sakta börjar stiga längst grafen. Det är även värt att notera att vissa dagar har ovanligt låga mängder paket, som till exempel den 3e Oktober 2020.

# Prophet

In [38]:
# Get data we should train the model with, and show it
total_packages_train = get_prophet_data("total_packages", datetime(2020, 1, 1), datetime(2022, 4, 1))
total_packages_train.head(len(total_packages_train))

Unnamed: 0,ds,y
0,2020-01-01,1745
1,2020-01-02,1847
2,2020-01-03,1842
3,2020-01-04,664
4,2020-01-05,871
...,...,...
817,2022-03-28,2431
818,2022-03-29,2223
819,2022-03-30,2260
820,2022-03-31,2374


In [39]:
# Train prophet
prophet_ = Prophet()
prophet_ = prophet_.fit(total_packages_train)

16:53:27 - cmdstanpy - INFO - Chain [1] start processing
16:53:27 - cmdstanpy - INFO - Chain [1] done processing


In [40]:
# Our predictions
future = prophet_.make_future_dataframe(periods=365)
forecast = prophet_.predict(future)

In [41]:
# Plot
plot_plotly(prophet_, forecast, uncertainty=False)

In [42]:
# Plot components
plot_components_plotly(prophet_, forecast)

In [43]:
# Test accuracy

total_packages_test = get_prophet_data("total_packages", datetime(2022, 4, 1), datetime(2022, 4, 30))
results = total_packages_test.copy()
results = results.merge(forecast)

constant_predictions = np.full_like(total_packages_test['y'], 2000)

mae_prophet = mean_absolute_error(results['y'], results['yhat'])
mae_constant = mean_absolute_error(results['y'], constant_predictions)

print( f"MAE for constant predictions: {mae_constant}" )
print( f"MAE for our model: {mae_prophet}" )

MAE for constant predictions: 524.9666666666667
MAE for our model: 115.9483761036984
