In [None]:
import pandas as pd
from prophet import Prophet
import matplotlib.pyplot as plt

def preprocess(data, split_date, column_mapping):
    """Rename columns, normalise the ``ds`` column and split the rows by date.

    The caller's DataFrame is not modified: renaming produces a new frame and
    all further edits are applied to that frame only.

    Args:
        data: DataFrame that, once ``column_mapping`` is applied, contains a
            date-like ``ds`` column.
        split_date: Boundary compared against ``ds``. Rows strictly before it
            form the training set; rows on or after it form the hold-out set.
        column_mapping: Mapping of original column names to new ones, passed
            to ``DataFrame.rename(columns=...)``.

    Returns:
        A ``(train_data, actual_data)`` tuple of independent DataFrames.
    """
    # rename() without inplace returns a new frame, so the input keeps its
    # original column names and values.
    prepared = data.rename(columns=column_mapping)

    # Parse to datetimes, then drop any timezone so ``ds`` is tz-naive.
    prepared['ds'] = pd.to_datetime(prepared['ds']).dt.tz_localize(None)

    # Copies keep the two halves independent of each other and of `prepared`,
    # so later column assignments on them do not warn or leak.
    train_data = prepared[prepared['ds'] < split_date].copy()    # before the split date
    actual_data = prepared[prepared['ds'] >= split_date].copy()  # on/after the split date
    return train_data, actual_data

def get_holidays():
    """Build the holiday table describing Black Friday for 2020-2024.

    Returns:
        DataFrame with columns ``holiday``, ``ds``, ``lower_window`` and
        ``upper_window``; one row per Black Friday date, each with a window
        of 4 days before and 3 days after.
    """
    black_friday_dates = [
        '2020-11-27',
        '2021-11-26',
        '2022-11-25',
        '2023-11-24',
        '2024-11-29',
    ]
    # Days around each date that are also treated as part of the event.
    window = {'lower_window': -4, 'upper_window': 3}

    return pd.DataFrame({
        'holiday': 'blackfriday',
        'ds': pd.to_datetime(black_friday_dates),
        **window,
    })

def predict(train_data, periods, holidays=None, seasonality_mode='additive'):
    """Fit a Prophet model and return its forecast over history plus future.

    Args:
        train_data: Training frame passed directly to ``Prophet.fit``.
        periods: Number of future rows appended by
            ``make_future_dataframe``.
        holidays: Optional holiday table forwarded to ``Prophet``.
        seasonality_mode: Seasonality mode forwarded to ``Prophet``.

    Returns:
        The DataFrame produced by ``Prophet.predict`` for the historical
        dates followed by the ``periods`` future ones.

    Side effects:
        Draws Prophet's component plot and forecast plot.
    """
    forecaster = Prophet(holidays=holidays, seasonality_mode=seasonality_mode)
    forecaster.fit(train_data)

    # History is kept so the returned frame covers training dates as well.
    horizon = forecaster.make_future_dataframe(periods, include_history=True)
    prediction = forecaster.predict(horizon)

    forecaster.plot_components(prediction)
    forecaster.plot(prediction)
    return prediction

def plot(train_data, forecast, comparison_df, yhat, periods):
    """Plot recent training data, the forecast and the actual values together.

    Args:
        train_data: Frame with ``ds`` and ``y`` columns used for training.
        forecast: Forecast frame with ``ds``, ``yhat_lower``, ``yhat_upper``
            and the column named by ``yhat``.
        comparison_df: Frame with ``ds`` and ``y`` columns holding the actual
            values to compare against.
        yhat: Name of the forecast column to draw.
        periods: Number of trailing rows taken from both ``train_data`` and
            ``forecast``.
    """
    # Use the LAST `periods` training rows: they sit right before the forecast
    # window. Taking the first rows would draw the oldest history, far away
    # from the forecast on the time axis.
    recent_train = train_data.tail(periods)
    horizon = forecast.tail(periods)

    # Forecast vs. actual data visualisation
    plt.figure(figsize=(10, 6))
    plt.plot(recent_train['ds'], recent_train['y'], label='Тренувальні дані')
    plt.plot(horizon['ds'], horizon[yhat], label='Прогноз', linestyle='--')
    plt.plot(comparison_df['ds'], comparison_df['y'], label='Фактичні дані', linestyle=':', marker='o')
    # Shaded band between the lower and upper forecast bounds.
    plt.fill_between(horizon['ds'], horizon['yhat_lower'], horizon['yhat_upper'], color='gray', alpha=0.2)
    plt.legend()
    plt.xlabel('Дата')
    plt.ylabel('Цільова змінна')
    plt.title('Прогноз Prophet та фактичні дані')
    plt.grid(True)
    plt.show()

In [None]:
# Run configuration.
file_path = 'files/orders_till_2024-11-12.csv'
split_date = '2024-11-01'   # rows on/after this date are held out for comparison
start = '2020-10-01'        # rows before this date are discarded
predict_periods = 20

data = pd.read_csv(file_path, usecols=['date', 'orders'])
# `date` is still text at this point, so this is a string comparison.
# NOTE(review): that matches chronological order only for ISO-8601-style
# dates - confirm the CSV format.
# .copy() detaches the filtered rows from the frame read from disk, so later
# column assignments work on an independent frame rather than on a slice.
data = data[data['date'] >= start].copy()

column_mapping = {'date': 'ds', 'orders': 'y'}
holidays = get_holidays()
train_data, actual_data = preprocess(data, split_date, column_mapping)
forecast = predict(train_data, predict_periods, holidays, seasonality_mode='multiplicative')
print(forecast)

yhat = 'yhat'
# Left join keeps every forecast row; `y` is filled only where a held-out
# actual value exists for that date.
comparison_df = forecast[['ds', yhat]].merge(actual_data, on='ds', how='left')

plot(train_data, forecast, comparison_df, yhat, predict_periods)