In [None]:
# https://dou.ua/forums/topic/57481/

# https://www.kaggle.com/datasets/kyanyoga/sample-sales-data
# https://www.kaggle.com/datasets/vinothkannaece/sales-dataset
# etc.

In [None]:
# ARIMA

import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error
import numpy as np

# Load the sales history; the 'date' column is parsed and becomes the index.
sales_csv = 'sales_data.csv'
df = pd.read_csv(sales_csv, index_col='date', parse_dates=['date'])

# Positional hold-out: the final 30 rows are the test window, the rest is training data.
train, test = df.iloc[:-30], df.iloc[-30:]

# Fixed-order ARIMA (p=5, d=1, q=2) fitted on the training sales only,
# then a 30-step forecast that lines up with the test window.
arima_order = (5, 1, 2)
model = ARIMA(train['sales'], order=arima_order)
fitted_model = model.fit()
forecast = fitted_model.forecast(steps=30)

# Automatic order search with a seasonal component of period 7.
from pmdarima import auto_arima
auto_arima_options = dict(seasonal=True, m=7, suppress_warnings=True)
auto_model = auto_arima(train['sales'], **auto_arima_options)
print(auto_model.summary())

In [None]:
# Prophet

from prophet import Prophet
import pandas as pd

# Prophet is fed a frame with a 'ds' (date) column and a 'y' (value) column.
# It is kept under its own name on purpose: rebinding `df` here would replace
# the date-indexed frame with a 'sales' column that the later cells still read
# (`df.index.dayofweek`, `df['sales']`, `df[['sales']]`).
prophet_df = pd.read_csv('sales_data.csv').rename(columns={'date': 'ds', 'sales': 'y'})

model = Prophet(yearly_seasonality=True, weekly_seasonality=True)
# Train on everything except the last 30 rows (the hold-out window).
model.fit(prophet_df.iloc[:-30])

# Extend the training dates by 30 periods and predict over history + horizon.
future = model.make_future_dataframe(periods=30)
forecast = model.predict(future)

fig = model.plot(forecast)

# Holiday table: one shared holiday label, each date widened by one day on
# either side through lower_window / upper_window.
ua_holidays = pd.DataFrame({
    'holiday': 'ukrainian_holidays',
    'ds': pd.to_datetime(['2025-01-01', '2025-01-07', '2025-03-08', '2025-08-24', '2025-12-25']),
    'lower_window': -1,
    'upper_window': 1
})
# NOTE: this creates a new, unfitted model; it must be fitted before it can predict.
model = Prophet(holidays=ua_holidays)

In [None]:
# XGBoost

def create_features(df, target='sales', lags=(1, 7, 30), windows=(7, 30)):
    """Return a copy of ``df`` extended with calendar, lag and rolling-mean features.

    Args:
        df: Frame whose index exposes ``dayofweek`` and ``month`` (e.g. a
            ``DatetimeIndex``) and which contains the ``target`` column.
        target: Name of the column the lag and rolling features are built from.
        lags: Shifts (in rows) to emit as ``lag_<n>`` columns.
        windows: Window sizes (in rows) to emit as ``rolling_mean_<n>`` columns.

    Returns:
        A new DataFrame; the input frame is not modified. The first rows hold
        NaN in the lag/rolling columns because no history exists for them yet.
    """
    df = df.copy()

    # Basic calendar features (pandas numbers Monday as 0, so 5 and 6 are the weekend).
    df['dayofweek'] = df.index.dayofweek
    df['month'] = df.index.month
    df['is_weekend'] = df['dayofweek'].isin([5, 6]).astype(int)

    series = df[target]

    # Lag features: the value observed `lag` rows earlier.
    for lag in lags:
        df[f'lag_{lag}'] = series.shift(lag)

    # Rolling means are computed on the series shifted by one row, so a row's
    # own value never contributes to its features.
    previous = series.shift(1)
    for window in windows:
        df[f'rolling_mean_{window}'] = previous.rolling(window).mean()

    return df

from xgboost import XGBRegressor

# Add the engineered columns, then drop every row that still contains a NaN
# (the earliest rows have no lag / rolling history).
df = create_features(df).dropna()

feature_cols = [
    'dayofweek', 'month', 'is_weekend',
    'lag_1', 'lag_7', 'lag_30',
    'rolling_mean_7', 'rolling_mean_30',
]
X, y = df[feature_cols], df['sales']

# Positional split: the final 30 rows are the test window.
X_train, X_test = X.iloc[:-30], X.iloc[-30:]
y_train, y_test = y.iloc[:-30], y.iloc[-30:]

# Both gradient-boosting models below share the same hyper-parameters.
boosting_params = dict(n_estimators=500, max_depth=6, learning_rate=0.05)

model = XGBRegressor(**boosting_params)
model.fit(X_train, y_train)
predictions = model.predict(X_test)

# Feature importances of the fitted XGBoost model, most important first.
importance = pd.DataFrame(
    {'feature': feature_cols, 'importance': model.feature_importances_}
).sort_values(by='importance', ascending=False)
print(importance)

# LightGBM alternative; note that `model` now refers to the LightGBM
# regressor, while `predictions` still holds the XGBoost output.
import lightgbm as lgb
model = lgb.LGBMRegressor(**boosting_params)
model.fit(X_train, y_train)

# Gather each model's forecast for the same 30-step hold-out window.
# They are converted to plain NumPy arrays because the three sources carry
# different indexes (ARIMA series, Prophet frame, XGBoost array); adding them
# as pandas objects would align on labels instead of position.
# NOTE(review): assumes the cells ran top to bottom, so `fitted_model` is the
# ARIMA fit, `forecast` is the Prophet prediction frame (its last 30 rows are
# the future horizon) and `predictions` is the XGBoost output — confirm.
pred_arima = np.asarray(fitted_model.forecast(steps=30))
pred_prophet = forecast['yhat'].to_numpy()[-30:]
pred_xgb = np.asarray(predictions)

# Simple average of the three forecasts.
ensemble_pred = (pred_arima + pred_prophet + pred_xgb) / 3

# Weighted average (weights sum to 1); this overrides the simple average above.
weights = [0.25, 0.25, 0.5]
ensemble_pred = weights[0]*pred_arima + weights[1]*pred_prophet + weights[2]*pred_xgb

# Stacking: a Ridge meta-model is fitted on the base models' predictions
# (one column per base model) against the true values.
from sklearn.linear_model import Ridge

# NOTE(review): pred_arima_validation, pred_prophet_validation,
# pred_xgb_validation and y_validation are not defined anywhere in the visible
# notebook, so this raises NameError as written. Presumably they are meant to
# be each base model's predictions on a validation window separate from the
# test window — TODO produce them before this step.
stacking_features = pd.DataFrame({
    'arima': pred_arima_validation,
    'prophet': pred_prophet_validation,
    'xgb': pred_xgb_validation
})

meta_model = Ridge()
meta_model.fit(stacking_features, y_validation)

In [None]:
# LSTM

import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler

# Normalize the data — a must for neural networks.
# The min/max are learned from the training part only (everything except the
# last 30 rows, the hold-out used throughout the notebook), so the test window
# does not leak into the scaling; the whole series is then transformed with
# those parameters. Hold-out values may therefore fall slightly outside [0, 1].
scaler = MinMaxScaler()
scaler.fit(df[['sales']].iloc[:-30])
scaled_data = scaler.transform(df[['sales']])

# Build sequences: every example is a window of `seq_length` consecutive
# entries, and its target is the entry that immediately follows the window.
def create_sequences(data, seq_length):
    """Slice ``data`` into overlapping windows and their next-step targets.

    Args:
        data: Indexable, sliceable sequence (e.g. a NumPy array).
        seq_length: Number of consecutive entries in each window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays with ``len(data) - seq_length``
        samples each; both are empty when ``data`` is not longer than
        ``seq_length``.
    """
    sample_count = len(data) - seq_length
    windows = [data[start:start + seq_length] for start in range(sample_count)]
    targets = [data[start + seq_length] for start in range(sample_count)]
    return np.array(windows), np.array(targets)

# Window length fed to the network; must match the model's input shape below.
SEQ_LENGTH = 60

# The sequences get their own names: `X`, `y`, `X_train`, `y_train` and
# `y_test` hold the tabular (pandas) data that the evaluation and
# cross-validation cells still use, so they must not be overwritten here.
X_seq, y_seq = create_sequences(scaled_data, SEQ_LENGTH)

# Chronological hold-out: the last 30 samples are kept out of training.
X_seq_train, X_seq_test = X_seq[:-30], X_seq[-30:]
y_seq_train, y_seq_test = y_seq[:-30], y_seq[-30:]

# Network architecture: two stacked LSTM layers with dropout, one output unit.
model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(SEQ_LENGTH, 1)),
    Dropout(0.2),
    LSTM(32),
    Dropout(0.2),
    Dense(1)
])

model.compile(optimizer='adam', loss='mse')
# Train on the sequences built above (not on the tabular XGBoost features).
model.fit(X_seq_train, y_seq_train, epochs=100, batch_size=32, validation_split=0.1)

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Error metrics of `predictions` against the hold-out actuals `y_test`.
mae = mean_absolute_error(y_test, predictions)
rmse = np.sqrt(mean_squared_error(y_test, predictions))

# MAPE is undefined where the actual value is zero (division by zero gives
# inf/nan), so it is computed over the non-zero actuals only; if every actual
# is zero the metric is reported as NaN.
actual = np.asarray(y_test, dtype=float)
predicted = np.asarray(predictions, dtype=float)
nonzero = actual != 0
if nonzero.any():
    mape = np.mean(np.abs((actual[nonzero] - predicted[nonzero]) / actual[nonzero])) * 100
else:
    mape = np.nan

print(f'MAE: {mae:.2f}')
print(f'RMSE: {rmse:.2f}')
print(f'MAPE: {mape:.2f}%')

import matplotlib.pyplot as plt

# Actual vs. predicted values over the hold-out dates, drawn through the
# object-oriented matplotlib interface and saved to disk before display.
fig, ax = plt.subplots(figsize=(12, 6))
dates = y_test.index
ax.plot(dates, y_test.values, label='Реальні значення', color='blue')
ax.plot(dates, predictions, label='Прогноз', color='red', linestyle='--')
ax.set_xlabel('Дата')
ax.set_ylabel('Значення')
ax.set_title('Порівняння прогнозу з реальними даними')
ax.legend()
fig.savefig('forecast_comparison.png', dpi=150)
plt.show()

from sklearn.model_selection import TimeSeriesSplit

# Rebuild the tabular inputs from `df` rather than relying on the globals
# `X` / `y`: the LSTM section rebinds those names to NumPy arrays, which have
# no `.iloc`.
X_cv = df[feature_cols]
y_cv = df['sales']

# Expanding-window cross-validation: each fold trains on the past and tests
# on the block that follows it.
tscv = TimeSeriesSplit(n_splits=5)

scores = []
for train_idx, test_idx in tscv.split(X_cv):
    X_train_cv, X_test_cv = X_cv.iloc[train_idx], X_cv.iloc[test_idx]
    y_train_cv, y_test_cv = y_cv.iloc[train_idx], y_cv.iloc[test_idx]

    # A fresh model per fold so nothing carries over between folds.
    model = XGBRegressor(n_estimators=100, max_depth=4)
    model.fit(X_train_cv, y_train_cv)

    pred = model.predict(X_test_cv)
    score = mean_absolute_error(y_test_cv, pred)
    scores.append(score)

print(f'Середня MAE по фолдах: {np.mean(scores):.2f} +/- {np.std(scores):.2f}')

# Two alternative ways to fill gaps in 'sales'; they are options, not steps
# to run one after the other.
# NOTE(review): by this point `df` has already gone through `dropna()` in the
# XGBoost cell, so presumably no NaNs are left to fill; gap filling belongs
# right after loading the data, before the train/test split — TODO move.

# Option 1: linear interpolation between neighbouring rows.
df['sales'] = df['sales'].interpolate(method='linear')

# Option 2: time-based interpolation (method='time'); presumably needs a
# datetime index — verify against the pandas documentation.
df['sales'] = df['sales'].interpolate(method='time')