# In [5]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error

# Load the merged order data. The path uses a forward slash: a backslash before
# "m" is an unrecognised escape sequence in a normal string literal (it warns on
# current Python versions) and is only a directory separator on Windows.
df = pd.read_csv('new_data_task1/merged_order_df1.csv')

# Parse the purchase timestamp and derive calendar parts from it.
df['date'] = pd.to_datetime(df['order_purchase_timestamp'])
purchase_dt = df['date'].dt
df['year'] = purchase_dt.year
df['month'] = purchase_dt.month
df['day'] = purchase_dt.day
df['dayofyear'] = purchase_dt.dayofyear
df['dayofweek'] = purchase_dt.dayofweek

# Re-assemble a timestamp from year/month/day only, i.e. the purchase date
# without its time-of-day component.
df['date_day'] = pd.to_datetime(df[['year', 'month', 'day']])

# Index by day and order chronologically so the index can be binned below.
df = df.set_index('date_day').sort_index()

# Sum of `price` per product category and calendar day; pd.Grouper(freq='D')
# bins on the DatetimeIndex set above, and reset_index() turns both group keys
# back into ordinary columns.
grouped = (
    df.groupby(['product_category_name_english', pd.Grouper(freq='D')])['price']
    .sum()
    .reset_index()
)

def forecast_demand_xgboost(grouped_df, *, n_lags=7, train_fraction=0.8):
    """Fit one lag-feature XGBoost regressor per product category.

    For every category the rows are re-indexed to a gap-free daily calendar
    (days that were missing get a ``price`` of 0), lagged copies of ``price``
    are added as features, and the first ``train_fraction`` of the daily rows
    is used to fit a model that then predicts the remaining rows.

    Args:
        grouped_df: Frame with the columns ``product_category_name_english``,
            ``date_day`` (datetime-like) and ``price``. It is expected to hold
            at most one row per category and day.
        n_lags: Number of previous days of ``price`` used as features
            (columns ``lag_1`` .. ``lag_<n_lags>``).
        train_fraction: Share of each category's daily rows, counted from the
            start of the series, that is used for training. Must lie strictly
            between 0 and 1.

    Returns:
        A ``(forecasts, all_categories_dfs)`` tuple. ``forecasts`` maps a
        category name to the array predicted for its held-out rows, and
        ``all_categories_dfs`` lists the prepared per-category frames in the
        same order. A category whose training split would be empty is left out
        of both (a ``RuntimeWarning`` is emitted), so the two stay aligned.

    Raises:
        ValueError: If ``n_lags`` is smaller than 1 or ``train_fraction`` is
            not strictly between 0 and 1.
    """
    import warnings  # local import so the function does not rely on file-level imports

    if n_lags < 1:
        raise ValueError(f"n_lags must be at least 1, got {n_lags!r}")
    if not 0 < train_fraction < 1:
        raise ValueError(
            f"train_fraction must be strictly between 0 and 1, got {train_fraction!r}"
        )

    category_column = 'product_category_name_english'
    target = 'price'
    calendar_parts = ('year', 'month', 'day', 'dayofyear', 'dayofweek')
    lag_columns = [f'lag_{lag}' for lag in range(1, n_lags + 1)]

    forecasts = {}
    all_categories_dfs = []

    # A single groupby pass replaces re-filtering the whole frame once per
    # category; sort=False keeps the categories in order of first appearance.
    for category, category_rows in grouped_df.groupby(category_column, sort=False):
        category_df = category_rows.copy()

        # Fill in missing dates: resampling to daily frequency inserts a row
        # for every absent day, with NaN in all columns.
        category_df.index = category_df['date_day']
        category_df = category_df.resample('D').asfreq()

        # Rebuild the columns that are NaN on the inserted rows.
        category_df['date_day'] = pd.to_datetime(category_df.index)
        day_accessor = category_df['date_day'].dt
        for part in calendar_parts:
            category_df[part] = getattr(day_accessor, part)
        category_df[category_column] = category

        # Inserted days carry no price; treat them as zero.
        category_df[target] = category_df[target].fillna(0)

        # Lag features: the price of each of the previous n_lags days. The
        # first rows have NaN lags because there is no earlier history.
        for lag, column in enumerate(lag_columns, start=1):
            category_df[column] = category_df[target].shift(lag)

        # Split by position: the leading rows train, the trailing rows test.
        train_size = int(len(category_df) * train_fraction)
        if train_size == 0:
            # Too few daily rows to leave anything to train on; a model cannot
            # be fit on an empty frame, so skip instead of failing mid-run.
            warnings.warn(
                f"Skipping category {category!r}: only {len(category_df)} daily "
                "row(s), which leaves no training data.",
                RuntimeWarning,
                stacklevel=2,
            )
            continue
        train = category_df.iloc[:train_size]
        test = category_df.iloc[train_size:]

        model = xgb.XGBRegressor(
            objective='reg:squarederror',
            colsample_bytree=0.3,
            learning_rate=0.1,
            max_depth=5,
            alpha=10,
            n_estimators=100,
        )
        model.fit(train[lag_columns], train[target])

        forecasts[category] = model.predict(test[lag_columns])
        all_categories_dfs.append(category_df)

    return forecasts, all_categories_dfs

# Run the per-category forecast on the daily price totals: the first result maps
# each category to its predictions, the second holds the prepared per-category
# frames (including the lag feature columns).
demand_forecast_xgboost, all_categories_dfs_xgboost = forecast_demand_xgboost(grouped)