# Прогнозирование продаж и отгрузок для маркетплейсов

Этот notebook содержит полный цикл прогнозирования продаж и расчета отгрузок.

**Все классы определены прямо в этом notebook — дополнительные модули проекта не требуются (нужны только библиотеки, импортируемые в разделе 1).**


## 1. Импорт библиотек


In [None]:
# ============================================================================
# ИМПОРТ БИБЛИОТЕК
# ============================================================================
import sys
from pathlib import Path
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Опциональные библиотеки для продвинутых моделей
try:
    from sklearn.linear_model import LinearRegression
    from sklearn.feature_selection import SelectKBest, f_regression
    from sklearn.preprocessing import StandardScaler
    from sklearn.metrics import mean_absolute_error, mean_squared_error
    SKLEARN_AVAILABLE = True
except ImportError:
    SKLEARN_AVAILABLE = False
    print("Warning: scikit-learn не установлен. Линейная регрессия недоступна.")

try:
    from statsmodels.tsa.arima.model import ARIMA
    from statsmodels.tsa.statespace.sarimax import SARIMAX
    STATSMODELS_AVAILABLE = True
except ImportError:
    STATSMODELS_AVAILABLE = False
    print("Warning: statsmodels не установлен. ARIMA модели недоступны.")

try:
    from prophet import Prophet
    PROPHET_AVAILABLE = True
except ImportError:
    PROPHET_AVAILABLE = False
    print("Warning: prophet не установлен. Prophet модель недоступна.")

try:
    import holidays
    HOLIDAYS_AVAILABLE = True
except ImportError:
    HOLIDAYS_AVAILABLE = False
    print("Warning: holidays не установлен. Праздники будут недоступны.")

# Display configuration: show every column and up to 100 rows of a DataFrame.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)

# Try the 'seaborn-v0_8' style sheet first and fall back to the legacy
# 'seaborn' name; if neither is available the default style is kept.
# matplotlib raises OSError for an unknown style name, so only that error is
# swallowed (a bare ``except`` would also hide KeyboardInterrupt and typos).
for _style_name in ('seaborn-v0_8', 'seaborn'):
    try:
        plt.style.use(_style_name)
    except OSError:
        continue
    break
sns.set_palette("husl")

print("✓ Библиотеки загружены")


## 2. Определение всех классов

Все классы определены ниже. Они будут использоваться в последующих ячейках.


### 2.1. Класс DataLoader - загрузка данных


In [None]:
# ============================================================================
# КЛАСС ДЛЯ ЗАГРУЗКИ ДАННЫХ
# ============================================================================

class DataLoader:
    """Load and normalise sales, stock, withdrawal, defecture and shipment tables.

    Source files may be Excel (``.xlsx`` / ``.xls``) or CSV. A file is located
    either through an explicit entry in ``file_paths`` or by its name inside
    ``data_path``. Source column headers are renamed to short names such as
    ``date``, ``unified_code``, ``quantity`` and ``stock``.
    """

    # File extensions tried, in this order, when searching ``data_path``.
    _EXTENSIONS = ('.xlsx', '.xls', '.csv')

    # Source header -> normalised column name, one mapping per table kind.
    _SALES_COLUMNS = {
        'Количество упак.': 'quantity',
        'Унифицированный solo-code': 'unified_code',
        'solo-code': 'solo_code',
        'SKU': 'sku',
        'Дата': 'date',
    }
    _STOCKS_COLUMNS = {
        'Остаток': 'stock',
        'Унифицированный solo-code': 'unified_code',
        'solo-code': 'solo_code',
        'SKU': 'sku',
        'Склад': 'warehouse',
        'Дата': 'date',
    }
    _OUR_STOCKS_COLUMNS = {
        'Остаток': 'stock',
        'Унифицированный solo-code': 'unified_code',
        'SKU': 'sku',
        'Дата': 'date',
    }
    _WITHDRAW_COLUMNS = {
        'Унифицированный solo-code': 'unified_code',
        'SKU': 'sku',
    }
    _DEFECTURE_COLUMNS = {
        'Унифицированный solo-code': 'unified_code',
        'SKU': 'sku',
        'Дата окончания дефектуры': 'end_date',
    }
    _SHIPMENTS_COLUMNS = {
        'Унифицированный solo-code': 'unified_code',
        'Кол-во упаково': 'quantity',
        'Дата': 'date',
    }

    def __init__(self, data_path: str = "data", file_paths: Optional[Dict[str, str]] = None):
        """
        Initialise the loader.

        Args:
            data_path: Folder searched for data files by name; a falsy value
                disables the folder search.
            file_paths: Optional mapping {logical file name: path} that takes
                precedence over the folder search.
        """
        self.data_path = Path(data_path) if data_path else None
        self.file_paths = file_paths if file_paths else {}
        self.wb_sales = None
        self.ozon_sales = None
        self.wb_stocks = None
        self.ozon_stocks = None
        self.our_stocks = None
        self.withdraw = None
        self.defecture = None
        self.historical_shipments = None

    def load_all_data(self) -> Dict[str, pd.DataFrame]:
        """Load every known table, skipping the ones that fail.

        Each successfully loaded table is stored on the attribute of the same
        name and added to the returned dict. A failure is reported with
        ``print`` and does not stop the remaining loads (deliberate
        best-effort behaviour). Historical shipments are not loaded here; use
        ``load_historical_shipments``.

        Returns:
            Mapping from table name to the loaded DataFrame.
        """
        loaders = (
            ('wb_sales', self._load_sales),
            ('ozon_sales', self._load_sales),
            ('wb_stocks', self._load_stocks),
            ('ozon_stocks', self._load_stocks),
            ('our_stocks', self._load_our_stocks),
            ('withdraw', self._load_withdraw),
            ('defecture', self._load_defecture),
        )
        data = {}
        for name, loader in loaders:
            try:
                frame = loader(name)
            except Exception as e:
                print(f"Ошибка загрузки {name}: {e}")
                continue
            setattr(self, name, frame)
            data[name] = frame
        return data

    def _get_file_path(self, filename: str, default_name: Optional[str] = None) -> Path:
        """Resolve the path of a logical file name.

        An existing path registered in ``file_paths`` wins. Otherwise
        ``data_path`` is searched for ``default_name`` (or ``filename``) with
        each supported extension.

        Raises:
            FileNotFoundError: If no matching file exists.
        """
        if filename in self.file_paths:
            path = Path(self.file_paths[filename])
            if path.exists():
                return path
            # A stale explicit path is reported, then the folder search is tried.
            print(f"⚠ Путь к файлу {filename} указан, но файл не найден: {path}")

        if self.data_path:
            for ext in self._EXTENSIONS:
                file_path = self.data_path / f"{default_name or filename}{ext}"
                if file_path.exists():
                    return file_path

        raise FileNotFoundError(f"Файл {filename} не найден. Проверьте путь в file_paths или наличие файла в {self.data_path}")

    @staticmethod
    def _read_table(file_path: Path) -> pd.DataFrame:
        """Read an Excel or CSV file, chosen by (case-insensitive) extension.

        Raises:
            ValueError: If the extension is not ``.xlsx``, ``.xls`` or ``.csv``.
        """
        suffix = file_path.suffix.lower()
        if suffix in ('.xlsx', '.xls'):
            return pd.read_excel(file_path)
        if suffix == '.csv':
            return pd.read_csv(file_path)
        raise ValueError(f"Неподдерживаемый формат файла: {file_path.suffix}")

    @staticmethod
    def _parse_dates(df: pd.DataFrame, allow_year_month: bool = True) -> pd.DataFrame:
        """Ensure a parsed 'Дата' column exists on ``df`` (modified in place).

        An existing 'Дата' column is converted with ``errors='coerce'``. When
        it is absent and ``allow_year_month`` is true, the date is built as the
        first day of the month from the 'Годы (Дата)' and 'Месяцы (Дата)'
        columns, if both are present.
        """
        if 'Дата' in df.columns:
            df['Дата'] = pd.to_datetime(df['Дата'], errors='coerce')
        elif allow_year_month and 'Годы (Дата)' in df.columns and 'Месяцы (Дата)' in df.columns:
            df['Дата'] = pd.to_datetime(
                df['Годы (Дата)'].astype(str) + '-' +
                df['Месяцы (Дата)'].astype(str).str.zfill(2) + '-01',
                errors='coerce'
            )
        return df

    @staticmethod
    def _sum_quantity(df: pd.DataFrame, keep_first=()) -> pd.DataFrame:
        """Sum ``quantity`` per (date, unified_code).

        Columns listed in ``keep_first`` are carried over with their first
        value per group, but only when they exist; requesting a missing column
        in ``agg`` would raise KeyError. ``df`` is returned unchanged when any
        of the three key columns is missing.
        """
        if not {'quantity', 'unified_code', 'date'}.issubset(df.columns):
            return df
        aggregations = {'quantity': 'sum'}
        aggregations.update({col: 'first' for col in keep_first if col in df.columns})
        return df.groupby(['date', 'unified_code']).agg(aggregations).reset_index()

    def _load_sales(self, filename: str) -> pd.DataFrame:
        """Read and normalise a sales file ('wb_sales' or 'ozon_sales')."""
        file_path = self._get_file_path(filename, 'wb_sales' if 'wb' in filename else 'ozon_sales')
        return self._process_sales_data(self._read_table(file_path))

    def _load_stocks(self, filename: str) -> pd.DataFrame:
        """Read and normalise a marketplace stock file ('wb_stocks' or 'ozon_stocks')."""
        file_path = self._get_file_path(filename, 'wb_stocks' if 'wb' in filename else 'ozon_stocks')
        return self._process_stocks_data(self._read_table(file_path))

    def _load_our_stocks(self, filename: str) -> pd.DataFrame:
        """Read and normalise the own-warehouse stock file."""
        file_path = self._get_file_path(filename, 'our_stocks')
        return self._process_our_stocks_data(self._read_table(file_path))

    def _load_optional(self, filename: str, default_name: str, empty_columns, process) -> pd.DataFrame:
        """Load an optional table.

        A missing file or an unsupported extension yields an empty DataFrame
        with ``empty_columns`` instead of an error; otherwise the file is read
        and passed through ``process``.
        """
        try:
            file_path = self._get_file_path(filename, default_name)
            if file_path.suffix.lower() not in self._EXTENSIONS:
                return pd.DataFrame(columns=empty_columns)
            df = self._read_table(file_path)
        except FileNotFoundError:
            return pd.DataFrame(columns=empty_columns)
        return process(df)

    def _load_withdraw(self, filename: str) -> pd.DataFrame:
        """Load the list of products being withdrawn (empty frame if absent)."""
        return self._load_optional(
            filename, 'withdraw', ['unified_code', 'sku'], self._process_withdraw_data
        )

    def _load_defecture(self, filename: str) -> pd.DataFrame:
        """Load the list of products in defecture (empty frame if absent)."""
        return self._load_optional(
            filename, 'defecture', ['unified_code', 'sku', 'end_date'], self._process_defecture_data
        )

    def load_historical_shipments(self, file_path: Optional[str] = None) -> pd.DataFrame:
        """
        Load historical shipment data.

        The file is taken from ``file_path`` if given, else from
        ``file_paths['historical_shipments']``, else from a file named
        'Отгрузки в МП' inside ``data_path``. Problems locating or reading the
        file are reported with ``print`` and an empty DataFrame is returned.

        Args:
            file_path: Full path to the historical shipments file.

        Returns:
            Shipments summed per (date, unified_code); also stored on
            ``self.historical_shipments`` when loading succeeds.
        """
        if file_path:
            path = Path(file_path)
            if not path.exists():
                print(f"⚠ Файл не найден: {path}")
                return pd.DataFrame()
        elif 'historical_shipments' in self.file_paths:
            path = Path(self.file_paths['historical_shipments'])
            if not path.exists():
                print(f"⚠ Файл не найден: {path}")
                return pd.DataFrame()
        elif self.data_path:
            candidates = (self.data_path / f"Отгрузки в МП{ext}" for ext in self._EXTENSIONS)
            path = next((candidate for candidate in candidates if candidate.exists()), None)
            if path is None:
                print(f"⚠ Файл 'Отгрузки в МП' не найден в {self.data_path}")
                return pd.DataFrame()
        else:
            print("⚠ Не указан путь к файлу исторических отгрузок")
            return pd.DataFrame()

        if path.suffix.lower() not in self._EXTENSIONS:
            print(f"⚠ Неподдерживаемый формат файла: {path.suffix}")
            return pd.DataFrame()

        df = self._process_historical_shipments_data(self._read_table(path))
        self.historical_shipments = df
        return df

    def prepare_sales_data(self, marketplace: str = 'wb') -> pd.DataFrame:
        """Return loaded sales sorted by product and date.

        Args:
            marketplace: 'wb' selects ``wb_sales``; any other value selects
                ``ozon_sales``.

        Returns:
            A sorted copy of the sales table, or an empty DataFrame when
            nothing has been loaded for that marketplace.
        """
        source = self.wb_sales if marketplace == 'wb' else self.ozon_sales
        if source is None or source.empty:
            return pd.DataFrame()
        # sort_values returns a new frame, so the stored table is not mutated.
        return source.sort_values(['unified_code', 'date'])

    def _process_sales_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """Normalise raw sales: parse dates, rename columns, sum per day and product."""
        df = self._parse_dates(df.copy())
        df = df.rename(columns=self._SALES_COLUMNS)
        return self._sum_quantity(df, keep_first=('sku', 'solo_code'))

    def _process_stocks_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """Normalise raw marketplace stock data: parse dates and rename columns."""
        df = self._parse_dates(df.copy())
        return df.rename(columns=self._STOCKS_COLUMNS)

    def _process_our_stocks_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """Normalise raw own-warehouse stock data: parse dates and rename columns."""
        df = self._parse_dates(df.copy())
        return df.rename(columns=self._OUR_STOCKS_COLUMNS)

    def _process_withdraw_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """Normalise the raw withdrawal list by renaming its columns."""
        return df.copy().rename(columns=self._WITHDRAW_COLUMNS)

    def _process_defecture_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """Normalise raw defecture data: rename columns and parse ``end_date``."""
        df = df.copy().rename(columns=self._DEFECTURE_COLUMNS)
        if 'end_date' in df.columns:
            df['end_date'] = pd.to_datetime(df['end_date'], errors='coerce')
        return df

    def _process_historical_shipments_data(self, df: pd.DataFrame) -> pd.DataFrame:
        """Normalise raw shipments: parse dates, rename columns, sum per day and product."""
        # Shipments only support an explicit 'Дата' column (no year/month fallback).
        df = self._parse_dates(df.copy(), allow_year_month=False)
        df = df.rename(columns=self._SHIPMENTS_COLUMNS)
        return self._sum_quantity(df)

print("✓ Класс DataLoader определен")


### 2.2. Класс CalendarFeatures - календарные признаки


In [None]:
# ============================================================================
# КЛАСС ДЛЯ КАЛЕНДАРНЫХ ПРИЗНАКОВ
# ============================================================================

class CalendarFeatures:
    """Класс для создания календарных признаков"""
    
    def __init__(self, country='RU'):
        self.country = country
        if HOLIDAYS_AVAILABLE:
            self.ru_holidays = holidays.Russia(years=range(2020, 2030))
        else:
            self.ru_holidays = {}
        
        # Даты черной пятницы
        self.ozon_black_friday_dates = [
            pd.Timestamp('2023-11-24'),
            pd.Timestamp('2024-11-29'),
            pd.Timestamp('2025-11-28'),
        ]
        
        self.wb_black_friday_dates = [
            pd.Timestamp('2023-11-24'),
            pd.Timestamp('2024-11-29'),
            pd.Timestamp('2025-11-28'),
        ]
    
    def add_calendar_features(self, df: pd.DataFrame, marketplace: str = 'wb') -> pd.DataFrame:
        """Добавляет календарные признаки к DataFrame"""
        df = df.copy()
        
        if 'date' not in df.columns:
            return df
        
        # Базовые признаки
        df['year'] = df['date'].dt.year
        df['month'] = df['date'].dt.month
        df['day'] = df['date'].dt.day
        df['day_of_week'] = df['date'].dt.dayofweek
        df['day_of_year'] = df['date'].dt.dayofyear
        df['week_of_year'] = df['date'].dt.isocalendar().week
        df['quarter'] = df['date'].dt.quarter
        
        # Выходные дни
        df['is_weekend'] = (df['day_of_week'] >= 5).astype(int)
        
        # Праздники
        if HOLIDAYS_AVAILABLE:
            df['is_holiday'] = df['date'].apply(
                lambda x: 1 if x in self.ru_holidays else 0
            )
        else:
            df['is_holiday'] = 0
        
        # Черная пятница
        if marketplace == 'ozon':
            black_friday_dates = self.ozon_black_friday_dates
        else:
            black_friday_dates = self.wb_black_friday_dates
        
        df['is_black_friday'] = df['date'].apply(
            lambda x: 1 if x in black_friday_dates else 0
        )
        
        # Период вокруг черной пятницы
        df['is_black_friday_period'] = 0
        for bf_date in black_friday_dates:
            period_start = bf_date - timedelta(days=7)
            period_end = bf_date + timedelta(days=7)
            mask = (df['date'] >= period_start) & (df['date'] <= period_end)
            df.loc[mask, 'is_black_friday_period'] = 1
        
        # Новогодние праздники
        df['is_new_year_period'] = (
            ((df['month'] == 12) & (df['day'] >= 20)) |
            ((df['month'] == 1) & (df['day'] <= 10))
        ).astype(int)
        
        # Летний период
        df['is_summer'] = df['month'].isin([6, 7, 8]).astype(int)
        
        # Бинарные признаки для месяцев
        for month in range(1, 13):
            df[f'month_{month}'] = (df['month'] == month).astype(int)
        
        # Бинарные признаки для дней недели
        for day in range(7):
            df[f'day_of_week_{day}'] = (df['day_of_week'] == day).astype(int)
        
        return df
    
    def get_future_calendar_features(self, start_date: pd.Timestamp, 
                                     periods: int = 18, 
                                     marketplace: str = 'wb') -> pd.DataFrame:
        """Создает календарные признаки для будущих дат"""
        dates = pd.date_range(start=start_date, periods=periods * 30, freq='D')
        dates = dates[:periods * 30]
        
        df = pd.DataFrame({'date': dates})
        df = self.add_calendar_features(df, marketplace=marketplace)
        return df
    
    def add_black_friday_dates(self, ozon_dates: list, wb_dates: list):
        """Добавляет даты черной пятницы"""
        self.ozon_black_friday_dates = [pd.Timestamp(d) for d in ozon_dates]
        self.wb_black_friday_dates = [pd.Timestamp(d) for d in wb_dates]

print("✓ Класс CalendarFeatures определен")


### 2.3. Модели прогнозирования


In [None]:
# ============================================================================
# МОДЕЛИ ПРОГНОЗИРОВАНИЯ
# ============================================================================

# Baseline модель
class BaselineModel:
    """Constant-value baseline forecaster.

    For every product a single level is derived from the historical
    ``quantity`` column (mean, median or last observation) and repeated over
    the whole forecast horizon. Any unrecognised ``method`` behaves like
    ``'mean'``.
    """

    def __init__(self, method: str = 'mean'):
        self.method = method
        # unified_code -> constant forecast level
        self.fitted_values = {}

    def fit(self, data: pd.DataFrame, unified_code: str):
        """Compute and store the forecast level for ``unified_code``.

        An empty frame, a frame without ``quantity`` or a NaN statistic all
        result in a stored level of 0.
        """
        has_history = not data.empty and 'quantity' in data.columns
        if not has_history:
            self.fitted_values[unified_code] = 0
            return

        history = data['quantity']
        if self.method == 'median':
            level = history.median()
        elif self.method == 'last':
            level = history.iloc[-1]
        else:
            # 'mean' and every unknown method share the same fallback
            level = history.mean()

        self.fitted_values[unified_code] = 0 if pd.isna(level) else level

    def predict(self, unified_code: str, periods: int = 18) -> np.ndarray:
        """Return ``periods`` copies of the fitted level (zeros if never fitted)."""
        try:
            level = self.fitted_values[unified_code]
        except KeyError:
            return np.zeros(periods)
        return np.full(periods, level)

    def get_model_name(self) -> str:
        """Return a display name that includes the aggregation method."""
        return f"Baseline ({self.method})"

print("✓ BaselineModel определен")


In [None]:
# Линейная регрессия
if SKLEARN_AVAILABLE:
    class LinearRegressionModel:
        """Per-product linear regression with optional univariate feature selection.

        For every ``unified_code`` the model keeps its own scaler, feature
        mask and the ordered list of columns it was trained on, so that
        prediction-time features can be aligned to the training layout.
        """

        # Columns that identify a row rather than describe it
        _NON_FEATURE_COLUMNS = ('date', 'quantity', 'unified_code', 'sku', 'solo_code')

        def __init__(self, use_feature_selection: bool = True, k_features: int = 10):
            self.use_feature_selection = use_feature_selection
            self.k_features = k_features
            self.models = {}
            self.scalers = {}
            self.selected_features = {}
            # unified_code -> ordered training columns (before feature selection)
            self.feature_columns = {}
            # Names kept by the most recent fit; retained for backward compatibility
            self.feature_names = []

        def fit(self, data: pd.DataFrame, unified_code: str, feature_columns: List[str] = None):
            """Fit the regression for one product.

            Args:
                data: History with a ``quantity`` target and feature columns.
                unified_code: Product identifier used as the storage key.
                feature_columns: Columns to use; defaults to every column
                    except the identifier/target columns.

            The model for ``unified_code`` is set to ``None`` when there is no
            usable data (empty frame, no target, no features, fewer than two rows).
            """
            if data.empty or 'quantity' not in data.columns:
                self.models[unified_code] = None
                return

            if feature_columns is None:
                feature_columns = [col for col in data.columns
                                   if col not in self._NON_FEATURE_COLUMNS]
            else:
                feature_columns = list(feature_columns)

            if not feature_columns or len(data) < 2:
                self.models[unified_code] = None
                return

            X = data[feature_columns].fillna(0)
            y = data['quantity'].fillna(0)

            scaler = StandardScaler()
            X_scaled = scaler.fit_transform(X)

            if self.use_feature_selection and len(feature_columns) > self.k_features:
                selector = SelectKBest(f_regression, k=self.k_features)
                X_selected = selector.fit_transform(X_scaled, y)
                support = selector.get_support()
            else:
                X_selected = X_scaled
                support = np.ones(len(feature_columns), dtype=bool)

            model = LinearRegression()
            model.fit(X_selected, y)

            # Persist everything only after a successful fit so the per-product
            # state (scaler / mask / columns / model) is always consistent.
            self.scalers[unified_code] = scaler
            self.selected_features[unified_code] = support
            self.feature_columns[unified_code] = feature_columns
            self.feature_names = [name for name, keep in zip(feature_columns, support) if keep]
            self.models[unified_code] = model

        def predict(self, unified_code: str, future_features: pd.DataFrame, periods: int = 18) -> np.ndarray:
            """Predict non-negative quantities for the rows of ``future_features``.

            The frame is aligned to the columns used during ``fit``: extra
            columns are ignored and missing ones are filled with 0. Returns at
            most ``periods`` values, or ``periods`` zeros when the product has
            no fitted model or no usable features are supplied.
            """
            model = self.models.get(unified_code)
            scaler = self.scalers.get(unified_code)
            if model is None or scaler is None:
                return np.zeros(periods)

            if future_features is None or future_features.empty:
                return np.zeros(periods)

            train_columns = self.feature_columns.get(unified_code)
            if train_columns is None:
                # State created without training columns: fall back to every
                # non-identifier column of the supplied frame.
                train_columns = [col for col in future_features.columns
                                 if col not in self._NON_FEATURE_COLUMNS]

            if not any(col in future_features.columns for col in train_columns):
                return np.zeros(periods)

            # Same columns, same order as at fit time; absent features become 0
            X = future_features.reindex(columns=train_columns).fillna(0)
            X_scaled = scaler.transform(X)

            mask = self.selected_features.get(unified_code)
            X_selected = X_scaled if mask is None else X_scaled[:, mask]

            predictions = np.maximum(model.predict(X_selected), 0)
            return predictions[:periods]

        def get_model_name(self) -> str:
            """Return the display name, reflecting whether selection is enabled."""
            return "Linear Regression (Feature Selection)" if self.use_feature_selection else "Linear Regression"
    
    class BinaryLinearRegressionModel:
        """Per-product linear regression restricted to binary calendar features.

        A column counts as a binary feature when its name starts with
        ``is_``, ``month_`` or ``day_of_week_``.
        """

        _BINARY_PREFIXES = ('is_', 'month_', 'day_of_week_')

        def __init__(self):
            self.models = {}
            self.scalers = {}
            # unified_code -> ordered feature names used when fitting that product
            self.feature_names_by_code = {}
            # Feature names of the most recent fit; retained for backward compatibility
            self.feature_names = []

        def _binary_columns(self, frame: pd.DataFrame) -> List[str]:
            """Return the columns of ``frame`` whose names mark them as binary features."""
            return [col for col in frame.columns
                    if isinstance(col, str) and col.startswith(self._BINARY_PREFIXES)]

        def fit(self, data: pd.DataFrame, unified_code: str):
            """Fit the regression for one product on its binary feature columns.

            The model for ``unified_code`` is set to ``None`` when the frame is
            empty, lacks ``quantity``, has no binary features or fewer than two rows.
            """
            if data.empty or 'quantity' not in data.columns:
                self.models[unified_code] = None
                return

            binary_features = self._binary_columns(data)
            if not binary_features or len(data) < 2:
                self.models[unified_code] = None
                return

            X = data[binary_features].fillna(0)
            y = data['quantity'].fillna(0)

            scaler = StandardScaler()
            X_scaled = scaler.fit_transform(X)

            model = LinearRegression()
            model.fit(X_scaled, y)

            self.scalers[unified_code] = scaler
            # Remember the layout per product: a later fit for another product
            # must not change the columns this product's scaler expects.
            self.feature_names_by_code[unified_code] = binary_features
            self.feature_names = binary_features
            self.models[unified_code] = model

        def predict(self, unified_code: str, future_features: pd.DataFrame, periods: int = 18) -> np.ndarray:
            """Predict non-negative quantities for the rows of ``future_features``.

            Features are aligned to the layout used when this product was
            fitted; training features absent from the frame are filled with 0.
            Returns at most ``periods`` values, or ``periods`` zeros when no
            model exists or none of the training features is available.
            """
            model = self.models.get(unified_code)
            scaler = self.scalers.get(unified_code)
            if model is None or scaler is None:
                return np.zeros(periods)

            if future_features is None or len(future_features) == 0:
                return np.zeros(periods)

            feature_names = self.feature_names_by_code.get(unified_code, self.feature_names)
            present = set(self._binary_columns(future_features))
            if not any(name in present for name in feature_names):
                return np.zeros(periods)

            # reindex builds a new frame, so nothing is written into a slice
            # of the caller's data
            X = future_features.reindex(columns=feature_names, fill_value=0).fillna(0)
            X_scaled = scaler.transform(X)
            predictions = np.maximum(model.predict(X_scaled), 0)

            return predictions[:periods]

        def get_model_name(self) -> str:
            """Return the display name of the model."""
            return "Linear Regression (Binary Features)"
    
    print("✓ LinearRegressionModel и BinaryLinearRegressionModel определены")
else:
    print("⚠ LinearRegressionModel недоступны (scikit-learn не установлен)")


In [None]:
# ARIMA модели
if STATSMODELS_AVAILABLE:
    class ARIMAModel:
        """ARIMA forecaster; uses SARIMAX when a seasonal order is supplied."""

        def __init__(self, order: Tuple[int, int, int] = (1, 1, 1),
                     seasonal_order: Optional[Tuple[int, int, int, int]] = None):
            self.order = order
            self.seasonal_order = seasonal_order
            # unified_code -> fitted results object, or None when fitting was not possible
            self.models = {}
            self.is_sarima = seasonal_order is not None

        def fit(self, data: pd.DataFrame, unified_code: str):
            """Fit the model on the ``quantity`` column of ``data``.

            The model for ``unified_code`` is set to ``None`` when the frame is
            empty, lacks ``quantity``, is shorter than ``max(order) + 1`` rows,
            or when estimation raises.
            """
            if data.empty or 'quantity' not in data.columns:
                self.models[unified_code] = None
                return

            if len(data) < max(self.order) + 1:
                self.models[unified_code] = None
                return

            try:
                ts = data['quantity'].fillna(0).values

                if self.is_sarima:
                    fitted_model = SARIMAX(
                        ts, order=self.order, seasonal_order=self.seasonal_order
                    ).fit(disp=False)
                else:
                    # statsmodels.tsa.arima.model.ARIMA.fit() has no `disp`
                    # parameter; passing it raises TypeError, which the handler
                    # below would swallow and leave every ARIMA model unfitted.
                    fitted_model = ARIMA(ts, order=self.order).fit()

                self.models[unified_code] = fitted_model

            except Exception as e:
                # Best effort: one product failing to converge must not stop the batch
                print(f"Ошибка обучения ARIMA для {unified_code}: {e}")
                self.models[unified_code] = None

        def predict(self, unified_code: str, periods: int = 18) -> np.ndarray:
            """Forecast ``periods`` steps ahead, clipped at zero.

            Returns ``periods`` zeros when there is no fitted model or the
            forecast call raises.
            """
            fitted_model = self.models.get(unified_code)
            if fitted_model is None:
                return np.zeros(periods)

            try:
                forecast = fitted_model.forecast(steps=periods)
                return np.maximum(forecast, 0)
            except Exception as e:
                print(f"Ошибка прогноза ARIMA для {unified_code}: {e}")
                return np.zeros(periods)

        def get_model_name(self) -> str:
            """Return a display name that includes the (seasonal) order."""
            if self.is_sarima:
                return f"SARIMA{self.order}x{self.seasonal_order}"
            return f"ARIMA{self.order}"
    
    class SARIMAXModel:
        """SARIMAX forecaster with optional exogenous regressors."""

        def __init__(self, order: Tuple[int, int, int] = (1, 1, 1),
                     seasonal_order: Tuple[int, int, int, int] = (1, 1, 1, 12)):
            self.order = order
            self.seasonal_order = seasonal_order
            # unified_code -> fitted results object, or None when fitting was not possible
            self.models = {}
            # unified_code -> exogenous columns that product's model was fitted with
            self.exog_columns_by_code = {}
            # Exogenous columns of the most recent fit; retained for backward compatibility
            self.exog_columns = []

        def fit(self, data: pd.DataFrame, unified_code: str, exog_columns: list = None):
            """Fit the model on ``quantity`` and, optionally, exogenous columns.

            The model for ``unified_code`` is set to ``None`` when the frame is
            empty, lacks ``quantity``, is shorter than
            ``max(order) + max(seasonal_order[:3]) + 1`` rows, or when
            estimation raises.
            """
            if data.empty or 'quantity' not in data.columns:
                self.models[unified_code] = None
                return

            if len(data) < max(self.order) + max(self.seasonal_order[:3]) + 1:
                self.models[unified_code] = None
                return

            try:
                ts = data['quantity'].fillna(0).values

                used_columns = list(exog_columns) if exog_columns else []
                exog = data[used_columns].fillna(0).values if used_columns else None
                self.exog_columns = used_columns

                model = SARIMAX(ts, exog=exog, order=self.order,
                                seasonal_order=self.seasonal_order)
                fitted_model = model.fit(disp=False)

                # Keep the regressor list next to the model it belongs to, so a
                # later fit for another product cannot change what this one expects.
                self.exog_columns_by_code[unified_code] = used_columns
                self.models[unified_code] = fitted_model

            except Exception as e:
                # Best effort: one product failing to converge must not stop the batch
                print(f"Ошибка обучения SARIMAX для {unified_code}: {e}")
                self.models[unified_code] = None

        def predict(self, unified_code: str, future_exog: pd.DataFrame = None,
                    periods: int = 18) -> np.ndarray:
            """Forecast ``periods`` steps ahead, clipped at zero.

            When the product was fitted with exogenous columns and
            ``future_exog`` is given, its first ``periods`` rows of those
            columns are passed to the forecast. Returns ``periods`` zeros when
            there is no fitted model or the forecast call raises.
            """
            fitted_model = self.models.get(unified_code)
            if fitted_model is None:
                return np.zeros(periods)

            try:
                exog_columns = self.exog_columns_by_code.get(unified_code, self.exog_columns)
                if exog_columns and future_exog is not None:
                    exog = future_exog[exog_columns].fillna(0).values[:periods]
                else:
                    exog = None

                forecast = fitted_model.forecast(steps=periods, exog=exog)
                return np.maximum(forecast, 0)
            except Exception as e:
                print(f"Ошибка прогноза SARIMAX для {unified_code}: {e}")
                return np.zeros(periods)

        def get_model_name(self) -> str:
            """Return a display name that includes both orders."""
            return f"SARIMAX{self.order}x{self.seasonal_order}"
    
    print("✓ ARIMAModel и SARIMAXModel определены")
else:
    print("⚠ ARIMA модели недоступны (statsmodels не установлен)")


In [None]:
# Prophet модель
if PROPHET_AVAILABLE:
    class ProphetModel:
        """Prophet-based forecaster that keeps one fitted model per product."""

        def __init__(self, yearly_seasonality: bool = True,
                     weekly_seasonality: bool = True,
                     daily_seasonality: bool = False,
                     holidays: pd.DataFrame = None):
            self.models = {}
            self.holidays = holidays
            self.yearly_seasonality = yearly_seasonality
            self.weekly_seasonality = weekly_seasonality
            self.daily_seasonality = daily_seasonality

        def fit(self, data: pd.DataFrame, unified_code: str):
            """Fit Prophet on the ``date`` / ``quantity`` columns of ``data``.

            The model for ``unified_code`` is set to ``None`` for an empty
            frame, a frame without ``quantity``, fewer than two rows, or when
            fitting raises.
            """
            unusable = data.empty or 'quantity' not in data.columns
            if unusable or len(data) < 2:
                self.models[unified_code] = None
                return

            try:
                # Prophet expects the columns to be called ds / y
                history = pd.DataFrame({
                    'ds': pd.to_datetime(data['date']),
                    'y': data['quantity'].fillna(0).values
                })

                prophet = Prophet(
                    yearly_seasonality=self.yearly_seasonality,
                    weekly_seasonality=self.weekly_seasonality,
                    daily_seasonality=self.daily_seasonality,
                    holidays=self.holidays
                )
                prophet.fit(history)
                self.models[unified_code] = prophet

            except Exception as e:
                print(f"Ошибка обучения Prophet для {unified_code}: {e}")
                self.models[unified_code] = None

        def predict(self, unified_code: str, periods: int = 18, freq: str = 'D') -> np.ndarray:
            """Forecast ``periods`` values, clipped at zero.

            With ``freq='D'`` the model forecasts ``periods * 30`` days and the
            result is summed over consecutive 30-day windows, one value per
            period. With any other ``freq`` the first ``periods`` forecast
            values are returned. Returns ``periods`` zeros when there is no
            fitted model or forecasting raises.
            """
            fitted = self.models.get(unified_code)
            if fitted is None:
                return np.zeros(periods)

            try:
                daily = freq == 'D'
                horizon = periods * 30 if daily else periods

                future = fitted.make_future_dataframe(periods=horizon, freq=freq)
                yhat = fitted.predict(future)['yhat'].tail(horizon).values
                yhat = np.maximum(yhat, 0)

                if daily and periods < horizon:
                    # Collapse the daily forecast into 30-day buckets
                    return np.array([
                        yhat[bucket * 30:min((bucket + 1) * 30, len(yhat))].sum()
                        for bucket in range(periods)
                    ])

                return yhat[:periods]

            except Exception as e:
                print(f"Ошибка прогноза Prophet для {unified_code}: {e}")
                return np.zeros(periods)

        def get_model_name(self) -> str:
            """Return the display name of the model."""
            return "Prophet"
    
    print("✓ ProphetModel определен")
else:
    print("⚠ ProphetModel недоступна (prophet не установлен)")


### 2.4. Вспомогательные классы


In [None]:
# ============================================================================
# КЛАСС ДЛЯ ОЦЕНКИ МОДЕЛЕЙ
# ============================================================================

class ModelEvaluator:
    """Scores forecasting models and tracks the best one per product.

    Results are stored in ``evaluation_results`` under the key
    ``"{unified_code}_{model_name}"``; each entry also carries the
    ``unified_code`` and ``model_name`` it belongs to.
    """

    def __init__(self):
        self.evaluation_results = {}

    @staticmethod
    def _failed_metrics() -> Dict:
        """Return the worst-possible metrics used when evaluation is impossible."""
        return {
            'mae': np.inf,
            'rmse': np.inf,
            'mape': np.inf,
            'r2': -np.inf
        }

    def evaluate_model(self, y_true: np.ndarray, y_pred: np.ndarray,
                      model_name: str, unified_code: str) -> Dict:
        """Compute MAE, RMSE, MAPE and R² and store them for the product/model pair.

        Both inputs are truncated to the shorter length. Only points where
        both values are finite and the actual value is strictly positive
        take part in every metric. If no point qualifies (or an input is
        empty) the worst-possible metrics are returned and nothing is stored.
        """
        if len(y_true) == 0 or len(y_pred) == 0:
            return self._failed_metrics()

        # Accept lists / Series as well as arrays
        y_true = np.asarray(y_true, dtype=float)
        y_pred = np.asarray(y_pred, dtype=float)

        min_len = min(len(y_true), len(y_pred))
        y_true = y_true[:min_len]
        y_pred = y_pred[:min_len]

        mask = np.isfinite(y_true) & np.isfinite(y_pred) & (y_true > 0)
        if mask.sum() == 0:
            return self._failed_metrics()

        actual = y_true[mask]
        errors = actual - y_pred[mask]

        # Plain numpy formulas: same definitions as sklearn's
        # mean_absolute_error / mean_squared_error, without needing sklearn.
        mae = np.mean(np.abs(errors))
        rmse = np.sqrt(np.mean(errors ** 2))
        mape = np.mean(np.abs(errors / actual)) * 100

        ss_res = np.sum(errors ** 2)
        ss_tot = np.sum((actual - np.mean(actual)) ** 2)
        # A constant actual series has no variance to explain
        r2 = 1 - (ss_res / ss_tot) if ss_tot > 0 else -np.inf

        results = {
            'mae': mae,
            'rmse': rmse,
            'mape': mape,
            'r2': r2,
            'model_name': model_name,
            'unified_code': unified_code
        }

        self.evaluation_results[f"{unified_code}_{model_name}"] = results
        return results

    def cross_validate(self, data: pd.DataFrame, model, unified_code: str,
                      train_size: float = 0.8) -> Dict:
        """Evaluate ``model`` on a single chronological train/test split.

        The first ``train_size`` share of rows is used for ``model.fit`` and
        the remainder is compared with
        ``model.predict(unified_code, periods=len(test))``. Worst-possible
        metrics are returned when there are fewer than 10 rows, the test part
        is empty, the model has no ``predict`` or any step raises.
        """
        if data.empty or len(data) < 10:
            return self._failed_metrics()

        split_idx = int(len(data) * train_size)
        train_data = data.iloc[:split_idx].copy()
        test_data = data.iloc[split_idx:].copy()

        if len(test_data) == 0:
            return self._failed_metrics()

        try:
            model.fit(train_data, unified_code)

            if not hasattr(model, 'predict'):
                return self._failed_metrics()

            y_pred = model.predict(unified_code, periods=len(test_data))
            y_true = test_data['quantity'].values

            return self.evaluate_model(y_true, y_pred, model.get_model_name(), unified_code)

        except Exception as e:
            # Best effort across heterogeneous models: a failing model simply
            # gets the worst score instead of aborting the comparison.
            print(f"Ошибка кросс-валидации для {unified_code}: {e}")
            return self._failed_metrics()

    def select_best_model(self, unified_code: str, metric: str = 'mape') -> Optional[str]:
        """Return the name of the best stored model for ``unified_code``.

        ``mae``, ``rmse`` and ``mape`` are minimised; any other metric is
        maximised. Returns ``None`` when nothing is stored for the product.
        """
        wanted = str(unified_code)
        prefix = f"{unified_code}_"

        product_results = {}
        for key, stored in self.evaluation_results.items():
            owner = stored.get('unified_code')
            if owner is not None:
                # Match on the stored code: key prefixes are ambiguous
                # ("SKU_" is also a prefix of the keys of product "SKU_1").
                if str(owner) == wanted:
                    product_results[key] = stored
            elif key.startswith(prefix):
                # Entry without an owner field: fall back to the key prefix
                product_results[key] = stored

        if not product_results:
            return None

        if metric in ['mae', 'rmse', 'mape']:
            best_key = min(product_results,
                           key=lambda k: product_results[k].get(metric, np.inf))
        else:
            best_key = max(product_results,
                           key=lambda k: product_results[k].get(metric, -np.inf))

        return product_results[best_key]['model_name']

    def get_evaluation_summary(self) -> pd.DataFrame:
        """Return one row per stored product/model evaluation."""
        if not self.evaluation_results:
            return pd.DataFrame()

        return pd.DataFrame(list(self.evaluation_results.values()))

    def get_best_models_summary(self) -> pd.DataFrame:
        """Return the metrics of the best (lowest MAPE) model of every product."""
        if not self.evaluation_results:
            return pd.DataFrame()

        # Take the product from the stored entry; splitting the key on "_"
        # would truncate codes that contain underscores. dict.fromkeys keeps
        # first-seen order, so the row order is deterministic.
        products = list(dict.fromkeys(
            stored.get('unified_code', key.split('_')[0])
            for key, stored in self.evaluation_results.items()
        ))

        best_models = []
        for product in products:
            best_model = self.select_best_model(product)
            if not best_model:
                continue
            key = f"{product}_{best_model}"
            if key in self.evaluation_results:
                metrics = self.evaluation_results[key].copy()
                metrics['best_model'] = best_model
                best_models.append(metrics)

        return pd.DataFrame(best_models)

print("✓ Класс ModelEvaluator определен")


In [None]:
# ============================================================================
# КЛАСС ДЛЯ РАСЧЕТА ОТГРУЗОК
# ============================================================================

class ShipmentCalculator:
    """Derives warehouse shipment volumes from sales forecasts and stock levels."""

    # Column layout of the frame returned by ``calculate_shipments``
    _RESULT_COLUMNS = [
        'date', 'warehouse', 'unified_code', 'forecasted_sales',
        'current_stock', 'required_stock', 'shipment',
    ]

    def __init__(self, coverage_coefficient: float = 1.5):
        # Required stock = monthly forecast * coverage_coefficient
        self.coverage_coefficient = coverage_coefficient

    def calculate_shipments(self, forecast: pd.DataFrame,
                           stocks: pd.DataFrame,
                           marketplace: str = 'wb') -> pd.DataFrame:
        """Compute shipments per warehouse from a sales forecast.

        The forecast is summed per calendar month and product. For each
        month/product the required stock is the monthly forecast multiplied
        by ``coverage_coefficient``, compared against the stock of the latest
        date in ``stocks``:

        * total stock below the requirement: the deficit is split across the
          product's warehouses proportionally to their stock (equally when
          the total stock is not positive);
        * otherwise: each warehouse holding less than the per-warehouse
          average requirement is topped up to that average.

        Products without stock rows are skipped. ``forecast`` is not modified.

        Args:
            forecast: Frame with ``date`` (datetime), ``unified_code``, ``quantity``.
            stocks: Frame with ``unified_code``, ``warehouse``, ``stock`` and
                optionally ``date``.
            marketplace: Not used by the calculation; kept for interface
                compatibility.

        Returns:
            One row per warehouse shipment; an empty frame with the same
            columns when nothing has to be shipped.
        """
        empty_result = pd.DataFrame(columns=self._RESULT_COLUMNS)

        if forecast is None or forecast.empty or stocks is None or stocks.empty:
            return empty_result

        # assign() works on a copy, so the caller's frame gets no helper column
        forecast_monthly = (
            forecast.assign(year_month=forecast['date'].dt.to_period('M'))
            .groupby(['year_month', 'unified_code'])['quantity']
            .sum()
            .reset_index()
        )
        forecast_monthly['date'] = forecast_monthly['year_month'].dt.to_timestamp()

        if 'date' in stocks.columns:
            latest_stocks = stocks[stocks['date'] == stocks['date'].max()]
        else:
            latest_stocks = stocks

        if latest_stocks.empty:
            return empty_result

        stocks_by_product_warehouse = latest_stocks.groupby(
            ['unified_code', 'warehouse']
        )['stock'].sum().reset_index()

        # Group once instead of filtering the stock table for every forecast row
        stocks_per_product = {
            code: group
            for code, group in stocks_by_product_warehouse.groupby('unified_code')
        }

        shipments = []
        for month_row in forecast_monthly.itertuples(index=False):
            product_stocks = stocks_per_product.get(month_row.unified_code)
            if product_stocks is None:
                continue

            forecasted_sales_monthly = month_row.quantity
            required_stock_total = forecasted_sales_monthly * self.coverage_coefficient
            current_stock_total = product_stocks['stock'].sum()
            n_warehouses = len(product_stocks)
            warehouses = product_stocks['warehouse'].tolist()
            warehouse_stocks = product_stocks['stock'].tolist()

            if current_stock_total < required_stock_total:
                total_shipment_needed = required_stock_total - current_stock_total

                if current_stock_total > 0:
                    planned = [total_shipment_needed * stock / current_stock_total
                               for stock in warehouse_stocks]
                else:
                    planned = [total_shipment_needed / n_warehouses] * n_warehouses

                required_per_row = required_stock_total
            else:
                # Enough stock overall: only level out warehouses below average
                required_per_row = required_stock_total / n_warehouses
                planned = [required_per_row - stock if stock < required_per_row else 0
                           for stock in warehouse_stocks]

            for warehouse, stock, shipment in zip(warehouses, warehouse_stocks, planned):
                if shipment > 0:
                    shipments.append({
                        'date': month_row.date,
                        'warehouse': warehouse,
                        'unified_code': month_row.unified_code,
                        'forecasted_sales': forecasted_sales_monthly,
                        'current_stock': stock,
                        'required_stock': required_per_row,
                        'shipment': shipment
                    })

        if not shipments:
            return empty_result

        return pd.DataFrame(shipments, columns=self._RESULT_COLUMNS)

    @staticmethod
    def _sample_in_order(values, limit: int) -> list:
        """Return up to ``limit`` items of ``values``, sorted when they are comparable."""
        try:
            ordered = sorted(values)
        except TypeError:
            ordered = list(values)
        return ordered[:limit]

    def analyze_shipment_calculation(self, historical_sales: pd.DataFrame,
                                   historical_stocks: pd.DataFrame,
                                   historical_shipments: pd.DataFrame = None) -> Dict:
        """Summarise historical stock/sales and shipment/sales ratios.

        Coverage ratios (``stock / sales``) are sampled for the first 10
        products and, per product, the 5 earliest dates present in both
        sales and stocks. When ``historical_shipments`` is given, the 20
        earliest dates shared with sales are used to compute
        ``shipments / sales`` and ``(stock + shipments) / sales``.

        Returns:
            Dict with whichever of these keys could be computed:
            ``avg/median/min/max_coverage_ratio``,
            ``avg/median_shipment_ratio``,
            ``avg/median_coverage_from_shipments``. Empty when sales or
            stocks are empty.
        """
        analysis = {}

        if historical_sales.empty or historical_stocks.empty:
            return analysis

        sales_grouped = historical_sales.groupby(['date', 'unified_code'])['quantity'].sum().reset_index()
        products = sales_grouped['unified_code'].unique()[:10]

        coverage_ratios = []

        for product in products:
            product_sales = sales_grouped[sales_grouped['unified_code'] == product]
            product_stocks = historical_stocks[historical_stocks['unified_code'] == product]

            if product_sales.empty or product_stocks.empty:
                continue

            common_dates = set(product_sales['date']) & set(product_stocks['date'])

            # Sorted sampling keeps the analysis reproducible between runs
            for date in self._sample_in_order(common_dates, 5):
                sales = product_sales[product_sales['date'] == date]['quantity'].sum()
                stocks = product_stocks[product_stocks['date'] == date]['stock'].sum()

                if sales > 0:
                    coverage_ratios.append(stocks / sales)

        if coverage_ratios:
            analysis['avg_coverage_ratio'] = np.mean(coverage_ratios)
            analysis['median_coverage_ratio'] = np.median(coverage_ratios)
            analysis['min_coverage_ratio'] = np.min(coverage_ratios)
            analysis['max_coverage_ratio'] = np.max(coverage_ratios)

        if historical_shipments is not None and not historical_shipments.empty:
            shipments_grouped = historical_shipments.groupby(['date', 'unified_code'])['quantity'].sum().reset_index()
            common_dates = set(sales_grouped['date']) & set(shipments_grouped['date'])

            shipment_ratios = []
            coverage_from_shipments = []

            for date in self._sample_in_order(common_dates, 20):
                date_sales = sales_grouped[sales_grouped['date'] == date]
                date_shipments = shipments_grouped[shipments_grouped['date'] == date]

                common_products = set(date_sales['unified_code']) & set(date_shipments['unified_code'])

                for product in common_products:
                    sales = date_sales[date_sales['unified_code'] == product]['quantity'].sum()
                    shipped = date_shipments[date_shipments['unified_code'] == product]['quantity'].sum()

                    # Every ratio below divides by sales
                    if sales <= 0:
                        continue

                    shipment_ratios.append(shipped / sales)

                    if shipped > 0:
                        date_stocks = historical_stocks[
                            (historical_stocks['date'] == date) &
                            (historical_stocks['unified_code'] == product)
                        ]
                        if not date_stocks.empty:
                            total_stock = date_stocks['stock'].sum()
                            coverage_from_shipments.append((total_stock + shipped) / sales)

            if shipment_ratios:
                analysis['avg_shipment_ratio'] = np.mean(shipment_ratios)
                analysis['median_shipment_ratio'] = np.median(shipment_ratios)

            if coverage_from_shipments:
                analysis['avg_coverage_from_shipments'] = np.mean(coverage_from_shipments)
                analysis['median_coverage_from_shipments'] = np.median(coverage_from_shipments)

        return analysis

    def set_coverage_coefficient(self, coefficient: float):
        """Replace the coverage coefficient used by ``calculate_shipments``."""
        self.coverage_coefficient = coefficient

print("✓ Класс ShipmentCalculator определен")


In [None]:
# ============================================================================
# КЛАСС ДЛЯ ПРИМЕНЕНИЯ ОГРАНИЧЕНИЙ
# ============================================================================

class Constraints:
    """Business rules applied to sales forecasts and shipment plans.

    The class zeroes forecasts/shipments for products on the withdraw and
    defecture lists and rounds shipments up to whole boxes. All ``apply_*``
    methods work on a copy and never mutate the frame passed in.
    """

    def __init__(self, box_sizes: Optional[Dict[str, int]] = None):
        """Create the constraint helper.

        Args:
            box_sizes: Optional mapping ``unified_code -> units per box``.
                A ``'default'`` key, when present, is used for products that
                have no entry of their own. A falsy value is stored as ``{}``.
        """
        # Per-product box sizes (may contain a 'default' entry)
        self.box_sizes = box_sizes if box_sizes else {}
        # Used when neither the product nor 'default' is configured
        self.default_box_size = 24

    def apply_withdraw_constraints(self, forecast: pd.DataFrame,
                                  withdraw_list: pd.DataFrame,
                                  wb_stocks: pd.DataFrame,
                                  ozon_stocks: pd.DataFrame,
                                  our_stocks: pd.DataFrame) -> pd.DataFrame:
        """Zero the forecast of withdrawn products that have no stock left.

        Args:
            forecast: Frame with ``unified_code`` and ``quantity`` columns.
            withdraw_list: Frame with a ``unified_code`` column.
            wb_stocks: WB stock frame (see ``_get_latest_stocks``).
            ozon_stocks: Ozon stock frame.
            our_stocks: Own-warehouse stock frame.

        Returns:
            A copy of ``forecast`` where ``quantity`` is 0 for every withdrawn
            product whose combined latest stock is <= 0.
        """
        forecast = forecast.copy()

        if withdraw_list.empty:
            return forecast

        withdraw_codes = set(withdraw_list['unified_code'].unique())
        stock_sources = [
            self._get_latest_stocks(frame)
            for frame in (wb_stocks, ozon_stocks, our_stocks)
        ]

        # Withdrawn products keep selling until every stock source is exhausted
        sold_out = {
            code for code in withdraw_codes
            if sum(source.get(code, 0) for source in stock_sources) <= 0
        }
        if sold_out:
            mask = forecast['unified_code'].isin(sold_out)
            # Guard keeps 'quantity' untouched when no forecast row is affected
            if mask.any():
                forecast.loc[mask, 'quantity'] = 0

        return forecast

    def apply_defecture_constraints(self, forecast: pd.DataFrame,
                                   defecture_list: pd.DataFrame,
                                   wb_stocks: pd.DataFrame,
                                   ozon_stocks: pd.DataFrame) -> pd.DataFrame:
        """Zero the forecast of out-of-supply products with no marketplace stock.

        A product is zeroed only when (a) none of its forecast rows are dated
        after the defecture ``end_date`` and (b) its combined latest WB + Ozon
        stock is <= 0. A missing ``end_date`` column or a missing value is
        treated as an open-ended defecture.

        Args:
            forecast: Frame with ``unified_code``, ``date``, ``quantity``.
            defecture_list: Frame with ``unified_code`` and optionally
                ``end_date``.
            wb_stocks: WB stock frame.
            ozon_stocks: Ozon stock frame.

        Returns:
            A modified copy of ``forecast``.
        """
        forecast = forecast.copy()

        if defecture_list.empty:
            return forecast

        latest_wb_stocks = self._get_latest_stocks(wb_stocks)
        latest_ozon_stocks = self._get_latest_stocks(ozon_stocks)

        for _, defecture_row in defecture_list.iterrows():
            unified_code = defecture_row['unified_code']
            end_date = defecture_row.get('end_date', pd.Timestamp.max)
            # A NaN end_date cannot be compared with datetimes; treat it as
            # "no end date", same as when the column is absent.
            if pd.isna(end_date):
                end_date = pd.Timestamp.max

            product_mask = forecast['unified_code'] == unified_code
            if not product_mask.any():
                continue

            has_rows_after_end = (product_mask & (forecast['date'] > end_date)).any()
            if has_rows_after_end:
                continue

            total_stock = (latest_wb_stocks.get(unified_code, 0)
                           + latest_ozon_stocks.get(unified_code, 0))
            if total_stock <= 0:
                forecast.loc[product_mask, 'quantity'] = 0

        return forecast

    def apply_box_constraints(self, shipments: pd.DataFrame,
                             box_sizes: Optional[Dict[str, int]] = None) -> pd.DataFrame:
        """Round every shipment up to a whole number of boxes.

        The box size for a row is, in order of precedence: the entry for its
        ``unified_code``, the ``'default'`` entry, ``self.default_box_size``.

        The computation is positional, so frames whose index contains
        duplicate labels are handled row by row.

        Args:
            shipments: Frame with ``unified_code`` and ``shipment`` columns.
            box_sizes: Optional override for ``self.box_sizes``.

        Returns:
            A copy with ``shipment`` rounded up to a multiple of the box size
            and a ``boxes`` column holding the box count (both float).
        """
        shipments = shipments.copy()

        if shipments.empty:
            return shipments

        if box_sizes is None:
            box_sizes = self.box_sizes

        lookup = box_sizes or {}
        fallback = self.default_box_size
        if isinstance(box_sizes, dict) and 'default' in box_sizes:
            fallback = box_sizes['default']

        per_box = shipments['unified_code'].map(
            lambda code: lookup[code] if code in lookup else fallback
        ).to_numpy(dtype=float)
        requested = shipments['shipment'].to_numpy(dtype=float)

        boxes = np.ceil(requested / per_box)
        shipments['shipment'] = boxes * per_box
        shipments['boxes'] = boxes

        return shipments

    def apply_shipment_withdraw_constraints(self, shipments: pd.DataFrame,
                                          withdraw_list: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``shipments`` with withdrawn products set to 0."""
        shipments = shipments.copy()

        if withdraw_list.empty:
            return shipments

        withdraw_codes = set(withdraw_list['unified_code'].unique())
        shipments.loc[shipments['unified_code'].isin(withdraw_codes), 'shipment'] = 0

        return shipments

    def apply_shipment_defecture_constraints(self, shipments: pd.DataFrame,
                                           defecture_list: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``shipments`` with defecture products set to 0."""
        shipments = shipments.copy()

        if defecture_list.empty:
            return shipments

        defecture_codes = set(defecture_list['unified_code'].unique())
        shipments.loc[shipments['unified_code'].isin(defecture_codes), 'shipment'] = 0

        return shipments

    def _get_latest_stocks(self, stocks: pd.DataFrame) -> Dict[str, float]:
        """Sum ``stock`` per ``unified_code`` on the latest date in ``stocks``.

        When a ``date`` column exists only rows with the maximum date are
        used, so a product without a row on that date is absent from the
        result. Returns ``{}`` for an empty frame or when the
        ``unified_code`` / ``stock`` columns are missing.
        """
        if stocks.empty:
            return {}

        if 'date' in stocks.columns:
            latest_stocks = stocks[stocks['date'] == stocks['date'].max()]
        else:
            latest_stocks = stocks

        if 'unified_code' in latest_stocks.columns and 'stock' in latest_stocks.columns:
            return latest_stocks.groupby('unified_code')['stock'].sum().to_dict()

        return {}

    def set_box_sizes(self, box_sizes: Dict[str, int]):
        """Replace the stored ``unified_code -> box size`` mapping."""
        self.box_sizes = box_sizes

# Cell-completion marker: reached only after the Constraints class body above has executed.
print("✓ Класс Constraints определен")


In [None]:
# ============================================================================
# КЛАСС ДЛЯ УПРАВЛЕНИЯ ПРОГНОЗАМИ
# ============================================================================

class ForecastManager:
    """Stores forecasts as CSV files and reads them back.

    Files are named ``{marketplace}_{model_name}_{YYYYmmdd_HHMMSS}.csv``;
    optional metadata goes to a sibling ``..._metadata.json`` file.
    """

    def __init__(self, storage_path: str = "forecast_history"):
        """Create the storage directory (including parents) if it is missing."""
        self.storage_path = Path(storage_path)
        self.storage_path.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _split_stem(stem: str) -> Optional[Tuple[str, str, str]]:
        """Split a file stem into ``(marketplace, model_name, forecast_date)``.

        Model names may contain underscores (e.g. ``baseline_mean``), so the
        timestamp written by ``save_forecast`` is taken from the right: the
        last two all-digit pieces. Stems without such a tail fall back to
        "first piece / second piece / rest". Returns ``None`` when the stem
        has fewer than three pieces.
        """
        pieces = stem.split('_')
        if len(pieces) < 3:
            return None
        if len(pieces) >= 4 and pieces[-2].isdigit() and pieces[-1].isdigit():
            return pieces[0], '_'.join(pieces[1:-2]), '_'.join(pieces[-2:])
        return pieces[0], pieces[1], '_'.join(pieces[2:])

    def save_forecast(self, forecast: pd.DataFrame, model_name: str,
                     marketplace: str, forecast_date: datetime = None,
                     metadata: Dict = None):
        """Write a forecast (and optional metadata) to the storage directory.

        Args:
            forecast: Frame written with ``to_csv(index=False)``.
            model_name: Model identifier used in the file name.
            marketplace: Marketplace identifier used in the file name.
            forecast_date: Timestamp for the file name; defaults to now.
            metadata: Optional dict dumped as JSON next to the CSV; values
                that are not JSON-serialisable are stringified.
        """
        if forecast_date is None:
            forecast_date = datetime.now()

        base_name = f"{marketplace}_{model_name}_{forecast_date.strftime('%Y%m%d_%H%M%S')}"
        forecast.to_csv(self.storage_path / f"{base_name}.csv", index=False)

        if metadata:
            import json
            metadata_filepath = self.storage_path / f"{base_name}_metadata.json"
            with open(metadata_filepath, 'w', encoding='utf-8') as f:
                json.dump(metadata, f, ensure_ascii=False, indent=2, default=str)

    def load_forecast(self, marketplace: str, model_name: str,
                     forecast_date: str = None) -> pd.DataFrame:
        """Load one stored forecast.

        Args:
            marketplace: Marketplace part of the file name.
            model_name: Model part of the file name.
            forecast_date: ``YYYYmmdd_HHMMSS`` string; when omitted the most
                recently modified matching file is used.

        Returns:
            The stored frame, or an empty ``DataFrame`` when nothing matches.
        """
        if forecast_date:
            filepath = self.storage_path / f"{marketplace}_{model_name}_{forecast_date}.csv"
        else:
            candidates = []
            for path in self.storage_path.glob(f"{marketplace}_{model_name}_*.csv"):
                parsed = self._split_stem(path.stem)
                # The glob alone would also accept longer model names that
                # merely start with `model_name`.
                if parsed and parsed[:2] == (marketplace, model_name):
                    candidates.append(path)
            if not candidates:
                return pd.DataFrame()
            filepath = max(candidates, key=lambda p: p.stat().st_mtime)

        if not filepath.exists():
            return pd.DataFrame()

        return pd.read_csv(filepath)

    def get_forecast_history(self, marketplace: str = None,
                            model_name: str = None,
                            unified_code: str = None) -> pd.DataFrame:
        """Concatenate stored forecasts, optionally filtered.

        Args:
            marketplace: Keep only files saved for this marketplace.
            model_name: Keep only files saved for exactly this model.
            unified_code: Keep only rows for this product (compared as
                strings, because CSV round-trips may change the dtype).

        Returns:
            All matching forecasts with ``marketplace``, ``model_name`` and
            ``forecast_date`` columns taken from the file names, or an empty
            ``DataFrame`` when nothing matches. Files that fail to load are
            reported and skipped.
        """
        all_forecasts = []

        pattern = f"{marketplace or '*'}_{model_name or '*'}_*.csv"

        for filepath in sorted(self.storage_path.glob(pattern)):
            parsed = self._split_stem(filepath.stem)
            if parsed is not None:
                file_marketplace, file_model, file_date = parsed
                # Wildcards over-match (e.g. '*_linear_regression_*' also hits
                # 'binary_linear_regression'), so confirm on the parsed parts.
                if marketplace and file_marketplace != marketplace:
                    continue
                if model_name and file_model != model_name:
                    continue

            try:
                forecast = pd.read_csv(filepath)
            except Exception as e:
                print(f"Ошибка загрузки {filepath}: {e}")
                continue

            if parsed is not None:
                forecast['marketplace'] = file_marketplace
                forecast['model_name'] = file_model
                forecast['forecast_date'] = file_date

            all_forecasts.append(forecast)

        if not all_forecasts:
            return pd.DataFrame()

        result = pd.concat(all_forecasts, ignore_index=True)

        if unified_code:
            result = result[result['unified_code'].astype(str) == str(unified_code)]

        return result

# Cell-completion marker: reached only after the ForecastManager class body above has executed.
print("✓ Класс ForecastManager определен")


### 2.5. Главный класс SalesForecaster


In [None]:
# ============================================================================
# ГЛАВНЫЙ КЛАСС ДЛЯ ПРОГНОЗИРОВАНИЯ
# ============================================================================

class SalesForecaster:
    """Runs the forecast -> model selection -> constraints -> shipments pipeline.

    The class wires together the helper objects created in ``__init__`` and
    keeps the loaded datasets in ``self.data`` (a dict of DataFrames; entries
    may be missing or ``None``) and the prepared models in ``self.models``.
    """

    def __init__(self, data_path: str = "data", forecast_months: int = 18):
        """Create helper objects; no data is loaded and no model is built yet.

        Args:
            data_path: Folder passed to ``DataLoader``.
            forecast_months: Horizon; converted to days as ``months * 30``.
        """
        self.data_path = data_path
        self.forecast_months = forecast_months

        self.data_loader = DataLoader(data_path)
        self.calendar_features = CalendarFeatures()
        self.evaluator = ModelEvaluator()
        self.shipment_calculator = ShipmentCalculator()
        self.constraints = Constraints()
        self.forecast_manager = ForecastManager()

        self.data = {}
        self.models = {}

    def _get_frame(self, key: str) -> pd.DataFrame:
        """Return ``self.data[key]`` or an empty frame when missing or ``None``."""
        frame = self.data.get(key)
        return frame if frame is not None else pd.DataFrame()

    def load_data(self):
        """Load every dataset through the data loader into ``self.data``."""
        print("Загрузка данных...")
        self.data = self.data_loader.load_all_data()
        print("Данные загружены")

    def prepare_models(self):
        """Instantiate every model whose optional dependency is available.

        Failures while building the statsmodels- or Prophet-based models are
        reported and the remaining models are still prepared.
        """
        print("Подготовка моделей...")

        self.models['baseline_mean'] = BaselineModel(method='mean')
        self.models['baseline_median'] = BaselineModel(method='median')
        self.models['baseline_last'] = BaselineModel(method='last')

        if SKLEARN_AVAILABLE:
            self.models['linear_regression'] = LinearRegressionModel(use_feature_selection=True)
            self.models['binary_linear_regression'] = BinaryLinearRegressionModel()

        if STATSMODELS_AVAILABLE:
            try:
                self.models['arima'] = ARIMAModel(order=(1, 1, 1))
                self.models['sarima'] = ARIMAModel(
                    order=(1, 1, 1),
                    seasonal_order=(1, 1, 1, 12)
                )
                self.models['sarimax'] = SARIMAXModel(
                    order=(1, 1, 1),
                    seasonal_order=(1, 1, 1, 12)
                )
            except Exception as e:
                # Best effort: keep going, but say why the models are missing
                print(f"  Не удалось подготовить ARIMA/SARIMA/SARIMAX модели: {e}")

        if PROPHET_AVAILABLE:
            try:
                holidays_df = self._prepare_prophet_holidays()
                self.models['prophet'] = ProphetModel(holidays=holidays_df)
            except Exception as e:
                print(f"  Не удалось подготовить Prophet модель: {e}")

        print(f"Подготовлено {len(self.models)} моделей")

    def _prepare_prophet_holidays(self) -> Optional[pd.DataFrame]:
        """Build the ``ds`` / ``holiday`` frame handed to ``ProphetModel``.

        Russian public holidays (2020-2029) are added only when the optional
        ``holidays`` package is installed; the Black Friday dates configured
        on ``self.calendar_features`` are always added.

        Returns:
            The holiday frame, or ``None`` when there is nothing to add.
        """
        holidays_list = []

        if HOLIDAYS_AVAILABLE:
            ru_holidays = holidays.Russia(years=range(2020, 2030))
            for date, name in ru_holidays.items():
                holidays_list.append({
                    'ds': pd.Timestamp(date),
                    'holiday': name
                })

        for bf_date in self.calendar_features.ozon_black_friday_dates:
            holidays_list.append({
                'ds': bf_date,
                'holiday': 'Black Friday Ozon'
            })

        for bf_date in self.calendar_features.wb_black_friday_dates:
            holidays_list.append({
                'ds': bf_date,
                'holiday': 'Black Friday WB'
            })

        return pd.DataFrame(holidays_list) if holidays_list else None

    def _fit_and_predict(self, model_name: str, model, product,
                         product_data: pd.DataFrame,
                         future_df: pd.DataFrame, horizon: int):
        """Fit ``model`` on one product and return its forecast values.

        The call signature depends on the model family: regression models
        receive the future feature frame, ``sarimax`` receives the ``is_*`` /
        ``month_*`` columns as exogenous data (or yields zeros when there are
        none), every other model only receives the number of periods.
        """
        if model_name in ('linear_regression', 'binary_linear_regression'):
            model.fit(product_data, product)
            return model.predict(product, future_df, periods=horizon)

        if model_name == 'sarimax':
            exog_cols = [col for col in product_data.columns
                         if col.startswith('is_') or col.startswith('month_')]
            if not exog_cols:
                return np.zeros(horizon)
            model.fit(product_data, product, exog_columns=exog_cols)
            return model.predict(product, future_df[exog_cols], periods=horizon)

        model.fit(product_data, product)
        return model.predict(product, periods=horizon)

    def forecast_sales(self, marketplace: str = 'wb',
                      unified_code: str = None,
                      evaluate: bool = True) -> Dict[str, pd.DataFrame]:
        """Forecast daily sales for one marketplace with every prepared model.

        Args:
            marketplace: Marketplace key passed to the data loader and the
                calendar features.
            unified_code: Restrict the run to one product; all products when
                falsy.
            evaluate: Cross-validate each model on products with more than
                10 rows of history.

        Returns:
            ``{model_name: frame}`` where each frame has ``date``,
            ``unified_code``, ``quantity`` and ``model_name`` columns; ``{}``
            when there is no sales data. A model that fails for a product is
            reported and skipped for that product only.
        """
        print(f"Прогнозирование продаж для {marketplace}...")

        sales_data = self.data_loader.prepare_sales_data(marketplace)
        if sales_data.empty:
            print(f"Нет данных о продажах для {marketplace}")
            return {}

        if unified_code:
            products = [unified_code]
        else:
            products = sales_data['unified_code'].unique()

        horizon_days = self.forecast_months * 30
        all_forecasts = {}

        for product in products:
            print(f"  Прогнозирование для продукта {product}...")

            product_data = sales_data[sales_data['unified_code'] == product].copy()
            if product_data.empty:
                continue

            product_data = self.calendar_features.add_calendar_features(
                product_data, marketplace=marketplace
            )

            last_date = product_data['date'].max()
            future_dates = pd.date_range(
                start=last_date + timedelta(days=1),
                periods=horizon_days,
                freq='D'
            )

            future_df = pd.DataFrame({'date': future_dates})
            future_df = self.calendar_features.add_calendar_features(
                future_df, marketplace=marketplace
            )
            future_df['unified_code'] = product

            for model_name, model in self.models.items():
                try:
                    forecast_values = self._fit_and_predict(
                        model_name, model, product,
                        product_data, future_df, len(future_dates)
                    )

                    forecast_df = pd.DataFrame({
                        'date': future_dates[:len(forecast_values)],
                        'unified_code': product,
                        'quantity': forecast_values,
                        'model_name': model_name
                    })
                except Exception as e:
                    print(f"    Ошибка прогнозирования моделью {model_name} для {product}: {e}")
                    continue

                all_forecasts.setdefault(model_name, []).append(forecast_df)

                if evaluate and len(product_data) > 10:
                    try:
                        self.evaluator.cross_validate(
                            product_data, model, product
                        )
                    except Exception as e:
                        print(f"    Ошибка оценки модели {model_name}: {e}")

        return {
            model_name: pd.concat(forecasts_list, ignore_index=True)
            for model_name, forecasts_list in all_forecasts.items()
            if forecasts_list
        }

    def select_best_forecasts(self, forecasts: Dict[str, pd.DataFrame],
                             marketplace: str) -> pd.DataFrame:
        """Pick one forecast per product.

        The model chosen by the evaluator (metric ``mape``) is used when it
        produced rows for the product. Otherwise the first model in
        ``forecasts`` that has rows for the product is used and a notice is
        printed, so products that were never evaluated still get a forecast.

        Args:
            forecasts: Output of ``forecast_sales``.
            marketplace: Accepted for interface symmetry; not used here.

        Returns:
            Concatenated per-product forecasts with a ``selected_model``
            column, or an empty ``DataFrame`` when ``forecasts`` has no rows.
        """
        print("Выбор лучших прогнозов...")

        best_forecasts = []
        all_products = set()
        for forecast_df in forecasts.values():
            all_products.update(forecast_df['unified_code'].unique())

        for product in all_products:
            best_model = self.evaluator.select_best_model(product, metric='mape')

            candidates = [best_model] if best_model in forecasts else []
            candidates += [name for name in forecasts if name != best_model]

            for model_name in candidates:
                model_frame = forecasts[model_name]
                product_forecast = model_frame[model_frame['unified_code'] == product]
                if product_forecast.empty:
                    continue
                if model_name != best_model:
                    print(f"  Для продукта {product} нет оценённой лучшей модели, "
                          f"используется {model_name}")
                product_forecast = product_forecast.copy()
                product_forecast['selected_model'] = model_name
                best_forecasts.append(product_forecast)
                break

        if not best_forecasts:
            return pd.DataFrame()

        return pd.concat(best_forecasts, ignore_index=True)

    def apply_constraints_to_forecast(self, forecast: pd.DataFrame) -> pd.DataFrame:
        """Apply withdraw and defecture rules to a sales forecast.

        Datasets that are missing, ``None`` or empty in ``self.data`` are
        skipped; an empty forecast is returned unchanged.
        """
        print("Применение ограничений к прогнозу...")

        if forecast.empty:
            return forecast

        withdraw = self._get_frame('withdraw')
        if not withdraw.empty:
            forecast = self.constraints.apply_withdraw_constraints(
                forecast,
                withdraw,
                self._get_frame('wb_stocks'),
                self._get_frame('ozon_stocks'),
                self._get_frame('our_stocks')
            )

        defecture = self._get_frame('defecture')
        if not defecture.empty:
            forecast = self.constraints.apply_defecture_constraints(
                forecast,
                defecture,
                self._get_frame('wb_stocks'),
                self._get_frame('ozon_stocks')
            )

        return forecast

    def calculate_shipments(self, forecast: pd.DataFrame,
                           marketplace: str = 'wb') -> pd.DataFrame:
        """Turn a sales forecast into a box-rounded shipment plan.

        When the forecast has a ``date`` column it is first summed per
        calendar month and product (dated at the month start). Withdraw and
        defecture products are zeroed before rounding to boxes.

        Returns:
            The shipment frame, or an empty ``DataFrame`` when the forecast
            is empty or the marketplace has no stock data.
        """
        print(f"Расчет отгрузок для {marketplace}...")

        stocks = self._get_frame(f'{marketplace}_stocks')
        if stocks.empty:
            print(f"Нет данных об остатках для {marketplace}")
            return pd.DataFrame()

        if forecast.empty:
            print(f"Нет прогноза продаж для {marketplace}")
            return pd.DataFrame()

        forecast_for_shipment = forecast.copy()
        if 'date' in forecast_for_shipment.columns:
            forecast_for_shipment['date'] = pd.to_datetime(forecast_for_shipment['date'])
            forecast_for_shipment['year_month'] = forecast_for_shipment['date'].dt.to_period('M')
            forecast_monthly = forecast_for_shipment.groupby(
                ['year_month', 'unified_code']
            ).agg({
                'quantity': 'sum'
            }).reset_index()
            forecast_monthly['date'] = forecast_monthly['year_month'].dt.to_timestamp()
            forecast_for_shipment = forecast_monthly[['date', 'unified_code', 'quantity']]

        shipments = self.shipment_calculator.calculate_shipments(
            forecast_for_shipment, stocks, marketplace=marketplace
        )

        withdraw = self._get_frame('withdraw')
        if not withdraw.empty:
            shipments = self.constraints.apply_shipment_withdraw_constraints(
                shipments, withdraw
            )

        defecture = self._get_frame('defecture')
        if not defecture.empty:
            shipments = self.constraints.apply_shipment_defecture_constraints(
                shipments, defecture
            )

        shipments = self.constraints.apply_box_constraints(shipments)

        return shipments

# Cell-completion marker: reached only after the SalesForecaster class body above has executed.
print("✓ Класс SalesForecaster определен")


## 3. Загрузка данных

Теперь все классы определены. Можно загружать данные и запускать прогнозирование.


### 3.1. Загрузка исходных данных и исторических отгрузок


In [None]:
# Path to the data folder; set it to match your environment.
# Example from the original note: notebook in Downloads, data in Desktop/project/data.
# Alternative when the notebook sits inside the project folder:
# data_path = 'data'
data_path = '../data'

# A single loader serves both the main datasets and the shipment history
loader = DataLoader(data_path)
data = loader.load_all_data()
historical_shipments = loader.load_historical_shipments('Отгрузки в МП')

# Row count per dataset; None or empty frames are reported as "no data"
print("Загруженные данные:")
for key, df in data.items():
    if df is None or df.empty:
        print(f"  {key}: нет данных")
        continue
    print(f"  {key}: {len(df)} записей")

if historical_shipments is None or historical_shipments.empty:
    print("\nИсторические отгрузки: файл не найден или пуст")
else:
    print(f"\nИсторические отгрузки: {len(historical_shipments)} записей")
    print(historical_shipments.head())


In [None]:
# Эта ячейка намеренно оставлена пустой: импорты выполняются в разделе 1,
# а все классы определены выше в этом notebook (внешние модули не требуются).


## 4. Анализ исторических данных отгрузок


In [None]:
# Sales-vs-shipments analysis per marketplace, followed by two overview charts.
if historical_shipments is None or historical_shipments.empty:
    print("⚠ Исторические отгрузки отсутствуют - анализ пропущен")
else:
    separator = "=" * 60
    print(separator)
    print("АНАЛИЗ ИСТОРИЧЕСКИХ ДАННЫХ ОТГРУЗОК")
    print(separator)

    # One calculator instance is reused for both marketplaces
    shipment_calc = ShipmentCalculator()

    # Wildberries
    if 'wb_sales' in data and not data['wb_sales'].empty:
        print("\n📊 Анализ отгрузок для Wildberries:")
        wb_analysis = shipment_calc.analyze_shipment_calculation(
            data['wb_sales'],
            data.get('wb_stocks', pd.DataFrame()),
            historical_shipments
        )
        if not wb_analysis:
            print("  ⚠ Недостаточно данных для анализа")
        else:
            print("  Результаты анализа:")
            for key, value in wb_analysis.items():
                # Only numeric entries can be rendered with the .2f format
                if not isinstance(value, (int, float)):
                    continue
                print(f"    - {key}: {value:.2f}")

    # Ozon
    if 'ozon_sales' in data and not data['ozon_sales'].empty:
        print("\n📊 Анализ отгрузок для Ozon:")
        ozon_analysis = shipment_calc.analyze_shipment_calculation(
            data['ozon_sales'],
            data.get('ozon_stocks', pd.DataFrame()),
            historical_shipments
        )
        if not ozon_analysis:
            print("  ⚠ Недостаточно данных для анализа")
        else:
            print("  Результаты анализа:")
            for key, value in ozon_analysis.items():
                if not isinstance(value, (int, float)):
                    continue
                print(f"    - {key}: {value:.2f}")

    # Charts: total shipments over time (top) and the ten largest products (bottom)
    print("\n📈 Построение графиков...")
    fig, axes = plt.subplots(2, 1, figsize=(15, 10))
    timeline_ax = axes[0]
    ranking_ax = axes[1]

    shipments_by_date = historical_shipments.groupby('date')['quantity'].sum()
    timeline_ax.plot(shipments_by_date.index, shipments_by_date.values, marker='o', linewidth=2)
    timeline_ax.set_title('Исторические отгрузки по времени', fontsize=14, fontweight='bold')
    timeline_ax.set_xlabel('Дата', fontsize=12)
    timeline_ax.set_ylabel('Количество упаковок', fontsize=12)
    timeline_ax.grid(True, alpha=0.3)

    shipped_per_product = historical_shipments.groupby('unified_code')['quantity'].sum()
    top_products = shipped_per_product.sort_values(ascending=False).head(10)
    bar_positions = range(len(top_products))
    ranking_ax.barh(bar_positions, top_products.values, color='steelblue')
    ranking_ax.set_yticks(bar_positions)
    ranking_ax.set_yticklabels(top_products.index)
    ranking_ax.set_title('Топ-10 продуктов по отгрузкам', fontsize=14, fontweight='bold')
    ranking_ax.set_xlabel('Количество упаковок', fontsize=12)
    ranking_ax.grid(True, alpha=0.3, axis='x')

    plt.tight_layout()
    plt.show()
    print("✓ Графики построены")


## 5. Анализ исторических отгрузок (краткая версия)


In [None]:
# NOTE(review): this cell repeats the analysis and charts of the preceding
# analysis cell with plainer output (raw dict prints, unstyled charts).
have_shipment_history = historical_shipments is not None and not historical_shipments.empty
if have_shipment_history:
    # Relationship between sales and shipments, per marketplace
    shipment_calc = ShipmentCalculator()

    # Wildberries
    if 'wb_sales' in data and not data['wb_sales'].empty:
        print("Анализ отгрузок для Wildberries:")
        wb_analysis = shipment_calc.analyze_shipment_calculation(
            data['wb_sales'],
            data.get('wb_stocks', pd.DataFrame()),
            historical_shipments
        )
        print(wb_analysis)

    # Ozon
    if 'ozon_sales' in data and not data['ozon_sales'].empty:
        print("\nАнализ отгрузок для Ozon:")
        ozon_analysis = shipment_calc.analyze_shipment_calculation(
            data['ozon_sales'],
            data.get('ozon_stocks', pd.DataFrame()),
            historical_shipments
        )
        print(ozon_analysis)

    # Charts: shipments over time (top) and the ten largest products (bottom)
    fig, axes = plt.subplots(2, 1, figsize=(15, 10))
    timeline_ax = axes[0]
    ranking_ax = axes[1]

    shipments_by_date = historical_shipments.groupby('date')['quantity'].sum()
    timeline_ax.plot(shipments_by_date.index, shipments_by_date.values)
    timeline_ax.set_title('Исторические отгрузки по времени')
    timeline_ax.set_xlabel('Дата')
    timeline_ax.set_ylabel('Количество упаковок')
    timeline_ax.grid(True)

    shipped_per_product = historical_shipments.groupby('unified_code')['quantity'].sum()
    top_products = shipped_per_product.sort_values(ascending=False).head(10)
    bar_positions = range(len(top_products))
    ranking_ax.barh(bar_positions, top_products.values)
    ranking_ax.set_yticks(bar_positions)
    ranking_ax.set_yticklabels(top_products.index)
    ranking_ax.set_title('Топ-10 продуктов по отгрузкам')
    ranking_ax.set_xlabel('Количество упаковок')

    plt.tight_layout()
    plt.show()


## 6. Настройка параметров прогнозирования


In [None]:
# Build the forecaster and hand it the datasets loaded earlier
forecaster = SalesForecaster(data_path=data_path, forecast_months=18)
forecaster.data = data
forecaster.data['historical_shipments'] = historical_shipments

# Coverage coefficient: 1.5 unless the WB analysis of historical data
# provides a value (shipment-based coverage is preferred over stock-based).
coverage_coefficient = 1.5
have_shipment_history = historical_shipments is not None and not historical_shipments.empty
if have_shipment_history and 'wb_sales' in data and not data['wb_sales'].empty:
    shipment_calc = ShipmentCalculator()
    wb_analysis = shipment_calc.analyze_shipment_calculation(
        data['wb_sales'],
        data.get('wb_stocks', pd.DataFrame()),
        historical_shipments
    )
    if wb_analysis:
        coverage_sources = (
            ('avg_coverage_from_shipments',
             "Коэффициент покрытия рассчитан на основе исторических данных"),
            ('avg_coverage_ratio',
             "Коэффициент покрытия рассчитан на основе остатков"),
        )
        for analysis_key, message in coverage_sources:
            if analysis_key in wb_analysis:
                coverage_coefficient = wb_analysis[analysis_key]
                print(f"{message}: {coverage_coefficient:.2f}")
                break

forecaster.shipment_calculator.set_coverage_coefficient(coverage_coefficient)
print(f"Используемый коэффициент покрытия: {coverage_coefficient}")

# Box sizes: only the default (24 units per box) is configured here
box_sizes = {'default': 24}
forecaster.constraints.set_box_sizes(box_sizes)
print(f"Размер короба по умолчанию: {box_sizes['default']} шт.")

# Black Friday dates per marketplace
ozon_bf = ['2023-11-24', '2024-11-29', '2025-11-28']
wb_bf = ['2023-11-24', '2024-11-29', '2025-11-28']
forecaster.calendar_features.add_black_friday_dates(ozon_bf, wb_bf)
print("Черная пятница настроена")

# Instantiate the models that will compete for each product
forecaster.prepare_models()
print(f"\nПодготовлено {len(forecaster.models)} моделей")


## 7. Прогнозирование продаж для Wildberries


In [None]:
# Run every prepared model for Wildberries (with evaluation enabled)
wb_forecasts = forecaster.forecast_sales(marketplace='wb', evaluate=True)

# One line per model with the number of forecast rows it produced
print(f"Создано прогнозов: {len(wb_forecasts)}")
for model_name in wb_forecasts:
    forecast_df = wb_forecasts[model_name]
    print(f"  {model_name}: {len(forecast_df)} записей")


## 8. Выбор лучших прогнозов и применение ограничений


In [None]:
# Pick one forecast per product
wb_best_forecast = forecaster.select_best_forecasts(wb_forecasts, marketplace='wb')

if wb_best_forecast.empty:
    # select_best_forecasts returns a column-less DataFrame when nothing was
    # selected, so the column lookups below would raise KeyError.
    print("Лучший прогноз: нет данных (ни для одного продукта не выбрана модель)")
else:
    print(f"Лучший прогноз: {len(wb_best_forecast)} записей")
    print(f"Уникальных продуктов: {wb_best_forecast['unified_code'].nunique()}")
    print("\nРаспределение по моделям:")
    print(wb_best_forecast['selected_model'].value_counts())

    # Apply withdraw / defecture constraints to the selected forecast
    wb_best_forecast = forecaster.apply_constraints_to_forecast(wb_best_forecast)

    print(f"\nОбщий прогноз продаж: {wb_best_forecast['quantity'].sum():.0f} шт.")


## 9. Расчет отгрузок для Wildberries


In [None]:
# Convert the selected WB forecast into a shipment plan
wb_shipments = forecaster.calculate_shipments(wb_best_forecast, marketplace='wb')

print(f"Отгрузки: {len(wb_shipments)} записей")
if wb_shipments.empty:
    print("Нет данных об отгрузках")
else:
    # Summary figures
    print(f"Уникальных складов: {wb_shipments['warehouse'].nunique()}")
    print(f"Уникальных продуктов: {wb_shipments['unified_code'].nunique()}")
    print(f"Общая отгрузка: {wb_shipments['shipment'].sum():.0f} шт.")
    print("\nТоп-10 продуктов по отгрузкам:")
    shipped_per_product = wb_shipments.groupby('unified_code')['shipment'].sum()
    top_shipments = shipped_per_product.sort_values(ascending=False).head(10)
    print(top_shipments)

    # Charts: planned shipments over time (top) and per warehouse (bottom)
    fig, axes = plt.subplots(2, 1, figsize=(15, 10))
    timeline_ax = axes[0]
    warehouse_ax = axes[1]

    shipments_by_date = wb_shipments.groupby('date')['shipment'].sum()
    timeline_ax.plot(shipments_by_date.index, shipments_by_date.values, marker='o')
    timeline_ax.set_title('Прогнозные отгрузки по времени (WB)')
    timeline_ax.set_xlabel('Дата')
    timeline_ax.set_ylabel('Количество упаковок')
    timeline_ax.grid(True)

    shipped_per_warehouse = wb_shipments.groupby('warehouse')['shipment'].sum()
    shipments_by_warehouse = shipped_per_warehouse.sort_values(ascending=False)
    bar_positions = range(len(shipments_by_warehouse))
    warehouse_ax.barh(bar_positions, shipments_by_warehouse.values)
    warehouse_ax.set_yticks(bar_positions)
    warehouse_ax.set_yticklabels(shipments_by_warehouse.index)
    warehouse_ax.set_title('Отгрузки по складам (WB)')
    warehouse_ax.set_xlabel('Количество упаковок')

    plt.tight_layout()
    plt.show()


## 10. Прогнозирование для Ozon


In [None]:
# Full pipeline for Ozon: forecast -> best model per product -> constraints -> shipments
ozon_forecasts = forecaster.forecast_sales(marketplace='ozon', evaluate=True)
ozon_best_forecast = forecaster.select_best_forecasts(ozon_forecasts, marketplace='ozon')

if ozon_best_forecast.empty:
    # No Ozon sales data (or no usable model) yields a column-less DataFrame;
    # skip the later steps instead of failing on the missing columns.
    ozon_shipments = pd.DataFrame()
    print("Ozon - Прогноз продаж: нет данных")
else:
    ozon_best_forecast = forecaster.apply_constraints_to_forecast(ozon_best_forecast)
    ozon_shipments = forecaster.calculate_shipments(ozon_best_forecast, marketplace='ozon')

    print(f"Ozon - Прогноз продаж: {ozon_best_forecast['quantity'].sum():.0f} шт.")
    if not ozon_shipments.empty:
        print(f"Ozon - Отгрузки: {ozon_shipments['shipment'].sum():.0f} шт.")
        print(f"Ozon - Складов: {ozon_shipments['warehouse'].nunique()}")


## 11. Оценка моделей


In [None]:
# Fetch the two summary tables exposed by the forecaster's evaluator
evaluation_summary = forecaster.evaluator.get_evaluation_summary()
best_models_summary = forecaster.evaluator.get_best_models_summary()


def _barh_per_model(target_ax, per_model, title, xlabel):
    """Draw one horizontal bar per entry of per_model, labelled by its index."""
    bar_positions = range(len(per_model))
    target_ax.barh(bar_positions, per_model.values)
    target_ax.set_yticks(bar_positions)
    target_ax.set_yticklabels(per_model.index)
    target_ax.set_title(title)
    target_ax.set_xlabel(xlabel)


if not evaluation_summary.empty:
    print("Оценка моделей:")
    print(evaluation_summary.head(20))

    # 2x2 grid: MAE, MAPE, R² bar charts plus a pie of the best models
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))

    # Mean MAE per model, smallest first
    mae_by_model = evaluation_summary.groupby('model_name')['mae'].mean().sort_values()
    _barh_per_model(axes[0, 0], mae_by_model, 'Средний MAE по моделям', 'MAE')

    # Mean MAPE per model, smallest first
    mape_by_model = evaluation_summary.groupby('model_name')['mape'].mean().sort_values()
    _barh_per_model(axes[0, 1], mape_by_model, 'Средний MAPE по моделям', 'MAPE (%)')

    # Mean R² per model, largest first
    r2_by_model = (
        evaluation_summary.groupby('model_name')['r2']
        .mean()
        .sort_values(ascending=False)
    )
    _barh_per_model(axes[1, 0], r2_by_model, 'Средний R² по моделям', 'R²')

    # Share of each value in the best_model column; drawn only when the
    # best-models table has rows
    if not best_models_summary.empty:
        best_models_dist = best_models_summary['best_model'].value_counts()
        axes[1, 1].pie(
            best_models_dist.values,
            labels=best_models_dist.index,
            autopct='%1.1f%%',
        )
        axes[1, 1].set_title('Распределение лучших моделей')

    plt.tight_layout()
    plt.show()

# The best-models table is printed independently of the evaluation table
if not best_models_summary.empty:
    print("\nЛучшие модели по продуктам:")
    print(best_models_summary.head(20))


## 10. Сохранение результатов


In [None]:
# Directory that receives every exported CSV file
output_dir = Path('../output')
output_dir.mkdir(exist_ok=True)


def _export_if_not_empty(frame, file_name, done_message):
    """Write frame to output_dir/file_name and print done_message, unless frame is empty."""
    if frame.empty:
        return
    frame.to_csv(output_dir / file_name, index=False, encoding='utf-8-sig')
    print(done_message)


# Sales forecasts are written unconditionally
for forecast_frame, forecast_file in (
    (wb_best_forecast, 'wb_forecast.csv'),
    (ozon_best_forecast, 'ozon_forecast.csv'),
):
    forecast_frame.to_csv(output_dir / forecast_file, index=False, encoding='utf-8-sig')
print("Прогнозы продаж сохранены")

# Shipments
_export_if_not_empty(wb_shipments, 'wb_shipments.csv', "Отгрузки WB сохранены")
_export_if_not_empty(ozon_shipments, 'ozon_shipments.csv', "Отгрузки Ozon сохранены")

# Model evaluation
_export_if_not_empty(evaluation_summary, 'model_evaluation.csv', "Оценка моделей сохранена")
_export_if_not_empty(best_models_summary, 'best_models.csv', "Лучшие модели сохранены")

print(f"\nВсе результаты сохранены в папку {output_dir}")


## 11. Пример анализа конкретного продукта


In [None]:
# Analyse one product: the first unified_code found in the WB forecast
if not wb_best_forecast.empty:
    product_code = wb_best_forecast['unified_code'].iloc[0]
    print(f"Анализ продукта: {product_code}")

    # The chart is drawn only when a non-empty 'wb_sales' table is present
    if 'wb_sales' in data and not data['wb_sales'].empty:
        wb_history = data['wb_sales']

        # Rows for this product, ordered by date
        product_sales = (
            wb_history.loc[wb_history['unified_code'] == product_code]
            .copy()
            .sort_values('date')
        )
        product_forecast = (
            wb_best_forecast.loc[wb_best_forecast['unified_code'] == product_code]
            .copy()
            .sort_values('date')
        )

        # History and forecast share one axis; the forecast line is dashed
        history_style = dict(label='Исторические продажи', marker='o', alpha=0.7)
        forecast_style = dict(label='Прогноз', marker='s', linestyle='--', alpha=0.7)

        fig, ax = plt.subplots(figsize=(15, 6))
        ax.plot(product_sales['date'], product_sales['quantity'], **history_style)
        ax.plot(product_forecast['date'], product_forecast['quantity'], **forecast_style)

        ax.set_title(f'Продажи и прогноз для продукта {product_code}')
        ax.set_xlabel('Дата')
        ax.set_ylabel('Количество упаковок')
        ax.legend()
        ax.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

        # Report the model from the first forecast row, or 'N/A' when the
        # forecast has no selected_model column
        if 'selected_model' in product_forecast.columns:
            selected_model = product_forecast['selected_model'].iloc[0]
        else:
            selected_model = 'N/A'
        print(f"Выбранная модель: {selected_model}")
