# In [1]:
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import median_absolute_error
from sklearn.preprocessing import LabelEncoder

# 1. Load the data
# Both CSV files are resolved relative to the current working directory;
# train.csv is read first, then test.csv.
train, test = (pd.read_csv(csv_name) for csv_name in ("train.csv", "test.csv"))

# 2. Data preprocessing
# The same steps are applied to both frames. Every operation below mutates the
# frame in place, so `train` and `test` keep referring to the same objects.
for frame in (train, test):
    # Parse the date column and derive calendar features from it.
    frame['date'] = pd.to_datetime(frame['date'])
    frame['year'] = frame['date'].dt.year
    frame['month'] = frame['date'].dt.month
    # isocalendar() returns the nullable UInt32 extension dtype, which older
    # LightGBM releases reject as a feature column; store a plain NumPy integer.
    # NOTE(review): assumes every date parses (no NaT) - confirm for this data.
    frame['week'] = frame['date'].dt.isocalendar().week.astype('int64')

    # Forward-fill missing values. DataFrame.ffill replaces the deprecated
    # fillna(method='ffill') spelling. Values missing before the first observed
    # row of a column have nothing to copy from and stay NaN.
    frame.ffill(inplace=True)

# Encode the categorical store type as integers. The encoder is fitted on the
# training data only, so a label that occurs only in the test set makes
# transform() raise ValueError.
le = LabelEncoder()
train['store_type'] = le.fit_transform(train['store_type'])
test['store_type'] = le.transform(test['store_type'])

# 3. Build the training and validation sets
# Columns fed to the model, in the order the model will see them.
features = [
    # identifiers
    'store_id',
    'department_id',
    # external conditions
    'temperature',
    'fuel_price',
    # promotional discounts
    'promodisc1',
    'promodisc2',
    'promodisc3',
    'promodisc4',
    'promodisc5',
    # economic indicators
    'cpi',
    'unemployment',
    # store attributes
    'store_type',
    'store_size',
    # calendar features derived from the date
    'year',
    'month',
    'week',
]
X, y = train[features], train['weekly_sales']

# Hold out a random 20% of the rows for validation; the seed keeps the split
# reproducible between runs.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 4. Train the model
model = lgb.LGBMRegressor(n_estimators=500, learning_rate=0.05, random_state=42)
# LightGBM 4.0 removed the `early_stopping_rounds` and `verbose` keyword
# arguments of fit(); the same behaviour is requested through callbacks:
# stop once the validation metric has not improved for 50 rounds, and log the
# evaluation result every 50 rounds.
model.fit(
    X_train,
    y_train,
    eval_set=[(X_valid, y_valid)],
    callbacks=[
        lgb.early_stopping(stopping_rounds=50),
        lgb.log_evaluation(period=50),
    ],
)

# 5. Predict weekly sales for the test rows
predictions = model.predict(test[features])

# 6. Write submission.csv
# The submission holds the row keys plus the predicted target; assign()
# returns a new frame, so `test` itself is left unchanged.
key_columns = ['store_id', 'department_id', 'date']
submission = test[key_columns].assign(weekly_sales=predictions)
submission.to_csv("submission.csv", index=False)

print("Файл submission.csv успешно создан!")

# Output of the cell above:
# FileNotFoundError: [Errno 2] No such file or directory: 'train.csv'