In [None]:
import pandas as pd
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, HistGradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt

# Helper for computing the mean absolute percentage error (MAPE)
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE), in percent.

    Args:
        y_true: Array-like of observed values.
        y_pred: Array-like of predicted values, broadcastable against
            ``y_true``.

    Returns:
        ``mean(|y_true - y_pred| / |y_true|) * 100`` taken over the positions
        where ``y_true`` is non-zero. ``nan`` is returned when there is no
        such position (all targets are zero, or the input is empty).

    Note:
        The percentage error is undefined where ``y_true == 0``. Dividing by
        those entries would turn the whole metric into ``inf``/``nan``, so
        they are excluded from the average instead.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    # Broadcast first so the zero-mask lines up with the error array even if
    # one of the inputs is a scalar.
    abs_error, denom = np.broadcast_arrays(np.abs(y_true - y_pred), np.abs(y_true))

    valid = denom != 0
    if not valid.any():
        return float("nan")
    return np.mean(abs_error[valid] / denom[valid]) * 100

# Load the data
file_path = '/content/imputed_filled_P_l_LR1.csv'  # Replace with the actual path to the file
data = pd.read_csv(file_path)

# Parse the timestamps as timezone-aware (UTC) datetimes and index the frame by them
data['time'] = pd.to_datetime(data['time'], utc=True)
data.set_index('time', inplace=True)

# Derive the calendar features straight from the DatetimeIndex.
# This is a vectorized replacement for a per-row Python loop that wrote each
# cell through data.at[...]; the resulting values are the same hour (0-23)
# and day-of-week (Monday=0 ... Sunday=6) numbers.
data['hour'] = data.index.hour
data['day_of_week'] = data.index.dayofweek

# Number of trailing rows held out for testing. The original note says the
# data arrives every 5 minutes, so 24 * 60 // 5 rows corresponds to one day.
test_size = 24 * 60 // 5

# Chronological split: the last `test_size` rows are the test set,
# everything before them is the training set.
train_data, test_data = data.iloc[:-test_size], data.iloc[-test_size:]

# Features are the two calendar columns; the target is the 'P_l' column.
feature_columns = ['hour', 'day_of_week']
X_train, y_train = train_data[feature_columns], train_data['P_l']
X_test, y_test = test_data[feature_columns], test_data['P_l']

# Standardize the features; the scaler is fitted on the training rows only
# and then applied unchanged to the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit(X_train).transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit the three base regressors on the scaled training features.
# `fit` returns the estimator itself, so each name is bound to the fitted model.
svr_model = SVR().fit(X_train_scaled, y_train)
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train_scaled, y_train)
hgb_model = HistGradientBoostingRegressor(random_state=42).fit(X_train_scaled, y_train)

# Per-model predictions on the held-out rows
y_pred_svr = svr_model.predict(X_test_scaled)
y_pred_rf = rf_model.predict(X_test_scaled)
y_pred_hgb = hgb_model.predict(X_test_scaled)

# Ensemble: unweighted element-wise average of the three prediction vectors
ensemble_pred = np.stack([y_pred_svr, y_pred_rf, y_pred_hgb]).mean(axis=0)

# Metrics for the ensemble prediction on the held-out rows
ensemble_mse = mean_squared_error(y_test, ensemble_pred)
ensemble_mse_sqrt = np.sqrt(ensemble_mse)  # root of the MSE, i.e. RMSE
ensemble_mae = mean_absolute_error(y_test, ensemble_pred)
ensemble_r2 = r2_score(y_test, ensemble_pred)
ensemble_mape = mean_absolute_percentage_error(y_test, ensemble_pred)

# The MSE line previously printed the square root under the "(MSE)" label;
# MSE and RMSE are now reported separately, each under its own label.
print("Ансамбль Среднеквадратичная ошибка (MSE): ", ensemble_mse)
print("Ансамбль Корень из среднеквадратичной ошибки (RMSE): ", ensemble_mse_sqrt)
print("Ансамбль Средняя абсолютная ошибка (MAE): ", ensemble_mae)
print("Ансамбль Коэффициент детерминации (R^2): ", ensemble_r2)
print("Ансамбль Средняя абсолютная процентная ошибка (MAPE): ", ensemble_mape)


# Visualization: actual values and every forecast on one time axis
plt.figure(figsize=(12, 6))

# Ground truth is drawn first, fully opaque
plt.plot(test_data.index, y_test, label='Actual Values', color='blue')

# Forecast curves share the same semi-transparent style; only the data,
# legend label and colour differ, so they are drawn from a small table.
forecast_curves = (
    (y_pred_svr, 'SVR Predictions', 'red'),
    (y_pred_rf, 'RF Predictions', 'green'),
    (y_pred_hgb, 'HGB Predictions', 'orange'),
    (ensemble_pred, 'Ensemble Predictions', 'purple'),
)
for curve_values, curve_label, curve_color in forecast_curves:
    plt.plot(test_data.index, curve_values, label=curve_label, color=curve_color, alpha=0.7)

plt.title('Actual and Predicted Electricity Consumption')
plt.xlabel('Time')
plt.ylabel('Electricity Consumption')
plt.legend()
plt.show()
