In [190]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder

In [192]:
# 1️⃣ **Veriyi Yükleme ve Feature Engineering**
electricity_data = pd.read_csv("electricity-data-new.csv")
print(electricity_data["hour"].unique())

['0:00' '1:00' '2:00' '3:00' '4:00' '5:00' '6:00' '7:00' '8:00' '9:00'
 '10:00' '11:00' '12:00' '13:00' '14:00' '15:00' '16:00' '17:00' '18:00'
 '19:00' '20:00' '21:00' '22:00' '23:00']


In [194]:
# Saatleri tam iki haneli forma dönüştür
electricity_data["hour"] = electricity_data["hour"].apply(lambda x: f"{int(x.split(':')[0]):02d}:00")

# İlk iki karakteri al ve integer'a çevir
electricity_data["hour_numeric"] = electricity_data["hour"].str[:2].astype(int)


In [196]:
electricity_data["date"] = pd.to_datetime(electricity_data["date"], utc=True, errors='coerce').dt.tz_convert(None)

electricity_data["year"] = electricity_data["date"].dt.year
electricity_data["month"] = electricity_data["date"].dt.month
electricity_data["day"] = electricity_data["date"].dt.day
electricity_data["day_of_week"] = electricity_data["date"].dt.weekday
electricity_data["is_weekend"] = (electricity_data["day_of_week"] >= 5).astype(int)
electricity_data["hour_numeric"] = electricity_data["hour"].str[:2].astype(int)

electricity_data["season"] = electricity_data["month"].map({12: "Winter", 1: "Winter", 2: "Winter",
                                                                3: "Spring", 4: "Spring", 5: "Spring",
                                                                6: "Summer", 7: "Summer", 8: "Summer",
                                                                9: "Autumn", 10: "Autumn", 11: "Autumn"})

In [198]:
electricity_data.shape

(8784, 62)

In [200]:
def categorize_hour(hour):
    if 0 <= hour < 6:
        return "Night"
    elif 6 <= hour < 12:
        return "Morning"
    elif 12 <= hour < 18:
        return "Afternoon"
    else:
        return "Evening"

electricity_data["hour_group"] = electricity_data["hour_numeric"].apply(categorize_hour)

In [202]:
electricity_data.head()

Unnamed: 0,date,hour,price,priceUsd,priceEur,toplam,dogalgaz,ruzgar,linyit,tasKomur,...,IST A-(C),IST B (Blok40+ Blok50),hour_numeric,year,month,day,day_of_week,is_weekend,season,hour_group
0,2023-12-31 21:00:00,00:00,1299.98,44.16,39.91,26901.4,1834.27,914.83,4860.0,255.0,...,0,240,0,2023.0,12.0,31.0,6.0,1,Winter,Night
1,2023-12-31 22:00:00,01:00,1299.98,44.16,39.91,25186.73,1647.48,1052.13,4750.0,255.0,...,0,240,1,2023.0,12.0,31.0,6.0,1,Winter,Night
2,2023-12-31 23:00:00,02:00,1248.54,42.41,38.33,23654.93,1536.88,1197.99,4628.9,255.0,...,0,240,2,2023.0,12.0,31.0,6.0,1,Winter,Night
3,2024-01-01 00:00:00,03:00,1299.98,44.16,39.91,22876.03,1534.68,1420.53,4675.0,255.0,...,0,240,3,2024.0,1.0,1.0,0.0,0,Winter,Night
4,2024-01-01 01:00:00,04:00,1200.0,40.76,36.84,22379.77,1534.98,1665.99,4595.0,255.0,...,0,240,4,2024.0,1.0,1.0,0.0,0,Winter,Night


In [204]:
# Label Encoding
electricity_data["season"] = LabelEncoder().fit_transform(electricity_data["season"])
electricity_data["hour_group"] = LabelEncoder().fit_transform(electricity_data["hour_group"])

In [207]:
# Santral çalışma durumlarını (operational) tek seferde ekleyelim
selected_players = [col for col in electricity_data.columns if col not in ["date", "hour", "price", "priceUsd", "priceEur", "toplam", "dogalgaz", "ruzgar", "linyit", "tasKomur"]]

# Tüm santralleri sayısal formata çevir
electricity_data[selected_players] = electricity_data[selected_players].apply(pd.to_numeric, errors='coerce')

# Tek seferde operational sütunlarını oluştur
operational_status = (electricity_data[selected_players] >= 200).astype(int)

# Yeni sütun isimleri ver
operational_status.columns = [f"{col}_operational" for col in selected_players]

# Ana veri setine toplu olarak ekleyelim
electricity_data = pd.concat([electricity_data, operational_status], axis=1)

# Oluşan sütunları kontrol et
print([col for col in electricity_data.columns if '_operational' in col])


['ithalKomur_operational', 'fuelOil_operational', 'jeotermal_operational', 'barajli_operational', 'nafta_operational', 'biokutle_operational', 'akarsu_operational', 'diger_operational', 'lep_operational', 'consumption_operational', 'toplam_uga_operational', 'ruzgar_uga_operational', 'biyogaz_operational', 'kanalTipi_operational', 'biyokutle_operational', 'gunes_operational', 'diger_uga_operational', 'ACWA_operational', 'AKENRJ ERZIN_operational', 'AKSA ANT_operational', 'BAN1_operational', 'BAN2_operational', 'BAYMINA_operational', 'BILGIN1_operational', 'BILGIN2_operational', 'BURSA BLOK1_operational', 'BURSA BLOK2_operational', 'CENGIZ_operational', 'ENKA ADP_operational', 'ENKA GBZ1_operational', 'ENKA GBZ2_operational', 'ENKA IZM1_operational', 'ENKA IZM2_operational', 'GAMA ICAN_operational', 'HABAS_operational', 'HAM-10_operational', 'HAM-20_operational', 'RWE_operational', 'TEKIRA_operational', 'TEKIRB_operational', 'YENI_operational', 'IST A-(A)_operational', 'IST A-(B)_operati

In [209]:
# Bağımsız ve bağımlı değişkenleri tekrar oluştur
features = ["hour_numeric", "day_of_week", "is_weekend", "month", "season", 
            "ruzgar", "dogalgaz", "lep", "consumption", "barajli", "gunes"]
targets = [f"{plant}_operational" for plant in selected_players if f"{plant}_operational" in electricity_data.columns]

# X ve Y'yi oluştur
X = electricity_data[features].dropna()
Y = electricity_data[targets].dropna()

# Ortak satırları eşitle
X, Y = X.align(Y, join='inner', axis=0)


In [211]:
X.shape

(8760, 11)

In [213]:
Y.shape

(8760, 53)

In [215]:
print([col for col in electricity_data.columns if '_operational' in col])

['ithalKomur_operational', 'fuelOil_operational', 'jeotermal_operational', 'barajli_operational', 'nafta_operational', 'biokutle_operational', 'akarsu_operational', 'diger_operational', 'lep_operational', 'consumption_operational', 'toplam_uga_operational', 'ruzgar_uga_operational', 'biyogaz_operational', 'kanalTipi_operational', 'biyokutle_operational', 'gunes_operational', 'diger_uga_operational', 'ACWA_operational', 'AKENRJ ERZIN_operational', 'AKSA ANT_operational', 'BAN1_operational', 'BAN2_operational', 'BAYMINA_operational', 'BILGIN1_operational', 'BILGIN2_operational', 'BURSA BLOK1_operational', 'BURSA BLOK2_operational', 'CENGIZ_operational', 'ENKA ADP_operational', 'ENKA GBZ1_operational', 'ENKA GBZ2_operational', 'ENKA IZM1_operational', 'ENKA IZM2_operational', 'GAMA ICAN_operational', 'HABAS_operational', 'HAM-10_operational', 'HAM-20_operational', 'RWE_operational', 'TEKIRA_operational', 'TEKIRB_operational', 'YENI_operational', 'IST A-(A)_operational', 'IST A-(B)_operati

In [217]:
# Son 6 ayın verisini alarak modeli güncelleyelim
recent_data = electricity_data[electricity_data["date"] >= "2023-07-01"]

# Yeni eğitim verisini oluştur
X_recent = recent_data[features]
Y_recent = recent_data[selected_players]

# Modeli sadece son 6 aylık veriye göre eğit
X_train, X_test, Y_train, Y_test = train_test_split(X_recent, Y_recent, test_size=0.2, random_state=42)


In [219]:
print(recent_data.columns)
print(selected_players)

Index(['date', 'hour', 'price', 'priceUsd', 'priceEur', 'toplam', 'dogalgaz',
       'ruzgar', 'linyit', 'tasKomur',
       ...
       'IST A-(C)_operational', 'IST B (Blok40+ Blok50)_operational',
       'hour_numeric_operational', 'year_operational', 'month_operational',
       'day_operational', 'day_of_week_operational', 'is_weekend_operational',
       'season_operational', 'hour_group_operational'],
      dtype='object', length=116)
['ithalKomur', 'fuelOil', 'jeotermal', 'barajli', 'nafta', 'biokutle', 'akarsu', 'diger', 'lep', 'consumption', 'toplam_uga', 'ruzgar_uga', 'biyogaz', 'kanalTipi', 'biyokutle', 'gunes', 'diger_uga', 'ACWA', 'AKENRJ ERZIN', 'AKSA ANT', 'BAN1', 'BAN2', 'BAYMINA', 'BILGIN1', 'BILGIN2', 'BURSA BLOK1', 'BURSA BLOK2', 'CENGIZ', 'ENKA ADP', 'ENKA GBZ1', 'ENKA GBZ2', 'ENKA IZM1', 'ENKA IZM2', 'GAMA ICAN', 'HABAS', 'HAM-10', 'HAM-20', 'RWE', 'TEKIRA', 'TEKIRB', 'YENI', 'IST A-(A)', 'IST A-(B)', 'IST A-(C)', 'IST B (Blok40+ Blok50)', 'hour_numeric', 'year', 'mo

In [221]:
selected_players = [col for col in recent_data.columns if '_operational' in col]
Y_recent = recent_data[selected_players]

In [223]:
print(Y_recent.isna().sum().sum())  # NaN sayısını görmek için

0


In [225]:
Y_recent = Y_recent.fillna(0)  # NaN olanları 0 yap


In [227]:
# Train-test split
#X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
X_train, X_test, Y_train, Y_test = train_test_split(X_recent, Y_recent, test_size=0.2, random_state=42)

# Classification Modeli Eğitme
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, Y_train)
Y_pred_rf = rf_classifier.predict(X_test)

# Modelin doğruluğunu hesaplayalım
classification_accuracy = (Y_pred_rf == Y_test.values).mean().mean()
print(f"Classification Model Accuracy: {classification_accuracy:.2%}")

Classification Model Accuracy: 97.70%


In [228]:
# Tüm santrallerin çalışma durumlarını tek seferde tahmin et
predictions = rf_classifier.predict(electricity_data[features])

# Tahminleri dataframe olarak ekleyelim
for i, plant in enumerate(selected_players):
    electricity_data[f"{plant}_predicted"] = predictions[:, i]

In [229]:
# Santral çalışma durumu tahminlerini tüm veri setine uygula
#for i, plant in enumerate(selected_players):
 #   electricity_data[f"{plant}_predicted"] = rf_classifier.predict(electricity_data[features])[:, i]

# Yeni feature set 
#regression_features = features + [f"{plant}_predicted" for plant in selected_players]

In [230]:
# Yeni feature set
regression_features = features + [f"{plant}_predicted" for plant in selected_players]

In [231]:
# Bir önceki günün fiyatını ekleyelim
electricity_data["prev_day_price"] = electricity_data["price"].shift(24)

# Bir önceki saatin fiyatını ekleyelim
electricity_data["prev_hour_price"] = electricity_data["price"].shift(1)

# Eksik değerleri dolduralım
electricity_data.fillna(method="bfill", inplace=True)

# Yeni feature'ları modele ekleyelim
features += ["prev_day_price", "prev_hour_price"]

  electricity_data.fillna(method="bfill", inplace=True)


In [232]:
# **Bağımlı değişken (Fiyat)**
target = "price"
X_reg, Y_reg = electricity_data[regression_features].align(electricity_data[[target]], join='inner', axis=0)

In [233]:
# Train-test split
X_train_reg, X_test_reg, Y_train_reg, Y_test_reg = train_test_split(X_reg, Y_reg, test_size=0.2, random_state=42)

In [234]:
# Fiyat tahmin modeli (RandomForestRegressor)
rf_regressor = RandomForestRegressor(n_estimators=500, random_state=42, max_depth=20)

In [235]:
rf_regressor.fit(X_train_reg, Y_train_reg["price"])

In [None]:
rf_regressor.fit(X_train_reg, Y_train_reg.values.ravel())

In [None]:
Y_pred_reg = rf_regressor.predict(X_test_reg)

In [None]:
# Performans metriklerini hesapla
mae = mean_absolute_error(Y_test_reg, Y_pred_reg)
mse = mean_squared_error(Y_test_reg, Y_pred_reg)
rmse = mse ** 0.5
r2 = r2_score(Y_test_reg, Y_pred_reg)

print(f"Regression Model Performance:\n MAE: {mae:.2f}, RMSE: {rmse:.2f}, R²: {r2:.2%}")

In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# 1️⃣ **1 Haftalık Test Verisini Yükleme**
test_data_path = "1-week-test-data.csv"
test_data = pd.read_csv(test_data_path)

test_data.head()

In [None]:
print(rf_classifier.feature_names_in_)

In [None]:
test_data["date"] = pd.to_datetime(test_data["date"], errors='coerce')

# Feature Engineering
test_data["year"] = test_data["date"].dt.year
test_data["month"] = test_data["date"].dt.month
test_data["day"] = test_data["date"].dt.day
test_data["day_of_week"] = test_data["date"].dt.weekday
test_data["is_weekend"] = (test_data["day_of_week"] >= 5).astype(int)
test_data["hour_numeric"] = test_data["date"].dt.hour

# Eksik sütunları ekleyelim ve 0 ile dolduralım
missing_features = ["dogalgaz", "linyit", "toplam", "season"]
for col in missing_features:
    if col not in test_data.columns:
        test_data[col] = 0  # Varsayılan olarak 0 veriyoruz

# Aynı feature'ları kullanarak modeli çalıştırma
features = ["hour_numeric", "day_of_week", "is_weekend", "month", "season", "ruzgar", "dogalgaz", "linyit", "toplam", "lep"]

# **Santral Çalışma Durumu Tahmini**
predictions = rf_classifier.predict(test_data[features])
for i, plant in enumerate(selected_players):
    test_data[f"{plant}_predicted"] = predictions[:, i]

# **Fiyat Tahmini İçin Regression Modelini Kullanma**
regression_features = features + [f"{plant}_predicted" for plant in selected_players]
X_test_final = test_data[regression_features]

# Fiyat tahmini yap
predicted_prices = rf_regressor.predict(X_test_final)
test_data["predicted_price"] = predicted_prices

# Sonuçları gösterelim
# Sonuçları gösterelim
print(test_data[["date", "hour_numeric", "predicted_price"]])

In [None]:
print(Y_train_reg["price"].describe())  # Eğitim setindeki fiyat istatistikleri
print(test_data["predicted_price"].describe())  # Test setindeki tahminler

In [None]:
for plant in selected_players:
    print(f"{plant} çalışma durumu:\n", test_data[f"{plant}_predicted"].value_counts(), "\n")
