In [228]:
import json
import pandas as pd
import numpy as np
from pandas import DataFrame
import glob

# Placeholder wind scalers; a later cell overwrites both with values derived
# from quantiles of the real/raw wind-energy ratio.
WIND_SCALER_BEST=1
WIND_SCALER_WORST=1
# Solar scalers; not referenced anywhere else in the visible part of this notebook.
SOLAR_SCALER_BEST=1
SOLAR_SCALER_WORST=1

# Factor multiplied with windspeed**3 in raw_wind_energy_cal
# (presumably air density in kg/m^3 -- TODO confirm).
AIR_DENSITY=1.225
# Efficiency returned by dynamic_efficiency for wind speeds from 3 up to (but not including) 12.
TURBINE_EFF=0.4
# Multipliers applied to solarradiation in raw_solar_energy_cal.
SOLAR_PANEL_EFF=0.18
PERF_RATE=0.75
# Coefficient applied to (temp_c - 25) in raw_solar_energy_cal: the solar value
# is scaled by 1 - GAMMA * (temp_c - 25).
GAMMA=0.0045
# Per-city weights used by wind_pivot_creator to combine the city-level raw wind
# values into one weighted sum; the four weights add up to 1.0.
WIND_CITIES= {
    "İzmir":0.38,
    "Balıkesir":0.285,
    "Çanakkale":0.18,
    "Manisa":0.155
}
# Per-city weights used by solar_pivot_creator, same scheme as WIND_CITIES;
# the four weights add up to 1.0.
SOLAR_CITIES={
    "Konya":0.44,
    "Ankara":0.2,
    "Şanlıurfa":0.19,
    "Kayseri":0.17
}

In [229]:
# Collect every per-city weather export. glob returns matches in a
# file-system dependent order, so sort them to make the row order of the
# combined frame reproducible across machines and runs.
all_files=sorted(glob.glob("WEATHER JSONS/*_weather.json"))
if not all_files:
    # Without this guard pd.concat([]) fails later with the much less helpful
    # "No objects to concatenate".
    raise FileNotFoundError("No '*_weather.json' files found under 'WEATHER JSONS/'")
li=[]
for filename in all_files:
    with open(filename,'r',encoding="utf-8") as json_file:
        data = json.load(json_file)
    # Flatten days -> hours into one row per hourly record, carrying the day's
    # date and the file-level address along as extra columns.
    temp_df=pd.json_normalize(data,record_path=['days','hours'],meta=[['days','datetime'],['address']])
    li.append(temp_df)
# Stack all cities into a single frame with a fresh 0..n-1 index.
df_combined=pd.concat(li,axis=0,ignore_index=True)

In [230]:
# Read the hourly energy records; date parsing is done explicitly below.
df_energy = pd.read_json("energy_data.json", convert_dates=False)
# Dates are day.month.year strings; values that do not parse become NaT.
df_energy['Date'] = pd.to_datetime(
    df_energy['Date'], format='%d.%m.%Y', errors='coerce', dayfirst=True
)
# Build "<date> <hour>:00" strings and parse them into one timestamp column.
timestamp_text = df_energy['Date'].astype(str) + ' ' + df_energy['Hour'].astype(str) + ':00'
df_energy['DATE/HOUR'] = pd.to_datetime(timestamp_text)
# The separate date/hour columns are no longer needed.
deleted_columns = ["Date", "Hour"]
df_energy = df_energy.drop(columns=deleted_columns)
# Index by timestamp; passing the Series (not the label) keeps 'DATE/HOUR'
# available as a regular column as well.
df_energy = df_energy.set_index(df_energy['DATE/HOUR'])

In [231]:
# Convert 'Wind Energy' from text such as "6.270,54" (dot = thousands
# separator, comma = decimal separator) into floats.
# regex=False is essential: with regex matching, '.' is a wildcard and the
# first replace would delete every character. The default for `regex` changed
# between pandas versions, so it is spelled out explicitly.
df_energy['Wind Energy'] = (
    df_energy['Wind Energy']
    .astype(str)
    .str.strip()
    .str.replace('.', '', regex=False)   # drop thousands separators
    .str.replace(',', '.', regex=False)  # decimal comma -> decimal point
    .astype(float)
)
df_energy

Unnamed: 0_level_0,Total Energy,Solar Energy,Wind Energy,DATE/HOUR
DATE/HOUR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-08-15 00:00:00,"46.930,69",001,6270.54,2025-08-15 00:00:00
2025-08-15 01:00:00,"44.939,70",001,6039.83,2025-08-15 01:00:00
2025-08-15 02:00:00,"43.132,34",001,6061.29,2025-08-15 02:00:00
2025-08-15 03:00:00,"41.894,37",001,6079.25,2025-08-15 03:00:00
2025-08-15 04:00:00,"40.912,47",092,6241.11,2025-08-15 04:00:00
...,...,...,...,...
2025-11-15 19:00:00,"42.332,06",264,2331.31,2025-11-15 19:00:00
2025-11-15 20:00:00,"41.332,64",264,2282.53,2025-11-15 20:00:00
2025-11-15 21:00:00,"40.344,24",276,2107.14,2025-11-15 21:00:00
2025-11-15 22:00:00,"38.994,12",275,1936.05,2025-11-15 22:00:00


In [232]:
# Keep only the weather fields used downstream, as an independent copy so that
# later in-place edits do not touch df_combined.
target_colunms = [
    'days.datetime',
    'datetime',
    'temp',
    'solarradiation',
    'windspeed',
    'address',
]
df_final = df_combined.loc[:, target_colunms].copy()

In [233]:
def convert_units(df1: DataFrame) -> DataFrame:
    """Convert temperature and wind speed to metric and build a timestamp column.

    Adds ``temp_c`` (Fahrenheit -> Celsius), ``windspeed_ms`` (mph -> m/s) and
    ``Full Date`` (``days.datetime`` and ``datetime`` joined and parsed), then
    removes the four source columns.

    Args:
        df1: Frame with ``temp``, ``windspeed``, ``days.datetime`` and
            ``datetime`` columns. It is modified in place.

    Returns:
        The same ``df1`` object, after modification.
    """
    fahrenheit = df1['temp']
    miles_per_hour = df1['windspeed']

    df1['temp_c'] = (fahrenheit - 32) * 5 / 9
    df1['windspeed_ms'] = miles_per_hour * 0.44704

    # "<day> <hour>" text -> a single datetime value per row.
    day_and_hour = df1['days.datetime'] + ' ' + df1['datetime']
    df1['Full Date'] = pd.to_datetime(day_and_hour)

    # The raw inputs are replaced by the converted columns above.
    df1.drop(columns=['days.datetime', 'datetime', 'windspeed', 'temp'], inplace=True)
    return df1
# convert_units edits the frame in place and returns it. Re-running this cell
# on the already-converted frame raises KeyError because the source columns
# ('temp', 'windspeed', ...) have been dropped.
df_final=convert_units(df_final)

In [234]:
def handling_wind_sensor_anomalies(df: DataFrame,window_size) -> DataFrame:
    """Zero out wind readings that look like a stuck sensor.

    A reading is flagged when the rolling standard deviation over the last
    ``window_size`` samples is exactly 0 while the reading itself is positive.
    Only the sample that closes such a flat window is set to 0; the earlier
    samples of the flat run are left untouched.

    When the frame has an ``address`` column, the rolling window is evaluated
    separately for each address. The input stacks several cities on top of
    each other, and a window that straddles the boundary between two cities
    would otherwise mix readings from different stations.

    Args:
        df: Frame with a ``windspeed_ms`` column and, optionally, an
            ``address`` column. It is modified in place.
        window_size: Number of consecutive samples in the rolling window.

    Returns:
        The same ``df`` object, after modification.
    """
    speeds = df['windspeed_ms']
    if 'address' in df.columns:
        # transform() returns a result aligned to the original rows, so the
        # mask below can be combined with `speeds` directly.
        std_rolling = speeds.groupby(df['address'], sort=False).transform(
            lambda city_speeds: city_speeds.rolling(window=window_size).std()
        )
    else:
        std_rolling = speeds.rolling(window=window_size).std()
    # NaN std (incomplete window) compares False, so those rows are never flagged.
    is_stuck = (std_rolling == 0) & (speeds > 0)
    df['windspeed_ms'] = np.where(is_stuck, 0, speeds)
    return df
# Uses a 6-sample rolling window. The function edits df_final in place; its
# return value is not assigned and is only shown as this cell's output.
handling_wind_sensor_anomalies(df_final,6)

Unnamed: 0,solarradiation,address,temp_c,windspeed_ms,Full Date
0,0.0,Ankara,26.888889,4.336288,2025-08-15 00:00:00
1,0.0,Ankara,25.722222,3.352800,2025-08-15 01:00:00
2,0.0,Ankara,25.111111,3.844544,2025-08-15 02:00:00
3,0.0,Ankara,24.055556,3.799840,2025-08-15 03:00:00
4,0.0,Ankara,23.777778,4.604512,2025-08-15 04:00:00
...,...,...,...,...,...
17851,0.0,Şanlıurfa,6.388889,2.995168,2025-11-15 19:00:00
17852,0.0,Şanlıurfa,6.388889,3.308096,2025-11-15 20:00:00
17853,0.0,Şanlıurfa,6.166667,4.202176,2025-11-15 21:00:00
17854,0.0,Şanlıurfa,5.388889,3.218688,2025-11-15 22:00:00


In [235]:
# Show df_final after the unit conversion and the in-place wind anomaly handling.
df_final

Unnamed: 0,solarradiation,address,temp_c,windspeed_ms,Full Date
0,0.0,Ankara,26.888889,4.336288,2025-08-15 00:00:00
1,0.0,Ankara,25.722222,3.352800,2025-08-15 01:00:00
2,0.0,Ankara,25.111111,3.844544,2025-08-15 02:00:00
3,0.0,Ankara,24.055556,3.799840,2025-08-15 03:00:00
4,0.0,Ankara,23.777778,4.604512,2025-08-15 04:00:00
...,...,...,...,...,...
17851,0.0,Şanlıurfa,6.388889,2.995168,2025-11-15 19:00:00
17852,0.0,Şanlıurfa,6.388889,3.308096,2025-11-15 20:00:00
17853,0.0,Şanlıurfa,6.166667,4.202176,2025-11-15 21:00:00
17854,0.0,Şanlıurfa,5.388889,3.218688,2025-11-15 22:00:00


In [236]:
#Mathematical Calculations
#RAW WIND ENERGY
def dynamic_efficiency(vel):
    """Return the turbine efficiency factor for a single wind speed.

    * ``vel < 3`` or ``vel > 25``: 0.0 (no output).
    * ``3 <= vel < 12``: ``TURBINE_EFF``.
    * ``12 <= vel <= 25``: ``TURBINE_EFF * (12 / vel) ** 3``, which keeps
      ``vel**3 * efficiency`` constant at its value for ``vel == 12``.

    The result is always a float. This matters because the function is wrapped
    with ``np.vectorize``, which infers the output dtype from the first element
    it evaluates; an integer ``0`` there would turn the whole result into
    integers and truncate ``TURBINE_EFF`` to 0.

    Args:
        vel: A scalar wind speed (the caller passes values from the
            ``windspeed_ms`` column).

    Returns:
        The efficiency factor as a float.
    """
    if vel < 3 or vel > 25:
        return 0.0
    if vel >= 12:
        return TURBINE_EFF * (12/vel)**3
    return TURBINE_EFF

# Element-wise wrapper around dynamic_efficiency. otypes pins the result to
# float: without it np.vectorize infers the dtype from the first element only
# (an integer 0 there would truncate every later efficiency to 0), and it
# cannot handle empty input at all.
v_eff = np.vectorize(dynamic_efficiency, otypes=[float])
def raw_wind_energy_cal(df: DataFrame) -> None:
    """Add a ``RAW WIND ENERGY`` column computed from ``windspeed_ms``.

    The value is ``windspeed_ms**3 * AIR_DENSITY * efficiency`` where the
    efficiency comes from ``dynamic_efficiency``. Rows whose speed is not
    strictly between 0.1 and 25 are set to 0. The result is an unscaled,
    relative figure: no swept-area or other constant factor is applied here.

    Args:
        df: Frame with a ``windspeed_ms`` column. It is modified in place.

    Returns:
        None.
    """
    vel=df['windspeed_ms']
    calculation_part=(vel**3)*AIR_DENSITY*v_eff(vel)
    # A NaN speed fails both comparisons and therefore ends up as 0, not NaN.
    df['RAW WIND ENERGY']=np.where(((vel>0.1)&(vel<25)),calculation_part,0)
def raw_solar_energy_cal(df: DataFrame) -> None:
    """Add a ``RAW SOLAR ENERGY`` column to ``df``.

    The value is ``solarradiation * SOLAR_PANEL_EFF * PERF_RATE`` scaled by a
    temperature factor ``1 - GAMMA * (temp_c - 25)``: it shrinks for
    temperatures above 25 and grows for temperatures below 25.

    Args:
        df: Frame with ``solarradiation`` and ``temp_c`` columns. It is
            modified in place.

    Returns:
        None.
    """
    temperature_factor = 1 - GAMMA * (df['temp_c'] - 25)
    df['RAW SOLAR ENERGY'] = (
        df['solarradiation'] * SOLAR_PANEL_EFF * PERF_RATE * temperature_factor
    )
# Both helpers add their result column to df_final in place.
raw_wind_energy_cal(df_final)
raw_solar_energy_cal(df_final)


In [237]:
# Index the rows by (timestamp, city). set_index moves both columns into the
# index, so running this cell a second time raises KeyError.
df_final=df_final.set_index(['Full Date','address'])

In [238]:
# Show df_final with the (Full Date, address) index and the two raw energy columns.
df_final

Unnamed: 0_level_0,Unnamed: 1_level_0,solarradiation,temp_c,windspeed_ms,RAW WIND ENERGY,RAW SOLAR ENERGY
Full Date,address,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2025-08-15 00:00:00,Ankara,0.0,26.888889,4.336288,39.953096,0.0
2025-08-15 01:00:00,Ankara,0.0,25.722222,3.352800,18.467964,0.0
2025-08-15 02:00:00,Ankara,0.0,25.111111,3.844544,27.843933,0.0
2025-08-15 03:00:00,Ankara,0.0,24.055556,3.799840,26.883884,0.0
2025-08-15 04:00:00,Ankara,0.0,23.777778,4.604512,47.835124,0.0
...,...,...,...,...,...,...
2025-11-15 19:00:00,Şanlıurfa,0.0,6.388889,2.995168,0.000000,0.0
2025-11-15 20:00:00,Şanlıurfa,0.0,6.388889,3.308096,17.739051,0.0
2025-11-15 21:00:00,Şanlıurfa,0.0,6.166667,4.202176,36.359575,0.0
2025-11-15 22:00:00,Şanlıurfa,0.0,5.388889,3.218688,16.339273,0.0


In [239]:
def wind_pivot_creator(df: DataFrame) -> DataFrame:
    """Build a per-timestamp table of raw wind values for the wind cities.

    Rows whose ``address`` is a key of ``WIND_CITIES`` are pivoted into one
    column per city (indexed by ``Full Date``). A ``Total Hourly Energy (RAW)``
    column is appended, holding the ``WIND_CITIES``-weighted sum of the city
    columns divided by 1,000,000.

    Args:
        df: Frame indexed by ``Full Date`` and ``address`` with a
            ``RAW WIND ENERGY`` column.

    Returns:
        The pivoted frame including the weighted total column.
    """
    flat = df.reset_index()
    wind_rows = flat[flat['address'].isin(WIND_CITIES.keys())]
    wind_pivot = wind_rows.pivot(index='Full Date', columns='address', values='RAW WIND ENERGY')

    # Order the weights like the pivot's city columns so the dot product lines up.
    city_wind_weights = np.array([WIND_CITIES[name] for name in wind_pivot.columns])
    weighted_total = np.dot(wind_pivot.values, city_wind_weights)

    wind_pivot["Total Hourly Energy (RAW)"] = np.true_divide(weighted_total, 1000000)
    return wind_pivot
wind_pivot=wind_pivot_creator(df_final)
wind_pivot

address,Balıkesir,Manisa,Çanakkale,İzmir,Total Hourly Energy (RAW)
Full Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2025-08-15 00:00:00,0.000000,0.000000,34.087780,0.000000,0.000006
2025-08-15 01:00:00,15.015139,0.000000,59.869299,0.000000,0.000015
2025-08-15 02:00:00,0.000000,0.000000,0.000000,0.000000,0.000000
2025-08-15 03:00:00,0.000000,0.000000,16.339273,0.000000,0.000003
2025-08-15 04:00:00,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...
2025-11-15 19:00:00,0.000000,117.565429,0.000000,117.565429,0.000063
2025-11-15 20:00:00,0.000000,117.565429,0.000000,117.565429,0.000063
2025-11-15 21:00:00,0.000000,47.835124,0.000000,47.835124,0.000026
2025-11-15 22:00:00,0.000000,0.000000,0.000000,0.000000,0.000000


In [240]:
# 1. Keep only the hours where the modelled raw wind value is above its own
#    median; the low-output half is treated as noise and ignored.
raw_total = wind_pivot['Total Hourly Energy (RAW)']
high_wind_mask = raw_total > raw_total.quantile(0.5)

# 2. Ratio of real to modelled wind energy, computed on those hours only.
real_high_wind = df_energy.loc[high_wind_mask, 'Wind Energy']
raw_high_wind = wind_pivot.loc[high_wind_mask, 'Total Hourly Energy (RAW)']
stable_ratios = real_high_wind / raw_high_wind

# 3. Data-driven scalers; quantiles are used so extreme ratios are discarded.
WIND_SCALER_BEST = float(stable_ratios.quantile(0.95))   # multiplier for the most productive hours
WIND_SCALER_NORMAL = float(stable_ratios.median())       # typical multiplier
WIND_SCALER_WORST = float(stable_ratios.quantile(0.05))  # multiplier for the least productive hours

# Sanity check by eye: BEST is expected to be larger than WORST.
print(f"Scalerlar: Best={WIND_SCALER_BEST:.2f}, Worst={WIND_SCALER_WORST:.2f}")

# 4. Apply the scalers. The smallest real wind value is added as a base load
#    so that hours with a raw value of 0 do not produce an expectation of 0.
base_load = df_energy['Wind Energy'].min()

wind_pivot["EXPECTED - BEST"] = (raw_total * WIND_SCALER_BEST) + base_load
wind_pivot["EXPECTED - WORST"] = (raw_total * WIND_SCALER_WORST) + base_load
# Aligned on the timestamp index shared by wind_pivot and df_energy.
wind_pivot["Total Hourly Energy (REAL)"] = df_energy['Wind Energy']



Scalerlar: Best=343412570.65, Worst=31258487.98


In [241]:
def energy_shaper(df, target_col, reference_col, tolerance=1.2, is_best=False):
    """Constrain a scenario column relative to a reference column.

    Three steps are applied to ``df[target_col]`` in order:

    1. Cap: values above ``reference * tolerance`` are replaced by that cap.
    2. Floor (only when ``is_best``): values below the reference are raised to
       the reference.
    3. Global floor: values are clipped from below at the minimum of the
       reference column.

    Args:
        df: Frame holding both columns. It is modified in place.
        target_col: Name of the column to constrain.
        reference_col: Name of the column used as the reference.
        tolerance: Multiplier on the reference that defines the cap.
        is_best: Whether to apply the per-row floor of step 2.

    Returns:
        The same ``df`` object, after modification.
    """
    # Step 1: upper bound relative to the reference.
    ceiling = df[reference_col] * tolerance
    current = df[target_col]
    df[target_col] = np.where(current > ceiling, ceiling, current)

    # Step 2: the best-case scenario may never be below the reference.
    if is_best:
        current, reference = df[target_col], df[reference_col]
        df[target_col] = np.where(current < reference, reference, current)

    # Step 3: never drop below the smallest reference value.
    overall_floor = df[reference_col].min()
    df[target_col] = df[target_col].clip(lower=overall_floor)

    return df

# Apply the shaper to both scenarios:
#   WORST: capped at the real value itself (tolerance=1.0).
#   BEST:  never below the real value (is_best=True) and at most 50% above it.
for scenario_col, shaper_options in (
    ('EXPECTED - WORST', {'tolerance': 1.0}),
    ('EXPECTED - BEST', {'tolerance': 1.5, 'is_best': True}),
):
    wind_pivot = energy_shaper(wind_pivot, scenario_col, 'Total Hourly Energy (REAL)', **shaper_options)

In [242]:
def solar_pivot_creator(df: DataFrame, city_weights=None) -> DataFrame:
    """Build a per-timestamp table of raw solar values for the solar cities.

    Rows whose ``address`` is one of the weighted cities are pivoted into one
    column per city (indexed by ``Full Date``) using the ``RAW SOLAR ENERGY``
    column. A ``Total Hourly Energy (RAW)`` column is appended, holding the
    weighted sum of the city columns divided by 1,000,000.

    Args:
        df: Frame indexed by ``Full Date`` and ``address`` with a
            ``RAW SOLAR ENERGY`` column.
        city_weights: Optional mapping of city name to weight. Defaults to the
            module-level ``SOLAR_CITIES``.

    Returns:
        The pivoted frame including the weighted total column.
    """
    weights = SOLAR_CITIES if city_weights is None else city_weights
    df_temp=df.reset_index()
    df_filtered=df_temp[df_temp['address'].isin(weights.keys())]
    # The solar pivot must read the solar column; it previously read
    # 'RAW WIND ENERGY', which made the solar table a copy of wind data.
    solar_pivot=df_filtered.pivot(index='Full Date',columns='address',values='RAW SOLAR ENERGY')
    # Order the weights like the pivot's city columns so the dot product lines up.
    city_solar_weights=np.array([weights[city] for city in solar_pivot.columns])
    weighted_total=np.dot(solar_pivot.values,city_solar_weights)
    solar_pivot["Total Hourly Energy (RAW)"]=np.true_divide(weighted_total,1000000)
    return solar_pivot
# Build the solar table from df_final (indexed by Full Date and address).
solar_pivot=solar_pivot_creator(df_final)

In [243]:
# Columns written to the export: the timestamp, both scenarios and the real value.
selected_wind_colunms = [
    'Full Date',
    'EXPECTED - BEST',
    'EXPECTED - WORST',
    'Total Hourly Energy (REAL)',
]
# Turn the timestamp index back into a column, then keep only the export columns.
wind_pivot = wind_pivot.reset_index().loc[:, selected_wind_colunms].copy()
# One JSON object per row, timestamps in ISO 8601 format.
wind_pivot.to_json('complete_wind_records.json', indent=2, orient='records', date_format='iso')

In [244]:
# Show the exported table (timestamp, both scenarios and the real value).
wind_pivot

address,Full Date,EXPECTED - BEST,EXPECTED - WORST,Total Hourly Energy (REAL)
0,2025-08-15 00:00:00,6270.540,378.025842,6270.54
1,2025-08-15 01:00:00,6039.830,656.851178,6039.83
2,2025-08-15 02:00:00,6061.290,186.230000,6061.29
3,2025-08-15 03:00:00,6079.250,278.163373,6079.25
4,2025-08-15 04:00:00,6241.110,186.230000,6241.11
...,...,...,...,...
2227,2025-11-15 19:00:00,3496.965,2152.310894,2331.31
2228,2025-11-15 20:00:00,3423.795,2152.310894,2282.53
2229,2025-11-15 21:00:00,3160.710,986.190708,2107.14
2230,2025-11-15 22:00:00,1936.050,186.230000,1936.05
