In [None]:
import json
import pandas as pd
import numpy as np
from pandas import DataFrame
import glob

# Scenario scale factors, initialised to the neutral value 1.
# WIND_SCALER_BEST / WIND_SCALER_WORST are reassigned later in this notebook from
# quantiles of the measured/modelled ratio; the SOLAR_* pair is not referenced
# by the cells visible here.
WIND_SCALER_BEST=1
WIND_SCALER_WORST=1
SOLAR_SCALER_BEST=1
SOLAR_SCALER_WORST=1

# Coefficients of the raw energy formulas used further down.
AIR_DENSITY=1.225      # multiplies windspeed**3 -- presumably kg/m^3, TODO confirm
TURBINE_EFF=0.4        # base efficiency returned by dynamic_efficiency()
SOLAR_PANEL_EFF=0.18   # multiplies solar radiation in the solar formula
PERF_RATE=0.75         # additional multiplicative factor in the solar formula
GAMMA=0.0045           # derating per degree C away from 25 C in the solar formula
# Per-city weights used for the weighted sum of raw wind energy.
# The four weights add up to 1.
WIND_CITIES= {
    "İzmir":0.38,
    "Balıkesir":0.285,
    "Çanakkale":0.18,
    "Manisa":0.155
}
# Per-city weights used for the weighted sum of raw solar energy.
# The four weights add up to 1.
SOLAR_CITIES={
    "Konya":0.44,
    "Ankara":0.2,
    "Şanlıurfa":0.19,
    "Kayseri":0.17
}

In [None]:
# Load every weather export and flatten it to one row per hourly record.
# sorted() makes the file order -- and therefore the row order of df_combined --
# reproducible; glob.glob() returns matches in arbitrary, OS-dependent order.
all_files=sorted(glob.glob("WEATHER_DATA/*_weather.json"))
if not all_files:
    # Fail with an actionable message instead of pd.concat's
    # "No objects to concatenate".
    raise FileNotFoundError("No files matching 'WEATHER_DATA/*_weather.json' were found")
li=[]
for filename in all_files:
    with open(filename,'r',encoding="utf-8") as json_file:
        data = json.load(json_file)
    # One row per entry of days[*].hours[*]; the parent day's 'datetime' and the
    # file-level 'address' are carried along as 'days.datetime' and 'address'.
    temp_df=pd.json_normalize(data,record_path=['days','hours'],meta=[['days','datetime'],['address']])
    li.append(temp_df)
df_combined=pd.concat(li,axis=0,ignore_index=True)

In [None]:
# Read the measured production data; convert_dates=False keeps 'Date' as text
# so that it can be parsed with an explicit day.month.year format below.
df_energy = pd.read_json("energy_data.json", convert_dates=False)
df_energy['Date'] = pd.to_datetime(
    df_energy['Date'],
    format='%d.%m.%Y',
    errors='coerce',
    dayfirst=True,
)
# Build one timestamp per row from the parsed date and the 'Hour' column.
df_energy['DATE/HOUR'] = pd.to_datetime(
    df_energy['Date'].astype(str) + ' ' + df_energy['Hour'].astype(str) + ':00'
)
# The separate date/hour columns are no longer needed once merged.
deleted_columns = ["Date", "Hour"]
df_energy = df_energy.drop(columns=deleted_columns)
# Passing the Series (not the label) keeps 'DATE/HOUR' as a column as well as
# making it the index.
df_energy = df_energy.set_index(df_energy['DATE/HOUR'])

In [None]:
def _parse_decimal_comma(column: pd.Series) -> pd.Series:
    """Convert text such as '1.234,56' to the float 1234.56.

    Surrounding whitespace is stripped, every '.' is removed and ',' becomes
    the decimal point. Both replacements are literal (regex=False): with regex
    matching, '.' would mean "any character", and the default of `regex` has
    changed between pandas versions.

    Raises ValueError if a cleaned value cannot be parsed as a float.
    """
    text = column.astype(str).str.strip()
    text = text.str.replace('.', '', regex=False)
    text = text.str.replace(',', '.', regex=False)
    return text.astype(float)

# NOTE(review): this cell is not idempotent. Running it a second time on the
# already-parsed floats would strip their decimal point ('12.5' -> '125').
# Re-run the loading cell first.
df_energy['Wind Energy'] = _parse_decimal_comma(df_energy['Wind Energy'])
df_energy['Solar Energy'] = _parse_decimal_comma(df_energy['Solar Energy'])
df_energy

In [None]:
# Keep only the weather columns used by the later cells. .copy() gives df_final
# its own data so the in-place edits below do not touch df_combined.
target_colunms=['days.datetime','datetime','temp','solarradiation','windspeed','address']
df_final=df_combined[target_colunms].copy()

In [None]:
def convert_units(df1: DataFrame) -> DataFrame:
    """Add metric columns and a combined timestamp, then drop the source columns.

    Adds 'temp_c' (Fahrenheit converted to Celsius), 'windspeed_ms' (mph
    converted to m/s) and 'Full Date' (the 'days.datetime' and 'datetime'
    strings joined and parsed). The frame is modified in place and the same
    object is returned.
    """
    mph_to_ms = 0.44704

    # Fahrenheit -> Celsius
    df1['temp_c'] = (df1['temp'] - 32) * 5 / 9
    # miles per hour -> metres per second
    df1['windspeed_ms'] = df1['windspeed'] * mph_to_ms
    # day string + hour string -> one timestamp
    stamp_text = df1['days.datetime'] + ' ' + df1['datetime']
    df1['Full Date'] = pd.to_datetime(stamp_text)

    df1.drop(columns=['days.datetime', 'datetime', 'windspeed', 'temp'], inplace=True)
    return df1
# Convert df_final to metric units; convert_units edits the frame in place and
# returns that same object, which is bound back to df_final.
df_final=convert_units(df_final)

In [None]:
def handling_wind_sensor_anomalies(df: DataFrame,window_size) -> DataFrame:
    """Zero out wind readings that look like a stuck sensor.

    A reading is treated as stuck when the rolling sample standard deviation
    over the last `window_size` rows is exactly 0 and the reading itself is
    positive. Only the row that closes such a window is zeroed, not the
    earlier rows of the run.

    When the frame has an 'address' column the rolling window is evaluated
    separately per address, so the first rows of one location are never judged
    against the last readings of the location stored just before it. Without
    that column the window runs over the whole column as before.

    Parameters
    ----------
    df : DataFrame with a 'windspeed_ms' column (and optionally 'address').
        Rows are assumed to be in chronological order within each address --
        TODO confirm against the loader.
    window_size : rolling window passed to ``Series.rolling``.

    Returns
    -------
    The same DataFrame object, with 'windspeed_ms' overwritten in place.
    """
    speeds = df['windspeed_ms']
    if 'address' in df.columns:
        std_rolling = speeds.groupby(df['address'], sort=False).transform(
            lambda city_speeds: city_speeds.rolling(window=window_size).std()
        )
    else:
        std_rolling = speeds.rolling(window=window_size).std()
    # NaN std (incomplete window) compares False, so those rows are kept as-is.
    is_stuck = (std_rolling == 0) & (speeds > 0)
    df['windspeed_ms'] = np.where(is_stuck, 0, speeds)
    return df
# Apply the stuck-sensor filter with a 6-row window. df_final is modified in
# place; as the cell's last expression, the returned frame is also displayed.
handling_wind_sensor_anomalies(df_final,6)

In [None]:
# Display the current state of the weather frame for inspection.
df_final

In [None]:
# Mathematical calculations
# RAW WIND ENERGY
def dynamic_efficiency(vel):
    """Return the turbine efficiency for a single wind speed.

    * below 3 or above 25 the efficiency is 0.0;
    * from 12 up to 25 it is TURBINE_EFF * (12 / vel) ** 3, so that
      vel ** 3 * efficiency stays at its value for vel == 12;
    * otherwise it is TURBINE_EFF.

    The thresholds are presumably cut-in / rated / cut-out speeds in m/s --
    TODO confirm. Always returns a float, so the vectorised wrapper below can
    never infer an integer output type.
    """
    if vel < 3: return 0.0
    if vel > 25: return 0.0
    if vel >= 12:
        return TURBINE_EFF * (12/vel)**3
    return TURBINE_EFF

# otypes is required: without it np.vectorize takes the output dtype from the
# result for the FIRST element. A first speed below 3 used to yield an integer
# array, silently truncating every later efficiency (e.g. 0.4) to 0. It also
# makes empty input work instead of raising.
v_eff = np.vectorize(dynamic_efficiency, otypes=[float])
def raw_wind_energy_cal(df: DataFrame) -> None:
    """Write the 'RAW WIND ENERGY' column into `df` (in place, returns None).

    The value is windspeed_ms ** 3 * AIR_DENSITY * v_eff(windspeed_ms) for
    speeds strictly between 0.1 and 25, and 0 for every other row.
    """
    speed = df['windspeed_ms']
    outside_band = ~((speed > 0.1) & (speed < 25))
    modelled = (speed ** 3) * AIR_DENSITY * v_eff(speed)
    df['RAW WIND ENERGY'] = np.where(outside_band, 0, modelled)
def raw_solar_energy_cal(df: DataFrame) -> None:
    """Write the 'RAW SOLAR ENERGY' column into `df` (in place, returns None).

    The value is solarradiation * SOLAR_PANEL_EFF * PERF_RATE scaled by a
    linear temperature term, 1 - GAMMA * (temp_c - 25).
    """
    temperature_factor = 1 - GAMMA * (df['temp_c'] - 25)
    df['RAW SOLAR ENERGY'] = (
        df['solarradiation'] * SOLAR_PANEL_EFF * PERF_RATE * temperature_factor
    )
# Add the 'RAW WIND ENERGY' and 'RAW SOLAR ENERGY' columns to df_final.
# Both helpers write into the frame and return None.
raw_wind_energy_cal(df_final)
raw_solar_energy_cal(df_final)


In [None]:
# Index the frame by (timestamp, city).
# NOTE(review): this cell cannot be re-run on its own -- once the two columns
# have been moved into the index, a second set_index raises KeyError.
df_final=df_final.set_index(['Full Date','address'])

In [None]:
# Display the frame after it has been indexed by ('Full Date', 'address').
df_final

In [None]:
def wind_pivot_creator(df: DataFrame) -> DataFrame:
    """Build a timestamp x city table of raw wind energy plus a weighted total.

    Rows whose 'address' is not a key of WIND_CITIES are discarded. The rest is
    pivoted to one column per city, indexed by 'Full Date', and a
    'Total Hourly Energy (RAW)' column is added: the WIND_CITIES-weighted sum
    of the city columns divided by 1,000,000.
    """
    records = df.reset_index()
    wind_rows = records.loc[records['address'].isin(WIND_CITIES.keys())]
    pivot_table = wind_rows.pivot(
        index='Full Date', columns='address', values='RAW WIND ENERGY'
    )
    # Weights are looked up in column order so they line up with the matrix.
    weights = np.array([WIND_CITIES[name] for name in pivot_table.columns])
    weighted_total = np.dot(pivot_table.values, weights)
    pivot_table["Total Hourly Energy (RAW)"] = np.true_divide(weighted_total, 1000000)
    return pivot_table
# Build the per-city wind table with its weighted total, then display it.
wind_pivot=wind_pivot_creator(df_final)
wind_pivot

In [None]:
# 1. Keep only the hours in which the modelled raw energy is above its median
#    (quantile 0.5), to filter out the noisy low-wind hours.
# NOTE(review): the threshold is the median of the modelled energy, not a wind-speed cutoff.
high_wind_mask = wind_pivot['Total Hourly Energy (RAW)'] > wind_pivot['Total Hourly Energy (RAW)'].quantile(0.5)

# 2. Ratio of measured to modelled energy, computed on those hours only.
# NOTE(review): the mask is indexed like wind_pivot; using it in df_energy.loc assumes df_energy carries the same timestamps -- confirm.
stable_ratios = df_energy.loc[high_wind_mask, 'Wind Energy'] / wind_pivot.loc[high_wind_mask, 'Total Hourly Energy (RAW)']

# 3. Data-driven scalers (quantiles are used so that extreme ratios are ignored)
WIND_SCALER_BEST = float(stable_ratios.quantile(0.95))   # 95th percentile of the ratios
WIND_SCALER_NORMAL = float(stable_ratios.median())       # median ratio (not used by the cells visible here)
WIND_SCALER_WORST = float(stable_ratios.quantile(0.05))  # 5th percentile of the ratios

# Sanity check: Best should be at least Worst. This only prints the values; nothing is asserted.
print(f"Scalerlar: Best={WIND_SCALER_BEST:.2f}, Worst={WIND_SCALER_WORST:.2f}")

# 4. Apply the scalers. The smallest measured wind energy is added as a constant
#    base load so that the scenarios do not drop to 0.
base_load = df_energy['Wind Energy'].min()

wind_pivot["EXPECTED - BEST"] = (wind_pivot['Total Hourly Energy (RAW)'] * WIND_SCALER_BEST) + base_load
wind_pivot["EXPECTED - WORST"] = (wind_pivot['Total Hourly Energy (RAW)'] * WIND_SCALER_WORST) + base_load
# Assigning the Series aligns the measured values to wind_pivot's index by label.
wind_pivot["Total Hourly Energy (REAL)"] = df_energy['Wind Energy']



In [None]:
def energy_shaper(df, target_col, reference_col, tolerance=1.2, is_best=False):
    """Clamp a scenario column against a reference column, in place.

    Steps, applied in this order:
      1. values above reference * tolerance are lowered to that ceiling;
      2. if `is_best`, values below the reference are raised to the reference;
      3. everything is clipped from below at the minimum of the reference
         column.

    Returns the same DataFrame object that was passed in.
    """
    ceiling = df[reference_col] * tolerance
    exceeds_ceiling = df[target_col] > ceiling
    df[target_col] = np.where(exceeds_ceiling, ceiling, df[target_col])

    if is_best:
        below_reference = df[target_col] < df[reference_col]
        df[target_col] = np.where(below_reference, df[reference_col], df[target_col])

    global_floor = df[reference_col].min()
    df[target_col] = df[target_col].clip(lower=global_floor)

    return df

# Worst case: capped at the measured value itself (tolerance=1.0).
wind_pivot = energy_shaper(wind_pivot, 'EXPECTED - WORST', 'Total Hourly Energy (REAL)', tolerance=1.0)

# Best case: capped at 1.5x the measured value (tolerance=1.5) and then raised to the measured value wherever it is lower (is_best=True).
wind_pivot = energy_shaper(wind_pivot, 'EXPECTED - BEST', 'Total Hourly Energy (REAL)', tolerance=1.5, is_best=True)

In [None]:
def solar_pivot_creator(df: DataFrame) -> DataFrame:
    """Build a timestamp x city table of raw solar energy plus a weighted total.

    Rows whose 'address' is not a key of SOLAR_CITIES are discarded. The rest
    is pivoted to one column per city, indexed by 'Full Date', and a
    'Total Hourly Energy (RAW)' column is added: the SOLAR_CITIES-weighted sum
    of the city columns divided by 1,000,000.
    """
    records = df.reset_index()
    solar_rows = records.loc[records['address'].isin(SOLAR_CITIES.keys())]
    pivot_table = solar_rows.pivot(
        index='Full Date', columns='address', values='RAW SOLAR ENERGY'
    )
    # Weights are looked up in column order so they line up with the matrix.
    weights = np.array([SOLAR_CITIES[name] for name in pivot_table.columns])
    weighted_total = np.dot(pivot_table.values, weights)
    pivot_table["Total Hourly Energy (RAW)"] = np.true_divide(weighted_total, 1000000)
    return pivot_table
# Build the per-city solar table with its weighted total.
solar_pivot=solar_pivot_creator(df_final)

In [None]:
# Keep the timestamp plus the three result columns and write them to disk as a
# list of JSON records with ISO-formatted dates.
# Note: wind_pivot is replaced by this reduced frame, so the per-city columns
# are no longer available after this cell.
selected_wind_colunms=['Full Date','EXPECTED - BEST','EXPECTED - WORST','Total Hourly Energy (REAL)']
wind_pivot=wind_pivot.reset_index()
wind_pivot=wind_pivot[selected_wind_colunms].copy()
wind_pivot.to_json('complete_wind_records.json',indent=2,orient='records',date_format='iso')

In [None]:
# Display the solar pivot table for inspection.
solar_pivot

In [None]:
# --- 1. DAYLIGHT MASK ---
# Keep only hours whose modelled energy is above 0.0001, so that (near-)zero
# values such as night hours cannot end up in the denominator below.
daylight_mask = solar_pivot['Total Hourly Energy (RAW)'] > 0.0001

# --- 2. DYNAMIC SOLAR SCALER ---
# Ratio of measured to modelled energy on the masked hours.
# NOTE(review): the mask is indexed like solar_pivot; using it in df_energy.loc assumes df_energy carries the same timestamps -- confirm.
solar_ratios = df_energy.loc[daylight_mask, 'Solar Energy'] / solar_pivot.loc[daylight_mask, 'Total Hourly Energy (RAW)']

# Only a Best and a Worst scaler are derived.
# Best: the 99th percentile of the ratios, increased by a further 20%.
s_solar_best = float(solar_ratios.quantile(0.99)*1.2)
# Worst: the 7th percentile of the ratios (lower bound, e.g. for overcast days).
s_solar_worst = float(solar_ratios.quantile(0.07))

# --- 3. SOLAR SHAPER (minimal version) ---
def solar_shaper(df, target_col, reference_col, tolerance=1.05, is_best=False):
    """Clamp a solar scenario column against a reference column, in place.

    Steps, applied in this order:
      1. values above reference * tolerance are lowered to that ceiling;
      2. if `is_best`, values below the reference are raised to the reference;
      3. negative values are clipped to 0.

    Returns the same DataFrame object that was passed in.
    """
    # Upper bound: never more than `tolerance` times the reference.
    ceiling = df[reference_col] * tolerance
    exceeds_ceiling = df[target_col] > ceiling
    df[target_col] = np.where(exceeds_ceiling, ceiling, df[target_col])

    # Lower bound for the best-case scenario: never below the reference.
    if is_best:
        below_reference = df[target_col] < df[reference_col]
        df[target_col] = np.where(below_reference, df[reference_col], df[target_col])

    # No negative energy.
    df[target_col] = df[target_col].clip(lower=0)
    return df

# --- 4. COMPUTE AND APPLY ---
solar_pivot["EXPECTED - BEST"] = solar_pivot['Total Hourly Energy (RAW)'] * s_solar_best
solar_pivot["EXPECTED - WORST"] = solar_pivot['Total Hourly Energy (RAW)'] * s_solar_worst
# Assign the Series itself rather than its .values so that pandas matches the
# measured values to solar_pivot by timestamp. A positional assignment silently
# pairs the wrong hours whenever the two frames are ordered differently, and
# fails outright when their lengths differ. The wind cell assigns its measured
# column the same label-aligned way.
solar_pivot["Total Hourly Energy (REAL)"] = df_energy['Solar Energy']

# Clamp both scenario curves against the measured curve.
# Best: capped at 1.05x the measured value, then raised to it where lower.
solar_pivot = solar_shaper(solar_pivot, 'EXPECTED - BEST', 'Total Hourly Energy (REAL)', tolerance=1.05, is_best=True)
# Worst: capped at the measured value itself.
solar_pivot = solar_shaper(solar_pivot, 'EXPECTED - WORST', 'Total Hourly Energy (REAL)', tolerance=1.0)

In [None]:
# Keep the timestamp plus the three result columns and write them to disk as a
# list of JSON records with ISO-formatted dates.
# Note: solar_pivot is replaced by this reduced frame, so the per-city columns
# are no longer available after this cell.
selected_solar_colunms=['Full Date','EXPECTED - BEST','EXPECTED - WORST','Total Hourly Energy (REAL)']
solar_pivot=solar_pivot.reset_index()
solar_pivot=solar_pivot[selected_solar_colunms].copy()
solar_pivot.to_json('complete_solar_records.json',indent=2,orient='records',date_format='iso')