### Power Generation Prediction

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from catboost import CatBoostRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
import datetime
import warnings
warnings.filterwarnings('ignore')
from suntime import Sun
import pytz
from flaml import AutoML
from scipy import stats

In [2]:
# Load the semicolon-separated weather table and derive calendar and daylight features.
temp = pd.read_csv('enerjisa-enerji-veri-maratonu/temperature.csv', sep=';')
# Drop the trailing 95 rows. `.copy()` gives an independent frame so the column
# assignments below never write into a slice of the raw table.
# NOTE(review): the reason for discarding exactly 95 rows is not visible here - confirm.
temp = temp[:-95].copy()
temp['DateTime'] = pd.to_datetime(temp['DateTime'], format='%d%b%Y %H:%M:%S')

# These columns are read as strings with decimal commas; convert them to floats.
numeric_cols = ['AirTemperature', 'ComfortTemperature', 'RelativeHumidity', 'WindSpeed', 'EffectiveCloudCover']
temp[numeric_cols] = temp[numeric_cols].apply(lambda x: x.str.replace(',', '.')).astype(float)

# Calendar features taken from the timestamp.
temp['hourofday'] = temp.DateTime.dt.hour
temp['dayofyear'] = temp.DateTime.dt.dayofyear
temp['dayofmonth'] = temp.DateTime.dt.day
temp['year'] = temp.DateTime.dt.year

# Assign the filled column back instead of calling fillna(inplace=True) on the
# column selection, which does not update `temp` under pandas Copy-on-Write.
temp['WWCode'] = temp['WWCode'].fillna(0)
# NOTE(review): code 84 is folded into code 80; the rationale is not visible here - confirm.
temp.loc[temp['WWCode'] == 84, 'WWCode'] = 80

# Daylight flag: 1 when the row's hour lies between the local sunrise hour and the
# local sunset hour (both inclusive, whole hours only), otherwise 0.
coordinates = [40.239, 33.029]  # passed to Sun as (latitude, longitude)
sun = Sun(coordinates[0], coordinates[1])
tz = pytz.timezone('Europe/Istanbul')

# Sunrise/sunset depend only on the calendar date, so compute them once per
# distinct date rather than twice for every row.
row_dates = temp['DateTime'].dt.date
daylight_hours = {
    day: (
        sun.get_local_sunrise_time(day, local_time_zone=tz).hour,
        sun.get_local_sunset_time(day, local_time_zone=tz).hour,
    )
    for day in row_dates.unique()
}
sunrise_hour = row_dates.map(lambda day: daylight_hours[day][0])
sunset_hour = row_dates.map(lambda day: daylight_hours[day][1])
temp['IsDay'] = ((sunrise_hour <= temp['hourofday']) & (temp['hourofday'] <= sunset_hour)).astype(int)

In [3]:
# Load the generation table, keeping only rows without missing values.
df = pd.read_csv('enerjisa-enerji-veri-maratonu/generation.csv', sep=';').dropna()
df['DateTime'] = pd.to_datetime(df['DateTime'], format='%d%b%Y %H:%M:%S')
# Generation is read as text with a decimal comma; normalise it and cast to float.
df['Generation'] = df['Generation'].str.replace(',', '.', regex=False).astype(float)

# Left join keeps every weather row; rows with no matching generation record
# get NaN in the Generation column.
merged = temp.merge(df, how='left', on='DateTime').set_index('DateTime')
# Work on a copy so `merged` stays as produced by the join.
data = merged.copy()

In [4]:
# Lag and lead features built by shifting columns along the row order of `data`.
#   <col>_1   : value from the previous row
#   <col>_k+  : value from k rows ahead, for k = 1..4 (also built for IsDay)
# Rows at the edges have no neighbour to take a value from; they are filled with
# the nearest available shifted value so the new columns add no NaNs of their own.
#
# The fill is applied to the shifted Series *before* it is stored in `data`.
# Writing through `data[col].iloc[i] = ...` is chained assignment: it is not
# guaranteed to reach `data` and is a no-op under pandas Copy-on-Write.
weather_cols = ['AirTemperature', 'ComfortTemperature', 'RelativeHumidity', 'EffectiveCloudCover', 'WindSpeed']

# Names of the previous-row columns, in creation order.
forshift = [f'{col}_1' for col in weather_cols]
for col, fs in zip(weather_cols, forshift):
    lagged = data[col].shift(1)
    # The first row has no predecessor; reuse the value of the second row.
    lagged.iloc[0] = lagged.iloc[1]
    data[fs] = lagged

# Names of the look-ahead columns, grouped by step (six names per step).
backshift = []
for step in range(1, 5):
    for col in weather_cols + ['IsDay']:
        bs = f'{col}_{step}+'
        lead = data[col].shift(-step)
        # The last `step` rows have nothing `step` rows ahead; repeat the last
        # value that the shift actually produced.
        lead.iloc[-step:] = lead.iloc[-step - 1]
        data[bs] = lead
        backshift.append(bs)

In [5]:
# `days` is used below as a rows-per-day multiplier.
# NOTE(review): assumes one row per hour - confirm against the data.
days = 24
# Discard the final 31 * days rows (selected by position).
# NOTE(review): presumably the period without known Generation values - confirm.
# This rebinds `data` to a shorter frame, so re-running the cell trims again.
data = data.iloc[:-31 * days]

def prepare_X_y(df, col, pred_ahead):
    """Split a frame into ordered train/test features and target.

    The last ``pred_ahead`` rows (by position) become the test set and all
    earlier rows the training set. Rows are never shuffled.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the feature columns and the target column.
    col : str
        Name of the target column; it is removed from the feature frames.
    pred_ahead : int
        Number of trailing rows to hold out. ``0`` yields an empty test set;
        a value larger than ``len(df)`` holds out every row.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``.

    Raises
    ------
    ValueError
        If ``pred_ahead`` is negative.
    """
    if pred_ahead < 0:
        raise ValueError(f"pred_ahead must be non-negative, got {pred_ahead}")

    X = df.drop(columns=[col])
    y = df[col]

    # Use an explicit split position: with pred_ahead == 0 the slices `[:-0]` and
    # `[-0:]` would give an empty training set and put every row in the test set.
    split = max(len(df) - pred_ahead, 0)

    # .iloc keeps the split positional regardless of the index type.
    X_train, X_test = X.iloc[:split], X.iloc[split:]
    y_train, y_test = y.iloc[:split], y.iloc[split:]

    return (X_train, X_test, y_train, y_test)

# Hold out the final 30 * days rows of `data` as the evaluation window and
# train on everything before it.
pred_ahead = days * 30
(X_train, X_test, y_train, y_test) = prepare_X_y(data, 'Generation', pred_ahead)


In [6]:
# Fit a CatBoost regressor with default settings and predict the hold-out window.
cbr = CatBoostRegressor(verbose=False)
cbr.fit(X_train, y_train)
pred_cbr = cbr.predict(X_test)

# Collect the predictions next to the daylight flag, indexed by timestamp.
preds = pd.DataFrame({'DateTime': X_test.index, 'IsDay': X_test.IsDay, 'cbr': pred_cbr})
preds['hour'] = preds.DateTime.dt.hour
preds = preds.set_index('DateTime')

# Post-processing:
# 1) Floor negative predictions at zero. clip() replaces the chained assignment
#    `preds['cbr'][mask] = 0`, which is not guaranteed to modify `preds` and is a
#    no-op under pandas Copy-on-Write.
preds['cbr'] = preds['cbr'].clip(lower=0)
# 2) Force the prediction to zero wherever the row is flagged as not daytime.
preds.loc[preds['IsDay'] == 0, 'cbr'] = 0

# Hour lists for an alternative hour-based zeroing rule; the rule itself is
# disabled below, so these lists are currently unused in this cell.
nightlike = [19, 20, 21 , 5, 6]
night = [22 , 23 , 0, 1, 2, 3, 4] + nightlike
# preds.loc[preds['hour'].isin(night), 'cbr'] = 0

# RMSE on the hold-out window; arguments follow the documented (y_true, y_pred) order.
mse = mean_squared_error(y_test, preds.cbr)
print (np.sqrt(mse))

16.47204783271865
