In [19]:
import pandas as pd
import numpy as np
import requests
import json
from sklearn.preprocessing import LabelEncoder

In [20]:
def timeseries_features(df, label=None):
    """Derive calendar and time-of-day features from the frame's index.

    The index is parsed with ``pd.to_datetime``. Feature columns are written
    onto ``df`` itself (the caller's frame is mutated and keeps the helper
    ``datetime`` column); the returned frame is the same data without that
    helper column.

    Fixes relative to the earlier version of this function:

    * ``weekend`` is derived from the day of week (Mon=0 .. Sun=6) instead of
      the day of month.
    * ``working_hours`` used a condition that could never be true; hours
      7..19 inclusive are now labelled ``'work_hour'`` (20 is where
      ``prime_time`` starts).
    * ``label_hour`` mixed ``&`` with comparisons, so operator precedence
      produced meaningless buckets; the buckets now follow the written ranges.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index can be parsed by ``pd.to_datetime``.
    label : optional
        Unused; kept so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        Frame with the feature columns and without ``datetime``.
    """

    def _month_label(month):
        if month in (11, 12, 1, 2, 3):
            return '1'
        return '2' if month in (6, 7, 8, 9) else '3'

    def _season(month):
        if month in (12, 1, 2):
            return 'kış'
        if month in (3, 4, 5):
            return 'ilkbahar'
        return 'yaz' if month in (6, 7, 8) else 'sonbahar'

    def _hour_label(hour):
        # Buckets: 23 -> '1', 1..5 -> '2', 6..9 -> '3', 10..12 -> '4',
        # 13..16 -> '5', 17..20 -> '6', everything else (0, 21, 22) -> '7'.
        if 22 < hour < 24:
            return '1'
        if 0 < hour < 6:
            return '2'
        if 5 < hour < 10:
            return '3'
        if 9 < hour < 13:
            return '4'
        if 12 < hour < 17:
            return '5'
        if 16 < hour < 21:
            return '6'
        return '7'

    df['datetime'] = pd.to_datetime(df.index)
    stamp = df['datetime'].dt

    df['hour'] = stamp.hour
    df['date'] = stamp.date
    df['dayofweek'] = stamp.dayofweek
    # Same labels the month-based mapping produced: '1_çeyrek' .. '4_çeyrek'.
    df['quarter'] = stamp.quarter.map(lambda q: f'{q}_çeyrek')
    df['month'] = stamp.month
    df['year'] = stamp.year
    df['dayofyear'] = stamp.dayofyear
    df['dayofmonth'] = stamp.day
    df['weekday'] = stamp.weekday

    # Vectorised replacement for the former row-wise ``df.apply``.
    df['wntr_month'] = df['month'].isin([11, 12, 1, 2]).astype(int)
    df['month_label'] = df['month'].map(_month_label)
    df['season'] = df['month'].map(_season)

    df['weekend'] = df['dayofweek'].map(
        lambda day: 'hafta_içi' if day < 5 else 'hafta_sonu')
    df['working_hours'] = df['hour'].map(
        lambda hour: 'work_hour' if 7 <= hour <= 19 else 'not_work_hour')
    df['label_hour'] = df['hour'].map(_hour_label)
    df['prime_time'] = df['hour'].map(
        lambda hour: 'prime' if 19 < hour < 22 else 'not_prime')

    return df.drop('datetime', axis=1)
        
    

In [21]:
def label_data(feature, data, le_fit=None):
    """Label-encode one or more columns of ``data`` in place.

    Parameters
    ----------
    feature : str or list of str
        Column name(s) to encode.
    data : pandas.DataFrame
        Frame holding the columns; the encoded values overwrite the originals.
    le_fit : dict, optional
        Mapping that receives ``{column: fitted LabelEncoder}``. A new dict is
        created when omitted (previously the ``None`` default crashed on
        ``.update``).

    Returns
    -------
    tuple
        ``(data[feature], le_fit)`` - the encoded columns and the mapping of
        fitted encoders.
    """
    if isinstance(feature, str):
        feature = [feature]
    if le_fit is None:
        le_fit = {}

    for col in feature:
        # ``fit`` returns the encoder itself, so the fitted instance is stored.
        encoder = LabelEncoder().fit(data[col])
        le_fit[col] = encoder
        data[col] = encoder.transform(data[col])
    return data[feature], le_fit

In [22]:
def info_data(df):
    """Summarise each column of ``df``: null rate, distinct count and dtype.

    Returns a pandas ``Styler`` of the summary table, sorted by descending
    null rate and shaded with a red gradient.

    NOTE(review): relies on ``sns`` being available in the notebook namespace
    (presumably ``import seaborn as sns``) - it is not imported in the cells
    shown here.
    """
    gradient = sns.light_palette("red", as_cmap=True)

    null_rate = (df.isnull().sum() / df.shape[0]).to_frame('null_rate')
    distinct = df.nunique().to_frame('nunique')
    dtype_names = pd.Series(
        {name: df[name].dtype.name for name in df.columns}
    ).to_frame('dtype')

    summary = pd.merge(null_rate, distinct, left_index=True, right_index=True)
    summary = summary.merge(dtype_names, left_index=True, right_index=True)
    summary = summary.sort_values('null_rate', ascending=False)
    return summary.style.background_gradient(cmap=gradient)

In [23]:
def real_time_consumption(start_date="2017-12-01",
                         end_date="2022-10-01"):
    """Fetch hourly real-time electricity consumption from EPİAŞ transparency.

    Original author's note (translated): Turkey's total energy consumption;
    used with a minimum ``shift(24)`` to forecast 24 hours ahead. Because the
    date range is wide, the download may take a long time.

    Parameters
    ----------
    start_date, end_date : str
        Passed verbatim as the ``startDate`` / ``endDate`` query parameters.

    Returns
    -------
    pandas.DataFrame
        Columns ``consumption`` and ``Tarih`` (timestamp parsed from the first
        16 characters of the service's ``date`` field).

    Raises
    ------
    requests.HTTPError
        If the service answers with an HTTP error status.
    """
    url=f"https://seffaflik.epias.com.tr/transparency/service/consumption/real-time-consumption?startDate={start_date}&endDate={end_date}"
    # SECURITY NOTE(review): verify=False disables TLS certificate checks.
    # Kept as-is so existing behaviour does not change; enable verification
    # once the endpoint's certificate chain is confirmed to validate.
    response = requests.get(url, verify=False)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    response.raise_for_status()

    json_data = json.loads(response.text.encode('utf8'))
    consumption = pd.DataFrame(json_data['body']['hourlyConsumptions'])
    consumption['Tarih'] = pd.to_datetime(consumption.date.str[:16])
    consumption = consumption[['consumption', 'Tarih']]

    return consumption

In [24]:
def real_time_generation(start_date="2017-12-01",
                         end_date="2022-10-01"):
    """Fetch hourly real-time electricity generation from EPİAŞ transparency.

    Original author's note (translated): Turkey's total energy production;
    used with a minimum ``shift(24)`` to forecast 24 hours ahead. Because the
    date range is wide, the download may take a long time. Just in case, the
    data is also available as extra data in ``production.csv`` of the
    ``gdz-ext-dataset``.

    Parameters
    ----------
    start_date, end_date : str
        Passed verbatim as the ``startDate`` / ``endDate`` query parameters.

    Returns
    -------
    pandas.DataFrame
        The service's hourly generation records plus a parsed ``Tarih``
        column, with ``total == 0`` replaced by NaN and the ``date``,
        ``naphta`` and ``nucklear`` columns removed.

    Raises
    ------
    requests.HTTPError
        If the service answers with an HTTP error status.
    """
    url=f"https://seffaflik.epias.com.tr/transparency/service/production/real-time-generation?startDate={start_date}&endDate={end_date}"
    # SECURITY NOTE(review): verify=False disables TLS certificate checks.
    # Kept as-is so existing behaviour does not change; enable verification
    # once the endpoint's certificate chain is confirmed to validate.
    response = requests.get(url, verify=False)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    response.raise_for_status()

    json_data = json.loads(response.text.encode('utf8'))
    production = pd.DataFrame(json_data['body']['hourlyGenerations'])
    production['Tarih'] = pd.to_datetime(production.date.str[:16])
    # A reported total of exactly zero is treated as missing.
    production.loc[production.total == 0, 'total'] = np.nan
    production = production.drop(['date', 'naphta', 'nucklear'], axis=1)

    return production

In [25]:
def get_meteostat_data ( start_date,end_date,latitude=38.4235,longitude=27.1564,features=None):
    """Fetch hourly weather observations from meteostat for one location.

    Parameters
    ----------
    start_date, end_date
        Period boundaries, forwarded to ``meteostat.Hourly``.
    latitude, longitude : float
        Coordinates of the point of interest (the defaults were annotated
        "for izmir" by the original author).
    features : list of str, optional
        Columns to keep. When omitted, every column returned by meteostat is
        kept (previously the ``None`` default raised ``KeyError``).

    Returns
    -------
    pandas.DataFrame
        The fetched hourly frame, restricted to ``features`` if given.
    """
    # Imported lazily so the notebook loads without meteostat installed.
    from meteostat import Hourly, Point

    location = Point(latitude, longitude)
    weather = Hourly(location, start_date, end_date).fetch()
    if features is not None:
        weather = weather[features]
    return weather

In [26]:
def lag_features(df_temp,
                 columns,
                 lags,lag_freq):
    """Add shifted ("lagged") copies of the given columns to ``df_temp``.

    For every column, one new column ``lag_<k>_<col>`` is written for each
    ``k`` in ``range(lag_freq, lags + lag_freq, lag_freq)``. The frame is
    modified in place and also returned.
    """
    names = [columns] if type(columns) is str else columns

    for name in names:
        for offset in range(lag_freq, lags + lag_freq, lag_freq):
            shifted = df_temp[name].shift(offset)
            df_temp[f'lag_{offset}_{name}'] = shifted
    return df_temp

In [27]:
def rolling_features(df_temp,columns,rolls,roll_types):
    """Add rolling-window statistics of the given columns to ``df_temp``.

    For each column and window size, a column
    ``rolling_<stat>_<window>_<col>`` is written for every requested
    statistic among ``mean``, ``max``, ``min`` and ``std`` (always in that
    order). Windows use ``min_periods=1``. Scalars are accepted for
    ``columns``, ``rolls`` and ``roll_types``. The frame is modified in place
    and also returned.
    """
    if type(columns) is str:
        columns = [columns]
    if type(roll_types) is str:
        roll_types = [roll_types]
    if type(rolls) in (str, int):
        rolls = [rolls]

    for name in columns:
        for window in rolls:
            for stat in ('mean', 'max', 'min', 'std'):
                if stat not in roll_types:
                    continue
                roller = df_temp[name].rolling(window, min_periods=1)
                values = getattr(roller, stat)().reset_index(drop=True).values
                df_temp[f'rolling_{stat}_{window}_{name}'] = values
    return df_temp

In [28]:
def rolling_shift_features(df_temp,
                     columns,
                     rolls,
                     roll_types,shift):
    """Add rolling-window statistics computed on shifted columns.

    Each column is first shifted by ``shift`` rows, then a rolling window
    (``min_periods=1``) is applied. Results are stored as
    ``rolling_shift_24_<stat>_<window>_<col>`` for every requested statistic
    among ``mean``, ``max``, ``min`` and ``std`` (always in that order).

    Note that the ``24`` in the column name is a fixed literal; it does not
    follow the ``shift`` argument. The frame is modified in place and also
    returned.
    """
    if type(columns) is str:
        columns = [columns]
    if type(roll_types) is str:
        roll_types = [roll_types]
    if type(rolls) in (str, int):
        rolls = [rolls]

    for name in columns:
        for window in rolls:
            for stat in ('mean', 'max', 'min', 'std'):
                if stat not in roll_types:
                    continue
                roller = df_temp[name].shift(shift).rolling(window, min_periods=1)
                values = getattr(roller, stat)().reset_index(drop=True).values
                df_temp[f'rolling_shift_24_{stat}_{window}_{name}'] = values
    return df_temp

In [11]:
def periodic_spline_transformer(period, n_splines=None, degree=3):
    """Build a periodic ``SplineTransformer`` for a cyclical feature.

    Source: https://scikit-learn.org/stable/auto_examples/applications/plot_cyclical_feature_engineering.html

    Parameters
    ----------
    period : int
        Length of one cycle; knots are spread evenly over ``[0, period]``.
    n_splines : int, optional
        Number of spline basis functions; defaults to ``period``.
    degree : int, default 3
        Polynomial degree of the splines.

    Returns
    -------
    sklearn.preprocessing.SplineTransformer
        An unfitted transformer with periodic extrapolation.

    Notes
    -----
    This function is defined twice in this notebook; the definitions are
    meant to stay identical.
    """
    # SplineTransformer is not imported in the notebook's import cell, so it
    # is brought into scope here to avoid a NameError on first call.
    from sklearn.preprocessing import SplineTransformer

    if n_splines is None:
        n_splines = period
    n_knots = n_splines + 1  # periodic and include_bias is True
    return SplineTransformer(
        degree=degree,
        n_knots=n_knots,
        knots=np.linspace(0, period, n_knots).reshape(n_knots, 1),
        extrapolation="periodic",
        include_bias=True)

In [12]:
def diff_pct_features(df_temp,columns,diff_pct
        ):
    """Add difference and percentage-change columns to ``df_temp``.

    For every column and every period ``p`` in ``diff_pct`` two columns are
    written: ``diff_<col>_<p>`` (``Series.diff(p)``) and
    ``pct_change_<col>_<p>`` (``Series.pct_change(p)``). Scalars are accepted
    for ``columns`` and ``diff_pct``. The frame is modified in place and also
    returned.
    """
    names = [columns] if type(columns) is str else columns
    periods = [diff_pct] if type(diff_pct) in (str, int) else diff_pct

    for name in names:
        for period in periods:
            delta = df_temp[name].diff(period)
            df_temp[f'diff_{name}_{period}'] = delta.values
            ratio = df_temp[name].pct_change(period)
            df_temp[f'pct_change_{name}_{period}'] = ratio.values

    return df_temp

In [13]:
def seasonality_features(df_temp):
    """Add sine/cosine encodings of the ``month`` and ``hour`` columns.

    Writes ``month_sin`` / ``month_cos`` (period 12, from ``month``) and
    ``day_sin`` / ``day_cos`` (period 24, from ``hour``) onto ``df_temp`` and
    returns the same frame.
    """
    encodings = (
        ('month_sin', np.sin, 'month', 12),
        ('month_cos', np.cos, 'month', 12),
        ('day_sin', np.sin, 'hour', 24),
        ('day_cos', np.cos, 'hour', 24),
    )
    for target, wave, source, period in encodings:
        df_temp[target] = wave(2*np.pi*df_temp[source]/period)
    return df_temp

In [14]:
def seasonality_spline_features(hours=np.arange(0,24)):
    """Build a lookup table of periodic spline features for hours of the day.

    The splines (period 24, 12 basis functions) are evaluated at exactly the
    values in ``hours``, so each row's ``hour`` label matches the point its
    spline values were computed for. The earlier version evaluated them on
    ``np.linspace(0, 24, 24)`` (step 24/23) while labelling rows with
    ``hours``, which mislabelled every row after the first.

    Parameters
    ----------
    hours : array-like, default ``np.arange(0, 24)``
        Hour values to evaluate; any length is accepted.

    Returns
    -------
    pandas.DataFrame
        Column ``hour`` followed by ``spline_0 .. spline_<k-1>``.
    """
    hours = np.asarray(hours)
    hour_df = pd.DataFrame(hours.reshape(-1, 1), columns=["hour"])
    splines = periodic_spline_transformer(24, n_splines=12).fit_transform(hour_df)
    splines_df = pd.DataFrame(
        splines, columns=[f"spline_{i}" for i in range(splines.shape[1])])
    splines_df = pd.concat(
        [pd.Series(hours, name='hour'), splines_df], axis="columns")

    return splines_df

In [15]:
def is_categorical(df_temp,columns,treshold_category=0.05):
    """Convert columns that look categorical to pandas ``category`` dtype.

    A column is converted when it has ``object`` dtype, or when it is an
    integer/float column whose number of distinct values is below
    ``len(column) * treshold_category``.

    Integer and float columns of any width (e.g. ``int32``, ``float32``) are
    recognised; the earlier ``dtype == int`` / ``dtype == float`` comparison
    only matched the platform default width.

    Parameters
    ----------
    df_temp : pandas.DataFrame
        Frame to update in place.
    columns : str or list of str
        Column name(s) to inspect.
    treshold_category : float, default 0.05
        Maximum ratio of distinct values to rows for a numeric column to be
        treated as categorical.

    Returns
    -------
    pandas.DataFrame
        The same frame, with qualifying columns converted.
    """
    from pandas.api.types import is_float_dtype, is_integer_dtype

    if isinstance(columns, str):
        columns = [columns]

    for col in columns:
        series = df_temp[col]
        dtype = series.dtype

        if dtype == 'object':
            df_temp[col] = series.astype('category')
        elif is_integer_dtype(dtype) or is_float_dtype(dtype):
            if series.nunique() < len(series) * treshold_category:
                df_temp[col] = series.astype('category')

    return df_temp

In [16]:
def cat_target_encode(train_temp,test_temp,target,cat_cols,encode_type):
    """Target-encode categorical column(s) using statistics from the train set.

    ``target`` is aggregated with ``encode_type`` per group of ``cat_cols`` on
    ``train_temp``; the result is left-merged onto both frames as a column
    named ``"<col1>_<col2>..._te"``. Groups absent from the training frame
    yield NaN in the test frame.

    Returns the merged ``(train, test)`` frames; the inputs are not modified.
    """
    keys = [cat_cols] if type(cat_cols) is str else cat_cols

    lookup = train_temp.groupby(keys).agg({target: encode_type}).reset_index()
    encoded_name = "_".join(keys) + '_te'
    lookup = lookup.rename(columns={target: encoded_name})

    train_encoded = train_temp.merge(lookup, on=keys, how='left')
    test_encoded = test_temp.merge(lookup, on=keys, how='left')
    return train_encoded, test_encoded

In [17]:
def periodic_spline_transformer(period, n_splines=None, degree=3):
    """Build a periodic ``SplineTransformer`` for a cyclical feature.

    Source: https://scikit-learn.org/stable/auto_examples/applications/plot_cyclical_feature_engineering.html

    Parameters
    ----------
    period : int
        Length of one cycle; knots are spread evenly over ``[0, period]``.
    n_splines : int, optional
        Number of spline basis functions; defaults to ``period``.
    degree : int, default 3
        Polynomial degree of the splines.

    Returns
    -------
    sklearn.preprocessing.SplineTransformer
        An unfitted transformer with periodic extrapolation.

    Notes
    -----
    This function is defined twice in this notebook; this later definition
    is the one in effect when the cells run top to bottom.
    """
    # SplineTransformer is not imported in the notebook's import cell, so it
    # is brought into scope here to avoid a NameError on first call.
    from sklearn.preprocessing import SplineTransformer

    if n_splines is None:
        n_splines = period
    n_knots = n_splines + 1  # periodic and include_bias is True
    return SplineTransformer(
        degree=degree,
        n_knots=n_knots,
        knots=np.linspace(0, period, n_knots).reshape(n_knots, 1),
        extrapolation="periodic",
        include_bias=True)

In [18]:
def seasonality_spline_features(hours=np.arange(0,24)):
    """Build a lookup table of periodic spline features for hours of the day.

    The splines (period 24, 12 basis functions) are evaluated at exactly the
    values in ``hours``, so each row's ``hour`` label matches the point its
    spline values were computed for. The earlier version evaluated them on
    ``np.linspace(0, 24, 24)`` (step 24/23) while labelling rows with
    ``hours``, which mislabelled every row after the first.

    Parameters
    ----------
    hours : array-like, default ``np.arange(0, 24)``
        Hour values to evaluate; any length is accepted.

    Returns
    -------
    pandas.DataFrame
        Column ``hour`` followed by ``spline_0 .. spline_<k-1>``.

    Notes
    -----
    This function is defined twice in this notebook; this later definition
    is the one in effect when the cells run top to bottom.
    """
    hours = np.asarray(hours)
    hour_df = pd.DataFrame(hours.reshape(-1, 1), columns=["hour"])
    splines = periodic_spline_transformer(24, n_splines=12).fit_transform(hour_df)
    splines_df = pd.DataFrame(
        splines, columns=[f"spline_{i}" for i in range(splines.shape[1])])
    splines_df = pd.concat(
        [pd.Series(hours, name='hour'), splines_df], axis="columns")

    return splines_df