In [1]:
import requests 
import pandas as pd
import matplotlib.pyplot as plt
import datetime 
import numpy as np
from sklearn.preprocessing import StandardScaler
import datetime
from sklearn.neighbors import KernelDensity

In [2]:
'''Transformations with OOP'''

class DataTransformer:
    '''Class containing methods to transform the imported data.

    The frame handed to the constructor is stored on ``self.df``. Every
    transformation method updates ``self.df`` (either by modifying it or by
    rebinding it to a new frame) and returns ``None``; only ``transform``
    and the getters return something. Always fetch the current result with
    ``get_df()`` rather than holding on to an older reference.
    '''

    def __init__(self,df):
        self.df = df

    def transform(self):
        '''Run the whole pipeline in order and return the transformed frame.

        ``add_time_features`` must run before ``ohe`` because it creates the
        string-valued ``hour``/``month`` columns that ``ohe`` encodes, and
        ``add_historical_windpower`` must run before ``add_momentum_force``
        because the latter reads the joined ``Wind Energy`` column.
        '''
        self.interpolate()
        self.add_cyclical_features()
        self.add_time_features()
        self.ohe()
        self.add_historical_windpower()
        self.add_momentum_force()
        self.scale()
        return self.df

    def interpolate(self):
        '''Index the data by time, put it on an hourly grid and fill the gaps.

        The ``Time`` column is expected to hold strings whose last three
        characters are dropped before parsing with ``'%Y/%m/%d %H:%M'``
        (presumably a trailing ``:SS`` seconds field - confirm against the
        input file). Missing hourly rows created by the resample are filled
        by cubic interpolation in both directions.
        '''
        # Work on a copy so the frame passed to the constructor is not modified.
        df = self.df.copy()
        # One vectorised parse replaces the per-row strptime plus the redundant
        # second pd.to_datetime conversion.
        df['Time'] = pd.to_datetime(df['Time'].str[:-3], format='%Y/%m/%d %H:%M')
        # resample(...).asfreq() inserts an all-NaN row for every missing hour.
        df = df.set_index('Time').resample('1h').asfreq()
        df = df.interpolate(method='cubic', axis=0, limit_direction='both')
        self.df = df

    def add_cyclical_features(self):
        '''Replace every ``*Direction*`` column by its sine and cosine.

        The columns are treated as angles in degrees and converted to radians
        first, so that 0 and 360 map to the same (sin, cos) pair. The original
        direction columns are dropped.
        '''
        df = self.df
        direction_cols = [c for c in df.columns if 'Direction' in c]
        for c in direction_cols:
            angle = np.deg2rad(df[c])
            df[c+'_sin'] = np.sin(angle)
            df[c+'_cos'] = np.cos(angle)
        df.drop(columns=direction_cols, inplace=True)
        self.df = df

    def scale(self):
        '''Standardise every column of the frame with a fresh StandardScaler.'''
        df = self.df
        scaled = StandardScaler().fit_transform(df)
        self.df = pd.DataFrame(scaled, index=df.index, columns=df.columns)

    def add_time_features(self):
        '''Add ``hour`` and ``month`` columns (as strings) taken from the index.

        Requires a DatetimeIndex, as produced by ``interpolate``. The values
        are stored as strings so that ``ohe`` turns them into dummy columns.
        '''
        df = self.df
        df['hour'] = df.index.hour.astype(str)
        df['month'] = df.index.month.astype(str)
        self.df = df

    def add_cyclic_time_features(self):
        '''Add a numeric ``hour`` column, its sine/cosine, and a string ``month``.

        The hour is mapped onto a 24-hour circle (angle = 2*pi*hour/24) before
        taking sine and cosine, so hour 23 and hour 0 end up next to each
        other. Requires a DatetimeIndex, as produced by ``interpolate``.
        '''
        df = self.df
        df['hour'] = df.index.hour
        angle = 2 * np.pi * df['hour'] / 24
        df['sin_hour'] = np.sin(angle)
        df['cos_hour'] = np.cos(angle)
        df['month'] = df.index.month.astype(str)
        self.df = df

    def ohe(self):
        '''One-hot encode the frame with ``pd.get_dummies``.

        ``get_dummies`` is applied to the whole frame; by default it only
        expands string/object/categorical columns, so numeric columns pass
        through unchanged.
        '''
        self.df = pd.get_dummies(self.df)

    def add_historical_windpower(self, target_path='target.csv'):
        '''Load the target series, standardise it and left-join it on the index.

        Parameters
        ----------
        target_path : str, default 'target.csv'
            CSV file with a ``Time`` column and the target column(s).
            ``add_momentum_force`` expects one of them to be ``Wind Energy``.

        The fitted scaler is kept on ``self.target_scaler`` so that scaled
        values can be converted back later.
        '''
        t = pd.read_csv(target_path)
        t['Time'] = pd.to_datetime(t['Time'])
        t = t.set_index('Time')
        target_scaler = StandardScaler().fit(t)
        t_scaled = pd.DataFrame(target_scaler.transform(t),index=t.index,columns=t.columns)
        self.target_scaler = target_scaler
        self.df = self.df.join(t_scaled,how='left')

    def add_momentum_force(self, time_lag=18):
        '''Add ``Momentum`` and ``Force`` columns derived from ``Wind Energy``.

        Parameters
        ----------
        time_lag : int, default 18
            Number of rows to look back.

        ``shift(k)`` with a positive ``k`` moves values down, so row ``t``
        receives the value from row ``t - k``: both lag columns hold *past*
        values. ``Momentum`` is the first difference over ``time_lag`` rows
        and ``Force`` the second difference. Rows containing a NaN in any
        column are dropped, which includes the first ``2 * time_lag`` rows.

        Also stores ``self.lagged_input``: ``Wind Energy`` shifted by one row,
        back-filled and standardised, as an array with one column.
        '''
        lag_1 = 'Wind Energy Lag {}'.format(time_lag)
        lag_2 = 'Wind Energy Lag {}'.format(2*time_lag)
        energy = self.df['Wind Energy']

        with_lags = self.df.assign(**{
            lag_1: energy.shift(time_lag),
            lag_2: energy.shift(2*time_lag),
        }).dropna(axis=0)

        df = with_lags.assign(
            Momentum=with_lags['Wind Energy'] - with_lags[lag_1],
            Force=with_lags['Wind Energy'] - 2*with_lags[lag_1] + with_lags[lag_2],
        ).drop(columns=[lag_1, lag_2])
        self.df = df

        ### generate lagged input
        # .bfill() replaces the deprecated fillna(method='bfill'); it fills the
        # single NaN that shift(1) leaves in the first row.
        lagged = df[['Wind Energy']].shift(1).bfill()
        self.lagged_input = StandardScaler().fit_transform(lagged.values)

    #----GETTER Functions---
    # Read access to the state built up by the transformation methods.

    def get_df(self):
        '''Return the current (transformed) frame.'''
        return self.df

    def get_lagged_input(self):
        '''Return the lagged input array; set by ``add_momentum_force``.'''
        return self.lagged_input

    def get_target_scaler(self):
        '''Return the target scaler; set by ``add_historical_windpower``.'''
        return self.target_scaler


## Approximating wind speed at a different height

In [None]:
# Constant wind roughness factor: one z0 per location, read from the 'overall' column

In [19]:
def aprrox_calculator(z, z_ref, z0, v_ref):
    '''Extrapolate a wind speed from height ``z_ref`` to height ``z``.

    Applies the logarithmic wind profile
    ``v = v_ref * ln(z / z0) / ln(z_ref / z0)``.

    z     : target height
    z_ref : height at which ``v_ref`` applies
    z0    : roughness length (same length unit as ``z`` and ``z_ref``)
    v_ref : wind speed at ``z_ref``
    '''
    height_ratio = np.log(z / z0) / np.log(z_ref / z0)
    return v_ref * height_ratio

def wind_speed_approx(df,wind_roughness, z, z_ref):
    '''Add an ``approx_Speed_<location>`` column for every ``Speed_<location>`` column.

    df             : frame with one ``Speed_<location>`` column per site; it is
                     modified in place and also returned.
    wind_roughness : frame indexed by location name with an ``overall`` column
                     holding the roughness length z0 of each site.
    z              : height the speeds are extrapolated to.
    z_ref          : height the speeds in ``df`` refer to.

    Only columns whose name *starts with* ``Speed_`` are used, so running the
    function again on its own output recomputes the same columns instead of
    tripping over the ``approx_Speed_*`` columns it created.
    '''
    prefix = 'Speed_'
    speed_columns = [c for c in df.columns if c.startswith(prefix)]
    for column in speed_columns:
        location = column[len(prefix):]
        z0 = wind_roughness['overall'].loc[location]
        # z0 is a scalar per site, so the whole column is converted at once.
        df['approx_'+ column] = aprrox_calculator(z, z_ref, z0, df[column])
    return df

In [20]:
# Load the raw model output and put it on an hourly, gap-free time index.
df1 = pd.read_csv('model_1.csv').drop(columns=['Unnamed: 0'])
transformer = DataTransformer(df1)
transformer.interpolate()
df = transformer.get_df()

# Roughness lengths per location (index = location name).
wind_roughness = pd.read_csv('wind_roughness.csv').set_index('Unnamed: 0')

# Extrapolate the speeds from 10 to 50 (height units as used in the roughness file).
df1 = wind_speed_approx(df, wind_roughness, 50, 10)

In [21]:
# Show the frame with the constant-roughness approx_Speed_* columns appended.
df1

Unnamed: 0_level_0,Speed_guitrancourt,Direction_guitrancourt,Speed_lieusaint,Direction_lieusaint,Speed_lvs-pussay,Direction_lvs-pussay,Speed_parc-du-gatinais,Direction_parc-du-gatinais,Speed_arville,Direction_arville,...,Speed_angerville-2,Direction_angerville-2,approx_Speed_guitrancourt,approx_Speed_lieusaint,approx_Speed_lvs-pussay,approx_Speed_parc-du-gatinais,approx_Speed_arville,approx_Speed_boissy-la-riviere,approx_Speed_angerville-1,approx_Speed_angerville-2
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-01-01 00:00:00,1.860000,212.000000,1.500000,223.000000,1.710000,222.000000,1.450000,222.000000,1.450000,223.000000,...,1.670000,222.000000,2.625219,2.250000,2.183760,1.851726,1.851726,2.173485,2.357052,2.357052
2017-01-01 01:00:00,1.751005,210.110377,1.441840,216.950107,1.620342,225.182852,1.430931,220.628669,1.427521,221.158538,...,1.589054,224.867433,2.471382,2.162760,2.069262,1.827374,1.823020,2.059416,2.242802,2.242804
2017-01-01 02:00:00,1.750148,207.514083,1.486818,211.745610,1.634538,226.326573,1.477011,219.267963,1.472961,219.382117,...,1.607412,225.857680,2.470173,2.230226,2.087391,1.886221,1.881048,2.082290,2.268712,2.268715
2017-01-01 03:00:00,1.839525,204.408009,1.616028,207.317430,1.734595,225.755655,1.577099,217.921313,1.574623,217.674393,...,1.707880,225.268401,2.596320,2.424042,2.215169,2.014038,2.010876,2.218139,2.410513,2.410516
2017-01-01 04:00:00,2.001230,200.989044,1.810565,203.596488,1.902519,223.794592,1.720053,216.592148,1.720813,216.039027,...,1.873263,223.397255,2.824552,2.715848,2.429617,2.196598,2.197568,2.442991,2.643937,2.643939
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-08-18 20:00:00,1.932752,225.158092,0.821273,208.198161,1.690972,231.271892,0.640814,191.001196,0.697368,192.432193,...,1.633225,230.624259,2.727902,1.231909,2.159460,0.818353,0.890575,1.796784,2.305146,2.305146
2020-08-18 21:00:00,1.779108,220.278210,0.760986,201.485172,1.615761,229.500364,0.657124,188.079638,0.708856,189.171994,...,1.559394,228.508828,2.511047,1.141478,2.063412,0.839182,0.905246,1.720249,2.200941,2.200941
2020-08-18 22:00:00,1.681218,215.336504,0.882702,196.775479,1.647048,227.950976,0.902962,188.140383,0.942821,188.873575,...,1.596808,226.613657,2.372884,1.324053,2.103367,1.153130,1.204031,1.824449,2.253748,2.253748
2020-08-18 23:00:00,1.655907,210.516069,1.225885,194.727586,1.810054,226.744074,1.425023,191.881236,1.444717,192.246416,...,1.771625,225.067723,2.337160,1.838828,2.311534,1.819829,1.844979,2.149744,2.500486,2.500486


### Wind roughness factor based on direction

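##### Re-run every cell in this section: `aprrox_calculator` and `wind_speed_approx` are defined again below and replace the versions from the previous section.
##### `aprrox_calculator` is the same as above; `wind_speed_approx` differs — it looks up the roughness factor by wind-direction sector instead of using the `overall` column, and it depends on the helper functions defined in the same cell.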

In [22]:
def aprrox_calculator(z, z_ref, z0, v_ref):
    '''Convert the speed ``v_ref`` at height ``z_ref`` into the speed at height ``z``.

    Uses the logarithmic wind profile with roughness length ``z0``:
    ``v = v_ref * ln(z / z0) / ln(z_ref / z0)``.
    '''
    log_target, log_reference = np.log(z / z0), np.log(z_ref / z0)
    return v_ref * (log_target / log_reference)

def cal_direction(df, location):
    '''Return the compass-sector label of every row of ``Direction_<location>``.

    Each value of the column is passed through ``direction_str``; the result
    is a Series of two-character labels on the same index as ``df``.
    '''
    bearings = df['Direction_' + location]
    return bearings.apply(direction_str)

def direction_str(x):
    '''Map a bearing in degrees to a two-character compass-sector label.

    The circle is split into eight 45-degree sectors centred on the compass
    points; a boundary value belongs to the sector below it (22.5 -> 'N_',
    67.5 -> 'NE', ...). Single-letter points are padded with '_' so that
    every label is exactly two characters long.

    Bearings outside [0, 360) are wrapped onto the circle first (405 is
    treated as 45, -45 as 315).

    Raises
    ------
    ValueError
        If ``x`` is NaN, which cannot be assigned to any sector.
    '''
    if np.isnan(x):
        raise ValueError('wind direction is NaN; cannot assign a compass sector')

    bearing = x % 360
    # (inclusive upper bound of the sector, label), in ascending order.
    sectors = (
        (22.5, 'N_'),
        (67.5, 'NE'),
        (112.5, 'E_'),
        (157.5, 'SE'),
        (202.5, 'S_'),
        (247.5, 'SW'),
        (292.5, 'W_'),
        (337.5, 'NW'),
    )
    for upper_bound, label in sectors:
        if bearing <= upper_bound:
            return label
    # Anything above 337.5 wraps around to north again.
    return 'N_'

def wind_approx_formatter(df, wind_roughness, location, z, z_ref):
    '''Add ``wind_speed_<location>``: the site's speed extrapolated to height ``z``.

    The roughness length is chosen per row from the compass sector of
    ``Direction_<location>``; ``wind_roughness`` must have one column per
    sector label and be indexed by location name.

    df             : frame with ``Speed_<location>`` and ``Direction_<location>``
                     columns; modified in place and also returned.
    wind_roughness : roughness lengths, index = location, columns = sector labels.
    location       : site name used as the column suffix.
    z, z_ref       : target height and the height the input speeds refer to.

    The speed and sector are combined directly, without packing them into a
    string and parsing them back row by row, and no scratch columns are
    added to ``df``.
    '''
    sectors = cal_direction(df, location)
    # Look up z0 once per sector that actually occurs (at most eight lookups);
    # an unknown sector label still raises KeyError.
    z0_by_sector = {
        sector: wind_roughness[sector].loc[location]
        for sector in sectors.unique()
    }
    z0 = sectors.map(z0_by_sector).astype(float)
    df['wind_speed_' + location] = aprrox_calculator(z, z_ref, z0, df['Speed_' + location])
    return df

def wind_approx_converter(x, wind_roughness, location, z, z_ref):
    '''Extrapolate one encoded ``<speed><sector>`` string to height ``z``.

    ``x`` is a string whose last two characters are the compass-sector label
    and whose remaining prefix is the wind speed. The roughness length is
    read from the sector's column of ``wind_roughness`` at row ``location``.
    '''
    speed_text, sector = x[:-2], x[-2:]
    z0 = wind_roughness[sector].loc[location]
    return aprrox_calculator(z, z_ref, z0, float(speed_text))
    
def wind_speed_approx(df,wind_roughness, z, z_ref):
    '''Add a direction-aware ``wind_speed_<location>`` column for every site.

    For each ``Speed_<location>`` column the work is delegated to
    ``wind_approx_formatter``, which picks the roughness length from the
    wind-direction sector of each row.

    df             : frame with ``Speed_<location>`` / ``Direction_<location>``
                     column pairs.
    wind_roughness : roughness lengths, index = location, columns = sector labels.
    z              : height the speeds are extrapolated to.
    z_ref          : height the speeds in ``df`` refer to.

    ``z`` and ``z_ref`` are forwarded to the formatter rather than being
    replaced by fixed values. Only columns whose name starts with ``Speed_``
    count as speed columns.
    '''
    prefix = 'Speed_'
    locations = [c[len(prefix):] for c in df.columns if c.startswith(prefix)]
    for location in locations:
        df = wind_approx_formatter(df, wind_roughness, location, z, z_ref)
    return df

In [23]:
# Reload the raw model output and rebuild the hourly, gap-free frame.
df1 = pd.read_csv('model_1.csv').drop(columns=['Unnamed: 0'])
transformer = DataTransformer(df1)
transformer.interpolate()
df = transformer.get_df()

# Roughness lengths per location and wind-direction sector.
wind_roughness = pd.read_csv('wind_roughness_df.csv').set_index('Unnamed: 0')

# Extrapolate the speeds from 10 to 50 using the sector-dependent roughness.
df = wind_speed_approx(df, wind_roughness, 50, 10)

In [24]:
# Show the frame with the direction-dependent wind_speed_* columns appended.
df

Unnamed: 0_level_0,Speed_guitrancourt,Direction_guitrancourt,Speed_lieusaint,Direction_lieusaint,Speed_lvs-pussay,Direction_lvs-pussay,Speed_parc-du-gatinais,Direction_parc-du-gatinais,Speed_arville,Direction_arville,...,Speed_angerville-2,Direction_angerville-2,wind_speed_guitrancourt,wind_speed_lieusaint,wind_speed_lvs-pussay,wind_speed_parc-du-gatinais,wind_speed_arville,wind_speed_boissy-la-riviere,wind_speed_angerville-1,wind_speed_angerville-2
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-01-01 00:00:00,1.860000,212.000000,1.500000,223.000000,1.710000,222.000000,1.450000,222.000000,1.450000,223.000000,...,1.670000,222.000000,2.790000,2.250000,2.183760,1.933333,1.851726,2.173485,2.226667,2.226667
2017-01-01 01:00:00,1.751005,210.110377,1.441840,216.950107,1.620342,225.182852,1.430931,220.628669,1.427521,221.158538,...,1.589054,224.867433,2.626507,2.162760,2.069262,1.907908,1.823020,2.059416,2.118738,2.118739
2017-01-01 02:00:00,1.750148,207.514083,1.486818,211.745610,1.634538,226.326573,1.477011,219.267963,1.472961,219.382117,...,1.607412,225.857680,2.625222,2.230226,2.087391,1.969348,1.881048,2.082290,2.143214,2.143216
2017-01-01 03:00:00,1.839525,204.408009,1.616028,207.317430,1.734595,225.755655,1.577099,217.921313,1.574623,217.674393,...,1.707880,225.268401,2.759287,2.424042,2.215169,2.102798,2.010876,2.218139,2.277171,2.277173
2017-01-01 04:00:00,2.001230,200.989044,1.810565,203.596488,1.902519,223.794592,1.720053,216.592148,1.720813,216.039027,...,1.873263,223.397255,3.001844,2.715848,2.429617,2.293404,2.197568,2.442991,2.497682,2.497684
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-08-18 20:00:00,1.932752,225.158092,0.821273,208.198161,1.690972,231.271892,0.640814,191.001196,0.697368,192.432193,...,1.633225,230.624259,2.899128,1.231909,2.159460,0.839036,0.890575,1.796784,2.177633,2.177633
2020-08-18 21:00:00,1.779108,220.278210,0.760986,201.485172,1.615761,229.500364,0.657124,188.079638,0.708856,189.171994,...,1.559394,228.508828,2.668662,1.141478,2.063412,0.860391,0.905246,1.720249,2.079191,2.079191
2020-08-18 22:00:00,1.681218,215.336504,0.882702,196.775479,1.647048,227.950976,0.902962,188.140383,0.942821,188.873575,...,1.596808,226.613657,2.521827,1.324053,2.103367,1.182274,1.204031,1.824449,2.129078,2.129078
2020-08-18 23:00:00,1.655907,210.516069,1.225885,194.727586,1.810054,226.744074,1.425023,191.881236,1.444717,192.246416,...,1.771625,225.067723,2.483860,1.838828,2.311534,1.865823,1.844979,2.149744,2.362167,2.362167


In [16]:
# NOTE(review): this cell carries execution count 16, i.e. it was run before
# the cells above were last executed; the output below is the same
# constant-roughness table already shown earlier. Re-run or remove it.
df1

Unnamed: 0_level_0,Speed_guitrancourt,Direction_guitrancourt,Speed_lieusaint,Direction_lieusaint,Speed_lvs-pussay,Direction_lvs-pussay,Speed_parc-du-gatinais,Direction_parc-du-gatinais,Speed_arville,Direction_arville,...,Speed_angerville-2,Direction_angerville-2,approx_Speed_guitrancourt,approx_Speed_lieusaint,approx_Speed_lvs-pussay,approx_Speed_parc-du-gatinais,approx_Speed_arville,approx_Speed_boissy-la-riviere,approx_Speed_angerville-1,approx_Speed_angerville-2
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-01-01 00:00:00,1.860000,212.000000,1.500000,223.000000,1.710000,222.000000,1.450000,222.000000,1.450000,223.000000,...,1.670000,222.000000,2.625219,2.250000,2.183760,1.851726,1.851726,2.173485,2.357052,2.357052
2017-01-01 01:00:00,1.751005,210.110377,1.441840,216.950107,1.620342,225.182852,1.430931,220.628669,1.427521,221.158538,...,1.589054,224.867433,2.471382,2.162760,2.069262,1.827374,1.823020,2.059416,2.242802,2.242804
2017-01-01 02:00:00,1.750148,207.514083,1.486818,211.745610,1.634538,226.326573,1.477011,219.267963,1.472961,219.382117,...,1.607412,225.857680,2.470173,2.230226,2.087391,1.886221,1.881048,2.082290,2.268712,2.268715
2017-01-01 03:00:00,1.839525,204.408009,1.616028,207.317430,1.734595,225.755655,1.577099,217.921313,1.574623,217.674393,...,1.707880,225.268401,2.596320,2.424042,2.215169,2.014038,2.010876,2.218139,2.410513,2.410516
2017-01-01 04:00:00,2.001230,200.989044,1.810565,203.596488,1.902519,223.794592,1.720053,216.592148,1.720813,216.039027,...,1.873263,223.397255,2.824552,2.715848,2.429617,2.196598,2.197568,2.442991,2.643937,2.643939
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-08-18 20:00:00,1.932752,225.158092,0.821273,208.198161,1.690972,231.271892,0.640814,191.001196,0.697368,192.432193,...,1.633225,230.624259,2.727902,1.231909,2.159460,0.818353,0.890575,1.796784,2.305146,2.305146
2020-08-18 21:00:00,1.779108,220.278210,0.760986,201.485172,1.615761,229.500364,0.657124,188.079638,0.708856,189.171994,...,1.559394,228.508828,2.511047,1.141478,2.063412,0.839182,0.905246,1.720249,2.200941,2.200941
2020-08-18 22:00:00,1.681218,215.336504,0.882702,196.775479,1.647048,227.950976,0.902962,188.140383,0.942821,188.873575,...,1.596808,226.613657,2.372884,1.324053,2.103367,1.153130,1.204031,1.824449,2.253748,2.253748
2020-08-18 23:00:00,1.655907,210.516069,1.225885,194.727586,1.810054,226.744074,1.425023,191.881236,1.444717,192.246416,...,1.771625,225.067723,2.337160,1.838828,2.311534,1.819829,1.844979,2.149744,2.500486,2.500486
