In [1]:
import requests 
import pandas as pd
import matplotlib.pyplot as plt
import datetime 
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler,StandardScaler,OneHotEncoder
from sklearn.metrics import mean_absolute_error,mean_squared_error

import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout,Flatten,Input
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import plot_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Concatenate


%matplotlib inline

In [2]:
'''tansformations with OOP'''

class DataTransformer:
    def __init__(self,df):
        self.df = df
    '''Class containing methods to transform the imported data'''

    # in OOP do u not need to call return?
    def transform(self):
        '''overall transformation of data'''
        self.interpolate()
        self.add_cyclical_features()
        self.add_time_features()
        self.ohe()
        self.add_historical_windpower()
        self.add_momentum_force()
        self.scale()
        return self.df

    def interpolate(self):
        '''interpolation of data'''
        df = self.df
        df['Time'] = df['Time'].apply(lambda x : datetime.datetime.strptime(x[:-3], '%Y/%m/%d %H:%M'))
        df['Time'] = pd.to_datetime(df['Time'])    #why double the time conversion?
        df.set_index('Time',inplace=True)  
        df = df.resample('1H').asfreq()    #unsure about resample and asfreq
        df.interpolate(method='cubic',axis=0,limit_direction='both',inplace=True)
        self.df = df

    def add_cyclical_features(self):
        '''converts direction into cylical inputs'''
        df = self.df
        cols = df.columns 
        for c in cols:
            if 'Direction' in c:
                df[c+'_norm'] = df[c]/360
                df[c+'_sin'] = df[c+'_norm'].apply(lambda x: np.sin(x))
                df[c+'_cos'] = df[c+'_norm'].apply(lambda x: np.cos(x))
                df.drop([c,c+'_norm'],inplace=True,axis=1)

        self.df = df 

    def scale(self):
        '''normalize entire dataframe'''
        df = self.df
        df = pd.DataFrame(StandardScaler().fit_transform(df),index=df.index,columns=df.columns)
        self.df = df

    def add_time_features(self):
        '''create time inputs as attributes?'''
        df = self.df
        df.reset_index(inplace=True,drop=False)
        #this is assigment of attribute?
        df['hour'] = df['Time'].apply(lambda x: x.hour).astype(str)
        df['month'] = df['Time'].apply(lambda x: x.month).astype(str)
        # df['day'] = df['Time'].apply(lambda x: x.day).astype(str)
        df.set_index('Time',inplace=True)
        self.df = df

    def ohe(self):
        '''One hot encoding of time data'''
        #what is this? I assume it standings for one hot encoding
        #doesn't it affect the entire frame vs just the select month or year?
        df = self.df
        df = pd.get_dummies(df)
        self.df = df

    def add_historical_windpower(self):
        '''conversion of windspeed into windpower'''
        df = self.df
        t = pd.read_csv('target.csv')
        t['Time'] = pd.to_datetime(t['Time'])
        t.set_index('Time',inplace=True)
        #how does this standardscaler object behave?
        target_scaler = StandardScaler().fit(t)
        t = pd.DataFrame(target_scaler.transform(t),index=t.index,columns=t.columns)
        df = df.join(t,how='left')
        self.target_scaler = target_scaler
        self.df = df

    def add_momentum_force(self):
        '''add momentum'''
        time_lag = 18
        df = self.df 
        df['Wind Energy Lag {}'.format(time_lag)] = df['Wind Energy'].shift(time_lag)
        df['Wind Energy Lag {}'.format(2*time_lag)] = df['Wind Energy'].shift(2*time_lag)
        df.dropna(axis=0,inplace=True) ####DROPPING 10 ROWS OF DATA HERE
        # are you not subtracting the future values from present here?
        df['Momentum'] = df['Wind Energy'] - df['Wind Energy Lag {}'.format(time_lag)]
        df['Force'] = df['Wind Energy'] - 2*df['Wind Energy Lag {}'.format(time_lag)] + df['Wind Energy Lag {}'.format(2*time_lag)]
        df.drop(['Wind Energy Lag {}'.format(time_lag),'Wind Energy Lag {}'.format(2*time_lag)],axis=1,inplace=True)
        self.df = df

        ### generate lagged input
        lagged = pd.DataFrame(df['Wind Energy'].shift(1))
        lagged.fillna(method='bfill',inplace=True)
        lagged = StandardScaler().fit_transform(lagged.values)
        self.lagged_input = lagged
        

    #----GETTER Functions---
    #what are they for?

    def get_df(self):
        return self.df

    def get_lagged_input(self):
        return self.lagged_input

    def get_target_scaler(self):
        return self.target_scaler


In [3]:
def add_b_suffix(og_name):
    return_list = []
    for f in og_name:
        return_list.append(f+"-b")
    return return_list

locations = ['guitrancourt', 'lieusaint', 
             'lvs-pussay','parc-du-gatinais', 
             'arville','boissy-la-riviere',
             'angerville-1','angerville-2']

wind_energy = 'energy-ile-de-france'
forecast_endpt = 'https://ai4impact.org/P003/'
analysis_endpt = 'https://ai4impact.org/P003/historical/'

In [4]:
model_1 = locations
model_2 = add_b_suffix(model_1)
models = [model_1, model_2]

In [5]:
model_num = 0
for m in models:
    model_num += 1
    df = pd.read_csv(analysis_endpt+m[0]+'.csv',skiprows=3)
    df.columns = ['Time','Speed_'+m[0],'Direction_'+m[0]]
    df.set_index('Time',inplace=True)
    for i in range(1,len(m)):
        loc = m[i]
        temp = pd.read_csv(analysis_endpt+loc+'.csv',skiprows=3)
        temp.columns = ['Time','Speed_'+loc,'Direction_'+loc]
        temp.set_index('Time',inplace=True)
        df = df.merge(temp,how='left',on='Time')
        df.drop_duplicates(inplace=True)

    df.reset_index(inplace=True,drop=False)
    df.to_csv(f'model_{model_num}.csv')

In [6]:
df1 = pd.read_csv('model_1.csv')
df2 = pd.read_csv('model_2.csv')

In [7]:
target = pd.read_csv('https://ai4impact.org/P003/historical/energy-ile-de-france.csv',header=None)
target.columns = ['Time','Wind Energy']
target.to_csv('target.csv',index=False)
target['Time'] = pd.to_datetime(target['Time'])
target.set_index('Time',inplace=True)

In [8]:
df = df1.copy()
transformer = DataTransformer(df)
transformer.transform()
df = transformer.get_df()
lagged = transformer.get_lagged_input()
df.tail()

Unnamed: 0_level_0,Unnamed: 0,Speed_guitrancourt,Speed_lieusaint,Speed_lvs-pussay,Speed_parc-du-gatinais,Speed_arville,Speed_boissy-la-riviere,Speed_angerville-1,Speed_angerville-2,Direction_guitrancourt_sin,...,month_3,month_4,month_5,month_6,month_7,month_8,month_9,Wind Energy,Momentum,Force
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-08-17 20:00:00,1.731006,-0.771092,-0.848547,-1.168429,-0.792996,-0.825442,-1.100277,-1.157459,-1.157221,0.884412,...,-0.321516,-0.315762,-0.321516,-0.315762,-0.321516,3.320789,-0.270112,-0.302649,-0.677287,-1.013626
2020-08-17 21:00:00,1.731115,-0.95609,-1.058139,-1.322526,-0.957032,-0.992304,-1.263382,-1.314476,-1.314195,0.861563,...,-0.321516,-0.315762,-0.321516,-0.315762,-0.321516,3.320789,-0.270112,-0.433817,-0.691094,-0.946613
2020-08-17 22:00:00,1.731225,-1.112943,-1.194949,-1.371792,-1.035802,-1.070685,-1.320897,-1.366155,-1.36586,0.80292,...,-0.321516,-0.315762,-0.321516,-0.315762,-0.321516,3.320789,-0.270112,-0.591219,-0.718709,-0.787457
2020-08-17 23:00:00,1.731334,-1.230406,-1.239387,-1.292701,-1.009725,-1.040206,-1.248675,-1.288803,-1.288529,0.700898,...,-0.321516,-0.315762,-0.321516,-0.315762,-0.321516,3.320789,-0.270112,-0.564985,-0.276874,-0.284858
2020-08-18 00:00:00,1.731443,-1.297234,-1.171861,-1.061727,-0.859221,-0.880484,-1.022571,-1.058726,-1.058514,0.545987,...,-0.321516,-0.315762,-0.321516,-0.315762,-0.321516,3.320789,-0.270112,-0.237064,0.247805,0.217741


In [9]:
X = df.loc[:,df.columns!='Wind Energy'].values
y = df['Wind Energy'].values
X = np.concatenate((X,lagged),axis=1)
X.shape,y.shape

((31765, 64), (31765,))

In [10]:
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.4)
x_train_features = x_train[:,:62]
x_test_features = x_test[:,:62]
x_train_lagged = x_train[:,62]
x_test_lagged = x_test[:,62]
x_train_features.shape,x_test_features.shape,x_train_lagged.shape,x_test_lagged.shape

((19059, 62), (12706, 62), (19059,), (12706,))

In [11]:
latent_dim = 16
def encoder(no_of_features, latent_dim = latent_dim):
    features = Input(shape = (no_of_features,), name = 'Features')
    X = Dense(62, activation = 'relu')(features)
    X = Dense(32, activation = 'relu')(X)
    LATENT = Dense(latent_dim, activation = 'relu')(X)
    encoder = Model(features, LATENT, name = 'encoder')
    return encoder

In [31]:
def decoder(latent_dim = latent_dim):
    LATENT_INPUTS = Input(shape = (latent_dim,))
    X = Dense(32, activation = 'relu')(LATENT_INPUTS)
    OUTPUTS = Dense(62, activation = 'linear')(X)
    decoder = Model(LATENT_INPUTS, OUTPUTS, name = 'decoder')
    return decoder

In [32]:
#Difference Model
def create_model(latent_dim = latent_dim):
    LATENT_INPUTS = Input(shape = (latent_dim,))
    X = Dense(64,activation='relu')(LATENT_INPUTS)
    X = Dropout(0.2)(X)
    X = Dense(32,activation='relu')(X)
    X = Dense(6,activation='relu')(X)

    output = Dense(1,activation='linear')(X)
    model = Model(inputs= LATENT_INPUTS ,outputs=[output,decoder], name = 'energy')
    print(model.summary())
    model.compile(loss={'energy': 'mse',
                        'decoder': 'mae'},
                  optimizer = 'adam')
    plot_model(model,to_file='difference model.png')
    return model

In [126]:
encoder = encoder(62)

In [127]:
encoder.summary()

Model: "encoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Features (InputLayer)        [(None, 62)]              0         
_________________________________________________________________
dense_40 (Dense)             (None, 62)                3906      
_________________________________________________________________
dense_41 (Dense)             (None, 32)                2016      
_________________________________________________________________
dense_42 (Dense)             (None, 16)                528       
Total params: 6,450
Trainable params: 6,450
Non-trainable params: 0
_________________________________________________________________


In [128]:
decoder = decoder()

In [129]:
decoder.summary()

Model: "decoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_8 (InputLayer)         [(None, 16)]              0         
_________________________________________________________________
dense_43 (Dense)             (None, 32)                544       
_________________________________________________________________
dense_44 (Dense)             (None, 62)                2046      
Total params: 2,590
Trainable params: 2,590
Non-trainable params: 0
_________________________________________________________________


In [136]:
create_model()

ValueError: The first argument to `Layer.call` must always be passed.

In [14]:
latent_dim = 16
def model():
    #encoder
    features = Input(shape=(62,),name='Features')
    X = Dense(62, activation = 'relu')(features)
    X1 = Dense(32, activation = 'relu')(X)
    encoded = Dense(latent_dim, activation = 'relu', name = 'encoder')(X1)
    
    #difference model
    X2 = Dense(32,activation='relu')(encoded)
    X3 = Dropout(0.2)(X2)
    X4 = Dense(6,activation='relu')(X3)
    output = Dense(1, activation='linear', name = 'energy')(X4)
    
    #decoder 
    X5 = Dense(32, activation = 'relu')(encoded)
    decoded = Dense(62, activation = 'linear', name = 'autoencoder')(X5)
    
    model = Model(inputs = features, outputs = [output, decoded])
    return model
    
    

In [17]:
model = model()

In [18]:
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Features (InputLayer)           [(None, 62)]         0                                            
__________________________________________________________________________________________________
dense_5 (Dense)                 (None, 62)           3906        Features[0][0]                   
__________________________________________________________________________________________________
dense_6 (Dense)                 (None, 32)           2016        dense_5[0][0]                    
__________________________________________________________________________________________________
encoder (Dense)                 (None, 16)           528         dense_6[0][0]                    
____________________________________________________________________________________________

In [25]:
model.compile(loss={'energy': 'mse', 
                    'autoencoder': 'mse'},
              optimizer='adam'
)

In [26]:
hist = model.fit(x_train_features,
                 {'energy': y_train, 'autoencoder': x_train_features},
                 epochs=10,
                 batch_size=32,
                 shuffle=True,
                 validation_data=(x_test_features, {'energy': y_test, 'autoencoder': x_test_features}),
                 verbose = 1)

Epoch 1/10


InternalError:  Blas GEMM launch failed : a.shape=(32, 62), b.shape=(62, 62), m=32, n=62, k=62
	 [[node model/dense/MatMul (defined at <ipython-input-26-3e7f8dbc28de>:7) ]] [Op:__inference_train_function_1325]

Function call stack:
train_function


In [19]:
tf.test.is_gpu_available

<function tensorflow.python.framework.test_util.is_gpu_available(cuda_only=False, min_cuda_compute_capability=None)>