In [74]:
import mlflow

import pandas as pd


from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor

import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

In [75]:
# Load the wind-farm dataset; index_col=0 makes the first CSV column the row index.
wind_farm_df = pd.read_csv("./datasets/windfarm_data.csv", index_col=0)

In [76]:
# Inspect the row index: the labels are date strings (object dtype), not datetimes.
wind_farm_df.index

Index(['2014-01-01', '2014-01-02', '2014-01-03', '2014-01-04', '2014-01-05',
       '2014-01-06', '2014-01-07', '2014-01-08', '2014-01-09', '2014-01-10',
       ...
       '2020-12-22', '2020-12-23', '2020-12-24', '2020-12-25', '2020-12-26',
       '2020-12-27', '2020-12-28', '2020-12-29', '2020-12-30', '2020-12-31'],
      dtype='object', length=2555)

In [77]:
# Show the last rows to check the column layout and the most recent dates.
wind_farm_df.tail()

Unnamed: 0,temperature_00,wind_direction_00,wind_speed_00,temperature_08,wind_direction_08,wind_speed_08,temperature_16,wind_direction_16,wind_speed_16,power
2020-12-27,7.123225,103.17663,8.133746,6.454002,107.79322,6.326991,7.219884,119.070526,3.062219,2621.476
2020-12-28,5.37627,118.08433,5.558247,8.118839,116.193535,8.565966,9.307176,120.26443,11.993913,5423.625
2020-12-29,8.593436,115.43259,12.18185,8.587968,112.93136,11.970859,8.956771,110.161095,11.301485,9132.115
2020-12-30,8.069033,103.169685,9.983466,7.930485,106.04551,6.381556,8.228901,111.60216,4.087358,3667.9927
2020-12-31,6.425237,115.71369,3.755484,7.935767,109.33803,5.12676,11.186373,105.87311,5.630923,858.9223


In [78]:
# Summary statistics (count, mean, std, quantiles) for each numeric column.
wind_farm_df.describe()


Unnamed: 0,temperature_00,wind_direction_00,wind_speed_00,temperature_08,wind_direction_08,wind_speed_08,temperature_16,wind_direction_16,wind_speed_16,power
count,2555.0,2555.0,2555.0,2555.0,2555.0,2555.0,2555.0,2555.0,2555.0,2555.0
mean,9.613588,198.381353,5.001413,12.827786,191.538394,5.136628,13.991459,202.051406,6.072578,2424.97888
std,5.187017,81.767411,2.940271,6.536972,83.572022,3.016934,6.862011,88.747683,2.585946,2035.838491
min,-6.172769,14.527682,0.232208,-3.989666,14.087781,0.51922,-4.087519,5.659133,0.561177,207.06557
25%,6.228526,111.309685,2.863503,7.932104,108.053078,2.897562,8.576865,111.450595,4.299781,947.10043
50%,8.688278,214.46178,4.265017,11.178736,174.02873,4.356376,12.392289,245.21094,5.785738,1674.4309
75%,13.045183,276.89433,6.263791,17.898135,281.36138,6.745475,19.687266,275.93715,7.550025,3274.85645
max,27.039269,352.76413,19.162184,30.118828,351.4283,23.394577,32.521378,349.5891,18.859266,10774.853


In [72]:
# Check label slicing on the string index: the end label is inclusive,
# so the 2018-01-01 row appears in the result.
wind_farm_df["2015-01-01":"2018-01-01"].tail()

Unnamed: 0,temperature_00,wind_direction_00,wind_speed_00,temperature_08,wind_direction_08,wind_speed_08,temperature_16,wind_direction_16,wind_speed_16,power
2017-12-28,6.505444,21.090723,1.728905,7.756409,49.63617,1.844953,8.003177,141.53914,1.931224,222.44484
2017-12-29,6.150245,247.73648,8.716457,2.962454,275.8125,10.803079,2.458707,283.33875,9.119889,6943.7446
2017-12-30,1.80431,264.92587,8.026327,3.297666,277.99554,4.768021,2.136792,276.17825,2.080063,2222.4302
2017-12-31,0.295758,108.385796,4.24527,2.566788,99.84273,8.125836,4.871737,101.659065,8.688424,4079.3616
2018-01-01,4.290002,99.38345,11.876492,5.60676,99.889145,11.322479,6.574509,106.04957,9.184392,8226.334


In [79]:
def get_training_data():
    """Return the training split as a ``(features, target)`` pair.

    Rows are taken from the notebook-level ``wind_farm_df`` for the labels
    "2014-01-01" through "2017-12-31" (both ends included). The features are
    every column except "power"; the target is the "power" column.
    """
    date_window = slice("2014-01-01", "2017-12-31")
    subset = wind_farm_df[date_window]
    return subset.drop(columns="power"), subset["power"]

In [80]:
def get_validation_data():
    """Return the validation split as a ``(features, target)`` pair.

    Rows are taken from the notebook-level ``wind_farm_df`` for the labels
    "2018-01-01" through "2019-12-31" (both ends included). The features are
    every column except "power"; the target is the "power" column.
    """
    date_window = slice("2018-01-01", "2019-12-31")
    subset = wind_farm_df[date_window]
    return subset.drop(columns="power"), subset["power"]

In [81]:
def get_weather_and_forecast(weather_date):
    """Return weather features around ``weather_date`` plus the past power output.

    Parameters
    ----------
    weather_date : anything accepted by ``pd.Timestamp``
        Reference day ("today"); any time-of-day component is discarded.

    Returns
    -------
    (weather_and_forecast, past_power_output)
        ``weather_and_forecast`` holds the rows of the notebook-level
        ``wind_farm_df`` from 5 days before to 5 days after the reference
        day, without the "power" column. ``past_power_output`` is the
        "power" series from 5 days before up to and including the
        reference day.

        If fewer than 10 rows fall inside the +/-5 day window (for example
        when the reference day is near the end of the data), the last 10
        rows are used as the weather window and the first 5 of those rows
        as the past power output.
    """
    date_format = "%Y-%m-%d"
    today = pd.Timestamp(weather_date).normalize()
    window_start = (today - pd.Timedelta(days=5)).strftime(date_format)
    window_end = (today + pd.Timedelta(days=5)).strftime(date_format)
    today_label = today.strftime(date_format)

    # Label slices on the string index include both end labels.
    # Past output stops at the reference day; the weather window extends
    # into the following days.
    past_power_output = wind_farm_df[window_start:today_label]
    weather_and_forecast = wind_farm_df[window_start:window_end]

    if len(weather_and_forecast) < 10:
        # Fallback: take the last 10 available rows. Both stops must be
        # negative or open; a positive stop after a -10 start selects no
        # rows on any frame longer than 10 rows.
        past_power_output = wind_farm_df.iloc[-10:-5]
        weather_and_forecast = wind_farm_df.iloc[-10:]

    return weather_and_forecast.drop(columns="power"), past_power_output["power"]


In [83]:
# Manual checks for get_weather_and_forecast, left commented out:

# pd.Timestamp('2020-1-1').normalize()
# w1, p1 = get_weather_and_forecast('2020-1-1')
# w1
# p1

# Build the training split as notebook-level globals; train_keras_model
# reads X_train / y_train directly rather than taking them as arguments.
X_train, y_train = get_training_data()

In [84]:
# Train Keras model

def train_keras_model():
    """Build, compile and fit a single-hidden-layer regressor.

    Reads the notebook-level ``X_train`` / ``y_train``. The network is one
    100-unit ReLU layer followed by a single linear output unit, trained
    with MSE loss and the Adam optimizer for 25 epochs (batch size 64),
    holding out 20% of the training data for validation.

    Returns the fitted ``Sequential`` model.
    """
    n_features = X_train.shape[-1]
    layers = [
        Dense(100, input_shape=(n_features,), activation='relu', name="hidden_layer"),
        Dense(1),
    ]
    network = Sequential(layers)
    network.compile(loss="mse", optimizer="adam")
    network.fit(
        X_train,
        y_train,
        epochs=25,
        batch_size=64,
        validation_split=0.2,
    )
    return network

In [86]:
import mlflow
import mlflow.keras
import mlflow.tensorflow

# Recompute the training split (same call as the earlier cell).
X_train, y_train = get_training_data()

with mlflow.start_run() as tracked_run:
    # autolog() automatically captures the model's parameters, metrics,
    # artifacts and source code for the fit call made below.
    mlflow.tensorflow.autolog()

    train_keras_model()

    # Keep the ID of this run so it can be referenced in later cells.
    run_id = tracked_run.info.run_id
    
    



Epoch 1/25




Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25




INFO:tensorflow:Assets written to: C:\Users\coool\AppData\Local\Temp\tmpztvr1x7u\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\coool\AppData\Local\Temp\tmpztvr1x7u\model\data\model\assets


In [87]:
# Name chosen for the forecasting model. It is not used in the cells shown here;
# presumably it is the MLflow registered-model name -- TODO confirm against later cells.
model_name = "power-forecasting-model"

In [89]:
# Display the MLflow run ID captured inside the training run.
run_id

'014acc5e254448feb9fe14784c5fae6d'