# Time Series Error Computation
---
The forecast values are grouped by year (2018 to 2020). Within each year, the monthly wind speed values are averaged for every grid point (latitude/longitude pair), and the root-mean-square error (RMSE) is computed between the actual wind velocity (test observations) and each model's forecast (EMA, Prophet, and LSTM).

In [1]:
import numpy as np
import pandas as pd
from datetime import datetime
from statsmodels.tools.eval_measures import rmse

In [2]:
# Load the forecast table produced earlier in the pipeline. Judging from the
# displayed output, it holds the observed wind velocity (`windvelo`) and one
# forecast column per model for every date and (lat, lon) grid point.
# NOTE(review): unpickling can execute arbitrary code — only load pickle
# files that come from a trusted source.
df = pd.read_pickle("results/forecast.pkl")
df

Unnamed: 0,date,windvelo,lat,lon,ema_1,ema_5,ema_6,ema_7,prophet,lstm_relu,lstm_tanh
0,2018-01-01,11.304111,21.5,116.5,13.553826,14.661283,14.489713,14.233762,12.234389,9.165398,10.300563
1,2018-02-01,9.636217,21.5,116.5,11.814079,14.600123,14.024206,13.372910,9.975370,8.059443,8.727214
2,2018-03-01,6.710312,21.5,116.5,10.146185,13.735596,12.837268,11.947128,8.762326,6.161771,6.410487
3,2018-04-01,5.071315,21.5,116.5,7.220281,11.840380,10.606949,9.471261,6.164257,5.155528,5.406830
4,2018-05-01,1.462513,21.5,116.5,5.581284,10.073274,8.731424,7.581203,4.306958,3.325229,3.861131
...,...,...,...,...,...,...,...,...,...,...,...
115015,2020-08-01,4.136054,4.0,127.5,2.875014,2.033132,1.993423,2.089708,7.772307,3.707713,3.995267
115016,2020-09-01,1.958800,4.0,127.5,4.980206,3.122142,3.395299,3.755790,7.896182,2.928135,3.057908
115017,2020-10-01,3.775196,4.0,127.5,2.802953,2.578020,2.649698,2.731537,6.439601,3.569187,3.820741
115018,2020-11-01,0.734279,4.0,127.5,4.619349,3.214157,3.441295,3.695738,5.726661,2.548251,2.649334


In [3]:
def get_rmse(df, lat, lon, test_feat, forecast_feat, expected_len = 12):
    """Return the RMSE between two columns of ``df`` for a single grid point.

    Only the rows whose ``lat`` and ``lon`` columns equal the given values are
    used. The number of selected rows must equal ``expected_len``, otherwise
    the assertion below fails.

    Parameters
    ----------
    df : DataFrame with ``lat`` and ``lon`` columns plus the two compared columns.
    lat, lon : coordinates of the grid point to select.
    test_feat : name of the column holding the reference values.
    forecast_feat : name of the column holding the forecast values.
    expected_len : required number of rows for the grid point (default 12).
    """
    at_grid_point = (df["lat"] == lat) & (df["lon"] == lon)
    grid_rows = df.loc[at_grid_point].reset_index(drop=True)
    assert len(grid_rows) == expected_len

    observed = grid_rows[test_feat]
    predicted = grid_rows[forecast_feat]
    return rmse(observed, predicted)

In [4]:
# Every column except the first one (selected by position) is treated as a
# feature; in the table displayed above the first column is `date`.
features = df.columns[1:].tolist()
features

['windvelo',
 'lat',
 'lon',
 'ema_1',
 'ema_5',
 'ema_6',
 'ema_7',
 'prophet',
 'lstm_relu',
 'lstm_tanh']

In [5]:
# Forecast columns to score against the observed wind velocity. The order of
# this list is also the order in which the RMSE columns are appended.
models = [
    "ema_1",
    "ema_7",
    "ema_6",
    "ema_5",
    "prophet",
    "lstm_relu",
    "lstm_tanh",
]
models

['ema_1', 'ema_7', 'ema_6', 'ema_5', 'prophet', 'lstm_relu', 'lstm_tanh']

In [6]:
def get_annual_errors(df, start_date, end_date, model_names=None, expected_len=12):
    """Average each grid point over a date range and score every model by RMSE.

    Rows of ``df`` whose ``date`` lies between ``start_date`` and ``end_date``
    (both ends inclusive) are grouped by ``(lat, lon)``. For every grid point
    the function returns the mean observed wind velocity, the mean forecast of
    each model, and the RMSE of each model against ``windvelo``.

    The RMSE is computed with one grouped reduction per model instead of
    re-filtering the whole frame once per grid point and per model.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``date``, ``lat``, ``lon``, ``windvelo`` and one
        column per entry of ``model_names``.
    start_date, end_date
        Inclusive bounds compared against ``df["date"]``.
    model_names : sequence of str, optional
        Forecast columns to evaluate. Defaults to the notebook-level
        ``models`` list.
    expected_len : int, optional
        Number of rows every grid point must have inside the date range
        (default 12).

    Returns
    -------
    pandas.DataFrame
        One row per grid point, ordered by ``(lat, lon)``, with the columns
        ``lat``, ``lon``, ``mean_wv``, one mean column per model and then one
        ``<model>_rmse`` column per model.

    Raises
    ------
    AssertionError
        If any grid point does not have exactly ``expected_len`` rows in the
        selected date range.
    """
    if model_names is None:
        # Fall back to the notebook-level list of forecast columns.
        model_names = models
    model_names = list(model_names)

    in_range = (df["date"] >= start_date) & (df["date"] <= end_date)
    annual_df = df[in_range].reset_index(drop=True)

    grid_keys = ["lat", "lon"]
    by_grid = annual_df.groupby(grid_keys)

    # Same invariant the per-grid RMSE helper enforces: a full set of rows
    # for every grid point, otherwise the yearly figures are not comparable.
    grid_sizes = by_grid.size()
    bad_grids = grid_sizes[grid_sizes != expected_len]
    if len(bad_grids) > 0:
        raise AssertionError(
            "%d grid point(s) do not have exactly %d rows between %s and %s"
            % (len(bad_grids), expected_len, start_date, end_date)
        )

    # Built-in grouped mean instead of passing np.mean / max callables to
    # .agg(); the grid coordinates come back from the group index at the end.
    error_df = (
        by_grid[["windvelo"] + model_names]
        .mean()
        .rename(columns={"windvelo": "mean_wv"})
    )

    for model in model_names:
        time_now = datetime.now().strftime("%H:%M:%S")
        print("Start computing RMSE for model: %s at %s" % (model, time_now))

        squared_error = (annual_df["windvelo"] - annual_df[model]) ** 2
        # min_count keeps a missing value contagious: a grid point with any
        # NaN squared error gets a NaN RMSE rather than one computed over
        # fewer rows.
        grid_sse = squared_error.groupby(
            [annual_df[key] for key in grid_keys]
        ).sum(min_count=expected_len)

        # Assignment aligns on the shared (lat, lon) index.
        error_df[model + "_rmse"] = np.sqrt(grid_sse / expected_len)

    return error_df.reset_index()

In [7]:
# Per-grid means and per-model RMSE for January through December 2018.
mean18_df = get_annual_errors(df, start_date="2018-01-01", end_date="2018-12-01")
mean18_df

Start computing RMSE for model: ema_1 at 21:52:26
Start computing RMSE for model: ema_7 at 21:52:30
Start computing RMSE for model: ema_6 at 21:52:34
Start computing RMSE for model: ema_5 at 21:52:38
Start computing RMSE for model: prophet at 21:52:44
Start computing RMSE for model: lstm_relu at 21:52:49
Start computing RMSE for model: lstm_tanh at 21:52:54


Unnamed: 0,lat,lon,mean_wv,ema_1,ema_7,ema_6,ema_5,prophet,lstm_relu,lstm_tanh,ema_1_rmse,ema_7_rmse,ema_6_rmse,ema_5_rmse,prophet_rmse,lstm_relu_rmse,lstm_tanh_rmse
0,4.0,116.50,0.306902,0.337205,0.297053,0.292278,0.291639,0.261509,0.308845,0.298886,0.142744,0.121445,0.118219,0.116082,0.147371,0.078792,0.064583
1,4.0,116.75,0.225761,0.294614,0.244150,0.241025,0.244401,0.305867,0.248911,0.276260,0.129573,0.097475,0.093383,0.091073,0.175335,0.065290,0.074613
2,4.0,117.00,0.149293,0.070802,0.032588,0.031210,0.034703,0.097685,0.189964,0.219499,0.125499,0.144262,0.143186,0.138281,0.169810,0.064833,0.082274
3,4.0,117.25,0.267973,0.204062,0.161489,0.154646,0.149360,0.023566,0.330939,0.367143,0.191879,0.204828,0.206358,0.206991,0.331543,0.102553,0.117859
4,4.0,117.50,0.490272,0.429090,0.425119,0.422396,0.415148,0.119225,0.494287,0.599448,0.293688,0.294739,0.294695,0.295305,0.488479,0.159267,0.146760
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3190,21.5,126.50,5.233596,8.957308,9.631750,10.056722,10.555557,9.884035,5.143314,5.427236,4.200920,4.838985,5.263514,5.768262,4.876850,1.333530,1.229269
3191,21.5,126.75,5.231685,8.863185,9.564456,9.996924,10.502762,9.945190,5.318267,5.421373,4.125803,4.785290,5.215195,5.724980,4.953148,1.181654,1.198777
3192,21.5,127.00,5.215198,8.706625,9.431767,9.869738,10.380357,9.136401,5.226594,5.406188,4.003149,4.681993,5.115381,5.628020,4.124485,1.053296,1.294639
3193,21.5,127.25,5.221320,8.615129,9.360838,9.803998,10.319446,10.248816,5.002651,5.440711,3.922080,4.617435,5.054292,5.570144,5.321018,1.325892,1.283429


In [8]:
# Per-grid means and per-model RMSE for January through December 2019.
mean19_df = get_annual_errors(df, start_date="2019-01-01", end_date="2019-12-01")
mean19_df

Start computing RMSE for model: ema_1 at 21:53:00
Start computing RMSE for model: ema_7 at 21:53:05
Start computing RMSE for model: ema_6 at 21:53:09
Start computing RMSE for model: ema_5 at 21:53:15
Start computing RMSE for model: prophet at 21:53:19
Start computing RMSE for model: lstm_relu at 21:53:24
Start computing RMSE for model: lstm_tanh at 21:53:31


Unnamed: 0,lat,lon,mean_wv,ema_1,ema_7,ema_6,ema_5,prophet,lstm_relu,lstm_tanh,ema_1_rmse,ema_7_rmse,ema_6_rmse,ema_5_rmse,prophet_rmse,lstm_relu_rmse,lstm_tanh_rmse
0,4.0,116.50,0.449304,0.469612,0.425067,0.417529,0.413504,0.180918,0.357716,0.347851,0.307406,0.291313,0.288937,0.286400,0.334327,0.172460,0.188478
1,4.0,116.75,0.421279,0.476997,0.417460,0.409703,0.407400,0.217092,0.323795,0.347149,0.332209,0.311521,0.309312,0.307234,0.300479,0.166391,0.173708
2,4.0,117.00,0.285283,0.219209,0.172101,0.166333,0.164559,-0.089234,0.236493,0.277736,0.277525,0.285174,0.287309,0.287853,0.407132,0.146252,0.121076
3,4.0,117.25,0.428141,0.387297,0.342672,0.333849,0.325830,-0.196728,0.424394,0.454677,0.340652,0.369724,0.379111,0.387433,0.638236,0.144675,0.157996
4,4.0,117.50,0.786598,0.747105,0.744115,0.739530,0.728667,-0.097206,0.621816,0.780323,0.498262,0.542361,0.556290,0.568616,0.905797,0.327753,0.197693
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3190,21.5,126.50,4.963604,8.346777,8.923112,9.312481,9.771228,12.418488,5.024556,5.304689,4.196946,4.726333,5.080633,5.502618,7.525222,1.414376,1.290792
3191,21.5,126.75,4.939498,8.233854,8.836100,9.232893,9.698695,12.892178,5.158978,5.283787,4.099688,4.652709,5.015300,5.445734,8.019237,1.284614,1.230763
3192,21.5,127.00,4.907763,8.070768,8.697809,9.100560,9.571601,10.823226,5.031718,5.269738,3.962341,4.534543,4.902945,5.339061,5.979299,1.143271,1.295626
3193,21.5,127.25,4.904783,7.979931,8.629376,9.038074,9.514674,14.446861,4.850879,5.289861,3.859550,4.449833,4.824606,5.267201,9.613951,1.364730,1.267356


In [9]:
# Per-grid means and per-model RMSE for January through December 2020.
mean20_df = get_annual_errors(df, start_date="2020-01-01", end_date="2020-12-01")
mean20_df

Start computing RMSE for model: ema_1 at 21:53:35
Start computing RMSE for model: ema_7 at 21:53:39
Start computing RMSE for model: ema_6 at 21:53:43
Start computing RMSE for model: ema_5 at 21:53:47
Start computing RMSE for model: prophet at 21:53:52
Start computing RMSE for model: lstm_relu at 21:53:56
Start computing RMSE for model: lstm_tanh at 21:54:00


Unnamed: 0,lat,lon,mean_wv,ema_1,ema_7,ema_6,ema_5,prophet,lstm_relu,lstm_tanh,ema_1_rmse,ema_7_rmse,ema_6_rmse,ema_5_rmse,prophet_rmse,lstm_relu_rmse,lstm_tanh_rmse
0,4.0,116.50,0.392989,0.431589,0.393192,0.390120,0.392482,0.153115,0.338161,0.328260,0.177231,0.177910,0.181327,0.185387,0.283120,0.156517,0.144023
1,4.0,116.75,0.355647,0.420116,0.372364,0.371072,0.377425,0.150749,0.296555,0.324055,0.218769,0.207727,0.208754,0.211018,0.267342,0.155698,0.144848
2,4.0,117.00,0.253270,0.179003,0.142696,0.142281,0.147393,-0.293783,0.231170,0.268403,0.285317,0.278916,0.276046,0.271883,0.568371,0.151324,0.131778
3,4.0,117.25,0.390723,0.364039,0.321736,0.315616,0.312314,-0.446248,0.443085,0.431130,0.484260,0.467592,0.466712,0.466720,0.867233,0.131870,0.218730
4,4.0,117.50,0.627492,0.636333,0.634050,0.634124,0.632639,-0.355373,0.585952,0.683932,0.608274,0.592892,0.593494,0.594771,1.028904,0.292271,0.251262
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3190,21.5,126.50,5.878376,9.405977,9.995934,10.379839,10.826225,14.317867,5.441783,5.744386,4.351799,4.848076,5.182367,5.578305,8.559234,1.334650,1.068005
3191,21.5,126.75,5.833707,9.287936,9.905585,10.297663,10.752025,15.321965,5.621795,5.718670,4.293032,4.800049,5.138582,5.539570,9.601213,1.161278,0.998696
3192,21.5,127.00,5.772648,9.098246,9.742592,10.141819,10.603201,11.789134,5.528245,5.651331,4.183910,4.700716,5.042482,5.447522,6.181629,1.058241,1.064612
3193,21.5,127.25,5.735804,8.966967,9.636034,10.042747,10.512091,18.258944,5.244623,5.673315,4.104154,4.630772,4.977208,5.388326,12.632497,1.321556,1.055639


In [13]:
# Target dtype for every RMSE column: all of them are stored as float32.
types_dct = dict.fromkeys(
    (
        "ema_1_rmse",
        "ema_7_rmse",
        "ema_6_rmse",
        "ema_5_rmse",
        "prophet_rmse",
        "lstm_relu_rmse",
        "lstm_tanh_rmse",
    ),
    "float32",
)

In [14]:
# Cast the RMSE columns of each yearly table to the dtypes listed in
# types_dct, then write the table to its own pickle file under results/.
# Each year is cast and saved before the next one is touched.
mean18_df = mean18_df.astype(types_dct)
mean18_df.to_pickle("results/mean2018.pkl")

# Same cast-and-save step for 2019.
mean19_df = mean19_df.astype(types_dct)
mean19_df.to_pickle("results/mean2019.pkl")

# Same cast-and-save step for 2020.
mean20_df = mean20_df.astype(types_dct)
mean20_df.to_pickle("results/mean2020.pkl")