In [1]:
import datetime as dt
from datetime import datetime
from os import getenv
from typing import List, Dict, Optional
from urllib.parse import quote_plus

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import pytz
from numpy import ndarray
from pandas import DataFrame, Series, Timestamp
from plotly.graph_objects import Figure
from sqlalchemy import create_engine
from sqlalchemy.engine import Engine, Connection

In [2]:
import sys
# Put the parent directory first on the module search path
# (presumably so the `inference` package imported in the next cell resolves — confirm repo layout).
sys.path.insert(0, '..')

In [3]:
from inference import mape
from inference import utils as infutils
from inference import evaluate

In [4]:
# Load variables from a local ".env" file when python-dotenv is available.
try:
    from dotenv import load_dotenv
except ImportError:
    # python-dotenv is optional: fall back to whatever is already in the environment.
    dotenv_loaded: bool = False
else:
    # load_dotenv() signals a missing/empty ".env" file by returning False, not by raising.
    dotenv_loaded = load_dotenv()

if not dotenv_loaded:
    print('No ".env" file or python-dotenv not installed... Using default env variables...')

In [5]:
def db_connection() -> Engine:
    """Build a SQLAlchemy engine for the PostgreSQL database described by environment variables.

    Reads POSTGRES_DB_NAME, POSTGRES_HOST, POSTGRES_USERNAME, POSTGRES_PASSWORD and POSTGRES_PORT.
    The username and password are expected to be stored raw (not URL-encoded); they are escaped
    here so that characters such as '@', ':' or '/' do not corrupt the connection URL.

    :return: (Engine) engine bound to 'postgresql://user:password@host:port/dbname'
    :raises ValueError: if any of the five environment variables is unset
    """
    env_names: List[str] = ['POSTGRES_DB_NAME', 'POSTGRES_HOST', 'POSTGRES_USERNAME',
                            'POSTGRES_PASSWORD', 'POSTGRES_PORT']
    settings: Dict[str, Optional[str]] = {name: getenv(name) for name in env_names}

    # Fail early with a readable message instead of interpolating the text "None" into the URL.
    missing: List[str] = [name for name, value in settings.items() if value is None]
    if missing:
        missing_names: str = ', '.join(missing)
        raise ValueError(f'Missing environment variable(s): {missing_names}')

    # Credentials must be URL-escaped before being embedded in a database URL.
    user: str = quote_plus(settings['POSTGRES_USERNAME'])
    password: str = quote_plus(settings['POSTGRES_PASSWORD'])
    host: Optional[str] = settings['POSTGRES_HOST']
    port: Optional[str] = settings['POSTGRES_PORT']
    dbname: Optional[str] = settings['POSTGRES_DB_NAME']

    postgres_str: str = f'postgresql://{user}:{password}@{host}:{port}/{dbname}'

    return create_engine(postgres_str)

In [6]:
# Engine built from the POSTGRES_* environment variables; passed to the query helper below.
engine: Engine = db_connection()

In [7]:
def extract_mape_table(engine: Engine) -> DataFrame:
    """Read the complete `tft_testset_mape` table into a DataFrame.

    :param engine: (Engine) database connection handed to pandas as `con`
    :return: (DataFrame) every row and column of `tft_testset_mape`
    """
    query: str = "SELECT * FROM tft_testset_mape"
    return pd.read_sql_query(query, con=engine)

In [8]:
# Read the whole tft_testset_mape table into memory.
df_mape = extract_mape_table(engine)

# Select the summer and winter months

In [23]:
# Summer subset: rows whose UTC timestamp falls in June through September.
mape_summer: DataFrame = df_mape.loc[
    lambda frame: frame['timestamp_utc'].dt.month.isin([6, 7, 8, 9])
]
# Winter subset: rows whose UTC timestamp falls in November through February.
mape_winter: DataFrame = df_mape.loc[
    lambda frame: frame['timestamp_utc'].dt.month.isin([11, 12, 1, 2])
]

In [24]:
# Mean over rows per horizon column, then mean over horizons. numeric_only=True explicitly skips the
# timestamp and identifier columns instead of relying on the deprecated numeric_only=None fallback.
mape_summer.mean(numeric_only=True).mean(), mape_winter.mean(numeric_only=True).mean()


DataFrame.mean and DataFrame.median with numeric_only=None will include datetime64 and datetime64tz columns in a future version.



(45.53040372057947, 40.653633567060716)

In [25]:
# Preview the first rows of the summer subset.
mape_summer.head()

Unnamed: 0,timestamp_utc,identifier,mape_t1,mape_t2,mape_t3,mape_t4,mape_t5,mape_t6,mape_t7,mape_t8,mape_t9,mape_t10,mape_t11,mape_t12
0,2020-08-13 22:00:00,UP_MPNTLCDMRN_1,34.372128,45.581141,49.153277,50.261956,50.9146,51.202818,51.303669,51.136292,50.803383,50.841996,50.86281,50.915652
1,2020-08-13 23:00:00,UP_MPNTLCDMRN_1,33.998339,45.195572,49.106699,50.215994,50.953772,51.230465,51.281337,50.89372,50.829175,50.782566,50.913302,50.948672
2,2020-08-14 00:00:00,UP_MPNTLCDMRN_1,33.927591,45.34941,49.193389,50.33268,51.030266,51.176166,51.018184,50.905192,50.777765,50.836732,50.944066,51.101109
3,2020-08-14 01:00:00,UP_MPNTLCDMRN_1,33.933033,45.60628,49.330672,50.474501,50.939137,50.887639,51.018181,50.859611,50.838224,50.853943,51.120232,51.148005
4,2020-08-14 02:00:00,UP_MPNTLCDMRN_1,33.913127,45.685038,49.402151,50.408027,50.659682,50.893474,50.970825,50.918801,50.855928,51.03429,51.179889,51.366335


In [26]:
def boxplotter(df: DataFrame) -> Figure:
    """Draw one box plot of rolling MAPE per prediction horizon.

    :param df: (DataFrame) MAPE per horizon in the columns. The first two columns are skipped
        (assumed to be the timestamp and the identifier); every remaining column name must end in
        't<hours>', e.g. 'mape_t3'.
    :return: (Figure) plotly figure holding one box trace per horizon, named '<hours>h'
    :raises ValueError: if the text after the last 't' of a horizon column name is not an integer
    """
    fig: Figure = go.Figure()
    # One box per horizon column; the hour count is the suffix after the last 't'.
    for column in df.columns[2:]:
        horizon: int = int(column.rsplit('t', 1)[-1])
        fig.add_trace(go.Box(y=df[column], name=f'{horizon}h'))

    # The legend lists horizons (one entry per trace), so it is titled accordingly.
    fig.update_layout(
        width=1000,
        height=500,
        title="Mape Comparison across prediction horizons",
        title_x=0.5,
        xaxis_title="Horizons",
        yaxis_title='rolling mape(%)',
        legend_title="Horizons",
    )

    return fig

PLOT MAPE SUMMER

In [27]:
fig: Figure = boxplotter(mape_summer)
fig.show()
# numeric_only=True skips the timestamp and identifier columns explicitly (no FutureWarning).
print("Average aggregate rolling mape form summer months is ", mape_summer.mean(numeric_only=True).mean())

Average aggregate rolling mape form summer months is  45.53040372057947



DataFrame.mean and DataFrame.median with numeric_only=None will include datetime64 and datetime64tz columns in a future version.



PLOT MAPE WINTER

In [28]:
fig: Figure = boxplotter(mape_winter)
fig.show()
# numeric_only=True skips the timestamp and identifier columns explicitly (no FutureWarning).
print("Average aggregate rolling mape form winter months is ", mape_winter.mean(numeric_only=True).mean())

Average aggregate rolling mape form winter months is  40.653633567060716



DataFrame.mean and DataFrame.median with numeric_only=None will include datetime64 and datetime64tz columns in a future version.

