In [1]:
import pandas as pd
import numpy as np
import sys, os
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
# Default size (width, height in inches) for every matplotlib figure in this notebook.
plt.rcParams['figure.figsize'] = (12, 6)

import warnings
# NOTE(review): this suppresses every warning category for the whole session,
# so deprecation and dtype warnings from libraries are hidden too — consider
# narrowing the filter to the specific warnings that are known to be noise.
warnings.filterwarnings('ignore')

from dotenv import load_dotenv
# Load variables from a .env file into the process environment so they can be
# read with os.getenv (presumably PROJECT_DIR is defined there — confirm).
load_dotenv()

# Root directory of the project, taken from the PROJECT_DIR environment variable.
# It is put on sys.path so project-local packages can be imported, and is also
# handed to FeatureEngineering further down.
PROJECT_PATH = os.getenv('PROJECT_DIR')

if PROJECT_PATH is None:
    # os.getenv returns None for a missing variable. A None entry on sys.path
    # is ignored by the import system, so the mistake would otherwise only
    # show up later as a ModuleNotFoundError on the project imports.
    print("PROJECT_DIR is not set (checked the environment and .env); "
          "project-local imports will fail.", file=sys.stderr)
elif PROJECT_PATH not in sys.path:
    # Guarded so that re-running this cell does not pile up duplicate entries.
    sys.path.append(PROJECT_PATH)

In [2]:
from src.data_ingestion.iex_data import IexDataFetcher
from src.get_apis.get_forecast import IexForecast
from src.feature_engineering.build_features import FeatureEngineering
from src.utils import *
from config.paths import *

In [3]:
# Project helper objects used by the cells below.
# iex_data: queried later via _get_processed_data for the actual market prices.
iex_data = IexDataFetcher()
# iex_forecast: queried later via _get_processed_forecast for the forecast series.
iex_forecast = IexForecast()
# featured_data: constructed with the project root; its merge_dataframes method
# is used later to combine forecast and actual frames.
featured_data = FeatureEngineering(PROJECT_PATH)

In [4]:
# Market identifier for this run. It is passed to the forecast request and is
# part of the accuracy-report file name ('<market_type>_accuracy_report').
market_type = 'dam'

In [5]:
# Actual prices for the selected market: timestamp plus that market's price column.
# The market comes from `market_type` instead of a second hard-coded 'dam', so
# changing the market in one place cannot silently pair DAM actuals with another
# market's forecast and report file.
# NOTE(review): assumes the price column is named 'mcp_<market_type>' (this is
# 'mcp_dam' for the current setting) — confirm the naming for other markets.
actual = iex_data._get_processed_data(market_type)[['datetime', f'mcp_{market_type}']]

dam data is already updated up to:  2024-01-18 23:45:00


In [6]:
# Previously saved accuracy report for this market.
acc_report = load_pickle(REPORTS_PATH, f'{market_type}_accuracy_report')

# The last 'Date' entry (a 'dd-mm-YYYY' string) is the most recent day already
# scored; the new scoring window starts on the following day, expressed in the
# same string format.
last_reported_day = datetime.strptime(acc_report['Date'].iloc[-1], '%d-%m-%Y')
first_missing_day = last_reported_day + timedelta(days=1)
acc_start_date = first_missing_day.strftime('%d-%m-%Y')

In [8]:
# Request the forecast from the first day missing in the report up to 30 days
# ahead of today; both bounds are 'dd-mm-YYYY' strings.
sdt = acc_start_date
tdt = (datetime.now() + timedelta(days=30)).strftime('%d-%m-%Y')

# Start from a known value so a failed fetch can never leave a stale `forecast`
# from an earlier run of this cell in the kernel.
forecast = None
try:
    forecast = iex_forecast._get_processed_forecast(sdt, tdt, market_type)
except KeyError:
    # The recorded run shows a KeyError about missing columns for this call,
    # presumably raised when no new forecast rows exist for the window. Only
    # that case is reported as "already updated"; any other failure (network,
    # interrupt, programming error) now propagates instead of being mislabelled.
    print(f'{market_type} accuracy report already updated.')

KeyError: "None of [Index(['date', 'time_block', 'price', 'label'], dtype='object')] are in the [columns]"

In [19]:
# Combine the forecast and actual frames into one table. Later cells read the
# 'datetime', 'forecast' and 'mcp_dam' columns from the result.
# NOTE(review): presumably the frames are joined on 'datetime' — confirm in
# FeatureEngineering.merge_dataframes.
df = featured_data.merge_dataframes([forecast, actual])

In [22]:
# Distinct calendar dates present in the merged frame; one report row is built per date.
unique_dates = df['datetime'].dt.date.unique()
# Accumulator for the per-day report rows filled by the loop cell.
# NOTE(review): because this is initialised in its own cell, re-running only
# the loop cell appends the same days again.
acc_data = []

In [23]:
# Row positions within a single day for each time-of-day window. Every hour
# spans 4 consecutive positions, so hour h covers positions h*4 .. h*4 + 3.
# Each window is kept as a one-element list wrapping its list of positions.
night_hours = [list(range(0, 6*4)) + list(range(23*4, 24*4))]   # 00:00-06:00 plus 23:00-24:00
morning_hours = [list(range(6*4, 10*4))]                         # 06:00-10:00
day_hours = [list(range(10*4, 17*4))]                            # 10:00-17:00
evening_hours = [list(range(17*4, 23*4))]                        # 17:00-23:00

# One position list per window, in the order morning, day, evening, night.
hours = [*morning_hours, *day_hours, *evening_hours, *night_hours]

In [24]:
# Build one accuracy row per calendar day in the merged frame:
#   [date string, whole-day MAE, one MAE per window in `hours`, MAPE in percent]
# The window order follows `hours` (morning, day, evening, night).

# Price column of the actuals, derived from the market so it stays in step
# with `market_type`.
# NOTE(review): assumes the column is named 'mcp_<market_type>' ('mcp_dam' for
# the current setting) — confirm for other markets.
price_col = f'mcp_{market_type}'

# Reset here so that re-running this cell rebuilds the rows instead of
# appending the same days a second time.
acc_data = []

for curr_date in unique_dates:
    # Rows of this day, renumbered 0..n-1 so the positions listed in `hours`
    # can be used directly as index labels.
    filtered_df = df[df['datetime'].dt.date == curr_date].reset_index(drop=True)

    # Absolute forecast error per row, computed once and reused for every metric.
    abs_err = (filtered_df['forecast'] - filtered_df[price_col]).abs()

    # MAE of each time-of-day window (label lookup on the renumbered index).
    MAEs = [np.round(abs_err.loc[x].mean(), 2) for x in hours]

    # Total MAE for the day
    day_MAE = np.round(abs_err.mean(), 2)

    # MAPE of the forecast, in percent. A zero actual price would make the
    # ratio infinite for that row.
    MAPE = np.round((abs_err / filtered_df[price_col]).mean() * 100, 2)

    datas = [curr_date.strftime("%d-%m-%Y"), day_MAE, *MAEs, MAPE]
    acc_data.append(datas)

In [32]:
# Column names in the same order as the values in each acc_data row:
# date, whole-day MAE, the four window MAEs, then MAPE.
acc_columns = [
    'Date',
    'MAE',
    'Morning_MAE',
    'Day_MAE',
    'Evening_MAE',
    'Night_MAE',
    'MAPE',
]
curr_acc = pd.DataFrame(acc_data, columns=acc_columns)
# Display the newly computed rows.
curr_acc

Unnamed: 0,Date,MAE,Morning_MAE,Day_MAE,Evening_MAE,Night_MAE,MAPE
0,17-01-2024,326.86,256.74,504.26,276.09,233.03,7.1
1,18-01-2024,327.15,158.99,598.2,102.2,345.01,7.16


In [34]:
# Append the newly scored days to the stored report; ignore_index renumbers
# the rows of the combined frame from 0.
acc = pd.concat([acc_report, curr_acc], ignore_index = True)

In [37]:
# Persist the combined report under the same name it was loaded from.
# NOTE(review): presumably this overwrites the previous report file — confirm
# in save_pickle; there is no backup of the old report here.
save_pickle(acc, REPORTS_PATH, f'{market_type}_accuracy_report')

In [None]:
# Final status message naming the market that was processed.
print(f'Accuracy report for {market_type} generated.')