In [15]:
import os
import pandas as pd
import datetime as dt
import time
import calendar
import numpy as np
from statistics import mean

from uce_resources import get_site_id, get_mms_data, get_applied_forecast, get_prices, get_green_tariff
from uce_resources import make_results

In [16]:
from settings.sites import ceg as sites_list

# Reporting period and the forecast variants evaluated by this notebook.
target_year = 2022
target_month = 1
forecasts_types = ['real', 'naive', 'zero']

# Every result file of this run is written below data/results/<YYYY>-<MM>/.
target_folder = 'data/results/{}-{:0>2}/'.format(target_year, target_month)
# exist_ok=True creates the folder only when it is missing, without the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(target_folder, exist_ok=True)

# NOTE(review): this hard-coded list replaces the `sites_list` imported from
# settings.sites at the top of the cell - confirm that the override is intended.
sites_list = ['Myroliubivka', 'Kyselivka', 'Poniativka']
# One placeholder per site; the data-preparation loop fills in the values.
sites_data = dict.fromkeys(sites_list)
print(sites_data)

{'Myroliubivka': None, 'Kyselivka': None, 'Poniativka': None}


# Data preparation section

In [17]:
from sqlalchemy import create_engine, MetaData
from sqlalchemy.pool import NullPool
from settings.db import DO_URL

# Engine for the database at DO_URL. NullPool disables connection pooling:
# every `engine.connect()` opens a new DB-API connection and closes it on release.
engine = create_engine(DO_URL, poolclass=NullPool)
# Reflect the existing database schema so that the cells below can look tables
# up by name through `metadata.tables[...]`.
metadata = MetaData()
metadata.reflect(bind=engine)

  import sys


In [18]:
# Load the market prices of the target month (UAH) and export them to Excel.
with engine.connect() as connection:
    prices = get_prices(target_year, target_month, connection, metadata.tables['electricity_market_prices'], currency='UAH')

# NOTE(review): the index looks like naive UTC timestamps of Kyiv-local hours
# (January starts at 2021-12-31 22:30) - confirm. The day range used in the file
# name is therefore taken in Kyiv time; taking it from the raw index lets the last
# UTC hours of the previous month leak in (e.g. "1-31" for a 30-day month).
price_index = prices.index if prices.index.tz is not None else prices.index.tz_localize('UTC')
price_days = price_index.tz_convert('Europe/Kiev').day
min_price_day = price_days.min()
max_price_day = price_days.max()

prices.to_excel(target_folder + 'prices_{}_{}_{}-{}.xlsx'.format(target_year, target_month, min_price_day, max_price_day))
prices

Unnamed: 0,dam,imsp,positive_unbalance,negative_unbalance
2021-12-31 22:30:00,1.70000,0.09074,0.08620,1.78500
2021-12-31 23:30:00,1.04996,0.00001,0.00001,1.10246
2022-01-01 00:30:00,1.04996,0.00001,0.00001,1.10246
2022-01-01 01:30:00,1.04999,0.00001,0.00001,1.10249
2022-01-01 02:30:00,1.04996,0.00001,0.00001,1.10246
...,...,...,...,...
2022-01-31 17:30:00,2.99500,0.00001,0.00001,3.14475
2022-01-31 18:30:00,3.04000,0.00001,0.00001,3.19200
2022-01-31 19:30:00,2.97000,0.00001,0.00001,3.11850
2022-01-31 20:30:00,2.60000,0.00001,0.00001,2.73000


In [19]:

# For every site collect what the cost estimation needs: ids, green tariff,
# metered yield (MMS) and the three forecast variants ('real', 'zero', 'naive').
with engine.connect() as connection:
    tables = metadata.tables

    for site in sites_data.keys():
        start = time.time()
        print('-' * 50)
        print(site)

        site_data = {'site': site}
        site_data['site_id'], site_data['legal_entity'] = get_site_id(
            site, connection, tables['sites'], include_legal_entity_id=True)
        site_id = site_data['site_id']

        # Tariff looked up for the first day of the target month.
        site_data['green_tariff'] = get_green_tariff(
            site_id, dt.date(year=target_year, month=target_month, day=1),
            connection, tables['green_tariffs'], currency='UAH')
        print('Green tariff: {}'.format(site_data['green_tariff']))

        # include_prev=True: presumably also loads the previous month, which the
        # naive forecast below needs for the first days - TODO confirm.
        mms_data, site_data['mms_version'] = get_mms_data(
            site_id, target_year, target_month,
            connection, tables['mms_data'], include_prev=True)
        print('MMS data | {} version | of | {} records |'.format(site_data['mms_version'], len(mms_data)))

        applied_forecast = get_applied_forecast(
            site_id, target_year, target_month,
            connection=connection, db_table=tables['forecasts_applied'])
        print('Forecast data of | {} records |'.format(len(applied_forecast)))

        # 'real': metered yield next to the forecast that was actually applied.
        site_data['real_forecast_data'] = pd.concat([mms_data, applied_forecast], axis=1, join='inner')
        print('Real forecast data prepared')

        # 'zero': same timestamps, forecast replaced by zeros.
        site_data['zero_forecast_data'] = pd.concat([mms_data, applied_forecast * 0], axis=1, join='inner')
        print('Zero forecast data prepared')

        # 'naive': the yield recorded 48 rows earlier serves as the forecast
        # (48 hours if the data is hourly without gaps - TODO confirm).
        naive_forecast_data = pd.concat([mms_data, mms_data.shift(48)], axis=1, join='inner').dropna()
        naive_forecast_data.columns = ['yield [kWh]', 'forecast [kWh]']
        naive_forecast_data['forecast [kWh]'] = naive_forecast_data['forecast [kWh]'].astype(int)
        site_data['naive_forecast_data'] = naive_forecast_data
        print('Naive forecast data prepared')

        sites_data[site] = site_data
        end = time.time()

        print('Processing took {} seconds'.format(round(end - start, 2)))

--------------------------------------------------
Myroliubivka
Green tariff: 4.1268
MMS data | v1 version | of | 1488 records |
Forecast data of | 744 records |
Real forecast data prepared
Zero forecast data prepared
Naive forecast data prepared
Processing took 0.37 seconds
--------------------------------------------------
Kyselivka
Green tariff: 4.1268
MMS data | v1 version | of | 1488 records |
Forecast data of | 744 records |
Real forecast data prepared
Zero forecast data prepared
Naive forecast data prepared
Processing took 0.33 seconds
--------------------------------------------------
Poniativka
Green tariff: 4.1268
MMS data | v1 version | of | 1488 records |
Forecast data of | 744 records |
Real forecast data prepared
Zero forecast data prepared
Naive forecast data prepared
Processing took 0.29 seconds


In [39]:
# Legal entity (producer) id of every site, in site order.
producers = [sites_data[site]['legal_entity'] for site in sites_data.keys()]
print(producers)

# One empty bucket per distinct legal entity.
producers_data = {producer: [] for producer in set(producers)}
print(producers_data)

# Put each site's data dict into the bucket of the legal entity that owns it.
for site, site_info in sites_data.items():
    leg_ent = site_info['legal_entity']
    producers_data[leg_ent].append(site_info)
print(producers_data)

# data = prices

# for site in sites_data.keys():
#     site_errors = sites_data[site]['real_forecast_data']['yield [kWh]'] - sites_data[site]['real_forecast_data']['forecast [kWh]']
#     data[site] = site_errors

# # data
# data.to_excel('./data/results/{}-{:0>2}/hourly_results_{}_{}_{}_UAH.xlsx'.format(target_year, target_month, target_year, target_month, '1-31'))

[15, 15, 17]
{17: [], 15: []}
{17: [{'site': 'Poniativka', 'site_id': 4, 'legal_entity': 17, 'green_tariff': 4.1268, 'mms_version': 'v1', 'real_forecast_data':                      yield [kWh]  forecast [kWh]
2021-12-31 22:30:00          -43             -39
2021-12-31 23:30:00          -43             -39
2022-01-01 00:30:00          -42             -39
2022-01-01 01:30:00          -43             -39
2022-01-01 02:30:00          -43             -39
...                          ...             ...
2022-01-31 17:30:00          -44             -39
2022-01-31 18:30:00          -45             -39
2022-01-31 19:30:00          -43             -39
2022-01-31 20:30:00          -45             -39
2022-01-31 21:30:00          -48             -39

[744 rows x 2 columns], 'zero_forecast_data':                      yield [kWh]  forecast [kWh]
2021-12-31 22:30:00          -43               0
2021-12-31 23:30:00          -43               0
2022-01-01 00:30:00          -42               0
2022-01-0

## Unbalance cost estimations

In [40]:
# Column order of the result tables, grouped by topic.
columns = [
    # identification of the evaluated period and data
    'legal_entity',
    'first_date',
    'last_date',
    'number_of_values [records]',
    'yield_data_version',
    # energy totals and revenue
    'yielded [kWh]',
    'forecast_type',
    'forecasted [kWh]',
    'green_tariff [UAH]',
    'revenue [UAH]',
    # overall forecast error
    'error_u [kWh]',
    'error_u [%]',
    # extremes
    'max_energy [kWh]',
    'max_forecast [kWh]',
    'max_error [kWh]',
    # regression metrics
    'mean_absolute_error [kWh]',
    'median_absolute_error [kWh]',
    'mean_square_error [kWh]',
    'root_mean_square_error [kWh]',
    'R^2 score',
    # records inside the alpha_u tolerance
    'dropped by alpha_u [records]',
    'dropped by alpha_u [%]',
    # error split by sign
    'error_u (excess) [kWh]',
    'error_u (excess) [%]',
    'error_u (shortage) [kWh]',
    'error_u (shortage) [%]',
    # unbalance cost, rule 641
    'cieq_641_rule (excess) [UAH]',
    'cieq_641_rule (excess) [%]',
    'cieq_641_rule (shortage) [UAH]',
    'cieq_641_rule (shortage) [%]',
    'cieq_641_rule (net) [UAH]',
    'cieq_641_rule (net) [%]',
    'imsp_avg_641_rule [UAH/MWh]',
    # unbalance cost, rule 641* (imsp-based variant)
    'cieq_641_rule* [UAH]',
    'cieq_641_rule* [%]',
    'imsp_avg_641_rule* [UAH/MWh]',
]

### Daily results

In [41]:
def _daily_index_utc(year, month, day):
    """Hourly timestamps 00:30..23:30 (Kyiv time) of one day, returned as naive UTC."""
    first = dt.datetime(year=year, month=month, day=day, hour=0, minute=30)
    last = dt.datetime(year=year, month=month, day=day, hour=23, minute=30)
    kyiv_hours = pd.date_range(start=first, end=last, freq='1H', tz='europe/kiev')
    # Convert to UTC and drop the tz info so the index is comparable with naive UTC data.
    return kyiv_hours.tz_convert('utc').tz_localize(None)


# One hourly index per calendar day of the target month.
days_in_month = calendar.monthrange(target_year, target_month)[-1]
daily_indexes = [
    _daily_index_utc(target_year, target_month, day)
    for day in range(1, days_in_month + 1)
]

print(len(daily_indexes))

31


In [42]:
# Sanity check: show the hourly index built for the first day of the month.
print(daily_indexes[0])

DatetimeIndex(['2021-12-31 22:30:00', '2021-12-31 23:30:00',
               '2022-01-01 00:30:00', '2022-01-01 01:30:00',
               '2022-01-01 02:30:00', '2022-01-01 03:30:00',
               '2022-01-01 04:30:00', '2022-01-01 05:30:00',
               '2022-01-01 06:30:00', '2022-01-01 07:30:00',
               '2022-01-01 08:30:00', '2022-01-01 09:30:00',
               '2022-01-01 10:30:00', '2022-01-01 11:30:00',
               '2022-01-01 12:30:00', '2022-01-01 13:30:00',
               '2022-01-01 14:30:00', '2022-01-01 15:30:00',
               '2022-01-01 16:30:00', '2022-01-01 17:30:00',
               '2022-01-01 18:30:00', '2022-01-01 19:30:00',
               '2022-01-01 20:30:00', '2022-01-01 21:30:00'],
              dtype='datetime64[ns]', freq=None)


In [None]:
from sklearn.metrics import max_error, mean_absolute_error, mean_squared_error, median_absolute_error, r2_score
from math import sqrt

def _kyiv_date(timestamp):
    """Return the Kyiv-local calendar date of `timestamp` (naive values are read as UTC)."""
    if timestamp.tzinfo is None:
        timestamp = timestamp.tz_localize('UTC')
    return timestamp.tz_convert('Europe/Kiev').date()


def _shared_or_joined(values):
    """Return the value shared by all items, or the distinct values joined with ', '."""
    distinct = []
    for value in values:
        if value not in distinct:
            distinct.append(value)
    if len(distinct) == 1:
        return distinct[0]
    return ', '.join(str(value) for value in distinct)


def make_results_by_producer(site_data, forecast_type, prices, index):
    """Summarise forecast accuracy and unbalance cost of one producer over `index`.

    Parameters
    ----------
    site_data : dict or list of dict
        One site dict, or the list of site dicts of a legal entity. Each dict
        must provide 'site', 'legal_entity', 'mms_version', 'green_tariff'
        (a number, or a dict keyed by month number) and the forecast frame
        selected by `forecast_type`, with columns 'yield [kWh]' and
        'forecast [kWh]'. For a list, yield and forecast are summed over the
        timestamps present for every site, so site errors net out before the
        cost is computed; revenue is the sum of each site's yield times its
        own tariff.
    forecast_type : str
        'real', 'naive', 'zero', '1_dah', 'pro' or 'restored' select the key
        '<forecast_type>_forecast_data'; any other value is used as the key itself.
    prices : pandas.DataFrame
        Price frame with columns 'dam', 'imsp', 'positive_unbalance' and
        'negative_unbalance', indexed like the forecast frames.
    index : pandas.DatetimeIndex
        Timestamps of the evaluated period.

    Returns
    -------
    pandas.Series or None
        One value per result key; None when no timestamp of `index` has both
        prices and site data (or `site_data` is an empty list).

    Notes
    -----
    NOTE(review): when the sites of a producer have different tariffs,
    'green_tariff [UAH]' is reported as revenue / yield - confirm this is the
    wanted figure.
    """
    sites = [site_data] if isinstance(site_data, dict) else list(site_data)
    if not sites:
        return None

    named_types = ('real', 'naive', 'zero', '1_dah', 'pro', 'restored')
    data_key = '{}_forecast_data'.format(forecast_type) if forecast_type in named_types else forecast_type

    # Restrict every site's frame to the requested timestamps.
    frames = []
    for site in sites:
        frame = site[data_key]
        frames.append(frame.loc[frame.index.intersection(index)])

    if len(frames) == 1:
        energy = frames[0]
    else:
        # Producer level: only timestamps available for all sites are summed.
        shared_index = frames[0].index
        for frame in frames[1:]:
            shared_index = shared_index.intersection(frame.index)
        energy = sum(frame.loc[shared_index, ['yield [kWh]', 'forecast [kWh]']] for frame in frames)

    data = pd.concat([prices, energy], axis=1, join='inner')

    if len(data.index) == 0:
        return None

    if len(data.index) != 24:
        print('{} index len is {}'.format(data.index.max().strftime('%Y-%m-%d'), len(data.index)))

    # Zero forecasts make the relative figures inf/NaN; silence the numpy warnings.
    with np.errstate(divide='ignore', invalid='ignore'):
        tariff_month = data.index.max().month
        tariffs = [
            site['green_tariff'][tariff_month] if isinstance(site['green_tariff'], dict) else site['green_tariff']
            for site in sites
        ]
        if len(sites) == 1:
            green_tariff = tariffs[0]
            data['revenue [UAH]'] = data['yield [kWh]'] * green_tariff
        else:
            data['revenue [UAH]'] = sum(
                frame['yield [kWh]'].reindex(data.index) * tariff
                for frame, tariff in zip(frames, tariffs)
            )
            if all(tariff == tariffs[0] for tariff in tariffs):
                green_tariff = tariffs[0]
            else:
                green_tariff = data['revenue [UAH]'].sum() / data['yield [kWh]'].sum()

        data['error_u [kWh]'] = data['yield [kWh]'] - data['forecast [kWh]']
        data['error_u [%]'] = data['error_u [kWh]'] / data['forecast [kWh]'] * 100

        excess_mask = data['error_u [kWh]'] >= 0
        shortage_mask = data['error_u [kWh]'] < 0

        data['error_u (excess) [kWh]'] = data['error_u [kWh]'] * excess_mask
        data['error_u (shortage) [kWh]'] = data['error_u [kWh]'] * shortage_mask

        # Records whose relative error is within 5 % carry no unbalance cost.
        data['alfa_u_mask'] = data['error_u [%]'].abs() > 5.0

        data['cieq_641_rule (excess) [UAH]'] = data['error_u (excess) [kWh]'] * data['alfa_u_mask'] * \
                                        (data['dam'] - data['positive_unbalance'])

        data['cieq_641_rule (shortage) [UAH]'] = data['error_u (shortage) [kWh]'] * data['alfa_u_mask'] * \
                                        (data['dam'] - data['negative_unbalance'])

        data['cieq_641_rule (net) [UAH]'] = data['cieq_641_rule (excess) [UAH]'] + data['cieq_641_rule (shortage) [UAH]']

        # Rule 641*: only excess while imsp < dam, or shortage while imsp > dam, is priced.
        mask_641_1 = (data['imsp'] < data['dam']) & (data['error_u [kWh]'] > 0)
        mask_641_2 = (data['imsp'] > data['dam']) & (data['error_u [kWh]'] < 0)
        mask_641 = mask_641_1 | mask_641_2

        # '641_mask' already includes the tolerance mask, so it is applied once.
        data['641_mask'] = mask_641 & data['alfa_u_mask']
        data['641_price'] = data['dam'] - data['imsp']
        data['cieq_641_rule* [UAH]'] = data['error_u [kWh]'] * data['641_price'] * data['641_mask']

        yielded = data['yield [kWh]']
        forecasted = data['forecast [kWh]']
        forecast_total = forecasted.sum()
        revenue_total = data['revenue [UAH]'].sum()
        abs_error_total = data['error_u [kWh]'].abs().sum()
        tolerated = data['alfa_u_mask']

        result = dict()

        result['site'] = _shared_or_joined(site['site'] for site in sites)
        result['legal_entity'] = _shared_or_joined(site['legal_entity'] for site in sites)
        # The data is indexed in UTC while the evaluated days are Kyiv-local days,
        # so the dates are derived in Kyiv time (also correct for partial days).
        result['first_date'] = _kyiv_date(data.index.min())
        result['last_date'] = _kyiv_date(data.index.max())
        result['number_of_values [records]'] = len(data.index)

        result['yield_data_version'] = _shared_or_joined(site['mms_version'] for site in sites)
        result['yielded [kWh]'] = yielded.sum()

        result['forecast_type'] = forecast_type
        result['forecasted [kWh]'] = forecast_total

        result['green_tariff [UAH]'] = green_tariff
        result['revenue [UAH]'] = revenue_total

        result['error_u [kWh]'] = abs_error_total
        result['error_u [%]'] = abs_error_total / forecast_total * 100

        result['max_energy [kWh]'] = yielded.max()
        result['max_forecast [kWh]'] = forecasted.max()
        result['max_error [kWh]'] = max_error(yielded, forecasted)

        result['mean_absolute_error [kWh]'] = mean_absolute_error(yielded, forecasted)
        result['median_absolute_error [kWh]'] = median_absolute_error(yielded, forecasted)
        result['mean_square_error [kWh]'] = mean_squared_error(yielded, forecasted)
        result['root_mean_square_error [kWh]'] = sqrt(mean_squared_error(yielded, forecasted))
        result['R^2 score'] = r2_score(yielded, forecasted)

        result['dropped by alpha_u [records]'] = len(tolerated) - tolerated.sum()
        result['dropped by alpha_u [%]'] = 100 - tolerated.sum() / len(tolerated) * 100

        result['error_u (excess) [kWh]'] = data['error_u (excess) [kWh]'].sum()
        result['error_u (excess) [%]'] = data['error_u (excess) [kWh]'].sum() / forecast_total * 100
        result['error_u (shortage) [kWh]'] = data['error_u (shortage) [kWh]'].sum()
        result['error_u (shortage) [%]'] = data['error_u (shortage) [kWh]'].sum() / forecast_total * 100

        result['cieq_641_rule (excess) [UAH]'] = data['cieq_641_rule (excess) [UAH]'].sum()
        result['cieq_641_rule (excess) [%]'] = data['cieq_641_rule (excess) [UAH]'].sum() / revenue_total * 100
        result['cieq_641_rule (shortage) [UAH]'] = data['cieq_641_rule (shortage) [UAH]'].sum()
        result['cieq_641_rule (shortage) [%]'] = data['cieq_641_rule (shortage) [UAH]'].sum() / revenue_total * 100
        result['cieq_641_rule (net) [UAH]'] = data['cieq_641_rule (net) [UAH]'].sum()
        result['cieq_641_rule (net) [%]'] = data['cieq_641_rule (net) [UAH]'].sum() / revenue_total * 100

        result['imsp_avg_641_rule [UAH/MWh]'] = data['cieq_641_rule (net) [UAH]'].sum() / \
                                                        result['yielded [kWh]'] * 1000

        result['cieq_641_rule* [UAH]'] = data['cieq_641_rule* [UAH]'].sum()
        result['cieq_641_rule* [%]'] = data['cieq_641_rule* [UAH]'].sum() / revenue_total * 100

        result['imsp_avg_641_rule* [UAH/MWh]'] = data['cieq_641_rule* [UAH]'].sum() / \
                                                        result['yielded [kWh]'] * 1000

    return pd.Series(result)

In [None]:
def _results_frame(records):
    """Stack per-day result Series into one frame: report `columns` first, extra keys after."""
    frame = pd.DataFrame(records)
    extra = [name for name in frame.columns if name not in columns]
    return frame.reindex(columns=columns + extra)


# Daily results per forecast type. Rows are collected in plain lists and turned into
# frames once: DataFrame.append was removed in pandas 2.0 and re-copied the frame per row.
daily_records = {'real': [], 'naive': [], 'zero': []}

# producers_data maps a legal entity to the *list* of its site dicts, so the result
# frames cannot be stored on it under string keys; they are kept here instead.
producers_results = {}

for legal_entity, producer_sites in producers_data.items():
    entity_records = {forecast_type: [] for forecast_type in daily_records}

    for index in daily_indexes:
        for forecast_type, records in entity_records.items():
            # The whole list of the entity's site dicts is handed to the function.
            result = make_results_by_producer(producer_sites, forecast_type, prices, index)
            if result is not None:
                records.append(result)

    # Only this entity's rows (not the rows accumulated for earlier entities).
    producers_results[legal_entity] = {
        'results_' + forecast_type: _results_frame(records)
        for forecast_type, records in entity_records.items()
    }
    for forecast_type, records in entity_records.items():
        daily_records[forecast_type].extend(records)

    print(f'{legal_entity} - Results daily: Ok!')

# Combined frames over all legal entities, used by the export cell.
results_real = _results_frame(daily_records['real'])
results_naive = _results_frame(daily_records['naive'])
results_zero = _results_frame(daily_records['zero'])

In [56]:
# sites_data['Pohrebyshche']['results_real']

In [57]:
from uce_resources import save_results, format_excel

# Stack the daily results of all forecast types into one table.
results_daily = pd.concat([results_real, results_naive, results_zero], axis=0)
if results_daily.empty:
    # Without rows the day range below would fail with an obscure AttributeError.
    raise ValueError('No daily results to save: results_real, results_naive and results_zero are empty')

# First and last evaluated day, used in the file name.
min_day = results_daily.first_date.min().day
max_day = results_daily.last_date.max().day

daily_results_path = target_folder + 'uce_daily_{}_{}_{}-{}_UAH.xlsx'.format(target_year, target_month, min_day, max_day)
# The context manager saves and closes the workbook on exit.
with pd.ExcelWriter(daily_results_path, engine="openpyxl") as writer:
    # sheet_name is passed by keyword: positional use is deprecated in pandas 2.x.
    results_daily.to_excel(writer, sheet_name='results_daily')

# format_excel(writer, results_daily).save()

print('Saving results: ok!')


Saving results: ok!
