In [39]:
import os
import pandas as pd
import datetime as dt
import time, pytz
import calendar
import numpy as np
from statistics import mean

from uce_resources import get_site_id, get_mms_data, get_applied_forecast, get_prices, get_green_tariff
from uce_resources import make_results

In [2]:
from settings.sites import ceg as sites_list

# Reporting period and the forecast variants evaluated by this notebook.
target_year = 2022
target_month = 1
forecasts_types = ['real', 'naive', 'zero']

# Every file produced for the period is written below data/results/<YYYY>-<MM>/.
target_folder = 'data/results/{}-{:0>2}/'.format(target_year, target_month)
# exist_ok=True replaces the exists()/makedirs() pair: no check-then-create race,
# and the cell can be re-run safely.
os.makedirs(target_folder, exist_ok=True)

# NOTE: this assignment overrides the `sites_list` imported from settings.sites
# at the top of the cell, so only the site(s) named here are processed.
sites_list = ['Myroliubivka']
# Site name -> None placeholders; the data preparation loop replaces each value
# with that site's data dict.
sites_data = dict.fromkeys(sites_list)
print(sites_data)

{'Myroliubivka': None}


# Data preparation section

In [3]:
from sqlalchemy import create_engine, MetaData
from sqlalchemy.pool import NullPool
from settings.db import DO_URL

# NullPool disables SQLAlchemy connection pooling: every engine.connect() opens
# a new database connection and closing it releases it immediately.
engine = create_engine(DO_URL, poolclass=NullPool)
# Reflect the existing schema so that later cells can look tables up by name
# through metadata.tables[...].
metadata = MetaData()
metadata.reflect(bind=engine)

  metadata.reflect(bind=engine)


In [4]:
# Load the market prices (in UAH) of the target month from the database.
with engine.connect() as connection:
    prices = get_prices(
        target_year,
        target_month,
        connection,
        metadata.tables['electricity_market_prices'],
        currency='UAH',
    )

# First and last day-of-month present in the price index; both go into the file name.
price_days = prices.index.day
min_price_day = price_days.min()
max_price_day = price_days.max()

# Keep a copy of the prices next to the other results of the period.
prices_file_name = 'prices_{}_{}_{}-{}.xlsx'.format(target_year, target_month, min_price_day, max_price_day)
prices.to_excel(target_folder + prices_file_name)
prices

Unnamed: 0,dam,imsp,positive_unbalance,negative_unbalance
2021-12-31 22:30:00,1.70000,0.09074,0.08620,1.78500
2021-12-31 23:30:00,1.04996,0.00001,0.00001,1.10246
2022-01-01 00:30:00,1.04996,0.00001,0.00001,1.10246
2022-01-01 01:30:00,1.04999,0.00001,0.00001,1.10249
2022-01-01 02:30:00,1.04996,0.00001,0.00001,1.10246
...,...,...,...,...
2022-01-31 17:30:00,2.99500,0.00001,0.00001,3.14475
2022-01-31 18:30:00,3.04000,0.00001,0.00001,3.19200
2022-01-31 19:30:00,2.97000,0.00001,0.00001,3.11850
2022-01-31 20:30:00,2.60000,0.00001,0.00001,2.73000


In [54]:
limitations_file = './data/limitations/limitations.xlsx'

# The workbook holds one sheet per month named '<YYYY>_<MM>'; blank cells count as 0.
limitation_data = pd.read_excel(
    limitations_file,
    sheet_name='{}_{:02d}'.format(target_year, target_month),
    parse_dates=False,
).fillna(0)

# 'Time' - 1 becomes the zero-padded hour of the timestamp (presumably 'Time' is the
# 1-based hour of the day - confirm against the workbook). The int cast keeps the
# '{:02d}' format working when the column was read as float.
hour_labels = (limitation_data['Time'].astype(int) - 1).map('{:02d}'.format)

# Columns are converted by NAME. The previous positional `iloc[:, :3].astype(str)`
# also hit the first site column (the helper hour column was appended last, not
# third), turning that site's limits into strings.
index = pd.to_datetime(
    limitation_data['Date'].astype(str) + ' ' + hour_labels,
    format='%Y-%m-%d %H',
) + dt.timedelta(minutes=30)
index = pd.DatetimeIndex(data=index)
# Local (Kyiv) wall-clock time -> naive UTC, the convention used by `prices.index`.
# NOTE(review): tz_localize raises on ambiguous / non-existent local times, so months
# containing a DST switch may need explicit `ambiguous=` / `nonexistent=` handling.
index = index.tz_localize(pytz.timezone('europe/kiev')).tz_convert('utc').tz_localize(None)

# Everything except 'Date' and 'Time' is treated as a per-site limitation column.
limitation_data = limitation_data.drop(columns=['Date', 'Time']).astype(int)
limitation_data.index = index

# Align to the price timestamps: timestamps without a limitation row get 0,
# limitation rows outside the price index are dropped.
extended_limitations = limitation_data.reindex(prices.index, fill_value=0)
limitation_data = extended_limitations
print(extended_limitations.head(24))

                    Afanasiivka  Bazaltova  Balivka  Balky  Bar  \
2021-12-31 22:30:00           0          0        0      0    0   
2021-12-31 23:30:00           0          0        0      0    0   
2022-01-01 00:30:00           0          0        0      0    0   
2022-01-01 01:30:00           0          0        0      0    0   
2022-01-01 02:30:00           0          0        0      0    0   
2022-01-01 03:30:00           0          0        0      0    0   
2022-01-01 04:30:00           0          0        0      0    0   
2022-01-01 05:30:00           0          0        0      0    0   
2022-01-01 06:30:00         254        167       13     33   39   
2022-01-01 07:30:00         673        563      340    174  137   
2022-01-01 08:30:00        1056        879      643    345  300   
2022-01-01 09:30:00        1392       1323      636    474  375   
2022-01-01 10:30:00           0          0        0      0    0   
2022-01-01 11:30:00           0          0        0      0    

In [5]:

# Collect, for every configured site, its metadata plus the three yield/forecast
# tables ('real', 'zero', 'naive') used by the result calculations further down.
with engine.connect() as connection:

    for site in sites_data:
        start = time.time()
        print('-' * 50)
        print(site)

        site_id, legal_entity = get_site_id(
            site,
            connection,
            metadata.tables['sites'],
            include_legal_entity_id=True,
        )
        site_data = {'site': site, 'site_id': site_id, 'legal_entity': legal_entity}

        # The green tariff is looked up for the first day of the target month.
        period_start = dt.date(year=target_year, month=target_month, day=1)
        site_data['green_tariff'] = get_green_tariff(
            site_id,
            period_start,
            connection,
            metadata.tables['green_tariffs'],
            currency='UAH',
        )
        print('Green tariff: {}'.format(site_data['green_tariff']))

        mms_data, mms_version = get_mms_data(
            site_id,
            target_year,
            target_month,
            connection,
            metadata.tables['mms_data'],
            include_prev=True,
        )
        site_data['mms_version'] = mms_version
        print('MMS data | {} version | of | {} records |'.format(mms_version, len(mms_data)))

        applied_forecast = get_applied_forecast(
            site_id,
            target_year,
            target_month,
            connection=connection,
            db_table=metadata.tables['forecasts_applied'],
        )
        print('Forecast data of | {} records |'.format(len(applied_forecast)))
        print(applied_forecast)

        # 'real': measured yield next to the forecast that was actually applied
        # (only timestamps present in both tables are kept).
        site_data['real_forecast_data'] = pd.concat([mms_data, applied_forecast], axis=1, join='inner')
        print('Real forecast data prepared')

        # 'zero': same timestamps, but with an all-zero forecast.
        zeroed_forecast = applied_forecast * 0
        site_data['zero_forecast_data'] = pd.concat([mms_data, zeroed_forecast], axis=1, join='inner')
        print('Zero forecast data prepared')

        # 'naive': the measured yield shifted down by 48 rows serves as the forecast
        # (presumably "the value two days earlier" for hourly data - confirm the
        # sampling of mms_data). Rows without a shifted value are dropped.
        lagged_yield = mms_data.shift(48)
        naive_forecast_data = pd.concat([mms_data, lagged_yield], axis=1, join='inner')
        naive_forecast_data = naive_forecast_data.dropna(axis=0, how='any')
        naive_forecast_data.columns = ['yield [kWh]', 'forecast [kWh]']
        naive_forecast_data['forecast [kWh]'] = naive_forecast_data['forecast [kWh]'].astype(int)
        site_data['naive_forecast_data'] = naive_forecast_data
        print('Naive forecast data prepared')

        # Replace the None placeholder with the prepared data.
        sites_data[site] = site_data
        end = time.time()

        print('Processing took {} seconds'.format(round(end - start, 2)))

--------------------------------------------------
Myroliubivka
Green tariff: 4.1268
MMS data | v1 version | of | 1488 records |


  forecast_data = forecast_data.append(data)
  forecast_data = forecast_data.append(data)
  forecast_data = forecast_data.append(data)
  forecast_data = forecast_data.append(data)
  forecast_data = forecast_data.append(data)
  forecast_data = forecast_data.append(data)
  forecast_data = forecast_data.append(data)
  forecast_data = forecast_data.append(data)
  forecast_data = forecast_data.append(data)
  forecast_data = forecast_data.append(data)
  forecast_data = forecast_data.append(data)
  forecast_data = forecast_data.append(data)
  forecast_data = forecast_data.append(data)
  forecast_data = forecast_data.append(data)
  forecast_data = forecast_data.append(data)
  forecast_data = forecast_data.append(data)
  forecast_data = forecast_data.append(data)
  forecast_data = forecast_data.append(data)
  forecast_data = forecast_data.append(data)
  forecast_data = forecast_data.append(data)
  forecast_data = forecast_data.append(data)
  forecast_data = forecast_data.append(data)
  forecast

Forecast data of | 744 records |
                     forecast [kWh]
2021-12-31 22:30:00             -25
2021-12-31 23:30:00             -25
2022-01-01 00:30:00             -25
2022-01-01 01:30:00             -25
2022-01-01 02:30:00             -25
...                             ...
2022-01-31 17:30:00             -25
2022-01-31 18:30:00             -25
2022-01-31 19:30:00             -25
2022-01-31 20:30:00             -25
2022-01-31 21:30:00             -25

[744 rows x 1 columns]
Real forecast data prepared
Zero forecast data prepared
Naive forecast data prepared
Processing took 4.74 seconds


In [None]:
# print(sites_data['Oleshky_2'])

# data = prices

# for site in sites_data.keys():
#     site_errors = sites_data[site]['real_forecast_data']['yield [kWh]'] - sites_data[site]['real_forecast_data']['forecast [kWh]']
#     data[site] = site_errors

# # data
# data.to_excel('./data/results/{}-{:0>2}/hourly_results_{}_{}_{}_UAH.xlsx'.format(target_year, target_month, target_year, target_month, '1-31'))

## Unbalance cost estimations

In [None]:
# Column layout of the result tables built below (the empty result frames are
# created with exactly these columns, in this order).
columns = [
    # site and period
    'site',
    'legal_entity',
    'first_date',
    'last_date',
    'number_of_values [records]',
    'yield_data_version',
    # yield, forecast and revenue
    'yielded [kWh]',
    'forecast_type',
    'forecasted [kWh]',
    'green_tariff [UAH]',
    'revenue [UAH]',
    # forecast error statistics
    'error_u [kWh]',
    'error_u [%]',
    'max_energy [kWh]',
    'max_forecast [kWh]',
    'max_error [kWh]',
    'mean_absolute_error [kWh]',
    'median_absolute_error [kWh]',
    'mean_square_error [kWh]',
    'root_mean_square_error [kWh]',
    'R^2 score',
    'dropped by alpha_u [records]',
    'dropped by alpha_u [%]',
    'error_u (excess) [kWh]',
    'error_u (excess) [%]',
    'error_u (shortage) [kWh]',
    'error_u (shortage) [%]',
    # cieq_641_rule / imsp_avg_641_rule figures
    'cieq_641_rule (excess) [UAH]',
    'cieq_641_rule (excess) [%]',
    'cieq_641_rule (shortage) [UAH]',
    'cieq_641_rule (shortage) [%]',
    'cieq_641_rule (net) [UAH]',
    'cieq_641_rule (net) [%]',
    'imsp_avg_641_rule [UAH/MWh]',
    'cieq_641_rule* [UAH]',
    'cieq_641_rule* [%]',
    'imsp_avg_641_rule* [UAH/MWh]',
]

### Daily results

In [None]:
daily_indexes = list()

def _kyiv_day_as_utc(year, month, day):
    """Return the hourly timestamps (00:30 ... 23:30 Kyiv time) of one day as naive UTC."""
    local_hours = pd.date_range(
        start=dt.datetime(year, month, day, 0, 30),
        end=dt.datetime(year, month, day, 23, 30),
        freq='1H',
        tz='europe/kiev',
    )
    return local_hours.tz_convert('utc').tz_localize(None)


# One DatetimeIndex per calendar day of the target month.
days_in_month = calendar.monthrange(target_year, target_month)[1]
daily_indexes = [
    _kyiv_day_as_utc(target_year, target_month, day)
    for day in range(1, days_in_month + 1)
]

print(len(daily_indexes))

In [None]:
# Sanity check: show the naive-UTC timestamps generated for the first day of the month.
print(daily_indexes[0])

In [None]:
def _stack_results(rows):
    """Combine make_results() outputs into one frame laid out like `columns`.

    Each entry of `rows` may be a dict, a Series or a DataFrame (whatever
    make_results() returns - not visible from this notebook). The entries are
    stacked once with pd.concat instead of growing a frame row by row with
    DataFrame.append, which is quadratic and no longer exists in pandas 2.x.
    """
    if not rows:
        return pd.DataFrame(columns=columns)
    frames = [row if isinstance(row, pd.DataFrame) else pd.DataFrame([row]) for row in rows]
    stacked = pd.concat(frames, ignore_index=True)
    # Same layout as before: the declared columns first, unexpected extras after them.
    extra_columns = [name for name in stacked.columns if name not in columns]
    return stacked.reindex(columns=columns + extra_columns)


# Rows of ALL sites per forecast type; they become results_real / _naive / _zero.
all_rows = {'real': [], 'naive': [], 'zero': []}

for site in sites_data:
    # Rows of the current site only. Previously the accumulators were never reset,
    # so sites_data[site]['results_*'] also contained the rows of every site
    # processed before it.
    site_rows = {forecast_type: [] for forecast_type in all_rows}

    for day_index in daily_indexes:
        for forecast_type, rows in site_rows.items():
            result = make_results(sites_data[site], forecast_type, prices, day_index)
            # make_results() may return None for a day; such days are skipped.
            if result is not None:
                rows.append(result)

    for forecast_type, rows in site_rows.items():
        sites_data[site]['results_' + forecast_type] = _stack_results(rows)
        all_rows[forecast_type].extend(rows)

    print(f'{site} - Results daily: Ok!')

# Combined tables over all sites, consumed by the export cell below.
results_real = _stack_results(all_rows['real'])
results_naive = _stack_results(all_rows['naive'])
results_zero = _stack_results(all_rows['zero'])

In [None]:
# sites_data['Pohrebyshche']['results_real']

In [None]:
from uce_resources import save_results, format_excel

# Stack the three forecast variants into one table for export.
results_daily = pd.concat([results_real, results_naive, results_zero], axis=0)

# Without rows, min()/max() yield NaN and the `.day` lookups below fail with an
# unhelpful AttributeError - fail early with a clear message instead.
if results_daily.empty:
    raise ValueError('No daily results were produced - nothing to save')

# Day-of-month span covered by the results; both go into the file name.
min_day = results_daily['first_date'].min().day
max_day = results_daily['last_date'].max().day

results_file = target_folder + 'uce_daily_{}_{}_{}-{}_UAH.xlsx'.format(target_year, target_month, min_day, max_day)
# The context manager saves and closes the workbook on exit, so no explicit
# writer.save() is needed.
with pd.ExcelWriter(results_file, engine="openpyxl") as writer:
    # sheet_name is passed by keyword: positional use is deprecated in recent pandas.
    results_daily.to_excel(writer, sheet_name='results_daily')

# Optional formatting step, currently disabled:
#format_excel(writer, results_daily).save()

print('Saving results: ok!')
