In [None]:
import os
import pandas as pd
import datetime as dt
import time
import calendar
import numpy as np
from statistics import mean

from uce_resources import get_site_id, get_mms_data, get_applied_forecast, get_prices, get_green_tariff
from uce_resources import make_results

In [None]:
from settings.sites import ceg as sites_list

# Reporting period processed by this notebook.
target_year = 2022
target_month = 3
forecasts_types = ['real']

# Every output file of the run is written below this month-specific folder.
target_folder = 'data/results/{}-{:0>2}/'.format(target_year, target_month)
# exist_ok=True replaces the check-then-create pair: no race between the
# existence test and the creation, and the cell stays safe to re-run.
os.makedirs(target_folder, exist_ok=True)

# sites_list = ['Myroliubivka']

# Data preparation section

In [None]:
from sqlalchemy import create_engine, MetaData
from sqlalchemy.pool import NullPool
from sqlalchemy.sql import select, and_
from settings.db import DO_URL

# Container for the table definitions discovered in the database.
metadata = MetaData()

# poolclass=NullPool asks SQLAlchemy not to keep pooled connections around.
engine = create_engine(
    DO_URL,
    poolclass=NullPool,
)

# Load the existing schema so tables can be addressed as metadata.tables[<name>].
metadata.reflect(bind=engine)

In [None]:
# Build one frame per site (in `sites_list` order) holding the site attributes,
# the MMS data (renamed to `yield`), the applied forecast and the derived
# forecast-error columns.
sites_data = list()

# The site lookup selects the same columns for every site, so resolve them once.
sites_table = metadata.tables['sites']
list_to_select = [sites_table.c.id, sites_table.c.legal_entity, sites_table.c.location,
                  sites_table.c.region, sites_table.c.cluster]

with engine.connect() as connection:

    for site in sites_list:
        start = time.time()
        print('-'*50)
        print(site)

        query = select(list_to_select).where(sites_table.c.displayable_name == site)
        site_rows = connection.execute(query).fetchall()
        if not site_rows:
            # Name the offending site instead of failing with "list index out of range".
            raise LookupError("Site '{}' was not found in the 'sites' table".format(site))
        site_id, legal_entity_id, location, region, cluster = site_rows[0]

        # `location` is stored as text of the form "(lat,lon)".
        latitude, longitude = map(float, location.replace('(', '').replace(')', '').split(','))

        mms_data, mms_version = get_mms_data(site_id,
                                             target_year, target_month,
                                             connection, metadata.tables['mms_data'], include_prev=True,)
        # The assignment requires get_mms_data to return exactly one data column.
        mms_data.columns = ['yield']
        print('MMS data | {} version | of | {} records |'.format(mms_version, len(mms_data)))

        applied_forecast = get_applied_forecast(site_id, target_year, target_month,
                                                connection=connection, db_table=metadata.tables['forecasts_applied'])
        applied_forecast.columns = ['forecast']
        print('Forecast data of | {} records |'.format(len(applied_forecast)))

        # Per-site constants, repeated on every row of the MMS index.
        constant_columns = [
            ('site', site),
            ('latitude', latitude),
            ('longitude', longitude),
            ('region', region),
            ('cluster', cluster),
        ]
        site_data = pd.concat(
            [pd.Series(index=mms_data.index, data=value) for _, value in constant_columns],
            axis=1, join='inner')
        site_data.columns = [name for name, _ in constant_columns]

        # Calendar helpers derived from the timestamp index; `hour` is numbered 1..24.
        site_data['date'] = site_data.index.strftime('%Y-%m-%d')
        site_data['hour'] = site_data.index.hour + 1
        site_data['datetime'] = site_data.index.strftime('%Y-%m-%dT%H:%M')

        # The inner join keeps only timestamps present in both the MMS data and the forecast.
        site_data = pd.concat([site_data, mms_data, applied_forecast], axis=1, join='inner')

        # Vectorised error columns. As in the former row-wise lambdas, a missing
        # error yields 0 in the positive/negative parts and the 'positive' label.
        error = site_data['yield'] - site_data['forecast']
        site_data['error'] = error
        site_data['error_positive'] = error.where(error >= 0, 0)
        site_data['error_negative'] = error.where(error < 0, 0)
        site_data['error_abs'] = error.abs()
        site_data['error_type'] = np.where(error < 0, 'negative', 'positive')

        print('Data prepared')

        sites_data.append(site_data)
        end = time.time()

        print('Processing took {} seconds'.format(round(end - start, 2)))

In [None]:
# Stack the per-site frames into one table and store it in the month folder.
output_csv = target_folder + 'mart_operative_forecasting_results.csv'
data = pd.concat(sites_data, ignore_index=True)
data.to_csv(output_csv, index=False)

In [None]:
# Inspect the combined rows that belong to a single site.
data.loc[data['site'] == 'Vasylivka']

### Daily results

In [None]:
# One hourly index per calendar day of the target month. Each day runs from
# 00:30 to 23:30 Kyiv local time and is stored as naive UTC timestamps.
days_in_month = calendar.monthrange(target_year, target_month)[-1]
daily_indexes = []

for day_number in range(1, days_in_month + 1):
    kyiv_hours = pd.date_range(
        start=dt.datetime(target_year, target_month, day_number, 0, 30),
        end=dt.datetime(target_year, target_month, day_number, 23, 30),
        freq='1H',
        tz='europe/kiev',
    )
    # Same instants, expressed in UTC with the timezone information dropped.
    daily_indexes.append(kyiv_hours.tz_convert('utc').tz_localize(None))

print(len(daily_indexes))

In [None]:
# Show the hourly index generated for the first day of the month.
first_day_index = daily_indexes[0]
print(first_day_index)

In [None]:
# Daily results for every site and every forecast kind.
#
# `sites_data` is the list of per-site frames built by the data preparation
# cell, appended in `sites_list` order, so the two are walked in parallel.
# NOTE(review): make_results comes from uce_resources; confirm that it accepts
# the per-site DataFrame (columns such as `yield` / `forecast`) as first argument.
forecast_kinds = ('real', 'naive', 'zero')

# NOTE(review): `prices` is never assigned in the visible notebook (only
# `get_prices` is imported). Fail early with an actionable message instead of a
# NameError raised from the middle of the loop.
try:
    prices
except NameError as exc:
    raise NameError(
        "`prices` is not defined: load the price data before computing daily results"
    ) from exc


def _result_to_frame(result):
    """Wrap one `make_results` output as a DataFrame so outputs can be concatenated."""
    if isinstance(result, pd.DataFrame):
        return result
    if isinstance(result, list):
        # A list is treated as several row records.
        return pd.DataFrame(result)
    # A dict or Series is treated as a single row.
    return pd.DataFrame([result])


def _combine_frames(frames):
    """Concatenate result frames with a fresh index; no frames gives an empty DataFrame."""
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)


# Frames are collected in lists and concatenated once: DataFrame.append was
# removed in pandas 2.0 and re-copies the whole table on every call.
all_frames = {kind: [] for kind in forecast_kinds}
# Per-site results, keyed by site name and then by 'results_<kind>'.
results_by_site = {}

for site, site_frame in zip(sites_list, sites_data):
    site_frames = {kind: [] for kind in forecast_kinds}

    for index in daily_indexes:
        for kind in forecast_kinds:
            result = make_results(site_frame, kind, prices, index)
            if result is not None:
                site_frames[kind].append(_result_to_frame(result))

    # Each site keeps only its own rows; the overall tables are assembled below.
    results_by_site[site] = {
        'results_{}'.format(kind): _combine_frames(site_frames[kind])
        for kind in forecast_kinds
    }
    for kind in forecast_kinds:
        all_frames[kind].extend(site_frames[kind])

    print(f'{site} - Results daily: Ok!')

# Overall tables (all sites), consumed by the export cell.
results_real = _combine_frames(all_frames['real'])
results_naive = _combine_frames(all_frames['naive'])
results_zero = _combine_frames(all_frames['zero'])

In [None]:
# sites_data['Pohrebyshche']['results_real']

In [None]:
from uce_resources import save_results, format_excel

# Stack the results of the three forecast kinds into one table for export.
results_daily = pd.concat([results_real, results_naive, results_zero], axis=0)

# The first and last covered day of the month become part of the workbook name.
min_day = results_daily.first_date.min().day
max_day = results_daily.last_date.max().day

workbook_name = 'uce_daily_{}_{}_{}-{}_UAH.xlsx'.format(target_year, target_month, min_day, max_day)

# The context manager saves and closes the workbook on exit, so no explicit
# writer.save() call is needed.
with pd.ExcelWriter(target_folder + workbook_name, engine="openpyxl") as writer:
    # sheet_name is passed by keyword: newer pandas releases deprecate
    # positional arguments after the writer.
    results_daily.to_excel(writer, sheet_name='results_daily')

# NOTE(review): styling of the workbook through format_excel is currently disabled.

print('Saving results: ok!')
