In [None]:
import os
import numpy as np
import pandas as pd
import datetime as dt
import pytz
import time
import calendar
from statistics import mean

from uce_resources import get_mms_data, get_applied_forecast

In [None]:
from settings.sites import ceg_mms as sites_list

# Reference date for the report.
# NOTE: despite its name, `today` holds the timestamp of the *previous* day
# (now minus one day). The name is kept because the data-preparation cell
# reads it to build its forecast window.
today = dt.datetime.today() - dt.timedelta(1)

# Date parts of the reference date; used for the MMS query and the output file name.
target_year = today.year
target_month = today.month
target_day = today.day
forecasts_types = ['real']

# Output directory for the generated workbook. `exist_ok=True` makes the cell
# safe to re-run and avoids the check-then-create race of a separate
# `os.path.exists` test.
target_folder = 'data/forecasts/factor/'
os.makedirs(target_folder, exist_ok=True)

# Optional override: uncomment to process a single site instead of the imported list.
# sites_list = ['Pohrebyshche']

# Data preparation section

In [None]:
from sqlalchemy import MetaData, create_engine
from sqlalchemy.pool import NullPool
from sqlalchemy.sql import and_, select
# `WAREHOSUE_URL` is spelled exactly as it is exported by settings.db.
from settings.db import DO_URL, WAREHOSUE_URL

# Source database: engine plus a MetaData object populated by reflecting the
# table definitions from the live database, so later cells can look tables up
# by name via `metadata_source.tables[...]`.
# NullPool disables connection pooling: every `connect()` opens a fresh
# connection that is closed as soon as it is released.
engine_source = create_engine(DO_URL, poolclass=NullPool)
metadata_source = MetaData()
metadata_source.reflect(bind=engine_source)

# Warehouse database engine (also unpooled).
engine_warehouse = create_engine(WAREHOSUE_URL, poolclass=NullPool)

In [None]:
def _ratio_frame(numerator, denominator, column_name, drop_incomplete=False):
    """Return a one-column frame holding ``numerator / denominator`` rounded to 2 decimals.

    Both inputs are single-column DataFrames; they are aligned on the union of
    their indexes (outer join) before dividing.

    Parameters
    ----------
    numerator, denominator : pd.DataFrame
        Single-column frames to divide.
    column_name : str
        Label of the resulting column.
    drop_incomplete : bool
        When True, rows where either side is missing are dropped before the
        division; when False they are kept and yield NaN.
    """
    aligned = pd.concat([numerator, denominator], axis=1, join='outer')
    if drop_incomplete:
        aligned = aligned.dropna()
    ratio = aligned[numerator.columns[0]] / aligned[denominator.columns[0]]
    return ratio.round(2).to_frame(name=column_name)


def _utc_index_to_local_labels(index, tz_name='Europe/Kiev'):
    """Treat a naive datetime index as UTC and render it as local-time strings.

    The index is localized to UTC, converted to ``tz_name`` and formatted as
    ``'%Y-%m-%d %H:%M'``. The canonical tz-database spelling is used because
    not every pytz release resolves lower-cased zone names.
    """
    local_tz = pytz.timezone(tz_name)
    return index.tz_localize(pytz.utc).tz_convert(local_tz).strftime('%Y-%m-%d %H:%M')


# Per-site one-column frames, merged side by side after the loop.
sites_data = []          # applied_forecast / raw_forecast
raw_sites_data = []      # yield / raw_forecast
applied_sites_data = []  # yield / applied_forecast

# Forecast window passed to get_applied_forecast: the day before `today` up to `today`.
first_date = today - dt.timedelta(1)
last_date = today

# Loop-invariant table handles from the reflected source schema.
sites_table = metadata_source.tables['sites']
mms_table = metadata_source.tables['mms_data']
forecasting_table = metadata_source.tables['forecasting_data']

with engine_source.connect() as connection:

    for site in sites_list:
        start = time.time()

        # Resolve the site's id from its displayable name; fail with a clear
        # message instead of a bare IndexError when the site is unknown.
        query = select([sites_table.c.id]).where(sites_table.c.displayable_name == site)
        site_rows = connection.execute(query).fetchall()
        if not site_rows:
            raise LookupError(f"Site {site!r} not found in table 'sites'")
        site_id = site_rows[0][0]

        # Second return value of get_mms_data is not needed here.
        mms_data, _mms_version = get_mms_data(site_id,
                                              target_year, target_month,
                                              connection, mms_table, include_prev=True,)
        mms_data.columns = ['yield']

        applied_forecast = get_applied_forecast(
            site_id,
            first_date,
            last_date,
            connection=connection,
            db_table=forecasting_table
        )
        applied_forecast.columns = ['applied_forecast']

        raw_forecast = get_applied_forecast(
            site_id,
            first_date,
            last_date,
            connection=connection,
            db_table=forecasting_table,
            forecast_type='forecast_applied_raw'
        )
        raw_forecast.columns = ['raw_forecast']

        site_label = f"{site}"

        # Coefficient between applied and raw forecast; rows missing on one
        # side are kept (they come out as NaN).
        sites_data.append(_ratio_frame(applied_forecast, raw_forecast, site_label))

        # Measured yield relative to each forecast; only timestamps present
        # in both series are kept.
        applied_sites_data.append(
            _ratio_frame(mms_data, applied_forecast, site_label, drop_incomplete=True)
        )
        raw_sites_data.append(
            _ratio_frame(mms_data, raw_forecast, site_label, drop_incomplete=True)
        )

        end = time.time()

        print('{}: ок! Processing took {} seconds'.format(site, round(end - start, 2)))

# Merge once after the loop (one column per site) instead of re-concatenating
# the growing lists on every iteration.
koef_merged_data = pd.concat(sites_data, axis=1)
applied_merged_data = pd.concat(applied_sites_data, axis=1)
raw_merged_data = pd.concat(raw_sites_data, axis=1)

# Replace the naive (UTC) datetime indexes with local-time string labels.
koef_merged_data.index = _utc_index_to_local_labels(koef_merged_data.index)
applied_merged_data.index = _utc_index_to_local_labels(applied_merged_data.index)
raw_merged_data.index = _utc_index_to_local_labels(raw_merged_data.index)

In [None]:
# Sheets of the output workbook, in the order they are written.
sheets = (
    ('Coefficient', koef_merged_data),
    ('Raw_forecast', raw_merged_data),
    ('Applied_forecast', applied_merged_data),
)

# Workbook name carries the target date (year, month, day; not zero-padded).
file_name = f'forecast_MMS_{target_year}_{target_month}_{target_day}.xlsx'
file_path = os.path.join(target_folder, file_name)

# The context manager saves and closes the workbook on exit.
with pd.ExcelWriter(file_path, engine='xlsxwriter') as writer:
    for sheet_name, frame in sheets:
        frame.to_excel(writer, sheet_name=sheet_name)