In [1]:
from transparency_epias.production import productionClient
import pandas as pd
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
import os
import requests
import json

In [2]:
# First day of the historical range to download.
master_start_dt = datetime(2015, 1, 1)
# Timestamp of this run; used as the upper bound when building year boundaries.
today = datetime.today()
# Folder holding one CSV per calendar year. Set the EPIAS_SAVE_PATH environment
# variable to run the notebook on another machine; when it is unset the
# original location is used.
save_path = os.environ.get(
    "EPIAS_SAVE_PATH",
    r"C:\Users\mehmetg\jupyter\EPIAS_Production-Rolling_Source-Based",
)

In [3]:
# Raw column name -> column name written to the CSV files.
# get_production() looks up every column of the response frame here.
# NOTE(review): "naphta" and "nucklear" are presumably spelled this way by the
# API itself -- confirm before "correcting" them.
production_columns_map = dict((
    ("fueloil", "FuelOil"),
    ("gasOil", "GasOil"),
    ("blackCoal", "BlackCoal"),
    ("lignite", "Lignite"),
    ("geothermal", "Geothermal"),
    ("naturalGas", "NaturalGas"),
    ("river", "Run-of-River"),
    ("dammedHydro", "Dam"),
    ("lng", "LNG"),
    ("biomass", "Biomass"),
    ("naphta", "Naphta"),
    ("importCoal", "HardCoal"),
    ("asphaltiteCoal", "Asphaltite"),
    ("wind", "Wind"),
    ("nucklear", "Nuclear"),
    ("sun", "Solar"),
    ("importExport", "ImportExport"),
    ("total", "Total"),
))

In [4]:
def get_production(start_date, end_date):
    """Download real-time generation between two dates as a DataFrame.

    Parameters
    ----------
    start_date, end_date : datetime-like
        Range bounds; only their ``%Y-%m-%d`` date part is sent to the client.

    Returns
    -------
    pandas.DataFrame
        One row per timestamp of the response, indexed by a datetime index
        named ``"DateTime"``. Columns listed in ``production_columns_map`` are
        renamed to their mapped names; any other column is kept under its
        original name instead of raising ``KeyError``.
    """
    response_production = productionClient.production.real_time_gen(
        startDate=start_date.strftime("%Y-%m-%d"),
        endDate=end_date.strftime("%Y-%m-%d"))
    production = pd.DataFrame(response_production).set_index("date")
    # Only the first 19 characters ("YYYY-MM-DDTHH:MM:SS") are parsed;
    # presumably this drops a fractional-second / UTC-offset suffix -- confirm.
    production.index = pd.to_datetime(production.index.map(lambda dt: str(dt)[:19]))
    # rename() leaves columns missing from the map untouched, so a new field
    # in the response does not break the download.
    production = production.rename(columns=production_columns_map)
    production.index.name = "DateTime"
    return production

In [5]:
# Year boundaries: master_start_dt, then the same date one year later, and so
# on, ending with the first boundary that is not before `today`.
year_dts = [master_start_dt]
while year_dts[-1] < today:
    year_dts.append(year_dts[-1] + relativedelta(years=1))

# Make sure the output folder exists so a first run does not fail on listdir.
os.makedirs(save_path, exist_ok=True)

# Only files named "<digits>.csv" count as yearly data files; anything else in
# the folder (notes.csv, 2021.csv.bak, ...) is ignored instead of crashing int().
existing = sorted(
    f for f in os.listdir(save_path)
    if f.endswith(".csv") and f[:-len(".csv")].isdecimal()
)
if existing:
    # Resume from the most recent year already on disk; earlier years are
    # dropped from the work list.
    latest = max(int(f[:-len(".csv")]) for f in existing)
    year_dts = [dt for dt in year_dts if dt.year >= latest]

In [6]:
# Walk consecutive pairs of year boundaries and bring each year's CSV up to date.
for dt_start, dt_next in zip(year_dts[:-1], year_dts[1:]):
    # Last hourly timestamp that belongs to this year.
    dt_end = dt_next - timedelta(hours=1)
    print("Processing", dt_start.year)
    filename = "{}.csv".format(dt_start.year)
    file_save_path = os.path.join(save_path, filename)

    dummy = None
    if os.path.exists(file_save_path):
        dummy = pd.read_csv(file_save_path, index_col=0, parse_dates=True)

    if dummy is None or len(dummy.index) == 0:
        # No file yet, or a header-only file: download the whole year.
        production = get_production(dt_start, dt_end)
        production.to_csv(file_save_path)
        continue

    last_idx = dummy.index[-1]
    if last_idx == dt_end:
        # The file already reaches the end of the year; nothing to do.
        continue

    print("Appending to", filename)
    # Re-fetch the last 7 days already on disk, but never reach back before
    # the start of this year -- otherwise rows of the previous year would be
    # written into this year's file.
    new_dt_start = max(dt_start, last_idx - timedelta(days=7))
    production = get_production(new_dt_start, dt_end)
    production = pd.concat([dummy, production])
    # For timestamps present in both frames, keep the freshly downloaded values.
    production = production.groupby(production.index).last()
    production.to_csv(file_save_path)

Processing 2021
Appending to 2021.csv
