# Preparing highRES demand.dd and temporal.dd

In [None]:
import calendar
import datetime
import pandas as pd
import numpy as np
#import pathlib
import itertools

## Input data

In [None]:
# NOTE: `snakemake` is never defined in this notebook; it is expected to be
# injected by the Snakemake workflow that executes it.
Years = snakemake.wildcards.year  # year wildcard; later concatenated with "-" and a date string, so presumably a str
Years_int = int(Years)  # integer form, used for the leap-year check further down
date_range = snakemake.params.date_range  # presumably [start, stop] date strings without the year -- TODO confirm
Europe_countries = pd.read_csv(snakemake.input["europecountriescsvlocation"])  # from snakeflow
Europe_demand = pd.read_csv(snakemake.input["europedemandcsvlocation"])  # Input from rule build_demand
#scenarios = pd.read_excel('gb_ext_scenarios.xls', sheet_name="scenario_annual_dem", skiprows=0)
#euro31_dem_2050 = snakemake.params.euro31_dem_2050  # Yearly Twh demand for rescaling, if necessary
# How to deal with hardcore rescale?
#rescale = snakemake.params.rescale

In [None]:
Europe_countries

In [None]:
Europe_demand

## Demand.dd

In [None]:
#pathlib.Path(snakemake.output[0]).parent
#opath = pathlib.Path('/fp/homes01/u01/ec-javedm/ec85/models/shahzad/private/highRES_demand').parent 

In [None]:
# Build the ISO dates from the raw workflow parameter rather than from the
# notebook-level `date_range`: the original cell rebound `date_range` from
# itself, so re-running it prepended the year a second time and made
# fromisoformat() fail.
date_range = [f"{Years}-{date}" for date in snakemake.params.date_range]
dstart = datetime.datetime.fromisoformat(date_range[0])
# The stop entry is extended by 23 hours so the period ends on the last hour
# of the stop day (assuming the entry parses to midnight).
dstop = datetime.datetime.fromisoformat(date_range[1]) + datetime.timedelta(hours=23)

In [None]:
# ISO2 codes of the countries whose "ETM" flag equals 1 in the country table
is_etm_member = Europe_countries["ETM"] == 1
etm_countries = Europe_countries.loc[is_etm_member, "ISO2"].tolist()

In [None]:
etm_countries

In [None]:
# Parse the 'datetime' column into timestamps and promote it to the row index
# so the demand table can be sliced by date later on
Europe_demand = Europe_demand.assign(
    datetime=pd.to_datetime(Europe_demand['datetime'])
).set_index('datetime')

In [None]:
Europe_demand

In [None]:
# Restrict the demand table to the columns of the selected ETM countries,
# keeping the column order given by etm_countries
Europe_demand_etm = Europe_demand.loc[:, etm_countries]

In [None]:
# Filter by the specified date range (label-based slice, both ends inclusive).
# Take an explicit copy: the frame is cleaned (zero replacement, interpolation)
# further down, and modifying the result of a slice in place is what triggers
# pandas' SettingWithCopyWarning.
Europe_demand_etm = Europe_demand_etm.loc[dstart:dstop].copy()

In [None]:
Europe_demand_etm

In [None]:
Europe_demand_etm.head()

In [None]:
# Check if there are any countries with missing data
# NOTE(review): Europe_demand_etm was built with Europe_demand[etm_countries];
# pandas normally raises KeyError there for labels that are not columns, so
# this branch is probably only reachable when column names are duplicated.
# Confirm whether a missing country should be reported (or fail) at selection
# time instead.
if Europe_demand_etm.shape[1] != len(etm_countries):
    print("Countries missing...")

In [None]:
# Replace zero demands with NaN to prepare for interpolation.
# Rebind the result instead of using inplace=True: the frame originates from a
# slice, and in-place modification of such an object is what pandas flags with
# SettingWithCopyWarning.
Europe_demand_etm = Europe_demand_etm.replace(0, np.nan)

In [None]:
# Interpolate missing values, filling at most 2 consecutive NaNs per gap
# (longer gaps keep their remaining NaNs and are reported by the check below).
# Rebind the result instead of using inplace=True so no slice-derived frame is
# mutated in place.
Europe_demand_etm = Europe_demand_etm.interpolate(limit=2)

In [None]:
# Report every column that still contains NaN after interpolation, together
# with the rows in which that column is missing
columns_with_gaps = Europe_demand_etm.columns[Europe_demand_etm.isnull().any()]
if len(columns_with_gaps) > 0:
    for column in columns_with_gaps:
        print(f"Missing data found in column: {column}")
        missing_rows = Europe_demand_etm[Europe_demand_etm[column].isnull()]
        print(missing_rows)
    print("Countries with remaining missing demand data:", columns_with_gaps)

In [None]:
Years_int

In [None]:
# Add data for leap year's 29th of February
#if calendar.isleap(Years_int):
#    print(f"{Years_int} is a leap year")
#    # You may need to adjust the following line if your index isn't in hourly format
#    feb_29_index = pd.date_range(f"{Years_int}-02-29 00:00", f"{Years_int}-02-29 23:00", freq="H")
#    feb_29_data = pd.DataFrame(index=feb_29_index, columns=Europe_demand_etm.columns)

    # Here replicate the demand from February 28th
#    for col in feb_29_data.columns:
#        feb_29_data[col] = Europe_demand_etm.loc[f"{Years_int}-02-28"].values

    # Append the February 29th data
#    Europe_demand_etm = Europe_demand_etm.append(feb_29_data).sort_index()

In [None]:
# In a leap year, drop the 24 hourly rows of February 29th.
# NOTE(review): the hour count for the temporal file (ntime) is derived from
# dstart/dstop, not from this frame; if the modelled period spans Feb 29 the
# "h" set will be 24 entries longer than the demand rows -- confirm intended.
if calendar.isleap(Years_int):
    print(f"{Years_int} is a leap year")
    feb_29_range = pd.date_range(
        start=f"{Years_int}-02-29 00:00",
        end=f"{Years_int}-02-29 23:00",
        freq='h',
    )
    on_feb_29 = Europe_demand_etm.index.isin(feb_29_range)
    Europe_demand_etm = Europe_demand_etm[~on_feb_29]

In [None]:
Europe_demand_etm

In [None]:
Europe_demand_etm.sum().sum()  # while the value in excel file for 2050 is 5870 TWH 

In [None]:
# Hourly demand rescaling with respect to meet annual demand target  
#if rescale == "annual":
#    out_flg = "annual"
#    if Europe_demand_etm.shape[0] >= 8760.0:
#        # Rescale the DataFrame so its sum matches the annual demand
#        Europe_demand_etm = Europe_demand_etm * (euro31_dem_2050 * 1e6 / Europe_demand_etm.sum().sum())
#else:
#    out_flg = "norescale"
#print(f"{out_flg} Demand")

In [None]:
Europe_demand_etm.sum().sum() 

In [None]:
def data2dd(data, sets, all_combin=False, rounddp=8):
    """Pair every data value with a dot-joined set label.

    Parameters
    ----------
    data : array-like
        Numeric values. They are cast to float, rounded to ``rounddp``
        decimals and flattened in row-major order; the flattened order must
        match the order of the generated labels.
    sets : sequence of 1-D sequences
        Label sets (arrays, lists, ...). They may have different lengths.
    all_combin : bool, default False
        If True, one label is produced for every element of the cartesian
        product of ``sets`` (the first set varies slowest). If False, a
        single set is used as is; several sets are combined element-wise
        after repeating shorter sets up to the length of the longest one.
    rounddp : int, default 8
        Number of decimals kept in the data.

    Returns
    -------
    numpy.ndarray
        String array of shape (n, 2): column 0 holds the label, column 1 the
        value converted to text.

    Raises
    ------
    ValueError
        If a shorter set cannot be repeated evenly up to the longest set, or
        if the number of labels differs from the number of data values.
    """
    values = np.round(np.asarray(data).astype(float), rounddp)

    # Convert every set on its own. Stacking them into a single object array
    # (as done previously) fails for plain Python lists of unequal length.
    label_sets = [np.asarray(one_set).tolist() for one_set in sets]

    if all_combin:
        labels = [
            ".".join(map(str, combo)) for combo in itertools.product(*label_sets)
        ]
    elif len(label_sets) == 1:
        labels = [str(item) for item in label_sets[0]]
    else:
        longest = max(len(one_set) for one_set in label_sets)
        for one_set in label_sets:
            # zip() below would silently drop labels if a set did not tile evenly
            if not one_set or longest % len(one_set):
                raise ValueError(
                    f"data2dd: a set of length {len(one_set)} cannot be "
                    f"repeated evenly to length {longest}"
                )
        tiled = [one_set * (longest // len(one_set)) for one_set in label_sets]
        labels = [".".join(map(str, combo)) for combo in zip(*tiled)]

    if len(labels) != values.size:
        raise ValueError(
            f"data2dd: {len(labels)} set labels generated "
            f"for {values.size} data values"
        )

    label_column = np.array(labels, dtype=str).reshape(-1, 1)
    # Convert the numbers to text explicitly instead of relying on implicit
    # float -> string promotion when the two columns are stacked.
    value_column = values.reshape(-1, 1).astype(str)
    return np.hstack((label_column, value_column))

In [None]:
# Zone labels are the column names; hour labels are 0..n-1, one per row
z = Europe_demand_etm.columns.values
t = np.arange(len(Europe_demand_etm))

# Transpose so the values flatten zone by zone, which matches the order of the
# [z, t] label combinations produced by data2dd
dd_data = data2dd(Europe_demand_etm.to_numpy().T, [z, t], all_combin=True)

In [None]:
dd_data

In [None]:
# Two-column header and footer rows wrapped around the demand records:
# "parameter" / "demand /" open the block, "/" and an empty row close it
top = np.array([
    ["parameter", ""],
    ["demand /", ""],
])
bottom = np.array([
    ["/", ""],
    ["", ""],
])

In [None]:
# Stack header rows, demand records and footer rows into one table (row-wise)
dd_data = np.concatenate([top, dd_data, bottom])

In [None]:
dd_data

In [None]:
outfile1 = snakemake.output[0]
# Join the fields of each row with a space and the rows with newlines,
# then write the whole text in one call
dd_data_str = '\n'.join(map(' '.join, dd_data))
with open(outfile1, 'w') as handle:
    handle.write(dd_data_str)


## Temporal2dd

In [None]:
dstart

In [None]:
dstop

In [None]:
# Second declared workflow output; the temporal set definitions built below
# are written to this path at the end of the notebook
outfile2 = snakemake.output[1]

In [None]:
# Calculate the range of years and total number of time periods (hours)
# Every calendar year touched by the modelled period, both ends included
years = np.arange(dstart.year, dstop.year + 1)
# Inclusive count of hourly steps from dstart to dstop; the value is a float
# because total_seconds() returns a float
ntime = ((dstop - dstart).total_seconds() / 3600) + 1

In [None]:
ntime

In [None]:
# Create an hour-to-year mapping: one "<year index>.<hour index>" label per
# modelled hour, where hour indices count from 0 at dstart.
hr2yr = []
for nyr, yr in enumerate(years):
    # Clamp each calendar year to the modelled period. Without the clamp on
    # the start, a period that does not begin on 1 January produced negative
    # hour indices for its first year.
    year_first = max(datetime.datetime(yr, 1, 1, 0), dstart)
    year_last = min(datetime.datetime(yr, 12, 31, 23), dstop)

    first_hour = int((year_first - dstart).total_seconds() // 3600)
    last_hour = int((year_last - dstart).total_seconds() // 3600)

    hr2yr.extend(f"{nyr}.{hour}" for hour in range(first_hour, last_hour + 1))

# Column vector of strings so it can be stacked with the one-column set headers
hr2yr = np.array(hr2yr, dtype=str).reshape(-1, 1)

In [None]:
hr2yr

In [None]:
# Year label for the modelled period: a single year, or "start-stop" when the
# period crosses a year boundary
yr = (
    str(dstart.year)
    if dstart.year == dstop.year
    else str(dstart.year) + "-" + str(dstop.year)
)

In [None]:
yr

In [None]:
# "h" set: one integer label per modelled hour (0 .. ntime-1) as a single
# column, preceded by the "set" / "h /" header and closed by "/" and a blank row
top_h = np.array([["set"], ["h /"]])
bottom_h = np.array([["/"], [""]])
h_set = np.arange(ntime).astype(int).reshape(-1, 1)
h_set_data = np.concatenate([top_h, h_set, bottom_h])

In [None]:
h_set_data

In [None]:
# "yr" set: one integer label per modelled year (0 .. number of years - 1) as
# a single column, with the same header/footer layout as the "h" set
top_yr = np.array([["set"], ["yr /"]])
bottom_yr = np.array([["/"], [""]])
yr_set = np.arange(len(years)).astype(int).reshape(-1, 1)
yr_set_data = np.concatenate([top_yr, yr_set, bottom_yr])

In [None]:
yr_set_data

In [None]:
# "hr2yr_map" set: the "<year>.<hour>" labels wrapped in the usual
# one-column header and footer rows
top_hr2yr = np.array([["set"], ["hr2yr_map /"]])
bottom_hr2yr = np.array([["/"], [""]])
hr2yr_map_data = np.concatenate([top_hr2yr, hr2yr, bottom_hr2yr])

In [None]:
hr2yr_map_data

In [None]:
# Stack the three set blocks one after another: h, then yr, then hr2yr_map
concatenate_sets = np.concatenate([h_set_data, yr_set_data, hr2yr_map_data])

In [None]:
concatenate_sets

In [None]:
# Render the table as text: fields of a row separated by a space, rows
# separated by newlines
concatenate_sets_str = '\n'.join(map(' '.join, concatenate_sets))

In [None]:
# Write the temporal set definitions to the second workflow output
with open(outfile2, 'w') as handle:
    handle.write(concatenate_sets_str)