# Common DataFrame manipulations to all storage options 

In [1]:
import os
from openpyxl import load_workbook
import pandas as pd
from datetime import timedelta

BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath('')))
DATA_DIR = os.path.join(BASE_DIR, 'amerigo_island', 'data')
RAW_DATA_DIR = os.path.join(DATA_DIR, 'raw')
INTERIM_DATA_DIR = os.path.join(DATA_DIR, 'interim')

INPUT_LOAD_AND_VRE_FILENAME = 'load_and_vre.csv'
FINAL_LOAD_AND_VRE_FILENAME = 'final_load_and_vre.csv'

In [2]:
# all input columns as variables for later use
date_colname = 'date'
load_colname = 'load_mw'
solar_gen_colname = 'solar_mw'
wind_gen_colname = 'wind_mw'

load_and_vre_df = pd.read_csv(os.path.join(RAW_DATA_DIR, INPUT_LOAD_AND_VRE_FILENAME))
input_cols = [date_colname, load_colname, wind_gen_colname, solar_gen_colname]
load_and_vre_df = load_and_vre_df.iloc[:, [i for i in range(len(input_cols))]]
load_and_vre_df.columns = input_cols

In [19]:
# all derived columns as variables for later use

### COMMON - columns needed bt legacy gen, batteries, hydrogen
total_vre_gen_colname = 'total_vre_mw'
day_of_month_colname = 'day_of_month'
day_of_yr_colname = 'day_of_yr'
week_of_yr_colname = 'wk_of_yr'
month_colname = 'month'
year_colname = 'year'
weekday_colname = 'weekday'
daily_load_vre_diff_colname = 'daily_load_vre_diff'
prev_7_load_colname = 'prev_7_load'
prev_7_load_vre_diff_colname = 'prev_7_load_vre_diff'
prev_7_vre_gen_colname = 'prev_7_vre_gen'
critical_load_mw_colname = 'critical_load_mw'
critical_load_less_vre_colname = 'critical_load_less_vre_mw'
surplus_vre_colname = 'surplus_vre'

### BATTERIES 
cuml_load_since_prev_charge_colname = 'cuml_load_since_prev_charge_mw'
cuml_charge_since_prev_charge_colname = 'cuml_charge_since_prev_discharge_mw'
battery_soc_colname = 'battery_soc'

### HYDROGEN
req_hydrogen_kg_colname = 'reqd_hydrogen_kg'
hydrogen_production_kg_colname = 'hydrogen_prod_kg'
hydrogen_production_m3_colname = 'hydrogen_prod_m3'
hydrogen_h2o_demand_l_colname = 'hydrogen_h2o_demand_l'
cuml_hydrogen_demand_kg_colname = 'cuml_hydrogen_demand_kg'
cuml_h2_energy_demand_mwh_colname = 'cuml_h2_energy_demand_mwh'

### BAU - LEGACY GENERATION
thermal_gen_total_mw_colname = 'thermal_gen_total_mw'
thermal_gen_rice_mw_colname = 'thermal_gen_rice_mw'
thermal_gen_combustion_mw_colname = 'thermal_gen_combustion_mw'

In [4]:
# other values/assumptions needed for the analysis
CRITICAL_LOAD_PERC = (1/3)
SOLAR_SCALE_FACTOR = 4
WIND_SCALE_FACTOR = .4

SOLAR_INSTALLATION_COST_MM = 50.8
WIND_INSTALLATION_COST_MM = 31.2

In [5]:
def clean_input_columns(df):

    cleaned_df = df.copy()
    cleaned_df = cleaned_df.dropna(how='all')

    cleaned_df[date_colname] = pd.to_datetime(cleaned_df[date_colname])
    # all solar vals need to be > 0
    cleaned_df[solar_gen_colname] = cleaned_df[solar_gen_colname]\
        .apply(lambda x: x if x > 0 else 0)
    
    cleaned_df[month_colname] = cleaned_df[date_colname].map(lambda x: x.month)
    cleaned_df[day_of_month_colname] = cleaned_df[date_colname].map(lambda x: x.day)
    cleaned_df[day_of_yr_colname] = cleaned_df[date_colname].map(lambda x: x.timetuple().tm_yday)
    
    return cleaned_df

In [6]:
cleaned_load_and_vre_df = clean_input_columns(load_and_vre_df)

In [7]:
assert len(cleaned_load_and_vre_df) == 8760, "Expected one row per hour in year (8760), got {} rows".format(len(load_and_gen_df))

for day_of_yr in cleaned_load_and_vre_df[day_of_yr_colname].unique():
    if len(cleaned_load_and_vre_df[cleaned_load_and_vre_df[day_of_yr_colname] == day_of_yr]) != 24:
        assert ValueError("Day {} has {} values".format(day_of_yr, len(cleaned_load_and_vre_df[cleaned_load_and_vre_df[day_of_yr_colname] == day_of_yr])))
        

In [8]:
def add_common_derived_columns(df):
    
    decorated_df = df.copy()
    
    decorated_df[solar_gen_colname] = decorated_df[solar_gen_colname] * SOLAR_SCALE_FACTOR
    decorated_df[wind_gen_colname] = decorated_df[wind_gen_colname] * WIND_SCALE_FACTOR
    
    decorated_df[total_vre_gen_colname] = \
        decorated_df[wind_gen_colname] + decorated_df[solar_gen_colname]
    
    decorated_df[critical_load_mw_colname] = \
        decorated_df[load_colname] * CRITICAL_LOAD_PERC
    
    decorated_df[critical_load_less_vre_colname] = \
        decorated_df[critical_load_mw_colname] - decorated_df[total_vre_gen_colname]
    
    decorated_df[surplus_vre_colname] = \
        -decorated_df[critical_load_less_vre_colname]

    decorated_df[critical_load_less_vre_colname] = decorated_df[critical_load_less_vre_colname]\
        .map(lambda x: x if x > 0 else 0)

    decorated_df[surplus_vre_colname] = decorated_df[surplus_vre_colname]\
        .map(lambda x: x if x > 0 else 0)
    
    return decorated_df

In [9]:
final_load_and_vre_df = add_common_derived_columns(cleaned_load_and_vre_df)
final_load_and_vre_df.to_csv(os.path.join(INTERIM_DATA_DIR, FINAL_LOAD_AND_VRE_FILENAME), index=False)

# Hydrogen-specific analysis

In [20]:
ELEC_EFF_H2_FUEL_CELL_KWH_PER_KG = 19.99
ELEC_EFF_ELECTROLYSIS_KWH_PER_KG = 55.8

ENERGY_CONSUMPTION_H2O_DESAL_KWH_PER_M3 = 1.8
DENSITY_H2_300BAR_KG_PER_M3 = 20
DENSITY_H2_30BAR_KG_PER_M3 = 2.38
H2O_ELECTROLYSIS_CONSUMPTION_L_PER_M3 = 1.4

def calc_h2_cols(df):
    
    decorated_df = df.copy()
    
    decorated_df[req_hydrogen_kg_colname] = \
        (decorated_df[critical_load_less_vre_colname] * 1000)/ ELEC_EFF_H2_FUEL_CELL_KWH_PER_KG
    
    decorated_df[hydrogen_production_kg_colname] = \
        (decorated_df[surplus_vre_colname] * 1000) / ELEC_EFF_ELECTROLYSIS_KWH_PER_KG
    
    decorated_df[hydrogen_production_m3_colname] = \
        decorated_df[hydrogen_production_kg_colname] / DENSITY_H2_30BAR_KG_PER_M3
    
    decorated_df[hydrogen_h2o_demand_l_colname] = \
        decorated_df[hydrogen_production_m3_colname] * H2O_ELECTROLYSIS_CONSUMPTION_L_PER_M3
    
    decorated_df.sort_values(date_colname)
    
## calc cuml h2 demand
    decorated_df[cuml_hydrogen_demand_kg_colname] = None
    for idx, row in decorated_df.iterrows():
        
        if idx == 0:
            val = row[req_hydrogen_kg_colname]
        else : #
            val = \
                decorated_df.loc[idx - 1, cuml_hydrogen_demand_kg_colname] \
                + row[req_hydrogen_kg_colname]\
                - row[hydrogen_production_kg_colname]
        decorated_df.at[idx, cuml_hydrogen_demand_kg_colname] = val
        
    decorated_df[cuml_h2_energy_demand_mwh_colname] = \
        (decorated_df[cuml_hydrogen_demand_kg_colname] * ELEC_EFF_H2_FUEL_CELL_KWH_PER_KG) / 1000
    
    return decorated_df

In [21]:
h2_hourly_df = calc_h2_cols(final_load_and_vre_df)

In [23]:
h2_hourly_df.head(24)

Unnamed: 0,date,load_mw,wind_mw,solar_mw,month,day_of_month,day_of_yr,total_vre_mw,critical_load_mw,critical_load_less_vre_mw,surplus_vre,reqd_hydrogen_kg,hydrogen_prod_kg,hydrogen_prod_m3,hydrogen_h2o_demand_l,cuml_hydrogen_demand_kg,cuml_h2_energy_demand_mwh
0,2017-01-01 00:00:00,35.1,0.671864,0.0,1,1,1,0.671864,11.7,11.028136,0.0,551.682641,0.0,0.0,0.0,551.683,11.0281
1,2017-01-01 01:00:00,35.1,0.671864,0.0,1,1,1,0.671864,11.7,11.028136,0.0,551.682641,0.0,0.0,0.0,1103.37,22.0563
2,2017-01-01 02:00:00,34.6,0.493308,0.0,1,1,1,0.493308,11.533333,11.040025,0.0,552.277405,0.0,0.0,0.0,1655.64,33.0963
3,2017-01-01 03:00:00,34.2,0.436772,0.0,1,1,1,0.436772,11.4,10.963228,0.0,548.435618,0.0,0.0,0.0,2204.08,44.0595
4,2017-01-01 04:00:00,33.8,0.58604,0.0,1,1,1,0.58604,11.266667,10.680627,0.0,534.298483,0.0,0.0,0.0,2738.38,54.7402
5,2017-01-01 05:00:00,33.4,0.448116,0.0,1,1,1,0.448116,11.133333,10.685217,0.0,534.528131,0.0,0.0,0.0,3272.9,65.4254
6,2017-01-01 06:00:00,33.3,0.365421,0.0,1,1,1,0.365421,11.1,10.734579,0.0,536.997439,0.0,0.0,0.0,3809.9,76.1599
7,2017-01-01 07:00:00,33.8,0.736516,0.0,1,1,1,0.736516,11.266667,10.530151,0.0,526.770919,0.0,0.0,0.0,4336.67,86.6901
8,2017-01-01 08:00:00,34.7,0.6375,12.504,1,1,1,13.1415,11.566667,0.0,1.574833,0.0,28.22282,67.170311,94.038435,4308.45,86.1259
9,2017-01-01 09:00:00,34.7,0.471112,39.808,1,1,1,40.279112,11.566667,0.0,28.712445,0.0,514.559952,1224.652686,1714.513761,3793.89,75.8399


In [13]:
def get_h2_storage_characteristics(df):
    
    
    max_h2
    return h2_characteristics

In [18]:
h2_hourly_df[[hydrogen_production_kg_colname, hydrogen_h2o_demand_l_colname]].head(24)

Unnamed: 0,hydrogen_prod_kg,hydrogen_h2o_demand_l
0,0.0,0.0
1,0.0,0.0
2,0.0,0.0
3,0.0,0.0
4,0.0,0.0
5,0.0,0.0
6,0.0,0.0
7,0.0,0.0
8,28.22282,39.511947
9,514.559952,720.383933


In [14]:
h2_daily_df = h2_hourly_df.groupby(day_of_yr_colname).sum().reset_index()
h2_daily_df


Unnamed: 0,day_of_yr,load_mw,wind_mw,solar_mw,month,day_of_month,total_vre_mw,critical_load_mw,critical_load_less_vre_mw,surplus_vre,reqd_hydrogen_kg,hydrogen_prod_kg,hydrogen_h2o_demand_l
0,1,949.3,13.492779,533.148,24,24,546.640779,316.433333,162.933122,393.140568,8150.731472,7045.529888,9863.741843
1,2,1014.8,51.360884,616.624,24,48,667.984884,338.266667,152.416304,482.134521,7624.627514,8640.403608,12096.565051
2,3,1019.5,34.963272,412.396,24,72,447.359272,339.833333,163.603925,271.129864,8184.288411,4858.958136,6802.541391
3,4,1007.8,43.555568,497.956,24,96,541.511568,335.933333,151.031935,356.610169,7555.374421,6390.863250,8947.208550
4,5,1001.6,30.290704,459.960,24,120,490.250704,333.866667,170.211064,326.595101,8514.810605,5852.958805,8194.142327
...,...,...,...,...,...,...,...,...,...,...,...,...,...
360,361,929.3,1.490697,567.080,288,648,568.570697,309.766667,171.035802,429.839832,8556.068157,7703.222803,10784.511924
361,362,916.9,12.214012,613.212,288,672,625.426012,305.633333,165.524307,485.316986,8280.355538,8697.437030,12176.411842
362,363,953.5,5.038729,546.812,288,696,551.850729,317.833333,171.127032,405.144428,8560.631911,7260.652827,10164.913957
363,364,906.5,25.316527,552.296,288,720,577.612527,302.166667,153.396710,428.842570,7673.672329,7685.350717,10759.491004
