In [185]:
import pandas as pd
import numpy as np
import ast
import geo2ei

In [172]:
MONTHS = {'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 'may': 5, 'jun': 6, 'jul': 7, 'aug': 8, 'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12}

mixed_type_columns = [3, 5, 11, 28, 362, 392, 397, 402, 407, 412, 417, 437, 442, 447, 452, 457, 472, 477, 482, 492, 497, 502, 507, 517, 522, 527, 532, 542, 547, 552, 557, 562, 567, 572, 577, 582, 587, 592, 597, 602, 607, 612, 617, 622, 627, 632, 637, 642, 647, 652, 655, 668]

dtype_dict = {col: str for col in mixed_type_columns}

openei = pd.read_csv('../data/kentucky_rand.csv', dtype=dtype_dict)

openei['sourceparent'] = openei['sourceparent'].fillna('')


In [173]:
data_dict = {
    'utility': [],
    'type': [],
    'assessed': [],
    'period': [],
    'basic_charge_limit (imperial)': [],
    'basic_charge_limit (metric)': [],
    'month_start': [],
    'month_end': [],
    'hour_start': [],
    'hour_end': [],
    'weekday_start': [],
    'weekday_end': [],
    'charge (imperial)': [],
    'charge (metric)': [],
    'units': [],
    'Notes': []
}

In [174]:
def find_consecutive_ranges(lst):
    if not lst:
        return []

    ranges = []
    start = 0

    for i in range(1, len(lst)):
        if lst[i] != lst[start]:
            ranges.append((start, i - 1))
            start = i

    ranges.append((start, len(lst) - 1))

    return ranges

In [175]:
def process_demand(i):
    # processing the array for time intervals

    MONTH_ARRAY = ['flatDemandMonth_jan', 'flatDemandMonth_feb', 'flatDemandMonth_mar', 'flatDemandMonth_apr', 'flatDemandMonth_may', 'flatDemandMonth_jun', 'flatDemandMonth_jul', 'flatDemandMonth_aug', 'flatDemandMonth_sep', 'flatDemandMonth_oct', 'flatDemandMonth_nov', 'flatDemandMonth_dec']
    sched = {}
    for j in range(len(MONTH_ARRAY)):
        sched[j] = (openei[MONTH_ARRAY[j]][i])

    ranges = find_consecutive_ranges(list(sched.values()))
        
    if len(ranges) == 0:
        ranges = [(1, 12)]

    time_index = 0
    tier_index = 0
    charge_limit = 0

    while time_index < len(ranges):
        try:
            # tier_str first to catch ValueErrors from null values in the dataframe
            tier_str = 'flatdemandstructure/period'+ str(int(sched[ranges[time_index][0]])) + '/tier'+ str(tier_index)
            data_dict['month_start'].append(str(ranges[time_index][0]))
            data_dict['month_end'].append(str(ranges[time_index][1]))
            data_dict['utility'].append('electricity')
            data_dict['type'].append('demand')
            data_dict['assessed'].append('')
            data_dict['period'].append('')
            data_dict['basic_charge_limit (imperial)'].append(charge_limit)
            data_dict['basic_charge_limit (metric)'].append(charge_limit)
            data_dict['hour_start'].append('')
            data_dict['hour_end'].append('')
            # Not the case for all structures
            data_dict['weekday_start'].append('0')
            data_dict['weekday_end'].append('6')
            rate = openei[tier_str + 'rate'][i]
            if not np.isnan(openei[tier_str + 'adj'][i]):
                rate += openei[tier_str + 'adj'][i]
            data_dict['charge (imperial)'].append(rate)
            data_dict['charge (metric)'].append(rate)
            data_dict['units'].append(openei['flatdemandunit'][i])
            data_dict['Notes'].append('')


            max_str = 'flatdemandstructure/period' + str(int(sched[ranges[time_index][0]])) + '/tier'+ str(tier_index) + 'max'
            if not np.isnan(openei[max_str][i]):
                charge_limit = openei[max_str][i]
                tier_index += 1
            else:
                time_index += 1

        except (ValueError, KeyError):
            return

In [180]:
def unpack_array(lst, sched, str, i, units):
    time_index = 0
    tier_index = 0
    charge_limit = 0

    while time_index < len(lst):
        try:
            # tier_str first to catch ValueErrors from null values in the dataframe
            tier_str = str + '/period'+ str(int(sched[lst[time_index][0]])) + '/tier'+ str(tier_index)
            data_dict['month_start'].append(str(lst[time_index][0]))
            data_dict['month_end'].append(str(lst[time_index][1]))
            data_dict['utility'].append('electricity')
            data_dict['type'].append('demand')
            data_dict['assessed'].append('')
            data_dict['period'].append('')
            data_dict['basic_charge_limit (imperial)'].append(charge_limit)
            data_dict['basic_charge_limit (metric)'].append(charge_limit)
            data_dict['hour_start'].append('')
            data_dict['hour_end'].append('')
            # Not the case for all structures
            data_dict['weekday_start'].append('0')
            data_dict['weekday_end'].append('6')
            rate = openei[tier_str + 'rate'][i]
            if not np.isnan(openei[tier_str + 'adj'][i]):
                rate += openei[tier_str + 'adj'][i]
            data_dict['charge (imperial)'].append(rate)
            data_dict['charge (metric)'].append(rate)
            data_dict['units'].append(units)
            data_dict['Notes'].append('')


            max_str = str + '/period' + str(int(sched[lst[time_index][0]])) + '/tier'+ str(tier_index) + 'max'
            if not np.isnan(openei[max_str][i]):
                charge_limit = openei[max_str][i]
                tier_index += 1
            else:
                time_index += 1

        except (ValueError, KeyError):
            break

In [179]:
def process_TOU(i):
    # check if TOU data available
    
    if pd.isna(openei['demandweekdayschedule'][i]) and pd.isna(openei['demandweekendschedule'][i]):
        return

    weekday_sched = ast.literal_eval(openei['demandweekdayschedule'][i])
    weekend_sched = ast.literal_eval(openei['demandweekendschedule'][i])

    weekday_ranges = find_consecutive_ranges(weekday_sched)
    weekend_ranges = find_consecutive_ranges(weekend_sched)
    
    print(f"wd sched: {weekday_sched}, ranges length: {len(weekday_sched)}")
    print(f"we sched: {weekend_sched}, ranges length: {len(weekend_sched)}")

    print(f"wd ranges: {weekday_ranges}, ranges length: {len(weekday_ranges)}")
    print(f"we ranges: {weekend_ranges}, ranges length: {len(weekend_ranges)}")
    unpack_array(weekday_ranges, weekday_sched, 'demandratestructure', i, 'kW')

In [184]:

#for i in range(len(openei_dummy)):
# for i in range(23,24):
    # data_dict['utility'].append('electricity')
    # data_dict['type'].append('customer')
    # data_dict['assessed'].append('')
    # data_dict['period'].append('')
    # data_dict['basic_charge_limit (imperial)'].append('')
    # data_dict['basic_charge_limit (metric)'].append('')
    # data_dict['month_start'].append('')
    # data_dict['month_end'].append('')
    # data_dict['hour_start'].append('')
    # data_dict['hour_end'].append('')
    # data_dict['weekday_start'].append('')
    # data_dict['weekday_end'].append('')
    # data_dict['charge (imperial)'].append(openei['fixedchargefirstmeter'][i])
    # data_dict['charge (metric)'].append(openei['fixedchargefirstmeter'][i])
    # data_dict['units'].append('$/month')
    # data_dict['Notes'].append(openei['source'][i] + ('\t' + openei['sourceparent'][i] if openei['sourceparent'][i] != '' else ''))
    # process_demand(i)

process_TOU(40)

for key, value in data_dict.items():
    print(f"{key}: {len(value)}")
new_df = pd.DataFrame(data_dict)
print(new_df)

wd sched: [[0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], ranges length: 12
we sched: [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

TypeError: int() argument must be a string, a bytes-like object or a real number, not 'list'