## Used to Develop New Benchmarking Features
### Alan's Notebook
Assumes that `df_raw.pkl`, `df_processed.pkl`, and `util_obj.pkl` are located in the directory above this one.

In [1]:
import pickle
import time
import os
import io
import requests
import pandas as pd
import numpy as np
from importlib import reload
import sys
from datetime import datetime

import matplotlib.pyplot as plt
%matplotlib inline

# bench_util module is in the parent directory, because it will be part of
# the production code.  Add the parent directory to the Path so Python can
# import from there
sys.path.insert(0, '../')   
import bench_util as bu
import graph_util as gu


In [2]:
reload(bu)

<module 'bench_util' from '../bench_util.py'>

In [3]:
# Unpickle the raw DataFrame
df_raw = pd.read_pickle('../df_raw.pkl')

# Unpickle the processed DataFrame.
dfp = pd.read_pickle('../df_processed.pkl')

# Unpickle the Utility object
ut = pickle.load(open('../util_obj.pkl', 'rb'))

In [4]:
# Show the first record of the raw dataframe
df_raw.iloc[0]

Site ID                                              CLXGP2
Site Name                 FNSB-CLRA River Park General Area
Vendor Code                                        VU797000
Vendor Name                         Army Corps of Engineers
Account Number                                01 River Park
Bill Date                                        01/31/2010
Due Date                                         01/31/2010
Entry Date                                       01/13/2011
Invoice #                                               NaN
Voucher #                                               NaN
From                                    2009-12-31 00:00:00
Thru                                    2010-01-31 00:00:00
Service Name                                    Electricity
Item Description                              Energy charge
Meter Number                                            NaN
Usage                                                     0
Cost                                    

In [5]:
# Show the first record of the processed dataframe
dfp.iloc[0]

site_id                    03
service_type      electricity
cal_year                 2009
cal_mo                     12
item_desc       Energy charge
units                     kWh
cost                  2607.98
usage                 14360.6
mmbtu                 48.9985
fiscal_year              2010
fiscal_mo                   6
Name: 0, dtype: object

In [6]:
# Show type information and counts of processed dataframe
dfp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 64343 entries, 0 to 64342
Data columns (total 11 columns):
site_id         64343 non-null object
service_type    64343 non-null object
cal_year        64343 non-null int64
cal_mo          64343 non-null int64
item_desc       64343 non-null object
units           64343 non-null object
cost            64343 non-null float64
usage           64343 non-null float64
mmbtu           64343 non-null float64
fiscal_year     64343 non-null int64
fiscal_mo       64343 non-null int64
dtypes: float64(3), int64(4), object(4)
memory usage: 5.4+ MB


In [7]:
# Show stats for numeric columns
dfp.describe()

Unnamed: 0,cal_year,cal_mo,cost,usage,mmbtu,fiscal_year,fiscal_mo
count,64343.0,64343.0,64343.0,64343.0,64343.0,64343.0,64343.0
mean,2014.018697,6.497941,1450.56796,11312.06,51.650916,2014.517119,6.516871
std,2.484807,3.448452,3878.98522,43765.35,1357.65548,2.515183,3.463266
min,2009.0,1.0,-15844.611452,-6104.919,0.0,2009.0,1.0
25%,2012.0,4.0,20.0,0.0,0.0,2012.0,3.0
50%,2014.0,6.0,126.37679,0.0,0.0,2015.0,7.0
75%,2016.0,9.0,876.723269,1496.329,1.005428,2017.0,10.0
max,2018.0,12.0,149076.345136,1946610.0,239533.571429,2019.0,12.0


In [8]:
# Show counts of service types
dfp.service_type.value_counts()

electricity      27682
water            13502
sewer             7835
fuel_oil          5965
refuse            4448
natural_gas       3346
district_heat     1565
Name: service_type, dtype: int64

In [9]:
dfp.item_desc.value_counts()

Other Charge             23892
Energy charge            10530
Water Usage (Gallons)     7825
KW Charge                 7802
Fuel Oil #1 (Gallons)     5965
Sewer Usage (Gallons)     2479
Refuse (Loads)            1823
Natural gas (CCF)         1802
Electricity charge         631
Steam (MMBtu) CDHW         536
Steam (klbs)               448
Steam (lbs)                285
Water (kGal)               171
Refuse (Tons)              119
Utility Charge              21
Demand Charge               11
Water (Cgallons)             3
Name: item_desc, dtype: int64

In [10]:
pd.pivot_table(dfp, values='site_id', index='service_type', columns='units', aggfunc='count')

units,-,CCF,Cgallons,Gallons,Loads,MMBtu,Tons,kGal,kW,kWh,klbs,lbs
service_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
district_heat,332.0,,,,,517.0,,,,,441.0,275.0
electricity,14914.0,,,,,,,,3157.0,9611.0,,
fuel_oil,107.0,,,5858.0,,,,,,,,
natural_gas,2121.0,1225.0,,,,,,,,,,
refuse,2640.0,,,,1772.0,,36.0,,,,,
sewer,5872.0,,,1963.0,,,,,,,,
water,6998.0,,3.0,6364.0,,,,137.0,,,,


In [11]:
# See how the building_info() method of the Utility object works
ut.building_info('03')

{'site_name': 'SD-Denali Elementary',
 'site_category': 'School District',
 'address': nan,
 'city': nan,
 'group': 'School District',
 'division': nan,
 'campus': nan,
 'primary_func': 'Elementary School',
 'year_built': nan,
 'sq_ft': 49210.0,
 'onsite_gen': nan,
 'dd_site': 'PAFA',
 'full_address': nan,
 'source_fuel_oil': 'Sourdough Fuel  (Petro Star)',
 'source_natural_gas': 'Fairbanks Natural Gas',
 'source_electricity': 'Golden Valley Electric',
 'source_propane': '',
 'source_wood': '',
 'source_district_heat': '',
 'source_coal': '',
 'source_water': 'Golden Heart Utilities',
 'source_sewer': 'Golden Heart Utilities',
 'source_refuse': 'Alaska Waste',
 'acct_fuel_oil': '55010001',
 'acct_natural_gas': '10283 - DENALI',
 'acct_electricity': '172230',
 'acct_propane': '',
 'acct_wood': '',
 'acct_district_heat': '',
 'acct_coal': '',
 'acct_water': '1998005610',
 'acct_sewer': '1998005610',
 'acct_refuse': 'AW23-DNL03',
 'site_id': '03'}

## Work on Preprocessing Routine

In [12]:
start_time = None
def msg(the_message):
    """Prints a message to the console, along cumulative elapsed time
    since the script started.
    """
    print('{} ({:.1f} s)'.format(the_message, time.time() - start_time))

In [42]:
# Set up Dummy Settings Object
class Dummy:
    pass

settings = Dummy()
settings.UTILITY_BILL_FILE_PATH = '/home/tabb99/fnsb_data/20190306_AllDataExport.CSV'
settings.OTHER_DATA_DIR_PATH = '/home/tabb99/fnsb_data/'
settings.ADDITIONAL_GROUPING_COLS = ['group', 'division', 'campus', 'primary_func']
#settings.ADDITIONAL_GROUPING_COLS = ['campus']

In [15]:
"""Loads and processes the Utility Bill data into a smaller and more usable
form.  Returns
    - a DataFrame with the raw billing data,
    - a DataFrame with the preprocessed data,
    - and a bench_util.Util object, which provides useful functions to
        the analysis portion of this script.

This the "preprocess_data.ipynb" was used to develop this code and shows
intermdediate results from each of the steps.
"""

start_time = time.time()

# --- Read the CSV file and convert the billing period dates into 
#     real Pandas dates
fn = settings.UTILITY_BILL_FILE_PATH
msg('Starting to read Utility Bill Data File.')
dfu = pd.read_csv(fn, 
                  parse_dates=['From', 'Thru'],
                  dtype={'Site ID': 'object', 'Account Number': 'object'}
                )

#--- Make a utility function object
msg('Make an Object containing Useful Utility Functions.')
dn = settings.OTHER_DATA_DIR_PATH
ut = bu.Util(dfu, dn)

msg('Removing Unneeded columns and Combining Charges.')

# Filter down to the needed columns and rename them
cols = [
    ('Site ID', 'site_id'),
    ('From', 'from_dt'),
    ('Thru', 'thru_dt'),
    ('Service Name', 'service_type'),
    ('Item Description', 'item_desc'),
    ('Usage', 'usage'),
    ('Cost', 'cost'),
    ('Units', 'units'),
]

old_cols, new_cols = zip(*cols)         # unpack into old and new column names
dfu1 = dfu[list(old_cols)].copy()       # select just those columns from the origina dataframe
dfu1.columns = new_cols                 # rename the columns

# --- Collapse Non-Usage Changes into "Other Charge"

# This cuts the processing time in half due to not having to split a whole 
# bunch of non-consumption charges.
dfu1.loc[np.isnan(dfu1.usage), 'item_desc'] = 'Other Charge'
# Pandas can't do a GroupBy on NaNs, so replace with something
dfu1.units.fillna('-', inplace=True)   
dfu1 = dfu1.groupby(['site_id', 
                     'from_dt', 
                     'thru_dt', 
                     'service_type', 
                     'item_desc', 
                     'units']).sum()
dfu1.reset_index(inplace=True)

# --- Split Each Bill into Multiple Pieces, each within one Calendar Month

msg('Split Bills into Calendar Month Pieces.')
# Split all the rows into calendar month pieces and make a new DataFrame
recs=[]
for ix, row in dfu1.iterrows():
    # it is *much* faster to modify a dictionary than a Pandas series
    row_tmpl = row.to_dict()   

    # Pull out start and end of billing period; can drop the from & thru dates now
    # doing split-up of billing period across months.
    st = row_tmpl['from_dt']
    en = row_tmpl['thru_dt']
    del row_tmpl['from_dt']
    del row_tmpl['thru_dt']

    for piece in bu.split_period(st, en):
        new_row = row_tmpl.copy()
        new_row['cal_year'] = piece.cal_year
        new_row['cal_mo'] = piece.cal_mo
        # new_row['days_served'] = piece.days_served    # not really needed
        new_row['usage'] *= piece.bill_frac
        new_row['cost'] *= piece.bill_frac
        recs.append(new_row)

dfu2 = pd.DataFrame(recs, index=range(len(recs)))

# --- Sum Up the Pieces by Month
dfu3 = dfu2.groupby(
    ['site_id', 'service_type', 'cal_year', 'cal_mo', 'item_desc', 'units']
).sum()
dfu3 = dfu3.reset_index()

# --- Add MMBtus Fiscal Year Info and MMBtus
msg('Add MMBtu Information.')
mmbtu = []
for ix, row in dfu3.iterrows():
    row_mmbtu = ut.fuel_btus_per_unit(row.service_type, row.units) * row.usage / 1e6
    if np.isnan(row_mmbtu): row_mmbtu = 0.0
    mmbtu.append(row_mmbtu)
dfu3['mmbtu'] = mmbtu

# Now that original service types have been used to determine MMBtus,
# convert all service types to standard service types.
dfu3['service_type'] = dfu3.service_type.map(ut.service_to_category())

# This may cause multiple rows for a fiscal month and service type.
# Re-sum to reduce to least number of rows.
dfu4 = dfu3.groupby(
    ['site_id', 'service_type', 'cal_year', 'cal_mo', 'item_desc', 'units']
).sum()
dfu4 = dfu4.reset_index()

# Add columns that indicate what type of grouping is being done
dfu4['group'] = 'facility'
dfu4.head()

Starting to read Utility Bill Data File. (0.0 s)
Make an Object containing Useful Utility Functions. (22.9 s)
Removing Unneeded columns and Combining Charges. (30.8 s)
Split Bills into Calendar Month Pieces. (30.9 s)
Add MMBtu Information. (203.0 s)


Unnamed: 0,site_id,service_type,cal_year,cal_mo,item_desc,units,cost,usage,mmbtu,group
0,3,electricity,2009,12,Energy charge,kWh,2607.979752,14360.630515,48.998471,facility
1,3,electricity,2010,1,Energy charge,kWh,7849.39409,43317.459347,147.799171,facility
2,3,electricity,2010,2,Energy charge,kWh,7344.389601,39700.93871,135.459603,facility
3,3,electricity,2010,3,Energy charge,kWh,7404.968981,38988.804762,133.029802,facility
4,3,electricity,2010,4,Energy charge,kWh,6472.904545,35895.878788,122.476738,facility


In [43]:
df_all_groups = pd.DataFrame()
# now create rows for the other grouping columns.
for gp_col in settings.ADDITIONAL_GROUPING_COLS:
    dfu_gp = dfu4.copy()
    dfu_gp['group'] = gp_col
    
    # get a dictionary mapping the site_id into the group id
    map_to_group = ut.site_to_col_value_dict(gp_col)
    
    # fill out the 'site_id' column with group values
    dfu_gp['site_id'] = dfu_gp['site_id'].map(map_to_group)
    
    # Only keep rows that have a site_id
    dfu_gp = dfu_gp.loc[dfu_gp.site_id.notna()]
    
    dfu_gp2 = dfu_gp.groupby(
        ['group', 'site_id', 'service_type', 'cal_year', 'cal_mo', 'item_desc', 'units']
    ).sum()
    dfu_gp2.reset_index(inplace=True)
    df_all_groups = pd.concat([df_all_groups, dfu_gp2], sort=True, ignore_index=True)

print(df_all_groups.group.value_counts())
# add these records to the prior list of facility records
dfu4 = pd.concat([dfu4, df_all_groups], sort=True)

# Add the fiscal year information
msg('Add Fiscal Year Information.')
fyr = []
fmo = []
for cyr, cmo in zip(dfu4.cal_year, dfu4.cal_mo):
    fis_yr, fis_mo = bu.calendar_to_fiscal(cyr, cmo)
    fyr.append(fis_yr)
    fmo.append(fis_mo)
dfu4['fiscal_year'] = fyr
dfu4['fiscal_mo'] = fmo

msg('Preprocessing complete!')

primary_func    14051
division         5621
campus           4296
group            4094
Name: group, dtype: int64
Add Fiscal Year Information. (5054.4 s)
Preprocessing complete! (5055.0 s)


In [44]:
dfu4.head()

Unnamed: 0,cal_mo,cal_year,cost,fiscal_mo,fiscal_year,group,item_desc,mmbtu,service_type,site_id,units,usage
0,12,2009,2607.979752,6,2010,facility,Energy charge,48.998471,electricity,3,kWh,14360.630515
1,1,2010,7849.39409,7,2010,facility,Energy charge,147.799171,electricity,3,kWh,43317.459347
2,2,2010,7344.389601,8,2010,facility,Energy charge,135.459603,electricity,3,kWh,39700.93871
3,3,2010,7404.968981,9,2010,facility,Energy charge,133.029802,electricity,3,kWh,38988.804762
4,4,2010,6472.904545,10,2010,facility,Energy charge,122.476738,electricity,3,kWh,35895.878788


In [45]:
dfu4.group.value_counts()

facility         514744
primary_func     126459
division          50589
campus            38664
site_category     32240
group              4094
Name: group, dtype: int64

## Work on Utility Object Changes needed to support Grouping

In [19]:
# List of all possible services
all_services = [
    'fuel_oil',
    'natural_gas',
    'electricity',
    'propane',
    'wood',
    'district_heat',
    'coal',
    'water',
    'sewer',
    'refuse'
]

all_energy_services = [
    'fuel_oil',
    'natural_gas',
    'electricity',
    'propane',
    'wood',
    'district_heat',
    'coal'
]

all_heat_services = [
    'fuel_oil',
    'natural_gas',
    'propane',
    'wood',
    'district_heat',
    'coal'
]

def calendar_to_fiscal(cal_year, cal_mo):
    """Converts a calendar year and month into a fiscal year and month.
    Returns (fiscal_year, fical_month) tuple.
    """
    if cal_mo <= 6:
        fiscal_year = cal_year
        fiscal_month = cal_mo + 6
    else:
        fiscal_year = cal_year + 1
        fiscal_month = cal_mo - 6

    return fiscal_year, fiscal_month


In [28]:
#    def __init__(self, util_df, other_data_pth, addl_grouping_cols=[]):
self = Dummy()
util_df = dfu
other_data_pth = '/home/tabb99/fnsb_data/'
addl_grouping_cols = ['campus']
"""
util_df: DataFrame containing the raw utility bill data
other_data_pth: path to the directory containing other application data spreadsheets,
    building info, degree days, etc.
addl_grouping_cols: Additional columns in the Buildings.xlsx spread sheet that
    are used for producing grouped reports (e.g. campus, division).
"""

# Get Service Type information and create a Fuel Btu dictionary as an
# object attribute.  Keys are fuel type, fuel unit, both in lower case.
# Also create a dictionary mapping service types to standard service 
# type category names.  
df_services = pd.read_excel(os.path.join(other_data_pth, 'Services.xlsx'), sheet_name='Service Types', skiprows=3)
self._fuel_btus = {}
for ix, row in df_services.iterrows():
    # Only put energy services into fuel btu dictionary
    if row.btu_per_unit > 0.0:
        self._fuel_btus[(row.service.lower(), row.unit.lower())] = row.btu_per_unit

# Make a dictionary mapping Service Type to Service Type Category
# For duplicate service type entries, this will take the last category.
self._service_to_category = dict(zip(df_services.service, df_services.category))

# Make a dictionary that maps the standard Service Category for fuels
# to the standard display units and the Btus per unit for that fuel unit.
# The keys are the standardized service type names, but only include energy
# producing fuels (not water, refuse, etc.).  The values are a two-tuple:
# (unit, Btus/unit).
df_svc_cat_info = pd.read_excel(os.path.join(other_data_pth, 'Services.xlsx'),
    sheet_name='Service Categories', skiprows=3)
ky_val = zip(df_svc_cat_info.category, zip(df_svc_cat_info.unit, df_svc_cat_info.btu_per_unit))
self._service_cat_info = dict(ky_val)

# Read in the Building Information. Ensure site_id is a string.
df_bldg = pd.read_excel(
        os.path.join(other_data_pth, 'Buildings.xlsx'), 
        skiprows=3, 
        converters={'site_id': str},
        )
df_bldg.set_index('site_id', inplace=True)

# Add a full address column, combo of address and city.
df_bldg['full_address'] = df_bldg.address.str.strip() + ', ' + \
    df_bldg.city.str.strip()
# now remove any leading or trailing commas.
df_bldg.full_address = df_bldg.full_address.str.strip(',') 

# Create a dictionary to hold info for each building
# The keys of the dictionary are the columns from the spreadsheet that
# was just read, but also fields that hold service provider names and
# account numbers.
src_list = ['source_{}'.format(s) for s in all_services]
acct_list = ['acct_{}'.format(s) for s in all_services]
dict_keys = list(df_bldg.columns) + src_list + acct_list + ['facility_list', 'grouping']

# make a dictionary with default values for all fields (use empty
# string for defaults)
default_info = dict(zip(dict_keys, [''] * len(dict_keys)))

In [29]:
def find_src_acct(dfs, service_cat):
    """Function used below to return service provider and account
    numbers for a particular service type.  'dfs' is a DataFrame that
    has only the records for one site.  'service_type' is the name of
    the service, e.g. 'Water'.  (provider name, account numbers) are
    returned.
    """
    try:
        # add in the service type category
        dfs2 = dfs.copy()
        dfs2['svc_cat'] = dfs2['Service Name'].map(self._service_to_category)
        df_svc = dfs2[dfs2.svc_cat==service_cat]
        last_bill_date = df_svc.Thru.max()
        df_last_bill = df_svc[df_svc.Thru == last_bill_date]

        # could be multiple account numbers. Get them all and
        # separate with commas
        accts = df_last_bill['Account Number'].unique()
        acct_str = ', '.join(accts)
        # Assume only one provider.
        provider = df_last_bill['Vendor Name'].iloc[0]

        return provider, acct_str

    except:
        return '', ''

# create a dictionary to map site_id to info about the building
self._bldg_info = {}

# separately, create a list that will be used to make a DataFrame
# that also contains this info.
rec_list = []

for ix, row in df_bldg.iterrows():
    # Start the record of building information (as a dictionary)
    # and fill out the info from the spreadsheet first.
    rec = default_info.copy()
    rec.update(row.to_dict())
    # these records are part of the facility grouping
    rec['grouping'] = 'facility'

    # now find providers and account numbers from raw utility file.
    df_site = util_df[util_df['Site ID']==ix]
    for svc_cat in all_services:
        source, accounts = find_src_acct(df_site, svc_cat)
        #print('source_{}'.format(svc_cat), 'acct_{}'.format(svc_cat))
        rec['source_{}'.format(svc_cat)] = source
        rec['acct_{}'.format(svc_cat)] = accounts
    self._bldg_info[row.name] = rec

    # add in the site_id to the record so the DataFrame has this
    # column.
    rec['site_id'] = row.name
    rec_list.append(rec)

In [31]:
# --- Add in information for the other grouping columns, which combine together
# sites. Use the value in the grouping column 
# as the 'site_id' for the new record.
# This presumes that group items are unique across all the additional
# groups.  e.g. 'FNSB' is a site_category, but it can't appear as a 
# campus or division.
df_just_facilities = pd.DataFrame(rec_list)  # make a DataFrame of the records so far
for gp_col in addl_grouping_cols:
    
    # Loop through the possible values in this group
    for id in df_bldg[gp_col].unique():
        
        # Don't process NaN entries
        if type(id) != str:
            continue
    
        rec = default_info.copy()
        rec['site_id'] = id
        rec['site_name'] = id    # reuse the group id as the site name as well.
        rec['grouping'] =  gp_col

        # get a DataFrame of all the site records that are part of this group.
        df_sites_for_gp = df_just_facilities[df_just_facilities[gp_col]==id]
        
        # fill out fields of info that involve all of the sites.
        rec['facility_list'] = ', '.join(df_sites_for_gp.site_name.values)
        
        # Do the utilities and the account numbers
        for col in src_list + acct_list:
            rec[col] = ', '.join(df_sites_for_gp[col].unique())

        # Square feet totals
        rec['sq_ft'] = df_sites_for_gp.sq_ft.sum()
        
        # Use the degree-day site that is most prevalent, it appears 
        # first in the value_counts.
        rec['dd_site'] = df_sites_for_gp.dd_site.value_counts().index[0]
        
        self._bldg_info[id] = rec
        rec_list.append(rec)

# Make a DataFrame, indexed on site_id to hold this building info
# as well.
self._bldg_info_df = pd.DataFrame(rec_list)
self._bldg_info_df.set_index('site_id', inplace=True)

# make a list of site categories and their associated buildings
df_sites = df_bldg.reset_index()[['site_id', 'site_name', 'site_category']]
cats = df_sites.groupby('site_category')
self._site_categories = []
for nm, gp in cats:
    bldgs = list(zip(gp['site_name'], gp['site_id']))
    bldgs.sort()
    sites = []
    for site_name, site_id in bldgs:
        sites.append(dict(id=site_id, name=site_name))
    self._site_categories.append( {'name': nm, 'sites': sites} )

# read in the degree-day info from AHFC's online file
resp = requests.get('http://ahfc.webfactional.com/data/degree_days.pkl').content
df_dd = pd.read_pickle(io.BytesIO(resp), compression='bz2')

# make a dictionary keyed on fiscal_yr, fiscal_mo, site_id
# with a value of degree days.
self._dd = {}
for ix, row in df_dd.iterrows():
    f_yr, f_mo = calendar_to_fiscal(row.month.year, row.month.month)
    self._dd[(f_yr, f_mo, ix)] = row.hdd65

In [53]:
dfsc = pd.DataFrame(list(self._bldg_info.values()))
dfsc = dfsc[['site_id', 'site_name', 'site_category']]
dfsc.site_category.value_counts()

Other              101
School District     40
                     5
Name: site_category, dtype: int64

In [32]:
self._bldg_info['Pioneer Park']

{'site_name': 'Pioneer Park',
 'site_category': '',
 'address': '',
 'city': '',
 'group': '',
 'division': '',
 'campus': '',
 'primary_func': '',
 'year_built': '',
 'sq_ft': 78540.0,
 'onsite_gen': '',
 'dd_site': 'PAFA',
 'full_address': '',
 'source_fuel_oil': ', Sourdough Fuel  (Petro Star)',
 'source_natural_gas': '',
 'source_electricity': ', Golden Valley Electric',
 'source_propane': '',
 'source_wood': '',
 'source_district_heat': '',
 'source_coal': '',
 'source_water': 'Golden Heart Utilities, ',
 'source_sewer': 'Golden Heart Utilities, ',
 'source_refuse': '',
 'acct_fuel_oil': ', 39384006, 39384027, 39384001, 39384002, 39384003, 39384062, 39384004, 39384005, 39384060',
 'acct_natural_gas': '',
 'acct_electricity': ', 340325, 182918, 197933, 161849',
 'acct_propane': '',
 'acct_wood': '',
 'acct_district_heat': '',
 'acct_coal': '',
 'acct_water': '1998038100, 1998039000, , 1998037700, 1998037900, 1998038710, 1998038300, 1998038800, 1998038900, 1998037410',
 'acct_sewer'