# Process Electricity Consumption

In [189]:
# Data processing
import numpy as np
import pandas as pd
from datetime import datetime
from dateutil.relativedelta import relativedelta

# Use US number formatting; locale.atoi is used further down to parse the
# consumption strings into integers under this locale.
import locale
locale.setlocale(locale.LC_ALL, 'en_US.UTF8')

# Common DGLIM utilities
import os, sys
# Put the parent of sys.path[0] on the module search path so that the
# project-local `dglim` module can be imported. This must run before the import.
sys.path.insert(1, os.path.join(sys.path[0], '..'))
import dglim
# NOTE(review): presumably sets the project root that dglim resolves its data
# paths against (dglim.datasets_path is read in the next cell) -- confirm in dglim.
dglim.setProjectPath('../../')

In [11]:
# Folder holding the GRU electric CSV extracts
file_path = dglim.datasets_path + 'GRU Electric Data/'


def _read_gru_csv(file_name):
    """Read one GRU extract, given its file name inside `file_path`."""
    return pd.read_csv(file_path + file_name)


# Raw tables, one per CSV extract
premises_df = _read_gru_csv('PREMISE.csv')
contracts_df = _read_gru_csv('CONTRACT.csv')
installations_df = _read_gru_csv('INSTALL.csv')
consumption_df = _read_gru_csv('CONSUMPTION.csv')
contract_accounts_df = _read_gru_csv('CONTRACT_ACCOUNT.csv')
business_partners_df = _read_gru_csv('BUSINESS_PARTNER.csv')
dunning_disconnects_df = _read_gru_csv('DUNNING_DISCONNECTS.csv')

# Contract accounts whose type is 'NR' (non-residential)
is_non_res_account = contract_accounts_df['CA_TYPE'] == 'NR'
non_res_contract_accounts_df = contract_accounts_df[is_non_res_account]

# Contracts that belong to one of those contract accounts
has_non_res_account = contracts_df['CA'].isin(non_res_contract_accounts_df['CA'])
business_contracts_df = contracts_df[has_non_res_account]

# Installations referenced by those contracts
# (the variable name's spelling is kept as-is so existing references keep working)
has_business_contract = installations_df['INSTALL'].isin(business_contracts_df['INSTALL'])
business_intalls_df = installations_df[has_business_contract]

# Premises of those installations, leaving out the 'XTELE' and 'RCMUSE' premise types
at_business_install = premises_df['PREMISE'].isin(business_intalls_df['PREMISE'])
is_excluded_type = premises_df['PREMISETYPE'].isin(['XTELE', 'RCMUSE'])
non_res_premises_df = premises_df[at_business_install & ~is_excluded_type]

# Consumption records billed against the non-residential contracts
billed_to_business = consumption_df['CONTRACT'].isin(business_contracts_df['CONTRACT'])
business_consumption_df = consumption_df[billed_to_business]

In [48]:
# Number of distinct PARCEL values among the non-residential premises.
# Uses `non_res_premises_df` (built in the loading cell); the previous name,
# `non_res_premises`, is not defined anywhere and fails on a fresh kernel.
# Note: Series.unique() keeps NaN, so a missing parcel counts as one value.
len(non_res_premises_df['PARCEL'].unique())

4384

## Calculate Per-Parcel Change in Electricity Consumption

***Collect monthly consumption for each parcel***

In [142]:
# Parcel numbers to discard: missing values plus the placeholder codes below
bad_parcel_values = [
    np.nan,
    '00000-000-000',
    '55555-555-555',
    '66666-666-666',
    '77777-777-777',
    '88888-888-888',
    '99999-999-999',
    'UFCAM-PUS-000',
    'UFCAM-PUS-XXX'
]

# Strip the dashes from every good parcel number. Assigning the shorter series
# back aligns on the index, so rows holding a bad parcel number are kept in
# premises_df but their PARCEL becomes NaN.
has_bad_parcel = premises_df['PARCEL'].isin(bad_parcel_values)
premises_df['PARCEL'] = premises_df.loc[~has_bad_parcel, 'PARCEL'] \
    .apply(lambda parcel: parcel.replace('-', ''))

# Inner-join everything onto the business contracts, one table at a time:
# contract account -> business partner -> consumption -> electric install -> premise
is_electric_install = installations_df['INSTALLTYPE'] == 'ELEC'
merge_steps = [
    (contract_accounts_df[['CA', 'BP']], 'CA'),
    (business_partners_df[['BP', 'FIRST_NAME', 'LAST_NAME']], 'BP'),
    (business_consumption_df[['CONTRACT', 'CONSUMPTION', 'UNIT', 'BILLINGPERIOD']], 'CONTRACT'),
    (installations_df[is_electric_install][['INSTALL', 'PREMISE']], 'INSTALL'),
    (premises_df[['PREMISE', 'PARCEL']], 'PREMISE'),
]

contract_details_df = business_contracts_df
for right_df, merge_key in merge_steps:
    contract_details_df = contract_details_df.merge(right_df, on=merge_key, how='inner')

# Only consumption per parcel is needed from here on; INSTALL, CONTRACT,
# FIRST_NAME and LAST_NAME remain available in contract_details_df.
wanted_columns = ['CONSUMPTION', 'BILLINGPERIOD', 'PARCEL']
friendly_names = {
    'CONSUMPTION': 'Consumption in KWH',
    'BILLINGPERIOD': 'Billing Period',
    'PARCEL': 'Parcel'
}
parcel_consumption_df = contract_details_df[wanted_columns] \
    .rename(columns=friendly_names) \
    .set_index('Parcel')

# Billing periods arrive as 'YYYY/MM' strings; parse them to the first of the month
parcel_consumption_df['Billing Period'] = parcel_consumption_df['Billing Period'].map(
    lambda period: datetime.strptime(period, '%Y/%m'))

# Parse consumption into integers using the active locale
consumption_text = parcel_consumption_df['Consumption in KWH'].astype(str)
parcel_consumption_df['Consumption in KWH'] = consumption_text.map(locale.atoi)

parcel_consumption_df.head()

Unnamed: 0_level_0,Consumption in KWH,Billing Period
Parcel,Unnamed: 1_level_1,Unnamed: 2_level_1
8230004015,11,2007-06-01
8230004015,0,2007-05-01
8230004015,19,2007-07-01
8230004015,399,2007-08-01
8230004015,1227,2007-09-01


***Group consumption by parcel***

**Note:** Rather than grouping by parcel, it would be better to group by the business partner's "last name" so that consumption could be separated out for individual businesses (where possible). However, the "last names" listed do not match well with the business names in other datasets, so a more intelligent matching algorithm would be required.

However, since we have only collected non-residential consumption, parcels with only one business partner should not be affected by this limitation, as long as that business partner is actually located on the parcel.

In [199]:
# Compare July and August 2017 only -- September/October data is incomplete
recent_periods = [datetime(2017, 7, 1), datetime(2017, 8, 1)]

# Keep the rows billed in those two months, then average the consumption
# for each (parcel, billing period) pair
in_recent_period = parcel_consumption_df['Billing Period'].isin(recent_periods)
recent_parcel_consumption_df = (
    parcel_consumption_df[in_recent_period]
    .groupby(['Parcel', 'Billing Period'])
    .mean()
)
recent_parcel_consumption_df.head(4)

Unnamed: 0_level_0,Unnamed: 1_level_0,Consumption in KWH
Parcel,Billing Period,Unnamed: 2_level_1
2921001000,2017-07-01,48288.25
2921001000,2017-08-01,42621.25
3218010000,2017-07-01,164.0
3218010000,2017-08-01,181.5


***Find recent trends in consumption***

In [215]:
# Format things nicely for calculation
trends_df = recent_parcel_consumption_df.unstack()
trends_df.columns = trends_df.columns.droplevel()
trends_df.columns.name = None

# Calculate recent trends
trends_df['Average Change in KWH'] = trends_df[recent_periods[1]] - trends_df[recent_periods[0]]
trends_df['Percent Change in KWH'] = trends_df[recent_periods[1]] / trends_df[recent_periods[0]] - 1

# Use month names instead of numbers
trends_df = trends_df.rename(columns={
    recent_periods[0]: recent_periods[0].strftime('%B'),
    recent_periods[1]: recent_periods[1].strftime('%B')
})

dglim.saveData(trends_df, 'Parcel Electricity Consumption')

print len(trends_df), 'commercial parcels identified'
print len(trends_df.dropna()), 'of those parcels contain recent consumption data'

trends_df

3385 commercial parcels identified
2990 of those parcels contain recent consumption data


Unnamed: 0_level_0,July,August,Average Change in KWH,Percent Change in KWH
Parcel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
02921001000,48288.250000,42621.250000,-5667.000000,-0.117358
03218010000,164.000000,181.500000,17.500000,0.106707
03755000000,691.000000,865.000000,174.000000,0.251809
04193003000,10305.000000,9876.666667,-428.333333,-0.041566
04193004010,305.500000,501.000000,195.500000,0.639935
04193005000,43260.000000,36960.000000,-6300.000000,-0.145631
04193005001,5603.000000,4060.000000,-1543.000000,-0.275388
04193006000,67.000000,41.000000,-26.000000,-0.388060
04193011000,207.500000,171.000000,-36.500000,-0.175904
04211002000,39080.500000,41121.500000,2041.000000,0.052226
