In [18]:
import pandas as pd
import numpy as np

## Functions

In [19]:
def merge_trade(x):
    if x[:3] == "TRD":
        return "TRD"
    if x == "INVEN":
        return 'NonIndustry'
    if x == 'NIT':
        return 'FER'
    else:
        return x

In [20]:
def change_sector_name(sector_name):
    
    if sector_name in ['Employee Compensation']:
        return 'LAB'
    elif sector_name in ['Capital']:
        return 'CAP'
    elif sector_name in ['Other Property Type Income']:
        return 'PROP'
    elif 'Propri' in sector_name:
        return 'PROF'
    elif 'Federal Government' in sector_name:
        return 'GOV_FED'
    elif 'State/Local Govt' in sector_name:
        return 'GOV_STT'
    elif 'FederalEnt' in sector_name:
        return 'GOV_FED_ENT'
    elif 'StateLocalEnt' in sector_name:
        return 'GOV_STT_ENT'
    elif 'FedEmploy' in sector_name:
        return 'GOV_FED_EMP'
    elif 'StateLocalEmploy' in sector_name:
        return 'GOV_STT_EMP'
#     elif 'gov' in sector_name.lower():
#         return 'GOV'
    elif 'inven' in sector_name.lower():
        return 'INVEN'
    elif 'Households' in sector_name:
        return 'HOH'
    elif 'corp' in sector_name.lower():
        return 'CORP'
    elif 'Not an industry' in sector_name:
        return 'OtherIND'
    elif 'energy' in sector_name.lower():
        return 'ENG'
    elif 'Agri' in sector_name:
        return 'AGR_' + sector_name.split('_')[1][0]
    elif sector_name == 'Manufacturing':
        return 'MAN'
    elif 'tax' in sector_name.lower():
        return 'TAX'
    elif 'trade' in sector_name.lower():
        return 'TRD_' + sector_name[0]
    elif 'for' == sector_name.lower():
        return 'FORE'
    else:
        return sector_name.replace(' ', '_')

## Parameters

In [39]:
## Which region to use: PJM or NE
region = 'PJM'

In [40]:
states_northeast = 'Maine, New York, New Jersey, Vermont, Massachusetts, Rhode Island, Connecticut, New Hampshire, Pennsylvania'.split(', ')
states_pjm       = 'Delaware, Illinois, Indiana, Kentucky, Maryland, Michigan, New Jersey, North Carolina, Ohio, Pennsylvania, Tennessee, Virginia, West Virginia, District of Columbia'.split(', ')

if region is 'PJM':
    states_sample = states_pjm
    input_file    = '../data/sam/SAM_PJM_raw.csv'
    output_file   = '../data/sam/SAM_PJM_beccs.csv'
elif region is 'NE': 
    states_sample = states_northeast
    input_file    = '../data/sam/SAM_NE_raw.csv'
    output_file   = '../data/sam/SAM_NE_beccs.csv'
else: 
    raise(ValueError('Unknown Region'))

## Import Data

In [41]:
# IMPLAN data
data_df = pd.read_csv(input_file, header = 1)

# Biowaste production data
bio_prod_df = pd.read_excel('../data/bioenergy/bioenergy_clean.xlsx')

# Biowaste energy data
bio_energy_df = pd.read_excel('../data/bioenergy/bioenergy_sources_energy.xlsx')

## Compute Bioenergy Availability

In [73]:
# get bioenergy of each feed stock
bio_energy_df['kWh/dt'] = bio_energy_df['kWh/tonne']/10
bio_energy_dict = bio_energy_df[['Feedstock', 'kWh/dt']].set_index('Feedstock').fillna(0).to_dict()['kWh/dt']

# include it in biowaste production data frame
bio_prod_df['feedstock_energy'] = bio_prod_df['Feedstock'].apply(lambda x: bio_energy_dict.get(x))

# subset production to northeast states in 2016, get average production for each feedstock across scenarios
bioenergy_ne_df = bio_prod_df.query('Year == 2016 and State in @states_sample').groupby(
    ['Resource Type', 'Feedstock', 'State']).mean().groupby(
    ['Resource Type', 'Feedstock']).sum().reset_index()

# compute bioenergy from production of each feedstock
bioenergy_ne_df['bioenergy'] = bioenergy_ne_df['Production']*bioenergy_ne_df['feedstock_energy']

# get total bioenergy for each resource type
bioenergy_dict = bioenergy_ne_df.groupby(['Resource Type'])['bioenergy'].sum().to_dict()

# biomass production table
biomass_prod = bioenergy_ne_df.groupby(['Resource Type', 'Feedstock'])['Production'].first().reset_index()
biomass_prod['Production'] = biomass_prod['Production'].apply(lambda x: '{:,}'.format(int(np.round(x,0))))
#biomass_prod.set_index('Resource Type')

## Prepare IMPLAN Entries

In [25]:
data_df.columns = ['Index', 'Sector_output', 'TypeCode', 'Sector_input', 'Value']

data_df_sam_raw = data_df.groupby(['Sector_input', 'Sector_output']).sum()['Value'].reset_index().pivot(
    index = 'Sector_input', columns = 'Sector_output', values = 'Value').fillna(0)

# Fix sector names
data_df['Sector_output'] = data_df['Sector_output'].apply(lambda x: merge_trade(change_sector_name(x)))
data_df['Sector_input'] = data_df['Sector_input'].apply(lambda x: merge_trade(change_sector_name(x)))

# Reaggregate sectors
data_df = data_df.groupby(['Sector_input', 'Sector_output']).sum()['Value'].reset_index()

# Pivot into SAM
data_df_sam = data_df.pivot(index = 'Sector_input', columns = 'Sector_output', values = 'Value').fillna(0)

# Remove self transfers for government and households
data_df_sam.loc['GOV_FED', 'GOV_FED'] = 0
data_df_sam.loc['GOV_STT', 'GOV_STT'] = 0
data_df_sam.loc['HOH', 'HOH'] = 0

# Change NonIndustry Inputs/Outputs so it can be modelled as a good
temp_sum_1 = np.sum(data_df_sam.loc[:, 'NonIndustry'])
data_df_sam.loc['CAP', 'NonIndustry'] = 0
data_df_sam.loc['LAB', 'NonIndustry'] = data_df_sam.loc['HOH', 'NonIndustry']
data_df_sam.loc['HOH', 'NonIndustry'] = 0
temp_sum_2 = np.sum(data_df_sam.loc[:, 'NonIndustry'])
data_df_sam.loc['NonIndustry', :] = (temp_sum_2/temp_sum_1)*data_df_sam.loc['NonIndustry', :]

# Remove direct tax from goods to government
institutions = 'CAP,CORP,GOV_FED,GOV_STT,HOH,LAB,PROF,PROP,TAX,TRD'.split(',')
goods = [x for x in data_df_sam.columns if x not in institutions]
for i in goods:
    for g in ['GOV_FED', 'GOV_STT']:
        data_df_sam.loc['TAX', i] = data_df_sam.loc['TAX', i] + data_df_sam.loc[g, i]
        data_df_sam.loc[g, i] = 0

# Remove small entries
data_df_sam = data_df_sam.applymap(lambda x: 0 if x < 1 else x)

# Remove empty entries
for col in data_df_sam.columns:
    if data_df_sam[col].sum() == 0:
        data_df_sam = data_df_sam.drop(col, axis = 1).drop(col, axis = 0)

### Bundle biomass

In [26]:
biomass_unit_scale = 1/1e6

# create biomass sector
data_df_sam['BIOMASS'] = 0
data_df_sam.loc['BIOMASS', :] = 0

# add (waste-producing sector) -> (biomass sector) links
data_df_sam.loc['AGR_CRP', 'BIOMASS'] = bioenergy_dict['Ag Residues']    *biomass_unit_scale
data_df_sam.loc['AGR_LIV', 'BIOMASS'] = bioenergy_dict['Manure']         *biomass_unit_scale
data_df_sam.loc['FORE', 'BIOMASS']    = bioenergy_dict['Forest Residues']*biomass_unit_scale

# use biomass as input for elc_biomass
data_df_sam.loc['BIOMASS', 'ELC_BIOMASS'] = data_df_sam['BIOMASS'].sum()

# clean up other sources of biomass
data_df_sam.loc['AGR_CRP', 'ELC_BIOMASS'] = 0
data_df_sam.loc['AGR_LIV', 'ELC_BIOMASS'] = 0
data_df_sam.loc['FORE', 'ELC_BIOMASS'] = 0

### Add BECCS Sector

In [27]:
sectors_elc_gen = [x for x in data_df_sam.columns if 'ELC' in x and 'DIST' not in x]

beccs_rel_size = 0.01
beccs_sector_size = data_df_sam[sectors_elc_gen].sum().sum()*beccs_rel_size

# Create beccs sector from elc sectors
data_df_sam['ELC_BECCS']        = data_df_sam.loc[:, sectors_elc_gen].sum(axis = 1)
data_df_sam.loc['ELC_BECCS', :] = data_df_sam.loc[sectors_elc_gen, :].sum(axis = 0)

# Remove dependencies on other elc sectors
data_df_sam.loc['ELC_BECCS', sectors_elc_gen] = 0
data_df_sam.loc[sectors_elc_gen, 'ELC_BECCS'] = 0

# Add dependency on biomass
data_df_sam.loc['BIOMASS', 'ELC_BECCS'] = beccs_sector_size*0.2

# Remove imports of this technology
data_df_sam.loc['TRD', 'ELC_BECCS'] = 0

# Scale sector
data_df_sam.loc[:, 'ELC_BECCS'] = data_df_sam.loc[:, 'ELC_BECCS']*(beccs_sector_size/data_df_sam.loc[:, 'ELC_BECCS'].sum())
data_df_sam.loc['ELC_BECCS', :] = data_df_sam.loc['ELC_BECCS', :]*(beccs_sector_size/data_df_sam.loc['ELC_BECCS', :].sum())

### Bundle renewable sector

In [40]:
renewable_sectors = ['ELC_BIOMASS', 'ELC_HYDRO', 'ELC_NUC', 'ELC_OTHER', 'ELC_SOLAR', 'ELC_WIND']

# create renewable bundle
data_df_sam['ELC_RNW'] = 0
data_df_sam.loc['ELC_RNW', :] = 0

# move renewable output from dist to bundle
for rs in renewable_sectors:
    
    # collect renewable output into other renewables and dist
    renewable_output = 0
    for rs2 in renewable_sectors:
        renewable_output = renewable_output + data_df_sam.loc[rs, rs2] 
        data_df_sam.loc[rs, rs2] = 0
    # move to renewable bundle
    data_df_sam.loc[rs, 'ELC_RNW'] = renewable_output + data_df_sam.loc[rs, 'ELC_DIST']
    data_df_sam.loc[rs, 'ELC_DIST'] = 0
    
    # move taxes to renewable bundle
    data_df_sam.loc['TAX', 'ELC_RNW'] = data_df_sam.loc['TAX', rs]
    data_df_sam.loc['TAX', rs] = 0

# add some factor inputs to bundle for balancing
factors = ['LAB', 'PROP', 'PROF']

for fac in factors:
    data_df_sam.loc[fac, 'ELC_RNW'] = data_df_sam.loc[fac, 'ELC_DIST']*0.01
    
# balance bundle output
data_df_sam.loc['ELC_RNW', 'ELC_DIST'] = data_df_sam['ELC_RNW'].sum()

In [42]:
data_df_sam.applymap(lambda x: ' ' if x == 0 else x)

Sector_output,AGR_CRP,AGR_LIV,CAP,CORP,ELC_BIOMASS,ELC_DIST,ELC_FF,ELC_HYDRO,ELC_NUC,ELC_OTHER,...,NonIndustry,PAP,PROF,PROP,SER,TAX,TRD,ELC_BECCS,ELC_RNW,BIOMASS
Sector_input,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AGR_CRP,4382.97,76.0111,,,,,,,,,...,25.451,,,,124.827,,2061.26,0.744719,,79.5415
AGR_LIV,4.14507,8067.3,,,,,,,,,...,7.7393,,,,71.666,,792.743,,,8.9083
CAP,,,,52748.5,,,,,,,...,,,,533072.0,,,803753.0,,,
CORP,,,,,,,,,,,...,,,,358539.0,,,,,,
ELC_BIOMASS,,,,,,,,,,,...,,,,,,,59.8519,,586.539,
ELC_DIST,32.7177,48.73,,,5.80566,61245.2,59.9125,17.9207,86.6944,1.38255,...,,821.537,,,28861.1,,8255.76,1.98402,,
ELC_FF,,,,,,,,17069.8,,,...,,,,,,,1741.84,,,
ELC_HYDRO,,,,,,,,,,,...,,,,,,,134.176,,29995.4,
ELC_NUC,,,,,,,,,,,...,,,,,,,1089.6,,10677.9,
ELC_OTHER,,,,,,,,,,,...,,,,,,,14.5341,,142.432,


In [43]:
# column row differences
data_df_sam.sum(axis = 1) - data_df_sam.sum(axis = 0)

Sector_input
AGR_CRP        4.243775e+02
AGR_LIV        7.578450e+01
CAP           -5.633291e+03
CORP           0.000000e+00
ELC_BIOMASS   -5.689379e+02
ELC_DIST      -1.440748e+04
ELC_FF         1.466590e+00
ELC_HYDRO      1.374282e+04
ELC_NUC        9.593489e+02
ELC_OTHER      9.509159e+00
ELC_SOLAR      5.393519e+00
ELC_WIND       3.040378e+01
FORE           5.619941e+02
GOV_FED       -2.927881e+02
GOV_FED_EMP    4.882812e-03
GOV_FED_ENT   -8.451048e+01
GOV_STT       -6.114355e+04
GOV_STT_EMP   -1.708984e-03
GOV_STT_ENT   -6.737291e+03
HOH           -1.193268e+04
LAB            1.493690e+04
MAN            1.190087e+03
NonIndustry    4.974064e+02
PAP            4.393106e+01
PROF           2.280419e+01
PROP          -1.967604e+04
SER            6.165275e+04
TAX            5.648101e+03
TRD            2.033688e+04
ELC_BECCS      3.366149e+02
ELC_RNW        7.275958e-12
BIOMASS        0.000000e+00
dtype: float64

In [44]:
# bring row and column sums slightly closer
tot_div = dict(data_df_sam.sum(axis = 1)/data_df_sam.sum(axis = 0))

for i in data_df_sam.index:
    data_df_sam[i] = tot_div[i]*data_df_sam[i]

data_df_sam.sum(axis = 1)/data_df_sam.sum(axis = 0)

Sector_input
AGR_CRP        1.017120
AGR_LIV        1.003284
CAP            0.998534
CORP           0.983464
ELC_BIOMASS    1.000874
ELC_DIST       0.957741
ELC_FF         1.607674
ELC_HYDRO      0.990665
ELC_NUC        1.000874
ELC_OTHER      1.000874
ELC_SOLAR      1.000874
ELC_WIND       1.000874
FORE           1.041008
GOV_FED        1.001005
GOV_FED_EMP    0.999803
GOV_FED_ENT    1.001598
GOV_STT        1.006473
GOV_STT_EMP    0.956654
GOV_STT_ENT    0.995188
HOH            0.997712
LAB            1.004562
MAN            0.999953
NonIndustry    0.999466
PAP            1.001802
PROF           1.007230
PROP           1.005298
SER            1.003543
TAX            1.004301
TRD            0.995022
ELC_BECCS      0.926473
ELC_RNW        0.904869
BIOMASS        0.531865
dtype: float64

## Export

In [45]:
data_df_sam.index.name  = ''
data_df_sam.columns.name = ''
data_df_sam.applymap(lambda x: np.round(x, 3)).to_csv(output_file)