In [1]:
import pandas as pd
import numpy as np

# Import Data

In [2]:
# Read the state-level bioenergy CSV into the raw frame. low_memory=False is
# passed so pandas parses the file in a single pass.
bioenergy_raw_df = pd.read_csv(
    '../data/bioenergy/bioenergy_state.csv',
    low_memory=False,
)

In [4]:
# Peek at rows 1-2 (positional) across the first 20 columns of the raw frame.
bioenergy_raw_df.iloc[1:3, 0:20]

Unnamed: 0,Year,Scenario,Biomass Price,Feedstock,State,USDA Region,fips,Production,Production Unit,Production Density,Harvested Acres,Yield,Yield Unit,Land Area,Resource Category,Resource Form,Resource Type,Land Source,Forest Region,Diameter Class
1,2029,"Basecase, all energy crops",30.0,Barley straw,Arizona,Mountain,4,0.0,0.0,dt,16330.186,2.497476,dt/ac,113594.0907,Agriculture,Herbaceous,Ag Residues,Crop,,
2,2030,"Basecase, all energy crops",30.0,Barley straw,Arizona,Mountain,4,41202.205,0.362714,dt,16250.797,2.535476,dt/ac,113594.0907,Agriculture,Herbaceous,Ag Residues,Crop,,


In [5]:
# Distinct values present in the 'Resource Category' column of the raw frame.
bioenergy_raw_df['Resource Category'].unique()

array(['Agriculture', 'Forestry', 'Wastes'], dtype=object)

In [8]:
# Mirror 'Resource Category' under a space-free column name on the raw frame.
bioenergy_raw_df['Resource_Category'] = bioenergy_raw_df['Resource Category']

# Distinct feedstocks among the rows whose category is "Forestry".
bioenergy_raw_df.loc[
    bioenergy_raw_df['Resource_Category'] == "Forestry",
    'Feedstock',
].unique()

array(['Mixedwood, residue', 'Softwood, planted, residue',
       'Softwood, natural, residue', 'Hardwood, upland, residue',
       'Hardwood, lowland, residue'], dtype=object)

In [10]:
# Preview the first 10 Forestry rows across the first 20 columns.
# The filter uses the original 'Resource Category' column, so this cell runs
# on a fresh kernel without needing the 'Resource_Category' alias column that
# a different cell adds to the raw frame.
bioenergy_raw_df[bioenergy_raw_df['Resource Category'] == 'Forestry'].iloc[0:10, 0:20]

Unnamed: 0,Year,Scenario,Biomass Price,Feedstock,State,USDA Region,fips,Production,Production Unit,Production Density,Harvested Acres,Yield,Yield Unit,Land Area,Resource Category,Resource Form,Resource Type,Land Source,Forest Region,Diameter Class
66756,2015,"High housing, high energy demands",30.0,"Mixedwood, residue",Alabama,Southeast,1,336758.6,6.649351,dt,51120.1,6.588364,dt/ac,50645.32999,Forestry,Woody,Forest Residues,,S,1.0
66757,2015,"High housing, high energy demands",30.0,"Mixedwood, residue",Alabama,Southeast,1,26636.1,0.525934,dt,6573.2,4.050535,dt/ac,50645.32999,Forestry,Woody,Forest Residues,,S,2.0
66758,2018,"High housing, high energy demands",30.0,"Softwood, planted, residue",Alabama,Southeast,1,1505.1,0.029718,dt,287.8,5.23,dt/ac,50645.32999,Forestry,Woody,Forest Residues,,S,1.0
66759,2022,"High housing, high energy demands",30.0,"Mixedwood, residue",Alabama,Southeast,1,257107.0,5.076618,dt,35699.2,7.203157,dt/ac,50645.32999,Forestry,Woody,Forest Residues,,S,1.0
66760,2026,"High housing, high energy demands",30.0,"Mixedwood, residue",Alabama,Southeast,1,5779.5,0.114117,dt,1283.4,4.502317,dt/ac,50645.32999,Forestry,Woody,Forest Residues,,S,2.0
66761,2030,"High housing, high energy demands",30.0,"Mixedwood, residue",Alabama,Southeast,1,187201.8,3.696329,dt,23683.5,7.903789,dt/ac,50645.32999,Forestry,Woody,Forest Residues,,S,1.0
66762,2038,"High housing, high energy demands",30.0,"Mixedwood, residue",Alabama,Southeast,1,265758.4,5.247441,dt,31083.6,8.550548,dt/ac,50645.32999,Forestry,Woody,Forest Residues,,S,1.0
66763,2038,"High housing, high energy demands",30.0,"Mixedwood, residue",Alabama,Southeast,1,40574.2,0.801144,dt,4432.1,9.154797,dt/ac,50645.32999,Forestry,Woody,Forest Residues,,S,1.0
66764,2021,"High housing, high energy demands",30.0,"Mixedwood, residue",Arkansas,Delta States,5,15974.0,0.306983,dt,5185.2,3.076792,dt/ac,52035.44034,Forestry,Woody,Forest Residues,,S,2.0
66765,2024,"High housing, high energy demands",30.0,"Mixedwood, residue",Arkansas,Delta States,5,15760.9,0.302888,dt,4923.3,3.19935,dt/ac,52035.44034,Forestry,Woody,Forest Residues,,S,2.0


# Clean Data

In [159]:
# Panel grouping keys used by the groupby calls in this section.
pgroups = [
    'Year',
    'State',
    'Resource Type',
    'Feedstock',
    'Scenario',
]
# The same keys with 'Year' left out (for across-year aggregates).
pgroups_noyr = [key for key in pgroups if key != 'Year']

In [160]:
# Sum up production to total values for each unique panel group.
# 'Production' is selected *before* aggregating so only that column is summed;
# summing every column first and then discarding all but one is wasted work
# and, depending on the pandas version, can warn or fail on the text columns.
bioenergy_df = bioenergy_raw_df.groupby(pgroups)['Production'].sum().reset_index()

In [161]:
# Subset by year.
# The years of interest are named once here instead of being buried in a
# query string.
analysis_years = [2015, 2016, 2017]

# Take an explicit copy of the filtered rows: new columns are assigned onto
# this subset afterwards, and writing into a filtered selection without a copy
# raises SettingWithCopyWarning.
bioenergy_df = bioenergy_df[bioenergy_df['Year'].isin(analysis_years)].copy()

In [162]:
# Total production of each waste type: sum 'Production' over every panel key
# except 'Feedstock' and broadcast the group total back onto each row.
#
# The keys are filtered with `!=` (string equality). The previous
# `is not 'Feedstock'` compared object identity against a literal, which only
# worked through string interning and is a SyntaxWarning on Python 3.8+.
waste_type_keys = [key for key in pgroups if key != 'Feedstock']

# `assign` builds a new frame, so nothing is written into a possibly filtered
# view of an earlier frame. The string 'sum' selects pandas' own groupby sum
# rather than passing the Python builtin as a callable.
bioenergy_df = bioenergy_df.assign(
    Waste_Type_Total_Production=(
        bioenergy_df.groupby(waste_type_keys)['Production'].transform('sum')
    )
)

In [163]:
# Add yearly averages: for every non-year panel group, take the mean of the
# two production measures across years, then attach those means to each
# yearly row.
temp_avg_df = (
    bioenergy_df
    .groupby(pgroups_noyr)[['Production', 'Waste_Type_Total_Production']]
    .mean()
    .reset_index()
    .rename(columns={
        'Production': 'Avg_Production',
        'Waste_Type_Total_Production': 'Avg_Waste_Type_Total_Production',
    })
)

# Join the per-group averages back on the non-year keys.
bioenergy_df = pd.merge(bioenergy_df, temp_avg_df, on=pgroups_noyr)

In [164]:
# Feedstock production as a share of its waste type's total. The result is a
# plain ratio (numerator / denominator), not multiplied by 100.

# Per-year share.
bioenergy_df['Production_Pct'] = (
    bioenergy_df['Production'] / bioenergy_df['Waste_Type_Total_Production']
)

# Share computed from the across-year averages.
bioenergy_df['Avg_Production_Pct'] = (
    bioenergy_df['Avg_Production'] / bioenergy_df['Avg_Waste_Type_Total_Production']
)

In [165]:
# Show the column labels of the working frame after the derived columns were added.
bioenergy_df.columns

Index(['Year', 'State', 'Resource Type', 'Feedstock', 'Scenario', 'Production',
       'Waste_Type_Total_Production', 'Avg_Production',
       'Avg_Waste_Type_Total_Production', 'Production_Pct',
       'Avg_Production_Pct'],
      dtype='object')

In [166]:
# Columns to show and export, in presentation order: panel keys first, then
# the raw totals, the across-year averages, and the two share columns.
main_cols = [
    'Year', 'State', 'Resource Type', 'Scenario', 'Feedstock',
    'Production', 'Waste_Type_Total_Production',
    'Avg_Production', 'Avg_Waste_Type_Total_Production',
    'Production_Pct', 'Avg_Production_Pct',
]
bioenergy_df.loc[:, main_cols]

Unnamed: 0,Year,State,Resource Type,Scenario,Feedstock,Production,Waste_Type_Total_Production,Avg_Production,Avg_Waste_Type_Total_Production,Production_Pct,Avg_Production_Pct
0,2015,Alabama,Ag Residues,Wastes and other residues,Citrus residues,948.800,1.073390e+06,952.000000,1.086219e+06,0.000884,0.000876
1,2016,Alabama,Ag Residues,Wastes and other residues,Citrus residues,952.000,1.086208e+06,952.000000,1.086219e+06,0.000876,0.000876
2,2017,Alabama,Ag Residues,Wastes and other residues,Citrus residues,955.200,1.099059e+06,952.000000,1.086219e+06,0.000869,0.000876
3,2015,Alabama,Ag Residues,2% yield inc.,Corn stover,571490.912,5.714909e+05,573992.910333,5.739929e+05,1.000000,1.000000
4,2016,Alabama,Ag Residues,2% yield inc.,Corn stover,571018.141,5.710181e+05,573992.910333,5.739929e+05,1.000000,1.000000
5,2017,Alabama,Ag Residues,2% yield inc.,Corn stover,579469.678,5.794697e+05,573992.910333,5.739929e+05,1.000000,1.000000
6,2015,Alabama,Ag Residues,3% yield inc.,Corn stover,570928.315,5.709283e+05,573243.438333,5.732434e+05,1.000000,1.000000
7,2016,Alabama,Ag Residues,3% yield inc.,Corn stover,570839.872,5.708399e+05,573243.438333,5.732434e+05,1.000000,1.000000
8,2017,Alabama,Ag Residues,3% yield inc.,Corn stover,577962.128,5.779621e+05,573243.438333,5.732434e+05,1.000000,1.000000
9,2015,Alabama,Ag Residues,4% yield inc.,Corn stover,570928.315,5.709283e+05,573243.438333,5.732434e+05,1.000000,1.000000


# Export

In [167]:
# Write the selected columns to Excel. Grouping by the panel keys and taking
# the first row of each group moves those five keys into the index;
# merge_cells=False and freeze_panes=(1, 5) are passed through to the writer.
(
    bioenergy_df.loc[:, main_cols]
    .groupby(pgroups)
    .first()
    .to_excel(
        '../data/bioenergy/bioenergy_clean.xlsx',
        merge_cells=False,
        freeze_panes=(1, 5),
    )
)