In [143]:
import pandas as pd
import numpy as np

# Import Data

In [144]:
# Read the state-level bioenergy table. low_memory=False makes pandas parse the
# file in one pass rather than in chunks, which avoids mixed-dtype inference.
bioenergy_raw_df = pd.read_csv(
    '../data/bioenergy/bioenergy_state.csv',
    low_memory=False,
)

# Clean Data

In [145]:
# Panel groups: the columns that together identify one observation.
pgroups = [
    'Year',
    'State',
    'Resource Type',
    'Feedstock',
    'Scenario',
]
# Same keys with the time dimension removed (used for across-year averages).
pgroups_noyr = [col for col in pgroups if col != 'Year']

In [146]:
# Sum up production to total values for each unique panel group.
# 'Production' is selected *before* aggregating so only that column is summed;
# summing the whole frame first would also try to add up every other
# (possibly non-numeric) column only to throw the result away.
bioenergy_df = bioenergy_raw_df.groupby(pgroups)['Production'].sum().reset_index()

In [147]:
# Subset by year.
# .copy() makes the subset an independent frame, so the column assignments in
# the following cells do not raise SettingWithCopyWarning.
bioenergy_df = bioenergy_df.query('Year in [2014, 2015, 2016]').copy()

In [148]:
# Total production of each waste type: sum Production over all feedstocks that
# share the same remaining panel keys (every panel group except 'Feedstock').
# The key is excluded with != (value equality); `is not` against a string
# literal tests object identity and is only right by accident of interning.
waste_type_keys = [x for x in pgroups if x != 'Feedstock']
# The string alias 'sum' selects pandas' own grouped sum; passing the builtin
# `sum` callable is deprecated in newer pandas releases.
bioenergy_df['Waste_Type_Total_Production'] = (
    bioenergy_df.groupby(waste_type_keys)['Production'].transform('sum')
)

In [149]:
# Add yearly averages

# Mean of both production measures across years for each year-less panel group,
# with the columns renamed so they can sit next to the per-year values.
temp_avg_df = (
    bioenergy_df
    .groupby(pgroups_noyr)
    .aggregate({'Production': 'mean', 'Waste_Type_Total_Production': 'mean'})
    .reset_index()
    .rename(columns={
        'Production': 'Avg_Production',
        'Waste_Type_Total_Production': 'Avg_Waste_Type_Total_Production',
    })
)

# Attach the averages to every yearly row of the matching group.
bioenergy_df = bioenergy_df.merge(temp_avg_df, on=pgroups_noyr)

In [150]:
# Feedstock production as a share of its waste type's total
# (stored as a fraction, per year and for the across-year averages).

bioenergy_df['Production_Pct'] = (
    bioenergy_df['Production'] / bioenergy_df['Waste_Type_Total_Production']
)
bioenergy_df['Avg_Production_Pct'] = (
    bioenergy_df['Avg_Production'] / bioenergy_df['Avg_Waste_Type_Total_Production']
)

In [152]:
# Display the column labels of the assembled frame.
bioenergy_df.columns

Index(['Year', 'State', 'Resource Type', 'Feedstock', 'Scenario', 'Production',
       'Waste_Type_Total_Production', 'Avg_Production',
       'Avg_Waste_Type_Total_Production', 'Production_Pct',
       'Avg_Production_Pct'],
      dtype='object')

In [153]:
# Columns to show and export, in presentation order.
main_cols = [
    'Year',
    'State',
    'Resource Type',
    'Scenario',
    'Feedstock',
    'Production',
    'Waste_Type_Total_Production',
    'Avg_Production',
    'Avg_Waste_Type_Total_Production',
    'Production_Pct',
    'Avg_Production_Pct',
]
bioenergy_df.loc[:, main_cols]

Unnamed: 0,Year,State,Resource Type,Scenario,Feedstock,Production,Waste_Type_Total_Production,Avg_Production,Avg_Waste_Type_Total_Production,Production_Pct,Avg_Production_Pct
0,2015,Alabama,Ag Residues,Wastes and other residues,Citrus residues,948.800,1.073390e+06,950.4000,1.079799e+06,0.000884,0.000880
1,2016,Alabama,Ag Residues,Wastes and other residues,Citrus residues,952.000,1.086208e+06,950.4000,1.079799e+06,0.000876,0.000880
2,2015,Alabama,Ag Residues,2% yield inc.,Corn stover,571490.912,5.714909e+05,571254.5265,5.712545e+05,1.000000,1.000000
3,2016,Alabama,Ag Residues,2% yield inc.,Corn stover,571018.141,5.710181e+05,571254.5265,5.712545e+05,1.000000,1.000000
4,2015,Alabama,Ag Residues,3% yield inc.,Corn stover,570928.315,5.709283e+05,570884.0935,5.708841e+05,1.000000,1.000000
5,2016,Alabama,Ag Residues,3% yield inc.,Corn stover,570839.872,5.708399e+05,570884.0935,5.708841e+05,1.000000,1.000000
6,2015,Alabama,Ag Residues,4% yield inc.,Corn stover,570928.315,5.709283e+05,570884.0935,5.708841e+05,1.000000,1.000000
7,2016,Alabama,Ag Residues,4% yield inc.,Corn stover,570839.872,5.708399e+05,570884.0935,5.708841e+05,1.000000,1.000000
8,2015,Alabama,Ag Residues,"Basecase, all energy crops",Corn stover,599151.443,5.991514e+05,627702.7610,6.277028e+05,1.000000,1.000000
9,2016,Alabama,Ag Residues,"Basecase, all energy crops",Corn stover,656254.079,6.562541e+05,627702.7610,6.277028e+05,1.000000,1.000000


# Export

In [154]:
# Write the cleaned panel to Excel with the panel groups as the row index.
# freeze_panes=(1, 5) pins the header row and the five index columns.
(
    bioenergy_df[main_cols]
    .groupby(pgroups)
    .first()
    .to_excel('../data/bioenergy/bioenergy_clean.xlsx', freeze_panes=(1, 5))
)