In [1]:
import pandas as pd
import numpy as np

# Import Data

In [2]:
# Load the raw state-level bioenergy table from CSV.
bioenergy_raw_df = pd.read_csv(
    '../data/bioenergy/bioenergy_state.csv',
    low_memory=False,  # parse the file in one pass rather than in chunks, so dtypes are inferred once
)

# Clean Data

In [3]:
# Panel group keys: the columns that together identify one panel observation
pgroups = [
    'Year',
    'State',
    'Resource Type',
    'Feedstock',
    'Scenario',
]
# The same keys with 'Year' left out (used for across-year aggregates)
pgroups_noyr = [key for key in pgroups if key != 'Year']

In [4]:
# Sum up production to total values for each unique panel group.
# 'Production' is selected *before* aggregating so that only this column is
# summed; summing every column first does unnecessary work and, depending on
# the pandas version, may raise or concatenate strings on non-numeric columns.
bioenergy_df = bioenergy_raw_df.groupby(pgroups)['Production'].sum().reset_index()

In [5]:
# Subset by year: keep only the 2015-2017 rows.
# .copy() gives an independent frame, so the column assignments made in later
# cells operate on this frame directly and do not trigger pandas'
# SettingWithCopyWarning (assignment into a filtered slice).
bioenergy_df = bioenergy_df.query('Year in [2015, 2016, 2017]').copy()

In [6]:
# Total production of each waste type: sum 'Production' over all rows that
# share every panel key except 'Feedstock', and broadcast that total back to
# each row.
# Strings are compared with != (value equality). `is not` tests object
# identity, which only matches equal strings by interning accident and emits
# a SyntaxWarning on Python 3.8+.
waste_type_groups = [col for col in pgroups if col != 'Feedstock']
bioenergy_df['Waste_Type_Total_Production'] = (
    bioenergy_df.groupby(waste_type_groups)['Production'].transform('sum')
)

In [7]:
# Add yearly averages: for every panel group ignoring 'Year', take the mean of
# the two production columns across the available years, then attach those
# means to each yearly row.

# source column -> name of its across-year average
avg_column_names = {
    'Production': 'Avg_Production',
    'Waste_Type_Total_Production': 'Avg_Waste_Type_Total_Production',
}

temp_avg_df = (
    bioenergy_df
    .groupby(pgroups_noyr)
    .aggregate({source_col: 'mean' for source_col in avg_column_names})
    .reset_index()
    .rename(columns=avg_column_names)
)

# Default inner merge on the non-year keys repeats each group's averages on all of its rows
bioenergy_df = bioenergy_df.merge(temp_avg_df, on=pgroups_noyr)

In [8]:
# Feedstock production as a share of its waste-type total.
# Values are stored as plain ratios (no multiplication by 100).

# Per-year share
bioenergy_df['Production_Pct'] = bioenergy_df['Production'].div(
    bioenergy_df['Waste_Type_Total_Production']
)
# Share based on the across-year averages
bioenergy_df['Avg_Production_Pct'] = bioenergy_df['Avg_Production'].div(
    bioenergy_df['Avg_Waste_Type_Total_Production']
)

In [9]:
# Display the column names of the assembled frame as a quick sanity check
bioenergy_df.columns

Index(['Year', 'State', 'Resource Type', 'Feedstock', 'Scenario', 'Production',
       'Waste_Type_Total_Production', 'Avg_Production',
       'Avg_Waste_Type_Total_Production', 'Production_Pct',
       'Avg_Production_Pct'],
      dtype='object')

In [10]:
# Column order used for display and export: panel keys first, then the
# production figures, their averages, and the derived shares.
main_cols = [
    'Year', 'State', 'Resource Type', 'Scenario', 'Feedstock',
    'Production', 'Waste_Type_Total_Production',
    'Avg_Production', 'Avg_Waste_Type_Total_Production',
    'Production_Pct', 'Avg_Production_Pct',
]
# Preview the first rows in that column order
bioenergy_df.loc[:, main_cols].head()

Unnamed: 0,Year,State,Resource Type,Scenario,Feedstock,Production,Waste_Type_Total_Production,Avg_Production,Avg_Waste_Type_Total_Production,Production_Pct,Avg_Production_Pct
0,2015,Alabama,Ag Residues,Wastes and other residues,Citrus residues,948.8,1073389.9,952.0,1086219.0,0.000884,0.000876
1,2016,Alabama,Ag Residues,Wastes and other residues,Citrus residues,952.0,1086208.1,952.0,1086219.0,0.000876,0.000876
2,2017,Alabama,Ag Residues,Wastes and other residues,Citrus residues,955.2,1099059.1,952.0,1086219.0,0.000869,0.000876
3,2015,Alabama,Ag Residues,2% yield inc.,Corn stover,571490.912,571490.912,573992.910333,573992.9,1.0,1.0
4,2016,Alabama,Ag Residues,2% yield inc.,Corn stover,571018.141,571018.141,573992.910333,573992.9,1.0,1.0


# Export

In [11]:
# Write the cleaned panel to Excel, indexed by the panel group keys.
# groupby(...).first() moves the five keys into the index and keeps the first
# non-null value of every remaining column within each key combination.
clean_export_df = bioenergy_df[main_cols].groupby(pgroups).first()
clean_export_df.to_excel(
    '../data/bioenergy/bioenergy_clean.xlsx',
    merge_cells=False,    # write index values on every row instead of merged cells
    freeze_panes=(1, 5),  # freeze the header row and the five index columns
)

In [12]:
# Build a lookup with one row per (Resource Type, Feedstock) pair, taking the
# first non-null 'Production Density' value found in the raw data and
# exporting it under the column name 'Production Unit'.
# NOTE(review): the rename suggests this column holds a unit label - confirm against the raw CSV.
feedstocks_df = (
    bioenergy_raw_df
    .groupby(['Resource Type', 'Feedstock'])['Production Density']
    .first()
    .reset_index()
    .rename(columns={'Production Density': 'Production Unit'})
)
feedstocks_df.to_excel('../data/bioenergy/bioenergy_sources.xlsx', index=False)