In [2]:
import pandas
import us

# Data Source: EIA SEDS

https://www.eia.gov/state/seds/seds-data-complete.php?sid=US#StatisticsIndicators

- Downloaded population and GDP: https://www.eia.gov/state/seds/sep_sum/html/xls/use_pop_gdp.xlsx
- Downloaded total energy expenditures by end use sector: https://www.eia.gov/state/seds/sep_sum/html/xls/expend_tot.xlsx
- Used 2019 population, GDP, total energy expenditure, residential energy expenditure, and transportation energy expenditure to calculate each of the three expenditures per capita and as a share of GDP (a fraction of GDP, not multiplied by 100)

In [3]:
# State/county crosswalk, read entirely as strings so its STATEFP values can be
# matched against the string FIPS codes derived from the `us` package below.
state_county = pandas.read_csv("state_county.csv", dtype="str")
seds = pandas.read_csv("input/energy_expenditure.csv")

# Input columns that are consumed by the calculations and left out of the result.
raw_columns = [
    "total_expenditure_millions",
    "transportation_expenditure_millions",
    "residential_expenditure_millions",
    "gdp_millions",
    "State",
    "population_thousands",
]

# Scale the `*_millions` / `*_thousands` columns up to base units.
total_dollars = seds["total_expenditure_millions"] * 1_000_000
transportation_dollars = seds["transportation_expenditure_millions"] * 1_000_000
residential_dollars = seds["residential_expenditure_millions"] * 1_000_000
gdp_dollars = seds["gdp_millions"] * 1_000_000
# Translate each "State" value into its FIPS code via the `us` package.
state_fips = seds["State"].apply(lambda state: us.states.lookup(state).fips)
people = seds["population_thousands"] * 1_000

# Keep only the FIPS key and the derived ratios, then expand each state row to
# one row per county by joining on STATEFP (default inner join).
data = (
    seds.drop(columns=raw_columns)
    .assign(
        STATEFP=state_fips,
        energy_expenditure_per_capita=total_dollars / people,
        transportation_energy_expenditure_per_capita=transportation_dollars / people,
        residential_energy_expenditure_per_capita=residential_dollars / people,
        energy_expenditure_share_of_gdp=total_dollars / gdp_dollars,
        transportation_energy_expenditure_share_of_gdp=transportation_dollars / gdp_dollars,
        residential_energy_expenditure_share_of_gdp=residential_dollars / gdp_dollars,
    )
    .merge(state_county, on="STATEFP")
)
data

Unnamed: 0,STATEFP,energy_expenditure_per_capita,transportation_energy_expenditure_per_capita,residential_energy_expenditure_per_capita,energy_expenditure_share_of_gdp,transportation_energy_expenditure_share_of_gdp,residential_energy_expenditure_share_of_gdp,COUNTYFP
0,02,7793.051771,4547.683924,1106.403270,0.105177,0.061377,0.014932,013
1,02,7793.051771,4547.683924,1106.403270,0.105177,0.061377,0.014932,016
2,02,7793.051771,4547.683924,1106.403270,0.105177,0.061377,0.014932,020
3,02,7793.051771,4547.683924,1106.403270,0.105177,0.061377,0.014932,050
4,02,7793.051771,4547.683924,1106.403270,0.105177,0.061377,0.014932,060
...,...,...,...,...,...,...,...,...
3137,56,8095.000000,3824.827586,911.206897,0.116158,0.054884,0.013075,037
3138,56,8095.000000,3824.827586,911.206897,0.116158,0.054884,0.013075,039
3139,56,8095.000000,3824.827586,911.206897,0.116158,0.054884,0.013075,041
3140,56,8095.000000,3824.827586,911.206897,0.116158,0.054884,0.013075,043


In [4]:
# Write the county-level table to disk, leaving out the positional row index.
data.to_csv(path_or_buf="output/energy.csv", index=False)