# U.S. Energy Production by State

## Functions and Modules

In [1]:
import os
import urllib
import json
from collections import defaultdict
import pandas as pd

# Pulls data from EIA API.
def get_data_from_EIA(APIKey, seriesID):
    """Fetch one series from the EIA API and return the decoded JSON.

    APIKey   -- EIA API key, sent as the ``api_key`` query parameter.
    seriesID -- EIA series identifier, sent as the ``series_id`` parameter.

    Returns whatever ``json.loads`` produces for the response body.
    Errors raised by ``urlopen`` (network/HTTP failures) and by the JSON
    decoder (``ValueError``) are propagated to the caller.
    """
    from contextlib import closing
    try:  # Python 3
        from urllib.parse import urlencode
        from urllib.request import urlopen
    except ImportError:  # Python 2
        from urllib import urlencode, urlopen

    # A list of pairs (not a dict) keeps the parameter order stable, and
    # urlencode escapes any characters that are not URL-safe.
    query = urlencode([('api_key', APIKey), ('series_id', seriesID)])
    url = 'http://api.eia.gov/series/?' + query

    # closing() guarantees the connection is released even if reading or
    # decoding fails, on both Python 2 and Python 3.
    with closing(urlopen(url)) as response:
        payload = response.read()

    # read() yields bytes; decode explicitly so json.loads gets text.
    data = json.loads(payload.decode('utf-8'))
    return data

# Writes JSON data to a text file, indented by 4 spaces.
def write_json_to_txt(jsonObj, filename):
    """Dump ``jsonObj`` as JSON into ``filename``.

    The file is opened for writing (an existing file is overwritten) and
    the JSON is indented by four spaces per nesting level.
    """
    with open(filename, mode="w") as sink:
        json.dump(jsonObj, sink, indent=4)

# Pull data for all states into a nested dictionary.
def compile_state_energy_data(APIKey="DC4B181A26EDF82B41F2A7446102CAB7"):
    """Download the ``SEDS.TEPRB.<state>.A`` series for every listed state.

    One request per state is issued through ``get_data_from_EIA``.

    APIKey -- EIA API key used for every request. The default is the key
              this function previously had hard-coded, so existing callers
              are unaffected.
              NOTE(review): a key committed to source is visible to anyone
              with the file; consider passing it in from configuration.

    Returns a dict mapping each state abbreviation to the decoded API
    response for that state.
    """
    states = ["AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DC", "DE", "FL", "GA",
              "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD",
              "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ",
              "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC",
              "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"]

    # Start from an empty dict. The previous dict(keys=states) did not
    # pre-create one key per state; it created a single stray "keys" entry
    # holding the whole list, which then leaked into the returned data.
    totalEnergyProdByState = {}
    for state in states:
        seriesID = "SEDS.TEPRB.%s.A" % state
        totalEnergyProdByState[state] = get_data_from_EIA(APIKey, seriesID)
    return totalEnergyProdByState

# Get the years with recorded data, using Alaska's series as the sample.
def get_years(totalEnergyProdByState):
    """Return the first element of every data entry in Alaska's series.

    Only ``totalEnergyProdByState['AK']['series'][0]['data']`` is read;
    the entries are returned in the order they appear there.
    """
    alaskaSeries = totalEnergyProdByState['AK']['series'][0]
    years = []
    for entry in alaskaSeries['data']:
        years.append(entry[0])
    return years

# Collect data and reorganize by year.
def state_energy_by_year(totalEnergyProdByState):
    """Regroup per-state series into a ``{year: {state: value}}`` mapping.

    totalEnergyProdByState -- dict keyed by state abbreviation; for each
        listed state, ``[state]['series'][0]['data']`` must be an iterable
        of two-element ``(year, value)`` entries.

    Returns a plain dict whose keys are the years found in the data and
    whose values are dicts mapping state abbreviation to that year's value.
    A state appears under a year only if its series has an entry for it.
    The input is not modified.

    Raises ``KeyError`` if a listed state is missing from the input.
    """
    states = ["AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DC", "DE", "FL", "GA",
              "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD",
              "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ",
              "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC",
              "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"]

    # Invert state -> (year, value) into year -> state -> value in one pass.
    # defaultdict creates the inner per-year dict the first time a year is
    # seen, so every year gets its own independent dict.
    stateEnergyByYear = defaultdict(dict)
    for state in states:
        for year, value in totalEnergyProdByState[state]['series'][0]['data']:
            stateEnergyByYear[year][state] = value

    # Hand back a plain dict so a lookup of a missing year raises KeyError
    # instead of silently creating an empty entry.
    return dict(stateEnergyByYear)

## Pulling the Data

In [2]:
# Download the per-state series from the EIA API.
totalEnergyProdByState = compile_state_energy_data()

# Regroup the data by year, then load it into a pandas DataFrame
# (one column per year, one row per state).
stateEnergyByYear = pd.DataFrame.from_dict(
    state_energy_by_year(totalEnergyProdByState)
)

In [5]:
# Show every state's value for 1999. The single-argument call form prints
# the same text on Python 2 and also runs on Python 3.
print(stateEnergyByYear['1999'])

AK    2797459
AL    1578649
AR     461761
AZ     691893
CA    3103034
CO    1568048
CT     181756
DC       1257
DE       1999
FL     571981
GA     560072
HI      21821
IA     152226
ID     167747
IL    1957541
IN     812027
KS     912114
KY    3640198
LA    2913612
MA     112382
MD     284200
ME     159191
MI     598129
MN     234578
MO     131388
MS     381140
MT    1048885
NC     531858
ND     696137
NE     190064
NH     129575
NJ     342564
NM    2576134
NV      53094
NY     823734
OH     925535
OK    2281808
OR     511214
PA    3038170
RI       4403
SC     627969
SD      80564
TN     498130
TX    9544713
UT    1029827
VA    1345664
VT      63217
WA    1209424
WI     234340
WV    4242053
WY    7306693
Name: 1999, dtype: int64
