# COMP 40760 - A1 Task 1 - US Energy Information Adminstration

Analyse United States of America Co2 emissions from 2010 to 2020 

This notebook covers Task 1 - Data Collection



In [2]:
import json, requests, urllib
from pathlib import Path
from datetime import datetime
import urllib.request
import pandas as pd

In [2]:
# API Key 
api_key = "# Prefix for API URLs
states_api_prefix = "https://api.eia.gov/v2/co2-emissions/co2-emissions-aggregates/"
#define time period fro 2010 to 2020
time_period = '&start=2010&end=2020'

Create directory for raw data storage, if it does not already exist:


In [3]:
dir_raw = Path("USEraw")
dir_raw_state = Path(dir_raw/'states_raw')
dir_raw_fuel = Path(dir_raw/'fuel_raw')
dir_raw.mkdir(parents=True, exist_ok=True)
dir_raw_state.mkdir(parents=True, exist_ok=True)
dir_raw_fuel.mkdir(parents=True, exist_ok=True)

# Data Collection

## Facets

Identify Facets... the keys that allow to filter data in API Call

In [4]:
url = states_api_prefix + 'facet'+api_key
# print(url)
response = urllib.request.urlopen(url)
raw_json = response.read().decode("utf-8")
print(raw_json)
data = json.loads(raw_json)
facets = data['response']['facetOptions']
with open (str(dir_raw)+"/facet_file.txt",'w') as facet_file:
    facet_file.write(str(facets))
facet_file.close()

print('The filtering options by facet in this api call are by \n 1. '+facets[0], '\n', '2. ' + facets[1], '\n', '3. '+facets[2])

{"response":{"totalFacetOptions":3,"facetOptions":["sectorId","fuelId","stateId"]},"request":{"command":"\/v2\/co2-emissions\/co2-emissions-aggregates\/facet\/","params":{"api_key":"Iry5FiRSbc40vSwSvzgM9KW9YmmXYAuNHKQ3IYcg"}},"apiVersion":"2.0.3"}
The filtering options by facet in this api call are by 
 1. sectorId 
 2. fuelId 
 3. stateId


### What the facets mean
sectorId is the ID of the sector causing the emissions

fuelID is the ID of the fuel causing the emissions

stateId is the ID of the state causing the emissions

Define Function to extract list of facet key value pairs where the key is the ID and the name is the value and save to json file

In [5]:
def fetchFacets(facet):
    #construct url
    url = states_api_prefix + 'facet/'+facet + api_key
    response = urllib.request.urlopen(url)
    #decode json to a raw string
    raw_json = response.read().decode("utf-8")
    #load string into a json object
    data = json.loads(raw_json)['response']['facets']
    #since states are obvious, print the sectors and fuels provided by the api
    if facet == 'sectorId' or facet == 'fuelId':
        print('Instances of  ' + facet + ' are:')
        for instance in data:
            print(instance['name'])
    else:
        print('Facet instances are all states in the USA')
        
    #dump json object to file
    fname = "%s.json" % (facet)
    out_path = dir_raw / fname
    print("Writing data to %s \n" % out_path)
    fout = open(out_path, "w")
    json.dump(data, fout, indent=4, sort_keys=True)
    fout.close()
    #return only states list as will be needed in further data collection
    if facet == 'stateId':
        return data



Call above function for all facets

In [6]:
#Loop through each facet
for facet in facets:
    states = fetchFacets(facet)

Instances of  sectorId are:
Commercial carbon dioxide emissions
Residential carbon dioxide emissions
Electric Power carbon dioxide emissions
Total carbon dioxide emissions from all sectors
Industrial carbon dioxide emissions
Transportation carbon dioxide emissions
Writing data to USEraw/sectorId.json 

Instances of  fuelId are:
Coal
Petroleum
Natural Gas
All Fuels
Writing data to USEraw/fuelId.json 

Facet instances are all states in the USA
Writing data to USEraw/stateId.json 



## States

Define function to find all CO2 emissions data related to a particular state, and dump this into a json file

In [7]:
def fetch_state_info(state):
    url = states_api_prefix + 'data'+ api_key+ '&data[]=value'+time_period +'&facets[stateId][]='+state['id']
    response = urllib.request.urlopen(url)
    raw_json = response.read().decode("utf-8")
    data = json.loads(raw_json)['response']['data']
    fname = "%s.json" % (state['name'])
    out_path = dir_raw_state / fname
    print("Writing data to %s" % out_path)
    fout = open(out_path, "w")
    json.dump(data, fout, indent=4, sort_keys=True)
    fout.close()



Get CO2 emissions data of each state 

In [8]:
for state in states:
    fetch_state_info(state)

Writing data to USEraw/states_raw/Colorado.json
Writing data to USEraw/states_raw/Maryland.json
Writing data to USEraw/states_raw/Michigan.json
Writing data to USEraw/states_raw/Minnesota.json
Writing data to USEraw/states_raw/Tennessee.json
Writing data to USEraw/states_raw/Washington.json
Writing data to USEraw/states_raw/Alaska.json
Writing data to USEraw/states_raw/Arizona.json
Writing data to USEraw/states_raw/Kansas.json
Writing data to USEraw/states_raw/New Hampshire.json
Writing data to USEraw/states_raw/Ohio.json
Writing data to USEraw/states_raw/Wisconsin.json
Writing data to USEraw/states_raw/West Virginia.json
Writing data to USEraw/states_raw/Wyoming.json
Writing data to USEraw/states_raw/Mississippi.json
Writing data to USEraw/states_raw/Oregon.json
Writing data to USEraw/states_raw/Arkansas.json
Writing data to USEraw/states_raw/Delaware.json
Writing data to USEraw/states_raw/Florida.json
Writing data to USEraw/states_raw/Montana.json
Writing data to USEraw/states_raw/Ne

Get each states coal, natural gas and petroleum consumption

First get facet ideas for total coal, natural gas and petroleum consumption by state

In [9]:
total_coal_id = 'CLTCP'
total_NG_id = 'NGTCP'
total_petroleum_id = 'PATCB'

Call State Energy Data System (SEDS) API to gather fuel consumption by state 



In [10]:
#different api call to CO2 emissions
fuel_api_prefix = "https://api.eia.gov/v2/seds/"
fuel_data = []
for state in states:
    url = fuel_api_prefix +'data'+api_key+ '&data[]=value'+time_period +'&facets[stateId][]='\
    +state['id'] +'&facets[seriesId][]='+total_coal_id+'&facets[seriesId][]='\
    +total_petroleum_id+'&facets[seriesId][]='+total_NG_id
    response = urllib.request.urlopen(url)
    raw_json = response.read().decode("utf-8")
    data = json.loads(raw_json)['response']['data']
    fuel_data.append(data)
    fname = "fuel_data%s.json" % (state['name'])
    out_path = dir_raw_fuel / fname
    print("Writing data to %s" % out_path)
    fout = open(out_path, "w")
    json.dump(fuel_data, fout, indent=4, sort_keys=True)
    fout.close()


Writing data to USEraw/fuel_raw/fuel_dataColorado.json
Writing data to USEraw/fuel_raw/fuel_dataMaryland.json
Writing data to USEraw/fuel_raw/fuel_dataMichigan.json
Writing data to USEraw/fuel_raw/fuel_dataMinnesota.json
Writing data to USEraw/fuel_raw/fuel_dataTennessee.json
Writing data to USEraw/fuel_raw/fuel_dataWashington.json
Writing data to USEraw/fuel_raw/fuel_dataAlaska.json
Writing data to USEraw/fuel_raw/fuel_dataArizona.json
Writing data to USEraw/fuel_raw/fuel_dataKansas.json
Writing data to USEraw/fuel_raw/fuel_dataNew Hampshire.json
Writing data to USEraw/fuel_raw/fuel_dataOhio.json
Writing data to USEraw/fuel_raw/fuel_dataWisconsin.json
Writing data to USEraw/fuel_raw/fuel_dataWest Virginia.json
Writing data to USEraw/fuel_raw/fuel_dataWyoming.json
Writing data to USEraw/fuel_raw/fuel_dataMississippi.json
Writing data to USEraw/fuel_raw/fuel_dataOregon.json
Writing data to USEraw/fuel_raw/fuel_dataArkansas.json
Writing data to USEraw/fuel_raw/fuel_dataDelaware.json
Writ

## Web Scrape Population by State from 2010 to 2020

Download csv file from US census website detailing population by state from 2010 to 2020

In [7]:
url = 'https://www2.census.gov/programs-surveys/popest/datasets/2010-2020/national/totals/nst-est2020.csv'
response = requests.get(url)
fname = 'Population.csv'
out_path = dir_raw / fname
print(response.content)
open(out_path, "wb").write(response.content)


b'SUMLEV,REGION,DIVISION,STATE,NAME,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,POPESTIMATE2013,POPESTIMATE2014,POPESTIMATE2015,POPESTIMATE2016,POPESTIMATE2017,POPESTIMATE2018,POPESTIMATE2019,POPESTIMATE042020,POPESTIMATE2020\r\n010,0,0,00,United States,308745538,308758105,309327143,311583481,313877662,316059947,318386329,320738994,323071755,325122128,326838199,328329953,329398742,329484123\r\n020,1,0,00,Northeast Region,55317240,55318414,55380764,55608318,55782661,55912775,56021339,56052790,56063777,56083383,56084543,56002934,55924275,55849869\r\n020,2,0,00,Midwest Region,66927001,66929737,66975328,67164092,67348275,67576524,67765576,67885682,68018175,68160342,68263019,68340091,68357895,68316744\r\n020,3,0,00,South Region,114555744,114563042,114869421,116019483,117264196,118397213,119666248,121049223,122419547,123611036,124649156,125686544,126494232,126662754\r\n020,4,0,00,West Region,71945553,71946912,72101630,72791588,73482530,74173435,74933166,75

8014