In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Nov 17 14:11:56 2018

@author: Eric
"""

import requests
import csv
import json
from statistics import *
from collections import Counter
import datetime

In [None]:
# Prep the data: empty accumulators that the region loop fills in.

# Compliance codes observed nationally, one list per quarter "Q1" .. "Q12".
codes = {"Q" + str(quarter): [] for quarter in range(1, 13)}

# Days since last inspection, grouped by the most recent compliance category.
inspectionsByCodes = {'S': [], '_': [], 'U': [], 'V': []}

inspectionDates = []  # Date of last inspection, one entry per facility
inspectionDays = []   # Number of days since last inspection, one entry per facility

In [None]:
# The main script
# Loop through each EPA region (1-10), query ECHO for its facilities and
# accumulate compliance codes and last-inspection figures into the
# module-level containers (codes, inspectionDates, inspectionDays,
# inspectionsByCodes).
format_str = '%m/%d/%Y'  # date format of the "FacDateLastInspection" field
request_timeout = 300  # seconds; generous so slow responses still succeed, but a dead connection cannot hang the notebook forever

for x in range(1, 11):
    region = str(x).zfill(2)  # two-digit region code: "01" .. "10"
    print("R"+region) # Print out which region we're currently working on

    # call up ECHO
    url="https://ofmpub.epa.gov/echo/echo_rest_services.get_facility_info?output=json&p_reg="+region+"&p_act=Y&p_maj=Y&p_med=ALL"
    response = requests.get(url, timeout=request_timeout)
    response.raise_for_status()  # fail loudly on an HTTP error instead of trying to parse an error page
    obj = json.loads(response.content.decode()) # Turn the ECHO results into something Python can work with

    facilities = obj['Results']['Facilities'] # Grab the information on the facilities
    region="R"+region # Name the current region

    # Option to break down results by region here...

    # For each facility in the EPA region...
    for facility in facilities:
        # Compliance history for the past 12 quarters, one character per quarter:
        # V = violation, U = unknown, S = sign. violation, _ = none
        compliance = facility['Fac3yrComplianceHistory']
        if not compliance or len(compliance) < 12:
            continue  # no usable 12-quarter history; the slices/index below would be meaningless

        # If the first three quarters in the Obama era were Unknown status, skip this
        # facility because they may not have been permitted at all then, only later.
        if compliance[0:3] == "UUU":
            continue

        # These are the facilities we'll work with!
        t = compliance[8:12] # For this analysis, we measure the Trump era as the most recent 4 quarters
        o = compliance[0:4] # The Obama era is the first 4 quarters of the past 12 (as of Nov 2018)
        for pos, q in enumerate(t): # for each quarter in Trump era, add the compliance code to our list
            codes["Q"+str(pos+9)].append(q)
        for pos, q in enumerate(o):
            codes["Q"+str(pos+1)].append(q)

        # Some facilities have None (or otherwise unparseable values) for the last
        # inspection. Their compliance codes are still counted above, but they are
        # left out of the inspection statistics. Previously a parse failure silently
        # reused the values left over from the preceding facility.
        try:
            lastInspectionDate = datetime.datetime.strptime(facility["FacDateLastInspection"], format_str)
            lastInspectionDays = int(facility['FacDaysLastInspection']) # how many days (a number) since last inspection
        except (KeyError, TypeError, ValueError):
            continue

        inspectionDates.append(lastInspectionDate)
        inspectionDays.append(lastInspectionDays)
        # compliance[11] = 12th quarter, i.e. the current compliance status.
        # setdefault tolerates a status code that was not pre-registered.
        inspectionsByCodes.setdefault(compliance[11], []).append(lastInspectionDays)
        # Go to the next facility...
    # Go to the next EPA region

In [None]:
# OUTPUT: median last inspection date, plus the mean number of days since last
# inspection overall and by compliance category.
# Empty collections print "n/a" rather than raising StatisticsError.
inspectionDates=sorted(inspectionDates)

# median_low returns an actual element of the data, so it works for datetimes.
# median() averages the two middle values when the count is even, which raises
# TypeError for datetimes. For an odd count both functions give the same result.
# (can't do mean last inspection date)
if inspectionDates:
    print("median last inspection date: "+ str(median_low(inspectionDates)))
else:
    print("median last inspection date: n/a")

if inspectionDays:
    print("average number of days since inspection: "+str(mean(inspectionDays)))
else:
    print("average number of days since inspection: n/a")

for code in inspectionsByCodes:
    daysForCode = inspectionsByCodes[code]
    # A category may have no facilities at all; mean([]) would raise.
    print (code+": "+(str(mean(daysForCode)) if daysForCode else "n/a"))

In [None]:
# OUTPUT: national ("NTL") share of facilities in each compliance category,
# printed as "<code> <quarter> <percent>" for every quarter that was collected.
print("NTL")
for quarter, quarter_codes in codes.items():
    tally = Counter(quarter_codes)
    n_facilities = len(quarter_codes)
    # An empty quarter yields an empty tally, so the division below never runs.
    for status, count in tally.items():
        share = 100*(count/n_facilities)
        print(status + " " + quarter + " " + str(share))