In [None]:
# %matplotlib inline
import requests
import json
import pandas as pd
import os

# NOTE(review): a live API key is committed in source. Supply the key through
# the EIA_API_KEY environment variable instead; the literal below is kept only
# as a backward-compatible fallback and should be rotated and removed.
eia_api_key = os.environ.get("EIA_API_KEY") or "iPjXaaJSiyfWNALV62yg1Onb0VTuUgVHa0eVHgbx"
# When True, the load_* helpers fetch from EIA instead of trying the local
# cache first.
force_update = False

def get_eia_series(series_id):
    """Fetch one series from the EIA API and return the raw response text.

    Args:
        series_id: Series identifier sent as the ``series_id`` query
            parameter.

    Returns:
        The response body as ``str``. It is not parsed here; callers decode
        the JSON themselves.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
    """
    series_response = requests.get(
        "https://api.eia.gov/series/",
        # Let requests build (and URL-encode) the query string.
        params={"api_key": eia_api_key, "series_id": series_id},
        # Without a timeout a stalled connection blocks forever.
        timeout=60,
    )
    # Fail loudly on HTTP errors so an error page is never returned as data.
    series_response.raise_for_status()
    return series_response.text

def get_eia_category(category_id):
    """Fetch one EIA category and return its children as two DataFrames.

    Args:
        category_id: Category identifier; converted with ``str()`` and sent
            as the ``category_id`` query parameter.

    Returns:
        A ``(childcategories, childseries)`` tuple of ``pandas.DataFrame``
        objects built from the ``category.childcategories`` and
        ``category.childseries`` entries of the response JSON.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the JSON lacks ``category``, ``childcategories`` or
            ``childseries``.
    """
    category_response = requests.get(
        "https://api.eia.gov/category/",
        # Let requests build (and URL-encode) the query string.
        params={"api_key": eia_api_key, "category_id": str(category_id)},
        # Without a timeout a stalled connection blocks forever.
        timeout=60,
    )
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    category_response.raise_for_status()
    category = json.loads(category_response.text)['category']
    childcategories = pd.DataFrame(category['childcategories'])
    childseries = pd.DataFrame(category['childseries'])
    return childcategories, childseries

def get_eia_child_categories(category_id):
    """Return only the child-categories part of ``get_eia_category``.

    Args:
        category_id: Category identifier passed through unchanged.

    Returns:
        The first element of the pair returned by ``get_eia_category``.
    """
    child_categories, _child_series = get_eia_category(category_id)
    return child_categories

def get_eia_series_for_category(category_id):
    """Return only the child-series part of ``get_eia_category``.

    Args:
        category_id: Category identifier passed through unchanged.

    Returns:
        The second element of the pair returned by ``get_eia_category``.
    """
    _child_categories, child_series = get_eia_category(category_id)
    return child_series

def load_series(series_id):
    """Return one series, using ``data/<series_id>.json`` as a local cache.

    With ``force_update`` set, the series is always fetched with
    ``get_eia_series`` and the cache file is rewritten; a failure is reported
    on stdout rather than raised. Otherwise the cache file is read, and only
    if it is missing or unreadable is the series fetched and cached.

    The cache file contains whatever ``get_eia_series`` returned, written with
    ``json.dump``; reading it back with ``json.load`` yields that same value.

    Args:
        series_id: Series identifier; also used as the cache file name.

    Returns:
        The series as returned by ``get_eia_series`` / read from the cache.
        ``None`` if ``force_update`` is set and the fetch itself failed.

    Raises:
        Exception: Whatever ``get_eia_series`` or the cache write raises when
            ``force_update`` is not set and the cache could not be read.
    """
    cache_path = 'data/' + series_id + '.json'

    def write_cache(payload):
        # Create the cache directory on first use so the write cannot fail
        # merely because the directory is absent.
        os.makedirs('data', exist_ok=True)
        with open(cache_path, 'w', encoding='utf-8') as f:
            json.dump(payload, f, ensure_ascii=False, indent=4)

    # Bound up front so the failure path below returns None instead of
    # raising UnboundLocalError.
    series = None
    if force_update:
        try:
            series = get_eia_series(series_id)
            write_cache(series)
            print("loaded " + series_id + " from EIA")
        except Exception:
            # Best effort: report and continue. ``Exception`` (not a bare
            # except) lets KeyboardInterrupt/SystemExit propagate.
            print("failed to load " + series_id + " from EIA")
    else:
        try:
            with open(cache_path, 'r', encoding='utf-8') as f:
                series = json.load(f)
            print("loaded " + series_id + " from local cache")
        except (OSError, ValueError):
            # Cache file missing, unreadable, or not valid JSON: fetch and
            # cache instead.
            series = get_eia_series(series_id)
            write_cache(series)
            print("loaded " + series_id + " from EIA")
    return series

def load_child_categories(category_id):
    """Return a category's child categories, cached under ``meta/``.

    With ``force_update`` set, the data is always fetched with
    ``get_eia_child_categories`` and the pickle cache is rewritten; a failure
    is reported on stdout rather than raised. Otherwise the pickle
    ``meta/<category_id>_categories.pkl`` is read, and only if that fails is
    the data fetched and cached.

    Args:
        category_id: Category identifier; converted with ``str()`` for the
            cache file name.

    Returns:
        The object returned by ``get_eia_child_categories`` or unpickled from
        the cache. ``None`` if ``force_update`` is set and the fetch itself
        failed.

    Raises:
        Exception: Whatever the fetch or the cache write raises when
            ``force_update`` is not set and the cache could not be read.
    """
    cache_path = "meta/" + str(category_id) + "_categories.pkl"

    # Bound up front so the failure path below returns None instead of
    # raising UnboundLocalError.
    categories = None
    if force_update:
        try:
            categories = get_eia_child_categories(category_id)
            # Create the cache directory on first use.
            os.makedirs("meta", exist_ok=True)
            categories.to_pickle(cache_path)
            print("loaded " + str(category_id) + " subcategories from EIA")
        except Exception:
            # Best effort: report and continue. ``Exception`` (not a bare
            # except) lets KeyboardInterrupt/SystemExit propagate.
            print("failed to load " + str(category_id) + " subcategories from EIA")
    else:
        try:
            # NOTE(review): unpickling runs code embedded in the file; only
            # safe while meta/ contains files this script wrote itself.
            categories = pd.read_pickle(cache_path)
            print("loaded " + str(category_id) + " subcategories from local cache")
        except Exception:
            # Cache missing or unreadable: fetch and cache instead.
            categories = get_eia_child_categories(category_id)
            os.makedirs("meta", exist_ok=True)
            categories.to_pickle(cache_path)
            print("loaded " + str(category_id) + " subcategories from EIA")
    return categories

def load_child_series(category_id):
    """Return a category's child series listing, cached under ``meta/``.

    With ``force_update`` set, the data is always fetched with
    ``get_eia_series_for_category`` and the pickle cache is rewritten; a
    failure is reported on stdout rather than raised. Otherwise the pickle
    ``meta/<category_id>_series.pkl`` is read, and only if that fails is the
    data fetched and cached.

    Args:
        category_id: Category identifier; converted with ``str()`` for the
            cache file name.

    Returns:
        The object returned by ``get_eia_series_for_category`` or unpickled
        from the cache. ``None`` if ``force_update`` is set and the fetch
        itself failed.

    Raises:
        Exception: Whatever the fetch or the cache write raises when
            ``force_update`` is not set and the cache could not be read.
    """
    cache_path = "meta/" + str(category_id) + "_series.pkl"

    # Bound up front so the failure path below returns None instead of
    # raising UnboundLocalError.
    series = None
    if force_update:
        try:
            series = get_eia_series_for_category(category_id)
            # Create the cache directory on first use.
            os.makedirs("meta", exist_ok=True)
            series.to_pickle(cache_path)
            print("loaded " + str(category_id) + " subseries from EIA")
        except Exception:
            # Best effort: report and continue. ``Exception`` (not a bare
            # except) lets KeyboardInterrupt/SystemExit propagate.
            print("failed to load " + str(category_id) + " subseries from EIA")
    else:
        try:
            # NOTE(review): unpickling runs code embedded in the file; only
            # safe while meta/ contains files this script wrote itself.
            series = pd.read_pickle(cache_path)
            print("loaded " + str(category_id) + " subseries from local cache")
        except Exception:
            # Cache missing or unreadable: fetch and cache instead.
            series = get_eia_series_for_category(category_id)
            os.makedirs("meta", exist_ok=True)
            series.to_pickle(cache_path)
            print("loaded " + str(category_id) + " subseries from EIA")
    return series

def get_eia_states_with_plants():
    """Return the child categories of EIA category 1017.

    The lookup goes through ``load_child_categories``, so its caching
    behaviour applies.
    """
    # NOTE(review): 1017 is presumably the EIA category whose children are
    # the per-state plant categories -- confirm against the EIA category tree.
    states_root_category = 1017
    return load_child_categories(states_root_category)

In [None]:
# Walk the category tree two levels down from the states listing and cache
# every series whose id starts with "ELEC.PLANT.GEN." and ends with "-PV.M".
list_of_state_categories = get_eia_states_with_plants()
if list_of_state_categories is None or "category_id" not in list_of_state_categories:
    # Nothing to crawl: the listing failed to load or has no children.
    state_numbers = []
else:
    state_numbers = list_of_state_categories.category_id
for state_number in state_numbers:
    state_data = load_child_categories(state_number)
    # A category without children yields a frame with no "category_id"
    # column; skip it instead of failing on attribute access.
    if state_data is None or "category_id" not in state_data:
        continue
    for plant_number in state_data.category_id:
        plant_data = load_child_series(plant_number)
        # Likewise for a category that lists no series.
        if plant_data is None or "series_id" not in plant_data:
            continue
        for series_id in plant_data.series_id:
            # Logical `and` (short-circuiting) rather than bitwise `&`.
            if series_id.startswith("ELEC.PLANT.GEN.") and series_id.endswith("-PV.M"):
                load_series(series_id)

In [33]:
# Combine every cached series file under data/ into one table and export it
# as both a pickle and a CSV.
# Sorted because os.listdir returns entries in arbitrary order; this makes the
# row order of the exported files reproducible.
files = sorted(os.listdir("data"))
frames = []
for file in files:
    if not file.endswith(".json"):
        continue
    with open('data/' + file, 'r', encoding='utf-8') as f:
        series = json.load(f)
    # The cache file holds the API response text as a JSON-encoded string, so
    # a second decode is needed to get the actual document.
    series_json = json.loads(series)
    record = series_json['series'][0]
    series_id = record['series_id']
    name = record['name']
    latlon = record['latlon']
    geography = record['geography']
    updated = record['updated']
    df = pd.DataFrame(record['data'])
    if df.empty:
        # No data points: there are no columns 0/1 to derive fields from.
        print("skipped " + file + " (no data points)")
        continue
    # Column 0 is sliced as a string: first four characters -> year, next
    # two -> month. Column 1 becomes net_generation.
    df['series_id'] = series_id
    df['name'] = name
    df['year'] = df[0].apply(lambda x: x[:4])
    df['month'] = df[0].apply(lambda x: x[4:6])
    df['latlon'] = latlon
    df['geography'] = geography
    df['net_generation'] = df[1]
    df['updated'] = updated
    df = df.drop(columns=[0, 1])
    frames.append(df)
# Concatenate once at the end; concatenating inside the loop re-copies the
# accumulated rows on every iteration.
combine = pd.concat(frames) if frames else pd.DataFrame()
combine.reset_index(inplace=True)
combine.to_pickle("solar_plant_data.pkl")
combine.to_csv("solar_plant_data.csv", index=False)