In [1]:
import numpy as np
import pandas as pd
import os
import shutil
import time
import datetime
import requests

In [2]:
def make_url(series_label, api_key=None):
    """Build the EIA series-API request URL for one of the known series.

    Parameters
    ----------
    series_label : str
        Short label of the series to request. Must be one of
        'ca_wind_gen', 'ca_solar_gen' or 'ca_demand'.
    api_key : str, optional
        EIA API key. When omitted, the key is read from the
        ``EIA_API_KEY`` environment variable so that no credential has to
        be stored in the notebook itself.

    Returns
    -------
    str
        The request URL, carrying the API key and the series id as query
        parameters.

    Raises
    ------
    KeyError
        If ``series_label`` is not one of the known labels.
    RuntimeError
        If no key is passed and ``EIA_API_KEY`` is unset or empty.
    """
    series_ids = {
        'ca_wind_gen': 'EBA.CAL-ALL.NG.WND.HL',
        'ca_solar_gen': 'EBA.CAL-ALL.NG.SUN.HL',
        'ca_demand': 'EBA.CAL-ALL.D.HL'
    }
    cur_id = series_ids[series_label]

    # NOTE(review): a literal key used to be embedded here. Anything that was
    # committed with the notebook should be treated as leaked and rotated.
    eia_key = api_key or os.environ.get('EIA_API_KEY')
    if not eia_key:
        raise RuntimeError(
            'No EIA API key available: pass api_key=... or set the '
            'EIA_API_KEY environment variable.'
        )

    url = (
        f'http://api.eia.gov/series/'
        f'?api_key={eia_key}'
        f'&series_id={cur_id}'
    )
    return url
    

In [3]:
# Build the request URL for the 'ca_demand' series.
cur_url = make_url('ca_demand')

In [4]:
# Download the demand series. The timeout keeps a stalled connection from
# hanging the kernel, and raise_for_status() reports an HTTP error here
# rather than as a confusing JSON/KeyError when the payload is unpacked.
r = requests.get(cur_url, timeout=30)
r.raise_for_status()

In [5]:
# Keep only the observations of the first series in the response; each entry
# is a [timestamp string, value] pair.
ca_demand = r.json()['series'][0]['data']

In [6]:
# Build the request URL for the 'ca_wind_gen' series.
cur_url = make_url('ca_wind_gen')

In [7]:
# Download the wind-generation series. The timeout keeps a stalled connection
# from hanging the kernel, and raise_for_status() reports an HTTP error here
# rather than as a confusing JSON/KeyError when the payload is unpacked.
r = requests.get(cur_url, timeout=30)
r.raise_for_status()

In [8]:
# Keep only the observations of the first series in the response; each entry
# is a [timestamp string, value] pair and the list runs newest-first.
ca_wind_gen = r.json()['series'][0]['data']

In [9]:
# Build the request URL for the 'ca_solar_gen' series.
cur_url = make_url('ca_solar_gen')

In [10]:
# Download the solar-generation series. The timeout keeps a stalled
# connection from hanging the kernel, and raise_for_status() reports an HTTP
# error here rather than as a confusing JSON/KeyError when the payload is
# unpacked.
r = requests.get(cur_url, timeout=30)
r.raise_for_status()

In [11]:
# Keep only the observations of the first series in the response; each entry
# is a [timestamp string, value] pair and the list runs newest-first.
ca_solar_gen = r.json()['series'][0]['data']

In [12]:
# Compare the number of observations per series. Demand has more rows than
# wind/solar, so it has to be trimmed before the three can be combined.
len(ca_demand), len(ca_wind_gen), len(ca_solar_gen)

(47004, 20689, 20689)

In [13]:
# Last list element = oldest wind record (the series is newest-first).
ca_wind_gen[-1]

['20180701T01-07', 3958]

In [14]:
# Last list element = oldest solar record (the series is newest-first).
ca_solar_gen[-1]

['20180701T01-07', 0]

In [15]:
# Check that the demand row at this offset has the same timestamp as the
# oldest wind/solar record. The offset is specific to the data downloaded
# for this run.
ca_demand[-26305]

['20180701T01-07', 26061]

In [16]:
# Drop the demand history that pre-dates the wind/solar series. The cut
# point is found by timestamp instead of a hard-coded offset, so the cell
# stays correct when the download changes and is safe to re-run (a second
# run finds the match at the end of the list and keeps everything).
# list.index raises ValueError if the timestamp is missing from demand.
oldest_shared_ts = ca_wind_gen[-1][0]
tail_cut = [row[0] for row in ca_demand].index(oldest_shared_ts)
ca_demand = ca_demand[:tail_cut + 1]

In [17]:
# Confirm the trimmed demand series now ends on the same timestamp as the
# oldest wind/solar record.
ca_demand[-1]

['20180701T01-07', 26061]

In [18]:
# Newest demand record, for comparison with the newest wind/solar records.
ca_demand[0]

['20201109T11-08', 27383]

In [19]:
# Newest wind record.
ca_wind_gen[0]

['20201109T00-08', 2658]

In [20]:
# Newest solar record.
ca_solar_gen[0]

['20201109T00-08', 7]

In [21]:
# Check that the demand row at this position has the same timestamp as the
# newest wind/solar record. The position is specific to the data downloaded
# for this run.
ca_demand[11]

['20201109T00-08', 25596]

In [22]:
# Drop the demand rows that are newer than the newest wind/solar record.
# The start position is found by timestamp instead of a hard-coded index, so
# the cell stays correct when the download changes and is safe to re-run (a
# second run finds the match at position 0 and keeps everything).
# list.index raises ValueError if the timestamp is missing from demand.
newest_shared_ts = ca_wind_gen[0][0]
head_cut = [row[0] for row in ca_demand].index(newest_shared_ts)
ca_demand = ca_demand[head_cut:]

In [23]:
# After trimming both ends, demand should have as many rows as wind/solar.
len(ca_demand)

20689

In [24]:
# Transpose the [timestamp, value] pairs into two parallel lists: the
# timestamp strings and the demand values.
dt, demand = (list(column) for column in zip(*ca_demand))

In [25]:
# Split the wind series into timestamps and values. The timestamps are kept
# (not discarded into `_`, which IPython uses for the last cell output) so we
# can verify that every wind row lines up with the demand row at the same
# position before the columns are combined positionally.
wind_dt, wind_gen = map(list, zip(*ca_wind_gen))
assert wind_dt == dt, 'wind_gen timestamps do not line up with demand timestamps'

In [26]:
# Split the solar series into timestamps and values. The timestamps are kept
# (not discarded into `_`, which IPython uses for the last cell output) so we
# can verify that every solar row lines up with the demand row at the same
# position before the columns are combined positionally.
solar_dt, solar_gen = map(list, zip(*ca_solar_gen))
assert solar_dt == dt, 'solar_gen timestamps do not line up with demand timestamps'

In [27]:
# Inspect the raw timestamp string format before parsing it.
dt[0]

'20201109T00-08'

In [28]:
# Parse the timestamp strings. They carry a local UTC offset that is not
# constant (both -07 and -08 appear in the data), so utc=True converts every
# value to one tz-aware UTC index. errors='raise' makes an unparseable string
# fail loudly instead of silently becoming NaT.
dt_obj = pd.to_datetime(dt, errors='raise', yearfirst=True, utc=True)

In [29]:
# Spot-check that the first timestamp was converted to UTC as expected.
dt_obj[0]

Timestamp('2020-11-09 08:00:00+0000', tz='UTC')

In [30]:
# Assemble the three series into one frame. The columns are matched purely
# by position, so all four inputs must have the same length and ordering.
df = pd.DataFrame(
    dict(
        dt=dt_obj,
        demand_MWh=demand,
        wind_gen_MWh=wind_gen,
        solar_gen_MWh=solar_gen,
    )
)

In [31]:
# Put the rows in chronological order (the API delivers them newest-first)
# and renumber the index from 0. Sorting on the timestamp, rather than
# flipping the frame positionally, makes this cell idempotent: re-running it
# no longer reverses the order back again.
df = df.sort_values('dt').reset_index(drop=True)

In [32]:
# Preview the first rows; after reordering these are the oldest hours.
df.head()

Unnamed: 0,dt,demand_MWh,wind_gen_MWh,solar_gen_MWh
0,2018-07-01 08:00:00+00:00,26061,3958,0
1,2018-07-01 09:00:00+00:00,24604,4087,0
2,2018-07-01 10:00:00+00:00,23554,3710,0
3,2018-07-01 11:00:00+00:00,22843,3630,0
4,2018-07-01 12:00:00+00:00,22538,3156,0


In [33]:
# Preview the last rows; after reordering these are the newest hours.
df.tail()

Unnamed: 0,dt,demand_MWh,wind_gen_MWh,solar_gen_MWh
20684,2020-11-09 04:00:00+00:00,30882,2565,7
20685,2020-11-09 05:00:00+00:00,30064,2975,7
20686,2020-11-09 06:00:00+00:00,28741,2465,7
20687,2020-11-09 07:00:00+00:00,27026,2950,7
20688,2020-11-09 08:00:00+00:00,25596,2658,7


In [34]:
# Check column dtypes and confirm there are no missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20689 entries, 0 to 20688
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype              
---  ------         --------------  -----              
 0   dt             20689 non-null  datetime64[ns, UTC]
 1   demand_MWh     20689 non-null  int64              
 2   wind_gen_MWh   20689 non-null  int64              
 3   solar_gen_MWh  20689 non-null  int64              
dtypes: datetime64[ns, UTC](1), int64(3)
memory usage: 646.7 KB


In [35]:
# Write the combined frame to CSV (the integer index is written as the first,
# unnamed column, as before). The file name is built from the UTC date range
# actually present in `dt` instead of a hard-coded range, so a re-run on a
# newer download is labelled correctly.
first_day = df['dt'].min().strftime('%Y%m%d')
last_day = df['dt'].max().strftime('%Y%m%d')
df.to_csv(f'EIA-{first_day}-{last_day}.csv')