# CAISO Historical Data Fetcher
This notebook will us the pycaiso python library to collect caiso data from 2018-2020

the data from the request is the bytes for a zip file. to get it to a pandas dataframe,
1. read the content with the io.BytesIO
2. open the bytes as a zip file with ZipFile
3. open the zip file by getting the first (and only) name in the extraction
4. read the data using the .read() method of the file object
5. convert the raw data to a string with BytesIO again
6. read the bytes to a dataframe with the pd.read_csv file

In [1]:
import os
if 'data' == os.getcwd().split('/')[-1]: os.chdir('..')
if 'ev_charging' == os.getcwd().split('/')[-1]: print('in the right place!')
else: os.chdir('/Users/varunvenkatesh/Documents/Github/ev_charging')
os.getcwd()

in the right place!


'/home/denny/Documents/mids/ev_charging'

Imports

In [2]:
import pytz
from io import BytesIO
from zipfile import ZipFile
import datetime
import requests
import pandas as pd

# use Elizabeth's version
from src import oasis


In [3]:

def _get_UTC_string(
    dt: datetime,
    local_tz: str = "America/Los_Angeles",
    fmt: str = "%Y%m%dT%H:%M-0000",
) -> str:
    """Convert local datetime to UTC string

    Converts datetime.datetime or pandas.Timestamp in local time to
    to UTC string for constructing HTTP request

    Args:
        dt (datetime.datetime): datetime to convert
        local_tz (str): timezone

    Returns:
        utc (str): UTC string
    """

    tz_ = pytz.timezone(local_tz)
    return tz_.localize(dt).astimezone(pytz.UTC).strftime(fmt)


In [4]:
def make_demand_url(start_date, end_date):
    """
    base url has resultformat =6 is for csv
    SLD_FCST = forcast
    """
    start = _get_UTC_string(start_date)
    end = _get_UTC_string(end_date)
    base_url = 'http://oasis.caiso.com/oasisapi/SingleZip?resultformat=6&queryname=SLD_FCST&version=1&startdatetime={}&enddatetime={}'
    return base_url.format(start, end)

In [35]:
def caiso_call_for_df(url):
    
    # https://stackoverflow.com/questions/5710867/downloading-and-unzipping-a-zip-file-without-writing-to-disk
    r = requests.get(url)
    with ZipFile(BytesIO(r.content)) as zipedfolder:
        with zipedfolder.open(zipedfolder.namelist()[0]) as file:
            bytes_payload = file.read()

    raw = pd.read_csv(BytesIO(bytes_payload))
    return raw
    
def forecast_filter(raw):
    TACS = ['PGE','SCE-TAC', 'PGE-TAC', 'CA ISO-TAC']
    MARKET_RUN_IDS = ['ACTUAL', 'DAM']
    
    filtered_df = raw[(raw['MARKET_RUN_ID'].isin(MARKET_RUN_IDS)) & (raw['TAC_AREA_NAME'].isin(TACS))]
    return filtered_df

In [6]:
url = 'http://oasis.caiso.com/oasisapi/SingleZip?resultformat=6&queryname=SLD_FCST&version=1&startdatetime={}&enddatetime={}'
start_date = datetime.datetime(2018,1,1) # make a datetime
end_date = start_date + datetime.timedelta(days=31)
print(_get_UTC_string(start_date), _get_UTC_string(end_date))
url.format(start_date, end_date)

20180101T08:00-0000 20180201T08:00-0000


'http://oasis.caiso.com/oasisapi/SingleZip?resultformat=6&queryname=SLD_FCST&version=1&startdatetime=2018-01-01 00:00:00&enddatetime=2018-02-01 00:00:00'

In [51]:
print('starting process')
start_date = datetime.datetime(2018,1,1)
all_df = pd.DataFrame({})
i = 0
while start_date < datetime.datetime(2020,3,1):
    end_date = start_date + datetime.timedelta(days=31)
    url = make_demand_url(start_date, end_date)
    print(i, url)
    raw = caiso_call_for_df(url)
    df = forecast_filter(raw)
    
    if all_df.empty:
        all_df = df
    else:
        all_df = pd.concat([all_df, df], axis=0)
    start_date = end_date
    i += 1
print(start_date)
print(url)

starting process
0 http://oasis.caiso.com/oasisapi/SingleZip?resultformat=6&queryname=SLD_FCST&version=1&startdatetime=20180101T08:00-0000&enddatetime=20180201T08:00-0000
1 http://oasis.caiso.com/oasisapi/SingleZip?resultformat=6&queryname=SLD_FCST&version=1&startdatetime=20180201T08:00-0000&enddatetime=20180304T08:00-0000
2 http://oasis.caiso.com/oasisapi/SingleZip?resultformat=6&queryname=SLD_FCST&version=1&startdatetime=20180304T08:00-0000&enddatetime=20180404T07:00-0000
3 http://oasis.caiso.com/oasisapi/SingleZip?resultformat=6&queryname=SLD_FCST&version=1&startdatetime=20180404T07:00-0000&enddatetime=20180505T07:00-0000
4 http://oasis.caiso.com/oasisapi/SingleZip?resultformat=6&queryname=SLD_FCST&version=1&startdatetime=20180505T07:00-0000&enddatetime=20180605T07:00-0000
5 http://oasis.caiso.com/oasisapi/SingleZip?resultformat=6&queryname=SLD_FCST&version=1&startdatetime=20180605T07:00-0000&enddatetime=20180706T07:00-0000
6 http://oasis.caiso.com/oasisapi/SingleZip?resultformat=6&

In [52]:
all_df.to_csv('historical_caiso_actual_load.csv')

try for renewables 

In [36]:
url = 'http://oasis.caiso.com/oasisapi/SingleZip?resultformat=6&queryname=SLD_REN_FCST&version=1&market_run_id=DAM&startdatetime=20130919T07:00-0000&enddatetime=20130920T07:00-0000&version=1'

url = 'http://oasis.caiso.com/oasisapi/SingleZip?queryname=SLD_REN_FCST&market_run_id=RTPD&startdatetime=20130919T07:00-0000&enddatetime=20130920T07:00-0000&version=1'

start_date = datetime.datetime(2018,1,1)
end_date = start_date + datetime.timedelta(days=31)
start_date = _get_UTC_string(start_date)
end_date = _get_UTC_string(end_date)
url = 'http://oasis.caiso.com/oasisapi/SingleZip?resultformat=6&queryname=SLD_REN_FCST&version=1&startdatetime={}&enddatetime={}'
url = url.format(start_date, end_date)

r = requests.get(url)
df = caiso_call_for_df(url)


In [28]:
r = requests.get(url)
with ZipFile(BytesIO(r.content)) as zipedfolder:
    with zipedfolder.open(zipedfolder.namelist()[0]) as file:
        bytes_payload = file.read()
        

In [29]:
bytes_payload

b'OPR_DT,OPR_HR,OPR_INTERVAL,INTERVALSTARTTIME_GMT,INTERVALENDTIME_GMT,TRADING_HUB,RENEWABLE_TYPE,LABEL,XML_DATA_ITEM,MARKET_RUN_ID_POS,RENEW_POS,MW,MARKET_RUN_ID,GROUP\n2018-01-01,24,4,2018-01-02T07:15:00-00:00,2018-01-02T07:20:00-00:00,NP15,Solar,Renewable Forecast RTD,RENEW_FCST_5MIN_MW,5,2,0,RTD,1\n2018-01-01,24,5,2018-01-02T07:20:00-00:00,2018-01-02T07:25:00-00:00,NP15,Solar,Renewable Forecast RTD,RENEW_FCST_5MIN_MW,5,2,0,RTD,1\n2018-01-01,24,6,2018-01-02T07:25:00-00:00,2018-01-02T07:30:00-00:00,NP15,Solar,Renewable Forecast RTD,RENEW_FCST_5MIN_MW,5,2,0,RTD,1\n2018-01-01,24,7,2018-01-02T07:30:00-00:00,2018-01-02T07:35:00-00:00,NP15,Solar,Renewable Forecast RTD,RENEW_FCST_5MIN_MW,5,2,0,RTD,1\n2018-01-01,24,8,2018-01-02T07:35:00-00:00,2018-01-02T07:40:00-00:00,NP15,Solar,Renewable Forecast RTD,RENEW_FCST_5MIN_MW,5,2,0,RTD,1\n2018-01-01,24,9,2018-01-02T07:40:00-00:00,2018-01-02T07:45:00-00:00,NP15,Solar,Renewable Forecast RTD,RENEW_FCST_5MIN_MW,5,2,0,RTD,1\n2018-01-01,24,10,2018-01-0

In [37]:
df

Unnamed: 0,OPR_DT,OPR_HR,OPR_INTERVAL,INTERVALSTARTTIME_GMT,INTERVALENDTIME_GMT,TRADING_HUB,RENEWABLE_TYPE,LABEL,XML_DATA_ITEM,MARKET_RUN_ID_POS,RENEW_POS,MW,MARKET_RUN_ID,GROUP
0,2018-01-01,24,4,2018-01-02T07:15:00-00:00,2018-01-02T07:20:00-00:00,NP15,Solar,Renewable Forecast RTD,RENEW_FCST_5MIN_MW,5,2,0.00,RTD,1
1,2018-01-01,24,5,2018-01-02T07:20:00-00:00,2018-01-02T07:25:00-00:00,NP15,Solar,Renewable Forecast RTD,RENEW_FCST_5MIN_MW,5,2,0.00,RTD,1
2,2018-01-01,24,6,2018-01-02T07:25:00-00:00,2018-01-02T07:30:00-00:00,NP15,Solar,Renewable Forecast RTD,RENEW_FCST_5MIN_MW,5,2,0.00,RTD,1
3,2018-01-01,24,7,2018-01-02T07:30:00-00:00,2018-01-02T07:35:00-00:00,NP15,Solar,Renewable Forecast RTD,RENEW_FCST_5MIN_MW,5,2,0.00,RTD,1
4,2018-01-01,24,8,2018-01-02T07:35:00-00:00,2018-01-02T07:40:00-00:00,NP15,Solar,Renewable Forecast RTD,RENEW_FCST_5MIN_MW,5,2,0.00,RTD,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59400,2018-01-31,18,2,2018-02-01T01:15:00-00:00,2018-02-01T01:30:00-00:00,ZP26,Solar,Renewable Forecast RTPD,RENEW_FCST_15MIN_MW,4,2,12.36,RTPD,310
59401,2018-01-31,17,1,2018-02-01T00:00:00-00:00,2018-02-01T00:15:00-00:00,ZP26,Solar,Renewable Forecast RTPD,RENEW_FCST_15MIN_MW,4,2,616.81,RTPD,310
59402,2018-01-31,17,2,2018-02-01T00:15:00-00:00,2018-02-01T00:30:00-00:00,ZP26,Solar,Renewable Forecast RTPD,RENEW_FCST_15MIN_MW,4,2,439.05,RTPD,310
59403,2018-01-31,17,3,2018-02-01T00:30:00-00:00,2018-02-01T00:45:00-00:00,ZP26,Solar,Renewable Forecast RTPD,RENEW_FCST_15MIN_MW,4,2,275.03,RTPD,310


In [None]:
url = 'http://oasis.caiso.com/oasisapi/SingleZip?resultformat=6&queryname=SLD_REN_FCST&version=1&startdatetime={}&enddatetime={}'


In [39]:
print('starting process')
start_date = datetime.datetime(2018,1,1)
all_df = pd.DataFrame({})
i = 0
while start_date < datetime.datetime(2020,3,1):
    end_date = start_date + datetime.timedelta(days=31)
    url = 'http://oasis.caiso.com/oasisapi/SingleZip?resultformat=6&queryname=SLD_REN_FCST&version=1&startdatetime={}&enddatetime={}'
    url = url.format(_get_UTC_string(start_date), _get_UTC_string(end_date))
    print(i, url)
    df = caiso_call_for_df(url)
    
    if all_df.empty:
        all_df = df
    else:
        all_df = pd.concat([all_df, df], axis=0)
    start_date = end_date
    i += 1
print(start_date)
print(url)

starting process
0 http://oasis.caiso.com/oasisapi/SingleZip?resultformat=6&queryname=SLD_REN_FCST&version=1&startdatetime=20180101T08:00-0000&enddatetime=20180201T08:00-0000
1 http://oasis.caiso.com/oasisapi/SingleZip?resultformat=6&queryname=SLD_REN_FCST&version=1&startdatetime=20180201T08:00-0000&enddatetime=20180304T08:00-0000
2 http://oasis.caiso.com/oasisapi/SingleZip?resultformat=6&queryname=SLD_REN_FCST&version=1&startdatetime=20180304T08:00-0000&enddatetime=20180404T07:00-0000
3 http://oasis.caiso.com/oasisapi/SingleZip?resultformat=6&queryname=SLD_REN_FCST&version=1&startdatetime=20180404T07:00-0000&enddatetime=20180505T07:00-0000
4 http://oasis.caiso.com/oasisapi/SingleZip?resultformat=6&queryname=SLD_REN_FCST&version=1&startdatetime=20180505T07:00-0000&enddatetime=20180605T07:00-0000
5 http://oasis.caiso.com/oasisapi/SingleZip?resultformat=6&queryname=SLD_REN_FCST&version=1&startdatetime=20180605T07:00-0000&enddatetime=20180706T07:00-0000
6 http://oasis.caiso.com/oasisapi/S

In [40]:
all_df

Unnamed: 0,OPR_DT,OPR_HR,OPR_INTERVAL,INTERVALSTARTTIME_GMT,INTERVALENDTIME_GMT,TRADING_HUB,RENEWABLE_TYPE,LABEL,XML_DATA_ITEM,MARKET_RUN_ID_POS,RENEW_POS,MW,MARKET_RUN_ID,GROUP
0,2018-01-01,24,4,2018-01-02T07:15:00-00:00,2018-01-02T07:20:00-00:00,NP15,Solar,Renewable Forecast RTD,RENEW_FCST_5MIN_MW,5,2,0.00,RTD,1
1,2018-01-01,24,5,2018-01-02T07:20:00-00:00,2018-01-02T07:25:00-00:00,NP15,Solar,Renewable Forecast RTD,RENEW_FCST_5MIN_MW,5,2,0.00,RTD,1
2,2018-01-01,24,6,2018-01-02T07:25:00-00:00,2018-01-02T07:30:00-00:00,NP15,Solar,Renewable Forecast RTD,RENEW_FCST_5MIN_MW,5,2,0.00,RTD,1
3,2018-01-01,24,7,2018-01-02T07:30:00-00:00,2018-01-02T07:35:00-00:00,NP15,Solar,Renewable Forecast RTD,RENEW_FCST_5MIN_MW,5,2,0.00,RTD,1
4,2018-01-01,24,8,2018-01-02T07:35:00-00:00,2018-01-02T07:40:00-00:00,NP15,Solar,Renewable Forecast RTD,RENEW_FCST_5MIN_MW,5,2,0.00,RTD,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59310,2020-03-16,17,2,2020-03-16T23:15:00-00:00,2020-03-16T23:30:00-00:00,ZP26,Solar,Renewable Forecast RTPD,RENEW_FCST_15MIN_MW,4,2,263.99,RTPD,310
59311,2020-03-16,17,3,2020-03-16T23:30:00-00:00,2020-03-16T23:45:00-00:00,ZP26,Solar,Renewable Forecast RTPD,RENEW_FCST_15MIN_MW,4,2,271.89,RTPD,310
59312,2020-03-16,17,4,2020-03-16T23:45:00-00:00,2020-03-17T00:00:00-00:00,ZP26,Solar,Renewable Forecast RTPD,RENEW_FCST_15MIN_MW,4,2,297.19,RTPD,310
59313,2020-03-16,16,3,2020-03-16T22:30:00-00:00,2020-03-16T22:45:00-00:00,ZP26,Solar,Renewable Forecast RTPD,RENEW_FCST_15MIN_MW,4,2,274.39,RTPD,310


In [None]:
all_df.to_csv('historical_renewables.csv')

In [43]:
url = 'http://oasis.caiso.com/oasisapi/SingleZip?resultformat=6&queryname=PRC_FUEL&fuel_region_id=ALL&startdatetime=20130919T07:00-0000&enddatetime=20130920T07:00-0000&version=1'
df = caiso_call_for_df(url)

In [47]:
df.FUEL_REGION_ID.unique()
df.columns
df

Unnamed: 0,INTERVALSTARTTIME_GMT,INTERVALENDTIME_GMT,OPR_DT,OPR_HR,FUEL_REGION_ID_XML,FUEL_REGION_ID,PRC,GROUP
0,2013-09-19T07:00:00-00:00,2013-09-19T08:00:00-00:00,2013-09-19,1,CISO,CISO,4.54,1
1,2013-09-20T06:00:00-00:00,2013-09-20T07:00:00-00:00,2013-09-19,24,CISO,CISO,4.54,1
2,2013-09-19T09:00:00-00:00,2013-09-19T10:00:00-00:00,2013-09-19,3,CISO,CISO,4.54,1
3,2013-09-19T10:00:00-00:00,2013-09-19T11:00:00-00:00,2013-09-19,4,CISO,CISO,4.54,1
4,2013-09-19T11:00:00-00:00,2013-09-19T12:00:00-00:00,2013-09-19,5,CISO,CISO,4.54,1
...,...,...,...,...,...,...,...,...
139,2013-09-19T11:00:00-00:00,2013-09-19T12:00:00-00:00,2013-09-19,5,SDG2,SDG2,4.20,6
140,2013-09-19T10:00:00-00:00,2013-09-19T11:00:00-00:00,2013-09-19,4,SDG2,SDG2,4.20,6
141,2013-09-19T09:00:00-00:00,2013-09-19T10:00:00-00:00,2013-09-19,3,SDG2,SDG2,4.20,6
142,2013-09-19T07:00:00-00:00,2013-09-19T08:00:00-00:00,2013-09-19,1,SDG2,SDG2,4.20,6


In [48]:
url = 'http://oasis.caiso.com/oasisapi/SingleZip?resultformat=6&queryname=ENE_SLRS&market_run_id=DAM&tac_zone_name=ALL&schedule=ALL&startdatetime=20130919T07:00-0000&enddatetime=20130920T07:00-0000&version=1'
df = caiso_call_for_df(url)

In [50]:
df

Unnamed: 0,INTERVALSTARTTIME_GMT,INTERVALENDTIME_GMT,SLRS_TYPE,OPR_DT,OPR_HR,OPR_INTERVAL,MARKET_RUN_ID,TAC_ZONE_NAME,SCHEDULE,XML_DATA_ITEM,POS,MW,GROUP
0,2013-09-19T11:00:00-00:00,2013-09-19T12:00:00-00:00,ALL,2013-09-19,5,0,DAM,Caiso_Totals,Export,ISO_TOT_EXP_MW,1,631.00,1
1,2013-09-19T12:00:00-00:00,2013-09-19T13:00:00-00:00,ALL,2013-09-19,6,0,DAM,Caiso_Totals,Export,ISO_TOT_EXP_MW,1,631.00,1
2,2013-09-20T06:00:00-00:00,2013-09-20T07:00:00-00:00,ALL,2013-09-19,24,0,DAM,Caiso_Totals,Export,ISO_TOT_EXP_MW,1,645.00,1
3,2013-09-19T19:00:00-00:00,2013-09-19T20:00:00-00:00,ALL,2013-09-19,13,0,DAM,Caiso_Totals,Export,ISO_TOT_EXP_MW,1,833.00,1
4,2013-09-19T14:00:00-00:00,2013-09-19T15:00:00-00:00,ALL,2013-09-19,8,0,DAM,Caiso_Totals,Export,ISO_TOT_EXP_MW,1,784.00,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
403,2013-09-19T08:00:00-00:00,2013-09-19T09:00:00-00:00,LOAD,2013-09-19,2,0,DAM,TAC_SOUTH,Load,TOT_LOAD_MW,8,1947.61,17
404,2013-09-19T10:00:00-00:00,2013-09-19T11:00:00-00:00,LOAD,2013-09-19,4,0,DAM,TAC_SOUTH,Load,TOT_LOAD_MW,8,1867.89,17
405,2013-09-19T15:00:00-00:00,2013-09-19T16:00:00-00:00,LOAD,2013-09-19,9,0,DAM,TAC_SOUTH,Load,TOT_LOAD_MW,8,2505.09,17
406,2013-09-19T17:00:00-00:00,2013-09-19T18:00:00-00:00,LOAD,2013-09-19,11,0,DAM,TAC_SOUTH,Load,TOT_LOAD_MW,8,2752.19,17


In [2]:
from io import BytesIO
from zipfile import ZipFile
import datetime
import requests
import pandas as pd
energy_url = 'http://oasis.caiso.com/oasisapi/SingleZip?resultformat=6&queryname=ENE_SLRS&market_run_id=DAM&tac_zone_name=ALL&schedule=ALL&startdatetime=20180101T07:00-0000&enddatetime=20180131T07:00-0000&version=1'
# https://stackoverflow.com/questions/5710867/downloading-and-unzipping-a-zip-file-without-writing-to-disk
r = requests.get(energy_url)
with ZipFile(BytesIO(r.content)) as zipedfolder:
    with zipedfolder.open(zipedfolder.namelist()[0]) as file:
        bytes_payload = file.read()

raw = pd.read_csv(BytesIO(bytes_payload))

In [3]:
raw

Unnamed: 0,INTERVALSTARTTIME_GMT,INTERVALENDTIME_GMT,SLRS_TYPE,OPR_DT,OPR_HR,OPR_INTERVAL,MARKET_RUN_ID,TAC_ZONE_NAME,SCHEDULE,XML_DATA_ITEM,POS,MW,GROUP
0,2018-01-01T07:00:00-00:00,2018-01-01T08:00:00-00:00,ALL,2017-12-31,24,0,DAM,Caiso_Totals,Export,ISO_TOT_EXP_MW,1,1519.00,1
1,2018-01-01T07:00:00-00:00,2018-01-01T08:00:00-00:00,ALL,2017-12-31,24,0,DAM,Caiso_Totals,Generation,ISO_TOT_GEN_MW,3,13874.24,2
2,2018-01-01T07:00:00-00:00,2018-01-01T08:00:00-00:00,ALL,2017-12-31,24,0,DAM,Caiso_Totals,Import,ISO_TOT_IMP_MW,2,8729.20,3
3,2018-01-01T07:00:00-00:00,2018-01-01T08:00:00-00:00,ALL,2017-12-31,24,0,DAM,Caiso_Totals,Load,ISO_TOT_LOAD_MW,4,20814.41,4
4,2018-01-01T07:00:00-00:00,2018-01-01T08:00:00-00:00,ETIE,2017-12-31,24,0,DAM,TAC_ECNTR,Export,TOT_EXP_MW,5,273.00,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...
12907,2018-01-30T13:00:00-00:00,2018-01-30T14:00:00-00:00,LOAD,2018-01-30,6,0,DAM,TAC_SOUTH,Load,TOT_LOAD_MW,8,1898.54,556
12908,2018-01-30T15:00:00-00:00,2018-01-30T16:00:00-00:00,LOAD,2018-01-30,8,0,DAM,TAC_SOUTH,Load,TOT_LOAD_MW,8,2209.00,556
12909,2018-01-30T21:00:00-00:00,2018-01-30T22:00:00-00:00,LOAD,2018-01-30,14,0,DAM,TAC_SOUTH,Load,TOT_LOAD_MW,8,2159.80,556
12910,2018-01-30T23:00:00-00:00,2018-01-31T00:00:00-00:00,LOAD,2018-01-30,16,0,DAM,TAC_SOUTH,Load,TOT_LOAD_MW,8,2378.84,556


In [6]:
from io import BytesIO
from zipfile import ZipFile
import datetime
import requests
import pandas as pd
renewable_url = 'http://oasis.caiso.com/oasisapi/SingleZip?resultformat=6&queryname=ENE_SLRS&market_run_id=DAM&tac_zone_name=ALL&schedule=ALL&startdatetime=20180919T07:00-0000&enddatetime=20180920T07:00-0000&version=1'
# https://stackoverflow.com/questions/5710867/downloading-and-unzipping-a-zip-file-without-writing-to-disk
r = requests.get(renewable_url)
with ZipFile(BytesIO(r.content)) as zipedfolder:
    with zipedfolder.open(zipedfolder.namelist()[0]) as file:
        bytes_payload = file.read()

raw = pd.read_csv(BytesIO(bytes_payload))

In [8]:
raw

array(['Caiso_Totals', 'NONTAC', 'TAC_ECNTR', 'TAC_NCNTR', 'TAC_NORTH',
       'TAC_SOUTH'], dtype=object)