In [None]:
# Re-import edited modules automatically before each cell executes, so changes
# to the local `cdec_maps` package take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from cdec_maps import cdec
import pandas as pd

In [None]:
# Station used by the exploratory cells in this notebook.
station_id = 'LIS'

In [None]:
# Single shared reader instance; the helper functions defined in later cells
# use this module-level `r` rather than taking a reader argument.
r = cdec.Reader()

In [None]:
# Fetch the metadata tables for the station selected in `station_id`
# (previously a second hard-coded 'LIS' that could drift from that constant).
dflist = r.read_station_meta_info(station_id)

# The second table is the per-sensor listing; later cells read its
# 'Sensor Number', 'Duration' and 'Data Available' columns.
dfs = dflist[1]
dfs.head(3)

In [None]:
# Maps the label found in the sensor table's 'Duration' column to the
# one-letter duration code passed to the reader's data requests.
DURATION_MAP = {
    '(event)': 'E',
    '(daily)': 'D',
    '(monthly)': 'M',
    '(hourly)': 'H',
}
# Reverse lookup: one-letter code -> 'Duration' column label.
DURATION_MAP_INVERTED = {code: label for label, code in DURATION_MAP.items()}

def get_duration_code(duration):
    """Translate a duration label such as '(event)' into its one-letter code.

    Raises:
        KeyError: if `duration` is not one of the keys of DURATION_MAP.
    """
    code = DURATION_MAP[duration]
    return code


def to_date_format(str):
    """Normalise a date-like value to a 'YYYY-MM-DD' string.

    Args:
        str: anything `pd.to_datetime` accepts (typically a date string).
            NOTE: the name shadows the builtin `str`; it is kept unchanged so
            existing keyword callers continue to work.

    Returns:
        The formatted date, or '' when the value cannot be parsed or formatted
        (for example the word 'present' in an open-ended availability range).
    """
    try:
        return pd.to_datetime(str).strftime('%Y-%m-%d')
    except Exception:
        # Best-effort by design: unparseable input maps to ''. Catching
        # Exception (not a bare except) lets KeyboardInterrupt / SystemExit
        # propagate so a long download loop can still be interrupted.
        return ''

###
def read_station_data_for_sensor_row(station_id, df_sensor, row_index):
    """Download the whole advertised date range for one row of a sensor table.

    Args:
        station_id: station identifier forwarded to the reader.
        df_sensor: sensor table with 'Sensor Number', 'Duration' and
            'Data Available' columns.
        row_index: positional (``iloc``) index of the row to download.

    Returns:
        Whatever the module-level reader ``r.read_station_data`` returns.

    Raises:
        ValueError: if 'Data Available' does not split into exactly two parts.
        KeyError: if the row's duration label is not in DURATION_MAP.
    """
    row = df_sensor.iloc[row_index]
    sensor_number = row['Sensor Number']
    duration_label = row['Duration']
    # 'Data Available' reads "<start> to <end>"; the two-name unpacking
    # enforces that the split yields exactly a start and an end.
    start_text, end_text = (
        piece.strip() for piece in row['Data Available'].split('to')
    )
    return r.read_station_data(
        station_id,
        sensor_number,
        get_duration_code(duration_label),
        to_date_format(start_text),
        to_date_format(end_text),
    )

In [None]:
# First sensor-table row for sensor number 20 recorded at event duration.
dfs.loc[
    (dfs['Sensor Number'] == 20)
    & (dfs['Duration'] == DURATION_MAP_INVERTED['E'])
].iloc[0]

In [None]:
# Smoke-test the helper on the first row of the sensor table.
read_station_data_for_sensor_row(station_id='LIS', df_sensor=dfs, row_index=0)

In [None]:
# Sensor 28 at LIS, event duration, for the window 2021-10-01 .. 2021-10-15.
# Arguments follow the order used elsewhere in this notebook:
# (station, sensor number, duration code, start date, end date).
df1=r.read_station_data('LIS',28,'E','2021-10-1','2021-10-15')
df1

In [None]:
# Same station/sensor/duration as df1 but with a later end date (2021-10-30),
# so the two requests share the same start date and overlap.
df2=r.read_station_data('LIS',28,'E','2021-10-1','2021-10-30')
df2

In [None]:
# combine_first: df1's values take priority; df2 only supplies rows/cells that
# df1 is missing (assumes the reader returned pandas objects).
df1.combine_first(df2)

In [None]:
# Reverse priority: df2's values win and df1 only fills what df2 is missing.
df2.combine_first(df1)

In [None]:
# Render the last index entry of df1 as 'YYYY-MM-DD+HH:MM'
# (requires the index to hold datetime-like values).
df1.index[-1].strftime('%Y-%m-%d+%H:%M')

In [None]:
# Display the complete sensor table loaded earlier for the LIS station.
dfs

In [None]:
def test_cache_function():
    """Download every sensor listed in the global `dfs` and pickle each result.

    Relies on the notebook globals `dfs` (sensor table) and `station_id`; both
    must describe the same station. One file per sensor/duration is written to
    ``cdec_cache/{station_id}_{sensor_number}_{duration}.pkl``.
    """
    import os  # local import so this cell does not depend on the import cell

    # to_pickle does not create missing directories.
    os.makedirs('cdec_cache', exist_ok=True)
    # Iterate by position: rows are fetched with .iloc, so index labels must
    # not be used as offsets (they only coincide for a default RangeIndex).
    for row_pos in range(len(dfs)):
        row = dfs.iloc[row_pos]
        sensor_number = row['Sensor Number']
        duration = get_duration_code(row['Duration'])
        print(f'Reading {station_id}_{sensor_number}_{duration}')
        # Use station_id (not a hard-coded 'LIS') so the downloaded data and
        # the file name below always refer to the same station.
        df = read_station_data_for_sensor_row(station_id, dfs, row_pos)
        df.to_pickle(f'cdec_cache/{station_id}_{sensor_number}_{duration}.pkl')

In [None]:
import dask

In [None]:
from dask.distributed import Client
# Create a dask.distributed client with default arguments.
# NOTE(review): `client` is not referenced by any other cell visible here.
client = Client()  # start distributed scheduler locally.  Launch dashboard

In [None]:
def _before_caching_cache_station_data(station_id):
    """Download the full record of every sensor at `station_id` and pickle it.

    Older variant of `cache_station_data` that writes one pickle per
    sensor/duration to ``cdec_cache/{station_id}_{sensor_number}_{duration}.pkl``.

    Args:
        station_id: station whose sensors are downloaded.
    """
    import os  # local import so this cell does not depend on the import cell

    dflist = r.read_station_meta_info(station_id)
    dfs = dflist[1]  # second table: per-sensor listing
    display(dfs)
    # to_pickle does not create missing directories.
    os.makedirs('cdec_cache', exist_ok=True)
    # Iterate by position because rows are fetched with .iloc.
    for row_pos in range(len(dfs)):
        row = dfs.iloc[row_pos]
        sensor_number = row['Sensor Number']
        duration = get_duration_code(row['Duration'])
        print(f'Reading {station_id}_{sensor_number}_{duration}')
        # Bug fix: this used to request 'LIS' for every station while saving
        # the result under the requested station's name.
        df = read_station_data_for_sensor_row(station_id, dfs, row_pos)
        df.to_pickle(f'cdec_cache/{station_id}_{sensor_number}_{duration}.pkl')
        
def cache_station_data(station_id):
    """Request the most recent day of data for every sensor at `station_id`.

    The sensor table is displayed, then each sensor/duration is requested from
    one day before now with an empty end date. Nothing is returned and nothing
    is written here; presumably the reader persists what it fetches in its own
    cache -- TODO confirm against `cdec.Reader`.

    Args:
        station_id: station whose sensors are refreshed.
    """
    dflist = r.read_station_meta_info(station_id)
    dfs = dflist[1]  # second table: per-sensor listing
    display(dfs)
    # Loop-invariant, so computed once: yesterday's date as 'YYYY-MM-DD'.
    start_date = (pd.Timestamp.now() - pd.Timedelta('1D')).strftime('%Y-%m-%d')
    # Iterate by position because rows are fetched with .iloc; index labels
    # only coincide with positions for a default RangeIndex.
    for row_pos in range(len(dfs)):
        row = dfs.iloc[row_pos]
        sensor_number = row['Sensor Number']
        duration = get_duration_code(row['Duration'])
        print(f'Reading {station_id}_{sensor_number}_{duration}')
        # Return value intentionally discarded (it was bound to an unused local).
        # The '' end date presumably means "no upper bound" -- confirm with the reader.
        r.read_station_data(station_id, sensor_number, duration, start_date, '')
    

In [None]:
# Request the last day of data for every sensor at station FPT.
cache_station_data('FPT')

In [None]:
# Request the last day of data for every sensor at station EMM.
cache_station_data('EMM')

In [None]:
# Request the last day of data for every sensor at station NSL.
cache_station_data('NSL')

In [None]:
# Request the last day of data for every sensor at station MRZ.
cache_station_data('MRZ')

In [None]:
# Time loading one cached pickle from disk.
# NOTE(review): files under cdec_cache/ are written only by the pickle-writing
# helpers (test_cache_function / _before_caching_cache_station_data), not by
# cache_station_data, so this file must already exist from an earlier run.
%timeit pd.read_pickle('cdec_cache/FPT_1_E.pkl')

In [None]:
# Time loading the MRZ sensor-1 event pickle; the file must already exist on
# disk (cache_station_data itself does not write pickles).
%timeit pd.read_pickle('cdec_cache/MRZ_1_E.pkl')

In [None]:
# Batch 1: refresh the last day of data for each listed station.
ids = ['ANH', 'CLL', 'DTO', 'PCT', 'MAL', 'PTS']
# Loop variable is `station`, not `id`, so the builtin id() is not shadowed.
for station in ids:
    cache_station_data(station)

In [None]:
# Batch 2: refresh the last day of data for each listed station.
ids = ['GYS', 'SNC', 'VOL', 'BDL', 'TMS', 'TSL', 'RIV', 'BLP',
       'JER', 'CNT', 'INB', 'FAL', 'DSJ', 'FRP', 'DRO', 'FCT']
# Loop variable is `station`, not `id`, so the builtin id() is not shadowed.
for station in ids:
    cache_station_data(station)

In [None]:
# Batch 3: refresh the last day of data for each listed station.
ids = ['BET', 'SAL', 'STI', 'OSJ', 'PPT', 'HOL', 'HLL', 'OMR',
       'MDM', 'ECD', 'IDB', 'VIC', 'VCU', 'CCW', 'BBI', 'BAP']
# Loop variable is `station`, not `id`, so the builtin id() is not shadowed.
for station in ids:
    cache_station_data(station)

In [None]:
# Batch 4: refresh the last day of data for each listed station.
ids = ['MTB', 'GLC', 'ODM', 'OLD', 'UNI', 'OH1', 'SJL', 'MSD',
       'BDT', 'HRO', 'HBP', 'DMC', 'TRP', 'RRI', 'SFS']
# Loop variable is `station`, not `id`, so the builtin id() is not shadowed.
for station in ids:
    cache_station_data(station)

In [None]:
# Batch 5: refresh the last day of data for each listed station.
ids = ['SOI', 'SXS', 'MIR', 'BXS', 'GES', 'NMR', 'SRH']
# Loop variable is `station`, not `id`, so the builtin id() is not shadowed.
for station in ids:
    cache_station_data(station)

In [None]:
# Every station requested by the preceding cells, kept in the same groups.
ids_downloaded = [
    # requested one cell at a time
    'MRZ', 'NSL', 'EMM', 'FPT',
    # looped batches
    'ANH', 'CLL', 'DTO', 'PCT', 'MAL', 'PTS',
    'BET', 'SAL', 'STI', 'OSJ', 'PPT', 'HOL', 'HLL', 'OMR',
    'MDM', 'ECD', 'IDB', 'VIC', 'VCU', 'CCW', 'BBI', 'BAP',
    'GYS', 'SNC', 'VOL', 'BDL', 'TMS', 'TSL', 'RIV', 'BLP',
    'JER', 'CNT', 'INB', 'FAL', 'DSJ', 'FRP', 'DRO', 'FCT',
    'MTB', 'GLC', 'ODM', 'OLD', 'UNI', 'OH1', 'SJL', 'MSD',
    'BDT', 'HRO', 'HBP', 'DMC', 'TRP', 'RRI', 'SFS',
    'SOI', 'SXS', 'MIR', 'BXS', 'GES', 'NMR', 'SRH',
]

In [None]:
# Sanity check: how many stations are listed in ids_downloaded.
len(ids_downloaded)

In [None]:
# First group of additional stations to refresh.
ids_realtime = [
    'BIF', 'BXP', 'BTD', 'RYF', 'RYI', 'CVP', 'KA0', 'CSE',
    'DLC', 'DBD', 'DBI', 'DGL', 'DAR', 'FRK', 'GGS', 'GSM',
    'GSS', 'GLE', 'GCT', 'ISD', 'ISH', 'JTR', 'LIR',
]

In [None]:
# Refresh every station in ids_realtime. The loop variable is `station`
# rather than `id` so the builtin id() is not shadowed.
for station in ids_realtime:
    cache_station_data(station)

In [None]:
# Second group of additional stations to refresh.
ids_realtime2 = [
    'LIY', 'LIB', 'LPS', 'MBW', 'MDA', 'BIR', 'MAB', 'MHR', 'MRU', 'MUP',
    'HLT', 'MHO', 'MRX', 'MFV', 'HWB', 'BEN', 'MOK', 'MKN', 'ORX', 'ORB',
    'ORI', 'CIS', 'OH4', 'ORQ', 'TWA', 'OAD', 'OBD', 'ORM', 'PDC', 'PCO',
    'RSL', 'DWS', 'SDC', 'SWE', 'WGS', 'SSI', 'SDI', 'SRV', 'PRI', 'RGP',
    'SJD', 'SJG', 'TWI', 'VNI', 'SJC', 'SGG', 'SMR', 'SUS', 'SSS', 'SDO',
    'SGA', 'SUR', 'SUT', 'SR3', 'TPI', 'TPS', 'TPP', 'TRN', 'ULC', 'UCS',
    'WCI', 'LIS',
]

In [None]:
# Refresh every station in ids_realtime2. The loop variable is `station`
# rather than `id` so the builtin id() is not shadowed.
for station in ids_realtime2:
    cache_station_data(station)