In [1]:
from rfo_core.aws.iam import ensure_glue_service_role_exists, resolve_role_arn, get_aws_session
from rfo_core.aws.s3 import ensure_s3_bucket_exists
from rfo_core.aws.s3 import get_bucket_name, create_s3_subfolders
from rfo_core.configuration import (
    aws_key, aws_secret, aws_service_role_name,
    aws_region_default, aws_default_sync_mode, aws_versioning_on
)

In [21]:
import awswrangler as wr
import boto3 as bt

In [22]:
def aws_get_session(region: str = aws_region_default) -> bt.Session:
    return get_aws_session(aws_key=aws_key, aws_secret=aws_secret, aws_region=region)

In [4]:
session = aws_get_session(region='us-east-1')

In [5]:
table="rfo_load_enriched"

In [8]:
#print the last 'datetime' in the table
sql = "SELECT MAX(CAST(datetime AS DATE)) as last_datetime FROM "+table
last_datetime = wr.athena.read_sql_query(
    sql=sql,
    database="rfo_analytics",
    boto3_session=session
)
print("Last datetime in the table:", last_datetime['last_datetime'].iloc[0])

Last datetime in the table: 2025-09-23


In [None]:
#print unique datatypes
# checking all unique datatypes in the table
sql = "SELECT DISTINCT datatype FROM rfo_load_enriched"
unique_datatypes = wr.athena.read_sql_query(
    sql=sql,
    database="rfo_analytics",
    boto3_session=session
)
print(unique_datatypes['datatype'].tolist())

['NET_LOAD_FORECAST_CURRENT', 'RA_CAP_PLUS_RA_CREDITS', 'ONTARIO_DEMAND', 'NET_LOAD', 'DA_VIRT_LOAD', 'DA_DEMAND_FORECAST', 'LOAD_FORECAST', 'HA_LOAD_FORECAST', 'NE_PEAK_LOAD', 'EIA930_DEMAND', 'RTLOAD', 'NET_DA_FC_PLUS_RESERVES', 'RTLOAD_FINAL', 'QUEBEC_DEMAND', 'DALOAD', 'DA_FC_PLUS_RESERVES', 'ORIGINAL_LOAD_FORECAST', 'NE_LOAD_AND_RSV', 'DA_VIRT_SUPP', 'NET_LOAD_FORECAST_BID_CLOSE', 'NET_RA_CAP_PLUS_RA_CREDITS']


In [16]:
# checking all unique names in the table for datatype='EIA930_DEMAND'
sql = "SELECT DISTINCT name FROM rfo_load_enriched WHERE datatype='EIA930_DEMAND'"
unique_names = wr.athena.read_sql_query(
    sql=sql,
    database="rfo_analytics",
    boto3_session=session
)
print(sorted(unique_names['name'].tolist()))

['Arizona Public Service Company', 'Associated Electric Cooperative, Inc.', 'Avista Corporation', 'BPA', 'Balancing Authority of Northern California', 'CAISO', 'California', 'Carolinas', 'Central', 'City of Tacoma, Department of Public Utilities', 'Duke Energy Carolinas', 'Duke Energy Florida, Inc.', 'Duke Energy Progress East', 'Duke Energy Progress West', 'El Paso Electric Company', 'Electric Energy, Inc.', 'Florida', 'Florida Municipal Power Pool', 'Florida Power & Light Co.', 'Gainesville Regional Utilities', 'Homestead, City of', 'Idaho Power Company', 'Imperial Irrigation District', 'JEA', 'LG&E and KU Services Company', 'Los Angeles Department of Water and Power', 'Mid-Atlantic', 'Midwest ISO', 'Midwest Region', 'NE-ISO Total', 'NYISO', 'NaturEner Wind Watch, LLC', 'Nevada Power Company', 'New England', 'New Smyrna Beach, Utilities Commission of', 'New York', 'NorthWestern Corporation', 'Northwest', 'Ohio Valley Electric Corporation', 'PUD No. 1 of Douglas County', 'PUD No. 2 of

In [9]:
#for importing load data for MIDC
names = ('AVRN',
'Avista Corporation',
'BPA',
'City of Tacoma, Department of Public Utilities',
 'Idaho Power Company',
'PUD No. 1 of Douglas County',
 'PUD No. 2 of Grant County, Washington',
'PacifiCorp West',
 'Portland General Electric Company',
'Puget Sound',
 'Puget Sound Energy, Inc.',
'Seattle City Light',
)

In [10]:
# conditions='''WHERE region IN ('CAISO', 'NWPP', 'SOUTHWEST')
#   AND datatype = 'temperature'
#   AND CAST(datetime AS DATE) BETWEEN DATE '2016-01-01' AND DATE '2025-12-31' '''
conditions=f'''WHERE name IN {names}
    AND datatype = 'RTLOAD'
    AND CAST(datetime AS DATE) BETWEEN DATE '2021-09-01' AND DATE '2025-12-31' '''
# conditions=''

In [17]:
#To query all columns
# table_info = wr.catalog.table(
#     database="rfo_analytics", 
#     table=table, 
#     boto3_session=session
# )
# columns = table_info['Column Name'].values

#If you know which columns to query
# columns = ["datetime","datatype","avgvalue","siteid","name","station_name","state","region","location","timezone"] # for weather
columns = ["datetime","name","avgvalue","objectid","yes_objectid","iso","timezone"]
# Build SELECT clause with automatic datetime casting
select_parts = []
for col_name in columns:
    if col_name.lower() == 'datetime':
        select_parts.append(f"CAST({col_name} AS timestamp) as {col_name}")
    else:
        select_parts.append(col_name)

select_clause = ",\n    ".join(select_parts)

# Build and execute query
sql = f"""SELECT 
    {select_clause}
FROM {table}
{conditions}
"""

df = wr.athena.read_sql_query(
    sql=sql,
    database="rfo_analytics",
    boto3_session=session
)

In [19]:
#save df a csv file
df.to_csv("../data/RTLoad_MIDC+NW_1Sep21-23Sep25.csv", index=False)

In [20]:
df.head()

Unnamed: 0,datetime,name,avgvalue,objectid,yes_objectid,iso,timezone
0,2021-10-13 15:00:00,BPA,6994.75,L000060,10001845403,CAISO,America/Los_Angeles
1,2021-10-13 16:00:00,BPA,7058.5,L000060,10001845403,CAISO,America/Los_Angeles
2,2021-10-13 17:00:00,BPA,6884.166667,L000060,10001845403,CAISO,America/Los_Angeles
3,2021-10-13 18:00:00,BPA,6736.5,L000060,10001845403,CAISO,America/Los_Angeles
4,2021-10-13 19:00:00,BPA,6573.166667,L000060,10001845403,CAISO,America/Los_Angeles


In [16]:
pd.to_datetime(df['datetime']).max()

NameError: name 'pd' is not defined

In [13]:
df[df['name']=='Puget Sound'].set_index('datetime').sort_index()

Unnamed: 0_level_0,avgvalue,objectid,name,yes_objectid,iso
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2012-01-01 01:00:00,,L000117,Puget Sound,10001865762,CAISO
2012-01-01 02:00:00,,L000117,Puget Sound,10001865762,CAISO
2012-01-01 03:00:00,,L000117,Puget Sound,10001865762,CAISO
2012-01-01 04:00:00,,L000117,Puget Sound,10001865762,CAISO
2012-01-01 05:00:00,,L000117,Puget Sound,10001865762,CAISO
...,...,...,...,...,...
2013-12-31 20:00:00,,L000117,Puget Sound,10001865762,CAISO
2013-12-31 21:00:00,,L000117,Puget Sound,10001865762,CAISO
2013-12-31 22:00:00,,L000117,Puget Sound,10001865762,CAISO
2013-12-31 23:00:00,,L000117,Puget Sound,10001865762,CAISO
