In [2]:
from usagrid import s3
import pandas as pd

# Load data from s3

# Keys found under the landing folder; the first listed entry is skipped
# (presumably the folder placeholder itself -- TODO confirm).
files = s3.list_files_in_folder("usagrid", "landingarea/balancing_authority")[1:]

# Read every listed object and convert it to a pandas DataFrame.
dfs = [
    s3.read_pyarrow_df_from_s3("usagrid", object_key).to_pandas()
    for object_key in files
]

# Stack the per-file frames into a single frame (row index is not reset).
df = pd.concat(dfs)

In [None]:
# Preview the first rows of the concatenated frame.
df.head()

In [3]:
def set_dtypes(df:pd.DataFrame):
    """Return a copy of ``df`` with the known columns cast to fixed dtypes.

    ``period`` becomes ``datetime64[ns]``, ``value`` becomes ``float64`` and
    the remaining listed columns become ``object``. Columns that are not
    listed are carried over unchanged, and the input frame is not modified.

    Raises:
        KeyError: if any of the listed columns is missing from ``df``.
    """
    text_columns = (
        "respondent",
        "respondent-name",
        "type",
        "type-name",
        "timezone",
        "timezone-description",
        "value-units",
    )

    # Target dtype per column name (names are still hyphenated at this stage).
    schema = {"period": "datetime64[ns]", "value": "float64"}
    schema.update({column: "object" for column in text_columns})

    # astype with a mapping converts only the listed columns and returns a new frame.
    return df.astype(schema)


def clean_columns(df:pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` whose column names use ``_`` instead of ``-``.

    The input frame is left untouched, so the function can be re-run on the
    same frame (and chained with ``.pipe``) without changing the caller's
    data as a side effect.

    Args:
        df: frame whose column labels are strings.

    Returns:
        A new DataFrame with the same data and the renamed columns.
    """
    # rename builds a new frame; assigning to df.columns would rename the
    # caller's frame in place.
    return df.rename(columns=lambda col: col.replace("-", "_"))




In [8]:
from pathlib import Path
import itertools
from tqdm import tqdm

# Apply transformations: enforce dtypes, normalise column names, index by
# period, and add a string "year" column used to partition the output files.
df_mod = (
    df.pipe(set_dtypes)
    .pipe(clean_columns)
    .set_index("period")
    .sort_index()
    .assign(year=lambda frame: frame.index.year.astype("str"))
)

# Maximum number of groups to write. 1 keeps the single-group trial run this
# cell previously hard-coded as a [:1] slice; set to None to write every group.
MAX_GROUPS = 1

# Group once, outside the loop. Iterating the groupby yields only the
# (timezone, type_name, year) combinations that actually occur in the data,
# so no lookup can fail on a combination that has no rows. sort=False keeps
# the groups in order of first appearance.
groups = df_mod.groupby(["timezone", "type_name", "year"], sort=False)

n_groups = groups.ngroups if MAX_GROUPS is None else min(MAX_GROUPS, groups.ngroups)

pbar = tqdm(itertools.islice(groups, MAX_GROUPS), total=n_groups, leave=True)

for combo, df_group in pbar:

    timezone, type_name, year = combo

    pbar.set_description(" ".join(combo))

    # "year" is only a partitioning helper; it is not written out.
    df_write = df_group.drop(columns="year")

    # Folder/file-safe form of the type name, computed once per group.
    type_slug = type_name.replace(" ", "_").replace("-", "_")

    # as_posix() guarantees "/" separators in the object key on every OS, and
    # appending the extension directly avoids with_suffix() cutting off part of
    # a name that happens to contain a dot.
    write_path = (Path("bronze")
                    .joinpath("balancing_authority", timezone, type_slug, f"{type_slug}_{year}.arrow")
                    .as_posix())

    s3.write_data_to_s3_pyarrow(bucket_name="usagrid",object_key=write_path,data=df_write)



Central Day-ahead demand forecast 2022: 100%|██████████| 1/1 [00:01<00:00,  1.25s/it]


In [9]:
# Inspect the frame assigned to df_write by the last iteration of the write loop.
df_write

Unnamed: 0_level_0,respondent,respondent_name,type,type_name,timezone,timezone_description,value,value_units
period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022-01-01,MISO,"Midcontinent Independent System Operator, Inc.",DF,Day-ahead demand forecast,Central,Central,1671693.0,megawatthours
2022-01-01,WALC,Western Area Power Administration - Desert Sou...,DF,Day-ahead demand forecast,Central,Central,20011.0,megawatthours
2022-01-01,SE,Southeast,DF,Day-ahead demand forecast,Central,Central,529744.0,megawatthours
2022-01-01,CPLW,Duke Energy Progress West,DF,Day-ahead demand forecast,Central,Central,11263.0,megawatthours
2022-01-01,TIDC,Turlock Irrigation District,DF,Day-ahead demand forecast,Central,Central,6904.0,megawatthours
...,...,...,...,...,...,...,...,...
2022-12-31,SPA,Southwestern Power Administration,DF,Day-ahead demand forecast,Central,Central,1384.0,megawatthours
2022-12-31,SCEG,"Dominion Energy South Carolina, Inc.",DF,Day-ahead demand forecast,Central,Central,52403.0,megawatthours
2022-12-31,TVA,Tennessee Valley Authority,DF,Day-ahead demand forecast,Central,Central,360407.0,megawatthours
2022-12-31,LDWP,Los Angeles Department of Water and Power,DF,Day-ahead demand forecast,Central,Central,59906.0,megawatthours


In [20]:
# Show the S3 object key built by the most recent iteration of the write loop.
# NOTE(review): the recorded output has no per-type folder, unlike the path the
# write loop builds, so it appears to come from an older run -- re-run to confirm.
write_path

'bronze/balancing_authority/Central/Day_ahead_demand_forecast_2024.arrow'

In [17]:
# Inspect df_write again.
# NOTE(review): execution counts are out of order here, so this output
# presumably reflects a different run of the write loop -- confirm after Restart & Run All.
df_write

Unnamed: 0_level_0,respondent,respondent_name,type,type_name,timezone,timezone_description,value,value_units
period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2024-01-01,TAL,City of Tallahassee,DF,Day-ahead demand forecast,Central,Central,6750.0,megawatthours
2024-01-01,US48,United States Lower 48,DF,Day-ahead demand forecast,Central,Central,10604160.0,megawatthours
2024-01-01,GVL,Gainesville Regional Utilities,DF,Day-ahead demand forecast,Central,Central,4468.0,megawatthours
2024-01-01,PJM,"PJM Interconnection, LLC",DF,Day-ahead demand forecast,Central,Central,2174759.0,megawatthours
2024-01-01,CAR,Carolinas,DF,Day-ahead demand forecast,Central,Central,612626.0,megawatthours
...,...,...,...,...,...,...,...,...
2024-03-05,IPCO,Idaho Power Company,DF,Day-ahead demand forecast,Central,Central,49565.0,megawatthours
2024-03-05,CISO,California Independent System Operator,DF,Day-ahead demand forecast,Central,Central,521052.0,megawatthours
2024-03-05,PSEI,"Puget Sound Energy, Inc.",DF,Day-ahead demand forecast,Central,Central,61271.0,megawatthours
2024-03-05,TPWR,"City of Tacoma, Department of Public Utilities...",DF,Day-ahead demand forecast,Central,Central,16192.0,megawatthours
