# Retrieve generation data from AEMO

In [1]:
import nemosis, pandas as pd, numpy
from nemosis import static_table, dynamic_data_compiler
from pathlib import Path

### Dispatch table

Should only need to do this once

In [2]:
# # Downloading the Generator and Load Dispatch table from NEMOSIS for a specified time period
# dynamic_data_compiler(
#     start_time='2023/12/01 00:00:00',
#     end_time='2024/01/01 00:10:00',
#     table_name='DISPATCHLOAD',
#     raw_data_location=raw_NEM_cache
# )

### Static table of generator IDs

In [3]:
raw_NEM_cache = "/scratch/w42/dr6273/raw_AEMO_cache"

In [4]:
# Downloading the Generators and Scheduled Loads table from NEMOSIS
NEM_gen = static_table(
    table_name="Generators and Scheduled Loads", 
    raw_data_location=raw_NEM_cache,
    update_static_file=True
)
                              
#  Selecting only generating units, not loads or batteries
NEM_gen = NEM_gen.loc[NEM_gen['Dispatch Type'] == "Generating Unit"]

INFO: Retrieving static table Generators and Scheduled Loads
INFO: Downloading data for table Generators and Scheduled Loads


In [5]:
NEM_gen.head()

Unnamed: 0,Participant,Station Name,Region,Dispatch Type,Category,Classification,Fuel Source - Primary,Fuel Source - Descriptor,Technology Type - Primary,Technology Type - Descriptor,Aggregation,DUID,Reg Cap generation (MW),Max Cap generation (MW)
1,South Australian Water Corporation,Adelaide Desalination Plant,SA1,Generating Unit,Market,Scheduled,Battery storage,Grid,Storage,Battery and Inverter,Y,ADPBA1G,7.76,6.15
3,South Australian Water Corporation,Adelaide Desalination Plant,SA1,Generating Unit,Market,Non-Scheduled,Hydro,Water,Renewable,Run of River,Y,ADPMH1,1.44,1.0
4,South Australian Water Corporation,Adelaide Desalination Plant,SA1,Generating Unit,Market,Semi-Scheduled,Solar,Solar,Renewable,Photovoltaic Tracking Flat panel,Y,ADPPV1,24.75,19.0
5,South Australian Water Corporation,Adelaide Desalination Plant,SA1,Generating Unit,Market,Non-Scheduled,Solar,Solar,Renewable,Photovoltaic Flat panel,Y,ADPPV2,0.2,0.2
6,South Australian Water Corporation,Adelaide Desalination Plant,SA1,Generating Unit,Market,Non-Scheduled,Solar,Solar,Renewable,Photovoltaic Flat panel,Y,ADPPV3,0.02,0.02


In [6]:
x = NEM_gen[NEM_gen["Fuel Source - Primary"] == "Wind"]["Max Cap generation (MW)"]

In [7]:
sum([float(i) for i in x])

13857.000000000002

In [None]:
y = NEM_gen[NEM_gen["Fuel Source - Primary"] == "Solar"]["Max Cap generation (MW)"]

In [None]:
sum([float(i) for i in y])

## Generation data

### Download data

In [4]:
# Downloading the Generator and Load Dispatch table from NEMOSIS for a specified time period
dynamic_data_compiler(
    start_time='2024/08/01 00:00:00',
    end_time='2025/06/30 23:55:00',
    table_name='DISPATCHLOAD',
    raw_data_location=raw_NEM_cache
)

INFO: Compiling data for table DISPATCHLOAD
INFO: Downloading data for table DISPATCHLOAD, year 2024, month 08
INFO: Creating feather file for DISPATCHLOAD, 2024, 08
INFO: Downloading data for table DISPATCHLOAD, year 2024, month 09
INFO: Creating feather file for DISPATCHLOAD, 2024, 09
INFO: Downloading data for table DISPATCHLOAD, year 2024, month 10
INFO: Creating feather file for DISPATCHLOAD, 2024, 10
INFO: Downloading data for table DISPATCHLOAD, year 2024, month 11
INFO: Creating feather file for DISPATCHLOAD, 2024, 11
INFO: Downloading data for table DISPATCHLOAD, year 2024, month 12
INFO: Creating feather file for DISPATCHLOAD, 2024, 12
INFO: Downloading data for table DISPATCHLOAD, year 2025, month 01
INFO: Creating feather file for DISPATCHLOAD, 2025, 01
INFO: Downloading data for table DISPATCHLOAD, year 2025, month 02
INFO: Creating feather file for DISPATCHLOAD, 2025, 02
INFO: Downloading data for table DISPATCHLOAD, year 2025, month 03
INFO: Creating feather file for DIS

Unnamed: 0,SETTLEMENTDATE,DUID,INTERVENTION,DISPATCHMODE,AGCSTATUS,INITIALMW,TOTALCLEARED,RAMPDOWNRATE,RAMPUPRATE,LOWER5MIN,...,LOWERREG,RAISEREG,AVAILABILITY,RAISEREGENABLEMENTMAX,RAISEREGENABLEMENTMIN,LOWERREGENABLEMENTMAX,LOWERREGENABLEMENTMIN,SEMIDISPATCHCAP,LOWER1SEC,RAISE1SEC
0,2024-08-01 00:05:00,ADPBA1G,0,0,1,0.00000,0.00000,93.12,93.12,0.0,...,0.0,0.0,6.00000,6.00,0.0,6.00,0.0,0,0.0,0.0
1,2024-08-01 00:05:00,ADPBA1L,0,0,1,2.90700,0.00000,93.12,93.12,2.0,...,0.0,0.0,6.00000,6.00,0.0,6.00,0.0,0,0.0,0.0
2,2024-08-01 00:05:00,ADPPV1,0,0,0,0.00000,0.00000,120.00,120.00,0.0,...,0.0,0.0,0.00000,0.00,0.0,0.00,0.0,0,0.0,0.0
3,2024-08-01 00:05:00,AGLHAL,0,0,0,0.00000,0.00000,720.00,720.00,0.0,...,0.0,0.0,161.00000,0.00,0.0,0.00,0.0,0,0.0,0.0
4,2024-08-01 00:05:00,AGLSOM,0,0,0,19.82500,0.00000,360.00,360.00,0.0,...,0.0,0.0,40.00000,0.00,0.0,0.00,0.0,0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4430934,2025-06-30 23:55:00,YENDWF1,0,0,0,10.55000,10.31176,1680.00,1680.00,0.0,...,0.0,0.0,10.31176,0.00,0.0,0.00,0.0,0,0.0,0.0
4430935,2025-06-30 23:55:00,YWPS1,0,0,1,348.50000,350.00000,180.00,180.00,0.0,...,0.0,0.0,350.00000,350.75,250.0,350.75,250.0,0,0.0,0.0
4430936,2025-06-30 23:55:00,YWPS2,0,0,1,363.00000,365.00000,180.00,180.00,0.0,...,0.0,0.0,365.00000,365.00,250.0,365.00,250.0,0,0.0,0.0
4430937,2025-06-30 23:55:00,YWPS3,0,0,0,1.12500,0.00000,180.00,180.00,0.0,...,0.0,0.0,0.00000,385.00,250.0,385.00,250.0,0,0.0,0.0


### Process data

In [8]:
def load_dispatch(year, month, filepath):
    """
    Load DISPATCHLOAD data and tidy it
    """
    df = pd.read_csv(
        filepath + "/PUBLIC_DVD_DISPATCHLOAD_" + year + month + "010000.CSV",
        sep=",",
        skiprows=1,
        # nrows=10000,
        # index_col=0,
        parse_dates=True,
        usecols=["SETTLEMENTDATE", "DUID", "INITIALMW"]
    )
    df = df.dropna()
    df["SETTLEMENTDATE"] = pd.to_datetime(df["SETTLEMENTDATE"]) - pd.Timedelta(minutes=5)
    
    return df

In [9]:
def merge_dfs(load_df, generation_df):
    """
    Merge dispatch load and generation info dataframes
    """
    return pd.merge(
        load_df,
        generation_df[["DUID", "Station Name", 'Fuel Source - Primary', 'Fuel Source - Descriptor', 'Region']],
        how="left",
        on=["DUID"]
    )

In [10]:
def hourly_fuel(fuel_type, dispatch_df):
    """
    Aggregate 5-minute df data to hourly
    """
    # Select data for desired fuel
    dispatch = dispatch_df.loc[(dispatch_df['Fuel Source - Primary'] == fuel_type)]
    
    # Group by State and date and sum MW
    dispatch_sum = dispatch.groupby(["Region", "SETTLEMENTDATE"])["INITIALMW"].sum()
    
    # Sum to hourly
    level_values = dispatch_sum.index.get_level_values
    hourly = dispatch_sum.groupby(
        [level_values(0)] + [pd.Grouper(freq='h', level=-1)]
    ).sum()
    
    # Divide by 12 to get MWh
    hourly_mwh = hourly / 12
    
    return hourly_mwh

In [11]:
# years = [2021, 2022, 2023]
months = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"]

In [12]:
years = [2024, 2025]
# months = ["12"]

In [13]:
for year in years:
    y = str(year)
    print(y)
    
    for month in months:
        print(month)
        
        df = load_dispatch(y, month, raw_NEM_cache)
        
        dispatch = merge_dfs(df, NEM_gen)
        
        wind_hourly_mwh = hourly_fuel("Wind", dispatch)
        solar_hourly_mwh = hourly_fuel("Solar", dispatch)
        
        wind_hourly_mwh.to_csv(
            "/scratch/w42/dr6273/processed_AEMO/aemo_hourly_wind_" + y + "_" + month + ".csv"
        )
        solar_hourly_mwh.to_csv(
            "/scratch/w42/dr6273/processed_AEMO/aemo_hourly_solar_" + y + "_" + month + ".csv"
        )

2024
01
02


FileNotFoundError: [Errno 2] No such file or directory: '/scratch/w42/dr6273/raw_AEMO_cache/PUBLIC_DVD_DISPATCHLOAD_202402010000.CSV'