# Retrieve generation data from AEMO

In [10]:
import nemosis, pandas as pd, numpy
from nemosis import static_table, dynamic_data_compiler
from pathlib import Path

### Static table of generator IDs

In [9]:
# Directory holding the raw AEMO files; it is passed to static_table as the
# raw data location and to load_dispatch as the folder containing the monthly CSVs.
raw_NEM_cache = "/scratch/w42/dr6273/raw_AEMO_cache"

In [6]:
# Fetch the "Generators and Scheduled Loads" static table through NEMOSIS
# (update_static_file=True asks for a fresh copy rather than a cached one)
# and keep only the rows whose Dispatch Type is "Generating Unit".
# NOTE(review): the filter looks at Dispatch Type only, so any battery that is
# registered as a generating unit is still retained (e.g. DUID ADPBA1G).
NEM_gen = static_table(
    table_name="Generators and Scheduled Loads",
    raw_data_location=raw_NEM_cache,
    update_static_file=True
).loc[lambda table: table['Dispatch Type'] == "Generating Unit"]

INFO: Retrieving static table Generators and Scheduled Loads
INFO: Downloading data for table Generators and Scheduled Loads


In [7]:
# Preview the first rows of the filtered generator table
NEM_gen.head()

Unnamed: 0,Participant,Station Name,Region,Dispatch Type,Category,Classification,Fuel Source - Primary,Fuel Source - Descriptor,Technology Type - Primary,Technology Type - Descriptor,Aggregation,DUID,Reg Cap generation (MW),Max Cap generation (MW)
0,South Australian Water Corporation,Adelaide Desalination Plant,SA1,Generating Unit,Market,Scheduled,Battery storage,Grid,Storage,Battery and Inverter,Y,ADPBA1G,7.76,6.15
2,South Australian Water Corporation,Adelaide Desalination Plant,SA1,Generating Unit,Market,Non-Scheduled,Hydro,Water,Renewable,Run of River,Y,ADPMH1,1.44,1.0
3,South Australian Water Corporation,Adelaide Desalination Plant,SA1,Generating Unit,Market,Semi-Scheduled,Solar,Solar,Renewable,Photovoltaic Tracking Flat panel,Y,ADPPV1,24.75,19.0
4,South Australian Water Corporation,Adelaide Desalination Plant,SA1,Generating Unit,Market,Non-Scheduled,Solar,Solar,Renewable,Photovoltaic Flat panel,Y,ADPPV2,0.2,0.2
5,South Australian Water Corporation,Adelaide Desalination Plant,SA1,Generating Unit,Market,Non-Scheduled,Solar,Solar,Renewable,Photovoltaic Flat panel,Y,ADPPV3,0.02,0.02


### Generation data

In [246]:
def load_dispatch(year, month, filepath):
    """
    Load one month of DISPATCHLOAD data and tidy it

    Reads ``PUBLIC_DVD_DISPATCHLOAD_<year><month>010000.CSV`` from ``filepath``,
    keeping only the SETTLEMENTDATE, DUID and INITIALMW columns.

    Parameters
    ----------
    year : str or int
        Year used in the file name, e.g. "2023" or 2023.
    month : str or int
        Month used in the file name, e.g. "01" or 1 (zero-padded to two digits).
    filepath : str or os.PathLike
        Directory that contains the CSV file.

    Returns
    -------
    pandas.DataFrame
        One row per (SETTLEMENTDATE, DUID) with rows containing missing values
        removed. SETTLEMENTDATE is a datetime shifted back by five minutes
        relative to the value stored in the file.
    """
    # str()/zfill() keep the original string inputs working unchanged while
    # also accepting integers; Path() accepts both str and Path directories.
    file_name = (
        "PUBLIC_DVD_DISPATCHLOAD_" + str(year) + str(month).zfill(2) + "010000.CSV"
    )
    df = pd.read_csv(
        Path(filepath) / file_name,
        sep=",",
        skiprows=1,  # the first line of the file is not the column header
        usecols=["SETTLEMENTDATE", "DUID", "INITIALMW"]
    )

    # Drop any row with a missing value in the three selected columns
    df = df.dropna()

    # Move each timestamp back by five minutes
    # NOTE(review): presumably converts interval-ending stamps to
    # interval-beginning ones -- confirm against the AEMO data model.
    df["SETTLEMENTDATE"] = pd.to_datetime(df["SETTLEMENTDATE"]) - pd.Timedelta(minutes=5)

    # Keep a single row per unit and interval so that later sums of INITIALMW
    # cannot double count.
    # NOTE(review): DISPATCHLOAD may hold several rows for the same
    # DUID/SETTLEMENTDATE (e.g. separate INTERVENTION runs) -- confirm.
    df = df.drop_duplicates(subset=["SETTLEMENTDATE", "DUID"])

    return df

In [250]:
def merge_dfs(load_df, generation_df):
    """
    Merge dispatch load and generation info dataframes

    Each row of ``load_df`` is kept (left join on DUID) and gains the
    "Station Name", "Fuel Source - Primary", "Fuel Source - Descriptor" and
    "Region" columns of the matching generator. Rows whose DUID is absent from
    ``generation_df`` get missing values in those columns.

    Parameters
    ----------
    load_df : pandas.DataFrame
        Dispatch data with a "DUID" column.
    generation_df : pandas.DataFrame
        Generator information containing "DUID" and the four columns above.

    Returns
    -------
    pandas.DataFrame
        ``load_df`` with the generator columns appended; same number of rows
        as ``load_df``.
    """
    info_columns = [
        "DUID", "Station Name", 'Fuel Source - Primary', 'Fuel Source - Descriptor', 'Region'
    ]

    # A DUID listed more than once in the generator table would otherwise
    # replicate every matching dispatch row in the left join and inflate any
    # later sum of INITIALMW, so keep only the first entry per DUID.
    unit_info = generation_df[info_columns].drop_duplicates(subset="DUID")

    return pd.merge(
        load_df,
        unit_info,
        how="left",
        on=["DUID"]
    )

In [265]:
def hourly_fuel(fuel_type, dispatch_df):
    """
    Hourly totals per Region for a single primary fuel type

    Rows of ``dispatch_df`` whose 'Fuel Source - Primary' equals ``fuel_type``
    are summed over INITIALMW for each (Region, SETTLEMENTDATE) pair, those
    sums are then added up within each hour, and the hourly total is divided
    by 12. The result is a Series indexed by Region and hour.
    """
    # Keep only the rows for the requested fuel
    is_requested_fuel = dispatch_df['Fuel Source - Primary'] == fuel_type
    fuel_rows = dispatch_df.loc[is_requested_fuel]

    # Total MW per region at each timestamp
    mw_per_interval = fuel_rows.groupby(["Region", "SETTLEMENTDATE"])["INITIALMW"].sum()

    # Collapse timestamps to hourly bins while keeping the regions separate
    region_key = mw_per_interval.index.get_level_values(0)
    hour_key = pd.Grouper(freq='h', level=-1)
    mw_per_hour = mw_per_interval.groupby([region_key, hour_key]).sum()

    # Twelve 5-minute values per hour: dividing the summed MW by 12 gives MWh
    return mw_per_hour / 12

In [266]:
# Years to process, and the twelve months as zero-padded two-character strings
# ("01" ... "12") in the form used inside the input and output file names.
years = [2023]
months = [f"{month_number:02d}" for month_number in range(1, 13)]

In [268]:
# For every requested year and month: load the monthly dispatch file, attach
# the generator information, aggregate wind and solar to hourly values and
# write one CSV per fuel and month.
for year in years:
    y = str(year)
    print(y)  # progress: current year

    for month in months:
        print(month)  # progress: current month

        # Raw dispatch rows for this month, then generator details per DUID
        df = load_dispatch(y, month, raw_NEM_cache)
        dispatch = merge_dfs(df, NEM_gen)

        # Hourly series per region for each fuel of interest
        wind_hourly_mwh = hourly_fuel("Wind", dispatch)
        solar_hourly_mwh = hourly_fuel("Solar", dispatch)

        # Persist the monthly results
        wind_hourly_mwh.to_csv(
            f"/scratch/w42/dr6273/processed_AEMO/aemo_hourly_wind_{y}_{month}.csv"
        )
        solar_hourly_mwh.to_csv(
            f"/scratch/w42/dr6273/processed_AEMO/aemo_hourly_solar_{y}_{month}.csv"
        )

2023
01
02
03
04
05
06
07
08
09
10
11
12
