# Power output of Renewable Energy Zones (REZs) under different energy scenarios

In [24]:
from dask.distributed import Client,LocalCluster
from dask_jobqueue import PBSCluster

In [36]:
# A Gadi node has 48 cores, so each PBS job asks for one full node; scale out
# by adding jobs (nodes) rather than by requesting partial nodes.
# NOTE(review): wall times recorded for the generation loops in this notebook
# (roughly 4-19 min) exceed this walltime - confirm it is long enough.
walltime = "00:10:00"
cores = 48
memory = f"{4 * cores}GB"  # 4 GB per core

cluster = PBSCluster(
    walltime=walltime,
    cores=cores,
    memory=memory,
    processes=cores,
    # Extra directives passed through to each PBS job.
    job_extra_directives=[
        "-q normal",
        "-P w42",
        f"-l ncpus={cores}",
        f"-l mem={memory}",
        "-l storage=gdata/w42+gdata/rt52",
    ],
    local_directory="$TMPDIR",
    job_directives_skip=["select"],
)

In [37]:
# Ask the cluster for three PBS jobs, then attach a client to it.
cluster.scale(jobs=3)
client = Client(cluster)

In [38]:
# Show the client summary (dashboard link, worker/thread counts, memory).
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.6.121.1:34049,Workers: 0
Dashboard: /proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [5]:
import xarray as xr
import pandas as pd

In [6]:
%cd /g/data/w42/dr6273/work/power_models
import functions as fn

/g/data/w42/dr6273/work/power_models


In [7]:
%load_ext autoreload
%autoreload 2

# Load REZ data

#### REZ mask

In [8]:
# Read the REZ variable from the mask file (file name indicates the ERA5 grid).
mask = xr.open_dataset('/g/data/w42/dr6273/work/projects/Aus_energy/data/rez_2024_mask_era5_grid.nc').REZ

#### REZ generation

In [9]:
# Raw capacity table; the first CSV column is used as the row index.
_gen = pd.read_csv("/g/data/w42/dr6273/work/data/REZ/2024/REZ_potential.csv", index_col=0)

In [10]:
# Tidy column names: first swap "90" for "9-", then spaces for underscores.
_gen.columns = [
    name.replace("90", "9-").replace(" ", "_")
    for name in _gen.columns
]

In [11]:
# Add the existing capacity of the matching technology to every scenario
# column. The first two columns are skipped, and "Existing_*" columns are
# left untouched.
gen = _gen.copy()
for col in _gen.columns[2:]:
    if col.startswith("Exi"):
        continue
    # Technology is the second-to-last "_"-separated token of the column name.
    gen_type = col.split("_")[-2]
    gen[col] = _gen[col] + _gen[f"Existing_{gen_type}"]

In [12]:
# Preview the first rows of the combined capacity table.
gen.head()

Unnamed: 0_level_0,Solar_renewable_potential_(MW),Wind_renewable_potential_(MW),Existing_solar,Progressive_solar_2029-30,Progressive_solar_2039-40,Progressive_solar_2049-50,Step_change_solar_2029-30,Step_change_solar_2039-40,Step_change_solar_2049-50,Green_energy_solar_2029-30,...,Existing_wind,Progressive_wind_2029-30,Progressive_wind_2039-40,Progressive_wind_2049-50,Step_change_wind_2029-30,Step_change_wind_2039-40,Step_change_wind_2049-50,Green_energy_wind_2029-30,Green_energy_wind_2039-40,Green_energy_wind_2049-50
REZ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
N1,6385,0,166,216,216,266,166,216,266,216,...,0,0,0,0,0,0,0,0,0,-300
N2,2950,7400,855,905,1355,3855,855,3255,6255,3155,...,442,3492,4042,7842,3442,7842,7842,4992,9142,9592
N3,6850,3000,1497,3197,3447,7597,2947,5897,8347,4447,...,673,4373,4373,4873,6123,6123,8473,7373,7473,8173
N4,8000,5100,53,253,253,403,203,203,453,253,...,198,298,298,298,298,298,348,348,348,348
N5,2256,3900,1122,1822,1822,3372,2272,2272,3372,1922,...,0,450,450,450,600,600,1000,1000,1000,1000


### Pre-compute capacity factor averages:

In [13]:
def calculate_mean_cf(years, gen_type, method="van_der_Wiel", chunks=None):
    """
    Return mean capacity factor over all years.

    Each year's hourly capacity factor is first averaged over time; the
    resulting yearly means are then averaged with equal weight per year.

    years: iterable of years; each is passed to fn.load_hourly_cf
    gen_type: str, 'wind' or 'solar'
    method: str, method used to compute capacity factor, e.g. 'van_der_Wiel'
    chunks: dict, how to chunk each year's data when loading. Default is None,
        which uses {"lat": 75, "lon": 100, "time": -1}.
    """
    # Honour the caller's chunking; previously this argument was ignored and
    # the default below was always used.
    if chunks is None:
        chunks = {"lat": 75, "lon": 100, "time": -1}

    yearly_means = []
    for year in years:
        cf = fn.load_hourly_cf(year, gen_type, method=method, chunks=chunks).capacity_factor
        # Label each yearly mean with its year so they can be stacked on "time".
        yearly_means.append(cf.mean("time").expand_dims({"time": [year]}))

    return xr.concat(yearly_means, dim="time").mean("time")

In [14]:
# Output directory prefix; file names are appended by plain string
# concatenation below, so the trailing "/" is required.
fp = "/g/data/w42/dr6273/work/projects/Aus_energy/production_metrics/"

In [15]:
# 1940 through 2023 inclusive (range excludes the stop value).
years = range(1940, 2024)

In [16]:
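# One-off pre-computation of the mean capacity factor files (left commented
# out, presumably because the files already exist). calc_all_years_generation
# reads files with this naming pattern when scale_mask="capacity_factor".
# for gen_type in ["wind", "solar"]:
#     mean_cf = calculate_mean_cf(years, gen_type)
#     mean_cf.to_dataset(name="capacity_factor").to_netcdf(
#         fp + gen_type + "_mean_capacity_factor_van_der_Wiel_era5_hourly_" + str(years[0]) + "-" + str(years[-1]) + ".nc"
#     )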

# Compute generation

In [16]:
def get_even_capacity_mask(mask_da, generation):
    """
    Return DataArray in which each region's capacity is shared equally
    between that region's grid cells, stacked along a new "REZ" dimension.

    For every region, cells where the mask equals 0 keep their mask value;
    all other cells are set to capacity / n_cells, where n_cells is the sum
    of that region's mask.

    mask_da: array of REZ mask, selectable by "region"
    generation: pandas Series of capacity values for each region (index).
    """
    def _spread_evenly(region):
        region_mask = mask_da.sel(region=region)
        n_cells = region_mask.sum().values
        per_cell = generation.loc[region] / n_cells
        return region_mask.where(region_mask == 0, per_cell).expand_dims({"REZ": [region]})

    return xr.concat([_spread_evenly(region) for region in generation.index], dim="REZ")

In [17]:
def get_cf_scaled_capacity_mask(cf_mean, mask_da, generation):
    """
    Return DataArray with mask of capacity divided according to weights from average capacity factor

    For each region, the mean capacity factor of the cells where the mask
    equals 1 is normalised to sum to one, and the region's capacity is split
    between those cells in proportion to these weights. Regions are stacked
    along a new "REZ" dimension.

    cf_mean: array of capacity factor weights
    mask_da: array of REZ mask, selectable by "region"
    generation: pandas Series of capacity values for each region (index).

    Raises ValueError if a region's weights differ from one by more than the
    tolerance (in either direction).
    """
    tolerance = 0.01  # maximum accepted |1 - sum(weights)|

    da_list = []
    for r in generation.index:
        capacity = generation.loc[r]

        # Keep only this region's cells; drop=True trims the array to them.
        cf_region = cf_mean.where(
            mask_da.sel(region=r) == 1,
            drop=True
        )
        weights = cf_region / cf_region.sum()

        # Check the deviation in both directions: the previous one-sided test
        # (1 - sum > tolerance) let weights summing to more than one through.
        weights_total = weights.sum().values
        if abs(1 - weights_total) > tolerance:
            print(weights_total)
            raise ValueError("Weights don't sum to one.")

        da_list.append((weights * capacity).expand_dims({"REZ": [r]}))

    return xr.concat(da_list, dim="REZ")

In [18]:
def calc_generation(capacity_factor, generation_capacity):
    """
    Return DataArray with time series of power for each REZ

    For every REZ, cells with capacity greater than zero are kept, multiplied
    by the capacity factor, and summed over "lat" and "lon".

    capacity_factor: DataArray of capacity factors
    generation_capacity: DataArray of each grid cells generation capacity,
        with a "REZ" dimension
    """
    def _region_power(rez):
        cell_capacity = generation_capacity.sel(REZ=rez)
        # Discard cells without positive capacity before weighting.
        cell_capacity = cell_capacity.where(cell_capacity > 0, drop=True)
        power = (capacity_factor * cell_capacity).sum(["lat", "lon"])
        return power.expand_dims({"REZ": [rez]})

    return xr.concat(
        [_region_power(rez) for rez in generation_capacity.REZ.values],
        dim="REZ",
    )

In [19]:
def calc_all_years_generation(years, mask_da, generation, scale_mask="capacity_factor", method="van_der_Wiel"):
    """
    Compute power time series for each REZ and each year.

    years: range (or other indexable sequence) of years. The first and last
        entries are also used to build the mean capacity factor file name.
    mask_da: DataArray of REZ mask. Selected by "region" for the
        'capacity_factor' and 'even' options; for any other option it is used
        directly as the per-cell capacity and needs a "REZ" dimension.
    generation: pandas Series of capacity values for each region (index).
        Its name determines the generation type ('solar' or 'wind').
    scale_mask: str, 'capacity_factor' to scale by mean capacity factors,
        'even' for an even distribution of generation amongst REZ cells,
        'none' to use mask_da directly. Any other value behaves like 'none'.
    method: str, which method of capacity factors was used. Applies to both
        the hourly capacity factors and the mean capacity factor file.
    """
    def _get_gen_type(gen):
        # Column names look like "Solar_...", "Wind_...", "Existing_<type>"
        # or "<scenario>_<type>_<years>".
        g_split = gen.name.split("_")
        if g_split[0] == "Solar":
            gt = "solar"
        elif g_split[0] == "Wind":
            gt = "wind"
        elif g_split[0] == "Existing":
            gt = g_split[1]
        else:
            gt = g_split[-2]
        return gt

    gen_type = _get_gen_type(generation)

    if scale_mask == "capacity_factor": # Use mean capacity factor weights to split the generation capacities by
        # Build the file name from `method` so the weights come from the same
        # method as the hourly capacity factors (it was hard-coded before).
        mean_cf = xr.open_dataset(
            "/g/data/w42/dr6273/work/projects/Aus_energy/production_metrics/"
            + gen_type + "_mean_capacity_factor_" + method + "_era5_hourly_"
            + str(years[0]) + "-" + str(years[-1]) + ".nc"
        )["capacity_factor"]

        gen_capacity = get_cf_scaled_capacity_mask(mean_cf, mask_da, generation)

    elif scale_mask == "even": # Divide the generation capacity by the size of the region and assign each cell that number
        gen_capacity = get_even_capacity_mask(mask_da, generation)

    else: # Just use mask_da as the generation capacities for each cell
        gen_capacity = mask_da.copy()

    da_list = []
    for year in years:
        cf = fn.load_hourly_cf(year, gen_type, method=method, chunks={"lat": -1, "lon": -1, "time": 2500}).capacity_factor
        da_list.append(calc_generation(cf, gen_capacity))

    # Join the yearly series end to end, then merge into a single chunk.
    REZ_power = xr.concat(da_list, dim="time")
    REZ_power = REZ_power.chunk({"REZ": -1, "time": -1})

    return REZ_power

In [20]:
# cf = fn.load_hourly_cf(1940, "wind", method="van_der_Wiel", chunks={"lat": -1, "lon": -1, "time": 2500}).capacity_factor

In [23]:
# List the scenario columns from position 14 onwards.
gen.columns[14:]

Index(['Progressive_wind_2039-40', 'Progressive_wind_2049-50',
       'Step_change_wind_2029-30', 'Step_change_wind_2039-40',
       'Step_change_wind_2049-50', 'Green_energy_wind_2029-30',
       'Green_energy_wind_2039-40', 'Green_energy_wind_2049-50'],
      dtype='object')

In [24]:
%%time
# Capacity-factor-weighted power for each selected scenario, one file each.
# NOTE(review): columns[14:] is a positional slice; per the recorded output it
# starts at 'Progressive_wind_2039-40', so earlier columns are not processed.
for scenario in gen.columns[14:]:
    print(scenario)
    gen_ds = calc_all_years_generation(years, mask, gen[scenario]).to_dataset(name="power")

    # File-name tag: drop a trailing "_(MW)" unit suffix, then lower-case.
    if scenario.rpartition("_")[2] == "(MW)":
        scenario = scenario[:-5]
    scenario = scenario.lower()

    gen_ds.to_netcdf(
        f"{fp}REZ_power_{scenario}_van_der_Wiel_era5_hourly_{years[0]}-{years[-1]}.nc"
    )

Progressive_wind_2039-40
Progressive_wind_2049-50
Step_change_wind_2029-30
Step_change_wind_2039-40
Step_change_wind_2049-50
Green_energy_wind_2029-30
Green_energy_wind_2039-40
Green_energy_wind_2049-50
CPU times: user 18min 15s, sys: 52.3 s, total: 19min 7s
Wall time: 19min 24s


In [22]:
%%time
# for scenario in gen.columns[14:]:
# Same calculation for existing capacity, spread evenly over each REZ's cells.
for scenario in ("Existing_wind", "Existing_solar"):
    print(scenario)
    gen_ds = calc_all_years_generation(
        years, mask, gen[scenario], scale_mask="even"
    ).to_dataset(name="power")

    # File-name tag: drop a trailing "_(MW)" unit suffix, then lower-case.
    if scenario.rpartition("_")[2] == "(MW)":
        scenario = scenario[:-5]
    scenario = scenario.lower()

    gen_ds.to_netcdf(
        f"{fp}REZ_power_even_weights_{scenario}_van_der_Wiel_era5_hourly_{years[0]}-{years[-1]}.nc"
    )

Existing_wind
Existing_solar
CPU times: user 3min 50s, sys: 12.2 s, total: 4min 2s
Wall time: 4min 29s


### Repeat but for existing facilities in sites outside REZs

In [39]:
# Load the solar_site variable into memory, renaming "region" to "REZ"
# because calc_generation selects along a "REZ" dimension.
solar_outside = (
    xr.open_mfdataset(
        "/g/data/w42/dr6273/work/projects/Aus_energy/production_metrics/solar/power/solar_site_outside_REZ_mask_era5.nc",
    )
    .solar_site
    .rename({"region": "REZ"})
    .compute()
)

In [40]:
# Load the wind_site variable into memory, renaming "region" to "REZ"
# because calc_generation selects along a "REZ" dimension.
wind_outside = (
    xr.open_mfdataset(
        "/g/data/w42/dr6273/work/projects/Aus_energy/production_metrics/wind/power/wind_site_outside_REZ_mask_era5.nc",
    )
    .wind_site
    .rename({"region": "REZ"})
    .compute()
)

In [42]:
# Power for existing sites outside the REZs. With scale_mask="none" the site
# array itself is used as the per-cell capacity; gen[scenario] is still passed
# so the generation type can be read from its name.
for scenario, m in (("Existing_solar", solar_outside), ("Existing_wind", wind_outside)):
    print(scenario)
    gen_ds = (
        calc_all_years_generation(years, m, gen[scenario], scale_mask="none")
        .to_dataset(name="power")
        .rename({"REZ": "region"})  # Change dimension name back
    )

    # File-name tag: drop a trailing "_(MW)" unit suffix, then lower-case.
    if scenario.rpartition("_")[2] == "(MW)":
        scenario = scenario[:-5]
    scenario = scenario.lower()

    gen_ds.to_netcdf(
        f"{fp}site_outside_REZ_power_{scenario}_van_der_Wiel_era5_hourly_{years[0]}-{years[-1]}.nc",
        mode="w",
    )

Existing_solar
Existing_wind


# Close cluster

In [43]:
# Close the client first, then the cluster it was attached to.
client.close()
cluster.close()