In [15]:
import xarray as xr
from pathlib import Path
import sys
from tqdm import tqdm
import os
import pandas as pd
import numpy as np
# Project root as a string: the directory two levels above the current
# working directory. All data paths in this notebook are built from it.
DIRPATH = str(Path.cwd().parents[1])

# Make <project root>/src importable, adding it to sys.path only once.
module_path = DIRPATH + "/src"
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)
from utils.eem2020 import load_NWP, load_wind_power, load_max_power, load_turbine_map, load_wind_power_TS, load_wind_turbines

In [2]:
# Create the NetCDF file for task 1: 2001/01/01 - 2001/02/28.
# load_NWP is a project helper (utils.eem2020); its result is written as-is.
task1_path = DIRPATH + "/data/eem20/processed/EEM2020_task1.nc"
NWP_data = load_NWP(start="20010101", end="20010228")
NWP_data.to_netcdf(task1_path)

In [3]:
# Create the NetCDF file for task 2: 2001/03/01 - 2001/04/30.
task2_path = DIRPATH + "/data/eem20/processed/EEM2020_task2.nc"
NWP_data = load_NWP(start="20010301", end="20010430")
NWP_data.to_netcdf(task2_path)

In [4]:
# Create the NetCDF file for the year 2000: 2000/01/01 - 2000/12/31.
# NWP_data is kept in the kernel afterwards: the next cell extends it.
year2000_path = DIRPATH + "/data/eem20/processed/EEM2020_2000.nc"
NWP_data = load_NWP(start="20000101", end="20001231")
NWP_data.to_netcdf(year2000_path)

In [5]:
# Create the NetCDF file for 2000 through task 3: 2000/01/01 - 2001/06/30.
#
# NOTE: this cell extends the NWP_data left in the kernel by the preceding
# cell (the year-2000 data), so it has to be run directly after that cell.
#
# The task 1 / task 2 files are read with xr.load_dataset, which loads the
# data into memory and closes the file again. xr.open_dataset would leave
# both file handles open for the rest of the kernel session.
for task_file in (
    "/data/eem20/processed/EEM2020_task1.nc",
    "/data/eem20/processed/EEM2020_task2.nc",
):
    NWP_data = NWP_data.combine_first(xr.load_dataset(DIRPATH + task_file))

# Append the task 3 window (2001/05/01 - 2001/06/30) straight from load_NWP.
NWP_data = NWP_data.combine_first(load_NWP(start="20010501", end="20010630"))
NWP_data.to_netcdf(DIRPATH + "/data/eem20/processed/EEM2020_tilltask3.nc")

In [6]:
# Create the NetCDF file for 2000 through task 4: 2000/01/01 - 2001/08/31.
# Extends the NWP_data accumulated by the previous cell with 2001/07-08.
task4_window = load_NWP(start="20010701", end="20010831")
NWP_data = NWP_data.combine_first(task4_window)
NWP_data.to_netcdf(DIRPATH + "/data/eem20/processed/EEM2020_tilltask4.nc")

In [7]:
# Create the NetCDF file for 2000 through task 5: 2000/01/01 - 2001/10/31.
# Extends the NWP_data accumulated by the previous cell with 2001/09-10.
task5_window = load_NWP(start="20010901", end="20011031")
NWP_data = NWP_data.combine_first(task5_window)
NWP_data.to_netcdf(DIRPATH + "/data/eem20/processed/EEM2020_tilltask5.nc")

In [8]:
# Create the NetCDF file for 2000 through task 6: 2000/01/01 - 2001/12/31.
# Extends the NWP_data accumulated by the previous cell with 2001/11-12.
task6_window = load_NWP(start="20011101", end="20011231")
NWP_data = NWP_data.combine_first(task6_window)
NWP_data.to_netcdf(DIRPATH + "/data/eem20/processed/EEM2020_tilltask6.nc")

#### Creating uncertainty features

In [12]:
# Build ds_NWP_mean: the ensemble-mean of every daily raw NWP file in
# [start, end], merged into a single dataset with combine_first.
start, end = "20000101", "20001231"
files = list(pd.date_range(start=start, end=end, freq="D").strftime("%Y%m%d") + "T00Z.nc")
dirname = DIRPATH + "/data/eem20/raw"

ds_NWP_mean = None
for day in tqdm(files):
    day_path = os.path.join(dirname, day)
    # Missing days are skipped. This now also applies to the first day,
    # which used to be opened unconditionally.
    if not os.path.isfile(day_path):
        continue
    # The context manager closes the file once the day has been reduced.
    # .load() makes sure nothing still points at the closed file.
    with xr.open_dataset(day_path) as temp_ds:
        day_mean = temp_ds.mean(dim="ensemble_member").load()
    # The first available day seeds the accumulator; later days only fill
    # positions that are not present yet (combine_first keeps existing values).
    if ds_NWP_mean is None:
        ds_NWP_mean = day_mean
    else:
        ds_NWP_mean = ds_NWP_mean.combine_first(day_mean)

if ds_NWP_mean is None:
    raise FileNotFoundError(
        "No raw NWP files found in " + dirname + " between " + start + " and " + end
    )

100%|██████████| 365/365 [23:51<00:00,  3.92s/it]


In [13]:
# Display the accumulated ensemble-mean dataset for inspection.
ds_NWP_mean

In [16]:
# Add the wind magnitude computed from the two components,
# sqrt(Wind_U^2 + Wind_V^2), as the new variable "Wind".
wind_speed = np.sqrt(ds_NWP_mean.Wind_U**2 + ds_NWP_mean.Wind_V**2)
ds_NWP_mean = ds_NWP_mean.assign(Wind=wind_speed)

In [17]:
# Add "WindDensity" = Pressure / (287.05 * Temperature) * (8/27) * Wind^3.
# NOTE(review): 287.05 looks like the specific gas constant of dry air
# (J/(kg K)), making the first factor an air density, and 8/27 equals
# 1/2 * 16/27 (presumably the Betz limit) - confirm units and intent.
air_density = ds_NWP_mean.Pressure / (287.05 * ds_NWP_mean.Temperature)
wind_density = air_density * (8 / 27) * (ds_NWP_mean.Wind**3)
ds_NWP_mean = ds_NWP_mean.assign(WindDensity=wind_density)

In [18]:
# Display the dataset again, now including the derived Wind and WindDensity variables.
ds_NWP_mean

In [19]:
# Open the raw file of the first day (2000-01-01) with all ensemble members
# and drop the CloudCover variable in one chained expression.
ds_NWP_day1 = xr.open_dataset(
    DIRPATH+"/data/eem20/raw/20000101T00Z.nc"
).drop_vars("CloudCover")

In [20]:
# Per-variable scalar statistics of the ensemble-mean dataset (reduced over
# all dimensions); used below to normalise the individual ensemble members.
ds_NWP_mean_mean, ds_NWP_mean_std = ds_NWP_mean.mean(), ds_NWP_mean.std()

In [21]:
# Standardise the first day's data with the dataset-wide mean and std.
day1_centered = ds_NWP_day1 - ds_NWP_mean_mean
ds_NWP_day1_norm = day1_centered / ds_NWP_mean_std

In [22]:
# Spread between ensemble members: standard deviation of the normalised
# values along the ensemble dimension.
ensemble_axis = "ensemble_member"
ds_NWP_day1_norm_std = ds_NWP_day1_norm.std(dim=ensemble_axis)

In [23]:
# Average the ensemble spread over the x and y dimensions; being the last
# expression of the cell, the result is shown as the cell output.
ds_NWP_day1_norm_std.mean(dim=["x","y"])

In [24]:
# Uncertainty features: for every daily raw file in 2000-2001 and each of the
# four regions, compute the ensemble standard deviation of the normalised
# variables averaged over the region's grid window, then merge all days per
# region with combine_first. Results end up in ds_SE1 ... ds_SE4.
files = list(pd.date_range(start="20000101", end="20011231", freq="D").strftime("%Y%m%d") + "T00Z.nc")
dirname = DIRPATH + "/data/eem20/raw"

# Grid-index windows selecting each region (64 indices along y and along x).
region_windows = {
    "SE1": dict(y=slice(92, 156), x=slice(7, 71)),
    "SE2": dict(y=slice(58, 122), x=slice(7, 71)),
    "SE3": dict(y=slice(14, 78), x=slice(1, 65)),
    "SE4": dict(y=slice(0, 64), x=slice(0, 64)),
}


def _with_wind_features(nwp_ds):
    """Return ``nwp_ds`` without CloudCover and with Wind / WindDensity added.

    Wind is sqrt(Wind_U^2 + Wind_V^2); WindDensity is
    Pressure / (287.05 * Temperature) * (8/27) * Wind^3.
    """
    nwp_ds = nwp_ds.drop_vars("CloudCover")
    nwp_ds = nwp_ds.assign(Wind=np.sqrt(nwp_ds.Wind_U**2 + nwp_ds.Wind_V**2))
    return nwp_ds.assign(
        WindDensity=(nwp_ds.Pressure/(287.05*nwp_ds.Temperature))*(8/27)*(nwp_ds.Wind**3)
    )


def _regional_ensemble_std(nwp_ds, window):
    """Return the ensemble std of the normalised data, averaged over ``window``.

    ``window`` is a dict of ``isel`` indexers (y and x slices). Normalisation
    uses the notebook-level ds_NWP_mean_mean / ds_NWP_mean_std statistics.
    """
    region = nwp_ds.isel(**window)
    region_norm = (region - ds_NWP_mean_mean)/ds_NWP_mean_std
    return region_norm.std(dim="ensemble_member").mean(dim=["x","y"])


ensemble_std_by_region = {}
for day in tqdm(files):
    day_path = os.path.join(dirname, day)
    # Missing days are skipped. This now also covers the first day, which
    # used to be opened without any existence check.
    if not os.path.isfile(day_path):
        continue
    # Close every file once its features are computed; .load() guarantees
    # the kept results no longer reference the closed file.
    with xr.open_dataset(day_path) as raw_ds:
        temp_ds = _with_wind_features(raw_ds)
        day_std_by_region = {
            region: _regional_ensemble_std(temp_ds, window).load()
            for region, window in region_windows.items()
        }
    for region, day_std in day_std_by_region.items():
        if region in ensemble_std_by_region:
            # Earlier days take precedence; new days only fill missing positions.
            ensemble_std_by_region[region] = ensemble_std_by_region[region].combine_first(day_std)
        else:
            ensemble_std_by_region[region] = day_std

if not ensemble_std_by_region:
    raise FileNotFoundError("No raw NWP files found in " + dirname)

# Names expected by the cell that writes the results to disk.
ds_SE1 = ensemble_std_by_region["SE1"]
ds_SE2 = ensemble_std_by_region["SE2"]
ds_SE3 = ensemble_std_by_region["SE3"]
ds_SE4 = ensemble_std_by_region["SE4"]

100%|██████████| 730/730 [07:38<00:00,  1.59it/s]


In [25]:
# Write the per-region ensemble-spread features to NetCDF4 files
# (output path relative to the project root -> dataset).
ensemble_std_outputs = {
    "/data/eem20/processed/NWP_ensemble_std_SE1.nc": ds_SE1,
    "/data/eem20/processed/NWP_ensemble_std_SE2.nc": ds_SE2,
    "/data/eem20/processed/NWP_ensemble_std_SE3.nc": ds_SE3,
    "/data/eem20/processed/NWP_ensemble_std_SE4.nc": ds_SE4,
}
for relative_path, region_std in ensemble_std_outputs.items():
    region_std.to_netcdf(DIRPATH+relative_path, format="NETCDF4")

#### Creating temperature features

In [26]:
# Re-open the processed file spanning 2000/01/01 - 2001/12/31 (written by
# the "till task6" cell earlier in this notebook).
tilltask6_path = DIRPATH + "/data/eem20/processed/EEM2020_tilltask6.nc"
ds_tillend = xr.open_dataset(tilltask6_path)

In [27]:
def _regional_mean_temperature(y_window, x_window):
    """Mean of ds_tillend.Temperature over the given y/x index windows."""
    region_temperature = ds_tillend.Temperature.isel(y=y_window, x=x_window)
    return region_temperature.mean(dim=["x","y"])


# Same grid windows as used for the uncertainty features of each region.
ds_temp_SE1 = _regional_mean_temperature(slice(92,156), slice(7,71))
ds_temp_SE2 = _regional_mean_temperature(slice(58,122), slice(7,71))
ds_temp_SE3 = _regional_mean_temperature(slice(14,78), slice(1,65))
ds_temp_SE4 = _regional_mean_temperature(slice(0,64), slice(0,64))


In [28]:
# Write the per-region mean temperature series to NetCDF4 files
# (output path relative to the project root -> data array).
mean_temp_outputs = {
    "/data/eem20/processed/NWP_mean_temp_SE1.nc": ds_temp_SE1,
    "/data/eem20/processed/NWP_mean_temp_SE2.nc": ds_temp_SE2,
    "/data/eem20/processed/NWP_mean_temp_SE3.nc": ds_temp_SE3,
    "/data/eem20/processed/NWP_mean_temp_SE4.nc": ds_temp_SE4,
}
for relative_path, region_temp in mean_temp_outputs.items():
    region_temp.to_netcdf(DIRPATH+relative_path, format="NETCDF4")