## UNSEEN via seasonal mean relationships ##

Trying something slightly different. On a seasonal (e.g. ONDJFM) mean timescale, there are likely to be fairly strong relationships between wind speed and wind power generation, temperature and demand, and irradiance and solar power generation. It therefore stands to reason that a combination of these variables, used with a multi linear regression, could project onto the seasonal mean demand net wind (or demand net renewables). Where we have an observed (hopefully linear) relationship between the surface variables and demand net wind (or demand net renewables), we can extrapolate this so that "worse" (i.e. lower wind speeds, colder temperatures, more dull conditions) conditions produced by DePreSys can map onto worse impacts for demand net wind (or demand net renewables). 

Steps for this:

1. Load in the CLEARHEADS dataset and process into demand net wind (+ demand net renewables), along with the surface variables for the different countries (maybe just UK first).
2. Create scatter plots of the relationship between seasonal mean variables (e.g. 10m wind speed/100m wind speed) and the energy system variable (e.g. wind power generation).
3. Create scatter plots for how the individual seasonal mean variables map onto demand net wind/demand net renewables.
4. Create a multi linear regression for mapping the combination of individual surface variables onto demand net wind/demand net renewables.

In [1]:
# Import local modules
import os
import sys
import glob
import time

# Import third-party modules
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt

# Specific imports
from tqdm import tqdm

In [2]:
# Set up the directory where the clearhedas data are stored
data_dir = '/home/users/benhutch/CLEARHEADS_EU_Power_Data'

# List the clearheads data files
files = os.listdir(data_dir)

files

['EEZ_zones_wp_historical.nc',
 'NUTS_0_CDD_historical_pop_weighted.nc',
 'NUTS_0_HDD_historical_pop_weighted.nc',
 'NUTS_0_sp_historical.nc',
 'NUTS_0_sp_historical_loc_weighted.nc',
 'NUTS_0_speed100m_historical.nc',
 'NUTS_0_speed10m_historical.nc',
 'NUTS_0_speed10m_historical_pop_weighted.nc',
 'NUTS_0_ssrd_historical.nc',
 'NUTS_0_ssrd_historical_pop_weighted.nc',
 'NUTS_0_t2m_detrended_timeseries_historical.nc',
 'NUTS_0_t2m_detrended_timeseries_historical_pop_weighted.nc',
 'NUTS_0_wp_ofs_sim_0_historical_loc_weighted.nc',
 'NUTS_0_wp_ofs_sim_1_historical_loc_weighted.nc',
 'NUTS_0_wp_ons_sim_0_historical_loc_weighted.nc',
 'NUTS_0_wp_ons_sim_1_historical_loc_weighted.nc',
 'NUTS_1_sp_historical.nc',
 'NUTS_1_speed100m_historical.nc',
 'NUTS_1_speed10m_historical.nc',
 'NUTS_1_ssrd_historical.nc',
 'NUTS_1_t2m_detrended_timeseries_historical.nc',
 'NUTS_1_wp_historical.nc',
 'NUTS_2_sp_historical.nc',
 'NUTS_2_speed100m_historical.nc',
 'NUTS_2_speed10m_historical.nc',
 'NUTS_2

In [3]:
# Create a list of the files to extract
files_to_extract = [
    "NUTS_0_sp_historical.nc",
    "NUTS_0_speed100m_historical.nc",
    "NUTS_0_speed10m_historical.nc",
    "NUTS_0_ssrd_historical.nc",
    "NUTS_0_wp_ofs_sim_0_historical_loc_weighted.nc",
    "NUTS_0_wp_ons_sim_0_historical_loc_weighted.nc",
]

# Create a list with the names
names = [
    "solar_power",
    "100m_wind_speed",
    "10m_wind_speed",
    "solar_irradiance",
    "ofs_cfs",
    "ons_cfs",
]

In [4]:
# Set up an empty dataframe to store the data
df_full = pd.DataFrame()

# Loop over the files to extract
for file, name in tqdm(zip(files_to_extract, names)):
    # Load the data
    file_path = os.path.join(data_dir, file)

    # assert that the file exists
    assert os.path.exists(file_path), f"File {file_path} does not exist"

    # Load the data
    ds = xr.open_dataset(file_path)

    # # print ds.variables
    # print(ds.variables)

    # assert that NUTS_keys is in the variables
    assert "NUTS_keys" in ds.variables, f"Variable NUTS_keys not found in {file_path}"

    # extract the nuts keys
    nuts_keys = ds["NUTS_keys"].values

    # turn the data into a dataframe
    df = ds.to_dataframe()

    # Pivot the dataframe
    df_pivot = df.reset_index().pivot(
        index="time_in_hours_from_first_jan_1950",
        columns="NUTS",
        values="timeseries_data",
    )

    # set the columns as the nuts keys
    df_pivot.columns = nuts_keys

    # Convert 'time_in_hours_from_first_jan_1950' column to datetime
    df_pivot.index = pd.to_datetime(df_pivot.index, unit="h", origin="1950-01-01")

    # constrain to the "UK" column
    df_pivot = df_pivot["UK"]

    # if this is the first iteration, set the index
    if df_full.empty:
        df_full = df_pivot.to_frame(name=name)
    else:
        df_full[name] = df_pivot

    # print the head of the dataframe
    print(df_full.head())

    # sys.exit()

0it [00:00, ?it/s]

1it [00:06,  6.85s/it]

                                   solar_power
time_in_hours_from_first_jan_1950             
1950-01-01 00:00:00                        0.0
1950-01-01 01:00:00                        0.0
1950-01-01 02:00:00                        0.0
1950-01-01 03:00:00                        0.0
1950-01-01 04:00:00                        0.0


2it [00:13,  6.89s/it]

                                   solar_power  100m_wind_speed
time_in_hours_from_first_jan_1950                              
1950-01-01 00:00:00                        0.0         6.148996
1950-01-01 01:00:00                        0.0         6.156016
1950-01-01 02:00:00                        0.0         6.138777
1950-01-01 03:00:00                        0.0         6.390699
1950-01-01 04:00:00                        0.0         6.461800


3it [00:22,  7.61s/it]

                                   solar_power  100m_wind_speed  \
time_in_hours_from_first_jan_1950                                 
1950-01-01 00:00:00                        0.0         6.148996   
1950-01-01 01:00:00                        0.0         6.156016   
1950-01-01 02:00:00                        0.0         6.138777   
1950-01-01 03:00:00                        0.0         6.390699   
1950-01-01 04:00:00                        0.0         6.461800   

                                   10m_wind_speed  
time_in_hours_from_first_jan_1950                  
1950-01-01 00:00:00                      3.976531  
1950-01-01 01:00:00                      3.948778  
1950-01-01 02:00:00                      3.957120  
1950-01-01 03:00:00                      4.150732  
1950-01-01 04:00:00                      4.243430  


4it [00:29,  7.67s/it]

                                   solar_power  100m_wind_speed  \
time_in_hours_from_first_jan_1950                                 
1950-01-01 00:00:00                        0.0         6.148996   
1950-01-01 01:00:00                        0.0         6.156016   
1950-01-01 02:00:00                        0.0         6.138777   
1950-01-01 03:00:00                        0.0         6.390699   
1950-01-01 04:00:00                        0.0         6.461800   

                                   10m_wind_speed  solar_irradiance  
time_in_hours_from_first_jan_1950                                    
1950-01-01 00:00:00                      3.976531               0.0  
1950-01-01 01:00:00                      3.948778               0.0  
1950-01-01 02:00:00                      3.957120               0.0  
1950-01-01 03:00:00                      4.150732               0.0  
1950-01-01 04:00:00                      4.243430               0.0  


5it [00:32,  5.80s/it]

                                   solar_power  100m_wind_speed  \
time_in_hours_from_first_jan_1950                                 
1950-01-01 00:00:00                        0.0         6.148996   
1950-01-01 01:00:00                        0.0         6.156016   
1950-01-01 02:00:00                        0.0         6.138777   
1950-01-01 03:00:00                        0.0         6.390699   
1950-01-01 04:00:00                        0.0         6.461800   

                                   10m_wind_speed  solar_irradiance   ofs_cfs  
time_in_hours_from_first_jan_1950                                              
1950-01-01 00:00:00                      3.976531               0.0  0.126081  
1950-01-01 01:00:00                      3.948778               0.0  0.132902  
1950-01-01 02:00:00                      3.957120               0.0  0.149275  
1950-01-01 03:00:00                      4.150732               0.0  0.163094  
1950-01-01 04:00:00                      4.243430 

6it [00:34,  5.81s/it]

                                   solar_power  100m_wind_speed  \
time_in_hours_from_first_jan_1950                                 
1950-01-01 00:00:00                        0.0         6.148996   
1950-01-01 01:00:00                        0.0         6.156016   
1950-01-01 02:00:00                        0.0         6.138777   
1950-01-01 03:00:00                        0.0         6.390699   
1950-01-01 04:00:00                        0.0         6.461800   

                                   10m_wind_speed  solar_irradiance   ofs_cfs  \
time_in_hours_from_first_jan_1950                                               
1950-01-01 00:00:00                      3.976531               0.0  0.126081   
1950-01-01 01:00:00                      3.948778               0.0  0.132902   
1950-01-01 02:00:00                      3.957120               0.0  0.149275   
1950-01-01 03:00:00                      4.150732               0.0  0.163094   
1950-01-01 04:00:00                      4.2




In [5]:
df_full.head()

Unnamed: 0_level_0,solar_power,100m_wind_speed,10m_wind_speed,solar_irradiance,ofs_cfs,ons_cfs
time_in_hours_from_first_jan_1950,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1950-01-01 00:00:00,0.0,6.148996,3.976531,0.0,0.126081,0.150885
1950-01-01 01:00:00,0.0,6.156016,3.948778,0.0,0.132902,0.169805
1950-01-01 02:00:00,0.0,6.138777,3.95712,0.0,0.149275,0.19589
1950-01-01 03:00:00,0.0,6.390699,4.150732,0.0,0.163094,0.235261
1950-01-01 04:00:00,0.0,6.4618,4.24343,0.0,0.160107,0.261474


In [6]:
# import the temperature data with different levels of detrending
file_path = os.path.join(data_dir, "NUTS_0_t2m_detrended_timeseries_historical.nc")

# assert that the file exists
assert os.path.exists(file_path), f"File {file_path} does not exist"

# Load the data
ds = xr.open_dataset(file_path)

# # print ds.variables
# print(ds.variables)

# assert that NUTS_keys is in the variables
assert "NUTS_keys" in ds.variables, f"Variable NUTS_keys not found in {file_path}"

# extract the nuts keys
nuts_keys = ds["NUTS_keys"].values

# trend levle = [1950., 1980., 2010., 2020.,    0.]

# Select the level of detrending
# print the trend_levels variable
print(ds["trend_levels"])

trend_level = 0

# select 0 detrend
# extract the trend levels
trend_levels = ds.trend_levels.values

# Find the index of the trend level
idx = np.where(trend_levels == trend_level)[0][0]

# print the index
print(idx)

# Extract the data
ds = ds.isel(trend=idx)

# turn the data into a dataframe
df = ds.to_dataframe()

# Pivot the dataframe
df_pivot = df.reset_index().pivot(
    index="time_in_hours_from_first_jan_1950",
    columns="NUTS",
    values="detrended_data",
)

# set the columns as the nuts keys
df_pivot.columns = nuts_keys

# Convert 'time_in_hours_from_first_jan_1950' column to datetime
df_pivot.index = pd.to_datetime(df_pivot.index, unit="h", origin="1950-01-01")

# constrain to the "UK" column
df_pivot = df_pivot["UK"]

# if this is the first iteration, set the index
if df_full.empty:
    df_full = df_pivot.to_frame(name="t2m_no_detrend")
else:
    df_full["t2m_no_detrend"] = df_pivot

<xarray.DataArray 'trend_levels' (trend: 5)>
[5 values with dtype=float32]
Dimensions without coordinates: trend
4


In [7]:
# import the temperature data with different levels of detrending
file_path = os.path.join(data_dir, "NUTS_0_t2m_detrended_timeseries_historical.nc")

# assert that the file exists
assert os.path.exists(file_path), f"File {file_path} does not exist"

# Load the data
ds = xr.open_dataset(file_path)

# # print ds.variables
# print(ds.variables)

# assert that NUTS_keys is in the variables
assert "NUTS_keys" in ds.variables, f"Variable NUTS_keys not found in {file_path}"

# extract the nuts keys
nuts_keys = ds["NUTS_keys"].values

# trend levle = [1950., 1980., 2010., 2020.,    0.]

# Select the level of detrending
# print the trend_levels variable
print(ds["trend_levels"])

trend_level = 2020

# select 0 detrend
# extract the trend levels
trend_levels = ds.trend_levels.values

# Find the index of the trend level
idx = np.where(trend_levels == trend_level)[0][0]

# print the index
print(idx)

# Extract the data
ds = ds.isel(trend=idx)

# turn the data into a dataframe
df = ds.to_dataframe()

# Pivot the dataframe
df_pivot = df.reset_index().pivot(
    index="time_in_hours_from_first_jan_1950",
    columns="NUTS",
    values="detrended_data",
)

# set the columns as the nuts keys
df_pivot.columns = nuts_keys

# Convert 'time_in_hours_from_first_jan_1950' column to datetime
df_pivot.index = pd.to_datetime(df_pivot.index, unit="h", origin="1950-01-01")

# constrain to the "UK" column
df_pivot = df_pivot["UK"]

# if this is the first iteration, set the index
if df_full.empty:
    df_full = df_pivot.to_frame(name="t2m_2020_detrend")
else:
    df_full["t2m_2020_detrend"] = df_pivot

<xarray.DataArray 'trend_levels' (trend: 5)>
[5 values with dtype=float32]
Dimensions without coordinates: trend
3


In [8]:
df_full.head()

Unnamed: 0_level_0,solar_power,100m_wind_speed,10m_wind_speed,solar_irradiance,ofs_cfs,ons_cfs,t2m_no_detrend,t2m_2020_detrend
time_in_hours_from_first_jan_1950,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1950-01-01 00:00:00,0.0,6.148996,3.976531,0.0,0.126081,0.150885,4.42869,5.895898
1950-01-01 01:00:00,0.0,6.156016,3.948778,0.0,0.132902,0.169805,4.583863,6.051068
1950-01-01 02:00:00,0.0,6.138777,3.95712,0.0,0.149275,0.19589,4.598047,6.06525
1950-01-01 03:00:00,0.0,6.390699,4.150732,0.0,0.163094,0.235261,4.460886,5.928087
1950-01-01 04:00:00,0.0,6.4618,4.24343,0.0,0.160107,0.261474,4.640312,6.10751
