In [1]:
# Import the relevant modules
import os
import sys
import glob

# Import third party modules
import numpy as np
import xarray as xr
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
import dictionaries_em as dicts

In [3]:
# Quick check of the NUTS shapefile:
# read the NUTS region boundaries into a GeoDataFrame
nuts_shp_path = "~/shapefiles/NUTS/NUTS_RG_10M_2021_4326.shp"
NUTS_shapefile = gpd.read_file(nuts_shp_path)

In [4]:
NUTS_shapefile.head()

Unnamed: 0,NUTS_ID,LEVL_CODE,CNTR_CODE,NAME_LATN,NUTS_NAME,MOUNT_TYPE,URBN_TYPE,COAST_TYPE,FID,geometry
0,BG423,3,BG,Pazardzhik,Пазарджик,3.0,2,3,BG423,"POLYGON ((24.42101 42.55306, 24.41032 42.46950..."
1,BG424,3,BG,Smolyan,Смолян,3.0,3,3,BG424,"POLYGON ((25.07422 41.79348, 25.05851 41.75177..."
2,BG425,3,BG,Kardzhali,Кърджали,3.0,3,3,BG425,"POLYGON ((25.94863 41.32034, 25.90644 41.30757..."
3,CH011,3,CH,Vaud,Vaud,3.0,2,3,CH011,"MULTIPOLYGON (((6.86623 46.90929, 6.89621 46.9..."
4,CH012,3,CH,Valais,Valais,3.0,2,3,CH012,"POLYGON ((8.47767 46.52760, 8.39953 46.48872, ..."


In [5]:
# Keep only the rows whose LEVL_CODE equals 0
is_level_zero = NUTS_shapefile["LEVL_CODE"] == 0
NUTS_shapefile = NUTS_shapefile[is_level_zero]

In [6]:
NUTS_shapefile.head()

Unnamed: 0,NUTS_ID,LEVL_CODE,CNTR_CODE,NAME_LATN,NUTS_NAME,MOUNT_TYPE,URBN_TYPE,COAST_TYPE,FID,geometry
1901,AL,0,AL,Shqipëria,Shqipëria,0.0,0,0,AL,"POLYGON ((19.82698 42.46950, 19.83939 42.46950..."
1902,AT,0,AT,Österreich,Österreich,0.0,0,0,AT,"POLYGON ((15.54245 48.90770, 15.75363 48.85218..."
1903,BE,0,BE,Belgique/België,Belgique/België,0.0,0,0,BE,"POLYGON ((5.10218 51.42900, 5.08780 51.38230, ..."
1916,DK,0,DK,Danmark,Danmark,0.0,0,0,DK,"MULTIPOLYGON (((14.82540 55.25410, 14.94371 55..."
1920,DE,0,DE,Deutschland,Deutschland,0.0,0,0,DE,"MULTIPOLYGON (((9.42015 54.83196, 9.42293 54.8..."


In [7]:
# Show the country name -> NUTS code mapping; its values are used in the
# following cells to restrict the shapefile to the countries of interest
print(dicts.countries_nuts_id)

{'Austria': 'AT', 'Albania': 'AL', 'Belarus': 'BY', 'Belgium': 'BE', 'Bosnia and Herzegovina': 'BA', 'Bulgaria': 'BG', 'Croatia': 'HR', 'Czech Republic': 'CZ', 'Denmark': 'DK', 'Estonia': 'EE', 'Finland': 'FI', 'France': 'FR', 'Germany': 'DE', 'Greece': 'EL', 'Hungary': 'HU', 'Ireland': 'IE', 'Italy': 'IT', 'Kosovo': 'XK', 'Latvia': 'LV', 'Lithuania': 'LT', 'Luxembourg': 'LU', 'Macedonia': 'MK', 'Moldova': 'MD', 'Montenegro': 'ME', 'Netherlands': 'NL', 'Norway': 'NO', 'Poland': 'PL', 'Portugal': 'PT', 'Romania': 'RO', 'Serbia': 'RS', 'Slovakia': 'SK', 'Slovenia': 'SI', 'Spain': 'ES', 'Sweden': 'SE', 'Switzerland': 'CH', 'Turkey': 'TR', 'Ukraine': 'UA', 'United Kingdom': 'UK'}


In [8]:
# Collect the NUTS codes, i.e. the values of the name -> code dictionary
countries_codes = [nuts_code for nuts_code in dicts.countries_nuts_id.values()]

In [9]:
# Keep only the rows whose NUTS_ID is one of the dictionary's codes
in_dictionary = NUTS_shapefile["NUTS_ID"].isin(countries_codes)
NUTS_shapefile = NUTS_shapefile[in_dictionary]

In [10]:
NUTS_shapefile.head()

Unnamed: 0,NUTS_ID,LEVL_CODE,CNTR_CODE,NAME_LATN,NUTS_NAME,MOUNT_TYPE,URBN_TYPE,COAST_TYPE,FID,geometry
1901,AL,0,AL,Shqipëria,Shqipëria,0.0,0,0,AL,"POLYGON ((19.82698 42.46950, 19.83939 42.46950..."
1902,AT,0,AT,Österreich,Österreich,0.0,0,0,AT,"POLYGON ((15.54245 48.90770, 15.75363 48.85218..."
1903,BE,0,BE,Belgique/België,Belgique/België,0.0,0,0,BE,"POLYGON ((5.10218 51.42900, 5.08780 51.38230, ..."
1916,DK,0,DK,Danmark,Danmark,0.0,0,0,DK,"MULTIPOLYGON (((14.82540 55.25410, 14.94371 55..."
1920,DE,0,DE,Deutschland,Deutschland,0.0,0,0,DE,"MULTIPOLYGON (((9.42015 54.83196, 9.42293 54.8..."


In [11]:
# Drop every column except the identifier, the name and the geometry
columns_to_keep = ["NUTS_ID", "NUTS_NAME", "geometry"]
NUTS_shapefile = NUTS_shapefile[columns_to_keep]

In [12]:
NUTS_shapefile.head()

Unnamed: 0,NUTS_ID,NUTS_NAME,geometry
1901,AL,Shqipëria,"POLYGON ((19.82698 42.46950, 19.83939 42.46950..."
1902,AT,Österreich,"POLYGON ((15.54245 48.90770, 15.75363 48.85218..."
1903,BE,Belgique/België,"POLYGON ((5.10218 51.42900, 5.08780 51.38230, ..."
1916,DK,Danmark,"MULTIPOLYGON (((14.82540 55.25410, 14.94371 55..."
1920,DE,Deutschland,"MULTIPOLYGON (((9.42015 54.83196, 9.42293 54.8..."


### Loading model NAO data ###

Loading the lagged and var adjusted NAO data from the .csv file

In [None]:
# Directory and file name of the saved NAO statistics table (CSV)
nao_df_dir = "/gws/nopw/j04/canari/users/benhutch/nao_stats_df/"
nao_df_fname = "psl_ONDJFM_global_1961_2014_2-9_4_nao_default.csv"

# Settings describing the model data (same key order as before)
model_config = dict(
    variable="psl",
    season="ONDJFM",
    region="global",
    start_year=1961,
    end_year=2014,
    forecast_range="2-9",
    lag=4,
    nao="nao_default",
    gridbox="North Europe Grid",
    method="nao_matched",
)

# Reading the table with pandas is currently disabled:
# nao_df = pd.read_csv(nao_df_dir + nao_df_fname)

In [None]:
# # view the head of the df
# nao_df.head()

Load in the other NAO data

In [None]:
# # Load the data into chunks
# ds_era5 = xr.open_mfdataset(
#     dicts.regrid_file,
#     combine="by_coords",
#     parallel=True,
#     chunks={"time": "auto", "latitude": "auto", "longitude": "auto"},
# )[
#     "msl"
# ]  # for mean sea level pressure

# # Combine the first two expver variables
# obs_msl = ds_era5.sel(expver=1).combine_first(ds_era5.sel(expver=5))

In [None]:
# # Constrain obs to ONDJFM
# obs_msl = obs_msl.sel(time=obs_msl.time.dt.month.isin([10, 11, 12, 1, 2, 3]))

# # Shift the time index back by 3 months
# obs_msl_shifted = obs_msl.shift(time=-3)

# # Take annual means
# obs_msl_annual = obs_msl_shifted.resample(time="Y").mean()

# # Throw away years 1959, 2021, 2022 and 2023
# obs_msl_annual = obs_msl_annual.sel(time=slice("1960", "2019"))

# # Remove the climatology
# obs_msl_anomaly = obs_msl_annual - obs_msl_annual.mean(dim="time")

In [None]:
# # Extract the lats and lons of the azores
# lat1, lat2 = dicts.azores_grid_corrected["lat1"], dicts.azores_grid_corrected["lat2"]
# lon1, lon2 = dicts.azores_grid_corrected["lon1"], dicts.azores_grid_corrected["lon2"]

# # Calculate the mean for the azores gridbox
# obs_msl_anomaly_azores = obs_msl_anomaly.sel(
#     lat=slice(lat1, lat2), lon=slice(lon1, lon2)
# ).mean(dim=["lat", "lon"])

# # Same for iceland
# lat1, lat2 = dicts.iceland_grid_corrected["lat1"], dicts.iceland_grid_corrected["lat2"]
# lon1, lon2 = dicts.iceland_grid_corrected["lon1"], dicts.iceland_grid_corrected["lon2"]

# # Calculate the mean for the iceland gridbox
# obs_msl_anomaly_iceland = obs_msl_anomaly.sel(
#     lat=slice(lat1, lat2), lon=slice(lon1, lon2)
# ).mean(dim=["lat", "lon"])

# # Calculate the NAO index (azores - iceland)
# nao_index = obs_msl_anomaly_azores - obs_msl_anomaly_iceland

In [None]:
# # EXtract the time series
# nao_index_time = nao_index.time.values

# # Extract the values
# nao_index_values = nao_index.values

# # Create a dataframe
# nao_running = pd.DataFrame({"time": nao_index_time, "value": nao_index_values})

# # Take a centred 8-year running mean
# nao_running = nao_running.set_index("time").rolling(8, center=True).mean()

In [None]:
# nao_index_values

In [None]:
# # Drop the Nans
# nao_running = nao_running.dropna()

In [None]:
# nao_running.head()

In [None]:
# # split the time column into year and month
# # split by - and take the first element
# nao_running.index = nao_running.index.year

In [None]:
# nao_running.head()

In [None]:
# # align the two dataframes by the index of time for nao_running
# # and the valid time for nao_df
# nao_df.head()

In [None]:
# # Set the index of nao_df as the valid_time column
# nao_df = nao_df.set_index("valid_time")

In [None]:
# nao_df.head()

In [None]:
# # join the two dataframes
# nao_df = nao_df.join(nao_running)

In [None]:
# nao_df.head()

In [None]:
# # Set the column with name 'value' to obs_nao_pd
# nao_df = nao_df.rename(columns={"value": "obs_nao_pd"})

In [None]:
# nao_df.head()

In [None]:
# from scipy.stats import pearsonr

# # Plot the obs_nao and obs_nao_pd, both divided by 100
# # with valid_time (index) on the x-axis
# plt.figure(figsize=(12, 8))

# plt.plot(nao_df.index, nao_df["obs_nao"] / 100, label="ERA5", color="black")

# plt.plot(
#     nao_df.index, nao_df["model_nao_mean"] / 100, label="dcppA-hindcast", color="blue"
# )

# corr, p = pearsonr(nao_df["obs_nao"], nao_df["model_nao_mean"])

# # Include a textbox in the top left hand corner with the corr and p values
# plt.text(
#     0.05,
#     0.95,
#     f"Correlation: {round(corr, 2)}\n p-value: {round(p, 2)}",
#     horizontalalignment="left",
#     verticalalignment="top",
#     transform=plt.gca().transAxes,
#     bbox=dict(facecolor="white", alpha=0.5),
# )

# plt.legend()

# plt.show()

Now we want to process the correlation data for the EEZ domains, then plot the strength of these correlations on the map.

In [None]:
# # Import the functions
# import functions_em as funcs

# # Extract the data from the .nc file
# eez_cfs = funcs.extract_offshore_eez_to_df(
#     filepath=os.path.join(dicts.clearheads_dir, "EEZ_zones_wp_historical.nc")
# )

In [None]:
# # Look at the head of the dataframe
# eez_cfs.head()

In [20]:
# Import the functions
import functions_em as funcs

In [21]:
import importlib

importlib.reload(funcs)

<module 'functions_em' from '/home/users/benhutch/energy-met-corr/functions_em.py'>

### Solar correlations ###

Looking at correlations between climate indices (solar irradiance, NAO, delta P) and countrywide solar power generation from the CLEARHEADS data.

The data we want is in:

* *NUTS_0_sp_historical.nc* - Hourly area-averaged solar power capacity factors at NUTS0 level across Europe from 1950 to 2020.
* *NUTS_0_sp_historical_loc_weighted.nc* - Hourly solar power capacity factors at NUTS0 level across Europe, from 01/01/1950 to 31/12/2020. Data is weighted by the location of known solar panels from Dunnett et al. (2020), and from Stowell et al. (2020) for the UK.
    * This dataset appears to be buggy, use the former.

In [None]:
# Set up the model config for the surface solar radiation (rsds) case
model_config = dict(
    variable="rsds",
    season="ONDJFM",
    region="global",
    start_year=1964,
    end_year=2014,
    forecast_range="2-9",
    lag=4,
    method="alternate_lag",
)

# Arguments for the correlation function: NUTS0 solar power file plus the
# NUTS shapefile name and directory
solar_call_kwargs = {
    "filename": "NUTS_0_sp_historical.nc",
    "shp_file": "NUTS_RG_10M_2021_4326.shp",
    "shp_file_dir": "/home/users/benhutch/shapefiles/NUTS/",
    "obs_var": "ssrd",
    "use_model_data": True,
    "model_config": model_config,
}

# Call the function
dfs = funcs.correlate_nao_uread(**solar_call_kwargs)

In [None]:
# Correlations between the NAO and solar power capacity factors at NUTS0
# level -- not location weighted in this case --
# NOTE(review): the original note described these as *observed* correlations,
# yet use_model_data=True is passed; confirm which one is intended
unweighted_solar_kwargs = dict(
    filename="NUTS_0_sp_historical.nc",
    shp_file="",
    shp_file_dir="",
    obs_var="ssrd",
    use_model_data=True,
    model_config=model_config,
)
dfs = funcs.correlate_nao_uread(**unweighted_solar_kwargs)

In [None]:
print(type(dfs))

In [None]:
print(len(dfs))

In [None]:
# Split the result into its two items.
# NOTE(review): a later cell unpacks four values from the same function
# (funcs.correlate_nao_uread); confirm that exactly two are returned here
df1, df2 = dfs

In [None]:
merged_df = df1

In [None]:
# Corr all NaNs - why?
corr_df = df2

In [None]:
# Directory and file name of the saved NAO statistics table (CSV)
nao_df_dir = "/gws/nopw/j04/canari/users/benhutch/nao_stats_df/"
nao_df_fname = "psl_ONDJFM_global_1961_2014_2-9_4_nao_default.csv"

# Set up the model config (Thornton 2019 UK NAO definition, Scandinavia gridbox)
model_config = dict(
    variable="psl",
    season="ONDJFM",
    region="global",
    start_year=1961,
    end_year=2014,
    forecast_range="2-9",
    lag=4,
    nao="thornton_2019_uk",
    gridbox="Scandinavia",
    method="nao_matched",
)

# File names noted by the original author for this call:
#   EEZ_zones_wp_historical.nc
#   NUTS_0_HDD_historical_pop_weighted.nc
# NOTE(review): the original note mentioned "cooling degree days" for the
# daily time unit, but the file passed below is the HDD one -- confirm
hdd_call_kwargs = {
    "filename": "NUTS_0_HDD_historical_pop_weighted.nc",
    "time_unit": "d",
    "obs_var": "msl",
    "avg_grid": dicts.scandi_box,
    "use_model_data": True,
    "model_config": model_config,
}
df, merged_df, merged_df_full, corr_df = funcs.correlate_nao_uread(**hdd_call_kwargs)

In [None]:
# Relate the model NAO in merged_df to the var228 field over the scandi box;
# coeff_fname names the CSV of slope coefficients passed to the helper
gridbox_corr_kwargs = {
    "nao_df": merged_df,
    "gridbox": dicts.scandi_box,
    "obs_var": "var228",
    "obs_var_data_path": dicts.regrid_file_pr,
    "coeff_fname": "delta_p_pr_scandi_slope.csv",
}
df = funcs.calc_model_nao_gridbox_var_corr(**gridbox_corr_kwargs)

In [None]:
df.head()

In [None]:
# Try out the plotting helper on the dataframe built above
calib_plot_kwargs = dict(
    df=df,
    obs_var="var228",
    index_name="Calibrated delta P",
    ylabel="Scandi precip anoms. (mm/day)",
)
funcs.plot_calib_corr(**calib_plot_kwargs)

In [None]:
# Plot calibrated_model_nao_mean and the var228 anomaly mean as two lines
# on one shared y-axis, annotated with their Pearson correlation
from scipy.stats import pearsonr

# The two series being compared
calibrated_nao = df["calibrated_model_nao_mean"]
var228_anomaly = df["var228 anomaly mean"]

# Create a new figure with a single axes
fig, ax1 = plt.subplots()

# Both lines are drawn on ax1
ax1.plot(df.index, calibrated_nao, color="blue", label="nao")
ax1.set_ylabel("pr anomalies (mm/day)")
ax1.plot(df.index, var228_anomaly, color="red", label="var228")

# Correlation coefficient and p-value between the two series
corr, p = pearsonr(calibrated_nao, var228_anomaly)

# Text box in the top left-hand corner (axes coordinates) with corr and p
ax1.text(
    0.05,
    0.95,
    f"Corr: {round(corr, 2)}\n p-value: {round(p, 2)}",
    horizontalalignment="left",
    verticalalignment="top",
    transform=ax1.transAxes,
    bbox=dict(facecolor="white", alpha=0.5),
)

# Horizontal black dashed line at y=0
ax1.axhline(0, color="black", linestyle="--")

# Legend in the upper right
ax1.legend(loc="upper right")

# Show the plot
plt.show()

In [None]:
print(f"{p:.2f}")

In [None]:
# Rebuild the country-level NUTS GeoDataFrame in a single cell

# NUTS codes of the countries of interest (values of the name -> code mapping)
countries_codes = list(dicts.countries_nuts_id.values())

# Read the full NUTS shapefile
nuts_all_levels = gpd.read_file("~/shapefiles/NUTS/NUTS_RG_10M_2021_4326.shp")

# Level code 0 only, and only the codes present in the dictionary
is_wanted_country = (nuts_all_levels["LEVL_CODE"] == 0) & nuts_all_levels[
    "NUTS_ID"
].isin(countries_codes)

# Keep only the NUTS_ID, NUTS_NAME, and geometry columns
NUTS_shapefile = nuts_all_levels[is_wanted_country][
    ["NUTS_ID", "NUTS_NAME", "geometry"]
]

In [None]:
NUTS_shapefile.head()

In [None]:
corr_df.head()

In [None]:
# Read the EEZ shapefile (path is relative to the working directory)
eez_shp_path = "shapefiles/EEZ/eez_v12.shp"
EEZ_shapefile = gpd.read_file(eez_shp_path)

In [None]:
EEZ_shapefile.head()

In [None]:
# Print all of the column names for the eeZ shapefile
print(EEZ_shapefile.columns)

In [None]:
# Keep only the zone name, the sovereign state, its ISO code and the geometry
eez_columns_to_keep = ["GEONAME", "SOVEREIGN1", "ISO_SOV1", "geometry"]
EEZ_shapefile = EEZ_shapefile[eez_columns_to_keep]

In [None]:
EEZ_shapefile.head()

In [None]:
iso_sov1 = EEZ_shapefile["ISO_SOV1"].values

In [None]:
iso_sov1

In [None]:
# Extract the values of the region column from corr_df
region_values = corr_df.region.values

In [None]:
region_values

In [None]:
# reload the dictionary
importlib.reload(dicts)

In [None]:
# Translate every region code into its ISO_SOV1 equivalent through
# dicts.iso_mapping (a region missing from the mapping raises KeyError)
iso_sov1_values = list(map(dicts.iso_mapping.__getitem__, region_values))

In [None]:
iso_sov1_values

In [None]:
# Keep only the zones whose ISO_SOV1 code is among the mapped region codes
has_known_iso = EEZ_shapefile["ISO_SOV1"].isin(iso_sov1_values)
EEZ_shapefile = EEZ_shapefile[has_known_iso]

In [None]:
# Find where ISO_SOV1 is equal to "ITA"
EEZ_shapefile.head()

In [None]:
# Where corr_df.region passed through iso_mapping dict is
# equal to the values in EEZ_shapefile.ISO_SOV1
# Add the corresponding correlation and p-value to the dataframe

In [None]:
# Keep only the rows whose GEONAME contains "Exclusive Economic Zone"
is_eez_row = EEZ_shapefile["GEONAME"].str.contains("Exclusive Economic Zone")
EEZ_shapefile = EEZ_shapefile[is_eez_row]

In [None]:
EEZ_shapefile.head()

In [None]:
# Now we want to append the correlation and p-value to the dataframe
# Add a new column to corr_df called "ISO_SOV1"
corr_df["ISO_SOV1"] = iso_sov1_values

In [None]:
corr_df["region"] == "EL"

In [None]:
# Attach the correlation and p-value of each country to its EEZ polygon(s),
# matching on the three-letter ISO_SOV1 code.
#
# The previous row-by-row version assigned the matching ``.values`` array to
# a single cell, which raises when a code has no match or more than one match
# in corr_df. A lookup table plus ``map`` handles both cases.

# Work on an explicit copy so the new columns are not written into a
# filtered slice of an earlier GeoDataFrame
EEZ_shapefile = EEZ_shapefile.copy()

# One row per ISO_SOV1 code; if a code is repeated the first row is used
corr_by_iso = corr_df.drop_duplicates(subset="ISO_SOV1").set_index("ISO_SOV1")

# Codes that are absent from corr_df receive NaN instead of raising
for stat_column in ("correlation", "p-value"):
    EEZ_shapefile[stat_column] = EEZ_shapefile["ISO_SOV1"].map(
        corr_by_iso[stat_column]
    )

# Report zones without a match, in the same way as the NUTS loop does
unmatched_iso = EEZ_shapefile.loc[
    ~EEZ_shapefile["ISO_SOV1"].isin(corr_by_iso.index), "ISO_SOV1"
].unique()
for iso_sov1 in unmatched_iso:
    print(f"No match found for {iso_sov1}")

In [None]:
# Attach correlation and p-value to each NUTS region, matching NUTS_ID
# against the region column of corr_df
for row_label, nuts_id in NUTS_shapefile["NUTS_ID"].items():
    # Rows of corr_df that belong to this region
    matching_rows = corr_df[corr_df["region"] == nuts_id]

    # Nothing to copy for regions that corr_df does not contain
    if matching_rows.empty:
        print(f"No match found for {nuts_id}")
        continue

    # Copy both statistics into the shapefile row
    NUTS_shapefile.loc[row_label, "correlation"] = matching_rows[
        "correlation"
    ].values
    NUTS_shapefile.loc[row_label, "p-value"] = matching_rows["p-value"].values

In [None]:
EEZ_shapefile.head()

In [None]:
NUTS_shapefile.head()

In [None]:
# Drop rows whose GEONAME contains any text in parentheses
has_parenthesised_text = EEZ_shapefile["GEONAME"].str.contains(r"\(.*\)")
EEZ_shapefile = EEZ_shapefile[~has_parenthesised_text]

In [None]:
EEZ_shapefile.head()

In [None]:
print(type(EEZ_shapefile))

In [None]:
# Reload the dicts
importlib.reload(dicts)

In [None]:
# Import cartopy
import cartopy.crs as ccrs

# Map of the NUTS regions, coloured by their correlation value
plt.figure(figsize=(10, 10))
ax = plt.axes(projection=ccrs.PlateCarree())

# Options for the horizontal colourbar drawn as the legend
colourbar_options = {
    "label": "Correlation",
    "orientation": "horizontal",
    "shrink": 0.8,
    "pad": 0.01,
}
NUTS_shapefile.plot(
    column="correlation",
    ax=ax,
    legend=True,
    cmap="coolwarm",
    vmin=-1,
    vmax=1,
    legend_kwds=colourbar_options,
)

# Use cartopy to add the coastlines
ax.coastlines()

# Second axes of the figure (presumably the colourbar): label and tick size
cbar = ax.get_figure().get_axes()[1]
cbar.set_ylabel("Correlation", fontsize=12)
cbar.tick_params(labelsize=10)

# Corner coordinates of the dicts.med_box_focus grid box
focus_box = dicts.med_box_focus
lat1, lat2 = focus_box["lat1"], focus_box["lat2"]
lon1, lon2 = focus_box["lon1"], focus_box["lon2"]

# Outline that grid box in red
box_lons = [lon1, lon2, lon2, lon1, lon1]
box_lats = [lat1, lat1, lat2, lat2, lat1]
plt.plot(box_lons, box_lats, "r")

# Corner coordinates of dicts.uk_n_box_corrected; its outline is not drawn:
# plt.plot(
#     [lon1_n, lon2_n, lon2_n, lon1_n, lon1_n],
#     [lat1_n, lat1_n, lat2_n, lat2_n, lat1_n],
#     "g",
# )
north_box = dicts.uk_n_box_corrected
lat1_n, lat2_n = north_box["lat1"], north_box["lat2"]
lon1_n, lon2_n = north_box["lon1"], north_box["lon2"]

# Corner coordinates of dicts.uk_s_box_corrected; its outline is not drawn:
# plt.plot(
#     [lon1_s, lon2_s, lon2_s, lon1_s, lon1_s],
#     [lat1_s, lat1_s, lat2_s, lat2_s, lat1_s],
#     "g",
# )
south_box = dicts.uk_s_box_corrected
lat1_s, lat2_s = south_box["lat1"], south_box["lat2"]
lon1_s, lon2_s = south_box["lon1"], south_box["lon2"]

# Labelled latitude/longitude gridlines
ax.gridlines(draw_labels=True)

# Constrain to specific bounds
ax.set_xlim(-40, 50)
ax.set_ylim(30, 80)

In [None]:
# Reload the dictionary
importlib.reload(dicts)

In [None]:
# Subset of the EEZ zones whose ISO_SOV1 code is listed in
# dicts.eez_agg_countries
in_aggregate = EEZ_shapefile["ISO_SOV1"].isin(dicts.eez_agg_countries)
EEZ_shapefile_n = EEZ_shapefile[in_aggregate]

In [None]:
# Map of the selected EEZ zones, coloured by their correlation value
plt.figure(figsize=(10, 10))
ax = plt.axes(projection=ccrs.PlateCarree())

# Options for the horizontal colourbar drawn as the legend
eez_colourbar_options = {
    "label": "Correlation",
    "orientation": "horizontal",
    "shrink": 0.8,
    "pad": 0.01,
}
EEZ_shapefile_n.plot(
    column="correlation",
    ax=ax,
    legend=True,
    cmap="coolwarm",
    vmin=-1,
    vmax=1,
    legend_kwds=eez_colourbar_options,
)

# Use cartopy to add the coastlines
ax.coastlines()

# Second axes of the figure (presumably the colourbar): label and tick size
cbar = ax.get_figure().get_axes()[1]
cbar.set_ylabel("Correlation", fontsize=12)
cbar.tick_params(labelsize=10)

# Labelled latitude/longitude gridlines
ax.gridlines(draw_labels=True)

# Constrain to specific bounds
ax.set_xlim(-40, 50)
ax.set_ylim(30, 80)

In [None]:
corr_df.head()

In [None]:
# Reload dicts
importlib.reload(dicts)

In [None]:
# Rename the columns through dicts.iso_mapping, one entry at a time, so that
# the names listed in dicts.eez_agg_countries can be selected afterwards
for original_name, mapped_name in dicts.iso_mapping.items():
    merged_df = merged_df.rename(columns={original_name: mapped_name})

# N_Europe is the row-wise mean over the dicts.eez_agg_countries columns
aggregate_columns = merged_df[dicts.eez_agg_countries]
merged_df["N_Europe"] = aggregate_columns.mean(axis=1)

In [None]:
merged_df.head()

In [None]:
from scipy.stats import pearsonr

# Twin-axis time series:
#   left y-axis  - the "ssrd anomaly mean" column (blue)
#   right y-axis - the ES column (red)
irradiance_anomaly = merged_df["ssrd anomaly mean"]
spain_series = merged_df.ES

fig, ax1 = plt.subplots(figsize=(10, 5))

# Left axis: irradiance anomaly with a dashed zero line
ax1.plot(merged_df.index, irradiance_anomaly, "b-")
ax1.set_xlabel("Time")
ax1.set_ylabel("Obs solar irradiance (W/m^2)", color="b")
ax1.axhline(0, color="black", linestyle="--")
ax1.tick_params("y", colors="b")

# Right axis shares the x-axis with the left one
ax2 = ax1.twinx()
ax2.plot(merged_df.index, spain_series, "r-")
ax2.set_ylabel("Spain solar CFs (GW)", color="r")
ax2.tick_params("y", colors="r")

# # Invert the y-axis
# ax2.invert_yaxis()

# Pearson correlation between the two plotted series
corr, p = pearsonr(irradiance_anomaly, spain_series)

# Annotate the top left-hand corner with the correlation and p-value
ax2.text(
    0.05,
    0.95,
    f"Correlation: {corr:.2f}\nP-value: {p:.2f}",
    horizontalalignment="left",
    verticalalignment="top",
    bbox=dict(facecolor="white", alpha=0.5),
    transform=ax2.transAxes,
)

# Show the plot
plt.show()

In [None]:
# Scatter plot of the fcst_ts_mean column against the N_Europe column,
# with a least-squares straight line through the points.
# NOTE(review): the axis labels describe wind speed and wind power; confirm
# that they match the data currently held in merged_df
plt.figure(figsize=(8, 8))

# Plot the scatter plot
plt.scatter(merged_df["fcst_ts_mean"], merged_df.N_Europe, color="k")

# First-degree polynomial fit: m is the slope, b the intercept
m, b = np.polyfit(merged_df["fcst_ts_mean"], merged_df.N_Europe, 1)

# Plot the line of best fit
plt.plot(
    merged_df["fcst_ts_mean"],
    m * merged_df["fcst_ts_mean"] + b,
    "k",
)

# Set the x-axis label (the stray closing parenthesis has been removed)
plt.xlabel("Model wind speed anomalies (m/s)", color="b")

# Set the xticks to blue
plt.tick_params(axis="x", colors="b")

# Set the y-axis label
plt.ylabel("Obs Wind Power (GW)", color="r")

# Set the yticks to red
plt.tick_params(axis="y", colors="r")

In [None]:
# ERA5 file used for the NAO index, stored in ~/ERA5/
era5_data_path = "~/ERA5/adaptor.mars.internal-1691509121.3261805-29348-4-3a487c76-fc7b-421f-b5be-7436e2eb78d7.nc"

# Open the file lazily in chunks of 100 along time, latitude and longitude
era5_chunks = {"time": 100, "latitude": 100, "longitude": 100}
era5_dataset = xr.open_mfdataset(
    era5_data_path,
    combine="by_coords",
    parallel=True,
    chunks=era5_chunks,
)

# Mean sea level pressure variable
ds_era5 = era5_dataset["msl"]

# Merge the two expver slices: expver=1 first, gaps filled from expver=5
msl_expver_1 = ds_era5.sel(expver=1)
msl_expver_5 = ds_era5.sel(expver=5)
obs_msl = msl_expver_1.combine_first(msl_expver_5)

In [None]:
# Keep only the October-March time steps
ondjfm_months = [10, 11, 12, 1, 2, 3]
is_ondjfm = obs_msl.time.dt.month.isin(ondjfm_months)
obs_msl = obs_msl.sel(time=is_ondjfm)

# Move every value three positions earlier along the time axis
# (presumably so that Jan-Mar land in the year of the preceding Oct-Dec;
# this assumes monthly time steps -- TODO confirm)
obs_msl_shifted = obs_msl.shift(time=-3)

# Annual means, restricted to the years 1960-2019 inclusive
obs_msl_annual = (
    obs_msl_shifted.resample(time="Y").mean().sel(time=slice("1960", "2019"))
)

# Anomalies relative to the mean over the retained years
msl_climatology = obs_msl_annual.mean(dim="time")
obs_msl_anomaly = obs_msl_annual - msl_climatology

In [None]:
# Bounds of the Azores grid box
azores_box = dicts.era5_azores
lat1, lat2 = azores_box["lat1"], azores_box["lat2"]
lon1, lon2 = azores_box["lon1"], azores_box["lon2"]

# Select the box, then average over latitude and longitude
azores_subset = obs_msl_anomaly.sel(
    latitude=slice(lat1, lat2), longitude=slice(lon1, lon2)
)
obs_msl_anomaly_azores = azores_subset.mean(dim=["latitude", "longitude"])

In [None]:
# Bounds of the Iceland grid box
iceland_box = dicts.era5_iceland
lat1, lat2 = iceland_box["lat1"], iceland_box["lat2"]
lon1, lon2 = iceland_box["lon1"], iceland_box["lon2"]

# Select the box, then average over latitude and longitude
iceland_subset = obs_msl_anomaly.sel(
    latitude=slice(lat1, lat2), longitude=slice(lon1, lon2)
)
obs_msl_anomaly_iceland = iceland_subset.mean(dim=["latitude", "longitude"])

In [None]:
# Calculate the NAO index (azores - iceland)
nao_index = obs_msl_anomaly_azores - obs_msl_anomaly_iceland

In [None]:
# Pull the time coordinate and the index values out of the DataArray
nao_index_time, nao_index_values = nao_index.time.values, nao_index.values

# Two-column table: time stamp and NAO index value
nao_df = pd.DataFrame({"time": nao_index_time, "value": nao_index_values})

# Centred running mean over a window of 8 rows, indexed by time
nao_by_time = nao_df.set_index("time")
nao_running = nao_by_time.rolling(window=8, center=True).mean()

In [None]:
# Have a look at the dataframe
nao_running.head()

In [None]:
# Drop the NaN values
nao_running = nao_running.dropna()

In [None]:
# eez_cfs is only created by a cell that is currently commented out, so on a
# fresh kernel it would be undefined here; load it on demand with the same
# call that the commented-out cell uses
if "eez_cfs" not in globals():
    eez_cfs = funcs.extract_offshore_eez_to_df(
        filepath=os.path.join(dicts.clearheads_dir, "EEZ_zones_wp_historical.nc")
    )

# Inner join on the index: only index labels present in both frames survive
eez_df = eez_cfs.join(nao_running, how="inner")

In [None]:
eez_df.head()

In [None]:
# Rename the value column as 'NAO anomaly (Pa)'
eez_df = eez_df.rename(columns={"value": "NAO anomaly (Pa)"})

In [None]:
# Drop every row that contains a NaN in any column
# (dropna() without a subset is not limited to the NAO anomaly column)
eez_df = eez_df.dropna()

In [None]:
eez_df.head()

In [None]:
from scipy.stats import pearsonr
import pandas as pd

# Build a table with one row per region:
#   'region'      - column name in eez_df, e.g. Netherlands_7
#   'correlation' - Pearson correlation between that column and the NAO
#   'p-value'     - the p-value of that correlation
nao_column = "NAO anomaly (Pa)"

# Collect plain records and build the frame once; growing a DataFrame with
# pd.concat inside the loop copies it on every iteration
correlation_records = []

# Every column except the NAO one, selected by name rather than by position
for region in eez_df.columns.drop(nao_column):
    corr, p = pearsonr(eez_df[region], eez_df[nao_column])
    correlation_records.append({"region": region, "correlation": corr, "p-value": p})

# Explicit columns keep the layout stable even when there are no regions
correlation_df = pd.DataFrame(
    correlation_records, columns=["region", "correlation", "p-value"]
)

In [None]:
correlation_df.head()

In [None]:
# Remove the numeric suffix from the region names (e.g. "Netherlands_7" ->
# "Netherlands"). The previous line assigned the bare ``.str`` accessor object
# to the column instead of a string result. A regex on a trailing
# "_<digits>" is used so that suffixes of any length are handled.
correlation_df["region"] = correlation_df["region"].str.replace(
    r"_\d+$", "", regex=True
)

In [None]:
correlation_df

In [None]:
# Replace every underscore in the region names with a space
region_names = correlation_df["region"]
correlation_df["region"] = region_names.str.replace("_", " ")

In [None]:
correlation_df.head()

In [None]:
EEZ_shapefile["SOVEREIGN1"]

In [None]:
# Add two NaN-filled columns to the EEZ GeoDataFrame, to be filled in later:
#   'correlation' - correlation between the NAO and the offshore wind CFs
#   'p-value'     - p-value of that correlation
for new_column in ("correlation", "p-value"):
    EEZ_shapefile[new_column] = np.nan

In [None]:
EEZ_shapefile.head()

In [None]:
# For each region name use its first row in correlation_df, and copy that
# row's statistics to every EEZ row whose TERRITORY1 equals the region
first_row_per_region = correlation_df.drop_duplicates(subset="region", keep="first")

for region, corr, p in zip(
    first_row_per_region["region"],
    first_row_per_region["correlation"],
    first_row_per_region["p-value"],
):
    # EEZ rows belonging to this region
    in_region = EEZ_shapefile["TERRITORY1"] == region

    # Set the values in the EEZ_shapefile
    EEZ_shapefile.loc[in_region, "correlation"] = corr
    EEZ_shapefile.loc[in_region, "p-value"] = p

In [None]:
# Correlation values of the rows whose TERRITORY1 is France.
# A bare "mask, column" pair only builds a tuple; .loc is needed to select.
EEZ_shapefile.loc[EEZ_shapefile["TERRITORY1"] == "France", "correlation"]

In [None]:
# Collect the TERRITORY1 entries into a plain Python list
territories = [territory for territory in EEZ_shapefile["TERRITORY1"]]

# Print the territories
print(territories)

In [None]:
# Keep only the EEZ rows whose TERRITORY1 appears in dicts.countries_list
in_countries_list = EEZ_shapefile["TERRITORY1"].isin(dicts.countries_list)
EEZ_shapefile = EEZ_shapefile[in_countries_list]

In [None]:
# Print the correlation values of the rows whose sovereign state is France
is_french = EEZ_shapefile["SOVEREIGN1"] == "France"
print(EEZ_shapefile.loc[is_french, "correlation"])

In [None]:
# Import cartopy
import cartopy.crs as ccrs

# Map of the EEZ zones, coloured by their correlation value
plt.figure(figsize=(10, 10))
ax = plt.axes(projection=ccrs.PlateCarree())

# "shrink" is a colourbar option, so it has to go through legend_kwds;
# passed directly to .plot() it is forwarded to the polygon artists, which
# do not accept it
EEZ_shapefile.plot(
    column="correlation",
    ax=ax,
    legend=True,
    cmap="coolwarm",
    legend_kwds={"shrink": 0.5},
)

# Use cartopy to add the coastlines
ax.coastlines()

# Second axes of the figure (presumably the colourbar): label and tick size
cbar = ax.get_figure().get_axes()[1]
cbar.set_ylabel("Correlation", fontsize=12)
cbar.tick_params(labelsize=10)

# Constrain to specific bounds
ax.set_xlim(-50, 50)
ax.set_ylim(30, 80)

In [None]:
# Map of the EEZ zones coloured by correlation, with a manually added colourbar
fig, ax = plt.subplots(figsize=(10, 10), subplot_kw={"projection": ccrs.PlateCarree()})

# GeoDataFrame.plot has no "add_colorbar" argument (that is an xarray option);
# the automatic legend is simply left off, which is the default.
# The call returns the axes it drew on.
cax = EEZ_shapefile.plot(column="correlation", ax=ax, cmap="coolwarm")

# Use cartopy to add the coastlines
ax.coastlines()

# Add the colourbar from the first collection drawn on the axes
cbar = fig.colorbar(cax.collections[0], ax=ax, shrink=0.5)
cbar.set_label("Correlation")

# Constrain to specific bounds
ax.set_xlim(-50, 50)
ax.set_ylim(30, 80)