### Bloomfield demand correlations ###

Exploring correlations within some of Hannah's older datasets, which include demand (hopefully in some kind of GW/GWh unit), in order to examine the relationship between climate indices and electricity demand.

The goal is to move towards a space where we have some quantification of UK demand (in GW/GWh) which we can predict to some capacity, along with UK offshore wind power capacity factors (CFs; in GW/GWh, which we know we can predict using delta P), so that we can build a time series of demand-net-wind (demand - wind) and explore whether this is something we can predict on decadal timescales.

In [None]:
# Standard library
import os
import sys
import glob

# Third-party libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.stats import pearsonr
from scipy import signal

In [None]:
# Show the directory in which the files are stored
# NOTE(review): `dir` shadows the Python builtin of the same name; it is kept
# as-is because other cells in this notebook refer to the path by this name
dir = "/home/users/benhutch/ERA5_energy_update"

# List the files in this directory
os.listdir(dir)

### Demand data ###

* *ERA5_full_demand_1979_2018.csv* - Daily mean country level electricity demand for the 28 countries modelled in this study from 01/01/1979 to 31/12/2018.   
* *ERA5_weather_dependent_demand_1979_2018.csv* - Daily mean country level electricity weather-dependent demand for the 28 countries modelled in this study from 01/01/1979 to 31/12/2018.

#### Demand model ####

A time series of daily mean electricity demand for 28 European countries was derived from 2m temperature (T2m) data from the ERA5 reanalysis. The model was developed at daily resolution using a multiple-linear regression model where the possible model inputs are the day of the week, heating degree days (HDD) and cooling degree days (CDD). The model is optimised to choose the best set of parameters to minimise the Akaike information criterion (AIC). Time series of the weather-dependent model input parameters (T2m, HDD, CDD) are available, as well as the full demand model output and the weather-dependent demand (i.e. the demand with the effects of the day of the week removed by setting those regression coefficients to zero).

In [None]:
# File names of the two demand tables inside the data directory
full_demand_fname = "ERA5_full_demand_1979_2018.csv"
full_demand_wdependent_fname = "ERA5_weather_dependent_demand_1979_2018.csv"

# Read both CSV files, in the same order as the names above:
# first the full demand, then the weather-dependent demand
full_demand, full_demand_weather = (
    pd.read_csv(os.path.join(dir, csv_name))
    for csv_name in (full_demand_fname, full_demand_wdependent_fname)
)

In [None]:
# Preview the first rows of the full-demand table as loaded from disk
full_demand.head()

In [None]:
# restrict to column United_Kingdom_full_demand_no_pop_weights_1979_2018.dat
# and Unnamed: 0
# .copy() makes the result an independent dataframe, so the in-place renames
# and column assignments applied to it afterwards do not raise pandas'
# SettingWithCopyWarning
full_demand = full_demand[['Unnamed: 0', 'United_Kingdom_full_demand_no_pop_weights_1979_2018.dat']].copy()


In [None]:
# Check that only the two selected columns remain
full_demand.head()

In [None]:
# Give both columns short, readable names in one pass:
#   'Unnamed: 0'                                   -> 'date'
#   'United_Kingdom_full_demand_no_pop_weights...' -> 'UK_demand (GW)'
column_names = {
    'Unnamed: 0': 'date',
    'United_Kingdom_full_demand_no_pop_weights_1979_2018.dat': 'UK_demand (GW)',
}
full_demand.rename(columns=column_names, inplace=True)

In [None]:
# Check the renamed columns
full_demand.head()

In [None]:
# The date labels may be wrapped like a tuple, e.g. ('1979-01-01',)
# Extract the YYYY-MM-DD part by pattern rather than by character position:
# this works for the wrapped form and for already-clean values alike, so the
# cell can safely be re-run after the column has been converted
iso_dates = full_demand['date'].astype(str).str.extract(
    r"(\d{4}-\d{2}-\d{2})", expand=False
)

# Fail loudly rather than silently producing NaT for malformed labels
if iso_dates.isna().any():
    raise ValueError("Some entries in the 'date' column do not contain a YYYY-MM-DD date")

# convert to datetime
full_demand['date'] = pd.to_datetime(iso_dates)

In [None]:
# Check the cleaned date column
full_demand.head()

In [None]:
# Build the ONDJFM (Oct-Mar) 8-year running mean of the full demand

# Index the table by date so that it can be resampled in time
full_demand.set_index('date', inplace=True)

# Daily values -> monthly means
full_demand_monthly = full_demand.resample('M').mean()

# Calendar months that make up the extended winter season
ondjfm_months = [10, 11, 12, 1, 2, 3]

# Keep only the winter months, then drop the first three and the last three
# of the remaining monthly rows
is_winter_month = full_demand_monthly.index.month.isin(ondjfm_months)
full_demand_ondjfm = full_demand_monthly.loc[is_winter_month].iloc[3:-3]

# Annual means followed by a centred 8-year rolling mean
# NOTE(review): 'A' bins by calendar year, so each annual value appears to
# combine Jan-Mar and Oct-Dec of the same year rather than one contiguous
# Oct-Mar winter - confirm this is the intended definition
full_demand_ondjfm_8yrRM = (
    full_demand_ondjfm
    .resample('A').mean()
    .rolling(window=8, center=True).mean()
)

In [None]:
# Move the date index back into an ordinary column
full_demand_ondjfm_8yrRM.reset_index(inplace=True)

In [None]:
# Preview the smoothed series; the first rows are expected to be NaN because
# the centred 8-year window is incomplete at the start of the record
full_demand_ondjfm_8yrRM.head()

In [None]:
# set up a filename for the smoothed ONDJFM full-demand series
fname = "ERA5_full_demand_1979_2018_8yrRM_ONDJFM.csv"

# save the file next to the input data, without writing the row index
full_demand_ondjfm_8yrRM.to_csv(os.path.join(dir, fname), index=False)

In [None]:
# full_demand_weather.columns

In [None]:
# now for full_demand_weather
# restrict to the date label column (Unnamed: 0) and the UK weather-dependent
# demand column; .copy() makes the result an independent dataframe so the
# in-place edits below do not raise pandas' SettingWithCopyWarning
full_demand_weather = full_demand_weather[
    ['Unnamed: 0', 'United_Kingdom_wd_demand_no_pop_weights_no_time_trend_1979_2018.dat']
].copy()

# rename Unnamed: 0 as date, and the UK weather-dependent demand column
# as UK_wd_demand (GW)
full_demand_weather.rename(
    columns={
        'Unnamed: 0': 'date',
        'United_Kingdom_wd_demand_no_pop_weights_no_time_trend_1979_2018.dat': 'UK_wd_demand (GW)',
    },
    inplace=True,
)

# convert date to datetime
full_demand_weather['date'] = pd.to_datetime(full_demand_weather['date'])

# Set the date as the index
full_demand_weather.set_index('date', inplace=True)

In [None]:
# Daily values -> monthly means
full_demand_weather_monthly = full_demand_weather.resample('M').mean()

# Keep only the ONDJFM months, then drop the first three and the last three
# of the remaining monthly rows
winter_rows = full_demand_weather_monthly.index.month.isin(ondjfm_months)
full_demand_weather_ondjfm = full_demand_weather_monthly.loc[winter_rows].iloc[3:-3]

# Annual means, then a centred 8-year rolling mean, then move the date index
# back into an ordinary column
# NOTE(review): 'A' bins by calendar year, so each annual value appears to
# combine Jan-Mar and Oct-Dec of the same year rather than one contiguous
# Oct-Mar winter - confirm this is the intended definition
full_demand_weather_ondjfm_8yrRM = (
    full_demand_weather_ondjfm
    .resample('A').mean()
    .rolling(window=8, center=True).mean()
    .reset_index()
)

In [None]:
# Preview the smoothed weather-dependent series
full_demand_weather_ondjfm_8yrRM.head()

In [None]:
# Set up a filename for the smoothed ONDJFM weather-dependent demand series
fname = "ERA5_weather_dependent_demand_1979_2018_8yrRM_ONDJFM.csv"

# Save the file next to the input data, without writing the row index
full_demand_weather_ondjfm_8yrRM.to_csv(os.path.join(dir, fname), index=False)

In [None]:
# Print the directory the two CSV files above were written to
print(dir)

In [None]:
# Combine the full and weather-dependent smoothed series into one table,
# keeping only the dates present in both
UK_demand = full_demand_ondjfm_8yrRM.merge(
    full_demand_weather_ondjfm_8yrRM,
    on='date',
)

In [None]:
# Preview the combined demand table
UK_demand.head()

In [None]:
# Location of the climate-indices table that the demand series will be
# merged with to look at decadal correlations
path = "/home/users/benhutch/NGrid_demand/csv_files/climate_indices_demand.csv"

# Read the climate-indices table
climate_indices_demand = pd.read_csv(path)

In [None]:
# Preview the climate-indices table as loaded from disk
climate_indices_demand.head()

In [None]:
# Convert the date column to datetime so that it matches the key type of
# UK_demand, then attach the demand columns. The left join keeps every row of
# the climate-indices table, leaving NaN where no demand value exists
climate_indices_demand = pd.merge(
    climate_indices_demand.assign(
        date=pd.to_datetime(climate_indices_demand['date'])
    ),
    UK_demand,
    how='left',
    on='date',
)

In [None]:
# Preview the table after attaching the demand columns
climate_indices_demand.head()

In [None]:
# write a function for plotting the data
def plot_corr(
    df: pd.DataFrame,
    predictor: str,
    predictand: str,
    time: str = "date",
    standardise: bool = True,
    predictor_label: str = None,
    predictand_label: str = None,
    time_interval: int = 5,
    ylabel: str = "Normalised anomaly",
):
    """
    Plot two columns of a dataframe against time and report their correlation.

    Rows where either variable is missing or non-finite are dropped first.
    The Pearson correlation and its p-value are then computed on the
    remaining rows and shown in the legend entry of the predictor.

    Inputs

    df: pd.DataFrame
        A pandas dataframe containing the data. It is not modified.

    predictor: str
        The column name of the predictor variable

    predictand: str
        The column name of the predictand variable

    time: str
        The column name of the time variable (must be datetime-like, since
        the tick labels are taken from its year)

    standardise: bool
        Whether to plot each series as a z-score (mean removed, divided by
        its standard deviation) instead of its raw values. This only affects
        what is drawn; the correlation is computed on the raw values.

    predictor_label: str
        The legend label for the predictor variable. Defaults to the
        predictor column name when not given.

    predictand_label: str
        The legend label for the predictand variable. Defaults to the
        predictand column name when not given.

    time_interval: int
        The step, in rows, between consecutive xticks

    ylabel: str
        The y-axis label. The default describes the standardised plot, so
        pass a different label when standardise is False.

    Outputs

    None
        The figure is displayed with plt.show().

    Raises

    ValueError
        If fewer than two rows have finite values for both variables.
    """

    # Fall back to the column names so the legend never shows "None"
    if predictor_label is None:
        predictor_label = predictor
    if predictand_label is None:
        predictand_label = predictand

    # Keep only the rows where both variables are finite
    valid = np.isfinite(df[predictor]) & np.isfinite(df[predictand])
    df = df[valid]

    # A correlation needs at least two points
    if len(df) < 2:
        raise ValueError(
            f"Need at least two rows with finite values of both "
            f"'{predictor}' and '{predictand}' to calculate a correlation"
        )

    # Calculate the correlation and p-value
    corr, pval = pearsonr(df[predictor], df[predictand])

    # Choose what to draw: z-scores or the raw values
    if standardise:
        predictor_values = (df[predictor] - df[predictor].mean()) / df[predictor].std()
        predictand_values = (df[predictand] - df[predictand].mean()) / df[predictand].std()
    else:
        predictor_values = df[predictor]
        predictand_values = df[predictand]

    # Set the figure size
    plt.figure(figsize=(10, 5))

    # Plot the data
    plt.plot(
        df[time],
        predictor_values,
        label=f"{predictor_label} (corr = {corr:.2f}, p = {pval:.2f})",
    )
    plt.plot(df[time], predictand_values, label=f"{predictand_label}")

    # Set the xticks: one every `time_interval` rows, labelled by year
    plt.xticks(
        df[time].iloc[::time_interval],
        df[time].dt.year.iloc[::time_interval],
    )

    # Set the y-axis label
    plt.ylabel(ylabel)

    # Set the x-axis label
    plt.xlabel("Year")

    # Set the title
    plt.title(f"{predictor} vs {predictand}")

    # Add a legend
    plt.legend()

    # Show the plot
    plt.show()
    

In [None]:
# Look up the column names to pass to plot_corr
climate_indices_demand.head()

In [None]:
# Test the function: NAO anomaly against full UK demand
plot_corr(
    df=climate_indices_demand,
    predictor="NAO anomaly (Pa)",
    predictand="UK_demand (GW)",
    predictor_label="NAO anomaly (Pa)",
    predictand_label="UK demand (GW)",
)

In [None]:
# Now for weather-dependent demand: NAO anomaly against UK weather-dependent demand
plot_corr(
    df=climate_indices_demand,
    predictor="NAO anomaly (Pa)",
    predictand="UK_wd_demand (GW)",
    predictor_label="NAO anomaly (Pa)",
    predictand_label="UK weather-dependent demand (GW)",
)

In [None]:
# Now for delta P: delta P anomaly against full UK demand
plot_corr(
    df=climate_indices_demand,
    predictor="delta P anomaly (Pa)",
    predictand="UK_demand (GW)",
    predictor_label="delta P anomaly (Pa)",
    predictand_label="UK demand (GW)",
)

In [None]:
# And for weather-dependent demand: delta P anomaly against UK weather-dependent demand
plot_corr(
    df=climate_indices_demand,
    predictor="delta P anomaly (Pa)",
    predictand="UK_wd_demand (GW)",
    predictor_label="delta P anomaly (Pa)",
    predictand_label="UK weather-dependent demand (GW)",
)

In [None]:
# Temperature and UK demand: UK temperature anomaly against full UK demand
plot_corr(
    df=climate_indices_demand,
    predictor="UK_temp_anomaly",
    predictand="UK_demand (GW)",
    predictor_label="UK temperature (K)",
    predictand_label="UK demand (GW)",
)

In [None]:
# Temperature and UK weather-dependent demand: UK temperature anomaly
# against UK weather-dependent demand
plot_corr(
    df=climate_indices_demand,
    predictor="UK_temp_anomaly",
    predictand="UK_wd_demand (GW)",
    predictor_label="UK temperature (K)",
    predictand_label="UK weather-dependent demand (GW)",
)