In [None]:
import os
import sys
import glob
from copy import deepcopy
import pickle
from scipy.fft import fft, fftfreq
# from scipy.signal import butter, filtfilt
# from sklearn.metrics import r2_score
# from tqdm import tqdm
import seaborn as sns

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

import tensorflow as tf
from tensorflow import keras

if '..' not in sys.path:
    sys.path.append('..')
# from dlml.utils import collect_experiments
from dlml.data import load_data_files, load_data_areas

#### Which model to load

In [None]:
# Select the experiment (i.e., the trained network) to analyse.
# Exactly one `experiment_ID` assignment must be active: the other IDs are
# kept commented out for reference.

# training on frequency data, 2 output values
# experiment_ID = '9ea493c789b542bf979c51a6031f4044'
# training on frequency data, 4 output values
# experiment_ID = 'f6d9a03f1cfe450288e9cb86da94235f'
# training on time data, 2 output values
# experiment_ID = '034a1edb0797475b985f0e1335dab383'
# training on time data, 4 output values
# experiment_ID = 'b346a89d384c4db2ba4058a2c83c4f12'
# training on time data, 2 output values, 8 input values
# experiment_ID = '474d2016e33b441889ce8b17531487cb' # replaces '98475b819ecb4d569646d7e1467d7c9c'
# training on time data, 4 output values, 16 input values (with compensators)
experiment_ID = 'c6f72abb5e364c4cb7770250e135bd73' # replaces '302a21340f354ac2949184be98d8e907'

#### Load the model

In [None]:
# Folder that contains one sub-folder per experiment (i.e., per trained network).
experiments_path = '../experiments/neural_network/'

# Parameters saved at training time.
# NOTE: unpickling can execute arbitrary code, so only load parameter files
# produced by trusted training runs.
with open(os.path.join(experiments_path, experiment_ID, 'parameters.pkl'), 'rb') as fid:
    network_parameters = pickle.load(fid)

checkpoint_path = experiments_path + experiment_ID + '/checkpoints/'
# glob returns files in arbitrary order: sort them so that the choice of the
# checkpoint is reproducible across runs and machines.
checkpoint_files = sorted(glob.glob(checkpoint_path + '*.h5'))
if not checkpoint_files:
    raise FileNotFoundError(f'No checkpoint files (*.h5) found in {checkpoint_path}')

# Use the checkpoint saved at the latest epoch. The parsing below expects file
# names of the form '<prefix>.<epoch>-<...>.h5'.
# NOTE(review): this presumes that checkpoints are written only when the
# validation loss improves, so that the latest one is also the best - confirm.
# To select by validation loss explicitly, load the training history and use
# checkpoint_files[epochs.index(np.argmin(val_loss) + 1)].
try:
    epochs = [int(os.path.split(file)[-1].split('.')[1].split('-')[0]) for file in checkpoint_files]
    best_checkpoint = checkpoint_files[int(np.argmax(epochs))]
except (IndexError, ValueError):
    # File names do not follow the expected pattern: fall back to the last
    # file in lexicographic order.
    best_checkpoint = checkpoint_files[-1]
model = keras.models.load_model(best_checkpoint)

# Normalization statistics of the training set and names of the input variables.
x_train_mean = network_parameters['x_train_mean']
x_train_std  = network_parameters['x_train_std']
x_train_min = network_parameters['x_train_min']
x_train_max = network_parameters['x_train_max']
var_names = network_parameters['var_names']
print(f'Loaded network from {best_checkpoint}.')
print(f'Variable names: {var_names}')

#### Model topology

In [None]:
# Print the layer-by-layer summary of the loaded Keras model.
model.summary()

Dictionaries, keyed by data set name, that are filled in by the following cells:

In [None]:
# Per-data-set containers; every dictionary is keyed by the data set name.
X = {}             # normalized inputs fed to the network
y = {}             # exact target values
Xf = {}            # single-sided amplitude spectra of the normalized inputs
group_index = {}   # list of index arrays, one per group of trials
n_mom_groups = {}  # number of groups in each data set

In [None]:
# Sanity check: True if the data directory of the first area exists relative to
# the parent folder of this notebook's working directory.
os.path.isdir(os.path.join('..', network_parameters['data_dirs'][0].format(network_parameters['area_IDs'][0])))

In [None]:
# Display the path of the data directory of the first area, i.e. the first entry
# of `data_dirs` formatted with the first entry of `area_IDs`.
os.path.join('..', network_parameters['data_dirs'][0].format(network_parameters['area_IDs'][0]))

#### Load the original data set

In [None]:
# The analysis below works on time-domain traces: refuse networks trained on spectra.
use_fft = network_parameters.get('use_fft', False)
if use_fft:
    raise Exception('This notebook must be used on a network that uses time-domain inputs')

set_name = 'test'
# If True, the targets are collapsed onto two levels: the mean of the values
# above and the mean of the values below the overall mean.
low_high = True

data_dir = os.path.join('..', network_parameters['data_dirs'][0].format(network_parameters['area_IDs'][0]))
data_files = sorted(glob.glob(data_dir + os.path.sep + f'*_{set_name}_set.h5'))
if not data_files:
    # fail early with a clear message instead of an obscure error in the loader
    raise FileNotFoundError(f'No {set_name} set files found in {data_dir}')
ret = load_data_areas({set_name: data_files}, network_parameters['var_names'],
                        network_parameters['generators_areas_map'][:1],
                        network_parameters['generators_Pnom'],
                        network_parameters['area_measure'],
                        trial_dur=network_parameters['trial_duration'],
                        max_block_size=5000,
                        use_tf=False, add_omega_ref=True,
                        use_fft=False)
# ret[0] is used as the time vector; ret[1] and ret[2] are dictionaries, keyed
# by set name, holding the inputs and the targets respectively.
t = ret[0]
X_raw = ret[1][set_name]
y[set_name] = ret[2][set_name]

# Normalize with the statistics of the training set: the first axis of X_raw is
# paired with the entries of x_train_mean / x_train_std
# (presumably X_raw is (variable, trial, sample) - TODO confirm).
X[set_name] = np.zeros(X_raw.shape)
for i,(m,s) in enumerate(zip(x_train_mean, x_train_std)):
    X[set_name][i,:,:] = (X_raw[i,:,:] - m) / s
X[set_name] = X[set_name].squeeze()
y[set_name] = y[set_name].squeeze()
if low_high:
    y_mean = y[set_name].mean()
    idx = y[set_name] > y_mean
    jdx = y[set_name] < y_mean
    # values exactly equal to the overall mean are left untouched
    y[set_name][idx] = y[set_name][idx].mean()
    y[set_name][jdx] = y[set_name][jdx].mean()

# Single-sided amplitude spectrum of each normalized trace.
dt = np.diff(t[:2])[0]
N_samples = t.size
Xf[set_name] = fft(X[set_name])
Xf[set_name] = 2.0 / N_samples * np.abs(Xf[set_name][:, :N_samples//2])
F = fftfreq(N_samples, dt)[:N_samples//2]

# One group of trial indexes for each distinct target value.
group_index[set_name] = [np.where(y[set_name] == mom)[0] for mom in np.unique(y[set_name])]
n_mom_groups[set_name] = len(group_index[set_name])

#### Load the first data set
Here, the values of inertia of G2 and G3 are changed while keeping the area momentum constant.

#### Load the second data set
Here, the values of inertia of G4 or G8 are changed. The momentum of area 1 is unchanged.

#### Load the third data set
Here, the values of inertia of G2 and G3 are increased beyond the values used to generate the low-momentum configuration of the test set.

In [None]:
# The file for this data set lives in the 'coarse' sub-folder of the parent of
# the test-set data directory.
data_dir = os.path.join('..',
                        network_parameters['data_dirs'][0].format(network_parameters['area_IDs'][0]),
                        '..',
                        'coarse')
data_file = 'inertia_5.000_3.863_4.003_3.570_4.330_4.350_3.770_3.470_3.450_4.200_0.100_0.100_0.100_test_set.h5'
data_files = [os.path.join(data_dir, data_file)]
if not os.path.isfile(data_files[0]):
    # fail early with a clear message instead of an obscure error in the loader
    raise FileNotFoundError(f'Data file not found: {data_files[0]}')

# The file is loaded under the `set_name` key used for the original test set.
ret = load_data_areas({set_name: data_files}, network_parameters['var_names'],
                        network_parameters['generators_areas_map'][:1],
                        network_parameters['generators_Pnom'],
                        network_parameters['area_measure'],
                        trial_dur=network_parameters['trial_duration'],
                        max_block_size=5000,
                        use_tf=False, add_omega_ref=True,
                        use_fft=False)

key = 'var_G2_G3'
# Normalize the inputs with the statistics of the training set.
X_raw = ret[1][set_name]
X[key] = np.zeros(X_raw.shape)
for i,(m,s) in enumerate(zip(x_train_mean, x_train_std)):
    X[key][i,:,:] = (X_raw[i,:,:] - m) / s
y[key] = ret[2][set_name]
X[key] = X[key].squeeze()
y[key] = y[key].squeeze()
# Single-sided amplitude spectrum; N_samples is the one computed for the test set.
Xf[key] = fft(X[key])
Xf[key] = 2.0 / N_samples * np.abs(Xf[key][:, :N_samples//2])

# All the trials of this data set belong to a single group.
group_index[key] = [np.arange(y[key].size)]
n_mom_groups[key] = 1

#### Load the fourth data set
Here, the values of inertia of G2 and G3 are fixed to the low-momentum configuration of the test set, while the inertia of the compensator in area 1 is increased to 6.1 s: this leads to an area momentum that is equivalent to that of the previous file.

In [None]:
# The file for this data set lives in the parent of the test-set data directory.
data_dir = os.path.join('..',
                        network_parameters['data_dirs'][0].format(network_parameters['area_IDs'][0]),
                        '..')
data_file = 'inertia_5.000_3.463_3.603_3.570_4.330_4.350_3.770_3.470_3.450_4.200_6.100_0.100_0.100_test_set.h5'
data_files = [os.path.join(data_dir, data_file)]
if not os.path.isfile(data_files[0]):
    # fail early with a clear message instead of an obscure error in the loader
    raise FileNotFoundError(f'Data file not found: {data_files[0]}')

# The file is loaded under the `set_name` key used for the original test set.
ret = load_data_areas({set_name: data_files}, network_parameters['var_names'],
                        network_parameters['generators_areas_map'][:1],
                        network_parameters['generators_Pnom'],
                        network_parameters['area_measure'],
                        trial_dur=network_parameters['trial_duration'],
                        max_block_size=5000,
                        use_tf=False, add_omega_ref=True,
                        use_fft=False)

key = 'var_Comp11'
# Normalize the inputs with the statistics of the training set.
X_raw = ret[1][set_name]
X[key] = np.zeros(X_raw.shape)
for i,(m,s) in enumerate(zip(x_train_mean, x_train_std)):
    X[key][i,:,:] = (X_raw[i,:,:] - m) / s
y[key] = ret[2][set_name]
X[key] = X[key].squeeze()
y[key] = y[key].squeeze()
# Single-sided amplitude spectrum; N_samples is the one computed for the test set.
Xf[key] = fft(X[key])
Xf[key] = 2.0 / N_samples * np.abs(Xf[key][:, :N_samples//2])

# All the trials of this data set belong to a single group.
group_index[key] = [np.arange(y[key].size)]
n_mom_groups[key] = 1

#### Predict the values of area momentum for all the input data sets

In [None]:
# Run the network on the normalized inputs of every data set.
y_pred = {name: model.predict(inputs) for name, inputs in X.items()}

# Mean and standard deviation of the exact targets within each group of trials.
ym = {
    name: [targets[group_index[name][g]].mean() for g in range(n_mom_groups[name])]
    for name, targets in y.items()
}
ys = {
    name: [targets[group_index[name][g]].std() for g in range(n_mom_groups[name])]
    for name, targets in y.items()
}

# Same statistics, computed on the network predictions.
ym_pred = {
    name: [outputs[group_index[name][g]].mean() for g in range(n_mom_groups[name])]
    for name, outputs in y_pred.items()
}
ys_pred = {
    name: [outputs[group_index[name][g]].std() for g in range(n_mom_groups[name])]
    for name, outputs in y_pred.items()
}

In [None]:
# Work on a copy so that the exact targets in `y` are left untouched.
y_exact = deepcopy(y)
# Shift the targets of the two additional data sets by a tiny amount in opposite
# directions (presumably so that they show up as distinct 'exact' values when
# the data frame is grouped for plotting - TODO confirm).
y_exact['var_G2_G3'] -= 1e-6
y_exact['var_Comp11'] += 1e-6

# Stack all the data sets, in dictionary order, into flat arrays.
exact_momentum = np.concatenate([values for values in y_exact.values()])
pred_momentum = np.concatenate([values for values in y_pred.values()])
df = pd.DataFrame(data={
    'exact': exact_momentum,
    # concatenating the rows flattens the 2D array of predictions
    'pred': np.concatenate(pred_momentum),
})

# Everything that is needed to reproduce the figure without the network.
data = {
    'ym': ym,
    'ym_pred': ym_pred,
    'ys_pred': ys_pred,
    'F': F,
    'Xf': Xf,
    'group_index': group_index,
    'n_mom_groups': n_mom_groups,
    'df': df,
}
data_file = os.path.join(experiments_path, experiment_ID, f'variable_inertia_{experiment_ID[:6]}.npz')
np.savez_compressed(data_file, **data)

#### Plot the momentum estimates and the spectra of all the input data sets

In [None]:
# If True, the top panel shows the distributions of the predictions as violins
# instead of mean +/- standard deviation markers.
use_violin = False

fig,ax = plt.subplots(3, 1, figsize=(6.5, 10))

cmap_name = 'tab10'
cmap = plt.get_cmap(cmap_name)

# Top panel: estimated vs. exact momentum.
if use_violin:
    palette = [cmap(i) for i in [0, 2, 3, 1]]
    sns.violinplot(x='exact', y='pred', data=df, cut=0, inner='quartile',
                   palette=palette, linewidth=1, ax=ax[0])
else:
    # identity line, i.e. where perfect estimates would lie
    ax[0].plot(ym['test'], ym['test'], 'k--', lw=2, markerfacecolor='w')
    n = 0
    for name, exact_means in ym.items():
        for exact, pred, spread in zip(exact_means, ym_pred[name], ys_pred[name]):
            # vertical bar: mean prediction +/- one standard deviation
            ax[0].plot(exact + np.zeros(2),
                       pred + spread * np.array([-1,1]),
                       color=cmap(n), linewidth=3)
            ax[0].plot(exact, pred, 'o', color=cmap(n), markersize=10,
                       markerfacecolor='w', markeredgewidth=3)
            n += 1
ax[0].set_xlabel(r'Exact momentum [GW$\cdot$s$^2$]')
ax[0].set_ylabel(r'Estimated momentum [GW$\cdot$s$^2$]')
ax[0].grid(which='major', axis='both', lw=0.5, ls=':', color=[.6,.6,.6])

# Middle and bottom panels: the same spectra, drawn twice (the bottom panel is
# zoomed in by the axis limits set below).
for panel in ax[1:]:
    n = 0
    for name, spectra in Xf.items():
        for trials in group_index[name][:n_mom_groups[name]]:
            group_spectra = spectra[trials, :]
            m = group_spectra.mean(axis=0)
            s = group_spectra.std(axis=0)
            # half-width of the 95% confidence interval of the mean
            ci = 1.96 * s / np.sqrt(trials.size)
            panel.fill_between(F, 20*np.log10(m + ci), 20*np.log10(m - ci),
                               color=cmap(n), label=name, alpha=0.5)
            n += 1
    panel.set_xscale('log')
    panel.set_ylabel('Power [dB]')
ax[1].legend(loc='lower left', frameon=False, fontsize=8)
ax[2].set_xlabel('Frequency [Hz]')
ax[2].set_xlim([0.4, 1.5])
ax[2].set_ylim([-33, -10])

# Remove the top and right borders from every panel.
for a in ax:
    for side in ('right', 'top'):
        a.spines[side].set_visible(False)

fig.tight_layout()
pdf_file = os.path.join(experiments_path, experiment_ID, f'variable_inertia_{experiment_ID[:6]}.pdf')
fig.savefig(pdf_file)