# ROMY - Operation Performance Analysis Plot

In [None]:
import os
import sys
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import obspy as obs
import matplotlib.colors

from pandas import DataFrame, merge, read_pickle
from obspy import UTCDateTime


In [None]:
from functions.load_beat import __load_beat
from functions.load_status import __load_status
from functions.load_mlti import __load_mlti
from functions.get_mlti_intervals import __get_mlti_intervals

from functions.load_mlti import __load_mlti
from functions.get_mlti_intervals import __get_mlti_intervals
from functions.load_beat import __load_beat
from functions.load_lxx import __load_lxx
from functions.get_lxx_intervals import __get_lxx_intervals
from functions.get_mlti_statistics import __get_mlti_statistics

In [None]:
# Base directories per machine, in the order
# (root_path, data_path, archive_path, bay_path, lamont_path).
_LOCAL_PATHS = (
    '/home/andbro/',
    '/home/andbro/kilauea-data/',
    '/home/andbro/freenas/',
    '/home/andbro/ontap-ffb-bay200/',
    '/home/andbro/lamont/',
)
_SERVER_PATHS = (
    '/home/brotzer/',
    '/import/kilauea-data/',
    '/import/freenas-ffb-01-data/',
    '/import/ontap-ffb-bay200/',
    '/lamont/',
)

_PATHS_BY_HOST = {'lighthouse': _LOCAL_PATHS}
for _server in ('kilauea', 'lin-ffb-01', 'ambrym', 'hochfelln'):
    _PATHS_BY_HOST[_server] = _SERVER_PATHS

# On a host that is not listed, none of the path variables gets defined.
_host_paths = _PATHS_BY_HOST.get(os.uname().nodename)
if _host_paths is not None:
    root_path, data_path, archive_path, bay_path, lamont_path = _host_paths

## Configurations

In [None]:
# Settings shared by all cells below.
# (A shorter alternative period is 2024-03-09 to 2024-04-09.)
config = {
    # begin and end of the analysed period
    'tbeg': obs.UTCDateTime("2024-03-09"),
    'tend': obs.UTCDateTime("2024-08-01"),

    # directory the figures are written to
    'path_to_figures': data_path+"manuscripts/manuscript_romy_drifts/figures/",

    # directory the data files are read from
    'path_to_data': data_path+"manuscripts/manuscript_romy_drifts/data/",

    # choose whether MLTI are treated as sequences (intervals) or not
    'interval': True,
}


### Load Status Data

In [None]:
# read the status frame of each ring (U, V, Z) for the configured period
statusU, statusV, statusZ = (
    read_pickle(config['path_to_data']+f"performance_status_{_ring}_{config['tbeg'].date}_{config['tend'].date}.pkl")
    for _ring in ("U", "V", "Z")
)

### Status of all Rings running simultaneously

In [None]:
# length of the configured period: difference of end and begin divided by 60
# (minutes, given that the UTCDateTime difference is in seconds)
overall_minutes = (config['tend'] - config['tbeg'])/60

In [None]:
# Keep the rows flagged with quality == 1 for each ring and print their
# number relative to the number of minutes in the period.
statusU_good = statusU.loc[statusU['quality'] == 1]
_share_U = len(statusU_good)/overall_minutes*100
print(f"RU good quality: {round(_share_U, 2)}%")

statusV_good = statusV.loc[statusV['quality'] == 1]
_share_V = len(statusV_good)/overall_minutes*100
print(f"RV good quality: {round(_share_V, 2)}%")

statusZ_good = statusZ.loc[statusZ['quality'] == 1]
_share_Z = len(statusZ_good)/overall_minutes*100
print(f"RZ good quality: {round(_share_Z, 2)}%")


In [None]:
# merge U and V
# inner joins on the time stamp: only times with good quality on
# U, V and Z at once remain
status_all = (
    statusU_good
    .merge(statusV_good, how="inner", on="times_utc")
    .merge(statusZ_good, how="inner", on="times_utc")
)


In [None]:

# number of time stamps with good quality on all rings at the same time,
# expressed relative to the number of minutes in the period
good_quality_all = len(status_all.times_utc)
good_quality_all_percentage = good_quality_all / overall_minutes * 100

_rounded_percentage = round(good_quality_all_percentage, 2)
print(f"Simultaneous good quality: {_rounded_percentage}%")

## Processing Quality Status

In [None]:
# order every status frame by time; reset_index() turns the previous index
# into a regular column
statusU, statusV, statusZ = (
    _frame.sort_values(by="times_utc").reset_index()
    for _frame in (statusU, statusV, statusZ)
)

# per ring: keep only time stamp and quality flag, discard rows without a
# time stamp, renumber the rows and convert the time stamps to strings
statusZ_mod, statusU_mod, statusV_mod = (
    _frame[['times_utc', 'quality']]
    .dropna(subset="times_utc")
    .reset_index()
    .assign(times_utc=lambda _df: _df.times_utc.astype(str))
    for _frame in (statusZ, statusU, statusV)
)

In [None]:
# create new frame with all rings and their quality
df_new = statusZ_mod.copy()
df_new = df_new.merge(statusU_mod, how="outer", on=["times_utc"])
df_new = df_new.merge(statusV_mod, how="outer", on=["times_utc"])

# drop irrelevant columns
df_new.drop(columns=["index_x", "index_y", "index"], inplace=True)

# renaming
df_new.rename(columns={"quality_x":"RZ", "quality_y":"RU", "quality":"RV"}, inplace=True)

# add relative times
ref_time = df_new.times_utc.iloc[0]
df_new['times_utc_sec'] = np.array([abs(obs.UTCDateTime(ref_time) - obs.UTCDateTime(_t) +30) for _t in df_new.times_utc])

# add RW column
df_new['RW'] = np.ones(df_new.shape[0])*np.nan

# sort all by time
df_new.sort_values(by="times_utc", inplace=True)

#### Compute Percentages

In [None]:
# Per ring: number of rows with quality 0, quality 1 and missing quality,
# the total number of rows, and the three counts as percentages of the total.
df_stats = DataFrame(columns=["ring", "R_0", "R_1", "R_nan", "R_N", "R_0_p", "R_1_p", "R_nan_p"])

rings = ['RZ', 'RU', 'RV', 'RW']

for ring in rings:

    flags = df_new[ring]

    R_0 = int((flags == 0).sum())
    R_1 = int((flags == 1).sum())
    R_nan = int(flags.isnull().sum())
    R_N = len(flags)

    R_0_p, R_1_p, R_nan_p = (round(count/R_N*100, 2) for count in (R_0, R_1, R_nan))

    # append one row per ring
    df_stats.loc[len(df_stats)] = [ring, R_0, R_1, R_nan, R_N, R_0_p, R_1_p, R_nan_p]

df_stats

### Load LXX maintenance log

In [None]:
# maintenance log for the configured period; columns t1 / t2 are used as
# begin / end of the shaded intervals in the performance plots
_lxx_file = f"performance_lxx_{config['tbeg'].date}_{config['tend'].date}.pkl"
lxx = read_pickle(config['path_to_data']+_lxx_file)
lxx_t1, lxx_t2 = lxx['t1'], lxx['t2']

### Load MLTI times

In [None]:
# read the MLTI data of each ring (U, V, Z) for the configured period
mltiU, mltiV, mltiZ = (
    read_pickle(config['path_to_data']+f"performance_mlti{_ring}_{config['tbeg'].date}_{config['tend'].date}.pkl")
    for _ring in ("U", "V", "Z")
)


#### Compute MLTI sequences

The MLTI statistics (`mltistats`) can either be computed here or loaded from file, which is faster.

In [None]:

# compute the MLTI statistics ring by ring, announcing each ring first
mltistats = {}

for _ring, _mlti in (("U", mltiU), ("V", mltiV), ("Z", mltiZ)):
    print(f" {_ring} ...")
    mltistats[_ring] = __get_mlti_statistics(
        _mlti, config['tbeg'], config['tend'],
        intervals=config['interval'], plot=False, ylog=False,
    )


In [None]:
# Load the MLTI statistics from a pickle file; this rebinds `mltistats` and
# thereby replaces a result computed in the notebook before.
# NOTE(review): the file name is spelled "mltistasts" - presumably this matches
# the name under which the file was written; verify before renaming.
mltistats = read_pickle(config['path_to_data']+f"performance_mltistasts_{config['tbeg'].date}_{config['tend'].date}.pkl")

### Plotting

In [None]:
def __makeplot_performance(data, ylog=False, sec=False):
    """Create the operation performance figure with three panels in one row.

    Panels:
      (a) cumulative MLTI curve of every ring over time, with the
          maintenance intervals shaded in yellow,
      (b) histogram with KDE of the times between MLTI on a log axis,
      (c) stacked bars with the percentages of bad, good and missing
          quality per ring.

    Besides `data`, the function reads the notebook globals `config`
    (key 'tbeg'), `df_stats` (panel c) and `lxx_t1` / `lxx_t2`
    (maintenance intervals).

    Parameters
    ----------
    data : dict
        Maps a ring letter ("Z", "U", "V") to a mapping with the entries
        "tsec", "cumsum", "cumsumsec", "cumsump" and "intersec"
        (NumPy arrays are assumed, since array arithmetic is applied).
    ylog : bool
        Currently has no effect; kept so that existing calls keep working.
    sec : bool
        If True, panel (a) shows the cumulative MLTI time ("cumsumsec"),
        otherwise the cumulative MLTI count in percent ("cumsump").

    Returns
    -------
    matplotlib.figure.Figure
        The created figure.
    """

    from matplotlib.ticker import FuncFormatter, MultipleLocator

    # factor applied to "tsec" for the x-axis of panel (a), labelled in days
    tscale = 1/86400

    font = 12

    fig, ax = plt.subplots(1, 3, figsize=(18, 5))

    plt.subplots_adjust(wspace=0.2)

    colors = {"Z":"tab:blue", "U":"tab:green", "V":"tab:red"}

    # largest cumulative MLTI time of all rings decides on the unit used
    # for panel (a) when sec=True
    s_max = max(max(data[r]["cumsumsec"]) for r in data.keys())

    # the larger threshold has to be tested first, otherwise the hour branch
    # can never be reached
    if s_max > 86400:
        sscale, sunit = 1/3600, "hour"
    elif s_max > 3600:
        sscale, sunit = 1/60, "min"
    else:
        sscale, sunit = 1, "s"

    _datainter, _cols = [], []

    # horizontal shift per ring for the legend-only bars of panel (b)
    offset = [-0.3, 0, 0.3]
    for _i, r in enumerate(data.keys()):

        # last value of the cumulative count is shown as N in the legend
        n = data[r]["cumsum"][-1].astype(int)

        if sec:
            ax[0].plot(data[r]["tsec"]*tscale, data[r]["cumsumsec"]*sscale, label=f"R{r} (N={n})", color=colors[r], zorder=3)
            ax[0].set_ylim(0, s_max*sscale)
            ax[0].set_ylabel(f"MLTI time ({sunit})", fontsize=font)
        else:
            ax[0].plot(data[r]["tsec"]*tscale, data[r]["cumsump"], label=f"R{r} (N={n})", color=colors[r], zorder=3)
            ax[0].set_ylim(0, 102)
            ax[0].set_ylabel("MLTI Count (%)", fontsize=font)

        imax = round(max(data[r]["intersec"]/3600), 1)
        h, b = np.histogram(data[r]["intersec"]/3600, bins=int(24/1), range=(0, 24), density=True)
        bins = b[:-1] + b[1]/2 + offset[_i]

        # bars with NaN height are not visible; they only provide the
        # labelled handles for the legend of panel (b)
        ax[1].bar(bins, h*np.nan, width=0.3, color=colors[r], label=f"R{r} (max={imax} hours)",
                  edgecolor="k", linewidth=1, alpha=0.5)

        _datainter.append(data[r]["intersec"]/3600)
        _cols.append(colors[r])

    sns.histplot(_datainter, ax=ax[1], palette=_cols, log_scale=True,
                 kde=True, fill=True, multiple='dodge')

    # x-range of panel (a) covers the longest time axis of all rings
    # (previously only the ring visited last in the loop was considered)
    t_max = max(max(data[r]["tsec"]*tscale) for r in data.keys())
    ax[0].set_xlim(0-0.5, t_max+0.5)

    # panel (b) is in hours, so the axis starts at one minute
    ax[1].set_xlim(left=1/60)

    ax[0].legend(loc=4)
    ax[1].legend(loc=1)

    ax[0].grid(color="grey", ls="--", alpha=0.4)
    ax[1].grid(color="grey", ls="--", alpha=0.4)

    ax[0].set_xlabel(f"Time (days) from {config['tbeg'].date}", fontsize=12)

    ax[1].set_xlabel("Inter-MLTI-Time (hours)", fontsize=font)
    ax[1].set_ylabel("MLTI Count", fontsize=font)

    ax[0].xaxis.set_major_locator(MultipleLocator(10))
    ax[0].xaxis.set_minor_locator(MultipleLocator(5))

    # panel (c): stacked bars, bad quality at the bottom, then good, then N/A
    barWidth = 0.85
    xpos = range(df_stats.ring.size)

    ax[2].bar(xpos, df_stats.R_0_p,
            color='darkred', edgecolor='k', width=barWidth, label="Bad Quality")

    ax[2].bar(xpos, df_stats.R_1_p, bottom=df_stats.R_0_p,
            color='green', edgecolor='k', width=barWidth, label="Good Quality")

    ax[2].bar(xpos, df_stats.R_nan_p, bottom=[i+j for i,j in zip(df_stats.R_0_p, df_stats.R_1_p)],
            color='white', edgecolor='k', width=barWidth, label="N/A")

    ax[2].set_xticks(xpos, df_stats.ring, fontsize=font)

    ax[2].legend(loc='upper left', bbox_to_anchor=(-0.0, 1.1), ncol=3)

    # format the y ticks as integer percentages; a formatter keeps the labels
    # tied to the actual tick positions (fixed labels without fixed tick
    # positions trigger a matplotlib warning)
    ax[2].yaxis.set_major_formatter(FuncFormatter(lambda val, pos: f'{int(val)}%'))
    ax[2].tick_params(axis='y', labelsize=font-2)

    ax[2].spines['top'].set_visible(False)
    ax[2].spines['right'].set_visible(False)

    for c in ax[2].containers:

        # segments of zero height get an empty label
        labels = [round(v.get_height(), 2) if v.get_height() > 0 else '' for v in c]

        # label on the axes explicitly instead of relying on the current axes
        ax[2].bar_label(c, labels=labels, label_type='center', padding=0, alpha=0.8)

    for _k, ll in enumerate(['(a)', '(b)', '(c)']):
        ax[_k].text(-.08, 1.07, ll, ha='left', va='top', transform=ax[_k].transAxes, fontsize=14)

    # add maintenance times as shaded spans in panel (a)
    t_begin = UTCDateTime(config['tbeg'])
    for lx1, lx2 in zip(lxx_t1, lxx_t2):
        lx1_sec = (lx1-t_begin)*tscale
        lx2_sec = (lx2-t_begin)*tscale
        ax[0].fill_betweenx([-10000, 10000], lx1_sec, lx2_sec, color="yellow", alpha=0.7)

    plt.show();
    return fig

In [None]:
# create the performance figure with the panels side by side; with sec=False
# panel (a) shows the cumulative MLTI count in percent
fig = __makeplot_performance(mltistats, ylog=True, sec=False)

In [None]:
# the file name tells whether the MLTI were evaluated as intervals
_figure_name = (
    f"QS_Performance_interval_{config['tbeg'].date}_{config['tend'].date}.png"
    if config['interval']
    else f"QS_Performance_{config['tbeg'].date}_{config['tend'].date}.png"
)
fig.savefig(config['path_to_figures']+_figure_name, format="png", dpi=150, bbox_inches='tight')

# drop the reference to the figure once it is stored
del fig


In [None]:
def __makeplot_performance2(data, ylog=False, sec=False):
    """Create the operation performance figure with three stacked panels.

    Panels (top to bottom):
      (a) cumulative MLTI curve of every ring over time, with the
          maintenance intervals shaded in yellow,
      (b) histogram with KDE of the times between MLTI on a log axis,
      (c) stacked bars with the percentages of good quality, bad quality
          and downtime per ring.

    Besides `data`, the function reads the notebook globals `config`
    (key 'tbeg'), `df_stats` (panel c) and `lxx_t1` / `lxx_t2`
    (maintenance intervals).

    Parameters
    ----------
    data : dict
        Maps a ring letter ("Z", "U", "V") to a mapping with the entries
        "tsec", "cumsum", "cumsumsec", "cumsump" and "intersec"
        (NumPy arrays are assumed, since array arithmetic is applied).
    ylog : bool
        Currently has no effect; kept so that existing calls keep working.
    sec : bool
        If True, panel (a) shows the cumulative MLTI time ("cumsumsec"),
        otherwise the cumulative MLTI count in percent ("cumsump").

    Returns
    -------
    matplotlib.figure.Figure
        The created figure.
    """

    from matplotlib.ticker import FuncFormatter, MultipleLocator

    # factor applied to "tsec" for the x-axis of panel (a), labelled in days
    tscale = 1/86400

    font = 12

    fig, ax = plt.subplots(3, 1, figsize=(5, 18))

    # NOTE(review): wspace has no effect with a single column of panels;
    # hspace may have been intended - confirm before changing the layout
    plt.subplots_adjust(wspace=0.2)

    colors = {"Z":"tab:blue", "U":"tab:green", "V":"tab:red"}

    # largest cumulative MLTI time of all rings decides on the unit used
    # for panel (a) when sec=True
    s_max = max(max(data[r]["cumsumsec"]) for r in data.keys())

    # the larger threshold has to be tested first, otherwise the hour branch
    # can never be reached
    if s_max > 86400:
        sscale, sunit = 1/3600, "hour"
    elif s_max > 3600:
        sscale, sunit = 1/60, "min"
    else:
        sscale, sunit = 1, "s"

    _datainter, _cols = [], []

    # horizontal shift per ring for the legend-only bars of panel (b)
    offset = [-0.3, 0, 0.3]
    for _i, r in enumerate(data.keys()):

        # last value of the cumulative count is shown as N in the legend
        n = data[r]["cumsum"][-1].astype(int)

        if sec:
            ax[0].plot(data[r]["tsec"]*tscale, data[r]["cumsumsec"]*sscale, label=f"R{r} (N={n})", color=colors[r], zorder=3)
            ax[0].set_ylim(0, s_max*sscale)
            ax[0].set_ylabel(f"MLTI time ({sunit})", fontsize=font)
        else:
            ax[0].plot(data[r]["tsec"]*tscale, data[r]["cumsump"], label=f"R{r} (N={n})", color=colors[r], zorder=3)
            ax[0].set_ylim(0, 102)
            ax[0].set_ylabel("MLTI Count (%)", fontsize=font)

        imax = round(max(data[r]["intersec"]/3600), 1)
        h, b = np.histogram(data[r]["intersec"]/3600, bins=int(24/1), range=(0, 24), density=True)
        bins = b[:-1] + b[1]/2 + offset[_i]

        # bars with NaN height are not visible; they only provide the
        # labelled handles for the legend of panel (b)
        ax[1].bar(bins, h*np.nan, width=0.3, color=colors[r], label=f"R{r} (max={imax} hours)",
                  edgecolor="k", linewidth=1, alpha=0.5)

        _datainter.append(data[r]["intersec"]/3600)
        _cols.append(colors[r])

    sns.histplot(_datainter, ax=ax[1], palette=_cols, log_scale=True,
                 kde=True, fill=True, multiple='dodge')

    # x-range of panel (a) covers the longest time axis of all rings
    # (previously only the ring visited last in the loop was considered)
    t_max = max(max(data[r]["tsec"]*tscale) for r in data.keys())
    ax[0].set_xlim(0-0.5, t_max+0.5)

    # panel (b) is in hours, so the axis starts at one minute
    ax[1].set_xlim(left=1/60)

    ax[0].legend(loc=4)
    ax[1].legend(loc=1)

    ax[0].grid(color="grey", ls="--", alpha=0.4)
    ax[1].grid(color="grey", ls="--", alpha=0.4)

    ax[0].set_xlabel(f"Time (days) from {config['tbeg'].date}", fontsize=12)

    ax[1].set_xlabel("Inter-MLTI-Time (hours)", fontsize=font)
    ax[1].set_ylabel("MLTI Count", fontsize=font)

    ax[0].xaxis.set_major_locator(MultipleLocator(10))
    ax[0].xaxis.set_minor_locator(MultipleLocator(5))

    # panel (c): stacked bars, good quality at the bottom, then bad quality,
    # then downtime
    barWidth = 0.85
    xpos = range(df_stats.ring.size)

    ax[2].bar(xpos, df_stats.R_1_p,
            color='green', edgecolor='k', width=barWidth, label="Good Quality")

    ax[2].bar(xpos, df_stats.R_0_p, bottom=df_stats.R_1_p,
            color='darkred', edgecolor='k', width=barWidth, label="Bad Quality")

    ax[2].bar(xpos, df_stats.R_nan_p, bottom=[i+j for i,j in zip(df_stats.R_0_p, df_stats.R_1_p)],
            color='white', edgecolor='k', width=barWidth, label="Downtime")

    ax[2].set_xticks(xpos, df_stats.ring, fontsize=font)

    ax[2].legend(loc='upper left', bbox_to_anchor=(-0.05, -0.05), ncol=3)

    # format the y ticks as integer percentages; a formatter keeps the labels
    # tied to the actual tick positions (fixed labels without fixed tick
    # positions trigger a matplotlib warning)
    ax[2].yaxis.set_major_formatter(FuncFormatter(lambda val, pos: f'{int(val)}%'))
    ax[2].tick_params(axis='y', labelsize=font-2)

    ax[2].spines['top'].set_visible(False)
    ax[2].spines['right'].set_visible(False)

    for c in ax[2].containers:

        # segments of zero height get an empty label
        labels = [round(v.get_height(), 2) if v.get_height() > 0 else '' for v in c]

        # label on the axes explicitly instead of relying on the current axes
        ax[2].bar_label(c, labels=labels, label_type='center', padding=0, alpha=0.8)

    for _k, ll in enumerate(['(a)', '(b)', '(c)']):
        ax[_k].text(-.08, 1.07, ll, ha='left', va='top', transform=ax[_k].transAxes, fontsize=14)

    # add maintenance times as shaded spans in panel (a)
    t_begin = UTCDateTime(config['tbeg'])
    for lx1, lx2 in zip(lxx_t1, lxx_t2):
        lx1_sec = (lx1-t_begin)*tscale
        lx2_sec = (lx2-t_begin)*tscale
        ax[0].fill_betweenx([-10000, 10000], lx1_sec, lx2_sec, color="yellow", alpha=0.7)

    plt.show();
    return fig

In [None]:
# create the performance figure with the panels stacked vertically; with
# sec=False panel (a) shows the cumulative MLTI count in percent
fig = __makeplot_performance2(mltistats, ylog=True, sec=False)

In [None]:
# the file name tells whether the MLTI were evaluated as intervals; the
# suffix _2 marks the stacked layout
_figure_name = (
    f"QS_Performance_interval_{config['tbeg'].date}_{config['tend'].date}_2.png"
    if config['interval']
    else f"QS_Performance_{config['tbeg'].date}_{config['tend'].date}_2.png"
)
fig.savefig(config['path_to_figures']+_figure_name, format="png", dpi=150, bbox_inches='tight')

# drop the reference to the figure once it is stored
del fig

## Other Plots

In [None]:
from functions.smoothing import __smooth

# store a smoothed copy of each ring's MLTI series (second argument 86400)
# next to the original series
for _ring in ("U", "V", "Z"):
    mltistats[_ring]["mlti_series_avg"] = __smooth(mltistats[_ring]["mlti_series"], 86400)

In [None]:
# factor applied to "tsec" for the time axis
tscale = 1/86400

_statsU, _statsV, _statsZ = mltistats["U"], mltistats["V"], mltistats["Z"]

# smoothed MLTI series of every ring as semi-transparent lines
plt.plot(_statsU["tsec"]*tscale, _statsU["mlti_series_avg"], alpha=0.5)
plt.plot(_statsV["tsec"]*tscale, _statsV["mlti_series_avg"], alpha=0.5)
plt.plot(_statsZ["tsec"]*tscale, _statsZ["mlti_series_avg"], alpha=0.5)

# the same series again as filled areas
plt.fill_between(_statsU["tsec"]*tscale, _statsU["mlti_series_avg"], alpha=0.3)
plt.fill_between(_statsV["tsec"]*tscale, _statsV["mlti_series_avg"], alpha=0.3)
plt.fill_between(_statsZ["tsec"]*tscale, _statsZ["mlti_series_avg"], alpha=0.3)