# Inter-channel interference (ICI) estimation using *counting vectors* in a 32 GBd system. 

## Initialization

### Google Colab checking

In [None]:
# Detect whether the notebook runs inside Google Colab by inspecting the
# string representation of the active IPython shell.
if "google.colab" in str(get_ipython()):
    from google.colab import drive

    GOOGLE_COLAB = True
    # Project folder inside the mounted Google Drive.
    ROOT = "/content/drive/MyDrive/SOFA/ICI_Project"

    drive.mount("/content/drive")
    # Copy the helper module and both counting-vector datasets from the Drive
    # folder into the current working directory (IPython shell magics).
    !cp {ROOT}"/sofa.py" "."
    !cp {ROOT}"/counting_vectors_32f.csv" "."
    !cp {ROOT}"/counting_vectors_32g.csv" "."
else:
    # Local run: files are addressed relative to the current directory.
    GOOGLE_COLAB = False
    ROOT = "."

### Libraries

In [None]:
import json
import os
from collections import defaultdict
from itertools import product

import matplotlib.pyplot as plt
import numpy as np
import polars as pl
import scipy as sp
import sofa
import tensorflow as tf
import tensorflow.keras as ker
from matplotlib import cm
from matplotlib.colors import LogNorm
from mpl_toolkits.mplot3d import Axes3D
from scipy.stats import multivariate_normal
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.mixture import GaussianMixture
from sklearn.model_selection import KFold, cross_validate, train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import Sequential, models, regularizers, utils
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense

In [None]:
# Report the GPUs that TensorFlow can see, if there are any.
GPU = tf.config.list_physical_devices("GPU")
if GPU:
    print(f"Using GPU: {GPU}")

### Globals

In [None]:
def calc_once(varname, fn, args):
    """Calculate a variable only once.

    The function does not store anything itself: the caller is expected to
    assign the returned value to the global called ``varname`` so that later
    calls find it and skip the computation.

    Args:
        varname: Name of the module-level global that caches the value.
        fn: Callable used to compute the value when it is not cached yet.
        args: Dictionary of keyword arguments forwarded to ``fn``.

    Returns:
        ``fn(**args)`` when no global named ``varname`` exists, otherwise the
        current value of that global.
    """
    module_globals = globals()
    if varname not in module_globals:
        return fn(**args)
    # Direct dictionary lookup instead of eval(): eval() resolves names in this
    # function's local scope first, so a global called "fn", "args" or
    # "varname" would wrongly yield the parameter instead of the cached value.
    return module_globals[varname]


def estimation_model(
    layers_props_lst: list, loss_fn: ker.losses.Loss, input_dim: int
) -> ker.models.Sequential:
    """Build and compile a dense feed-forward network for regression.

    Args:
        layers_props_lst: One dictionary of ``Dense`` keyword arguments per
            hidden layer, in order.
        loss_fn: Loss handed to ``compile``.
        input_dim: Number of input features, declared on the first hidden
            layer.

    Returns:
        The compiled model: the hidden layers followed by a single linear
        output unit, optimized with Adam.
    """
    network = ker.Sequential()

    for position, props in enumerate(layers_props_lst):
        # Only the first hidden layer declares the input size.
        first_layer_kwargs = {"input_dim": input_dim} if position == 0 else {}
        network.add(ker.layers.Dense(**first_layer_kwargs, **props))

    # Single linear unit producing the regression output.
    network.add(ker.layers.Dense(units=1, activation="linear"))
    network.compile(loss=loss_fn, optimizer="adam")
    return network


def _rmse(y_true, y_pred):
    """Return the root mean squared error between targets and predictions."""
    # np.sqrt(MSE) instead of ``mean_squared_error(..., squared=False)``: the
    # ``squared`` argument is deprecated since scikit-learn 1.4 and rejected by
    # later releases, while this form works on every version.
    return np.sqrt(mean_squared_error(y_true, y_pred))


def estimation_crossvalidation(
    X, y, X_prod, y_prod, n_splits, layer_props, loss_fn, callbacks
):
    """Cross-validate an estimation network with shuffled K-fold splits.

    For every fold a fresh ``StandardScaler`` is fitted on the training part
    only, a new network is built with ``estimation_model`` and trained for at
    most 5000 epochs with batch size 64. MAE, R² and RMSE are then computed on
    the training part, the held-out part and the production set.

    Args:
        X: Feature matrix, indexed with the fold index arrays.
        y: Target values aligned with ``X``.
        X_prod: Production features, scaled with each fold's scaler.
        y_prod: Production targets aligned with ``X_prod``.
        n_splits: Number of folds.
        layer_props: List of ``Dense`` keyword dictionaries for the hidden
            layers, forwarded to ``estimation_model``.
        loss_fn: Loss forwarded to ``estimation_model``.
        callbacks: Keras callbacks used while fitting; a falsy value trains
            without callbacks.

    Returns:
        A dictionary with the keys ``"model"`` (trained models) and ``"loss"``
        (training loss histories), plus ``"mae"``, ``"r2"`` and ``"rmse"``,
        each mapping ``"train"``, ``"test"`` and ``"prod"`` to a list of
        scores. Every list holds one entry per fold.
    """
    metric_fns = {
        "mae": mean_absolute_error,
        "r2": r2_score,
        "rmse": _rmse,
    }
    split_names = ("train", "test", "prod")

    # Scores dict
    scores = {"model": [], "loss": []}
    for metric in metric_fns:
        scores[metric] = {name: [] for name in split_names}

    # K-fold crossvalidation. No random_state is given, so the folds differ
    # from one run to the next.
    kf = KFold(n_splits=n_splits, shuffle=True)

    for train_index, test_index in kf.split(X, y):
        # Input variables standardizer, fitted on the training part only so no
        # statistics leak from the held-out or production data.
        sc = StandardScaler()
        splits = {
            "train": (sc.fit_transform(X[train_index]), y[train_index]),
            "test": (sc.transform(X[test_index]), y[test_index]),
            "prod": (sc.transform(X_prod), y_prod),
        }
        X_train, y_train = splits["train"]

        model = estimation_model(layer_props, loss_fn, X_train.shape[1])

        # ``callbacks or None`` keeps the "no callbacks" behaviour for an empty
        # list while using a single fit call.
        history = model.fit(
            X_train,
            y_train,
            epochs=5000,
            batch_size=64,
            callbacks=callbacks or None,
            verbose=0,
        )
        loss = history.history["loss"]
        print(f"Needed iterations: {len(loss)}")

        scores["model"].append(model)
        scores["loss"].append(loss)

        # Score the training, held-out and production data with every metric.
        for name in split_names:
            features, targets = splits[name]
            predictions = model.predict(features, verbose=0).flatten()
            for metric, metric_fn in metric_fns.items():
                scores[metric][name].append(metric_fn(targets, predictions))

    return scores


def test_estimation_model(
    data,
    data_prod,
    n_splits,
    max_neurons,
    activations,
    use_osnr=True,
    loss_fn="mean_absolute_error",
):
    """Cross-validate an estimation model built from the given parameters.

    Args:
        data: Table used for cross-validation; the last column is the target.
        data_prod: Table with the same layout used as production data.
        n_splits: Number of cross-validation folds.
        max_neurons: Units of the first hidden layer; every following layer
            gets half the units of the previous one (integer division).
        activations: One activation name per hidden layer.
        use_osnr: When False, the column right before the target is left out
            of the features (presumably the OSNR column; confirm against the
            dataset layout).
        loss_fn: Loss used to compile the network.

    Returns:
        The scores dictionary produced by ``estimation_crossvalidation``.
    """
    # The target column is always excluded; one more column is dropped when
    # the OSNR must not be used.
    excluded_columns = 1 if use_osnr else 2
    var_n = data.shape[1] - excluded_columns

    # Features
    X, X_prod = (np.array(frame[:, 0:var_n]) for frame in (data, data_prod))
    # Tags
    y, y_prod = (np.array(frame[:, -1]) for frame in (data, data_prod))

    # Layer properties: halve the width at every additional layer.
    layer_props = []
    for depth, activation in enumerate(activations):
        layer_props.append(
            {"units": max_neurons // (2**depth), "activation": activation}
        )
    osnr_suffix = " + OSNR" if use_osnr else ""
    print(f"{layer_props}{osnr_suffix}")

    # Stop once the training loss has not improved for 30 epochs and keep the
    # best weights seen so far.
    early_stopping = EarlyStopping(
        monitor="loss", patience=30, mode="min", restore_best_weights=True
    )

    return estimation_crossvalidation(
        X, y, X_prod, y_prod, n_splits, layer_props, loss_fn, [early_stopping]
    )


def plot_results(x_values, scores, xlabel, log=False, intx=False):
    """Show ``scores`` against ``x_values`` as a scatter plot joined by a line.

    Args:
        x_values: Values for the horizontal axis.
        scores: Values for the vertical axis (the axis is labelled "MAE").
        xlabel: Label of the horizontal axis.
        log: Use a base-2 logarithmic horizontal axis when True.
        intx: Put one tick at every value of ``x_values`` when True.
    """
    plt.figure(figsize=(8, 6), layout="constrained")

    # Markers first, then the line connecting them.
    plt.scatter(x_values, scores)
    plt.plot(x_values, scores)

    plt.grid(True)
    plt.xlabel(xlabel)
    plt.ylabel("MAE")

    if log:
        plt.xscale("log", base=2)
    if intx:
        plt.xticks(x_values)

    plt.show()

## FCM

### Load data

In [None]:
# On Colab the dataset was copied next to the notebook; locally it lives in
# the shared database folder.
# Alternative local dataset: "../../Databases/32GBd/CaractFCM.csv"
cv32f_file = (
    "counting_vectors_32f.csv"
    if GOOGLE_COLAB
    else "../../Databases/32GBd/counting_vectors_32f.csv"
)

# The file has no header row; after reading, every column is cast to Float64
# in non-strict mode.
cv32f = pl.read_csv(
    cv32f_file,
    has_header=False,
    dtypes=[pl.Float64],
    infer_schema_length=10000,
).cast(pl.Float64, strict=False)
# Show the original dataframe
cv32f

In [None]:
# Split the table into its three scenarios by row range, in this order:
# back-to-back (B2B), optical fiber at 0 dBm, optical fiber at 9 dBm.
# NOTE(review): rows 987 and 1520 fall between the ranges and are left out,
# presumably because they are separator rows; confirm against the CSV.
cv32f_B2B, cv32f_fiber0, cv32f_fiber9 = (
    cv32f[rows, :]
    for rows in (slice(0, 987), slice(988, 1520), slice(1521, None))
)

In [None]:
# Shuffle every scenario: sample all of its rows in random order, always with
# the same seed.
cv32f_B2B_shuffled, cv32f_fiber0_shuffled, cv32f_fiber9_shuffled = (
    frame.sample(n=len(frame), shuffle=True, seed=1036681523)
    for frame in (cv32f_B2B, cv32f_fiber0, cv32f_fiber9)
)

In [None]:
# Set aside the first 10% of every shuffled scenario for the later
# "production" testing.
cv32f_B2B_prod, cv32f_fiber0_prod, cv32f_fiber9_prod = (
    frame[: int(len(frame) * 0.1)]
    for frame in (cv32f_B2B_shuffled, cv32f_fiber0_shuffled, cv32f_fiber9_shuffled)
)

In [None]:
# Everything after the first 10% of every shuffled scenario is used for the
# normal (cross-validated) testing.
cv32f_B2B_new, cv32f_fiber0_new, cv32f_fiber9_new = (
    frame[int(len(frame) * 0.1) :]
    for frame in (cv32f_B2B_shuffled, cv32f_fiber0_shuffled, cv32f_fiber9_shuffled)
)

### Hyperparameters evaluation

The following hyperparameters are going to be combined and evaluated: 
- Maximum number of neurons in the first layer (8, 16, 32, 64, 128, 256, 512, 1024).
- Using or not the OSNR value as an additional feature.

Results will have the following structure:
```
{"scenario": {"n_neurons": {"osnr": results}}}
```
Where `scenario` is the experimental scenario (`"b2b"`, `"fiber0"` or `"fiber9"`, i.e. back-to-back, fiber at 0 dBm or fiber at 9 dBm), `n_neurons` is the maximum number of neurons in the model (the size of the first layer), and `osnr` is a string telling whether that model used the OSNR as an input (`"osnr"`) or not (`"wo_osnr"`).
Finally, `results` stores the loss history, the serialized model in JSON format in a string, and the MAE, RMSE and R² values for the training, test and production data.

In [None]:
# Hyperparameter grid: OSNR usage, first-layer widths (8 ... 1024, kept as
# strings because they are used as result keys) and per-layer activations.
osnr_lst = ["osnr", "wo_osnr"]
max_neurons = [str(1 << exponent) for exponent in range(3, 11)]
activations = ["relu", "tanh", "sigmoid"]

# Scenario names, paired by position with their (testing, production) data.
scenarios = ["b2b", "fiber0", "fiber9"]
scenario_data = list(
    zip(
        (cv32f_B2B_new, cv32f_fiber0_new, cv32f_fiber9_new),
        (cv32f_B2B_prod, cv32f_fiber0_prod, cv32f_fiber9_prod),
    )
)

In [None]:
# Resume from previously saved results when they can be loaded; otherwise
# start from an empty nested dictionary.
try:
    cv32f_reg_results = sofa.load_hdf5(f"{ROOT}/cv32f_reg_results.h5")
except Exception:
    # ``Exception`` rather than a bare ``except`` so that KeyboardInterrupt and
    # SystemExit are not swallowed.
    print("Error loading from file, creating a new dictionary")
    # Three auto-creating levels (scenario -> neurons -> osnr); the innermost
    # value is an empty defaultdict that marks "not evaluated yet".
    cv32f_reg_results = defaultdict(
        defaultdict(defaultdict(defaultdict().copy).copy).copy
    )

# Evaluate
for scenario_name, (data, prod) in zip(scenarios, scenario_data):
    for neurons in max_neurons:
        for osnr in osnr_lst:
            # Skip the combinations that already have stored results.
            if cv32f_reg_results[scenario_name][neurons][osnr] != defaultdict():
                continue

            args = {
                "data": data,
                "data_prod": prod,
                "n_splits": 5,
                "max_neurons": int(neurons),
                "activations": activations,
                "use_osnr": osnr == "osnr",
            }
            # Get results
            results = test_estimation_model(**args)
            # Replace the trained models by their serialized form so the
            # results can be written to disk.
            results["model"] = [
                utils.serialize_keras_object(model) for model in results["model"]
            ]
            cv32f_reg_results[scenario_name][neurons][osnr] = results
            # Save after every combination so an interrupted run can resume.
            print("Saving results...")
            sofa.save_hdf5(cv32f_reg_results, f"{ROOT}/cv32f_reg_results.h5")
            print("Results saved!")

### Results

In [None]:
def get_avg_score(results, target_value, target="neurons", metric="mae", score="test"):
    """Collect the fold-averaged value of one metric, filtered along one axis.

    The module-level ``scenarios``, ``max_neurons`` and ``osnr_lst`` lists
    define the combinations that are visited.

    Args:
        results: Nested mapping
            ``results[scenario][neurons][osnr][metric][score]`` whose leaves
            are the per-fold scores.
        target_value: Value that the filtered axis must be equal to.
        target: Axis to filter on: ``"scenario"``, ``"neurons"`` or ``"osnr"``.
            Any other value applies no filter.
        metric: Metric key to read, e.g. ``"mae"``.
        score: Data split key to read, e.g. ``"test"``.

    Returns:
        A list with the mean over folds of every visited combination.
    """
    mean_scores = []
    for scenario in scenarios:
        if target == "scenario" and scenario != target_value:
            continue
        for neurons in max_neurons:
            if target == "neurons" and neurons != target_value:
                continue
            for osnr in osnr_lst:
                if target == "osnr" and osnr != target_value:
                    continue
                # Results are keyed by scenario, and the requested metric and
                # split are honoured instead of a fixed "mae"/"test" pair.
                fold_scores = results[scenario][neurons][osnr][metric][score]
                mean_scores.append(np.mean(fold_scores))
    return mean_scores

In [None]:
# For every maximum layer width, average the scores that get_avg_score
# returns, then plot them on a base-2 logarithmic axis.
cv32f_neurons_avg_results = [
    np.mean(
        get_avg_score(
            cv32f_reg_results,
            width,
            target="neurons",
            metric="mae",
            score="test",
        )
    )
    for width in max_neurons
]
x = [int(width) for width in max_neurons]
plot_results(x, cv32f_neurons_avg_results, "Maximum number of neurons", log=True)

In [None]:
# For every scenario, average the scores that get_avg_score returns and print
# them as a small table.
cv32f_scenario_avg_results = [
    np.mean(
        get_avg_score(
            cv32f_reg_results,
            target_value=name,
            target="scenario",
            metric="mae",
            score="test",
        )
    )
    for name in scenarios
]

print("B2B     Fiber at 0 dBm     Fiber at 9 dBm")
print(
    f"{cv32f_scenario_avg_results[0]:.4f}     "
    f"{cv32f_scenario_avg_results[1]:.4f}     "
    f"{cv32f_scenario_avg_results[2]:.4f}"
)

In [None]:
# Average the scores that get_avg_score returns with and without the OSNR
# input, then print both values.
cv32f_osnr_avg_results = [
    np.mean(
        get_avg_score(
            cv32f_reg_results,
            osnr_key,
            target="osnr",
            metric="mae",
            score="test",
        )
    )
    for osnr_key in ["osnr", "wo_osnr"]
]
print("With OSNR  Without OSNR")
print(
    f"{cv32f_osnr_avg_results[0]:.3f}       "
    f"{cv32f_osnr_avg_results[1]:.3f}"
)

### Sort models by score

In [None]:
# Find better model by test score
def get_better_models(results, metric="mae", score="test"):
    """Rank all evaluated configurations by their mean score, lowest first.

    Every combination of the module-level ``scenarios``, ``max_neurons`` and
    ``osnr_lst`` is looked up in ``results``; entries whose score collection
    is a ``defaultdict`` are skipped.

    Args:
        results: Nested mapping
            ``results[scenario][neurons][osnr][metric][score]``.
        metric: Metric key to rank by.
        score: Data split key to rank by.

    Returns:
        A polars DataFrame built from the sorted
        ``(mean score, [scenario, neurons, osnr])`` pairs.
    """
    ranking = []
    for scenario, neurons, osnr in product(scenarios, max_neurons, osnr_lst):
        fold_scores = results[scenario][neurons][osnr][metric][score]
        if not isinstance(fold_scores, defaultdict):
            ranking.append((np.mean(fold_scores), [scenario, neurons, osnr]))
    return pl.dataframe.DataFrame(sorted(ranking, key=lambda entry: entry[0]))

In [None]:
# Rank the evaluated configurations by mean test MAE and show the first ten
# rows (lowest MAE first).
cv32f_better_models_df = get_better_models(
    results=cv32f_reg_results, metric="mae", score="test"
)
cv32f_better_models_df.head(n=10)

## GKM

### Load data

In [None]:
# On Colab the dataset was copied next to the notebook; locally it lives in
# the shared database folder.
# Alternative local dataset: "../../Databases/32GBd/CaractFCM.csv"
cv32g_file = (
    "counting_vectors_32g.csv"
    if GOOGLE_COLAB
    else "../../Databases/32GBd/counting_vectors_32g.csv"
)

# The file has no header row; after reading, every column is cast to Float64
# in non-strict mode.
cv32g = pl.read_csv(
    cv32g_file,
    has_header=False,
    dtypes=[pl.Float64],
    infer_schema_length=10000,
).cast(pl.Float64, strict=False)
# Show the original dataframe
cv32g

In [None]:
# Split the table into its three scenarios by row range, in this order:
# back-to-back (B2B), optical fiber at 0 dBm, optical fiber at 9 dBm.
# NOTE(review): rows 987 and 1520 fall between the ranges and are left out,
# presumably because they are separator rows; confirm against the CSV.
cv32g_B2B, cv32g_fiber0, cv32g_fiber9 = (
    cv32g[rows, :]
    for rows in (slice(0, 987), slice(988, 1520), slice(1521, None))
)

In [None]:
# Shuffle every scenario: sample all of its rows in random order, always with
# the same seed.
cv32g_B2B_shuffled, cv32g_fiber0_shuffled, cv32g_fiber9_shuffled = (
    frame.sample(n=len(frame), shuffle=True, seed=1036681523)
    for frame in (cv32g_B2B, cv32g_fiber0, cv32g_fiber9)
)

In [None]:
# Set aside the first 10% of every shuffled scenario for the later
# "production" testing.
cv32g_B2B_prod, cv32g_fiber0_prod, cv32g_fiber9_prod = (
    frame[: int(len(frame) * 0.1)]
    for frame in (cv32g_B2B_shuffled, cv32g_fiber0_shuffled, cv32g_fiber9_shuffled)
)

In [None]:
# Everything after the first 10% of every shuffled scenario is used for the
# normal (cross-validated) testing.
cv32g_B2B_new, cv32g_fiber0_new, cv32g_fiber9_new = (
    frame[int(len(frame) * 0.1) :]
    for frame in (cv32g_B2B_shuffled, cv32g_fiber0_shuffled, cv32g_fiber9_shuffled)
)

### Hyperparameters evaluation

The following hyperparameters are going to be combined and evaluated: 
- Maximum number of neurons in the first layer (8, 16, 32, 64, 128, 256, 512, 1024).
- Using or not the OSNR value as an additional feature.

Results will have the following structure:
```
{"scenario": {"n_neurons": {"osnr": results}}}
```
Where `scenario` is the experimental scenario (`"b2b"`, `"fiber0"` or `"fiber9"`, i.e. back-to-back, fiber at 0 dBm or fiber at 9 dBm), `n_neurons` is the maximum number of neurons in the model (the size of the first layer), and `osnr` is a string telling whether that model used the OSNR as an input (`"osnr"`) or not (`"wo_osnr"`).
Finally, `results` stores the loss history, the serialized model in JSON format in a string, and the MAE, RMSE and R² values for the training, test and production data.

In [None]:
# Hyperparameter grid: OSNR usage, first-layer widths (8 ... 1024, kept as
# strings because they are used as result keys) and per-layer activations.
osnr_lst = ["osnr", "wo_osnr"]
max_neurons = [str(1 << exponent) for exponent in range(3, 11)]
activations = ["relu", "tanh", "sigmoid"]

# Scenario names, paired by position with their (testing, production) data.
scenarios = ["b2b", "fiber0", "fiber9"]
scenario_data = list(
    zip(
        (cv32g_B2B_new, cv32g_fiber0_new, cv32g_fiber9_new),
        (cv32g_B2B_prod, cv32g_fiber0_prod, cv32g_fiber9_prod),
    )
)

In [None]:
# Resume from previously saved results when they can be loaded; otherwise
# start from an empty nested dictionary.
try:
    cv32g_reg_results = sofa.load_hdf5(f"{ROOT}/cv32g_reg_results.h5")
except Exception:
    # ``Exception`` rather than a bare ``except`` so that KeyboardInterrupt and
    # SystemExit are not swallowed.
    print("Error loading from file, creating a new dictionary")
    # Three auto-creating levels (scenario -> neurons -> osnr); the innermost
    # value is an empty defaultdict that marks "not evaluated yet".
    cv32g_reg_results = defaultdict(
        defaultdict(defaultdict(defaultdict().copy).copy).copy
    )

# Evaluate
for scenario_name, (data, prod) in zip(scenarios, scenario_data):
    for neurons in max_neurons:
        for osnr in osnr_lst:
            # Skip the combinations that already have stored results.
            if cv32g_reg_results[scenario_name][neurons][osnr] != defaultdict():
                continue

            args = {
                "data": data,
                "data_prod": prod,
                "n_splits": 5,
                "max_neurons": int(neurons),
                "activations": activations,
                "use_osnr": osnr == "osnr",
            }
            # Get results
            results = test_estimation_model(**args)
            # Replace the trained models by their serialized form so the
            # results can be written to disk.
            results["model"] = [
                utils.serialize_keras_object(model) for model in results["model"]
            ]
            cv32g_reg_results[scenario_name][neurons][osnr] = results
            # Save after every combination so an interrupted run can resume.
            print("Saving results...")
            sofa.save_hdf5(cv32g_reg_results, f"{ROOT}/cv32g_reg_results.h5")
            print("Results saved!")

### Results

In [None]:
# For every maximum layer width, average the scores that get_avg_score
# returns, then plot them on a base-2 logarithmic axis.
cv32g_neurons_avg_results = [
    np.mean(
        get_avg_score(
            cv32g_reg_results,
            width,
            target="neurons",
            metric="mae",
            score="test",
        )
    )
    for width in max_neurons
]
x = [int(width) for width in max_neurons]
plot_results(x, cv32g_neurons_avg_results, "Maximum number of neurons", log=True)

In [None]:
# For every scenario, average the scores that get_avg_score returns and print
# them as a small table.
cv32g_scenario_avg_results = [
    np.mean(
        get_avg_score(
            cv32g_reg_results,
            target_value=name,
            target="scenario",
            metric="mae",
            score="test",
        )
    )
    for name in scenarios
]

print("B2B     Fiber at 0 dBm     Fiber at 9 dBm")
print(
    f"{cv32g_scenario_avg_results[0]:.4f}     "
    f"{cv32g_scenario_avg_results[1]:.4f}     "
    f"{cv32g_scenario_avg_results[2]:.4f}"
)

In [None]:
# Average the scores that get_avg_score returns with and without the OSNR
# input, then print both values.
cv32g_osnr_avg_results = [
    np.mean(
        get_avg_score(
            cv32g_reg_results,
            osnr_key,
            target="osnr",
            metric="mae",
            score="test",
        )
    )
    for osnr_key in ["osnr", "wo_osnr"]
]
print("With OSNR  Without OSNR")
print(
    f"{cv32g_osnr_avg_results[0]:.3f}       "
    f"{cv32g_osnr_avg_results[1]:.3f}"
)

### Sort models by score

In [None]:
# Rank the evaluated configurations by mean test MAE and show the first ten
# rows (lowest MAE first).
cv32g_better_models_df = get_better_models(
    results=cv32g_reg_results, metric="mae", score="test"
)
cv32g_better_models_df.head(n=10)