In [None]:
from ddnn.nn import *
from ddnn.validation import *
from ddnn.data import *

# --- Model ------------------------------------------------------------------
# Fully connected 9 -> 32 -> 32 -> 2 network with tanh after each hidden layer.
network = NeuralNetwork(
    [
        LinearLayer((9, 32)),
        ActivationFunction("tanh"),
        LinearLayer((32, 32)),
        ActivationFunction("tanh"),
        LinearLayer((32, 2)),
    ]
)

estimator = Estimator(
    net=network,
    loss=LossFunction("MSE"),
    # Alternative optimizer kept for reference:
    # optimizer=Optimizer("SGD", learning_rate=0.5, momentum_coefficient=0.5, l2_coefficient=0),
    optimizer=Optimizer("Adam", learning_rate=0.01, l2_coefficient=0.0033),
    batchsize=-1,  # presumably "use the whole training set per step" -- TODO confirm
    initializer=Initializer("glorot_uniform"),
    seed=123,
)

# --- Run settings -----------------------------------------------------------
# Alternative dataset kept for reference (a tuple selects the monk loader):
# dataset_type = ("monk", 2)
dataset_type = "ML_cup"

epochs = 1500
# Threshold handed to TrainingThresholdStopping; None disables early stopping.
# NOTE(review): the exact value looks like a loss copied from an earlier
# model-selection run -- confirm its origin.
early_stopping = 2.557191205025858

# Losses tracked by the logger and reported by estimator.evaluate.
losses = ["MSE", "MEE"]
# Logging interval forwarded to Logger(every=...).
log_every = 1

In [None]:
# Load the data selected by `dataset_type`: a tuple picks a monk problem
# (its second element is the problem number), anything else picks ML-cup.
if not isinstance(dataset_type, tuple):
    # ML-cup: split the "train" file into train/validation parts and also
    # load the "test" file, which is only used for the blind-test plot.
    traindata, testdata = train_valid_split(read_ML_cup("train"), seed=123)
    blindtest = read_ML_cup("test")
else:
    # Monk: read the train and test splits and one-hot encode each of them.
    # Note that `blindtest` is not defined on this path.
    traindata, testdata = (
        onehot_encoding(data=read_monks(dataset_type[1], split))
        for split in ("train", "test")
    )

In [None]:
# Sanity check: display the shapes of the two splits produced by the loading cell.
traindata.shape, testdata.shape

In [None]:
# Logger that records every loss in `losses` on both splits; it is read back
# later by the interactive training-curve plot.
testlogger = Logger(
    estimator,
    every=log_every,
    losses=losses,
    training_set=traindata,
    validation_set=testdata,
)

# `callback` is what gets passed to estimator.train: it always logs and, when
# a threshold is configured, also forwards the record to the stopper.
if early_stopping is None:

    def callback(record):
        """Log the training record (early stopping disabled)."""
        testlogger(record)

else:
    teststopper = TrainingThresholdStopping(estimator, early_stopping)

    def callback(record):
        """Log the training record, then let the threshold stopper inspect it."""
        testlogger(record)
        teststopper(record)

In [None]:
# Fit the estimator on the training split with n_epochs=epochs; `callback`
# logs each record and, when a threshold is configured, applies early stopping.
estimator.train(traindata, callback=callback, n_epochs=epochs)

In [None]:
# Evaluate every loss in `losses` on the training split and display the result.
res = estimator.evaluate(losses=losses, dataset=traindata)
res

In [None]:
# Same evaluation on `testdata`; note that `res` from the previous cell is overwritten.
res = estimator.evaluate(losses=losses, dataset=testdata)
res

In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from ipywidgets import interact
import numpy as np

In [None]:
%matplotlib ipympl

# Single interactive figure showing the training and validation curves together.

fig1, ax1 = plt.subplots()


@interact(
    loss=testlogger._losses,
)
def plot_results(loss):
    """Redraw ``ax1`` with the logged train/validation curves for ``loss``.

    The curves are read from ``testlogger._scores[0]["folds"][0]``. The legend
    shows the best logged value of each curve: the maximum for
    ``"binary_accuracy"``, the minimum (on a log y-axis) for every other loss.

    Parameters
    ----------
    loss : str
        One of the entries of ``testlogger._losses``, chosen via the widget.
    """
    ax1.clear()
    ax1.set_xlabel("epochs")
    ax1.set_ylabel(loss)

    # Everything in this section depends only on the selected loss, so it is
    # configured once instead of once per plotted curve.
    if loss == "binary_accuracy":
        # TODO: show the last value instead of the best one.
        pick_best, form, yscale = max, "{:.2}", "linear"
    else:
        pick_best, form, yscale = min, "{:.2E}", "log"
    ax1.set_yscale(yscale)
    # Rescale the tick labels so they show epochs instead of logged points.
    ax1.xaxis.set_major_formatter(
        ticker.FuncFormatter(lambda x, pos: "{0:g}".format(x * testlogger._every))
    )

    # (key in the logger, name shown in the legend, line style)
    curves = (("train", "train", None), ("valid", "test", "dotted"))
    for where, shown_as, style in curves:
        y = testlogger._scores[0]["folds"][0][where][loss]
        ax1.plot(
            y,
            label=f"{shown_as}: {form.format(pick_best(y))}",
            linestyle=style,
            color="black",
        )

    ax1.legend()
    # Compute the layout last so it accounts for the labels and legend that
    # were just drawn rather than for the previous (or still empty) axes.
    fig1.tight_layout()

In [None]:
# Draw a random subset of `testdata` and connect every target to its prediction.
SAMPLE_SIZE = 50
n_available = testdata.data.shape[0]
# A seeded generator keeps the sampled points (and therefore the figure)
# identical across runs; the size is capped because sampling without
# replacement raises ValueError when more items are requested than exist.
sampleids = np.random.default_rng(123).choice(
    n_available, size=min(SAMPLE_SIZE, n_available), replace=False
)
sampledata = testdata.data[sampleids]
samplepred = estimator.predict(sampledata)
samplelabels = testdata.labels[sampleids]

fig, ax = plt.subplots()
ax.set_xlabel("x")
ax.set_ylabel("y")

# One two-point segment per sample: from the true target to the prediction.
# Columns 0 and 1 of the labels/predictions are used as the x and y coordinates.
for i in range(samplelabels.shape[0]):
    ax.plot(
        [samplelabels[i, 0], samplepred[i, 0]],
        [samplelabels[i, 1], samplepred[i, 1]],
        marker="o",
    )

In [None]:
# Histogram of the per-sample Euclidean error on `testdata`.
fig, ax = plt.subplots()

preds = estimator.predict(testdata)
labels = testdata.labels
# Row-wise 2-norm of the residuals (the default vector norm is the 2-norm).
distances = np.linalg.norm(preds - labels, axis=1)

ax.set(xlabel="distance (Euclidean)", ylabel="count")
ax.hist(distances, color="black", bins=50);

In [None]:
# Scatter of the training targets (black) against the model predictions (grey).
fig, ax = plt.subplots()

label = traindata.labels
pred = estimator.predict(traindata)

# Fixed window so this figure is directly comparable with the blind-test one.
ax.set(xlim=(0, 30), ylim=(-35, -13), xlabel="x", ylabel="y")

for points, shade in ((label, "black"), (pred, (0.7, 0.7, 0.7))):
    ax.scatter(points[:, 0], points[:, 1], color=shade, s=6, marker=".")

In [None]:
# Scatter of the predictions on the blind-test data, drawn in the same fixed
# window as the training scatter. `blindtest` is only defined when the
# ML-cup branch of the loading cell was executed.
fig, ax = plt.subplots()

pred = estimator.predict(blindtest)

ax.set(xlim=(0, 30), ylim=(-35, -13), xlabel="x", ylabel="y")

grey = (0.7, 0.7, 0.7)
ax.scatter(pred[:, 0], pred[:, 1], color=grey, s=6, marker=".");

In [None]:
import numpy as np
import sys

# Retrain the same configuration with several seeds and average the final
# losses. NOTE: `estimator` is retrained in place, so after this cell it no
# longer holds the model that the plots above were drawn from.
fixed_rng = np.random.default_rng(123)

n_tries = 30
# Keyed by the configured `losses`, so changing that list cannot cause a
# KeyError in the accumulation loop below.
train_loss_avg = {loss: 0.0 for loss in losses}
test_loss_avg = {loss: 0.0 for loss in losses}

# Mirror the first training cell: only build the stopper when a threshold is
# configured, otherwise train with a callback that does nothing.
if early_stopping is not None:
    teststopper = TrainingThresholdStopping(estimator, early_stopping)
    multirun_callback = teststopper
else:

    def multirun_callback(record):
        """Ignore training records (early stopping disabled)."""


for i in range(n_tries):
    # A fresh seed per run, drawn from a fixed generator so the whole
    # experiment is repeatable. Presumably update_params(seed=...) also
    # re-initialises the weights -- TODO confirm.
    estimator.update_params(seed=fixed_rng.integers(0, sys.maxsize))
    estimator.train(traindata, callback=multirun_callback, n_epochs=epochs)
    train_res = estimator.evaluate(losses=losses, dataset=traindata)
    test_res = estimator.evaluate(losses=losses, dataset=testdata)
    # 1-based counter so the last line reads "n_tries / n_tries".
    print(f"{i + 1} / {n_tries}: {train_res}, {test_res}")
    for loss in losses:
        train_loss_avg[loss] += train_res[loss]
        test_loss_avg[loss] += test_res[loss]

# Turn the accumulated sums into means over the runs.
for loss in losses:
    train_loss_avg[loss] = train_loss_avg[loss] / n_tries
    test_loss_avg[loss] = test_loss_avg[loss] / n_tries

In [None]:
# Mean losses over the `n_tries` runs of the previous cell (training split first, then `testdata`).
print(train_loss_avg, test_loss_avg)