In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import re
from scipy.optimize import curve_fit
# sns.set_theme()

In [None]:
# Read the logged loss curve of every run found under "checkpoints/".
# The loss is parsed from each log line as the text after the first ':' of the
# third comma-separated field.
data_sequences = []
# os.listdir returns entries in arbitrary order and a later cell indexes
# data_sequences[0], so sort the listing to keep results reproducible.
for lf in sorted(os.listdir("checkpoints")):
    log_path = f"checkpoints/{lf}/log.txt"
    # Stray files (e.g. editor/OS metadata) and runs without a log would
    # otherwise abort the whole load; report and skip them instead.
    if not os.path.isfile(log_path):
        print(f"Skipping checkpoints/{lf}: no log.txt found")
        continue
    with open(log_path) as f:
        # Blank lines (such as a trailing empty line) carry no loss value.
        data = [line for line in f.readlines() if line.strip()]
    # Strip every trailing digit / '-' character from the directory name.
    run_name = lf.rstrip("-0123456789")
    loss_sequence = np.array([float(line.split(',')[2].split(':')[1]) for line in data])
    data_sequences.append((run_name, loss_sequence))

In [None]:
# Number of logged loss values, taken from the first run
# (assumes every run logged the same number of values -- TODO confirm).
RUN_LEN = len(data_sequences[0][1])
# Average over the last 5% of the log. Clamp to at least one sample: with
# TAIL_LEN == 0 the slice ls[-0:] is ls[0:], i.e. silently the *whole* run.
TAIL_LEN = max(1, RUN_LEN // 20)

# (run_name, mean loss over the final TAIL_LEN logged values) for every run
mean_final_losses = [(rn, np.mean(ls[-TAIL_LEN:])) for rn, ls in data_sequences]

# Smallest / largest final loss. Taken directly from the data: seeding the
# search with fixed values (100.0 / -1.0) reports wrong bounds whenever every
# loss lies above 100.
final_loss_values = [loss for _, loss in mean_final_losses]
min_loss = min(final_loss_values)
max_loss = max(final_loss_values)

In [None]:
# Split a run name into its neuron count and configuration label
def parse_name(name) -> tuple[int, str]:
    """Return ``(neuron_count, config)`` parsed from a run name.

    The neuron count is the run of digits directly in front of a final ``n``.
    The configuration label is ``name`` with a trailing ``-<digits>n`` removed
    and every occurrence of ``optimiser-benchmark-`` dropped, stripped of
    surrounding whitespace.

    When the name has no ``<digits>n`` ending, an error message is printed and
    ``(0, "")`` is returned.
    """
    size_match = re.search(r'(\d+)n$', name)
    if size_match is not None:
        without_size = re.sub(r'-\d+n$', '', name)
        # The prefix contains no regex metacharacters, so a plain replace
        # removes exactly the same text as a regex substitution.
        label = without_size.replace("optimiser-benchmark-", '')
        return int(size_match.group(1)), label.strip()

    print("Error! No match!")
    return 0, ""

In [None]:
# Attach the parsed (neurons, config) pair to every mean final loss
tagged_data = [
    (parse_name(run_name), final_loss)
    for run_name, final_loss in mean_final_losses
]

# Bucket the (config, loss) pairs under their neuron count
grouped_data: dict[int, list[tuple[str, np.float64]]] = {}
for (neurons, config), loss in tagged_data:
    grouped_data.setdefault(neurons, []).append((config, loss))

# Order every bucket by configuration name.
for bucket in grouped_data.values():
    bucket.sort(key = lambda entry: entry[0])

In [None]:
# Prepare plot data
neuron_counts = sorted(grouped_data.keys())
# One x position per neuron-count group
x = np.arange(len(neuron_counts))
# Size the bars for the fullest group so bars of one group can never run into
# the next group, even if some neuron counts have more configurations.
n_configs = max(len(group) for group in grouped_data.values())
# Each group of bars occupies 0.8 of the unit spacing between x positions
width = 0.2 * 4 / n_configs
# Populate a colours map covering every configuration in any group. Looking
# only at the 1-neuron group fails when no such run exists and leaves
# configurations missing from that group without a colour.
config_names = sorted({config for group in grouped_data.values() for config, _ in group})
# Ask for exactly as many colours as needed; seaborn cycles the palette when
# more colours are requested than it contains, so indexing cannot overrun.
palette = sns.color_palette(n_colors=len(config_names))
config_colors = dict(zip(config_names, palette))

In [None]:
# Grouped bar chart: one cluster per neuron count, one bar per configuration
fig, ax = plt.subplots(figsize=(12, 6))

for group_idx, neuron_count in enumerate(neuron_counts):
    is_first_group = group_idx == 0
    for bar_idx, (config, loss) in enumerate(grouped_data[neuron_count]):
        ax.bar(
            x[group_idx] + bar_idx*width,
            loss,
            width,
            # Only the first cluster carries labels, so each configuration
            # shows up once in the legend.
            label=config if is_first_group else "",
            color=config_colors[config],
        )

# y axis: base-2 log scale with 15 log-spaced ticks from min_loss to max_loss
ax.set_yscale("log", base = 2)
y_ticks = np.logspace(np.log2(min_loss), np.log2(max_loss), num = 15, base = 2.0)
ax.set_yticks(y_ticks)
ax.set_yticklabels(np.round(y_ticks, 4))
ax.set_ylabel('Loss')
ax.grid(axis='y', linestyle='--', alpha=0.7)

# x axis: one tick per cluster, centred under its bars
cluster_centres = x + width * (n_configs - 1) / 2
ax.set_xticks(cluster_centres)
ax.set_xticklabels(neuron_counts)
ax.set_xlabel('Number of Neurons')

ax.set_title('Neural Network Configurations: Loss Comparison')
ax.legend(title="Configuration", bbox_to_anchor=(1.05, 1), loc='upper left')

plt.tight_layout()
plt.show()

In [None]:
# Create new dataset for plotting linearly:
# config -> [(neuron_count, loss), ...] in ascending neuron-count order.
configuration_series: dict[str, list[tuple[int, np.float64]]] = {}
# Single pass over every group. Seeding the series from the 1-neuron group
# alone fails when no such run exists and drops configurations absent from it.
for neuron_count in neuron_counts:
    for config, loss in grouped_data[neuron_count]:
        configuration_series.setdefault(config, []).append((neuron_count, loss))
# Keep the configurations in alphabetical order regardless of which neuron
# count each one first appears under (keys are unique, so only names compare).
configuration_series = dict(sorted(configuration_series.items()))

In [None]:
# Line plot of final loss against network size, one line per configuration
fig, ax = plt.subplots(figsize=(12, 6))

# Collect every plotted loss so the y-range comes from the data itself;
# fixed starting values (100.0 / -1.0) give wrong bounds for losses above 100.
plotted_losses = []
for config, seq in configuration_series.items():
    if not seq:
        continue  # an empty series cannot be unzipped into two sequences
    # Cell-local names, so the module-level `x` (bar positions used by the
    # grouped bar chart) is not overwritten by this loop.
    sizes, losses = zip(*seq)
    plotted_losses.extend(losses)
    ax.plot(sizes, losses, label = config, marker = '+')

# Customize plot
ax.set_xscale("log", base = 2)
ax.set_yscale("log", base = 2)
ax.set_ylabel('Loss')
ax.set_xlabel('Number of Neurons')
ax.set_title('Neural Network Configurations: Loss Comparison')
# Ticks at the powers of two 2**0 .. 2**13
x_ticks = [2 ** exponent for exponent in range(0, 14)]
ax.set_xticks(x_ticks)
ax.set_xticklabels(x_ticks)
ax.set_xlim(0.5, 8192)
# 15 log-spaced y ticks spanning the plotted losses; skipped when nothing was
# plotted, since there is no range to span.
if plotted_losses:
    min_loss = min(plotted_losses)
    max_loss = max(plotted_losses)
    y_ticks = np.logspace(np.log2(min_loss), np.log2(max_loss), num = 15, base = 2.0)
    ax.set_yticks(y_ticks)
    ax.set_yticklabels(np.round(y_ticks, 4))
ax.legend(title="Configuration", bbox_to_anchor=(1.05, 1), loc='upper left')

plt.tight_layout()
plt.show()

In [None]:
# Line plot of final loss against network size, leaving out the first SKIP
# points of every configuration's series.
fig, ax = plt.subplots(figsize=(12, 8))

# Number of leading points dropped from each series; also the exponent of the
# first x tick (2**SKIP).
SKIP = 4

# Collect every plotted loss so the y-range comes from the data itself;
# fixed starting values (100.0 / -1.0) give wrong bounds for losses above 100.
plotted_losses = []
for config, seq in configuration_series.items():
    points = seq[SKIP:]
    if not points:
        continue  # series has SKIP or fewer points: nothing left to draw
    # Cell-local names, so the module-level `x` (bar positions used by the
    # grouped bar chart) is not overwritten by this loop.
    sizes, losses = zip(*points)
    plotted_losses.extend(losses)
    ax.plot(sizes, losses, label = config, marker = '+')

# Customize plot
ax.set_xscale("log", base = 2)
ax.set_yscale("log", base = 2)
ax.set_ylabel('Loss')
ax.set_xlabel('Number of Neurons')
ax.set_title('Neural Network Configurations: Loss Comparison')
x_ticks = [2 ** exponent for exponent in range(SKIP, 14)]
ax.set_xticks(x_ticks)
ax.set_xticklabels(x_ticks)
# Leave half a tick of room left of the first tick
ax.set_xlim(x_ticks[0] / 2.0, 8192)
# 15 log-spaced y ticks spanning the plotted losses; skipped when nothing was
# plotted, since there is no range to span.
if plotted_losses:
    min_loss = min(plotted_losses)
    max_loss = max(plotted_losses)
    y_ticks = np.logspace(np.log2(min_loss), np.log2(max_loss), num = 15, base = 2.0)
    ax.set_yticks(y_ticks)
    ax.set_yticklabels(np.round(y_ticks, 4))
ax.legend(title="Configuration", bbox_to_anchor=(1.05, 1), loc='upper left')

plt.tight_layout()
plt.show()

In [None]:
# Line plot of final loss against network size for a hand-picked set of
# configurations, leaving out the first SKIP points of every series.
fig, ax = plt.subplots(figsize=(12, 8))

# Number of leading points dropped from each series; also the exponent of the
# first x tick (2**SKIP).
SKIP = 7

desired_configs = [
    "screlu", "screlu-lineardecay", "screlu-warmup256", "screlu-warmupcosinedecay"
]
# Collect every plotted loss so the y-range comes from the data itself;
# fixed starting values (100.0 / -1.0) give wrong bounds for losses above 100
# and an invalid range when none of the desired configurations exist.
plotted_losses = []
for config, seq in configuration_series.items():
    if config not in desired_configs:
        continue
    points = seq[SKIP:]
    if not points:
        continue  # series has SKIP or fewer points: nothing left to draw
    # Cell-local names, so the module-level `x` (bar positions used by the
    # grouped bar chart) is not overwritten by this loop.
    sizes, losses = zip(*points)
    plotted_losses.extend(losses)
    ax.plot(sizes, losses, label = config, marker = '+')

# Customize plot
ax.set_xscale("log", base = 2)
ax.set_yscale("log", base = 2)
ax.set_ylabel('Loss')
ax.set_xlabel('Number of Neurons')
ax.set_title('Neural Network Configurations: Loss Comparison')
x_ticks = [2 ** exponent for exponent in range(SKIP, 14)]
ax.set_xticks(x_ticks)
ax.set_xticklabels(x_ticks)
# Leave half a tick of room left of the first tick
ax.set_xlim(x_ticks[0] / 2.0, 8192)
# 15 log-spaced y ticks spanning the plotted losses; skipped when nothing was
# plotted, since there is no range to span.
if plotted_losses:
    min_loss = min(plotted_losses)
    max_loss = max(plotted_losses)
    y_ticks = np.logspace(np.log2(min_loss), np.log2(max_loss), num = 15, base = 2.0)
    ax.set_yticks(y_ticks)
    ax.set_yticklabels(np.round(y_ticks, 4))
ax.legend(title="Configuration", bbox_to_anchor=(1.05, 1), loc='upper left')

plt.tight_layout()
plt.show()

In [None]:
# Fit a power-law model to the screlu data, and plot it alongside the data.
def powlaw(x, a, b, c):
    """Evaluate the offset power law ``a * x**b + c`` using ``np.power``."""
    scaled = a * np.power(x, b)
    return scaled + c

# Observed (neuron count, loss) points of the two configurations being fitted
screlu_neuron_counts, screlu_losses = zip(*configuration_series["screlu"])
crelu_neuron_counts, crelu_losses = zip(*configuration_series["crelu"])

def model(x, y, extra_sizes=(2048, 4096)):
    """Fit ``powlaw`` to the points ``(x, y)`` and evaluate the fit.

    Parameters
    ----------
    x, y : sequences
        Observed neuron counts and the corresponding losses.
    extra_sizes : iterable, optional
        Additional neuron counts at which the fitted curve is evaluated.

    Returns
    -------
    tuple
        ``(grid, fitted, popt)``: the sorted, duplicate-free evaluation grid
        (the observed ``x`` plus ``extra_sizes``), the fitted curve on that
        grid, and the fitted ``(a, b, c)`` parameters.
    """
    popt, _pcov = curve_fit(powlaw, x, y, p0 = np.array([1.0, -1.0, 1.0]), maxfev=2000)
    # Build the grid from this call's own x values rather than a module-level
    # series, and merge the extra sizes as a set so sizes already present in
    # the data do not produce duplicated, out-of-order points on the curve.
    extrap = sorted(set(x) | set(extra_sizes))
    return extrap, powlaw(extrap, *popt), popt

# Each fit keeps its own grid, so the two series may cover different sizes.
screlu_extrap_inputs, screlu_extrap, screlu_params = model(screlu_neuron_counts, screlu_losses)
crelu_extrap_inputs, crelu_extrap, crelu_params = model(crelu_neuron_counts, crelu_losses)
# Previously a single shared name held the grid of the last fit; keep it bound.
extrap_inputs = crelu_extrap_inputs

fig, ax = plt.subplots(figsize=(12, 6))
ax.set_xscale("log", base = 2)
ax.set_yscale("log", base = 2)
ax.plot(screlu_extrap_inputs, screlu_extrap, '--', label = "screlu-model")
ax.plot(screlu_neuron_counts, screlu_losses, 'x', label = "optimiser-benchmark-screlu datapoints")
ax.plot(crelu_extrap_inputs, crelu_extrap, '--', label = "crelu-model")
ax.plot(crelu_neuron_counts, crelu_losses, 'x', label = "optimiser-benchmark-crelu datapoints")

# Fitted equations, shown as an annotation inside the axes
screlu_text = f"screlu: y = {screlu_params[0]:.2e} * x^{screlu_params[1]:.2f} + {screlu_params[2]:.2e}"
crelu_text = f"crelu: y = {crelu_params[0]:.2e} * x^{crelu_params[1]:.2f} + {crelu_params[2]:.2e}"
# Place text in upper right corner with larger font and background
ax.text(0.98, 0.98, screlu_text + '\n' + crelu_text,
        transform=ax.transAxes,
        verticalalignment='top',
        horizontalalignment='right',
        fontsize=12,
        bbox=dict(facecolor='white', alpha=0.8, edgecolor='none', pad=5))

ax.legend(title="Configuration", bbox_to_anchor=(1.05, 1), loc='upper left')
# NOTE(review): ticks stop at 2**11 = 2048 although the x-limit is 8192 and
# the fitted curves extend to 4096 -- confirm this is intended.
ax.set_xticks([2 ** exponent for exponent in range(0, 12)])
ax.set_xticklabels([2 ** exponent for exponent in range(0, 12)])
ax.set_xlim(0.5, 8192)
# y ticks span the range of the fitted curves (not of the raw data points)
min_loss = min(np.min(crelu_extrap), np.min(screlu_extrap))
max_loss = max(np.max(crelu_extrap), np.max(screlu_extrap))
y_ticks = np.logspace(np.log2(min_loss), np.log2(max_loss), num = 10, base = 2.0)
ax.set_yticks(y_ticks)
ax.set_yticklabels(np.round(y_ticks, 4))

plt.tight_layout()
plt.show()

In [None]:
# Load the ratings file as a list of lines (line endings kept)
with open("net-size-guantlet-ratings.txt") as ratings_file:
    text = list(ratings_file)

In [None]:
# Keep whitespace-separated fields 1..4 of every line; the first two lines
# are dropped below (presumably header rows -- TODO confirm).
data = [line.split()[1:5] for line in text]
rating_rows = data[2:]
# Transpose rows into columns; the second column is discarded.
names, _, elos, errors = zip(*rating_rows)
names = [int(name) for name in names]
elos = [float(elo) for elo in elos]
errors = [float(err) for err in errors]
# The same list serves as both entries of the error pair
up_down_errors = [errors, errors]

In [None]:
# Neuron counts and sign-flipped losses of the "screlu-warmupcosinedecay"
# series, leaving out its first five points.
tail_points = configuration_series["screlu-warmupcosinedecay"][5:]
neg_ncount, positive_losses = zip(*tail_points)
neg_losses = [-value for value in positive_losses]

In [None]:
# Elo (left axis) and negated final loss (right axis) against network size
fig, ax = plt.subplots(figsize=(12, 8))

ax1co = "tab:red"
ax2co = "tab:blue"

# Tick positions every half power of two from 2**4 to 2**13.5; only the whole
# powers of two receive a label.
half_steps = range(4 * 2, 14 * 2)
tick_positions = [2 ** (step / 2) for step in half_steps]
tick_labels = [2 ** (step // 2) if step % 2 == 0 else None for step in half_steps]

# Shared x axis
ax.set_title("Elo vs. Net Size")
ax.set_xlabel("Number of Neurons")
ax.set_xscale("log", base = 2)
ax.set_xticks(tick_positions)
ax.set_xticklabels(tick_labels)
ax.set_xlim(16, 8192)

# Left y axis: Elo
ax.set_ylabel("Elo", color=ax1co)
ax.set_yticks(list(range(0, 550, 50)))
ax.tick_params(axis='y', labelcolor=ax1co)

elo_line, cap_lines, _bars = ax.errorbar(
    names,
    elos,
    yerr=up_down_errors,
    color=ax1co,
    ecolor="black",
    elinewidth=1.0,
    markersize=8,
    capsize=4,
    label="elo curve",
)
elo_line.set_label("elo curve")
for cap_line in cap_lines:
    cap_line.set_markeredgewidth(1.0)

# Right y axis: negated loss, sharing the x axis with the Elo curve
ax2 = ax.twinx()

ax2.grid(False)
loss_lines = ax2.plot(neg_ncount, neg_losses, color=ax2co, label="(negative) loss curve")
# Add a little headroom above the automatically chosen upper limit
y_bottom, y_top = ax2.get_ylim()
ax2.set_ylim(bottom=y_bottom, top=y_top + 0.00025)
ax2.set_ylabel("(Negative) Final Loss", color=ax2co)
ax2.tick_params(axis='y', labelcolor=ax2co)

# One legend listing the lines of both axes
legend_handles = [elo_line] + loss_lines
legend_labels = [handle.get_label() for handle in legend_handles]
ax.legend(legend_handles, legend_labels, loc=0)

plt.tight_layout()
plt.show()