In [1]:
import pandas as pd


In [2]:
input_config = {
    "C": 1,
    "H": 28,
    "W": 28
}

output_config = {
    "C": 10
}

experiment_config = {
    "num_iters": 1,
    "batch_size": 32,
    "max_epochs": 20,
    "log_every_n_steps": 100,
    "patience": 5,
    "rotate_train": True,
    "rotate_test": True,
    "seed": 420,
}


def get_stcnn_hparams(
    conv_layers: int,
    channels: int,
    localization_channels: int,
    kernel_size: int = 5,
    mode: str = "single",
):
  return {
      "mode": mode,
      "in_channels": input_config["C"],
      "out_channels": output_config["C"],
      "channels": channels,
      "localization_channels": localization_channels,
      "conv_layers": conv_layers,
      "kernel_size": kernel_size,
      "localization_initialization_mode": "identity",
      "transformation_mode": "rotation",
  }


optimizer_hparams = {
    "lr": 1e-2,
    "weight_decay": 1e-4
}


In [3]:
metrics_base_path = "./results/rotations/stcnns/identity-init/"


In [4]:
from modules.MNISTModule import MNISTModule
model_name = "STCNN"


In [5]:
from model_saving_helpers import get_model_save_name
from pathlib import Path
import numpy as np

from count_model_parameters import count_model_parameters


def aggregate_model_stats(model_name, model_hparams, optimizer_hparams, experiment_config):
  model_save_name = get_model_save_name(
      model_name=model_name,
      model_hparams=model_hparams,
      optimizer_hparams=optimizer_hparams,
      experiment_config=experiment_config
  )
  model = MNISTModule(model_name, model_hparams, optimizer_hparams).model
  model_params = count_model_parameters(model)
  million = pow(10, 3)

  model_metrics_per_run = Path(
      metrics_base_path).glob(f"*{model_save_name}*.csv")

  model_metrics = pd.concat([pd.read_csv(experiment_metrics)
                             for experiment_metrics in model_metrics_per_run])
  test_metrics_subselection = model_metrics[[
      "test_loss",
      "test_acc",
  ]].dropna()
  test_metrics_means = test_metrics_subselection.mean().round(4)
  test_metrics_stds = test_metrics_subselection.std().round(4)

  test_acc_mean = test_metrics_means["test_acc"]
  test_acc_std = test_metrics_stds["test_acc"]

  report = pd.DataFrame({
      "Mode": [model_hparams["mode"]],
      "Conv Layers": [model_hparams["conv_layers"]],
      "Channels": [model_hparams["channels"]],
      "Localization Net Channels": [model_hparams["localization_channels"]],
      "Kernel Size": [model_hparams["kernel_size"]],
      "Params": [f"{np.round((model_params / million), 2)} K"],
      "Test Accuracy": [f"{test_acc_mean} ± {test_acc_std}"],
  })

  return report


In [6]:
import re


def get_max_model_stats(model_name, model_hparams, optimizer_hparams, experiment_config):
  model_save_name = get_model_save_name(
      model_name=model_name,
      model_hparams=model_hparams,
      optimizer_hparams=optimizer_hparams,
      experiment_config=experiment_config
  )
  model = MNISTModule(model_name, model_hparams, optimizer_hparams).model
  model_params = count_model_parameters(model)
  million = pow(10, 3)

  model_metrics_per_run = Path(
      metrics_base_path).glob(f"*{model_save_name}*.csv")

  all_metrics_files = []
  for metrics in model_metrics_per_run:
    run = re.search("-run=.", str(metrics)).group(0)[-1]
    read_metrics = pd.read_csv(metrics)
    read_metrics["run"] = int(run)
    all_metrics_files.append(read_metrics.reset_index())

  model_metrics = pd.concat(all_metrics_files)
  test_metrics_subselection = model_metrics[[
      "run",
      "test_loss",
      "test_acc",
  ]].dropna()

  best_acc_row = test_metrics_subselection[test_metrics_subselection.test_acc ==
                                           test_metrics_subselection.test_acc.max()].reset_index()

  report = pd.DataFrame({
      "Mode": [model_hparams["mode"]],
      "Conv Layers": [model_hparams["conv_layers"]],
      "Channels": [model_hparams["channels"]],
      "Localization Net Channels": [model_hparams["localization_channels"]],
      "Kernel Size": [model_hparams["kernel_size"]],
      "Params": [f"{np.round((model_params / million), 2)} K"],
      "Best Run": [best_acc_row["run"].tolist()[0]],
      "Max Test Accuracy": [best_acc_row["test_acc"].tolist()[0]],
  })

  return report


In [7]:
from model_saving_helpers import get_model_save_name
from pathlib import Path

modes = ["single", "multi"]
convs = [3, 5]
channels = [32, 64]

loc_channels = [8, 16, 17, 32, 35, 64, 18, 37]

all_models_stats = []

for mode in modes:
  for conv in convs:
    for channel in channels:
      for loc_channel in loc_channels:
        model_save_name = get_model_save_name(
            model_name=model_name,
            model_hparams=get_stcnn_hparams(
                conv, channel, loc_channel, mode=mode),
            optimizer_hparams=optimizer_hparams,
            experiment_config=experiment_config
        )
        model_metrics_per_run = Path(
            metrics_base_path).glob(f"*{model_save_name}*.csv")
        if len(list(model_metrics_per_run)):
          all_models_stats.append(
              aggregate_model_stats(
                  model_name=model_name,
                  model_hparams=get_stcnn_hparams(
                      conv, channel, loc_channel, mode=mode),
                  optimizer_hparams=optimizer_hparams,
                  experiment_config=experiment_config
              )
          )

all_models_stats = pd.concat(all_models_stats)

all_models_stats


Unnamed: 0,Mode,Conv Layers,Channels,Localization Net Channels,Kernel Size,Params,Test Accuracy
0,single,3,32,8,5,54.25 K,0.9335 ± 0.0093
0,single,3,64,8,5,209.07 K,0.9379 ± 0.0021
0,single,5,32,16,5,110.54 K,0.9725 ± 0.0023
0,single,5,64,8,5,414.0 K,0.9774 ± 0.0015
0,multi,3,32,17,5,101.88 K,0.9354 ± 0.0086
0,multi,3,64,35,5,412.31 K,0.936 ± 0.0187
0,multi,5,32,18,5,202.52 K,0.9693 ± 0.0022
0,multi,5,64,37,5,821.58 K,0.9753 ± 0.0012


In [8]:
print(all_models_stats.to_latex(
    column_format="c|c|c|c|c|c",
    index=False)
)


\begin{tabular}{c|c|c|c|c|c}
\toprule
  Mode &  Conv Layers &  Channels &  Localization Net Channels &  Kernel Size &   Params &   Test Accuracy \\
\midrule
single &            3 &        32 &                          8 &            5 &  54.25 K & 0.9335 ± 0.0093 \\
single &            3 &        64 &                          8 &            5 & 209.07 K & 0.9379 ± 0.0021 \\
single &            5 &        32 &                         16 &            5 & 110.54 K & 0.9725 ± 0.0023 \\
single &            5 &        64 &                          8 &            5 &  414.0 K & 0.9774 ± 0.0015 \\
 multi &            3 &        32 &                         17 &            5 & 101.88 K & 0.9354 ± 0.0086 \\
 multi &            3 &        64 &                         35 &            5 & 412.31 K &  0.936 ± 0.0187 \\
 multi &            5 &        32 &                         18 &            5 & 202.52 K & 0.9693 ± 0.0022 \\
 multi &            5 &        64 &                         37 &         

  print(all_models_stats.to_latex(


In [9]:
from model_saving_helpers import get_model_save_name
from pathlib import Path

modes = ["single", "multi"]
convs = [3, 5]
channels = [32, 64]

loc_channels = [8, 16, 17, 32, 35, 64, 18, 37]

all_models_stats = []

for mode in modes:
  for conv in convs:
    for channel in channels:
      for loc_channel in loc_channels:
        model_save_name = get_model_save_name(
            model_name=model_name,
            model_hparams=get_stcnn_hparams(
                conv, channel, loc_channel, mode=mode),
            optimizer_hparams=optimizer_hparams,
            experiment_config=experiment_config
        )
        model_metrics_per_run = Path(
            metrics_base_path).glob(f"*{model_save_name}*.csv")
        if len(list(model_metrics_per_run)):
          all_models_stats.append(
              get_max_model_stats(
                  model_name=model_name,
                  model_hparams=get_stcnn_hparams(
                      conv, channel, loc_channel, mode=mode),
                  optimizer_hparams=optimizer_hparams,
                  experiment_config=experiment_config
              )
          )

all_models_stats = pd.concat(all_models_stats)

all_models_stats


Unnamed: 0,Mode,Conv Layers,Channels,Localization Net Channels,Kernel Size,Params,Best Run,Max Test Accuracy
0,single,3,32,8,5,54.25 K,2,0.9423
0,single,3,64,8,5,209.07 K,2,0.9397
0,single,5,32,16,5,110.54 K,2,0.974
0,single,5,64,8,5,414.0 K,1,0.9783
0,multi,3,32,17,5,101.88 K,2,0.9449
0,multi,3,64,35,5,412.31 K,3,0.9478
0,multi,5,32,18,5,202.52 K,2,0.9716
0,multi,5,64,37,5,821.58 K,1,0.9762


In [10]:
print(all_models_stats.to_latex(
    column_format="c|c|c|c|c|c",
    index=False)
)


\begin{tabular}{c|c|c|c|c|c}
\toprule
  Mode &  Conv Layers &  Channels &  Localization Net Channels &  Kernel Size &   Params &  Best Run &  Max Test Accuracy \\
\midrule
single &            3 &        32 &                          8 &            5 &  54.25 K &         2 &             0.9423 \\
single &            3 &        64 &                          8 &            5 & 209.07 K &         2 &             0.9397 \\
single &            5 &        32 &                         16 &            5 & 110.54 K &         2 &             0.9740 \\
single &            5 &        64 &                          8 &            5 &  414.0 K &         1 &             0.9783 \\
 multi &            3 &        32 &                         17 &            5 & 101.88 K &         2 &             0.9449 \\
 multi &            3 &        64 &                         35 &            5 & 412.31 K &         3 &             0.9478 \\
 multi &            5 &        32 &                         18 &            5 

  print(all_models_stats.to_latex(
