##### Copyright 2020 Google LLC.

Licensed under the Apache License, Version 2.0 (the "License");

In [1]:
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Meta-Dataset leaderboard

This notebook computes leaderboard tables for different models on Meta-Dataset.

Results for each model in each training setting (ImageNet-only or all datasets) are defined in a different DataFrame. This script aggregates the data in one DataFrame, ranks the models in each setting (using a statistical test for equality), and produces the final tables.

In [2]:
import re
import textwrap

import numpy as np
import pandas as pd
from IPython import display

In [3]:
# Explicit list of evaluation datasets.
# ILSVRC (valid) is included for completeness, but does not have to be reported.
datasets = [
    "ILSVRC (valid)",
    "ILSVRC (test)",
    "Omniglot",
    "Aircraft",
    "Birds",
    "Textures",
    "QuickDraw",
    "Fungi",
    "VGG Flower",
    "Traffic signs",
    "MSCOCO"
]

# Explicit list of articles and references, filled in throughout the notebook.
references = []

## Results from Triantafillou et al. (2020)

In [4]:
ref = ("Triantafillou et al. (2020)",
       "Eleni Triantafillou, Tyler Zhu, Vincent Dumoulin, Pascal Lamblin, "
       "Utku Evci, Kelvin Xu, Ross Goroshin, Carles Gelada, Kevin Swersky, "
       "Pierre-Antoine Manzagol, Hugo Larochelle; "
       "[_Meta-Dataset: A Dataset of Datasets for Learning to Learn from Few "
       "Examples_](https://arxiv.org/abs/1903.03096); ICLR 2020.")
references.append(ref)
# display.display(display.Markdown(ref[1]))

### k-NN (`baseline`)

In [5]:
baseline_imagenet_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
baseline_imagenet_df['# episodes'] = 600

In [6]:
baseline_imagenet_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
    [41.03, 1.01],
    [37.07, 1.15],
    [46.81, 0.89],
    [50.13, 1.00],
    [66.36, 0.75],
    [32.06, 1.08],
    [36.16, 1.02],
    [83.10, 0.68],
    [44.59, 1.19],
    [30.38, 0.99]
]

In [7]:
baseline_imagenet_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),41.03,1.01,600
Omniglot,37.07,1.15,600
Aircraft,46.81,0.89,600
Birds,50.13,1.0,600
Textures,66.36,0.75,600
QuickDraw,32.06,1.08,600
Fungi,36.16,1.02,600
VGG Flower,83.1,0.68,600
Traffic signs,44.59,1.19,600


In [8]:
baseline_all_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
baseline_all_df['# episodes'] = 600
baseline_all_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
    [38.55, 0.94],
    [74.60, 1.08],
    [64.98, 0.82],
    [66.35, 0.92],
    [63.58, 0.79],
    [44.88, 1.05],
    [37.12, 1.06],
    [83.47, 0.61],
    [40.11, 1.10],
    [29.55, 0.96]
]
baseline_all_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),38.55,0.94,600
Omniglot,74.6,1.08,600
Aircraft,64.98,0.82,600
Birds,66.35,0.92,600
Textures,63.58,0.79,600
QuickDraw,44.88,1.05,600
Fungi,37.12,1.06,600
VGG Flower,83.47,0.61,600
Traffic signs,40.11,1.1,600


### Finetune (`baselinefinetune`)

In [9]:
baselineft_imagenet_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
baselineft_imagenet_df['# episodes'] = 600
baselineft_imagenet_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
    [45.78, 1.10],
    [60.85, 1.58],
    [68.69, 1.26],
    [57.31, 1.26],
    [69.05, 0.90],
    [42.60, 1.17],
    [38.20, 1.02],
    [85.51, 0.68],
    [66.79, 1.31],
    [34.86, 0.97]
]
baselineft_imagenet_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),45.78,1.1,600
Omniglot,60.85,1.58,600
Aircraft,68.69,1.26,600
Birds,57.31,1.26,600
Textures,69.05,0.9,600
QuickDraw,42.6,1.17,600
Fungi,38.2,1.02,600
VGG Flower,85.51,0.68,600
Traffic signs,66.79,1.31,600


In [10]:
baselineft_all_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
baselineft_all_df['# episodes'] = 600
baselineft_all_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
    [43.08, 1.08],
    [71.11, 1.37],
    [72.03, 1.07],
    [59.82, 1.15	],
    [69.14, 0.85],
    [47.05, 1.16	],
    [38.16, 1.04],
    [85.28, 0.69],
    [66.74, 1.23],
    [35.17, 1.08]
]
baselineft_all_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),43.08,1.08,600
Omniglot,71.11,1.37,600
Aircraft,72.03,1.07,600
Birds,59.82,1.15,600
Textures,69.14,0.85,600
QuickDraw,47.05,1.16,600
Fungi,38.16,1.04,600
VGG Flower,85.28,0.69,600
Traffic signs,66.74,1.23,600


### MatchingNet (`matching`)

In [11]:
matching_imagenet_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
matching_imagenet_df['# episodes'] = 600
matching_imagenet_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
    [45.00, 1.10],
    [52.27, 1.28],
    [48.97, 0.93],
    [62.21, 0.95],
    [64.15, 0.85],
    [42.87, 1.09],
    [33.97, 1.00],
    [80.13, 0.71],
    [47.80, 1.14],
    [34.99, 1.00]
]
matching_imagenet_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),45.0,1.1,600
Omniglot,52.27,1.28,600
Aircraft,48.97,0.93,600
Birds,62.21,0.95,600
Textures,64.15,0.85,600
QuickDraw,42.87,1.09,600
Fungi,33.97,1.0,600
VGG Flower,80.13,0.71,600
Traffic signs,47.8,1.14,600


In [12]:
matching_all_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
matching_all_df['# episodes'] = 600
matching_all_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
    [36.08, 1.00],
    [78.25, 1.01],
    [69.17, 0.96],
    [56.40, 1.00],
    [61.80, 0.74],
    [60.81, 1.03],
    [33.70, 1.04],
    [81.90, 0.72],
    [55.57, 1.08],
    [28.79, 0.96]
]
matching_all_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),36.08,1.0,600
Omniglot,78.25,1.01,600
Aircraft,69.17,0.96,600
Birds,56.4,1.0,600
Textures,61.8,0.74,600
QuickDraw,60.81,1.03,600
Fungi,33.7,1.04,600
VGG Flower,81.9,0.72,600
Traffic signs,55.57,1.08,600


### ProtoNet (`prototypical`)

In [13]:
prototypical_imagenet_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
prototypical_imagenet_df['# episodes'] = 600
prototypical_imagenet_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
    [50.50, 1.08],
    [59.98, 1.35],
    [53.10, 1.00],
    [68.79, 1.01],
    [66.56, 0.83],
    [48.96, 1.08],
    [39.71, 1.11],
    [85.27, 0.77],
    [47.12, 1.10],
    [41.00, 1.10]
]
prototypical_imagenet_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),50.5,1.08,600
Omniglot,59.98,1.35,600
Aircraft,53.1,1.0,600
Birds,68.79,1.01,600
Textures,66.56,0.83,600
QuickDraw,48.96,1.08,600
Fungi,39.71,1.11,600
VGG Flower,85.27,0.77,600
Traffic signs,47.12,1.1,600


In [14]:
prototypical_all_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
prototypical_all_df['# episodes'] = 600
prototypical_all_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
    [44.50, 1.05],
    [79.56, 1.12],
    [71.14, 0.86],
    [67.01, 1.02],
    [65.18, 0.84],
    [64.88, 0.89],
    [40.26, 1.13],
    [86.85, 0.71],
    [46.48, 1.00],
    [39.87, 1.06]
]
prototypical_all_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),44.5,1.05,600
Omniglot,79.56,1.12,600
Aircraft,71.14,0.86,600
Birds,67.01,1.02,600
Textures,65.18,0.84,600
QuickDraw,64.88,0.89,600
Fungi,40.26,1.13,600
VGG Flower,86.85,0.71,600
Traffic signs,46.48,1.0,600


### fo-MAML (`maml`)

In [15]:
maml_imagenet_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
maml_imagenet_df['# episodes'] = 600
maml_imagenet_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
    [45.51, 1.11],
    [55.55, 1.54],
    [56.24, 1.11],
    [63.61, 1.06],
    [68.04, 0.81],
    [43.96, 1.29],
    [32.10, 1.10],
    [81.74, 0.83],
    [50.93, 1.51],
    [35.30, 1.23]
]
maml_imagenet_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),45.51,1.11,600
Omniglot,55.55,1.54,600
Aircraft,56.24,1.11,600
Birds,63.61,1.06,600
Textures,68.04,0.81,600
QuickDraw,43.96,1.29,600
Fungi,32.1,1.1,600
VGG Flower,81.74,0.83,600
Traffic signs,50.93,1.51,600


In [16]:
maml_all_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
maml_all_df['# episodes'] = 600
maml_all_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
    [37.83, 1.01],
    [83.92, 0.95],
    [76.41, 0.69],
    [62.43, 1.08],
    [64.16, 0.83],
    [59.73, 1.10],
    [33.54, 1.11],
    [79.94, 0.84],
    [42.91, 1.31],
    [29.37, 1.08]
]
maml_all_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),37.83,1.01,600
Omniglot,83.92,0.95,600
Aircraft,76.41,0.69,600
Birds,62.43,1.08,600
Textures,64.16,0.83,600
QuickDraw,59.73,1.1,600
Fungi,33.54,1.11,600
VGG Flower,79.94,0.84,600
Traffic signs,42.91,1.31,600


### RelationNet (`relationnet`)

In [17]:
relationnet_imagenet_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
relationnet_imagenet_df['# episodes'] = 600
relationnet_imagenet_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
    [34.69, 1.01],
    [45.35, 1.36],
    [40.73, 0.83],
    [49.51, 1.05],
    [52.97, 0.69],
    [43.30, 1.08],
    [30.55, 1.04],
    [68.76, 0.83],
    [33.67, 1.05],
    [29.15, 1.01]
]
relationnet_imagenet_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),34.69,1.01,600
Omniglot,45.35,1.36,600
Aircraft,40.73,0.83,600
Birds,49.51,1.05,600
Textures,52.97,0.69,600
QuickDraw,43.3,1.08,600
Fungi,30.55,1.04,600
VGG Flower,68.76,0.83,600
Traffic signs,33.67,1.05,600


In [18]:
relationnet_all_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
relationnet_all_df['# episodes'] = 600
relationnet_all_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
    [30.89, 0.93],
    [86.57, 0.79],
    [69.71, 0.83],
    [54.14, 0.99],
    [56.56, 0.73],
    [61.75, 0.97],
    [32.56, 1.08],
    [76.08, 0.76],
    [37.48, 0.93],
    [27.41, 0.89]
]
relationnet_all_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),30.89,0.93,600
Omniglot,86.57,0.79,600
Aircraft,69.71,0.83,600
Birds,54.14,0.99,600
Textures,56.56,0.73,600
QuickDraw,61.75,0.97,600
Fungi,32.56,1.08,600
VGG Flower,76.08,0.76,600
Traffic signs,37.48,0.93,600


### fo-Proto-MAML (`maml_init_with_proto`)

In [19]:
protomaml_imagenet_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
protomaml_imagenet_df['# episodes'] = 600
protomaml_imagenet_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
    [49.53, 1.05],
    [63.37, 1.33],
    [55.95, 0.99],
    [68.66, 0.96],
    [66.49, 0.83],
    [51.52, 1.00],
    [39.96, 1.14],
    [87.15, 0.69],
    [48.83, 1.09],
    [43.74, 1.12],
]
protomaml_imagenet_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),49.53,1.05,600
Omniglot,63.37,1.33,600
Aircraft,55.95,0.99,600
Birds,68.66,0.96,600
Textures,66.49,0.83,600
QuickDraw,51.52,1.0,600
Fungi,39.96,1.14,600
VGG Flower,87.15,0.69,600
Traffic signs,48.83,1.09,600


In [20]:
protomaml_all_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
protomaml_all_df['# episodes'] = 600
protomaml_all_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
    [46.52, 1.05],
    [82.69, 0.97],
    [75.23, 0.76],
    [69.88, 1.02],
    [68.25, 0.81],
    [66.84, 0.94],
    [41.99, 1.17],
    [88.72, 0.67],
    [52.42, 1.08],
    [41.74, 1.13]
]
protomaml_all_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),46.52,1.05,600
Omniglot,82.69,0.97,600
Aircraft,75.23,0.76,600
Birds,69.88,1.02,600
Textures,68.25,0.81,600
QuickDraw,66.84,0.94,600
Fungi,41.99,1.17,600
VGG Flower,88.72,0.67,600
Traffic signs,52.42,1.08,600


## Results from Requeima et al. (2019)

In [21]:
ref = ("Requeima et al. (2019)",
       "James Requeima, Jonathan Gordon, John Bronskill, Sebastian Nowozin, "
       "Richard E. Turner; "
       "[_Fast and Flexible Multi-Task Classification Using Conditional Neural "
       "Adaptive Processes_](https://arxiv.org/abs/1906.07697); "
       "NeurIPS 2019.")
references.append(ref)
# display.display(display.Markdown(ref[1]))

### CNAPs (`cnaps`)

In [22]:
cnaps_all_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
cnaps_all_df['# episodes'] = 600
cnaps_all_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
    [50.8, 1.1],
    [91.7, 0.5],
    [83.7, 0.6],
    [73.6, 0.9],
    [59.5, 0.7],
    [74.7, 0.8],
    [50.2, 1.1],
    [88.9, 0.5],
    [56.5, 1.1],
    [39.4, 1.0]
]
cnaps_all_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),50.8,1.1,600
Omniglot,91.7,0.5,600
Aircraft,83.7,0.6,600
Birds,73.6,0.9,600
Textures,59.5,0.7,600
QuickDraw,74.7,0.8,600
Fungi,50.2,1.1,600
VGG Flower,88.9,0.5,600
Traffic signs,56.5,1.1,600


## Results from Baik et al. (2020)

In [23]:
ref = ("Baik et al. (2020)",
       "Sungyong Baik, Myungsub Choi, Janghoon Choi, Heewon Kim, Kyoung Mu Lee; "
       "[_Meta-Learning with Adaptive Hyperparameters_]"
       "(https://papers.nips.cc/paper/2020/hash/ee89223a2b625b5152132ed77abbcc79-Abstract.html); "
       "NeurIPS 2020.")
references.append(ref)
# display.display(display.Markdown(ref[1]))

### ALFA + fo-Proto-MAML (`alfa_protomaml`)

In [24]:
alfa_protomaml_imagenet_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
alfa_protomaml_imagenet_df['# episodes'] = 600
alfa_protomaml_imagenet_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
    [52.80, 1.11],
    [61.87, 1.51],
    [63.43, 1.10],
    [69.75, 1.05],
    [70.78, 0.88],
    [59.17, 1.16],
    [41.49, 1.17],
    [85.96, 0.77],
    [60.78, 1.29],
    [48.11, 1.14]
]
alfa_protomaml_imagenet_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),52.8,1.11,600
Omniglot,61.87,1.51,600
Aircraft,63.43,1.1,600
Birds,69.75,1.05,600
Textures,70.78,0.88,600
QuickDraw,59.17,1.16,600
Fungi,41.49,1.17,600
VGG Flower,85.96,0.77,600
Traffic signs,60.78,1.29,600


### ALFA + fo-MAML (`alfa_maml`)
Not included in the global table as it performs worse than ALFA + fo-Proto-MAML overall, but provided here for reference.

In [25]:
alfa_maml_imagenet_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
alfa_maml_imagenet_df['# episodes'] = 600
alfa_maml_imagenet_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
    [51.09, 1.17],
    [67.89, 1.43],
    [66.34, 1.17],
    [67.67, 1.06],
    [65.34, 0.95],
    [60.53, 1.13],
    [37.41, 1.00],
    [84.28, 0.97],
    [60.86, 1.43],
    [40.05, 1.14]
]
alfa_maml_imagenet_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),51.09,1.17,600
Omniglot,67.89,1.43,600
Aircraft,66.34,1.17,600
Birds,67.67,1.06,600
Textures,65.34,0.95,600
QuickDraw,60.53,1.13,600
Fungi,37.41,1.0,600
VGG Flower,84.28,0.97,600
Traffic signs,60.86,1.43,600


## Results from Doersch et al. (2020)
Carl Doersch, Ankush Gupta, Andrew Zisserman,
_CrossTransformers: spatially-aware few-shot transfer_,
NeurIPS 2020

In [26]:
ref = ("Doersch et al. (2020)",
       "Carl Doersch, Ankush Gupta, Andrew Zisserman; "
       "[_CrossTransformers: spatially-aware few-shot transfer_]"
       "(https://arxiv.org/abs/2007.11498); "
       "NeurIPS 2020.")
references.append(ref)
# display.display(display.Markdown(ref[1]))

### ProtoNet large (`protonet_large`)
Larger-scale prototypical networks, including:
- 224x224 input size
- ResNet-34 backbone
- SimCLR Episodes

In [27]:
protonet_large_imagenet_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
protonet_large_imagenet_df['# episodes'] = 600
protonet_large_imagenet_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
     [53.69, 1.07],
     [68.50, 1.27],
     [58.04, 0.96],
     [74.07, 0.92],
     [68.76, 0.77],
     [53.30, 1.06],
     [40.73, 1.15],
     [86.96, 0.73],
     [58.11, 1.05],
     [41.70, 1.08],
]
protonet_large_imagenet_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),53.69,1.07,600
Omniglot,68.5,1.27,600
Aircraft,58.04,0.96,600
Birds,74.07,0.92,600
Textures,68.76,0.77,600
QuickDraw,53.3,1.06,600
Fungi,40.73,1.15,600
VGG Flower,86.96,0.73,600
Traffic signs,58.11,1.05,600


### CrossTransformers (`ctx`)

CrossTransformers network with:
- 224x224 input size
- ResNet-34 backbone
- SimCLR episodes
- 14x14 feature grid
- BOHB-inspired data augmentation

In [28]:
ctx_imagenet_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
ctx_imagenet_df['# episodes'] = 600
ctx_imagenet_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
      [62.76, 0.99],
      [82.21, 1.00],
      [79.49, 0.89],
      [80.63, 0.88],
      [75.57, 0.64],
      [72.68, 0.82],
      [51.58, 1.11],
      [95.34, 0.37],
      [82.65, 0.76],
      [59.90, 1.02],
]
ctx_imagenet_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),62.76,0.99,600
Omniglot,82.21,1.0,600
Aircraft,79.49,0.89,600
Birds,80.63,0.88,600
Textures,75.57,0.64,600
QuickDraw,72.68,0.82,600
Fungi,51.58,1.11,600
VGG Flower,95.34,0.37,600
Traffic signs,82.65,0.76,600


## Results from Saikia et al. (2020)
Tonmoy Saikia, Thomas Brox, Cordelia Schmid, _Optimized Generic Feature Learning for Few-shot Classification across Domains_, arXiv 2020

In [29]:
ref = ("Saikia et al. (2020)",
       "Tonmoy Saikia, Thomas Brox, Cordelia Schmid; "
       "[_Optimized Generic Feature Learning for Few-shot Classification "
       "across Domains_]"
       "(https://arxiv.org/abs/2001.07926); "
       "arXiv 2020.")
references.append(ref)
# display.display(display.Markdown(ref[1]))

### BOHB (`bohb`)
Validated on _S1_ (ImageNet) only, nearest-centroid classifier (NC).

In [30]:
bohb_imagenet_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
bohb_imagenet_df['# episodes'] = 600
bohb_imagenet_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
    [51.92, 1.05],
    [67.57, 1.21],
    [54.12, 0.90],
    [70.69, 0.90],
    [68.34, 0.76],
    [50.33, 1.04],
    [41.38, 1.12],
    [87.34, 0.59],
    [51.80, 1.04],
    [48.03, 0.99],
]
bohb_imagenet_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),51.92,1.05,600
Omniglot,67.57,1.21,600
Aircraft,54.12,0.9,600
Birds,70.69,0.9,600
Textures,68.34,0.76,600
QuickDraw,50.33,1.04,600
Fungi,41.38,1.12,600
VGG Flower,87.34,0.59,600
Traffic signs,51.8,1.04,600


## Results from Dvornik et al. (2020)


In [31]:
ref = ("Dvornik et al. (2020)",
       "Nikita Dvornik, Cordelia Schmid, Julien Mairal; "
       "[_Selecting Relevant Features from a Multi-domain Representation for "
       "Few-shot Classification_](https://arxiv.org/abs/2003.09338); "
       "ECCV 2020.")
references.append(ref)
# display.display(display.Markdown(ref[1]))

### SUR (`sur`)

In [32]:
sur_all_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
sur_all_df['# episodes'] = 600
sur_all_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
    [56.1, 1.1],
    [93.1, 0.5],
    [84.6, 0.7],
    [70.6, 1.0],
    [71.0, 0.8],
    [81.3, 0.6],
    [64.2, 1.1],
    [82.8, 0.8],
    [53.4, 1.0],
    [50.1, 1.0],
]
sur_all_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),56.1,1.1,600
Omniglot,93.1,0.5,600
Aircraft,84.6,0.7,600
Birds,70.6,1.0,600
Textures,71.0,0.8,600
QuickDraw,81.3,0.6,600
Fungi,64.2,1.1,600
VGG Flower,82.8,0.8,600
Traffic signs,53.4,1.0,600


### SUR-pnf (`sur_pnf`)
SUR with parametric network family, also referred as "SUR-pf".

In [33]:
sur_pnf_all_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
sur_pnf_all_df['# episodes'] = 600
sur_pnf_all_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
    [56.0, 1.1],
    [90.0, 0.6],
    [79.7, 0.8],
    [75.9, 0.9],
    [72.5, 0.7],
    [76.7, 0.7],
    [49.8, 1.1],
    [90.0, 0.6],
    [52.2, 0.8],
    [50.2, 1.1],
]
sur_pnf_all_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),56.0,1.1,600
Omniglot,90.0,0.6,600
Aircraft,79.7,0.8,600
Birds,75.9,0.9,600
Textures,72.5,0.7,600
QuickDraw,76.7,0.7,600
Fungi,49.8,1.1,600
VGG Flower,90.0,0.6,600
Traffic signs,52.2,0.8,600


## Results from Bateni et al. (2020)

In [34]:
ref = ("Bateni et al. (2020)",
       "Peyman Bateni, Raghav Goyal, Vaden Masrani, Frank Wood, Leonid Sigal; "
       "[_Improved Few-Shot Visual Classification_]"
       "(https://openaccess.thecvf.com/content_CVPR_2020/html/Bateni_Improved_Few-Shot_Visual_Classification_CVPR_2020_paper.html); "
       "CVPR 2020.")
references.append(ref)
# display.display(display.Markdown(ref[1]))

### Simple CNAPS (`simple_cnaps`)

In [35]:
simple_cnaps_all_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
simple_cnaps_all_df['# episodes'] = 600
simple_cnaps_all_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
    [56.5, 1.1],
    [91.9, 0.6],
    [83.8, 0.6],
    [76.1, 0.9],
    [70.0, 0.8],
    [78.3, 0.7],
    [49.1, 1.2],
    [91.3, 0.6],
    [59.2, 1.0],
    [42.4, 1.1],
]
simple_cnaps_all_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),56.5,1.1,600
Omniglot,91.9,0.6,600
Aircraft,83.8,0.6,600
Birds,76.1,0.9,600
Textures,70.0,0.8,600
QuickDraw,78.3,0.7,600
Fungi,49.1,1.2,600
VGG Flower,91.3,0.6,600
Traffic signs,59.2,1.0,600


## Results from Bateni et al. (2022a)

In [36]:
ref = ("Bateni et al. (2022a)",
       "Peyman Bateni, Jarred Barber, Jan-Willem van de Meent, Frank Wood; "
       "[_Enhancing Few-Shot Image Classification with Unlabelled Examples_]"
       "(https://openaccess.thecvf.com/content/WACV2022/html/Bateni_Enhancing_Few-Shot_Image_Classification_With_Unlabelled_Examples_WACV_2022_paper.html); "
       "WACV 2022.")
references.append(ref)
# display.display(display.Markdown(ref[1]))

### Transductive CNAPS (`transductive_cnaps`)

In [37]:
transductive_cnaps_all_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
transductive_cnaps_all_df['# episodes'] = 600
transductive_cnaps_all_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
    [57.9, 1.1],
    [94.3, 0.4],
    [84.7, 0.5],
    [78.8, 0.7],
    [66.2, 0.8],
    [77.9, 0.6],
    [48.9, 1.2],
    [92.3, 0.4],
    [59.7, 1.1],
    [42.5, 1.1],
]
transductive_cnaps_all_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),57.9,1.1,600
Omniglot,94.3,0.4,600
Aircraft,84.7,0.5,600
Birds,78.8,0.7,600
Textures,66.2,0.8,600
QuickDraw,77.9,0.6,600
Fungi,48.9,1.2,600
VGG Flower,92.3,0.4,600
Traffic signs,59.7,1.1,600


## Results from Liu et al. (2021)

Lu Liu, William Hamilton, Guodong Long, Jing Jiang, Hugo Larochelle,
_A Universal Representation Transformer Layer for Few-Shot Image Classification_, ICLR 2021



In [38]:
ref = ("Liu et al. (2021a)",
       "Lu Liu, William Hamilton, Guodong Long, Jing Jiang, Hugo Larochelle; "
       "[_Universal Representation Transformer Layer for Few-Shot Image "
       "Classification_](https://arxiv.org/abs/2006.11702); "
       "ICLR 2021.")
references.append(ref)
# display.display(display.Markdown(ref[1]))

### URT (`urt`)

In [39]:
urt_all_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
urt_all_df['# episodes'] = 600
urt_all_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
    [55.7, 1.0],
    [94.4, 0.4],
    [85.8, 0.6],
    [76.3, 0.8],
    [71.8, 0.7],
    [82.5, 0.6],
    [63.5, 1.0],
    [88.2, 0.6],
    [51.1, 1.1],
    [52.2, 1.1],
]
urt_all_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),55.7,1.0,600
Omniglot,94.4,0.4,600
Aircraft,85.8,0.6,600
Birds,76.3,0.8,600
Textures,71.8,0.7,600
QuickDraw,82.5,0.6,600
Fungi,63.5,1.0,600
VGG Flower,88.2,0.6,600
Traffic signs,51.1,1.1,600


### URT-pf (`urt-pf`)

In [40]:
urt_pf_all_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
urt_pf_all_df['# episodes'] = 600
urt_pf_all_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
    [55.5, 1.1],
    [90.2, 0.6],
    [79.8, 0.7],
    [77.5, 0.8],
    [73.5, 0.7],
    [75.8, 0.7],
    [48.1, 0.9],
    [91.9, 0.5],
    [52.0, 1.4],
    [52.1, 1.0],
]
urt_pf_all_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),55.5,1.1,600
Omniglot,90.2,0.6,600
Aircraft,79.8,0.7,600
Birds,77.5,0.8,600
Textures,73.5,0.7,600
QuickDraw,75.8,0.7,600
Fungi,48.1,0.9,600
VGG Flower,91.9,0.5,600
Traffic signs,52.0,1.4,600


## Results from Triantafillou et al. (2021)

Eleni Triantafillou, Hugo Larochelle, Richard Zemel, Vincent Dumoulin. Learning a Universal Template for Few-shot Dataset Generalization. ICML 2021.

In [41]:
ref = ("Triantafillou et al. (2021)",
       "Eleni Triantafillou, Hugo Larochelle, Richard Zemel, Vincent Dumoulin; "
       "[_Learning a Universal Template for Few-shot Dataset Generalization_]"
       "(https://arxiv.org/abs/2105.07029); "
       "ICML 2021.")
references.append(ref)
# display.display(display.Markdown(ref[1]))

### FLUTE (`FLUTE`)

In [42]:
flute_all_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
flute_all_df['# episodes'] = 600
flute_all_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
    [51.8, 1.1],
    [93.2, 0.5],
    [87.2, 0.5],
    [79.2, 0.8],
    [68.8, 0.8],
    [79.5, 0.7],
    [58.1, 1.1],
    [91.6, 0.6],
    [58.4, 1.1],
    [50.0, 1.0],
]
flute_all_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),51.8,1.1,600
Omniglot,93.2,0.5,600
Aircraft,87.2,0.5,600
Birds,79.2,0.8,600
Textures,68.8,0.8,600
QuickDraw,79.5,0.7,600
Fungi,58.1,1.1,600
VGG Flower,91.6,0.6,600
Traffic signs,58.4,1.1,600


## Results from Li et al. (2021a)
Wei-Hong Li, Xialei Liu, Hakan Bilen. Universal Representation Learning from Multiple Domains for Few-shot Classification. ICCV 2021.

In [43]:
ref = ("Li et al. (2021a)",
       "Wei-Hong Li, Xialei Liu, Hakan Bilen; "
       "[_Universal Representation Learning from Multiple Domains for Few-shot Classification_]"
       "(https://arxiv.org/pdf/2103.13841.pdf); "
       "ICCV 2021.")
references.append(ref)
# display.display(display.Markdown(ref[1]))

In [44]:
url_all_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
url_all_df['# episodes'] = 600
url_all_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
    [57.51, 1.08],
    [94.51, 0.41],
    [88.59, 0.46],
    [80.54, 0.69],
    [76.17, 0.67],
    [81.94, 0.56],
    [68.75, 0.95],
    [92.11, 0.48],
    [63.34, 1.19],
    [54.03, 0.96],
]
url_all_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),57.51,1.08,600
Omniglot,94.51,0.41,600
Aircraft,88.59,0.46,600
Birds,80.54,0.69,600
Textures,76.17,0.67,600
QuickDraw,81.94,0.56,600
Fungi,68.75,0.95,600
VGG Flower,92.11,0.48,600
Traffic signs,63.34,1.19,600


## Results from Li et al. (2021b)
Wei-Hong Li, Xialei Liu, Hakan Bilen. Cross-domain Few-shot Learning with Task-specific Adapters. arXiv 2021.

In [45]:
ref = ("Li et al. (2021b)",
       "Wei-Hong Li, Xialei Liu, Hakan Bilen; "
       "[_Cross-domain Few-shot Learning with Task-specific Adapters_]"
       "(https://arxiv.org/pdf/2107.00358.pdf); "
       "arXiv 2021.")
references.append(ref)
# display.display(display.Markdown(ref[1]))

### Task-specific Adapters (`tsa`)

In [46]:
tsa_all_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
tsa_all_df['# episodes'] = 600
tsa_all_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
    [57.35, 1.05],
    [94.96, 0.38],
    [89.33, 0.44],
    [81.42, 0.74],
    [76.74, 0.72],
    [82.01, 0.57],
    [67.40, 0.99],
    [92.18, 0.52],
    [83.55, 0.90],
    [55.75, 1.06],
]
tsa_all_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),57.35,1.05,600
Omniglot,94.96,0.38,600
Aircraft,89.33,0.44,600
Birds,81.42,0.74,600
Textures,76.74,0.72,600
QuickDraw,82.01,0.57,600
Fungi,67.4,0.99,600
VGG Flower,92.18,0.52,600
Traffic signs,83.55,0.9,600


### Task-specific Adapters with ResNet18 (`tsa_resnet18`)
Task-specific Adapters with:

* 84x84 input size
* ResNet-18 backbone
* adapters in matrix form with residual connections

In [47]:
tsa_resnet18_imagenet_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
tsa_resnet18_imagenet_df['# episodes'] = 600
tsa_resnet18_imagenet_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
    [59.5, 1.1],
    [78.2, 1.2],
    [72.2, 1.0],
    [74.9, 0.9],
    [77.3, 0.7],
    [67.6, 0.9],
    [44.7, 1.0],
    [90.9, 0.6],
    [82.5, 0.8],
    [59.0, 1.0],
]
tsa_resnet18_imagenet_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),59.5,1.1,600
Omniglot,78.2,1.2,600
Aircraft,72.2,1.0,600
Birds,74.9,0.9,600
Textures,77.3,0.7,600
QuickDraw,67.6,0.9,600
Fungi,44.7,1.0,600
VGG Flower,90.9,0.6,600
Traffic signs,82.5,0.8,600


### Task-specific Adapters with ResNet34 (`tsa_resnet34`)
Task-specific Adapters with:

224x224 input size
ResNet-34 backbone
adapters in matrix form with residual connections

In [48]:
tsa_resnet34_imagenet_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
tsa_resnet34_imagenet_df['# episodes'] = 600
tsa_resnet34_imagenet_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
    [63.73, 0.99],
    [82.58, 1.11],
    [80.13, 1.01],
    [83.39, 0.80],
    [79.61, 0.68],
    [71.03, 0.84],
    [51.38, 1.17],
    [94.05, 0.45],
    [81.71, 0.95],
    [61.67, 0.95],
]
tsa_resnet34_imagenet_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),63.73,0.99,600
Omniglot,82.58,1.11,600
Aircraft,80.13,1.01,600
Birds,83.39,0.8,600
Textures,79.61,0.68,600
QuickDraw,71.03,0.84,600
Fungi,51.38,1.17,600
VGG Flower,94.05,0.45,600
Traffic signs,81.71,0.95,600


## Results from Liu et al. (2021)

Yanbin Liu, Juho Lee, Linchao Zhu, Ling Chen, Humphrey Shi and Yi Yang. A Multi-Mode Modulator for Multi-Domain Few-Shot Classification. ICCV 2021.

In [49]:
ref = ("Liu et al. (2021b)",
       "Yanbin Liu, Juho Lee, Linchao Zhu, Ling Chen, Humphrey Shi, Yi Yang; "
       "[_A Multi-Mode Modulator for Multi-Domain Few-Shot Classification_]"
       "(https://openaccess.thecvf.com/content/ICCV2021/papers/Liu_A_Multi-Mode_Modulator_for_Multi-Domain_Few-Shot_Classification_ICCV_2021_paper.pdf); "
       "ICCV 2021.")
references.append(ref)

In [50]:
triM_all_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
triM_all_df['# episodes'] = 600
triM_all_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
    [58.6, 1.0],
    [92.0, 0.6],
    [82.8, 0.7],
    [75.3, 0.8],
    [71.2, 0.8],
    [77.3, 0.7],
    [48.5, 1.0],
    [90.5, 0.5],
    [63.0, 1.0],
    [52.8, 1.1]
]
triM_all_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),58.6,1.0,600
Omniglot,92.0,0.6,600
Aircraft,82.8,0.7,600
Birds,75.3,0.8,600
Textures,71.2,0.8,600
QuickDraw,77.3,0.7,600
Fungi,48.5,1.0,600
VGG Flower,90.5,0.5,600
Traffic signs,63.0,1.0,600


## Results from Bateni et al. (2022b)

Peyman Bateni, Jarred Barber, Raghav Goyal, Vaden Masrani, Jan-Willem van de Meent, Leonid Sigal, Frank Wood. Beyond Simple Meta-Learning: Multi-Purpose Models for Multi-Domain, Active and Continual Few-Shot Learning. arXiv.

In [51]:
ref = ("Bateni et al. (2022b)",
       "Bateni Peyman, Jarred Barber, Raghav Goyal, Vaden Masrani, Jan-Willem van de Meent, Leonid Sigal, and Frank Wood.; "
       "[_Beyond Simple Meta-Learning: Multi-Purpose Models for Multi-Domain, Active and Continual Few-Shot Learning._]"
       "(https://arxiv.org/abs/2201.05151); "
       "arXiv 2022.")
references.append(ref)

In [52]:
simple_cnaps_imagenet_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
simple_cnaps_imagenet_df['# episodes'] = 600
simple_cnaps_imagenet_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
    [54.8, 1.2],
    [62.0, 1.3],
    [49.2, 0.9],
    [66.5, 1.0],
    [71.6, 0.7],
    [56.6, 1.0],
    [37.5, 1.2],
    [82.1, 0.9],
    [63.1, 1.1],
    [45.8, 1.0],
]
simple_cnaps_imagenet_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),54.8,1.2,600
Omniglot,62.0,1.3,600
Aircraft,49.2,0.9,600
Birds,66.5,1.0,600
Textures,71.6,0.7,600
QuickDraw,56.6,1.0,600
Fungi,37.5,1.2,600
VGG Flower,82.1,0.9,600
Traffic signs,63.1,1.1,600


In [53]:
transductive_cnaps_imagenet_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
transductive_cnaps_imagenet_df['# episodes'] = 600
transductive_cnaps_imagenet_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
    [54.1, 1.1],
    [62.9, 1.3],
    [48.4, 0.9],
    [67.3, 0.9],
    [72.5, 0.7],
    [58.0, 1.0],
    [37.7, 1.1],
    [82.8, 0.8],
    [61.8, 1.1],
    [45.8, 1.0],
]
transductive_cnaps_imagenet_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),54.1,1.1,600
Omniglot,62.9,1.3,600
Aircraft,48.4,0.9,600
Birds,67.3,0.9,600
Textures,72.5,0.7,600
QuickDraw,58.0,1.0,600
Fungi,37.7,1.1,600
VGG Flower,82.8,0.8,600
Traffic signs,61.8,1.1,600


## Results from Perera & Halgamuge (2024)

Rashindrie Perera and Saman Halgamuge. Discriminative Sample-Guided and Parameter-Efficient Feature Space Adaptation for Cross-Domain Few-Shot Learning. To appear in CVPR 2024.

In [54]:
ref = ("Perera & Halgamuge (2024)",
       "Rashindrie Perera, Saman Halgamuge;"
       "[Discriminative Sample-Guided and Parameter-Efficient Feature Space Adaptation for Cross-Domain Few-Shot Learning]"
       "(https://arxiv.org/abs/2403.04492);"
       "To appear in CVPR 2024.")
references.append(ref)

In [55]:
dipa_imagenet_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
dipa_imagenet_df['# episodes'] = 600
dipa_imagenet_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
   [71.4, 0.9],
   [84.3, 1.2],
   [86.7, 1.0],
   [88.2, 0.9],
   [87.1, 0.6],
   [74.6, 0.8],
   [61.4, 1.2],
   [97.4, 0.4],
   [88.9, 1.0],
   [65.2, 1.0],
]
dipa_imagenet_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),71.4,0.9,600
Omniglot,84.3,1.2,600
Aircraft,86.7,1.0,600
Birds,88.2,0.9,600
Textures,87.1,0.6,600
QuickDraw,74.6,0.8,600
Fungi,61.4,1.2,600
VGG Flower,97.4,0.4,600
Traffic signs,88.9,1.0,600


In [56]:
dipa_all_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
dipa_all_df['# episodes'] = 600
dipa_all_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [
   [70.9 ,1.0],
  [84.7 , 1.1],
  [86.3 , 1.0],
  [90.8 , 0.8],
  [88.6 , 0.5],
  [75.3 , 0.8],
  [66.6 , 1.1],
  [97.9 , 0.3],
  [91.3 , 1.0],
  [64.8 , 1.0],
]
dipa_all_df

Unnamed: 0,mean (%),95% CI,# episodes
ILSVRC (valid),,,600
ILSVRC (test),70.9,1.0,600
Omniglot,84.7,1.1,600
Aircraft,86.3,1.0,600
Birds,90.8,0.8,600
Textures,88.6,0.5,600
QuickDraw,75.3,0.8,600
Fungi,66.6,1.1,600
VGG Flower,97.9,0.3,600
Traffic signs,91.3,1.0,600


## Template to add a new paper

```
ref = ("Author et al. (year)",
       "First Author, Second Author, Last Author; "
       "[_Title of Paper_](https://paper.url/); "
       "Venue year.")
references.append(ref)
# display.display(display.Markdown(ref[1]))
```

### Template to add a new model

```
<model_name>_<train_source>_df = pd.DataFrame(
    columns=['mean (%)', '95% CI', '# episodes'],
    index=datasets
)
<model_name>_<train_source>_df['# episodes'] = ...
<model_name>_<train_source>_df.loc[datasets[1:], ['mean (%)', '95% CI']] = [...]
```

## Aggregate in table

In [57]:
imagenet_dfs = {
    'k-NN': baseline_imagenet_df,
    'Finetune': baselineft_imagenet_df,
    'MatchingNet': matching_imagenet_df,
    'ProtoNet': prototypical_imagenet_df,
    'fo-MAML': maml_imagenet_df,
    'RelationNet': relationnet_imagenet_df,
    'fo-Proto-MAML': protomaml_imagenet_df,
    'ALFA+fo-Proto-MAML': alfa_protomaml_imagenet_df,
    'ProtoNet (large)': protonet_large_imagenet_df,
    'CTX': ctx_imagenet_df,
    'BOHB': bohb_imagenet_df,
    'SimpleCNAPS': simple_cnaps_imagenet_df,
    'TransductiveCNAPS': transductive_cnaps_imagenet_df,
    'TSA_resnet18': tsa_resnet18_imagenet_df,
    'TSA_resnet34': tsa_resnet34_imagenet_df,
    'DIPA': dipa_imagenet_df,
}

In [58]:
imagenet_df = pd.concat(
    imagenet_dfs.values(),
    axis=1,
    keys=imagenet_dfs.keys())
imagenet_df

Unnamed: 0_level_0,k-NN,k-NN,k-NN,Finetune,Finetune,Finetune,MatchingNet,MatchingNet,MatchingNet,ProtoNet,...,TransductiveCNAPS,TSA_resnet18,TSA_resnet18,TSA_resnet18,TSA_resnet34,TSA_resnet34,TSA_resnet34,DIPA,DIPA,DIPA
Unnamed: 0_level_1,mean (%),95% CI,# episodes,mean (%),95% CI,# episodes,mean (%),95% CI,# episodes,mean (%),...,# episodes,mean (%),95% CI,# episodes,mean (%),95% CI,# episodes,mean (%),95% CI,# episodes
ILSVRC (valid),,,600,,,600,,,600,,...,600,,,600,,,600,,,600
ILSVRC (test),41.03,1.01,600,45.78,1.1,600,45.0,1.1,600,50.5,...,600,59.5,1.1,600,63.73,0.99,600,71.4,0.9,600
Omniglot,37.07,1.15,600,60.85,1.58,600,52.27,1.28,600,59.98,...,600,78.2,1.2,600,82.58,1.11,600,84.3,1.2,600
Aircraft,46.81,0.89,600,68.69,1.26,600,48.97,0.93,600,53.1,...,600,72.2,1.0,600,80.13,1.01,600,86.7,1.0,600
Birds,50.13,1.0,600,57.31,1.26,600,62.21,0.95,600,68.79,...,600,74.9,0.9,600,83.39,0.8,600,88.2,0.9,600
Textures,66.36,0.75,600,69.05,0.9,600,64.15,0.85,600,66.56,...,600,77.3,0.7,600,79.61,0.68,600,87.1,0.6,600
QuickDraw,32.06,1.08,600,42.6,1.17,600,42.87,1.09,600,48.96,...,600,67.6,0.9,600,71.03,0.84,600,74.6,0.8,600
Fungi,36.16,1.02,600,38.2,1.02,600,33.97,1.0,600,39.71,...,600,44.7,1.0,600,51.38,1.17,600,61.4,1.2,600
VGG Flower,83.1,0.68,600,85.51,0.68,600,80.13,0.71,600,85.27,...,600,90.9,0.6,600,94.05,0.45,600,97.4,0.4,600
Traffic signs,44.59,1.19,600,66.79,1.31,600,47.8,1.14,600,47.12,...,600,82.5,0.8,600,81.71,0.95,600,88.9,1.0,600


In [59]:
all_dfs = {
    'k-NN': baseline_all_df,
    'Finetune': baselineft_all_df,
    'MatchingNet': matching_all_df,
    'ProtoNet': prototypical_all_df,
    'fo-MAML': maml_all_df,
    'RelationNet': relationnet_all_df,
    'fo-Proto-MAML': protomaml_all_df,
    'CNAPs': cnaps_all_df,
    'SUR': sur_all_df,
    'SUR-pnf': sur_pnf_all_df,
    'SimpleCNAPS': simple_cnaps_all_df,
    'TransductiveCNAPS': transductive_cnaps_all_df,
    'URT': urt_all_df,
    'URT-pf': urt_pf_all_df,
    'FLUTE': flute_all_df,
    'URL': url_all_df,
    'TSA': tsa_all_df,
    'TriM': triM_all_df,
    'DIPA': dipa_all_df,
}
all_df = pd.concat(
    all_dfs.values(),
    axis=1,
    keys=all_dfs.keys())
all_df

Unnamed: 0_level_0,k-NN,k-NN,k-NN,Finetune,Finetune,Finetune,MatchingNet,MatchingNet,MatchingNet,ProtoNet,...,URL,TSA,TSA,TSA,TriM,TriM,TriM,DIPA,DIPA,DIPA
Unnamed: 0_level_1,mean (%),95% CI,# episodes,mean (%),95% CI,# episodes,mean (%),95% CI,# episodes,mean (%),...,# episodes,mean (%),95% CI,# episodes,mean (%),95% CI,# episodes,mean (%),95% CI,# episodes
ILSVRC (valid),,,600,,,600,,,600,,...,600,,,600,,,600,,,600
ILSVRC (test),38.55,0.94,600,43.08,1.08,600,36.08,1.0,600,44.5,...,600,57.35,1.05,600,58.6,1.0,600,70.9,1.0,600
Omniglot,74.6,1.08,600,71.11,1.37,600,78.25,1.01,600,79.56,...,600,94.96,0.38,600,92.0,0.6,600,84.7,1.1,600
Aircraft,64.98,0.82,600,72.03,1.07,600,69.17,0.96,600,71.14,...,600,89.33,0.44,600,82.8,0.7,600,86.3,1.0,600
Birds,66.35,0.92,600,59.82,1.15,600,56.4,1.0,600,67.01,...,600,81.42,0.74,600,75.3,0.8,600,90.8,0.8,600
Textures,63.58,0.79,600,69.14,0.85,600,61.8,0.74,600,65.18,...,600,76.74,0.72,600,71.2,0.8,600,88.6,0.5,600
QuickDraw,44.88,1.05,600,47.05,1.16,600,60.81,1.03,600,64.88,...,600,82.01,0.57,600,77.3,0.7,600,75.3,0.8,600
Fungi,37.12,1.06,600,38.16,1.04,600,33.7,1.04,600,40.26,...,600,67.4,0.99,600,48.5,1.0,600,66.6,1.1,600
VGG Flower,83.47,0.61,600,85.28,0.69,600,81.9,0.72,600,86.85,...,600,92.18,0.52,600,90.5,0.5,600,97.9,0.3,600
Traffic signs,40.11,1.1,600,66.74,1.23,600,55.57,1.08,600,46.48,...,600,83.55,0.9,600,63.0,1.0,600,91.3,1.0,600


In [60]:
models_df = pd.DataFrame.from_dict(
    orient='index',
    columns=["ref"],
    data={
        'k-NN': 'Triantafillou et al. (2020)',
        'Finetune': 'Triantafillou et al. (2020)',
        'MatchingNet': 'Triantafillou et al. (2020)',
        'ProtoNet': 'Triantafillou et al. (2020)',
        'fo-MAML': 'Triantafillou et al. (2020)',
        'RelationNet': 'Triantafillou et al. (2020)',
        'fo-Proto-MAML': 'Triantafillou et al. (2020)',
        'CNAPs': 'Requeima et al. (2019)',
        'ALFA+fo-Proto-MAML': 'Baik et al. (2020)',
        'ProtoNet (large)': 'Doersch et al. (2020)',
        'CTX': 'Doersch et al. (2020)',
        'BOHB': 'Saikia et al. (2020)',
        'SUR': 'Dvornik et al. (2020)',
        'SUR-pnf': 'Dvornik et al. (2020)',
        'URT': 'Liu et al. (2021a)',
        'URT-pf': 'Liu et al. (2021a)',
        'FLUTE': 'Triantafillou et al. (2021)',
        'URL': 'Li et al. (2021a)',
        'TSA': 'Li et al. (2021b)',
        'TSA_resnet18': 'Li et al. (2021b)',
        'TSA_resnet34': 'Li et al. (2021b)',
        'TriM': 'Liu et al. (2021b)',
        'SimpleCNAPS': ['Bateni et al. (2022b)', 'Bateni et al. (2020)'],
        'TransductiveCNAPS': ['Bateni et al. (2022b)', 'Bateni et al. (2022a)'],
        'DIPA': 'Perera & Halgamuge (2024)',
        })
models_df

Unnamed: 0,ref
k-NN,Triantafillou et al. (2020)
Finetune,Triantafillou et al. (2020)
MatchingNet,Triantafillou et al. (2020)
ProtoNet,Triantafillou et al. (2020)
fo-MAML,Triantafillou et al. (2020)
RelationNet,Triantafillou et al. (2020)
fo-Proto-MAML,Triantafillou et al. (2020)
CNAPs,Requeima et al. (2019)
ALFA+fo-Proto-MAML,Baik et al. (2020)
ProtoNet (large),Doersch et al. (2020)


### Add stddev

In [61]:
def add_stddev(df):
  # Extract original order of labels
  datasets = df.index
  models = df.columns.levels[0]
  # Have only one result (mean, CI, ...) per row
  stacked_df = df.stack(0)
  # Add 'stddev' as column
  stacked_df['stddev'] = stacked_df['95% CI'] * np.sqrt(stacked_df['# episodes']) / 1.96
  # Reshape and put back in original order
  new_df = stacked_df.unstack().swaplevel(0, 1, axis=1)
  new_df = new_df.loc[datasets][models]
  return new_df

In [62]:
imagenet_df = add_stddev(imagenet_df)
imagenet_df

Unnamed: 0_level_0,k-NN,k-NN,k-NN,k-NN,Finetune,Finetune,Finetune,Finetune,MatchingNet,MatchingNet,...,TSA_resnet18,TSA_resnet18,TSA_resnet34,TSA_resnet34,TSA_resnet34,TSA_resnet34,DIPA,DIPA,DIPA,DIPA
Unnamed: 0_level_1,# episodes,95% CI,mean (%),stddev,# episodes,95% CI,mean (%),stddev,# episodes,95% CI,...,mean (%),stddev,# episodes,95% CI,mean (%),stddev,# episodes,95% CI,mean (%),stddev
ILSVRC (valid),600,,,,600,,,,600,,...,,,600,,,,600,,,
ILSVRC (test),600,1.01,41.03,12.622371,600,1.1,45.78,13.747136,600,1.1,...,59.5,13.747136,600,0.99,63.73,12.372423,600,0.9,71.4,11.247657
Omniglot,600,1.15,37.07,14.372006,600,1.58,60.85,19.745887,600,1.28,...,78.2,14.996876,600,1.11,82.58,13.87211,600,1.2,84.3,14.996876
Aircraft,600,0.89,46.81,11.122683,600,1.26,68.69,15.74672,600,0.93,...,72.2,12.497397,600,1.01,80.13,12.622371,600,1.0,86.7,12.497397
Birds,600,1.0,50.13,12.497397,600,1.26,57.31,15.74672,600,0.95,...,74.9,11.247657,600,0.8,83.39,9.997917,600,0.9,88.2,11.247657
Textures,600,0.75,66.36,9.373047,600,0.9,69.05,11.247657,600,0.85,...,77.3,8.748178,600,0.68,79.61,8.49823,600,0.6,87.1,7.498438
QuickDraw,600,1.08,32.06,13.497188,600,1.17,42.6,14.621954,600,1.09,...,67.6,11.247657,600,0.84,71.03,10.497813,600,0.8,74.6,9.997917
Fungi,600,1.02,36.16,12.747345,600,1.02,38.2,12.747345,600,1.0,...,44.7,12.497397,600,1.17,51.38,14.621954,600,1.2,61.4,14.996876
VGG Flower,600,0.68,83.1,8.49823,600,0.68,85.51,8.49823,600,0.71,...,90.9,7.498438,600,0.45,94.05,5.623828,600,0.4,97.4,4.998959
Traffic signs,600,1.19,44.59,14.871902,600,1.31,66.79,16.37159,600,1.14,...,82.5,9.997917,600,0.95,81.71,11.872527,600,1.0,88.9,12.497397


In [63]:
all_df = add_stddev(all_df)
all_df

Unnamed: 0_level_0,k-NN,k-NN,k-NN,k-NN,Finetune,Finetune,Finetune,Finetune,MatchingNet,MatchingNet,...,TSA,TSA,TriM,TriM,TriM,TriM,DIPA,DIPA,DIPA,DIPA
Unnamed: 0_level_1,# episodes,95% CI,mean (%),stddev,# episodes,95% CI,mean (%),stddev,# episodes,95% CI,...,mean (%),stddev,# episodes,95% CI,mean (%),stddev,# episodes,95% CI,mean (%),stddev
ILSVRC (valid),600,,,,600,,,,600,,...,,,600,,,,600,,,
ILSVRC (test),600,0.94,38.55,11.747553,600,1.08,43.08,13.497188,600,1.0,...,57.35,13.122266,600,1.0,58.6,12.497397,600,1.0,70.9,12.497397
Omniglot,600,1.08,74.6,13.497188,600,1.37,71.11,17.121433,600,1.01,...,94.96,4.749011,600,0.6,92.0,7.498438,600,1.1,84.7,13.747136
Aircraft,600,0.82,64.98,10.247865,600,1.07,72.03,13.372214,600,0.96,...,89.33,5.498855,600,0.7,82.8,8.748178,600,1.0,86.3,12.497397
Birds,600,0.92,66.35,11.497605,600,1.15,59.82,14.372006,600,1.0,...,81.42,9.248074,600,0.8,75.3,9.997917,600,0.8,90.8,9.997917
Textures,600,0.79,63.58,9.872943,600,0.85,69.14,10.622787,600,0.74,...,76.74,8.998126,600,0.8,71.2,9.997917,600,0.5,88.6,6.248698
QuickDraw,600,1.05,44.88,13.122266,600,1.16,47.05,14.49698,600,1.03,...,82.01,7.123516,600,0.7,77.3,8.748178,600,0.8,75.3,9.997917
Fungi,600,1.06,37.12,13.24724,600,1.04,38.16,12.997293,600,1.04,...,67.4,12.372423,600,1.0,48.5,12.497397,600,1.1,66.6,13.747136
VGG Flower,600,0.61,83.47,7.623412,600,0.69,85.28,8.623204,600,0.72,...,92.18,6.498646,600,0.5,90.5,6.248698,600,0.3,97.9,3.749219
Traffic signs,600,1.1,40.11,13.747136,600,1.23,66.74,15.371798,600,1.08,...,83.55,11.247657,600,1.0,63.0,12.497397,600,1.0,91.3,12.497397


### Add rankings

In [64]:
def is_difference_significant(best_stats, candidate_stats):
  # compute a 95% confidence for the difference of means.
  ci = 1.96 * np.sqrt((best_stats['stddev'] ** 2) / best_stats['# episodes'] +
                      (candidate_stats['stddev'] ** 2) / candidate_stats['# episodes'])
  diff_of_means = best_stats['mean (%)'] - candidate_stats['mean (%)']
  return np.abs(diff_of_means) > ci

In [65]:
def compute_ranks(dataset_series):
  dataset_df = dataset_series.unstack()
  n_models = len(dataset_df.index)
  remaining_models = list(dataset_df.index)
  next_available_rank = 1
  ranks = {}
  # Iteratively pick the best models, then all the ones statistically equivalent
  while remaining_models:
    accuracies = dataset_df.loc[remaining_models]['mean (%)'].astype('d')
    # best_model = accuracies.idxmax(axis=1)  #previous
    best_model = accuracies.idxmax(axis=0)
    best_stats = dataset_df.loc[best_model]
    tied_models = [best_model]
    potential_tied_models = [model for model in remaining_models
                             if model != best_model]
    for candidate in potential_tied_models:
      candidate_stats = dataset_df.loc[candidate]
      if not is_difference_significant(best_stats, candidate_stats):
        tied_models.append(candidate)

    n_ties = len(tied_models)
    # All tied models share the same rank, which is the average of the next
    # `n_ties` available ranks (the ranks they would have without the ties), or
    # next_available_rank + (1 + ... + (n_ties - 1)) / n_ties, which gives:
    shared_rank = next_available_rank + (n_ties - 1) / 2
    next_available_rank += n_ties
    for model in tied_models:
      ranks[model] = shared_rank

    # Remove picked models for next iteration
    remaining_models = [model for model in remaining_models
                        if model not in tied_models]
  return pd.Series(ranks, name='rank')

In [66]:
def add_ranks(df):
  # Get ranks as a data frame (ignore "ILSVRC (valid)")
  ranks = df[1:].apply(compute_ranks, axis=1)
  # Set the columns as (model, 'rank') Multi-index
  ranks = pd.concat([ranks], axis=1, keys=['rank']).swaplevel(0, 1, axis=1)
  # Concatenate with the original dataframe and defrag columns
  new_df = pd.concat([df, ranks], axis=1)[df.columns.levels[0]]
  return new_df

In [67]:
imagenet_df = add_ranks(imagenet_df)
imagenet_df

Unnamed: 0_level_0,ALFA+fo-Proto-MAML,ALFA+fo-Proto-MAML,ALFA+fo-Proto-MAML,ALFA+fo-Proto-MAML,ALFA+fo-Proto-MAML,BOHB,BOHB,BOHB,BOHB,BOHB,...,fo-Proto-MAML,fo-Proto-MAML,fo-Proto-MAML,fo-Proto-MAML,fo-Proto-MAML,k-NN,k-NN,k-NN,k-NN,k-NN
Unnamed: 0_level_1,# episodes,95% CI,mean (%),stddev,rank,# episodes,95% CI,mean (%),stddev,rank,...,# episodes,95% CI,mean (%),stddev,rank,# episodes,95% CI,mean (%),stddev,rank
ILSVRC (valid),600,,,,,600,,,,,...,600,,,,,600,,,,
ILSVRC (test),600,1.11,52.8,13.87211,8.5,600,1.05,51.92,13.122266,8.5,...,600,1.05,49.53,13.122266,10.5,600,1.01,41.03,12.622371,15.0
Omniglot,600,1.51,61.87,18.871069,8.5,600,1.21,67.57,15.12185,5.5,...,600,1.33,63.37,16.621538,8.5,600,1.15,37.07,14.372006,16.0
Aircraft,600,1.1,63.43,13.747136,6.0,600,0.9,54.12,11.247657,10.5,...,600,0.99,55.95,12.372423,8.5,600,0.89,46.81,11.122683,15.0
Birds,600,1.05,69.75,13.122266,6.5,600,0.9,70.69,11.247657,6.5,...,600,0.96,68.66,11.997501,8.5,600,1.0,50.13,12.497397,15.5
Textures,600,0.88,70.78,10.997709,7.0,600,0.76,68.34,9.498021,9.5,...,600,0.83,66.49,10.372839,13.0,600,0.75,66.36,9.373047,13.0
QuickDraw,600,1.16,59.17,14.49698,5.5,600,1.04,50.33,12.997293,9.5,...,600,1.0,51.52,12.497397,9.5,600,1.08,32.06,13.497188,16.0
Fungi,600,1.17,41.49,14.621954,6.5,600,1.12,41.38,13.997084,6.5,...,600,1.14,39.96,14.247032,6.5,600,1.02,36.16,12.747345,13.0
VGG Flower,600,0.77,85.96,9.622995,9.0,600,0.59,87.34,7.373464,6.0,...,600,0.69,87.15,8.623204,6.0,600,0.68,83.1,8.49823,12.0
Traffic signs,600,1.29,60.78,16.121642,8.0,600,1.04,51.8,12.997293,10.5,...,600,1.09,48.83,13.622162,12.5,600,1.19,44.59,14.871902,15.0


In [68]:
all_df = add_ranks(all_df)
all_df

Unnamed: 0_level_0,CNAPs,CNAPs,CNAPs,CNAPs,CNAPs,DIPA,DIPA,DIPA,DIPA,DIPA,...,fo-Proto-MAML,fo-Proto-MAML,fo-Proto-MAML,fo-Proto-MAML,fo-Proto-MAML,k-NN,k-NN,k-NN,k-NN,k-NN
Unnamed: 0_level_1,# episodes,95% CI,mean (%),stddev,rank,# episodes,95% CI,mean (%),stddev,rank,...,# episodes,95% CI,mean (%),stddev,rank,# episodes,95% CI,mean (%),stddev,rank
ILSVRC (valid),600,,,,,600,,,,,...,600,,,,,600,,,,
ILSVRC (test),600,1.1,50.8,13.747136,11.5,600,1.0,70.9,12.497397,1.0,...,600,1.05,46.52,13.122266,13.0,600,0.94,38.55,11.747553,16.5
Omniglot,600,0.5,91.7,6.248698,8.0,600,1.1,84.7,13.747136,13.5,...,600,0.97,82.69,12.122475,15.0,600,1.08,74.6,13.497188,18.0
Aircraft,600,0.6,83.7,7.498438,8.5,600,1.0,86.3,12.497397,3.5,...,600,0.76,75.23,9.498021,14.0,600,0.82,64.98,10.247865,19.0
Birds,600,0.9,73.6,11.247657,11.0,600,0.8,90.8,9.997917,1.0,...,600,1.02,69.88,12.747345,12.5,600,0.92,66.35,11.497605,14.5
Textures,600,0.7,59.5,8.748178,18.0,600,0.5,88.6,6.248698,1.0,...,600,0.81,68.25,10.122891,11.5,600,0.79,63.58,9.872943,15.5
QuickDraw,600,0.8,74.7,9.997917,12.0,600,0.8,75.3,9.997917,10.5,...,600,0.94,66.84,11.747553,13.0,600,1.05,44.88,13.122266,19.0
Fungi,600,1.1,50.2,13.747136,8.5,600,1.1,66.6,13.747136,3.0,...,600,1.17,41.99,14.621954,13.0,600,1.06,37.12,13.24724,15.5
VGG Flower,600,0.5,88.9,6.248698,11.0,600,0.3,97.9,3.749219,1.0,...,600,0.67,88.72,8.373256,11.0,600,0.61,83.47,7.623412,15.5
Traffic signs,600,1.1,56.5,13.747136,9.5,600,1.0,91.3,12.497397,1.0,...,600,1.08,52.42,13.497188,12.5,600,1.1,40.11,13.747136,18.0


In [69]:
imagenet_df.xs('rank', axis=1, level=1).mean()

ALFA+fo-Proto-MAML     7.10
BOHB                   7.85
CTX                    2.75
DIPA                   1.00
Finetune              10.45
MatchingNet           13.55
ProtoNet              10.75
ProtoNet (large)       7.25
RelationNet           15.55
SimpleCNAPS            8.75
TSA_resnet18           3.80
TSA_resnet34           2.50
TransductiveCNAPS      8.60
fo-MAML               12.25
fo-Proto-MAML          9.25
k-NN                  14.60
dtype: float64

In [70]:
all_df.xs('rank', axis=1, level=1).mean()

CNAPs                11.15
DIPA                  3.65
FLUTE                 6.65
Finetune             14.10
MatchingNet          16.40
ProtoNet             14.50
RelationNet          17.70
SUR                   8.45
SUR-pnf               9.00
SimpleCNAPS           8.25
TSA                   2.25
TransductiveCNAPS     6.85
TriM                  7.40
URL                   2.75
URT                   6.85
URT-pf                8.40
fo-MAML              16.15
fo-Proto-MAML        12.65
k-NN                 16.85
dtype: float64

### Display in HTML
This section uses the DataFrame's "styler" object, which renders nicely within the notebook.

Unfortunately, the HTML it outputs is not compatible with GitHub's markdown (as it relies on the `<style>` tag).

In [71]:
def str_summary(series):
  # Summarize each (episode, model) by a single cell
  # Non-breaking space to keep things on the same line
  nbsp = '\u00A0'
  string = '%(acc)s±%(ci)s%(nbsp)s(%(rank)g)' % {
      'acc': series['mean (%)'],
      'ci': series['95% CI'],
      'rank': series['rank'],
      'nbsp': nbsp
  }
  return string

In [72]:
def display_table(df, models=None):
  accuracies_df = df.stack(0).apply(str_summary, axis=1).unstack(0)[df.index[1:]]
  rank_df = df.xs('rank', axis=1, level=1).loc[df.index[1:]]
  avg_rank_df = pd.DataFrame(rank_df.mean(), columns=['Avg rank'])
  display_df = pd.concat([avg_rank_df, accuracies_df], axis=1)
  if models:
    # Try and force a particular order of models
    display_df = display_df.loc[models]

  # Bold cells corresponding to the best rank
  best_acc_mask = rank_df.T == rank_df.min(axis=1)
  best_avg_mask = avg_rank_df == avg_rank_df.min()
  best_mask = pd.concat([best_avg_mask, best_acc_mask], axis=1)
  if models:
    best_mask = best_mask.loc[models]
  bold_mask = best_mask.applymap(lambda v: 'font-weight: bold' if v else '')

  display_style = display_df.style.apply(lambda f: bold_mask, axis=None)
  display_style = display_style.format({'Avg rank': '{:g}'})
  return display_style

In [73]:
imagenet_display = display_table(imagenet_df, models=imagenet_dfs.keys())
imagenet_display

Unnamed: 0,Avg rank,ILSVRC (test),Omniglot,Aircraft,Birds,Textures,QuickDraw,Fungi,VGG Flower,Traffic signs,MSCOCO
k-NN,14.6,41.03±1.01 (15),37.07±1.15 (16),46.81±0.89 (15),50.13±1.0 (15.5),66.36±0.75 (13),32.06±1.08 (16),36.16±1.02 (13),83.1±0.68 (12),44.59±1.19 (15),30.38±0.99 (15.5)
Finetune,10.45,45.78±1.1 (13),60.85±1.58 (11.5),68.69±1.26 (5),57.31±1.26 (14),69.05±0.9 (9.5),42.6±1.17 (13.5),38.2±1.02 (11),85.51±0.68 (9),66.79±1.31 (5),34.86±0.97 (13)
MatchingNet,13.55,45.0±1.1 (13),52.27±1.28 (14),48.97±0.93 (13),62.21±0.95 (12.5),64.15±0.85 (15),42.87±1.09 (13.5),33.97±1.0 (14),80.13±0.71 (15),47.8±1.14 (12.5),34.99±1.0 (13)
ProtoNet,10.75,50.5±1.08 (10.5),59.98±1.35 (11.5),53.1±1.0 (10.5),68.79±1.01 (8.5),66.56±0.83 (13),48.96±1.08 (11),39.71±1.11 (9),85.27±0.77 (9),47.12±1.1 (14),41.0±1.1 (10.5)
fo-MAML,12.25,45.51±1.11 (13),55.55±1.54 (13),56.24±1.11 (8.5),63.61±1.06 (12.5),68.04±0.81 (9.5),43.96±1.29 (13.5),32.1±1.1 (15),81.74±0.83 (14),50.93±1.51 (10.5),35.3±1.23 (13)
RelationNet,15.55,34.69±1.01 (16),45.35±1.36 (15),40.73±0.83 (16),49.51±1.05 (15.5),52.97±0.69 (16),43.3±1.08 (13.5),30.55±1.04 (16),68.76±0.83 (16),33.67±1.05 (16),29.15±1.01 (15.5)
fo-Proto-MAML,9.25,49.53±1.05 (10.5),63.37±1.33 (8.5),55.95±0.99 (8.5),68.66±0.96 (8.5),66.49±0.83 (13),51.52±1.0 (9.5),39.96±1.14 (6.5),87.15±0.69 (6),48.83±1.09 (12.5),43.74±1.12 (9)
ALFA+fo-Proto-MAML,7.1,52.8±1.11 (8.5),61.87±1.51 (8.5),63.43±1.1 (6),69.75±1.05 (6.5),70.78±0.88 (7),59.17±1.16 (5.5),41.49±1.17 (6.5),85.96±0.77 (9),60.78±1.29 (8),48.11±1.14 (5.5)
ProtoNet (large),7.25,53.69±1.07 (6),68.5±1.27 (5.5),58.04±0.96 (7),74.07±0.92 (4.5),68.76±0.77 (9.5),53.3±1.06 (8),40.73±1.15 (6.5),86.96±0.73 (6),58.11±1.05 (9),41.7±1.08 (10.5)
CTX,2.75,62.76±0.99 (2.5),82.21±1.0 (2.5),79.49±0.89 (2.5),80.63±0.88 (3),75.57±0.64 (4),72.68±0.82 (2),51.58±1.11 (2.5),95.34±0.37 (2),82.65±0.76 (3),59.9±1.02 (3.5)


In [74]:
# print(imagenet_display.render())

In [75]:
all_display = display_table(all_df, models=all_dfs.keys())
all_display

Unnamed: 0,Avg rank,ILSVRC (test),Omniglot,Aircraft,Birds,Textures,QuickDraw,Fungi,VGG Flower,Traffic signs,MSCOCO
k-NN,16.85,38.55±0.94 (16.5),74.6±1.08 (18),64.98±0.82 (19),66.35±0.92 (14.5),63.58±0.79 (15.5),44.88±1.05 (19),37.12±1.06 (15.5),83.47±0.61 (15.5),40.11±1.1 (18),29.55±0.96 (17)
Finetune,14.1,43.08±1.08 (14.5),71.11±1.37 (19),72.03±1.07 (15.5),59.82±1.15 (17),69.14±0.85 (9.5),47.05±1.16 (18),38.16±1.04 (15.5),85.28±0.69 (14),66.74±1.23 (3),35.17±1.08 (15)
MatchingNet,16.4,36.08±1.0 (18),78.25±1.01 (16.5),69.17±0.96 (17.5),56.4±1.0 (18),61.8±0.74 (17),60.81±1.03 (15.5),33.7±1.04 (18),81.9±0.72 (17),55.57±1.08 (9.5),28.79±0.96 (17)
ProtoNet,14.5,44.5±1.05 (14.5),79.56±1.12 (16.5),71.14±0.86 (15.5),67.01±1.02 (14.5),65.18±0.84 (13.5),64.88±0.89 (14),40.26±1.13 (14),86.85±0.71 (13),46.48±1.0 (16),39.87±1.06 (13.5)
fo-MAML,16.15,37.83±1.01 (16.5),83.92±0.95 (13.5),76.41±0.69 (13),62.43±1.08 (16),64.16±0.83 (15.5),59.73±1.1 (17),33.54±1.11 (18),79.94±0.84 (18),42.91±1.31 (17),29.37±1.08 (17)
RelationNet,17.7,30.89±0.93 (19),86.57±0.79 (12),69.71±0.83 (17.5),54.14±0.99 (19),56.56±0.73 (19),61.75±0.97 (15.5),32.56±1.08 (18),76.08±0.76 (19),37.48±0.93 (19),27.41±0.89 (19)
fo-Proto-MAML,12.65,46.52±1.05 (13),82.69±0.97 (15),75.23±0.76 (14),69.88±1.02 (12.5),68.25±0.81 (11.5),66.84±0.94 (13),41.99±1.17 (13),88.72±0.67 (11),52.42±1.08 (12.5),41.74±1.13 (11)
CNAPs,11.15,50.8±1.1 (11.5),91.7±0.5 (8),83.7±0.6 (8.5),73.6±0.9 (11),59.5±0.7 (18),74.7±0.8 (12),50.2±1.1 (8.5),88.9±0.5 (11),56.5±1.1 (9.5),39.4±1.0 (13.5)
SUR,8.45,56.1±1.1 (8),93.1±0.5 (5.5),84.6±0.7 (6.5),70.6±1.0 (12.5),71.0±0.8 (7.5),81.3±0.6 (4),64.2±1.1 (4.5),82.8±0.8 (15.5),53.4±1.0 (12.5),50.1±1.0 (8)
SUR-pnf,9.0,56.0±1.1 (8),90.0±0.6 (10.5),79.7±0.8 (11.5),75.9±0.9 (8.5),72.5±0.7 (5.5),76.7±0.7 (8.5),49.8±1.1 (8.5),90.0±0.6 (8.5),52.2±0.8 (12.5),50.2±1.1 (8)


In [76]:
# print(all_display.render())

### Display in MarkDown
At least, in GitHub-flavored MarkDown.

In [77]:
def md_render(series):
  # Summarize each (episode, model) by a single cell containing MarkDown
  nbsp = '&nbsp;'
  md_string = '%(bold)s%(acc)5.2f%(bold)s±%(ci)4.2f%(nbsp)s(%(rank)g)' % {
      'acc': series['mean (%)'],
      'ci': series['95% CI'],
      'rank': series['rank'],
      'bold': '**' if series['best_rank'] else '',
      'nbsp': nbsp
  }
  return md_string

In [78]:
def md_table(df, models=None):
  # Whether a model has the best rank on a given dataset
  rank_df = df.xs('rank', axis=1, level=1).loc[df.index[1:]]
  best_rank = pd.concat([rank_df.T == rank_df.min(axis=1)], axis=1,
                        keys=['best_rank']).swaplevel(0, 1, axis=1)
  accuracies_df = df[1:].T.unstack(1)
  accuracies_df = pd.concat([accuracies_df, best_rank], axis=1)
  accuracies_md = accuracies_df.stack(0).apply(md_render, axis=1).unstack(1)

  # Average rank (and whether it's the best)
  avg_rank_df = pd.DataFrame(rank_df.mean(), columns=['Avg rank'])
  best_avg_rank = (avg_rank_df == avg_rank_df.min()).rename(
      columns={'Avg rank': 'best_rank'})
  avg_rank_md = pd.concat([avg_rank_df, best_avg_rank], axis=1).apply(
      lambda s: '%(bold)s%(avg_rank)g%(bold)s' % {
          'avg_rank': s['Avg rank'],
          'bold': '**' if s['best_rank'] else ''
      },
      axis=1).rename('Avg rank')

  # Display method name with a pointer to the reference, defined later.
  ref_to_link = {ref[0]: "[[%i]]" % i for i, ref in enumerate(references, 1)}
  method_md = models_df.apply(lambda r: '['+','.join([ref_to_link[item][1:-1] for item in r['ref']])+']'
                              if (type(r['ref']) is list) else ref_to_link[r['ref']],
                              axis='columns')

  display_md = pd.concat([avg_rank_md, accuracies_md[df.index[1:]]], axis=1)
  if models:
    # Try and force a particular order of models
    display_md = display_md.loc[list(models)]

  # Pad all cells so they align well, 27 chars should be enough
  header_str = '|'.join(['%-27s' % c
                         for c in ['Method'] + list(display_md.columns)])
  sep_str = '|'.join(['-' * 27 for _ in [''] + list(display_md.columns)])
  rows = [
      '|'.join(['%-27s' % c for c in ([' '.join((i, method_md.loc[i]))] +
                                      list(display_md.loc[i]))])
      for i in display_md.index
  ]
  return '\n'.join([header_str, sep_str] + rows)

In [79]:
print(md_table(imagenet_df, models=imagenet_dfs.keys()))

Method                     |Avg rank                   |ILSVRC (test)              |Omniglot                   |Aircraft                   |Birds                      |Textures                   |QuickDraw                  |Fungi                      |VGG Flower                 |Traffic signs              |MSCOCO                     
---------------------------|---------------------------|---------------------------|---------------------------|---------------------------|---------------------------|---------------------------|---------------------------|---------------------------|---------------------------|---------------------------|---------------------------
k-NN [[1]]                 |14.6                       |41.03±1.01&nbsp;(15)       |37.07±1.15&nbsp;(16)       |46.81±0.89&nbsp;(15)       |50.13±1.00&nbsp;(15.5)     |66.36±0.75&nbsp;(13)       |32.06±1.08&nbsp;(16)       |36.16±1.02&nbsp;(13)       |83.10±0.68&nbsp;(12)       |44.59±1.19&nbsp;(15)       |30.38±0.99&nbsp;(15.

In [80]:
print(md_table(all_df, models=all_dfs.keys()))

Method                     |Avg rank                   |ILSVRC (test)              |Omniglot                   |Aircraft                   |Birds                      |Textures                   |QuickDraw                  |Fungi                      |VGG Flower                 |Traffic signs              |MSCOCO                     
---------------------------|---------------------------|---------------------------|---------------------------|---------------------------|---------------------------|---------------------------|---------------------------|---------------------------|---------------------------|---------------------------|---------------------------
k-NN [[1]]                 |16.85                      |38.55±0.94&nbsp;(16.5)     |74.60±1.08&nbsp;(18)       |64.98±0.82&nbsp;(19)       |66.35±0.92&nbsp;(14.5)     |63.58±0.79&nbsp;(15.5)     |44.88±1.05&nbsp;(19)       |37.12±1.06&nbsp;(15.5)     |83.47±0.61&nbsp;(15.5)     |40.11±1.10&nbsp;(18)       |29.55±0.96&nbsp;(17)

## Export to MarkDown

### Reference list

In [81]:
def sanitize_anchor(string):
  # Try to mimic the MarkDown function that transforms a section title into an
  # html link anchor, that is:
  # - put it in lower case
  # - remove everything that is not a text character ("\w", which includes "_"),
  #   a space ("\s") or dash ("-")
  # - replace spaces and "_" by "-" (and deduplicate)
  anchor = string.lower()
  anchor = re.sub('[^\w\s-]', '', anchor)
  anchor = re.sub('[\s_-]+', '-', anchor)
  return anchor

In [82]:
def ref_list():
  # Define links from [i] to the reference section
  links = []
  for i, ref in enumerate(references, 1):
    links.append('[%(i)i]: #%(i)i-%(r)s' % dict(
        i=i,
        r=sanitize_anchor(ref[0])))

  references_md = []
  # Content of the reference section
  for i, ref in enumerate(references, 1):
    references_md.append(textwrap.dedent(r'''
      ###### \[%(i)i\] %(shortref)s

      %(fullref)s
    ''') % dict(i=i, shortref=ref[0], fullref=ref[1]))

  return '\n'.join(links + references_md)

In [83]:
print(ref_list())

[1]: #1-triantafillou-et-al-2020
[2]: #2-requeima-et-al-2019
[3]: #3-baik-et-al-2020
[4]: #4-doersch-et-al-2020
[5]: #5-saikia-et-al-2020
[6]: #6-dvornik-et-al-2020
[7]: #7-bateni-et-al-2020
[8]: #8-bateni-et-al-2022a
[9]: #9-liu-et-al-2021a
[10]: #10-triantafillou-et-al-2021
[11]: #11-li-et-al-2021a
[12]: #12-li-et-al-2021b
[13]: #13-liu-et-al-2021b
[14]: #14-bateni-et-al-2022b
[15]: #15-perera-halgamuge-2024

###### \[1\] Triantafillou et al. (2020)

Eleni Triantafillou, Tyler Zhu, Vincent Dumoulin, Pascal Lamblin, Utku Evci, Kelvin Xu, Ross Goroshin, Carles Gelada, Kevin Swersky, Pierre-Antoine Manzagol, Hugo Larochelle; [_Meta-Dataset: A Dataset of Datasets for Learning to Learn from Few Examples_](https://arxiv.org/abs/1903.03096); ICLR 2020.


###### \[2\] Requeima et al. (2019)

James Requeima, Jonathan Gordon, John Bronskill, Sebastian Nowozin, Richard E. Turner; [_Fast and Flexible Multi-Task Classification Using Conditional Neural Adaptive Processes_](https://arxiv.org/abs/19

### Full section

In [84]:
def export_md():
  begin_line = '<!-- Beginning of content generated by `Leaderboard.ipynb` -->'
  end_line = '<!-- End of content generated by `Leaderboard.ipynb` -->'

  parts = [
      begin_line,
      '## Training on ImageNet only',
      md_table(imagenet_df, models=imagenet_dfs.keys()),
      '## Training on all datasets',
      md_table(all_df, models=all_dfs.keys()),
      '## References',
      ref_list(),
      end_line
  ]
  return '\n\n'.join(parts)

In [85]:
print(export_md())

<!-- Beginning of content generated by `Leaderboard.ipynb` -->

## Training on ImageNet only

Method                     |Avg rank                   |ILSVRC (test)              |Omniglot                   |Aircraft                   |Birds                      |Textures                   |QuickDraw                  |Fungi                      |VGG Flower                 |Traffic signs              |MSCOCO                     
---------------------------|---------------------------|---------------------------|---------------------------|---------------------------|---------------------------|---------------------------|---------------------------|---------------------------|---------------------------|---------------------------|---------------------------
k-NN [[1]]                 |14.6                       |41.03±1.01&nbsp;(15)       |37.07±1.15&nbsp;(16)       |46.81±0.89&nbsp;(15)       |50.13±1.00&nbsp;(15.5)     |66.36±0.75&nbsp;(13)       |32.06±1.08&nbsp;(16)       |36.16±1.02