# Setup

In [1]:
import numpy as np
import pandas as pd
from scipy import stats

In [2]:
# CSV export of the systems database served by epochai.org.
data_url = 'https://epochai.org/data/epochdb/all_systems.csv'
# Read the training-compute column as float64 explicitly instead of relying on dtype inference.
dtypes = {'Training compute (FLOP)': np.float64}
pcd_df = pd.read_csv(data_url, dtype=dtypes)

In [3]:
# Preview the table exactly as loaded, before any cleaning.
pcd_df

Unnamed: 0,System,Domain,Organization,Authors,Publication date,Reference,Link,Citations,Notability criteria,Notability criteria notes,...,Training time notes,Hardware quantity,Hardware utilization,Finetune compute (FLOP),Finetune compute notes,Batch size,Compute cost notes,Training cloud compute vendor,Batch size notes,Training data center
0,Sparse digit recognition SVM,Vision,University of Lubeck,"Kai Labusch, Erhadt Barth, Thomas Martinetz",2008-11-19,Simple method for high-performance digit recog...,https://pubmed.ncbi.nlm.nih.gov/19000969/,124.0,SOTA improvement,"""Finally, we train a support vector machine (S...",...,,,,,,,,,,
1,Karakuri LM,Language,KARAKURI Inc.,,2024-01-26,KARAKURI LM,https://huggingface.co/karakuri-ai/karakuri-lm...,,,,...,,,,,,,,,,
2,DOT(S)-RNN,Language,"Aalto University,Université de Montréal","Razvan Pascanu, Caglar Gulcehre, Kyunghyun Cho...",2013-12-20,How to Construct Deep Recurrent Neural Networks,https://arxiv.org/abs/1312.6026,1255.0,Highly cited,,...,,,,,,,,,,
3,KEPLER,Language,"Tsinghua University,Mila- Quebec AI,University...","Xiaozhi Wang, Tianyu Gao, Zhaocheng Zhu, Zhiyu...",2020-11-23,KEPLER: A Unified Model for Knowledge Embeddin...,https://arxiv.org/abs/1911.06136,445.0,SOTA improvement,"""Experimental results show that KEPLER achieve...",...,,,,,,,,,,
4,DOC + Finetune∗ + Partial Shuffle (PTB),Language,,,2019-03-11,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1407,ELMo,Language,"University of Washington,Allen Institute for AI","ME Peters, M Neumann, M Iyyer, M Gardner",2018-02-01,Deep contextualized word representations,https://arxiv.org/abs/1802.05365,10486.0,Highly cited,,...,,,,,,,,,,
1408,tsuzumi 7B,Language,NTT Communication Science Laboratories,,2023-12-01,"NTT's Large Language Model ""tsuzumi"" is Here!",https://group.ntt/en/magazine/blog/tsuzumi/,,,,...,,,,,,,,,,
1409,VD-LSTM+REAL Small,Language,,,2016-11-04,,,,,,...,,,,,,,,,,
1410,DLRM-12T,Recommendation,"Meta AI,Carnegie Mellon University (CMU)","Dheevatsa Mudigere, Yuchen Hao, Jianyu Huang, ...",2021-04-12,Software-Hardware Co-design for Fast and Scala...,https://arxiv.org/abs/2104.05158,86.0,,,...,No training details provided.,,,,,,No training details provided.,,,


In [4]:
# Parse the publication dates so that they can be sorted and compared as timestamps.
parsed_publication_dates = pd.to_datetime(pcd_df['Publication date'])
pcd_df['Publication date'] = parsed_publication_dates

In [5]:
# Put the rows in chronological order; the cells that walk the table row by row
# (e.g. the running maximum of training compute) depend on this ordering.
pcd_df.sort_values(by='Publication date', inplace=True)

In [6]:
# Discard rows that lack a publication date, a notability criterion or a training-compute value.
required_columns = ['Publication date', 'Notability criteria', 'Training compute (FLOP)']
pcd_df.dropna(subset=required_columns, inplace=True)

In [7]:
# Show the table after date parsing, chronological sorting and removal of incomplete rows.
pcd_df

Unnamed: 0,System,Domain,Organization,Authors,Publication date,Reference,Link,Citations,Notability criteria,Notability criteria notes,...,Training time notes,Hardware quantity,Hardware utilization,Finetune compute (FLOP),Finetune compute notes,Batch size,Compute cost notes,Training cloud compute vendor,Batch size notes,Training data center
1127,Theseus,Robotics,Bell Laboratories,Claude Shannon,1950-07-02,Mighty Mouse,https://www.technologyreview.com/2018/12/19/13...,0.0,Historical significance,,...,,,,,,,,,,
626,Perceptron Mark I,Other,"Cornell Aeronautical Laboratory,Cornell Univer...",F Rosenblatt,1957-01-01,The Perceptron—a perceiving and recognizing au...,https://blogs.umass.edu/brain-wars/files/2016/...,1610.0,"Historical significance,Highly cited",First modern neural network,...,,,,,,,,,,
374,Pandemonium (morse),Language,Massachusetts Institute of Technology (MIT),OG Selfridge,1959-02-01,Pandemonium: A Paradigm for Learning,https://aitopics.org/doc/classics:504E1BAC/,1453.0,Highly cited,,...,,,,,,,,,,
1194,Samuel Neural Checkers,Games,IBM,Arthur L. Samuel,1959-07-01,Some studies in machine learning using the gam...,https://ieeexplore.ieee.org/abstract/document/...,4466.0,Highly cited,,...,,,,,,,,,,
415,Perceptron (1960),Vision,Cornell Aeronautical Laboratory,Frank Rosenblatt,1960-03-30,Perceptron Simulation Experiments,https://www.semanticscholar.org/paper/Perceptr...,394.0,Historical significance,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1319,MultiBand Diffusion,Audio,"Meta AI,Hebrew University of Jerusalem,LORIA","Robin San Roman, Yossi Adi, Antoine Deleforge,...",2023-11-08,From Discrete Tokens to High-Fidelity Audio Us...,https://arxiv.org/abs/2308.02560,1.0,SOTA improvement,"""At equal bit rate, the proposed approach outp...",...,around 2 days,,,,,,,,,
1129,Nemotron-3-8B,Language,NVIDIA,,2023-11-15,NVIDIA AI Foundation Models: Build Custom Ente...,https://developer.nvidia.com/blog/nvidia-ai-fo...,,SOTA improvement,"""The Nemotron-3-8B-QA model offers state-of-th...",...,19 days,1024.0,0.34,,,,,,,
833,Inflection-2,Language,Inflection AI,,2023-11-22,Inflection-2: The Next Step Up,https://inflection.ai/inflection-2,,Significant use,Inflection-2 either already powers Pi or soon ...,...,,5000.0,,,,,,,,
306,Qwen-72B,Language,Alibaba,"Jinze Bai, Shuai Bai, Yunfei Chu, Zeyu Cui, Ka...",2023-11-30,,https://huggingface.co/Qwen/Qwen-72B,,SOTA improvement,"SOTA on several Chinese benchmarks, with highe...",...,,,,,,4000000.0,,,Table 1 https://arxiv.org/abs/2309.16609\n(thi...,


In [10]:
# Total width of the time window centred on each system's publication date; the
# z-score and percentile analyses compare a system against the others in its window.
outlier_window_size = 2  # years

# Default large scale systems

https://colab.research.google.com/drive/1PLGY5ErysqQMfy7Z08uIR2cTnnDgSaVR?usp=sharing

In [9]:
# Only systems published strictly after this date are eligible for selection.
start_large_scale_era = '2015-09-01'
# A system is flagged as large scale when the z-score of its log10 training compute,
# taken over the systems in its time window, is strictly above this value.
high_outliers_z_value_threshold = 0.76

In [None]:
# Flag "large scale" systems: those whose log10 training compute is a high outlier
# (z-score above `high_outliers_z_value_threshold`) among the systems published in a
# time window centred on their own publication date.

# The window does not change from row to row, so build it once. Note that it spans
# outlier_window_size * 52 weeks (728 days for a size of 2), which is slightly
# shorter than that many calendar years.
window_size = pd.Timedelta(f'{outlier_window_size*52*7} days')
half_window_size = window_size / 2

publication_dates = pcd_df['Publication date']
# Take the logarithm once for the whole table instead of once per window.
log_compute = np.log10(pcd_df['Training compute (FLOP)'].values)

large_scale_idx = set()

for position, (index, publication_date) in enumerate(publication_dates.items()):
  # Rows published inside the window around this row (the row itself always qualifies).
  in_window = (
    (publication_date - half_window_size <= publication_dates)
    & (publication_dates <= publication_date + half_window_size)
  ).values

  # A z-score needs at least two observations.
  if in_window.sum() < 2: continue

  # The z-scores come back in table order, so this row's score sits at the number of
  # in-window rows that precede it. Working on the array avoids copying a full
  # DataFrame slice for every row just to read one value.
  window_z_scores = stats.zscore(log_compute[in_window])
  if window_z_scores[in_window[:position].sum()] > high_outliers_z_value_threshold:
    large_scale_idx.add(index)

# Outliers only count once the large-scale era has started.
large_scale_mask = pcd_df.index.isin(large_scale_idx) & (pcd_df['Publication date'] > start_large_scale_era)

In [None]:
# Restrict the table to the rows selected by the large-scale mask.
large_scale_df = pcd_df.loc[large_scale_mask]

In [None]:
# Show the systems selected by the z-score criterion.
large_scale_df

Unnamed: 0,System,Domain,Organization,Authors,Publication date,Reference,Link,Citations,Notability criteria,Notability criteria notes,...,Training time notes,Hardware quantity,Hardware utilization,Finetune compute (FLOP),Finetune compute notes,Batch size,Compute cost notes,Training cloud compute vendor,Batch size notes,Training data center
657,AlphaGo Fan,Games,Google DeepMind,"David Silver, Aja Huang, Chris J. Maddison, Ar...",2015-10-01,Mastering the game of Go with deep neural netw...,https://www.nature.com/articles/nature24270.ep...,14733.0,"Highly cited,SOTA improvement",,...,,,,,,,,,,
1333,AlphaGo Lee,Games,DeepMind,"David Silver, Aja Huang, Chris J. Maddison, Ar...",2016-01-27,Mastering the game of Go with deep neural netw...,https://www.nature.com/articles/nature16961,14733.0,Highly cited,,...,,,,,,,,,,
479,GNMT,Language,Google,"Yonghui Wu, Mike Schuster, Zhifeng Chen, Quoc ...",2016-09-26,Google's Neural Machine Translation System: Br...,https://arxiv.org/abs/1609.08144,6105.0,Highly cited,,...,,96.0,,,,,,,,
1216,Xception,Vision,Google,François Chollet,2016-10-07,Xception: Deep Learning with Depthwise Separab...,https://arxiv.org/abs/1610.02357,11338.0,Highly cited,,...,"""while the JFT experiments took over one month...",60.0,,,,,,,,
1290,NASv3 (CIFAR-10),Vision,Google Brain,"Barret Zoph, Quoc V. Le",2016-11-05,Neural Architecture Search with Reinforcement ...,https://arxiv.org/abs/1611.01578,4700.0,Highly cited,,...,,800.0,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1369,ChatGLM3,Multimodal,Zhipu AI,,2023-10-27,Zhipu AI launches third-generation base model,https://www.zhipuai.cn/en/news/76,,SOTA improvement,"Aiming at GPT-4V, ChatGLM3 has implemented ite...",...,,,,,,,,,,
537,Yi-34B,Language,01.AI,,2023-11-02,,https://github.com/01-ai/Yi,,Significant use,2nd most popular model on HuggingFace: https:/...,...,,,,,,,,,,
832,Inflection-2,Language,Inflection AI,,2023-11-22,Inflection-2: The Next Step Up,https://inflection.ai/inflection-2,,Significant use,Inflection-2 either already powers Pi or soon ...,...,,5000.0,,,,,,,,
306,Qwen-72B,Language,Alibaba,"Jinze Bai, Shuai Bai, Yunfei Chu, Zeyu Cui, Ka...",2023-11-30,,https://huggingface.co/Qwen/Qwen-72B,,SOTA improvement,"SOTA on several Chinese benchmarks, with highe...",...,,,,,,4000000.0,,,Table 1 https://arxiv.org/abs/2309.16609\n(thi...,


In [None]:
# Print the selected system names in reverse table order (latest publication first).
for system_name in large_scale_df['System'].iloc[::-1]:
  print(system_name)

Gemini Ultra
Qwen-72B
Inflection-2
Yi-34B
ChatGLM3
Falcon 180B
Llama 2-34B
Llama 2-70B
Claude 2
xTrimoPGLM -100B
PaLM 2
PanGu-Σ
GPT-4
LLaMA-65B
ViT-22B
GPT-3.5 (text-davinci-003)
Galactica
BLOOM-176B
U-PaLM (540B)
BlenderBot 3
GLM-130B
Minerva (540B)
Parti
UL2
OPT-175B
Flamingo
PaLM (540B)
Chinchilla
LaMDA
GPT-NeoX-20B
AlphaCode
ERNIE 3.0 Titan
GLaM
Gopher (280B)
Yuan 1.0
Megatron-Turing NLG 530B
HyperCLOVA
GOAT
ByT5-XXL
ProtT5-XXL
PLUG
Meta Pseudo Labels
Switch
DALL-E
mT5-XXL
GShard (dense)
iGPT-XL
GPT-3 175B (davinci)
Turing-NLG
Meena
ContextNet + Noisy Student
OpenAI Five Rerun
OpenAI Five
AlphaStar
T5-11B
Megatron-BERT
Megatron-LM (8.3B)
RoBERTa Large
MnasNet-A1 + SSDLite
MnasNet-A3
GPT-2 (1.5B)
BigGAN-deep 512x512
FTW
ResNeXt-101 32x48d
AmoebaNet-A (F=448)
IMPALA
AlphaZero
AlphaGo Zero
OpenAI TI7 DOTA 1v1
JFT
Libratus
AlphaGo Master
NASv3 (CIFAR-10)
Xception
GNMT
AlphaGo Lee
AlphaGo Fan


# Percentiles

In [12]:
# Bucket systems by where their training compute falls among the systems published in
# a time window centred on their own publication date, in steps of `percentile_interval`.
frontier_systems_by_percentile = {}
percentile_interval = 5
assert 100 % percentile_interval == 0, "percentile_interval must divide 100 evenly"
percentile_edges = np.arange(0, 100 + percentile_interval, percentile_interval)

# Half of the window expressed in whole months: fractional years are not accepted by
# DateOffset, and outlier_window_size / 2 is fractional for odd window sizes.
half_window = pd.DateOffset(months=round(outlier_window_size * 12 / 2))

publication_dates = pcd_df['Publication date']
training_compute = pcd_df['Training compute (FLOP)']

# A row's window does not depend on the percentile being evaluated, so scan the windows
# once and record every bucket edge for every row, instead of rescanning per percentile.
window_percentiles = np.zeros((len(pcd_df), len(percentile_edges)))
for i, publication_date in enumerate(publication_dates):
  # Define the window around this row's publication date
  start_date = publication_date - half_window
  end_date = publication_date + half_window

  # Filter the training compute values for this window
  in_window = (publication_dates >= start_date) & (publication_dates <= end_date)
  window_percentiles[i] = np.percentile(training_compute[in_window], percentile_edges)

# NOTE (kept from the original cell): edit the following line if you want to consider
# models released after 2023-12-31.
# NOTE(review): this cutoff is hard-coded and differs from start_large_scale_era
# ('2015-09-01') used by the other selection methods - confirm that this is intended.
is_recent = publication_dates > pd.to_datetime('2015-09-30')

for percentile in range(100 - percentile_interval, -percentile_interval, -percentile_interval):
  print(percentile)
  edge = percentile // percentile_interval
  percentile_compute_low = window_percentiles[:, edge]
  percentile_compute_high = window_percentiles[:, edge + 1]

  # Buckets are (low, high]. The lowest bucket is also closed at the bottom: the
  # smallest system of a window sits exactly on the 0th percentile and would otherwise
  # not be assigned to any bucket.
  if percentile == 0:
    frontier_systems_flag = training_compute >= percentile_compute_low
  else:
    frontier_systems_flag = training_compute > percentile_compute_low
  extra_frontier_systems_flag = training_compute <= percentile_compute_high

  extra_frontier_systems = pcd_df['System'][frontier_systems_flag & extra_frontier_systems_flag & is_recent].values

  frontier_systems_by_percentile[f"{percentile} to {percentile + percentile_interval}"] = extra_frontier_systems

95
90
85
80
75
70
65
60
55
50
45
40
35
30
25
20
15
10
5
0


In [13]:
# Show the mapping from percentile bucket label to the array of system names in that bucket.
frontier_systems_by_percentile

{'95 to 100': array(['GNMT', 'AlphaGo Master', 'AlphaGo Zero', 'AlphaZero',
        'ResNeXt-101 32x48d', 'Megatron-BERT', 'OpenAI Five', 'Meena',
        'GPT-3 175B (davinci)', 'Megatron-Turing NLG 530B',
        'ERNIE 3.0 Titan', 'PaLM (540B)', 'Minerva (540B)', 'GPT-4',
        'PaLM 2', 'Inflection-2', 'Gemini Ultra'], dtype=object),
 '90 to 95': array(['NASv3 (CIFAR-10)', 'FTW', 'T5-11B', 'AlphaStar', 'mT5-XXL',
        'Switch', 'Gopher (280B)', 'Chinchilla', 'U-PaLM (540B)',
        'GPT-3.5 (text-davinci-003)', 'Claude 2', 'Falcon 180B'],
       dtype=object),
 '85 to 90': array(['AlphaGo Fan', 'AlphaGo Lee', 'BigGAN-deep 512x512',
        'Megatron-LM (8.3B)', 'OpenAI Five Rerun', 'Turing-NLG',
        'Yuan 1.0', 'GLaM', 'LaMDA', 'OPT-175B', 'Parti', 'Llama 2-70B',
        'ChatGLM3', 'Qwen-72B'], dtype=object),
 '80 to 85': array(['JFT', 'OpenAI TI7 DOTA 1v1', 'AmoebaNet-A (F=448)',
        'GPT-2 (1.5B)', 'iGPT-XL', 'DALL-E', 'Meta Pseudo Labels',
        'ByT5-XXL', 'GOA

In [14]:
# For every percentile bucket, print its label, its size and its members in reverse
# table order, followed by a blank separator line.
for bucket_label, bucket_systems in frontier_systems_by_percentile.items():
  print(bucket_label)
  print(len(bucket_systems), "systems")
  for system_name in reversed(bucket_systems):
    print(system_name)
  print()

95 to 100
17 systems
Gemini Ultra
Inflection-2
PaLM 2
GPT-4
Minerva (540B)
PaLM (540B)
ERNIE 3.0 Titan
Megatron-Turing NLG 530B
GPT-3 175B (davinci)
Meena
OpenAI Five
Megatron-BERT
ResNeXt-101 32x48d
AlphaZero
AlphaGo Zero
AlphaGo Master
GNMT

90 to 95
12 systems
Falcon 180B
Claude 2
GPT-3.5 (text-davinci-003)
U-PaLM (540B)
Chinchilla
Gopher (280B)
Switch
mT5-XXL
AlphaStar
T5-11B
FTW
NASv3 (CIFAR-10)

85 to 90
14 systems
Qwen-72B
ChatGLM3
Llama 2-70B
Parti
OPT-175B
LaMDA
GLaM
Yuan 1.0
Turing-NLG
OpenAI Five Rerun
Megatron-LM (8.3B)
BigGAN-deep 512x512
AlphaGo Lee
AlphaGo Fan

80 to 85
18 systems
Yi-34B
xTrimoPGLM -100B
PanGu-Σ
LLaMA-65B
BlenderBot 3
GLM-130B
Flamingo
AlphaCode
HyperCLOVA
GOAT
ByT5-XXL
Meta Pseudo Labels
DALL-E
iGPT-XL
GPT-2 (1.5B)
AmoebaNet-A (F=448)
OpenAI TI7 DOTA 1v1
JFT

75 to 80
15 systems
Llama 2-34B
ViT-22B
Galactica
BLOOM-176B
UL2
GPT-NeoX-20B
ProtT5-XXL
GShard (dense)
ContextNet + Noisy Student
RoBERTa Large
BERT-Large
IMPALA
Libratus
Xception
DeepSpeech2 (Eng

# Distance from compute record at the time

In [None]:
# Largest allowed gap, in orders of magnitude (log10) of training compute, between a
# system and the running compute record for the system to count as a frontier system.
ooms_from_frontier = 2

In [None]:
# Running record of training compute: for each row, the largest value seen so far going
# down the table. The record is only meaningful if the rows are in chronological order,
# so check that before computing it.
assert pcd_df['Publication date'].is_monotonic_increasing, "pcd_df must be sorted by publication date"

# Cumulative maximum in one vectorised pass. fmax skips NaN values, and the final fmax
# with 0.0 keeps the record from going below its starting value of 0.
running_max = np.fmax(np.fmax.accumulate(pcd_df['Training compute (FLOP)'].to_numpy()), 0.0)
running_max

array([4.00000000e+01, 6.94894938e+05, 6.00000000e+08, 6.00000000e+08,
       7.20000000e+08, 7.20000000e+08, 7.20000000e+08, 7.20000000e+08,
       2.83280026e+10, 2.83280026e+10, 2.83280026e+10, 4.33721175e+10,
       8.11870414e+10, 1.82321576e+13, 1.82321576e+13, 1.82321576e+13,
       2.10080000e+13, 2.10080000e+13, 6.30000000e+13, 1.30389876e+15,
       1.30389876e+15, 1.30389876e+15, 1.30389876e+15, 1.30389876e+15,
       3.41463600e+15, 6.14400000e+16, 6.14400000e+16, 2.73196800e+17,
       2.73196800e+17, 2.73196800e+17, 6.00000000e+17, 6.00000000e+17,
       6.00000000e+17, 6.00000000e+17, 6.00000000e+17, 6.00000000e+17,
       6.00000000e+17, 1.34092800e+18, 1.34092800e+18, 1.34092800e+18,
       1.34092800e+18, 1.34092800e+18, 3.41107200e+18, 3.41107200e+18,
       3.41107200e+18, 9.25344000e+18, 9.25344000e+18, 5.60000000e+19,
       5.60000000e+19, 5.60000000e+19, 5.60000000e+19, 5.60000000e+19,
       5.60000000e+19, 5.60000000e+19, 5.60000000e+19, 3.80000000e+20,
      

In [None]:
# Attach the running compute record to the table, one value per row in table order.
pcd_df['Frontier training compute (FLOP)'] = running_max

In [None]:
# A system is a frontier system when it was published after the start of the large-scale
# era and its training compute is at most `ooms_from_frontier` orders of magnitude below
# the running compute record.
published_in_era = pcd_df['Publication date'] > start_large_scale_era
ooms_below_record = np.log10(pcd_df['Frontier training compute (FLOP)']) - np.log10(pcd_df['Training compute (FLOP)'])
pcd_df['Frontier system'] = published_in_era & (ooms_below_record <= ooms_from_frontier)
pcd_df[['System', 'Frontier system']]

Unnamed: 0,System,Frontier system
1126,Theseus,False
625,Perceptron Mark I,False
374,Pandemonium (morse),False
1193,Samuel Neural Checkers,False
415,Perceptron (1960),False
...,...,...
1318,MultiBand Diffusion,False
1128,Nemotron-3-8B,False
832,Inflection-2,True
306,Qwen-72B,True


In [None]:
# Keep only the rows flagged as frontier systems and display them.
frontier_df = pcd_df.loc[pcd_df['Frontier system']]
frontier_df

Unnamed: 0,System,Domain,Organization,Authors,Publication date,Reference,Link,Citations,Notability criteria,Notability criteria notes,...,Hardware utilization,Finetune compute (FLOP),Finetune compute notes,Batch size,Compute cost notes,Training cloud compute vendor,Batch size notes,Training data center,Frontier training compute (FLOP),Frontier system
657,AlphaGo Fan,Games,Google DeepMind,"David Silver, Aja Huang, Chris J. Maddison, Ar...",2015-10-01,Mastering the game of Go with deep neural netw...,https://www.nature.com/articles/nature24270.ep...,14733.0,"Highly cited,SOTA improvement",,...,,,,,,,,,3.800000e+20,True
21,DeepSpeech2 (English),Speech,Baidu Research - Silicon Valley AI Lab,"Dario Amodei, Rishita Anubhai, Eric Battenberg...",2015-12-08,Deep Speech 2: End-to-End Speech Recognition i...,https://arxiv.org/abs/1512.02595,2741.0,Highly cited,,...,0.45,,,,,,,,3.800000e+20,True
359,ResNet-152 (ImageNet),Vision,Microsoft,"Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun",2015-12-10,Deep Residual Learning for Image Recognition,https://arxiv.org/abs/1512.03385,154061.0,Highly cited,,...,,,,,,,,,3.800000e+20,True
1333,AlphaGo Lee,Games,DeepMind,"David Silver, Aja Huang, Chris J. Maddison, Ar...",2016-01-27,Mastering the game of Go with deep neural netw...,https://www.nature.com/articles/nature16961,14733.0,Highly cited,,...,,,,,,,,,1.900000e+21,True
479,GNMT,Language,Google,"Yonghui Wu, Mike Schuster, Zhifeng Chen, Quoc ...",2016-09-26,Google's Neural Machine Translation System: Br...,https://arxiv.org/abs/1609.08144,6105.0,Highly cited,,...,,,,,,,,,6.900000e+21,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1055,Skywork-13B,Language,Kunlun Inc.,"Tianwen Wei, Liang Zhao, Lichang Zhang, Bo Zhu...",2023-10-30,Skywork: A More Open Bilingual Foundation Model,https://arxiv.org/abs/2310.19341,26.0,SOTA improvement,"""We show that our model not only excels on pop...",...,0.46,,,16000000.0,,,Table 3,,2.100000e+25,True
537,Yi-34B,Language,01.AI,,2023-11-02,,https://github.com/01-ai/Yi,,Significant use,2nd most popular model on HuggingFace: https:/...,...,,,,,,,,,2.100000e+25,True
832,Inflection-2,Language,Inflection AI,,2023-11-22,Inflection-2: The Next Step Up,https://inflection.ai/inflection-2,,Significant use,Inflection-2 either already powers Pi or soon ...,...,,,,,,,,,2.100000e+25,True
306,Qwen-72B,Language,Alibaba,"Jinze Bai, Shuai Bai, Yunfei Chu, Zeyu Cui, Ka...",2023-11-30,,https://huggingface.co/Qwen/Qwen-72B,,SOTA improvement,"SOTA on several Chinese benchmarks, with highe...",...,,,,4000000.0,,,Table 1 https://arxiv.org/abs/2309.16609\n(thi...,,2.100000e+25,True


In [None]:
# Print the frontier system names in reverse table order (latest publication first).
for system_name in frontier_df['System'].iloc[::-1]:
  print(system_name)

Gemini Ultra
Qwen-72B
Inflection-2
Yi-34B
Skywork-13B
ChatGLM3
Falcon 180B
Llama 2-34B
Llama 2-70B
Claude 2
xTrimoPGLM -100B
PaLM 2
BloombergGPT
PanGu-Σ
Falcon-40B
GPT-4
LLaMA-13B
LLaMA-65B
LLaMA-7B
ViT-22B
GPT-3.5 (text-davinci-003)
Galactica
BLOOM-176B
Taiyi-Stable Diffusion
U-PaLM (540B)
Whisper
PaLI
BlenderBot 3
GLM-130B
AlexaTM 20B
ESM2-15B
ESM2-3B
Minerva (540B)
Parti
CoCa
UL2
OPT-175B
Flamingo
Stable Diffusion (LDM-KL-8-G)
PaLM (540B)
Chinchilla
LaMDA
GPT-NeoX-20B
AlphaCode
ERNIE 3.0 Titan
XGLM-7.5B
GLaM
Gopher (280B)
Florence
BASIC-L
T0-XXL
Yuan 1.0
Megatron-Turing NLG 530B
AlphaFold-Multimer
HyperCLOVA
FLAN 137B
SEER
GOAT
HuBERT
ERNIE 3.0
ALIGN
DeBERTa
CoAtNet
ByT5-XXL
CogView
ProtBERT-BFD
ProtT5-XXL
ProtT5-XXL-BFD
PLUG
M6-T
Meta Pseudo Labels
MSA Transformer
Switch
CLIP (ViT L/14@336px)
DALL-E
ViT-Huge/14
mT5-XXL
Conformer + Wav2vec 2.0 + Noisy Student
GShard (dense)
iGPT-L
iGPT-XL
GPT-3 175B (davinci)
Turing-NLG
Meena
ContextNet + Noisy Student
OpenAI Five Rerun
OpenAI Five


# Constant threshold

In [None]:
# Fixed training-compute cutoff, compared against the 'Training compute (FLOP)' column;
# systems strictly above it are selected.
compute_threshold = 1e23

In [None]:
# Select the systems whose training compute is strictly above the fixed threshold.
exceeds_threshold = pcd_df['Training compute (FLOP)'] > compute_threshold
above_threshold = pcd_df.loc[exceeds_threshold]

In [None]:
# Report how many systems exceed the threshold, then list them in reverse table order.
print(len(above_threshold), 'systems')
for system_name in above_threshold['System'].iloc[::-1]:
  print(system_name)

47 systems
Gemini Ultra
Qwen-72B
Inflection-2
Nemotron-3-8B
Yi-34B
Skywork-13B
ChatGLM3
Falcon 180B
Llama 2-34B
Llama 2-70B
Llama 2-13B
Claude 2
xTrimoPGLM -100B
PaLM 2
StarCoder
BloombergGPT
PanGu-Σ
Falcon-40B
GPT-4
LLaMA-65B
ViT-22B
GPT-3.5 (text-davinci-003)
Galactica
BLOOM-176B
U-PaLM (540B)
BlenderBot 3
GLM-130B
AlexaTM 20B
Minerva (540B)
Parti
UL2
OPT-175B
Flamingo
PaLM (540B)
Chinchilla
LaMDA
AlphaCode
ERNIE 3.0 Titan
GLaM
Gopher (280B)
Yuan 1.0
Megatron-Turing NLG 530B
HyperCLOVA
GPT-3 175B (davinci)
Meena
AlphaGo Zero
AlphaGo Master
