In [1]:
# Enable IPython's autoreload extension in mode 2, so edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import os
import pandas as pd
import plotly.express as px

from data import *
from plotting import *
from regression import *
from utils import *

In [3]:
# Output folders for saved figures: one for the model-accessibility plots,
# one for the code-accessibility plots. Both are created up front if missing.
model_results_dir = 'results/benchmark/initial/model/'
code_results_dir = 'results/benchmark/initial/code/'
for results_dir in (model_results_dir, code_results_dir):
    os.makedirs(results_dir, exist_ok=True)

# Data

In [4]:
# Load the two source tables through the project helpers (star-imported above;
# presumably defined in `data` -- TODO confirm). Both results are used as
# pandas DataFrames with a 'System' column by the cells below.
pcd_df = load_pcd_df()
benchmark_df = load_benchmark_df()

In [5]:
# Preview the raw benchmark table; at this point 'System' is still a regular column.
benchmark_df

Unnamed: 0,System,Author(s),Publication date,Reference,Citations,Peer reviewed?,Link,Parameters,Training Compute,Epoch,...,Perplexity (WT2),Perplexity (PTB),Zero-shot?,Uses Cache,Architecture,Base Model,GitHub,Complete row,All ML Systems,System (from All ML Systems)
0,(ensemble): AWD-LSTM-DOC (fin) × 5 (PTB),"Sho Takase, Jun Suzuki, Masaaki Nagata",2018-08-30,Direct Output Connection for a High-Rank Langu...,36.0,,https://arxiv.org/abs/1808.10143,114000000.0,,300.0,...,,47.17,0.0,0,Recurrent,LSTM,https://github.com/nttcslab-nlp/doc_lm,1,(ensemble): AWD-LSTM-DOC (fin) × 5 (PTB),(ensemble): AWD-LSTM-DOC (fin) × 5 (PTB)
1,(ensemble): AWD-LSTM-DOC (fin) × 5 (WT2),"Sho Takase, Jun Suzuki, Masaaki Nagata",2018-08-30,Direct Output Connection for a High-Rank Langu...,36.0,,https://arxiv.org/abs/1808.10143,185000000.0,,300.0,...,53.09,,0.0,0,Recurrent,LSTM,https://github.com/nttcslab-nlp/doc_lm,1,(ensemble): AWD-LSTM-DOC (fin) × 5 (WT2),(ensemble): AWD-LSTM-DOC (fin) × 5 (WT2)
2,$\infty$-former (SM),"Pedro Henrique Martins, Zita Marinho, André F....",2021-09-01,$\infty$-former: Infinite Memory Transformer,31.0,,https://arxiv.org/abs/2109.00301,117000000.0,1.200000e+22,1.0,...,,,1.0,0,Transformer,GPT,https://github.com/deep-spin/infinite-former,1,$\infty$-former (SM),$\infty$-former (SM)
3,1-layer-LSTM,"H. T. Kung, Bradley McDanel, Sai Qian Zhang",2020-07-13,Term Revealing: Furthering Quantization at Run...,9.0,,https://arxiv.org/pdf/2007.06389,86500000.0,,,...,86.85,,0.0,0,Recurrent,LSTM,,1,1-layer-LSTM,1-layer-LSTM
4,2-layer skip-LSTM + dropout tuning (PTB),"Gábor Melis, Charles Blundell, Tomáš Kočiský, ...",2018-05-23,Pushing the bounds of dropout,14.0,,https://arxiv.org/abs/1805.09208,5400000.0,,,...,,55.30,0.0,0,Recurrent,LSTM,,1,2-layer skip-LSTM + dropout tuning (PTB),2-layer skip-LSTM + dropout tuning (PTB)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
403,WeNet (PTB),"Zhiheng Huang, Bing Xiang",2019-04-08,WeNet: Weighted Networks for Recurrent Network...,5.0,,https://arxiv.org/pdf/1904.03819,23000000.0,,,...,,54.80,0.0,0,NAS,WeNet,,1,WeNet (PTB),WeNet (PTB)
404,WeNet (WT2),"Zhiheng Huang, Bing Xiang",2019-04-08,WeNet: Weighted Networks for Recurrent Network...,5.0,,https://arxiv.org/pdf/1904.03819,33000000.0,,,...,66.60,,0.0,0,NAS,WeNet,,1,"WeNet (WT2),WeNet (Penn Treebank)","WeNet (WT2), WeNet (Penn Treebank)"
405,Word-Independent-SRNN+KN5,"Youssef Oualil, Clayton Greenberg, Mittul Sing...",2017-03-23,Sequential Recurrent Neural Networks for Langu...,7.0,,https://arxiv.org/pdf/1703.08068,5320000.0,,,...,,94.00,0.0,0,Recurrent,RNN,,1,Word-Independent-SRNN+KN5,Word-Independent-SRNN+KN5
406,Zoneout + Variational LSTM (PTB),"Stephen Merity, Caiming Xiong, James Bradbury,...",2016-09-26,Pointer Sentinel Mixture Models,1558.0,,https://arxiv.org/abs/1609.07843,21000000.0,,64.0,...,,80.60,0.0,0,Recurrent,LSTM,,1,Zoneout + Variational LSTM (PTB),Zoneout + Variational LSTM (PTB)


In [6]:
# Index both frames by system name so rows can be matched by label across them.
#
# Each call is guarded so this cell can be re-run safely: once 'System' has been
# moved into the index it is no longer a column, and an unconditional
# set_index('System') would raise KeyError on the second execution.
# Reassignment is used instead of inplace=True so the step reads as a plain
# transformation of the loaded frame.
if benchmark_df.index.name != 'System':
    benchmark_df = benchmark_df.set_index('System')
if pcd_df.index.name != 'System':
    pcd_df = pcd_df.set_index('System')

In [7]:
# Systems that appear in both tables (matched on the 'System' index labels)
pcd_systems = set(pcd_df.index)
benchmark_systems = set(benchmark_df.index)
systems = pcd_systems.intersection(benchmark_systems)
systems

{'$\\infty$-former (SM)',
 '(ensemble): AWD-LSTM-DOC (fin) × 5 (PTB)',
 '(ensemble): AWD-LSTM-DOC (fin) × 5 (WT2)',
 '1-layer-LSTM',
 '2-layer skip-LSTM + dropout tuning (PTB)',
 '2-layer skip-LSTM + dropout tuning (WT2)',
 '2-layer-LSTM+Deep-Gradient-Compression',
 '2nd order FOFE-FNNLM',
 '3-Layer-Tensor-Transformer+AdaHessian',
 '4 layer Densely Connected LSTM',
 '4 layer QRNN (h=2500)',
 '4 layer QRNN + dynamic evaluation',
 '4-gram + 8 DENN',
 '6-Layer-Tensor-Transformer+AdaHessian',
 'ADP-FAIRSEQ+NGRAMRES',
 'AFP+FPI (PTB)',
 'AFP+FPI (WT2)',
 'ALiBi (L=3072, Lvalid = 3072)',
 'AWD-FWM (PTB)',
 'AWD-FWM (WT2)',
 'AWD-LSTM',
 'AWD-LSTM + DeFINE',
 'AWD-LSTM + MoS + Partial Shuffled',
 'AWD-LSTM + Phrase Induction + finetuning',
 'AWD-LSTM + dynamic eval (PTB)',
 'AWD-LSTM + dynamic eval (WT2)',
 'AWD-LSTM - 3-layer LSTM (tied) + continuous cache pointer (PTB)',
 'AWD-LSTM - 3-layer LSTM (tied) + continuous cache pointer (WT2)',
 'AWD-LSTM+Behaviorial-Gating',
 'AWD-LSTM+WT+Cache+I

In [8]:
# Keep only the shared systems whose 'Model accessibility' entry in pcd_df is filled in
has_model_accessibility = pcd_df['Model accessibility'].notna()
systems_with_model_accessibility = list(
    systems & set(pcd_df.loc[has_model_accessibility].index)
)
len(systems_with_model_accessibility)

387

In [9]:
# Keep only the shared systems whose 'Code accessibility' entry in pcd_df is filled in
has_code_accessibility = pcd_df['Code accessibility'].notna()
systems_with_code_accessibility = list(
    systems & set(pcd_df.loc[has_code_accessibility].index)
)
len(systems_with_code_accessibility)

387

In [10]:
# For each benchmark, print how many systems report a perplexity overall and
# how many of those remain after restricting to systems with model accessibility.
for perplexity_col in ('Perplexity (WT103)', 'Perplexity (WT2)'):
    print(benchmark_df[perplexity_col].notna().sum())
    print(benchmark_df.loc[systems_with_model_accessibility, perplexity_col].notna().sum())

165
153
111
105


In [11]:
# For each benchmark, print how many systems report a perplexity overall and
# how many of those remain after restricting to systems with code accessibility.
for perplexity_col in ('Perplexity (WT103)', 'Perplexity (WT2)'):
    print(benchmark_df[perplexity_col].notna().sum())
    print(benchmark_df.loc[systems_with_code_accessibility, perplexity_col].notna().sum())

165
153
111
105


In [12]:
# Copy the accessibility labels from pcd_df onto the matching benchmark rows.
# The assignment aligns on the 'System' index labels.
for accessibility_col, labelled_systems in (
    ('Model accessibility', systems_with_model_accessibility),
    ('Code accessibility', systems_with_code_accessibility),
):
    benchmark_df.loc[labelled_systems, accessibility_col] = pcd_df.loc[
        labelled_systems, accessibility_col
    ]

In [13]:
# Copy pcd_df's training-compute values onto the benchmark rows, first for the
# model-accessibility systems and then for the code-accessibility systems.
compute_col = 'Training compute (FLOP)'
for labelled_systems in (systems_with_model_accessibility, systems_with_code_accessibility):
    benchmark_df.loc[labelled_systems, compute_col] = pcd_df.loc[labelled_systems, compute_col]

In [14]:
# Row subsets of the benchmark table used by the two plot sections below
model_filtered_benchmark_df = benchmark_df.loc[systems_with_model_accessibility, :]
code_filtered_benchmark_df = benchmark_df.loc[systems_with_code_accessibility, :]

# Model accessibility plots

In [15]:
# WT103 perplexity against publication date, coloured by model accessibility.
# textposition is the anchor for point labels; it has no visible effect unless
# a `text` column is passed to px.scatter.
fig = px.scatter(
    data_frame=model_filtered_benchmark_df,
    x='Publication date',
    y='Perplexity (WT103)',
    color='Model accessibility',
).update_traces(textposition='top center')

save_plot(fig, model_results_dir, 'WT103_vs_model_accessibility')

# fig.show()

In [16]:
# WT103 perplexity against training compute, coloured by model accessibility.
fig = (
    px.scatter(
        data_frame=model_filtered_benchmark_df,
        x='Training compute (FLOP)',
        y='Perplexity (WT103)',
        color='Model accessibility',
    )
    .update_xaxes(type='log')                    # compute on a log scale
    .update_traces(textposition='middle left')   # label anchor (only used if `text` is set)
    .update_layout(yaxis_range=[0, 60])          # zoom the y-axis to 0-60
)

save_plot(fig, model_results_dir, 'WT103_vs_compute')

# fig.show()

In [17]:
# WT2 perplexity against training compute, coloured by model accessibility.
fig = (
    px.scatter(
        data_frame=model_filtered_benchmark_df,
        x='Training compute (FLOP)',
        y='Perplexity (WT2)',
        color='Model accessibility',
    )
    .update_xaxes(type='log')                    # compute on a log scale
    .update_traces(textposition='middle left')   # label anchor (only used if `text` is set)
)

save_plot(fig, model_results_dir, 'WT2_vs_compute')

# fig.show()

In [18]:
# PTB perplexity against training compute, coloured by model accessibility.
fig = px.scatter(
    model_filtered_benchmark_df,
    x='Training compute (FLOP)',
    y='Perplexity (PTB)',
    color='Model accessibility',
)

# Training compute on a log scale
fig.update_xaxes(type='log')

# Anchor for point labels; has no visible effect unless a `text` column is
# passed to px.scatter above.
fig.update_traces(textposition='middle left')

# This figure belongs to the model-accessibility section, so it goes in the
# model results folder. It was previously written to code_results_dir, where
# the code-accessibility 'PTB_vs_compute' plot later overwrote it.
save_plot(fig, model_results_dir, 'PTB_vs_compute')

# fig.show()

# Code accessibility plots

In [19]:
# Plot wikitext perplexity of systems with code accessibility
# WT103 perplexity against publication date, coloured by code accessibility.
# textposition is the anchor for point labels; it has no visible effect unless
# a `text` column is passed to px.scatter.
fig = px.scatter(
    data_frame=code_filtered_benchmark_df,
    x='Publication date',
    y='Perplexity (WT103)',
    color='Code accessibility',
).update_traces(textposition='top center')

save_plot(fig, code_results_dir, 'WT103_vs_code_accessibility')


In [20]:
# WT103 perplexity against training compute, coloured by code accessibility.
fig = (
    px.scatter(
        data_frame=code_filtered_benchmark_df,
        x='Training compute (FLOP)',
        y='Perplexity (WT103)',
        color='Code accessibility',
    )
    .update_xaxes(type='log')                    # compute on a log scale
    .update_traces(textposition='middle left')   # label anchor (only used if `text` is set)
    .update_layout(yaxis_range=[0, 60])          # zoom the y-axis to 0-60
)

save_plot(fig, code_results_dir, 'WT103_vs_compute')

In [21]:
# WT2 perplexity against training compute, coloured by code accessibility.
fig = (
    px.scatter(
        data_frame=code_filtered_benchmark_df,
        x='Training compute (FLOP)',
        y='Perplexity (WT2)',
        color='Code accessibility',
    )
    .update_xaxes(type='log')                    # compute on a log scale
    .update_traces(textposition='middle left')   # label anchor (only used if `text` is set)
)

save_plot(fig, code_results_dir, 'WT2_vs_compute')

In [22]:
# PTB perplexity against training compute, coloured by code accessibility.
fig = (
    px.scatter(
        data_frame=code_filtered_benchmark_df,
        x='Training compute (FLOP)',
        y='Perplexity (PTB)',
        color='Code accessibility',
    )
    .update_xaxes(type='log')                    # compute on a log scale
    .update_traces(textposition='middle left')   # label anchor (only used if `text` is set)
)

save_plot(fig, code_results_dir, 'PTB_vs_compute')