Licensed under the Apache License, Version 2.0

# Imports

In [None]:
import numpy as np
import scipy 
import math
import json
import shutil
from copy import deepcopy

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from google.colab import files
import importlib

from sklearn.linear_model import LinearRegression
from methods import m1, m2, m3, m4

In [None]:
# Short aliases for the Estimator exposed by each of the four method modules;
# the cells below construct them as M1(...), M2(...), M3(...) and M4(...).
M1, M2, M3, M4 = m1.Estimator, m2.Estimator, m3.Estimator, m4.Estimator

# Load data

In [None]:
# Delete any existing copy of the CSV first — presumably so that the file read
# below is the freshly uploaded one rather than a stale copy.
!rm benchmark.vision.csv
# Colab upload helper; the uploaded file is expected to be named
# 'benchmark.vision.csv', since that is the path read on the next line.
f = files.upload()
df_vision = pd.read_csv('benchmark.vision.csv')

In [None]:
# Delete any existing copy of the CSV first — presumably so that the file read
# below is the freshly uploaded one rather than a stale copy.
!rm benchmark.lang.csv
# Colab upload helper; the uploaded file is expected to be named
# 'benchmark.lang.csv', since that is the path read on the next line.
f = files.upload()
df_lang = pd.read_csv('benchmark.lang.csv')

In [None]:
# Stack the vision and language benchmark tables into one frame.
# The last column indicates whether the data should be used for training or
# testing (presumably the 'Training' column, which the cells below compare
# against 1 for fitting rows and 0 for held-out rows).
df_all = pd.concat([df_vision, df_lang])
# Bare expression: the notebook renders the combined table for inspection.
df_all

# Evaluate

In [None]:
# Seed NumPy's global random generator — presumably so that any NumPy-based
# randomness in the estimators is repeatable across runs.
np.random.seed(2021)

In [None]:
def get_error(slaw, x, y):
  """Score a fitted scaling law on held-out points using log-space error.

  Args:
    slaw: estimator object; only its predict_loss(xi) method is used, called
      once per element of x.
    x: 1d array containing data sizes.
    y: 1d array containing the observed errors/losses at those sizes.

  Returns:
    A tuple (rmse, spread). rmse is the square root of the mean squared
    difference between log-predictions and log-targets. spread is how much
    that root grows when the standard error of the squared differences
    (their std divided by sqrt(number of points)) is added to the mean.
  """
  predictions = np.array([slaw.predict_loss(size) for size in x])
  sq_log_diff = (np.log(predictions) - np.log(y)) ** 2
  mean_sq = np.mean(sq_log_diff)
  rmse = np.sqrt(mean_sq)
  # Standard error of the mean squared log-difference.
  std_err = np.std(sq_log_diff) / (len(predictions) ** 0.5)
  spread = np.sqrt(mean_sq + std_err) - np.sqrt(mean_sq)
  return rmse, spread

def create_dir(dir_name):
  """Create an empty directory at dir_name, replacing any existing one.

  If the directory already exists it is removed together with its contents
  before being recreated.

  Args:
    dir_name: path of the directory to (re)create.

  Raises:
    OSError: if an existing directory cannot be removed or the new one cannot
      be created.
  """
  # Imported locally because the notebook's import cell does not import os.
  import os

  try:
    shutil.rmtree(dir_name)
  except FileNotFoundError:
    # Nothing to remove. Any other removal failure is left to propagate: the
    # path would still exist and os.mkdir below could not succeed anyway.
    pass
  os.mkdir(dir_name)

In [None]:
# Result stores shared by all domain sections below, both keyed by a tuple that
# starts with the domain name:
#   scaling_laws[key][mode] -> fitted estimator
#   errors[key][mode]       -> value returned by get_error for that estimator
scaling_laws, errors = {}, {}

## Image Classification

In [None]:
# Narrow the combined table to the image-classification rows and list the
# distinct tasks and models they contain.
domain = 'IC'
in_domain = df_all['Domain'] == domain
df = df_all[in_domain]
tasks, models = set(df['Task']), set(df['Model'])
print('Tasks: ', tasks)
print('Models: ', models)

In [None]:
# Task identifiers, grouped by downstream dataset.
downstream_groups = {
    'Birds': ['bird_5', 'bird_10', 'bird_25'],
    'CIFAR100': ['c_5', 'c_10', 'c_25'],
    'Caltech101': ['cal_5', 'cal_10', 'cal_25'],
    'ImageNet': ['inet_5', 'inet_10', 'inet_25'],
}

# Estimator class and extra constructor arguments for every mode, in the order
# in which the modes are fitted and reported.
estimator_specs = {
    'M1': (M1, {}),
    'M2': (M2, {}),
    'M3': (M3, {}),
    'M4': (M4, dict(err_inf=None, err_0=1.0, update_err_0=True, up_bound=1.0)),
}

for model in models:
  for group, group_tasks in downstream_groups.items():
    for downstream in group_tasks:
      key = (domain, model, downstream)
      print()
      print(key)

      # Rows of this (model, task) pair, split by the 'Training' flag:
      # 1 -> points used for fitting, 0 -> points used for evaluation.
      is_pair = (df['Model'] == model) & (df['Task'] == downstream)
      df_subset1 = df[is_pair & (df['Training'] == 1)]
      df_subset0 = df[is_pair & (df['Training'] == 0)]
      x_vals = np.array(df_subset1['Seen Examples'])
      y_vals = np.array(df_subset1['Loss'])
      x_test = np.array(df_subset0['Seen Examples'])
      y_test = np.array(df_subset0['Loss'])
      fit_values = dict(zip(x_vals, y_vals))

      # Fit each estimator, print its parameters and record its test error.
      scaling_laws[key] = {}
      errors[key] = {}
      for mode, (estimator_cls, extra_kwargs) in estimator_specs.items():
        print(mode)
        law = estimator_cls(fit_values, **extra_kwargs)
        scaling_laws[key][mode] = law
        law.estimate_scaling_params(verbose=0, max_iterations=10_000)

        # The reported parameter set differs per estimator.
        if mode == 'M3':
          print('beta, c, gamma =\t\t %.2f, %0.2f, %0.2f' % (
              law.beta, law.c, law.gamma))
        elif mode == 'M4':
          print('beta, c, alpha, err_inf =\t %.2f, %0.2f, %0.2f, %0.2f' % (
              law.beta, law.c, law.alpha, law.err_inf))
        else:  # 'M1' and 'M2'
          print('beta, c, err_inf =\t\t %.2f, %0.2f, %0.2f' % (
              law.beta, law.c, law.err_inf))

        errors[key][mode] = get_error(law, x_test, y_test)
        rmse, rmse_spread = errors[key][mode]
        print('Extrapolation Loss =\t\t %.4f +- %.5f' % (rmse, rmse_spread))
        print()

In [None]:
# (Re)create the output directory; the plotting cell below saves its PDFs
# under 'image_classification/'.
create_dir('image_classification')

In [None]:
sns.set_theme(context='paper', style='whitegrid', palette='colorblind',
              font_scale=1.75, rc={'lines.linewidth': 2})

# One figure per fitted (domain, model, task) key, with one panel per
# estimator: fitting points in black, held-out points in orange and the
# estimator's loss curve in blue.
# NOTE: every key is unpacked into three fields and rows are read from `df`,
# so this cell expects scaling_laws to hold only three-field keys and `df` to
# hold the matching domain's rows.
for key in scaling_laws:
  domain, model, downstream = key
  fig, axes = plt.subplots(nrows=1, ncols=4, figsize=(20, 4))

  # The data and x-range depend only on the key, so fetch them once per figure
  # instead of once per panel.
  is_pair = (df['Model'] == model) & (df['Task'] == downstream)
  # fetch training data
  df_subset1 = df[is_pair & (df['Training'] == 1)]
  x_vals = np.array(df_subset1['Seen Examples'])
  y_vals = np.array(df_subset1['Loss'])
  fit_values = dict(zip(x_vals, y_vals))
  # fetch test data
  df_subset0 = df[is_pair & (df['Training'] == 0)]
  x_test = np.array(df_subset0['Seen Examples'])
  y_test = np.array(df_subset0['Loss'])
  x_lo = min(df_subset1['Seen Examples'])  # min of fitting data
  x_hi = max(df_subset0['Seen Examples']) * 3  # 3x max of extrapolation data

  miny = 1  # used to rescale the y-axis in the figures.
  for ax, mode in zip(axes, ['M1', 'M2', 'M3', 'M4']):
    law = scaling_laws[key][mode]

    ax.scatter(list(fit_values.keys()), list(fit_values.values()),
               c='black', alpha=0.5)
    ax.scatter(x_test, y_test, c='tab:orange')
    xt, yt = law.loss_curve(x_lo, x_hi)

    ax.plot(xt, yt, color='tab:blue')
    ax.set_xscale('log')
    ax.set_title(mode.upper())
    ax.xaxis.set_ticklabels([])
    if mode == 'M1':  # only the left-most panel carries the y-label
      ax.set_ylabel('Error Rate')
    ax.set_xlabel('Examples Seen')

    # Lowest curve value over all panels, rounded down to one decimal place.
    miny = min(int(min(yt) * 10) / 10, miny)

  # Share the same axis limits across the four panels.
  plt.setp(axes, ylim=(miny, 1))
  plt.setp(axes, xlim=(x_lo, x_hi))
  subtitle = key[1] + ', ' + key[2]
  fig.suptitle(subtitle)
  fig.tight_layout()
  plt.show()
  # '/' in the model name would otherwise be read as a path separator.
  filename = 'image_classification/' + model.replace('/', '_') + '_' + downstream + '.pdf'
  fig.savefig(filename, dpi=200)


In [None]:
# download figures
shutil.make_archive('image_classification', 'zip', 'image_classification')
%download_file image_classification.zip

In [None]:
# win probabilities
wins = np.array([0., 0., 0., 0.])
for key in errors:
  e = np.array([errors[key][mode][0] for mode in ['M1', 'M2', 'M3', 'M4']])
  for k in range(4):
    if np.isnan(e[k]):  # if it fails, set its loss to max
      e[k] = 1
    e[k] = int(e[k] * 1_000) / 1_000 # compare performance up to 3 decimal places.
  v = np.min(e)
  wins  = wins + (e == v) / sum(e == v)  # if multiplle winners, divide score
print(wins / len(errors))

## NMT

In [None]:
# Narrow the combined table to the NMT rows and list the distinct tasks and
# models they contain.
domain = 'NMT'
in_domain = df_all['Domain'] == domain
df = df_all[in_domain]
tasks, models = set(df['Task']), set(df['Model'])
print('Tasks: ', tasks)
print('Models: ', models)

In [None]:
# Bare expression: the notebook renders the rows selected for the current
# domain.
df

In [None]:
# Estimator class and extra constructor arguments for every mode, in the order
# in which the modes are fitted and reported.
estimator_specs = {
    'M1': (M1, {}),
    'M2': (M2, {}),
    'M3': (M3, {}),
    # no upper bound since this is log-perplexity
    'M4': (M4, dict(err_0=1.0, update_err_0=True, up_bound=None)),
}

for model in models:
  key = (domain, model)
  print()
  print(key)

  # Rows of this model, split by the 'Training' flag:
  # 1 -> points used for fitting, 0 -> points used for evaluation.
  is_model = df['Model'] == model
  df_subset1 = df[is_model & (df['Training'] == 1)]
  df_subset0 = df[is_model & (df['Training'] == 0)]
  x_vals = np.array(df_subset1['Seen Examples'])
  y_vals = np.array(df_subset1['Loss'])
  x_test = np.array(df_subset0['Seen Examples'])
  y_test = np.array(df_subset0['Loss'])
  fit_values = dict(zip(x_vals, y_vals))

  # Fit each estimator, print its parameters and record its test error.
  scaling_laws[key] = {}
  errors[key] = {}
  for mode, (estimator_cls, extra_kwargs) in estimator_specs.items():
    print(mode)
    law = estimator_cls(fit_values, **extra_kwargs)
    scaling_laws[key][mode] = law
    law.estimate_scaling_params(verbose=0)

    # The reported parameter set differs per estimator.
    if mode == 'M3':
      print('beta, c, gamma =\t\t %.2f, %0.2f, %0.2f' % (
          law.beta, law.c, law.gamma))
    elif mode == 'M4':
      print('beta, c, alpha, err_inf =\t %.2f, %0.2f, %0.2f, %0.2f' % (
          law.beta, law.c, law.alpha, law.err_inf))
    else:  # 'M1' and 'M2'
      print('beta, c, err_inf =\t\t %.2f, %0.2f, %0.2f' % (
          law.beta, law.c, law.err_inf))

    errors[key][mode] = get_error(law, x_test, y_test)
    rmse, rmse_spread = errors[key][mode]
    print('Extrapolation Loss =\t\t %.4f +- %.5f' % (rmse, rmse_spread))
    print()

## Language Model

In [None]:
# Narrow the combined table to the language-model rows and list the distinct
# tasks and models they contain.
domain = 'LM'
in_domain = df_all['Domain'] == domain
df = df_all[in_domain]
tasks, models = set(df['Task']), set(df['Model'])
print('Tasks: ', tasks)
print('Models: ', models)

In [None]:
# Bare expression: the notebook renders the rows selected for the current
# domain.
df

In [None]:
# Estimator class and extra constructor arguments for every mode, in the order
# in which the modes are fitted and reported.
estimator_specs = {
    'M1': (M1, {}),
    'M2': (M2, {}),
    'M3': (M3, {}),
    # no upper bound since this is cross-entropy loss
    'M4': (M4, dict(err_0=1.0, update_err_0=True, up_bound=None)),
}

for model in models:
  key = (domain, model)
  print()
  print(key)

  # Rows of this model, split by the 'Training' flag:
  # 1 -> points used for fitting, 0 -> points used for evaluation.
  is_model = df['Model'] == model
  df_subset1 = df[is_model & (df['Training'] == 1)]
  df_subset0 = df[is_model & (df['Training'] == 0)]
  x_vals = np.array(df_subset1['Seen Examples'])
  y_vals = np.array(df_subset1['Loss'])  # rescaled already
  x_test = np.array(df_subset0['Seen Examples'])
  y_test = np.array(df_subset0['Loss'])
  fit_values = dict(zip(x_vals, y_vals))

  # Fit each estimator, print its parameters and record its test error.
  scaling_laws[key] = {}
  errors[key] = {}
  for mode, (estimator_cls, extra_kwargs) in estimator_specs.items():
    print(mode)
    law = estimator_cls(fit_values, **extra_kwargs)
    scaling_laws[key][mode] = law
    law.estimate_scaling_params(verbose=0)

    # The reported parameter set differs per estimator.
    if mode == 'M3':
      print('beta, c, gamma =\t\t %.2f, %0.2f, %0.2f' % (
          law.beta, law.c, law.gamma))
    elif mode == 'M4':
      print('beta, c, alpha, err_inf =\t %.2f, %0.2f, %0.2f, %0.2f' % (
          law.beta, law.c, law.alpha, law.err_inf))
    else:  # 'M1' and 'M2'
      print('beta, c, err_inf =\t\t %.2f, %0.2f, %0.2f' % (
          law.beta, law.c, law.err_inf))

    errors[key][mode] = get_error(law, x_test, y_test)
    rmse, rmse_spread = errors[key][mode]
    print('Extrapolation Loss =\t\t %.4f +- %.5f' % (rmse, rmse_spread))
    print()

In [None]:
sns.set_theme(context='paper', style='whitegrid', palette='colorblind',
              font_scale=1.25, rc={'lines.linewidth': 2})

# Grid of fits: one row per entry of `models`, one column per estimator, with
# fitting points in black, held-out points in orange and the loss curve in
# blue.
# The grid is sized from `models` instead of being fixed at five rows; with
# five models this is the same 5x4 grid and 20x10 figure as before. Rows are
# sorted because a set has no defined iteration order.
modes = ['M1', 'M2', 'M3', 'M4']
model_order = sorted(models)
n_rows = len(model_order)
fig, axes = plt.subplots(nrows=n_rows, ncols=len(modes),
                         figsize=(20, 2 * n_rows),
                         squeeze=False)  # keep `axes` 2-D even for one row
for row_id, model in enumerate(model_order):
  key = (domain, model)
  # The data and x-range depend only on the model, so fetch them once per row
  # instead of once per panel.
  is_model = df['Model'] == model
  # fetch training data
  df_subset1 = df[is_model & (df['Training'] == 1)]
  x_vals = np.array(df_subset1['Seen Examples'])
  y_vals = np.array(df_subset1['Loss'])
  # fetch test data
  df_subset0 = df[is_model & (df['Training'] == 0)]
  x_test = np.array(df_subset0['Seen Examples'])
  y_test = np.array(df_subset0['Loss'])
  x_lo = min(df_subset1['Seen Examples'])  # min of fitting data
  x_hi = max(df_subset0['Seen Examples']) * 3  # 3x max of extrapolation data

  for fig_id, mode in enumerate(modes):
    ax = axes[row_id, fig_id]
    law = scaling_laws[key][mode]
    ax.scatter(x_vals, y_vals, c='black', alpha=0.5)
    ax.scatter(x_test, y_test, c='tab:orange')
    xt, yt = law.loss_curve(x_lo, x_hi)

    ax.plot(xt, yt, color='tab:blue')
    ax.set_xscale('log')
    ax.set_title(mode.upper())

    if fig_id == 0:  # label each row on its left-most panel
      ax.set_ylabel('P'+model)
    if row_id == n_rows - 1:  # x-label only on the bottom row
      ax.set_xlabel('Tokens')

# As in the original cell, the shared x-limits come from the last model plotted.
plt.setp(axes, xlim=(x_lo, x_hi))
fig.tight_layout()
plt.show()
filename = 'lm.pdf'
fig.savefig(filename, dpi=200)


## Big Bench

In [None]:
# Narrow the combined table to the Big Bench rows and list the distinct tasks
# and models they contain.
domain = 'BB'
in_domain = df_all['Domain'] == domain
df = df_all[in_domain]
tasks, models = set(df['Task']), set(df['Model'])
print('Tasks: ', tasks)
print('Models: ', models)

In [None]:
# Bare expression: the notebook renders the rows selected for the current
# domain.
df

In [None]:
# Bare expression: the notebook renders the set of task names that the next
# cell iterates over.
tasks

In [None]:
# Estimator class and extra constructor arguments for every mode, in the order
# in which the modes are fitted and reported.
estimator_specs = {
    'M1': (M1, {}),
    'M2': (M2, {}),
    'M3': (M3, {}),
    'M4': (M4, dict(err_0=1.001, update_err_0=True, up_bound=1.0)),
}

for task in tasks:
  key = (domain, task)
  print()
  print(key)

  # Rows of this task, split by the 'Training' flag:
  # 1 -> points used for fitting, 0 -> points used for evaluation.
  is_task = df['Task'] == task
  df_subset1 = df[is_task & (df['Training'] == 1)]
  df_subset0 = df[is_task & (df['Training'] == 0)]
  x_vals = np.array(df_subset1['Seen Examples'])
  y_vals = np.array(df_subset1['Loss'])  # rescaled already
  x_test = np.array(df_subset0['Seen Examples'])
  y_test = np.array(df_subset0['Loss'])
  fit_values = dict(zip(x_vals, y_vals))

  # Fit each estimator, print its parameters and record its test error.
  scaling_laws[key] = {}
  errors[key] = {}
  for mode, (estimator_cls, extra_kwargs) in estimator_specs.items():
    print(mode)
    law = estimator_cls(fit_values, **extra_kwargs)
    scaling_laws[key][mode] = law
    law.estimate_scaling_params(verbose=0)

    # The reported parameter set differs per estimator.
    if mode == 'M3':
      print('beta, c, gamma =\t\t %.2f, %0.2f, %0.2f' % (
          law.beta, law.c, law.gamma))
    elif mode == 'M4':
      print('beta, c, alpha, err_inf =\t %.2f, %0.2f, %0.2f, %0.2f' % (
          law.beta, law.c, law.alpha, law.err_inf))
    else:  # 'M1' and 'M2'
      print('beta, c, err_inf =\t\t %.2f, %0.2f, %0.2f' % (
          law.beta, law.c, law.err_inf))

    errors[key][mode] = get_error(law, x_test, y_test)
    rmse, rmse_spread = errors[key][mode]
    print('Extrapolation Loss =\t\t %.4f +- %.5f' % (rmse, rmse_spread))
    print()
