# Setup

In [1]:
# Enable IPython's autoreload extension (mode 2) so that edits to the imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import os
import pandas as pd
import plotly.express as px

from cost import *
from plotting import *
from prices import *
from imputation import *
from inflation import *
from regression import *
from utils import *

In [3]:
# --- Run configuration ---
# estimation_method selects the cost model; it must be a key of
# estimation_method_lookup below.
estimation_method = 'hardware-capex-opex'
# compute_threshold_method / compute_threshold are passed to
# load_data_for_cost_estimation. The plot-title cell of this notebook only
# handles the methods 'top_n' and 'window_percentile'.
compute_threshold_method = 'top_n'
compute_threshold = 10
# Free-form tag; in the visible cells it only appears in the results directory name.
variant = 'original'

# Maps each estimation method name to the function that implements it.
estimation_method_lookup = {
    'cloud': estimate_costs,
    'amortized': estimate_amortized_hardware_costs,
    'up-front-server-capex': estimate_upfront_server_capex,
    'hardware-capex-opex': estimate_hardware_capex_opex,
}
# Raises KeyError if estimation_method is not one of the keys above.
cost_estimation_function = estimation_method_lookup[estimation_method]

# One output directory per configuration; the trailing '/' matters because
# later cells build file paths by string concatenation.
results_dir = f'results/{estimation_method}-{compute_threshold_method}={compute_threshold}-{variant}/'
os.makedirs(results_dir, exist_ok=True)

# Load data

In [4]:
# Load the three tables used for cost estimation, applying the configured
# compute-threshold selection.
frontier_pcd_df, hardware_df, price_df = load_data_for_cost_estimation(
    compute_threshold_method=compute_threshold_method,
    compute_threshold=compute_threshold,
)

In [5]:
# Row counts of the three loaded tables, in load order.
tuple(map(len, (frontier_pcd_df, hardware_df, price_df)))

(67, 5510, 142)

# Cost estimation

In [6]:
# Run the selected estimation function over the loaded tables. It prints a
# per-system log while running; later cells read the result's 'Cost' column.
cost_df = cost_estimation_function(frontier_pcd_df, hardware_df, price_df)

==== System: Gemini Ultra ====
Estimated the server release price for Google TPU v4: 19390.4

==== System: Inflection-2 ====
No training time found, assuming 32.083333333333336



Estimated the server release price for NVIDIA H100 SXM5: 44238.96465099587

==== System: Grok-1 ====
Could not find hardware model for Grok-1


==== System: ChatGLM3 ====
Could not find hardware model for ChatGLM3


==== System: Falcon-180B ====
Estimated the server release price for NVIDIA A100 SXM4 40 GB: 42781.84480234261

==== System: Llama 2-70B ====
Estimated the server release price for NVIDIA A100 SXM4 80 GB: 29581.35305528613

==== System: Claude 2 ====
Could not find hardware model for Claude 2


==== System: xTrimoPGLM -100B ====
Estimated the server release price for NVIDIA A100 SXM4 40 GB: 42781.84480234261

==== System: PaLM 2 ====
No training time found, assuming 32.083333333333336

Estimated the server release price for Google TPU v4: 19390.4

==== System: GPT-4 ====
Estimated the server release price for NVIDIA A100 SXM4 40 GB: 42781.84480234261

==== System: LLaMA-65B ====
Soft matching NVIDIA A100 to NVIDIA A100
Soft matching NVIDIA A100 to NVIDIA A100 PCIe
Estimated

In [13]:
# Display the estimation result (pandas truncates the middle rows and columns).
cost_df

Unnamed: 0,System,Domain,Task,Authors,Notability criteria,Notability criteria notes,Model accessibility,Link,Citations,Reference,...,Foundation model,Training compute lower bound,Training compute upper bound,Training chip-hours,Code accessibility,Dataset accessibility,Accessibility notes,Organization categorization (from Organization),Possibly over 1e23 FLOP,Cost
75,Gemini Ultra,Multimodal,"Language modelling,Visual question answering,C...",Gemini Team,SOTA improvement,""" Evaluation on a broad range of benchmarks sh...",Hosted access (no API),https://storage.googleapis.com/deepmind-media/...,633.0,Gemini: A Family of Highly Capable Multimodal ...,...,,,,132000000.0,,,,Industry,,8.687365e+07
91,Inflection-2,Language,Language modelling,,Significant use,Inflection-2 either already powers Pi or soon ...,Hosted access (no API),https://inflection.ai/inflection-2,,Inflection-2: The Next Step Up,...,checked,,,,,,"via Pi, no API",Industry,,1.160021e+07
116,Grok-1,Language,"Language modelling,Chat",,SOTA improvement,"""On these benchmarks, Grok-1 displayed strong ...",Open source,"https://x.ai/model-card/, https://x.ai/blog/gr...",,Announcing Grok,...,checked,2,7.0,,Unreleased,Unreleased,apache 2.0,Industry,checked,
130,ChatGLM3,Multimodal,"Chat,Visual question answering",,SOTA improvement,"Aiming at GPT-4V, ChatGLM3 has implemented ite...",,https://www.zhipuai.cn/en/news/76,,Zhipu AI launches third-generation base model,...,checked,,,,,,,Industry,,
176,Falcon-180B,Language,Language modelling,"Ebtesam Almazrouei, Hamza Alobeidli, Abdulaziz...",SOTA improvement,"""It's currently at the top of the Hugging Face...",Open access (restricted use),https://falconllm.tii.ae/falcon-180b.html; htt...,86.0,The Falcon Series of Open Language Models,...,,,,17694720.0,,,"""Falcon 180b can be commercially used but unde...",Government,,3.419020e+07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1145,Xception,Vision,Image classification,François Chollet,Highly cited,,,https://arxiv.org/abs/1610.02357,11578.0,Xception: Deep Learning with Depthwise Separab...,...,,,,43200.0,,,,Industry,,2.562735e+04
1146,GNMT,Language,Translation,"Yonghui Wu, Mike Schuster, Zhifeng Chen, Quoc ...",Highly cited,,Hosted access (no API),https://arxiv.org/abs/1609.08144,6196.0,Google's Neural Machine Translation System: Br...,...,,,,414720.0,,,presumably deployed via Google translate,Industry,,1.575738e+05
1182,AlphaGo Lee,Games,Go,"David Silver, Aja Huang, Chris J. Maddison, Ar...",Highly cited,,,https://www.nature.com/articles/nature16961,14887.0,Mastering the game of Go with deep neural netw...,...,,,,,,,,Industry,,
1186,ResNet-152 (ImageNet),Vision,Image classification,"Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun",Highly cited,,,https://arxiv.org/abs/1512.03385,156882.0,Deep Residual Learning for Image Recognition,...,,,,,,,,Industry,,


In [14]:
# Number of systems for which a cost could be estimated (non-null 'Cost').
cost_df['Cost'].notna().sum()

46

Uncomment the cell below to check data availability for a specific system.

In [15]:
# system = 'WizardLM-7B'
# row = cost_df.loc[cost_df['System'] == system]
# print('Cost:', row['Cost'].values[0])
# print('Training hardware:', row['Training hardware'].values[0])
# print('Training time (hours):', row['Training time (hours)'].values[0])
# print('Hardware quantity:', row['Hardware quantity'].values[0])
# print('Hardware utilization:', row['Hardware utilization'].values[0])

# Apply inflation adjustment

In [16]:
# TODO: move the inflation-adjustment steps in this section into a function.

In [17]:
# Inspect the publication dates before the string conversion below.
cost_df['Publication date']

75     2023-12-06
91     2023-11-22
116    2023-11-04
130    2023-10-27
176    2023-09-06
          ...    
1145   2016-10-07
1146   2016-09-26
1182   2016-01-27
1186   2015-12-10
1188   2015-12-08
Name: Publication date, Length: 67, dtype: datetime64[ns]

In [18]:
# Replace each publication date with its str() form, overwriting the column in
# place. A later cell parses it back with pd.to_datetime before the regression.
from_year_month = cost_df['Publication date'].map(str)
cost_df['Publication date'] = from_year_month

In [19]:
# Inspect the publication dates after the string conversion above.
cost_df['Publication date']

75      2023-12-06 00:00:00
91      2023-11-22 00:00:00
116     2023-11-04 00:00:00
130     2023-10-27 00:00:00
176     2023-09-06 00:00:00
               ...         
1145    2016-10-07 00:00:00
1146    2016-09-26 00:00:00
1182    2016-01-27 00:00:00
1186    2015-12-10 00:00:00
1188    2015-12-08 00:00:00
Name: Publication date, Length: 67, dtype: object

In [20]:
# Inflation-adjust the 'Cost' column; the following cells read the result from
# 'Cost (inflation-adjusted)'.
# NOTE(review): the CSV is presumably a price-index series and '2023-12-01' the
# reference date costs are converted to (the plots label the values "2023 USD")
# -- confirm against adjust_column_for_inflation.
cost_df = adjust_column_for_inflation(cost_df, 'Cost', 'data/PCU518210518210.csv', '2023-12-01')

In [21]:
# Show the inflation-adjusted costs, skipping systems without an estimate.
cost_df['Cost (inflation-adjusted)'].dropna()

75      8.687365e+07
91      1.162737e+07
176     3.440205e+07
215     2.341945e+06
227     5.838318e+06
268     1.314585e+07
309     1.113190e+08
325     2.231983e+06
369     1.207654e+07
380     1.492584e+06
395     5.690547e+06
420     3.124366e+06
421     1.309547e+06
442     6.120570e+06
448     7.165606e+05
485     1.113754e+06
495     5.629325e+06
517     1.140153e+06
546     9.464932e+05
550     3.010045e+06
590     7.074952e+06
597     8.410990e+05
617     3.751311e+05
639     3.904798e+05
647     3.552884e+05
684     2.162117e+05
700     5.330149e+05
703     1.987343e+05
765     2.303021e+05
772     4.208945e+06
807     1.097811e+05
812     5.983208e+05
839     3.339794e+05
843     2.032130e+05
857     3.639313e+05
858     2.017062e+05
883     1.488732e+05
969     1.023505e+04
1042    8.584548e+05
1058    2.360339e+06
1092    4.617624e+04
1118    1.398404e+06
1134    7.615810e+02
1145    2.798554e+04
1146    1.720735e+05
1188    2.643349e+02
Name: Cost (inflation-adjusted), d

In [22]:
# Sanity check: number of non-null inflation-adjusted costs.
cost_df['Cost (inflation-adjusted)'].notna().sum()

46

# Regression

In [23]:
# Parse the string publication dates back into datetimes and convert them with
# datetime_to_float_year; the result is the regression's predictor column.
cost_df['Publication date (float)'] = datetime_to_float_year(pd.to_datetime(cost_df['Publication date']))

In [24]:
# Fit an OLS regression of inflation-adjusted cost on publication year.
# NOTE(review): logy=True presumably regresses log10(cost) -- the plotting cell
# exponentiates predictions with 10** -- confirm in fit_ols_regression.
reg_results = fit_ols_regression(cost_df, ['Publication date (float)'], 'Cost (inflation-adjusted)', logy=True)
reg_results.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.587
Model:,OLS,Adj. R-squared:,0.578
Method:,Least Squares,F-statistic:,62.6
Date:,"Mon, 06 May 2024",Prob (F-statistic):,5.4e-10
Time:,15:54:06,Log-Likelihood:,-48.648
No. Observations:,46,AIC:,101.3
Df Residuals:,44,BIC:,105.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-758.5419,96.616,-7.851,0.000,-953.258,-563.825
x1,0.3783,0.048,7.912,0.000,0.282,0.475

0,1,2,3
Omnibus:,2.666,Durbin-Watson:,2.169
Prob(Omnibus):,0.264,Jarque-Bera (JB):,1.685
Skew:,0.413,Prob(JB):,0.431
Kurtosis:,3.445,Cond. No.,1860000.0


In [25]:
# Print the fitted growth rate (OOMs/year, multiplier per year, doubling time).
print_growth_rates(reg_results)

0.38 OOMs/year (95% CI: 0.28, 0.47)
2.4x/year (95% CI: 1.9x, 3.0x)
doubling time of 10 months (95% CI: 8, 13)


In [26]:
# Prediction window for the fitted trend, as calendar years and as date strings.
pred_start_year, pred_end_year = 2015, 2025
pred_start_date = '{}-01-01'.format(pred_start_year)
pred_end_date = '{}-01-01'.format(pred_end_year)

# 100 evenly spaced fractional years spanning the window (both endpoints included).
pred_years = pd.DataFrame(
    {'Publication date (float)': np.linspace(start=pred_start_year, stop=pred_end_year, num=100)}
)
pred_years

Unnamed: 0,Publication date (float)
0,2015.00000
1,2015.10101
2,2015.20202
3,2015.30303
4,2015.40404
...,...
95,2024.59596
96,2024.69697
97,2024.79798
98,2024.89899


In [27]:

# Evaluate the fitted regression on the prediction grid; the plotting cell
# reads the 'mean', 'mean_ci_lower' and 'mean_ci_upper' columns of the result.
predicted_cost_df = get_predictions(reg_results, pred_years, ['Publication date (float)'])
# Add a datetime version of the fractional year for use on the plot's x-axis.
predicted_cost_df['Publication date'] = predicted_cost_df['Publication date (float)'].apply(float_year_to_datetime)
predicted_cost_df

Unnamed: 0,mean,mean_se,mean_ci_lower,mean_ci_upper,obs_ci_lower,obs_ci_upper,Publication date (float),Publication date
0,3.651030,0.303035,3.040302,4.261757,2.090831,5.211228,2015.00000,2015-01-01
1,3.689238,0.298510,3.087630,4.290845,2.132586,5.245889,2015.10101,2015-02-06
2,3.727446,0.293995,3.134938,4.319953,2.174288,5.280603,2015.20202,2015-03-15
3,3.765654,0.289490,3.182226,4.349082,2.215937,5.315370,2015.30303,2015-04-21
4,3.803862,0.284995,3.229492,4.378232,2.257533,5.350191,2015.40404,2015-05-28
...,...,...,...,...,...,...,...,...
95,7.280793,0.203678,6.870307,7.691278,5.787564,8.774021,2024.59596,2024-08-06
96,7.319001,0.207830,6.900146,7.737855,5.823450,8.814551,2024.69697,2024-09-12
97,7.357209,0.212011,6.929928,7.784489,5.859276,8.855141,2024.79798,2024-10-19
98,7.395417,0.216219,6.959656,7.831178,5.895043,8.895790,2024.89899,2024-11-25


# Plots

In [28]:
# Build the plot title for the active configuration. The suffix describes how
# the set of systems was selected; the prefix depends on the estimation method.
if compute_threshold_method == 'window_percentile':
    title_suffix = f' to train top {100 - compute_threshold}% most compute-intensive ML models'
elif compute_threshold_method == 'top_n':
    title_suffix = f' to train running top {compute_threshold} most compute-intensive ML models'
else:
    # Fail fast with a clear message. Without this branch title_suffix would be
    # undefined (NameError below) or silently reuse a stale value left in the
    # kernel by an earlier run.
    raise ValueError(
        f'Unknown compute_threshold_method: {compute_threshold_method!r} '
        "(expected 'window_percentile' or 'top_n')"
    )

# Title per estimation method; indexed with estimation_method by the plot cells.
plot_title_lookup = {
    'cloud': 'Cloud compute cost' + title_suffix,
    'amortized': 'Amortized cost of hardware' + title_suffix,
    'up-front-server-capex': 'Acquisition cost of hardware' + title_suffix,
    'hardware-capex-opex': 'Hardware CapEx + OpEx' + title_suffix,
}

In [29]:
# Scatter of every system's inflation-adjusted cost over time (log-scale y),
# with each point labelled by its system name.
fig = px.scatter(
    cost_df,
    x='Publication date',
    y='Cost (inflation-adjusted)',
    text='System',
    log_y=True,
)
fig.update_traces(textposition='top center')

# Axes: labels, plus a fixed date window on x.
fig.update_xaxes(title_text='Publication date', range=['2015-01-01', '2025-01-01'])
fig.update_yaxes(title_text='Cost (2023 USD)')

# Layout: no legend, title, fixed size, font size and tight margins.
fig.update_layout(
    showlegend=False,
    title_text=plot_title_lookup[estimation_method],
    autosize=False,
    width=800,
    height=600,
    font={'size': 14},
    margin={'l': 10, 'r': 10, 't': 40, 'b': 10},
)

save_plot(fig, results_dir, 'cost_scatter')

fig.show()

In [30]:
# Preview which systems list TPU training hardware (missing values count as False).
cost_df['Training hardware'].str.contains('TPU', na=False)

75       True
91      False
116     False
130     False
176     False
        ...  
1145    False
1146    False
1182    False
1186    False
1188    False
Name: Training hardware, Length: 67, dtype: bool

In [31]:
# Systems whose names are drawn next to their points. Each name must match
# cost_df['System'] exactly, otherwise its label is silently dropped (the
# dataset spells it 'Falcon-180B', with a hyphen).
label_systems = ['GNMT', 'DALL-E', 'GPT-3 175B (davinci)', 'PaLM (540B)', 'Llama 2-70B', 'Falcon-180B', 'GPT-4', 'Gemini Ultra']

# Split systems by whether their training-hardware string mentions a TPU.
# Rows with missing hardware fall into the non-TPU ("gpu") group.
tpu_mask = cost_df['Training hardware'].str.contains('TPU', na=False)
tpu_cost_df = cost_df.loc[tpu_mask]
gpu_cost_df = cost_df.loc[~tpu_mask]

# Non-TPU systems as filled markers, TPU systems as open circles.
fig = px.scatter(
    gpu_cost_df,
    x='Publication date',
    y='Cost (inflation-adjusted)',
    log_y=True,
)
fig.add_scatter(
    x=tpu_cost_df['Publication date'],
    y=tpu_cost_df['Cost (inflation-adjusted)'],
    mode='markers',
    marker_symbol='circle-open',
    name='Using equivalent price of TPU',
)

# Text-only traces carrying the names of the selected systems, one per group.
for hardware_cost_df in (gpu_cost_df, tpu_cost_df):
    labelled_df = hardware_cost_df.loc[hardware_cost_df['System'].isin(label_systems)]
    fig.add_scatter(
        x=labelled_df['Publication date'],
        y=labelled_df['Cost (inflation-adjusted)'],
        text=labelled_df['System'],
        mode='text',
        showlegend=False,
    )

# Marker color (applies to both marker traces added so far)
fig.update_traces(
    marker=dict(
        color='rgb(0,100,200)',
    ),
    selector=dict(mode='markers'),
)

# Shade in CI: an invisible lower-bound line, then the upper bound filled down
# to it ('tonexty' fills to the previously added trace, so the order matters).
# Predictions are exponentiated with 10** before plotting on the log axis.
fig.add_scatter(
    x=predicted_cost_df['Publication date'],
    y=10**predicted_cost_df['mean_ci_lower'],
    mode='lines',
    line=dict(width=0),
    showlegend=False,
)
fig.add_scatter(
    x=predicted_cost_df['Publication date'],
    y=10**predicted_cost_df['mean_ci_upper'],
    mode='lines',
    fill='tonexty',
    fillcolor='rgba(0,100,200,0.2)',
    line=dict(width=0),
    name='95% CI of mean',
)
# Regression mean; params[1] is the slope coefficient (x1 in the summary).
fig.add_scatter(
    x=predicted_cost_df['Publication date'],
    y=10**predicted_cost_df['mean'],
    mode='lines',
    line=dict(color='rgb(0,100,200)'),
    name=f'Regression mean (growth rate: {10**reg_results.params[1]:.1f}x per year)',
)

fig.update_traces(textposition='top center')

# Axis labels and limits. The x window follows the prediction range; on a log
# axis plotly takes the range in log10 units, so [1, 9] spans 1e1 to 1e9.
fig.update_xaxes(title_text='Publication date', range=[pred_start_date, pred_end_date])
fig.update_yaxes(title_text='Cost (2023 USD)', range=[1, 9])

# Layout: horizontal legend centered below the plot, title, fixed size, font
# sizes and tight margins.
fig.update_layout(
    legend=dict(
        orientation='h',
        yanchor='top',
        y=-0.15,
        xanchor='center',
        x=0.5,
    ),
    title_text=plot_title_lookup[estimation_method],
    title_font=dict(
        size=16,
    ),
    autosize=False,
    width=800,
    height=600,
    font=dict(
        size=14,
    ),
    margin=dict(l=10, r=10, t=40, b=10),
)

save_plot(fig, results_dir, 'cost_regression')

fig.show()

# Export data

In [32]:
# Columns written to the exported dataset (see the to_csv cell that follows).
keep_cols = [
    'System',
    'Domain',
    'Task',
    'Model accessibility',
    'Reference',
    'Publication date',
    'Organization',
    'Parameters',
    'Training compute (FLOP)',
    'Training dataset size (datapoints)',
    'Epochs',
    'Training time (hours)',
    'Training hardware',
    'Country (from Organization)',
    'Base model',
    'Finetune compute (FLOP)',
    'Hardware quantity',
    'Hardware utilization',
    'Training cloud compute vendor',
    'Training data center',
    # 'Training time (chip hours)',
    'Cost',
    'Cost (inflation-adjusted)',
]
# Preview the subset that will be exported.
cost_df[keep_cols]

Unnamed: 0,System,Domain,Task,Model accessibility,Reference,Publication date,Organization,Parameters,Training compute (FLOP),Training dataset size (datapoints),...,Training hardware,Country (from Organization),Base model,Finetune compute (FLOP),Hardware quantity,Hardware utilization,Training cloud compute vendor,Training data center,Cost,Cost (inflation-adjusted)
75,Gemini Ultra,Multimodal,"Language modelling,Visual question answering,C...",Hosted access (no API),Gemini: A Family of Highly Capable Multimodal ...,2023-12-06 00:00:00,Google DeepMind,,5.000000e+25,,...,Google TPU v4,Multinational,,,55000.0,,,,8.687365e+07,8.687365e+07
91,Inflection-2,Language,Language modelling,Hosted access (no API),Inflection-2: The Next Step Up,2023-11-22 00:00:00,Inflection AI,,1.001000e+25,,...,NVIDIA H100 SXM5,United States of America,,,5000.0,,,,1.160021e+07,1.162737e+07
116,Grok-1,Language,"Language modelling,Chat",Open source,Announcing Grok,2023-11-04 00:00:00,xAI,3.140000e+11,2.900000e+24,,...,,United States of America,,,,,,,,
130,ChatGLM3,Multimodal,"Chat,Visual question answering",,Zhipu AI launches third-generation base model,2023-10-27 00:00:00,Zhipu AI,1.300000e+11,1.092000e+24,1.050000e+12,...,,China,,,,,,,,
176,Falcon-180B,Language,Language modelling,Open access (restricted use),The Falcon Series of Open Language Models,2023-09-06 00:00:00,Technology Innovation Institute,1.800000e+11,3.760000e+24,2.625000e+12,...,NVIDIA A100 SXM4 40 GB,United Arab Emirates,,,4096.0,0.1876,Amazon Web Services,,3.419020e+07,3.440205e+07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1145,Xception,Vision,Image classification,,Xception: Deep Learning with Depthwise Separab...,2016-10-07 00:00:00,Google,2.285595e+07,4.360000e+20,3.500000e+08,...,NVIDIA Tesla K80,United States of America,,,60.0,,,,2.562735e+04,2.798554e+04
1146,GNMT,Language,Translation,Hosted access (no API),Google's Neural Machine Translation System: Br...,2016-09-26 00:00:00,Google,2.780000e+08,6.900000e+21,3.600000e+08,...,NVIDIA Tesla K80,United States of America,,,96.0,,,,1.575738e+05,1.720735e+05
1182,AlphaGo Lee,Games,Go,,Mastering the game of Go with deep neural netw...,2016-01-27 00:00:00,DeepMind,,1.900000e+21,2.940000e+07,...,,United Kingdom of Great Britain and Northern I...,,,,,,,,
1186,ResNet-152 (ImageNet),Vision,Image classification,,Deep Residual Learning for Image Recognition,2015-12-10 00:00:00,Microsoft,6.000000e+07,1.210000e+19,1.280000e+06,...,,United States of America,,,,,,,,


In [33]:
# Write the selected columns to the results directory, without the index column.
export_columns_df = cost_df[keep_cols]
export_columns_df.to_csv(os.path.join(results_dir, 'price dataset.csv'), index=False)