In [1]:
from collections import defaultdict
import numpy as np
import pandas as pd
import plotly.graph_objects as go

from data import *
from hardware import *
from parameters import *
from regression import *

In [2]:
# Load the three tables used throughout this notebook: the model database,
# the hardware spec table and the hardware price table.
# NOTE(review): compute_threshold=0 with "window_percentile" presumably keeps
# every model rather than only frontier ones — confirm against data.py.
frontier_pcd_df, hardware_df, price_df = load_data_for_cost_estimation(
    compute_threshold_method="window_percentile",
    compute_threshold=0,
)
frontier_pcd_df

Unnamed: 0,System,Domain,Task,Authors,Notability criteria,Notability criteria notes,Model accessibility,Link,Citations,Reference,...,Organization categorization,Foundation model,Training compute lower bound,Training compute upper bound,Training chip-hours,Code accessibility,Dataset accessibility,Accessibility notes,Organization categorization (from Organization),Possibly over 1e23 FLOP
65,CogAgent,"Vision,Language","Instruction interpretation,Visual question ans...","Wenyi Hong, Weihan Wang, Qingsong Lv, Jiazheng...",SOTA improvement,See Table 1,Open access (restricted use),"https://arxiv.org/abs/2312.08914Wenyi Hong, We...",50.0,CogAgent: A Visual Language Model for GUI Agents,...,,,,,,Open source,,Code is Apache License 2.0; model is under a m...,,
66,FunSearch,"Language,Search",Code generation,"Bernardino Romera-Paredes, Mohammadamin Bareka...","SOTA improvement,Historical significance",Improved SOTA for the cap set problem. Can pla...,Open source,https://www.nature.com/articles/s41586-023-069...,67.0,Mathematical discoveries from program search w...,...,Industry,,,,,,,Code to run FunSearch with an LLM of your choi...,Industry,
75,Gemini Ultra,Multimodal,"Language modelling,Visual question answering,C...",Gemini Team,SOTA improvement,""" Evaluation on a broad range of benchmarks sh...",Hosted access (no API),https://storage.googleapis.com/deepmind-media/...,633.0,Gemini: A Family of Highly Capable Multimodal ...,...,Industry,,,,132000000.0,,,,Industry,
83,Qwen-72B,Language,"Chat,Code generation","Jinze Bai, Shuai Bai, Yunfei Chu, Zeyu Cui, Ka...",SOTA improvement,"SOTA on several Chinese benchmarks, with highe...",Open access (restricted use),https://huggingface.co/Qwen/Qwen-72B,,,...,Industry,,,,,Unreleased,Unreleased,up to 100m active users:\nhttps://github.com/Q...,Industry,
91,Inflection-2,Language,Language modelling,,Significant use,Inflection-2 either already powers Pi or soon ...,Hosted access (no API),https://inflection.ai/inflection-2,,Inflection-2: The Next Step Up,...,Industry,checked,,,,,,"via Pi, no API",Industry,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1182,AlphaGo Lee,Games,Go,"David Silver, Aja Huang, Chris J. Maddison, Ar...",Highly cited,,,https://www.nature.com/articles/nature16961,14887.0,Mastering the game of Go with deep neural netw...,...,Industry,,,,,,,,Industry,
1183,"Variational (untied weights, MC) LSTM (Large)",Language,,"Yarin Gal, Zoubin Ghahramani","Highly cited,SOTA improvement","""The new approach outperforms existing techniq...",,https://arxiv.org/abs/1512.05287?context=stat,1838.0,A Theoretically Grounded Application of Dropou...,...,Academia,,,,,,,,Academia,
1186,ResNet-152 (ImageNet),Vision,Image classification,"Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun",Highly cited,,,https://arxiv.org/abs/1512.03385,156882.0,Deep Residual Learning for Image Recognition,...,Industry,,,,,,,,Industry,
1188,DeepSpeech2 (English),Speech,Speech recognition,"Dario Amodei, Rishita Anubhai, Eric Battenberg...",Highly cited,,,https://arxiv.org/abs/1512.02595,2749.0,Deep Speech 2: End-to-End Speech Recognition i...,...,Industry,,,,301.0,,,,Industry,


In [6]:
# Tally how many models were trained on each accelerator family, and record a
# release date per family along the way.
hardware_aliases = ['A100', 'H100', 'P100', 'V100', 'TPU v4', 'TPU v3', 'TPU v2', 'TPU v1', 'K80', 'K40']
hardware_counts = defaultdict(int)
hardware_release_dates = {}
for hardware_name in frontier_pcd_df['Training hardware'].dropna():
    # Entries containing a comma are left out of this tally
    # (presumably they list more than one chip type — TODO confirm).
    if "," in hardware_name:
        continue
    matched_aliases = [alias for alias in hardware_aliases if alias in hardware_name]
    for alias in matched_aliases:
        # NOTE(review): every match overwrites the stored date, so the value kept
        # for an alias is the release date of the last matching variant seen.
        hardware_release_dates[alias] = get_release_date(hardware_name, hardware_df)
        hardware_counts[alias] += 1
    if not matched_aliases:
        # No known family matched: count the full hardware name on its own.
        hardware_counts[hardware_name] += 1

# Print counts in descending order
for name, n_models in sorted(hardware_counts.items(), key=lambda item: -item[1]):
    print(f'{name}: {n_models}')

A100: 47
V100: 42
TPU v3: 38
TPU v4: 16
TPU v2: 4
P100: 4
K80: 3
NVIDIA GTX Titan X: 3
NVIDIA Geforce GTX1080 Ti: 2
K40: 2
TPU v1: 2
NVIDIA M40: 2
NVIDIA GeForce GTX TITAN X: 2
H100: 1
NVIDIA A800: 1
Huawei Ascend 910: 1
NVIDIA Quadro RTX 4000: 1
NVIDIA RTX A6000: 1
NVIDIA Quadro RTX 8000: 1
NVIDIA Quadro RTX 5000: 1
NVIDIA GeForce RTX 2080 Ti: 1
NVIDIA TITAN Xp: 1
NVIDIA GeForce GTX 1080 Ti: 1
NVIDIA Quadro P600: 1


In [7]:
# Show the release date stored for each hardware alias.
hardware_release_dates

{'TPU v4': Timestamp('2021-05-20 00:00:00'),
 'H100': Timestamp('2022-09-20 00:00:00'),
 'A100': Timestamp('2020-03-01 00:00:00'),
 'V100': Timestamp('2017-06-21 00:00:00'),
 'TPU v3': Timestamp('2018-01-01 00:00:00'),
 'TPU v2': Timestamp('2017-05-01 00:00:00'),
 'P100': Timestamp('2016-04-05 00:00:00'),
 'K40': Timestamp('2013-11-22 00:00:00'),
 'TPU v1': Timestamp('2015-05-15 00:00:00'),
 'K80': Timestamp('2014-11-17 00:00:00')}

In [None]:
# No TPUs
hardware_aliases = ['A100', 'H100', 'P100', 'V100', 'TPU v4', 'TPU v3', 'TPU v2', 'TPU v1', 'K80', 'K40', 'Titan X']
hardware_counts = defaultdict(int)
for hardware in frontier_pcd_df['Training hardware'].dropna():
    if 'TPU' in hardware:
        continue
    alias_found = False
    for alias in hardware_aliases:
        if alias in hardware:
            hardware_counts[alias] += 1
            alias_found = True
    if not alias_found:
        hardware_counts[hardware] += 1

# Print counts in descending order
for hardware, count in sorted(hardware_counts.items(), key=lambda x: x[1], reverse=True):
    print(f'{hardware}: {count}')

A100: 47
V100: 43
P100: 4
Titan X: 4
K80: 3
NVIDIA Geforce GTX1080 Ti: 2
K40: 2
NVIDIA M40: 2
NVIDIA GeForce GTX TITAN X: 2
H100: 1
NVIDIA A800: 1
Huawei Ascend 910: 1
NVIDIA Quadro RTX 4000: 1
NVIDIA RTX A6000: 1
NVIDIA Quadro RTX 8000: 1
NVIDIA Quadro RTX 5000: 1
NVIDIA GeForce RTX 2080 Ti: 1
NVIDIA TITAN Xp: 1
NVIDIA GeForce GTX 1080 Ti: 1
NVIDIA Quadro P600: 1


# Purchase prices

In [None]:
# Keep only the price records that carry a hardware purchase price.
price_colname = 'Price (hardware purchase)'
has_purchase_price = price_df[price_colname].notna()
purchase_price_df = price_df[has_purchase_price]
purchase_price_df

Unnamed: 0,Price source,Price date,Hardware model,Manufacturer (from Hardware model),Vendor,Location,Price per chip-hour (on-demand),Price per chip-hour (1-year CUD),Price per chip-hour (3-year CUD),Price (hardware purchase),Notes
25,https://www.nextplatform.com/2022/05/09/how-mu...,2020-07-01,NVIDIA A100 PCIe,NVIDIA,,,,,,15000.0,Single-unit list price
26,https://www.nextplatform.com/2022/05/09/how-mu...,2022-07-01,NVIDIA A100 PCIe,NVIDIA,,,,,,12500.0,Single-unit list price
38,https://web.archive.org/web/20200521074015/htt...,2020-05-21,NVIDIA A100 SXM4 40 GB,NVIDIA,,,,,,24875.0,"DGX A100, 8 GPU, 320GB. Release price."
39,https://www.nextplatform.com/2022/05/09/how-mu...,2020-07-01,NVIDIA A100 SXM4 40 GB,NVIDIA,,,,,,15000.0,Single-unit list price
40,https://www.nextplatform.com/2022/05/09/how-mu...,2022-07-01,NVIDIA A100 SXM4 40 GB,NVIDIA,,,,,,12500.0,Single-unit list price
48,https://web.archive.org/web/20210630170623/htt...,2021-06-30,NVIDIA A100 SXM4 80 GB,NVIDIA,,,,,,37500.0,"DGX A100, 8 GPU, 640GB"
49,https://web.archive.org/web/20220120191032/htt...,2022-01-20,NVIDIA A100 SXM4 80 GB,NVIDIA,,,,,,20875.0,"DGX A100, 8 GPU, 640GB"
50,https://web.archive.org/web/20230923154035/htt...,2023-09-23,NVIDIA A100 SXM4 80 GB,NVIDIA,,,,,,18548.75,"HGX A100, 8 GPU, 640GB"
57,https://www.techpowerup.com/gpu-specs/geforce-...,2015-03-17,NVIDIA GTX Titan X,NVIDIA,,,,,,999.0,Single-unit release price
59,https://web.archive.org/web/20220929115124/htt...,2022-09-29,NVIDIA H100 SXM5,NVIDIA,,,,,,44489.88,"DGX, 8 GPU, 640GB. Convert Euro to USD: https:..."


In [None]:
# Multiply single unit prices by overhead factor
server_adjusted_purchase_price_df = pd.DataFrame(columns=purchase_price_df.columns)
for i, row in purchase_price_df.iterrows():
    if 'single-unit' in row['Notes'].lower():
        row[price_colname] *= get_server_cost_overhead(row['Hardware model'])
        row['Notes'] = row['Notes'].lower().replace('single-unit', 'server-equivalent')
    server_adjusted_purchase_price_df.loc[i] = row
server_adjusted_purchase_price_df

Unnamed: 0,Price source,Price date,Hardware model,Manufacturer (from Hardware model),Vendor,Location,Price per chip-hour (on-demand),Price per chip-hour (1-year CUD),Price per chip-hour (3-year CUD),Price (hardware purchase),Notes
25,https://www.nextplatform.com/2022/05/09/how-mu...,2020-07-01,NVIDIA A100 PCIe,NVIDIA,,,,,,24900.0,server-equivalent list price
26,https://www.nextplatform.com/2022/05/09/how-mu...,2022-07-01,NVIDIA A100 PCIe,NVIDIA,,,,,,20750.0,server-equivalent list price
38,https://web.archive.org/web/20200521074015/htt...,2020-05-21,NVIDIA A100 SXM4 40 GB,NVIDIA,,,,,,24875.0,"DGX A100, 8 GPU, 320GB. Release price."
39,https://www.nextplatform.com/2022/05/09/how-mu...,2020-07-01,NVIDIA A100 SXM4 40 GB,NVIDIA,,,,,,24900.0,server-equivalent list price
40,https://www.nextplatform.com/2022/05/09/how-mu...,2022-07-01,NVIDIA A100 SXM4 40 GB,NVIDIA,,,,,,20750.0,server-equivalent list price
48,https://web.archive.org/web/20210630170623/htt...,2021-06-30,NVIDIA A100 SXM4 80 GB,NVIDIA,,,,,,37500.0,"DGX A100, 8 GPU, 640GB"
49,https://web.archive.org/web/20220120191032/htt...,2022-01-20,NVIDIA A100 SXM4 80 GB,NVIDIA,,,,,,20875.0,"DGX A100, 8 GPU, 640GB"
50,https://web.archive.org/web/20230923154035/htt...,2023-09-23,NVIDIA A100 SXM4 80 GB,NVIDIA,,,,,,18548.75,"HGX A100, 8 GPU, 640GB"
57,https://www.techpowerup.com/gpu-specs/geforce-...,2015-03-17,NVIDIA GTX Titan X,NVIDIA,,,,,,1638.36,server-equivalent release price
59,https://web.archive.org/web/20220929115124/htt...,2022-09-29,NVIDIA H100 SXM5,NVIDIA,,,,,,44489.88,"DGX, 8 GPU, 640GB. Convert Euro to USD: https:..."


In [None]:
# Log-linear reference contours: price decaying by 0.1 orders of magnitude
# (OOMs) per year, one contour per starting price from 10^3 to just under 10^10.
# NOTE(review): the comment here used to say -0.14 OOMs/year while the code used
# -0.1; the code's value is kept — confirm which slope is intended.
CONTOUR_START_YEAR = 2012
CONTOUR_SLOPE_OOMS_PER_YEAR = -0.1

t = np.arange(CONTOUR_START_YEAR, 2026)
contours = [
    10**log10_initial_price * 10**(CONTOUR_SLOPE_OOMS_PER_YEAR * (t - CONTOUR_START_YEAR))
    for log10_initial_price in np.arange(3, 10, 0.1)
]

In [None]:
# Scatter of server-equivalent purchase prices over time, one trace per hardware
# alias, with the log-linear reference contours drawn behind them.
fig = go.Figure()
for hardware_alias in hardware_aliases:
    # Plain substring match (not a regex); rows with a missing model name are excluded.
    alias_rows = server_adjusted_purchase_price_df['Hardware model'].str.contains(
        hardware_alias, regex=False, na=False
    )
    alias_price_df = server_adjusted_purchase_price_df[alias_rows]
    fig.add_trace(go.Scatter(
        x=alias_price_df['Price date'],
        y=alias_price_df[price_colname],
        mode='markers',
        name=hardware_alias,
        text=alias_price_df['Hardware model'] + ' | ' + alias_price_df['Notes'],
    ))

contour_dates = [pd.to_datetime(f"{year}-01-01") for year in t]
for i, contour in enumerate(contours):
    fig.add_trace(go.Scatter(
        x=contour_dates,
        y=contour,
        mode='lines',
        line=dict(color='rgb(200, 200, 200)', width=1),
        name='-0.1 OOMs/year',
        # Only the first contour gets a legend entry; the rest share its label.
        showlegend=(i == 0),
    ))

# y limits: on a log axis plotly reads the range in log10 units, i.e. $1e3 to $1e5.
fig.update_yaxes(range=[3, 5])

fig.update_layout(
    # The x-axis is the price date, so the title says "over time"
    # (it previously read "vs. performance").
    title='Server-equivalent purchase price over time',
    xaxis_title='Price date',
    yaxis_title='Server-equivalent purchase price ($)',
    yaxis_type='log',
    width=800,
    height=600,
)
fig.show()

A100
H100
P100
V100
TPU v4
TPU v3
TPU v2
TPU v1
K80
K40
Titan X


In [None]:
# Drop thenextplatform rows
# na=False keeps rows whose price source is missing (and avoids `~` failing on
# NaN results); regex=False makes this a plain substring match.
is_nextplatform = server_adjusted_purchase_price_df['Price source'].str.contains(
    'nextplatform', regex=False, na=False
)
no_nextplatform_df = server_adjusted_purchase_price_df[~is_nextplatform]

In [None]:
# Same scatter as the previous figure but without the nextplatform price records,
# and on a linear y-axis.
fig = go.Figure()
for hardware_alias in hardware_aliases:
    # Plain substring match (not a regex); rows with a missing model name are excluded.
    alias_rows = no_nextplatform_df['Hardware model'].str.contains(
        hardware_alias, regex=False, na=False
    )
    alias_price_df = no_nextplatform_df[alias_rows]
    fig.add_trace(go.Scatter(
        x=alias_price_df['Price date'],
        y=alias_price_df[price_colname],
        mode='markers',
        name=hardware_alias,
        # Build hover text from the same per-alias slice as x and y. Using the
        # full frame here made pandas align on the union of indexes, giving a
        # longer, NaN-padded series that did not line up with the points.
        text=alias_price_df['Hardware model'] + ' | ' + alias_price_df['Notes'],
    ))
fig.update_layout(
    # The x-axis is the price date, so the title says "over time"
    # (it previously read "vs. performance").
    title='Server-equivalent purchase price over time',
    xaxis_title='Price date',
    yaxis_title='Server-equivalent purchase price ($)',
    width=800,
    height=600,
)
fig.show()

# Training time

In [None]:
# Reload the model, hardware and price tables for the training-time analysis,
# using the "window_percentile" method with compute_threshold=0.
frontier_pcd_df, hardware_df, price_df = load_data_for_cost_estimation(
    compute_threshold_method="window_percentile",
    compute_threshold=0,
)
frontier_pcd_df

Unnamed: 0,System,Domain,Task,Authors,Notability criteria,Notability criteria notes,Model accessibility,Link,Citations,Reference,...,Organization categorization,Foundation model,Training compute lower bound,Training compute upper bound,Training chip-hours,Code accessibility,Dataset accessibility,Accessibility notes,Organization categorization (from Organization),Possibly over 1e23 FLOP
65,CogAgent,"Vision,Language","Instruction interpretation,Visual question ans...","Wenyi Hong, Weihan Wang, Qingsong Lv, Jiazheng...",SOTA improvement,See Table 1,Open access (restricted use),"https://arxiv.org/abs/2312.08914Wenyi Hong, We...",50.0,CogAgent: A Visual Language Model for GUI Agents,...,,,,,,Open source,,Code is Apache License 2.0; model is under a m...,,
66,FunSearch,"Language,Search",Code generation,"Bernardino Romera-Paredes, Mohammadamin Bareka...","SOTA improvement,Historical significance",Improved SOTA for the cap set problem. Can pla...,Open source,https://www.nature.com/articles/s41586-023-069...,67.0,Mathematical discoveries from program search w...,...,Industry,,,,,,,Code to run FunSearch with an LLM of your choi...,Industry,
75,Gemini Ultra,Multimodal,"Language modelling,Visual question answering,C...",Gemini Team,SOTA improvement,""" Evaluation on a broad range of benchmarks sh...",Hosted access (no API),https://storage.googleapis.com/deepmind-media/...,633.0,Gemini: A Family of Highly Capable Multimodal ...,...,Industry,,,,132000000.0,,,,Industry,
83,Qwen-72B,Language,"Chat,Code generation","Jinze Bai, Shuai Bai, Yunfei Chu, Zeyu Cui, Ka...",SOTA improvement,"SOTA on several Chinese benchmarks, with highe...",Open access (restricted use),https://huggingface.co/Qwen/Qwen-72B,,,...,Industry,,,,,Unreleased,Unreleased,up to 100m active users:\nhttps://github.com/Q...,Industry,
91,Inflection-2,Language,Language modelling,,Significant use,Inflection-2 either already powers Pi or soon ...,Hosted access (no API),https://inflection.ai/inflection-2,,Inflection-2: The Next Step Up,...,Industry,checked,,,,,,"via Pi, no API",Industry,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1182,AlphaGo Lee,Games,Go,"David Silver, Aja Huang, Chris J. Maddison, Ar...",Highly cited,,,https://www.nature.com/articles/nature16961,14887.0,Mastering the game of Go with deep neural netw...,...,Industry,,,,,,,,Industry,
1183,"Variational (untied weights, MC) LSTM (Large)",Language,,"Yarin Gal, Zoubin Ghahramani","Highly cited,SOTA improvement","""The new approach outperforms existing techniq...",,https://arxiv.org/abs/1512.05287?context=stat,1838.0,A Theoretically Grounded Application of Dropou...,...,Academia,,,,,,,,Academia,
1186,ResNet-152 (ImageNet),Vision,Image classification,"Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun",Highly cited,,,https://arxiv.org/abs/1512.03385,156882.0,Deep Residual Learning for Image Recognition,...,Industry,,,,,,,,Industry,
1188,DeepSpeech2 (English),Speech,Speech recognition,"Dario Amodei, Rishita Anubhai, Eric Battenberg...",Highly cited,,,https://arxiv.org/abs/1512.02595,2749.0,Deep Speech 2: End-to-End Speech Recognition i...,...,Industry,,,,301.0,,,,Industry,


In [None]:
# Restrict to models that report a training time.
has_training_time = frontier_pcd_df['Training time (hours)'].notna()
training_time_df = frontier_pcd_df[has_training_time]
training_time_df

Unnamed: 0,System,Domain,Task,Authors,Notability criteria,Notability criteria notes,Model accessibility,Link,Citations,Reference,...,Organization categorization,Foundation model,Training compute lower bound,Training compute upper bound,Training chip-hours,Code accessibility,Dataset accessibility,Accessibility notes,Organization categorization (from Organization),Possibly over 1e23 FLOP
66,FunSearch,"Language,Search",Code generation,"Bernardino Romera-Paredes, Mohammadamin Bareka...","SOTA improvement,Historical significance",Improved SOTA for the cap set problem. Can pla...,Open source,https://www.nature.com/articles/s41586-023-069...,67.0,Mathematical discoveries from program search w...,...,Industry,,,,,,,Code to run FunSearch with an LLM of your choi...,Industry,
75,Gemini Ultra,Multimodal,"Language modelling,Visual question answering,C...",Gemini Team,SOTA improvement,""" Evaluation on a broad range of benchmarks sh...",Hosted access (no API),https://storage.googleapis.com/deepmind-media/...,633.0,Gemini: A Family of Highly Capable Multimodal ...,...,Industry,,,,132000000.0,,,,Industry,
96,Nemotron-3-8B,Language,"Chat,Language generation",,SOTA improvement,"""The Nemotron-3-8B-QA model offers state-of-th...",Open access (restricted use),https://developer.nvidia.com/blog/nvidia-ai-fo...,,NVIDIA AI Foundation Models: Build Custom Ente...,...,Industry,,,,,,,can't use to train other models:\n\nhttps://de...,Industry,
102,MultiBand Diffusion,Audio,Audio generation,"Robin San Roman, Yossi Adi, Antoine Deleforge,...",SOTA improvement,"""At equal bit rate, the proposed approach outp...",Open source,https://arxiv.org/abs/2308.02560,2.0,From Discrete Tokens to High-Fidelity Audio Us...,...,"Industry,Academia,Academia",,,,,Open source,,MIT for weights and code,"Industry,Academia,Academia",
115,LLaVA 1.5,"Multimodal,Language,Vision","Chat,Question answering,Visual question answering","Haotian Liu, Chunyuan Li, Yuheng Li, Yong Jae Lee",SOTA improvement,"from abstract: ""we establish stronger baseline...",Open access (restricted use),"https://arxiv.org/abs/2310.03744,\nhttps://hug...",504.0,Improved Baselines with Visual Instruction Tuning,...,"Academia,Industry",,,,192.0,,,Llama 2 license,"Academia,Industry",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1139,BIDAF,Language,Question answering,"Minjoon Seo, Aniruddha Kembhavi, Ali Farhadi, ...","Highly cited,SOTA improvement","""Our experimental evaluations show that our mo...",Open source,https://arxiv.org/abs/1611.01603v6,2246.0,Bidirectional Attention Flow for Machine Compr...,...,"Academia,Research collective",,,,480.0,Open source,Open source,apache 2.0: https://github.com/allenai/bi-att-...,"Academia,Research collective",
1145,Xception,Vision,Image classification,François Chollet,Highly cited,,,https://arxiv.org/abs/1610.02357,11578.0,Xception: Deep Learning with Depthwise Separab...,...,Industry,,,,43200.0,,,,Industry,
1146,GNMT,Language,Translation,"Yonghui Wu, Mike Schuster, Zhifeng Chen, Quoc ...",Highly cited,,Hosted access (no API),https://arxiv.org/abs/1609.08144,6196.0,Google's Neural Machine Translation System: Br...,...,Industry,,,,414720.0,,,presumably deployed via Google translate,Industry,
1172,Part-of-sentence tagging model,Language,Part-of-speech tagging,"Xuehe Ma, Eduard Hovy",Highly cited,,,https://arxiv.org/abs/1603.01354,3193.0,End-to-end Sequence Labeling via Bi-directiona...,...,Academia,,,,12.0,,,,Academia,


In [None]:
# Regress log training time on publication date (expressed as a fractional year).
# `assign` returns a new frame, so the derived column is not written into a
# filtered slice of frontier_pcd_df (which triggered SettingWithCopyWarning).
# Re-running this cell simply recomputes the column.
training_time_df = training_time_df.assign(**{
    'Publication date (float)': datetime_to_float_year(
        pd.to_datetime(training_time_df['Publication date'])
    ),
})
reg_results = fit_ols_regression(
    training_time_df,
    ['Publication date (float)'],
    'Training time (hours)',
    logy=True,
)
reg_results.summary()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



0,1,2,3
Dep. Variable:,y,R-squared:,0.043
Model:,OLS,Adj. R-squared:,0.036
Method:,Least Squares,F-statistic:,5.875
Date:,"Fri, 10 May 2024",Prob (F-statistic):,0.0167
Time:,15:00:28,Log-Likelihood:,-139.44
No. Observations:,132,AIC:,282.9
Df Residuals:,130,BIC:,288.6
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-148.6952,62.329,-2.386,0.018,-272.005,-25.385
x1,0.0747,0.031,2.424,0.017,0.014,0.136

0,1,2,3
Omnibus:,35.341,Durbin-Watson:,1.977
Prob(Omnibus):,0.0,Jarque-Bera (JB):,86.651
Skew:,-1.052,Prob(JB):,1.53e-19
Kurtosis:,6.366,Cond. No.,2060000.0


In [None]:
# Plot training time
# One marker per model, labelled with the system name, on a log-scaled y-axis.
training_time_trace = go.Scatter(
    x=training_time_df['Publication date'],
    y=training_time_df['Training time (hours)'],
    mode='markers',
    text=training_time_df['System'],
)
training_time_layout = dict(
    title='Training times of notable models',
    xaxis_title='Publication date',
    yaxis_title='Training time (hours)',
    yaxis_type='log',
    width=800,
    height=600,
)

fig = go.Figure()
fig.add_trace(training_time_trace)
fig.update_layout(**training_time_layout)
fig.show()

In [None]:
# Median reported training time, in hours.
np.median(training_time_df['Training time (hours)'])

279.0

In [None]:
# 10th percentile of the reported training times, in hours.
np.percentile(training_time_df['Training time (hours)'], 10)

28.130000000000006

In [None]:
# 90th percentile of the reported training times, in hours.
np.percentile(training_time_df['Training time (hours)'], 90)

1440.0

In [None]:
# Number of models with a non-missing training time.
training_time_df['Training time (hours)'].notna().sum()

132

# Hardware depreciation

In [None]:
# Compare exponential depreciation (value falling by a fixed number of orders of
# magnitude per year) with straight-line depreciation over several lifetimes.
# `go` comes from the import cell at the top of the notebook.

# Decay rates in orders of magnitude (OOMs) per year: central, slow and fast.
DEPRECIATION_OOMS_PER_YEAR = 0.14
DEPRECIATION_OOMS_PER_YEAR_SLOW = 0.10
DEPRECIATION_OOMS_PER_YEAR_FAST = 0.18
# Lifetimes, in years, for the linear depreciation curves.
LINEAR_LIFETIMES_YEARS = (3, 4, 5, 6)

# Values of x: years since purchase, every 0.1 years from 0 to 6.
x_values = [x * 0.1 for x in range(0, 61)]

# Calculating the values of each function
exponential_values = [10**(-DEPRECIATION_OOMS_PER_YEAR*x) for x in x_values]
exponential_slow = [10**(-DEPRECIATION_OOMS_PER_YEAR_SLOW*x) for x in x_values]
exponential_fast = [10**(-DEPRECIATION_OOMS_PER_YEAR_FAST*x) for x in x_values]
# Straight-line value 1 - x/lifetime; this goes negative past the lifetime, but
# the y-axis range below hides that part of each line.
linear_values_by_lifetime = {
    lifetime: [1 - x/lifetime for x in x_values]
    for lifetime in LINEAR_LIFETIMES_YEARS
}

# Creating the plot
fig = go.Figure()

# Adding each line plot to the figure
fig.add_trace(go.Scatter(x=x_values, y=exponential_values, mode='lines', name='Exponential: price performance trend'))
# The fast curve is invisible; it only provides the lower edge for the filled
# band drawn by the next trace (fill='tonexty').
fig.add_trace(go.Scatter(x=x_values, y=exponential_fast, mode='lines', line=dict(width=0), showlegend=False,))
fig.add_trace(go.Scatter(x=x_values, y=exponential_slow, mode='lines', name='90% CI', line=dict(width=0), fill='tonexty', fillcolor='rgba(0,100,200,0.2)',))
for lifetime, linear_values in linear_values_by_lifetime.items():
    fig.add_trace(go.Scatter(
        x=x_values,
        y=linear_values,
        mode='lines',
        line=dict(dash='dot'),
        name=f'Linear: {lifetime}-year lifetime',
    ))

# Setting the axes limits
fig.update_layout(
    width=800,
    height=600,
    xaxis=dict(range=[0, 6]),
    yaxis=dict(range=[0, 1]),
    title='Comparison of exponential and linear depreciation functions',
    xaxis_title='Years',
    yaxis_title='Value (normalized to 1 at year 0)'
)

# Display the plot
fig.show()

In [None]:
# Reload the inputs, this time selecting models with the "top_n" method and
# compute_threshold=10.
# NOTE(review): this rebinds frontier_pcd_df and price_df, so cells above that
# use those names see different data if re-run after this one. The hardware
# table is bound to `_hardware_df` here rather than `hardware_df`.
frontier_pcd_df, _hardware_df, price_df = load_data_for_cost_estimation(compute_threshold_method="top_n", compute_threshold=10)

In [None]:
# For each model, measure the gap in days between the release of its training
# hardware and the model's publication date.
gaps = []
# Systems whose hardware release date could not be looked up; kept so the
# dropped rows can be inspected instead of vanishing silently.
skipped_systems = []
for _, row in frontier_pcd_df.iterrows():
    hardware_model = row['Training hardware']
    try:
        hardware_release_date = get_release_date(hardware_model, _hardware_df)
    except Exception:
        # Best-effort: skip models whose release date lookup fails.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
        skipped_systems.append(row['System'])
        continue
    pub_date = pd.to_datetime(row['Publication date'])
    gap = pub_date - hardware_release_date
    gaps.append(gap.days)
    print(f"{row['System']}, {hardware_model}: {gap.days} days")


Gemini Ultra, Google TPU v4: 930 days
Inflection-2, NVIDIA H100 SXM5: 610 days
Falcon-180B, NVIDIA A100 SXM4 40 GB: 1210 days
Llama 2-70B, NVIDIA A100 SXM4 80 GB: 974 days
xTrimoPGLM -100B, NVIDIA A100 SXM4 40 GB: 1148 days
PaLM 2, Google TPU v4: 720 days
GPT-4, NVIDIA A100 SXM4 40 GB: 1035 days
LLaMA-65B, NVIDIA A100: 1090 days
GPT-3.5 (text-davinci-003), NVIDIA A100 SXM4 40 GB: 928 days
BLOOM-176B, NVIDIA A100 SXM4 80 GB: 722 days
U-PaLM (540B), Google TPU v4: 518 days
BlenderBot 3, NVIDIA A100 SXM4 40 GB: 818 days
GLM-130B, NVIDIA A100 SXM4 40 GB: 812 days
Minerva (540B), Google TPU v4: 405 days
Parti, Google TPU v4: 398 days
OPT-175B, NVIDIA A100 SXM4 80 GB: 532 days
PaLM (540B), Google TPU v4: 319 days
LaMDA, Google TPU v3: 1501 days
GLaM, Google TPU v4: 207 days
Gopher (280B), Google TPU v3: 1437 days
Megatron-Turing NLG 530B, NVIDIA A100 SXM4 80 GB: 329 days
HyperCLOVA, NVIDIA A100: 558 days
GOAT, Google TPU v3: 1303 days
ByT5-XXL, Google TPU v3: 1243 days
ProtT5-XXL, Google TPU

In [None]:
# Turn the gaps into an array (needed for the boolean masks below) and show the
# mean gap in days.
gaps = np.asarray(gaps)
np.mean(gaps)

763.6595744680851

In [None]:
# Number of models for which a gap was computed.
len(gaps)

47

In [None]:
# Number of gaps shorter than 2 * DAYS_PER_YEAR days.
# DAYS_PER_YEAR is not defined in this notebook; presumably it comes from one of
# the star-imported project modules — TODO confirm.
len(gaps[gaps < 2 * DAYS_PER_YEAR])

24

In [None]:
# Number of gaps shorter than 3 * DAYS_PER_YEAR days.
len(gaps[gaps < 3 * DAYS_PER_YEAR])

37

In [None]:
# Number of gaps shorter than 4 * DAYS_PER_YEAR days.
len(gaps[gaps < 4 * DAYS_PER_YEAR])

46