In [1]:
import numpy as np
import os
import pandas as pd
import plotly.express as px

from plotting import *
from utils import *

In [2]:
# Configuration: choose the two cost-estimate variants to compare
estimation_method = 'hardware-capex-opex'
compute_threshold_method = 'top_n'  # top_n, window_percentile
compute_threshold = 10  # e.g. 10 to select top 10; 75 to select top 25%
variant1 = 'original'
variant2 = 'depreciation-update'

# Every results directory shares this prefix; only the variant suffix differs
_results_prefix = f'results/{estimation_method}-{compute_threshold_method}={compute_threshold}'

# Input directories, one per variant
results_dir1 = f'{_results_prefix}-{variant1}/'
results_dir2 = f'{_results_prefix}-{variant2}/'

# Output directory for the comparison artifacts; created if it is missing
results_dir = f'{_results_prefix}-{variant1}-vs-{variant2}/'
os.makedirs(results_dir, exist_ok=True)

In [3]:
# Read each variant's cost table; the file has the same name in both directories
price_dataset_filename = 'price dataset.csv'
cost_df = pd.read_csv(f'{results_dir1}{price_dataset_filename}')
cost_df2 = pd.read_csv(f'{results_dir2}{price_dataset_filename}')

In [4]:
# Put both variants' inflation-adjusted cost side by side in one frame.
# Column assignment from another frame aligns on the row index, not on the
# system name, so first verify that both CSVs list the same systems in the
# same order. Otherwise the two estimates in a row could silently belong to
# different systems and every downstream ratio would be meaningless.
if not cost_df['System'].equals(cost_df2['System']):
    raise ValueError(
        f"'{variant1}' and '{variant2}' results do not list the same systems "
        "in the same order; cannot compare costs row by row"
    )

cost_df[variant1] = cost_df['Cost (inflation-adjusted)']
cost_df[variant2] = cost_df2['Cost (inflation-adjusted)']
cost_df

Unnamed: 0,System,Domain,Task,Model accessibility,Reference,Publication date,Organization,Parameters,Training compute (FLOP),Training dataset size (datapoints),...,Base model,Finetune compute (FLOP),Hardware quantity,Hardware utilization,Training cloud compute vendor,Training data center,Cost,Cost (inflation-adjusted),original,depreciation-update
0,Gemini Ultra,Multimodal,"Language modelling,Visual question answering,C...",Hosted access (no API),Gemini: A Family of Highly Capable Multimodal ...,2023-12-06,Google DeepMind,,5.000000e+25,,...,,,55000.0,,,,8.897229e+07,8.897229e+07,8.897229e+07,9.590527e+07
1,Inflection-2,Language,Language modelling,Hosted access (no API),Inflection-2: The Next Step Up,2023-11-22,Inflection AI,,1.001000e+25,,...,,,5000.0,,,,1.166191e+07,1.168921e+07,1.168921e+07,1.828799e+07
2,Grok-1,Language,"Language modelling,Chat",Open source,Announcing Grok,2023-11-04,xAI,3.140000e+11,2.900000e+24,,...,,,,,,,,,,
3,ChatGLM3,Multimodal,"Chat,Visual question answering",,Zhipu AI launches third-generation base model,2023-10-27,Zhipu AI,1.300000e+11,1.092000e+24,1.050000e+12,...,,,,,,,,,,
4,Falcon-180B,Language,Language modelling,Open access (restricted use),The Falcon Series of Open Language Models,2023-09-06,Technology Innovation Institute,1.800000e+11,3.760000e+24,2.625000e+12,...,,,4096.0,0.1876,Amazon Web Services,,2.033419e+07,2.046019e+07,2.046019e+07,1.436562e+07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62,Xception,Vision,Image classification,,Xception: Deep Learning with Depthwise Separab...,2016-10-07,Google,2.285595e+07,4.360000e+20,3.500000e+08,...,,,60.0,,,,1.679176e+04,1.833691e+04,1.833691e+04,1.749060e+04
63,GNMT,Language,Translation,Hosted access (no API),Google's Neural Machine Translation System: Br...,2016-09-26,Google,2.780000e+08,6.900000e+21,3.600000e+08,...,,,96.0,,,,1.612009e+05,1.760344e+05,1.760344e+05,1.914648e+05
64,AlphaGo Lee,Games,Go,,Mastering the game of Go with deep neural netw...,2016-01-27,DeepMind,,1.900000e+21,2.940000e+07,...,,,,,,,,,,
65,ResNet-152 (ImageNet),Vision,Image classification,,Deep Residual Learning for Image Recognition,2015-12-10,Microsoft,6.000000e+07,1.210000e+19,1.280000e+06,...,,,,,,,,,,


In [22]:
# Per-system ratio of the first variant's cost to the second variant's cost
ratios = cost_df[variant1].div(cost_df[variant2])

# Systems lacking either estimate yield NaN; exclude them from the median/CI
valid_ratios = ratios.dropna()
print_median_and_ci(valid_ratios)

# Series.mean() skips NaN by default, so no explicit dropna is needed here
print(ratios.mean())

Median: 1 [90% CI: 0.61, 1.7]
1.0889389484755478


In [20]:
# Show the system whose variant1/variant2 cost ratio is the smallest.
# argmin() yields a positional index, hence the positional .iloc lookup.
min_ratio_position = ratios.argmin()
cost_df.iloc[min_ratio_position]

System                                                                    Megatron-BERT
Domain                                                                         Language
Task                                                                                NaN
Model accessibility                                                          Unreleased
Reference                             Megatron-LM: Training Multi-Billion Parameter ...
Publication date                                                             2019-09-17
Organization                                                                     NVIDIA
Parameters                                                                 3900000000.0
Training compute (FLOP)                                       60270000000000000524288.0
Training dataset size (datapoints)                                        34800000000.0
Epochs                                                                              NaN
Training time (hours)           

In [21]:
# Scatter both cost estimates against publication date on a log-scaled y axis
fig = px.scatter(
    cost_df,
    x='Publication date',
    y=[variant1, variant2],
    log_y=True,
    # text='System',  # optional per-point labels, currently disabled
)
# Label placement; only relevant if the `text` argument above is enabled
fig.update_traces(textposition='top center')

# Axes: titles, plus a fixed date window on the x axis
fig.update_xaxes(title_text='Publication date', range=['2015-01-01', '2025-01-01'])
fig.update_yaxes(title_text='Cost (2023 USD)')

# Layout: title, fonts, horizontal legend, fixed figure size and tight margins
plot_title = get_cost_plot_title(estimation_method, compute_threshold_method, compute_threshold)
fig.update_layout(
    title_text=plot_title,
    title_font=dict(size=16),
    font=dict(size=14),
    legend_title_text='Estimate',
    legend=dict(
        orientation='h',
        yanchor='bottom',
        y=0.05,
        xanchor='right',
        x=0.95,
    ),
    autosize=False,
    width=800,
    height=600,
    margin=dict(l=10, r=10, t=40, b=10),
)

# Persist the figure into the comparison results directory, then render it
save_plot(fig, results_dir, 'cost_scatter_comparison')

fig.show()