# Setup

In [556]:
# Enable IPython's autoreload extension in mode 2, so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [557]:
from contextlib import redirect_stdout
import numpy as np
import os
import pandas as pd
import plotly.express as px

from cost import *
from plotting import *
from prices import *
from imputation import *
from inflation import *
from regression import *
from utils import *

In [558]:
# --- Run configuration -------------------------------------------------------
# Which estimator to run; must be one of the keys of estimation_method_lookup.
estimation_method = 'hardware-capex-opex'
# Options: top_n, window_percentile, backward_window_percentile, residual_from_trend
compute_threshold_method = 'residual_from_trend'
# e.g. 10 to select top 10; 75 to select top 25%
compute_threshold = 80
# Free-form tag; only used to name the results directory.
variant = 'original'
# Substrings of system names to drop from cost_df in the "Exclusion" step.
exclude_models_containing = []

# Dispatch table from method name to the function that implements it.
estimation_method_lookup = dict([
    ('cloud', estimate_cloud_costs),
    ('amortized', estimate_amortized_hardware_costs),
    ('hardware-acquisition', estimate_hardware_acquisition_cost),
    ('hardware-capex-opex', estimate_hardware_capex_opex),
])
cost_estimation_function = estimation_method_lookup[estimation_method]

# All outputs of this run go under one directory named after the configuration.
# The trailing slash is part of the value; later cells rely on it.
run_tag = '-'.join([
    estimation_method,
    f'{compute_threshold_method}={compute_threshold}',
    variant,
])
results_dir = 'results/' + run_tag + '/'
os.makedirs(results_dir, exist_ok=True)

# Load data

In [559]:
# Load the three tables used by the cost estimators, applying the configured
# compute-threshold selection.
data_selection_kwargs = dict(
    compute_threshold_method=compute_threshold_method,
    compute_threshold=compute_threshold,
)
frontier_pcd_df, hardware_df, price_df = load_data_for_cost_estimation(**data_selection_kwargs)

In [560]:
# Row counts of the three loaded tables, in load order.
tuple(map(len, (frontier_pcd_df, hardware_df, price_df)))

(73, 5510, 142)

# Cost estimation

In [561]:
# Run the selected estimator. Anything it prints is captured in a log file
# inside results_dir instead of being shown in the notebook.
cost_log_path = f'{results_dir}/cost_estimation.out'
with open(cost_log_path, 'w') as log_file, redirect_stdout(log_file):
    cost_df = cost_estimation_function(frontier_pcd_df, hardware_df, price_df)

In [562]:
# Second pass with separate_components=True, run only for the
# 'hardware-capex-opex' method. It works on a copy of the frontier table and
# logs its printed output to its own file.
# NOTE(review): the "Cost components" cells further down use component_cost_df
# unconditionally, so they presumably fail with a NameError for any other
# estimation_method -- confirm before switching methods.
if estimation_method == 'hardware-capex-opex':
    frontier_pcd_df_copy = frontier_pcd_df.copy()
    component_log_path = f'{results_dir}/component_cost_estimation.out'
    with open(component_log_path, 'w') as log_file, redirect_stdout(log_file):
        component_cost_df = cost_estimation_function(
            frontier_pcd_df_copy,
            hardware_df,
            price_df,
            separate_components=True,
        )

In [563]:
# Display the estimator's output table; the estimate is in the 'Cost' column.
cost_df

Unnamed: 0,System,Domain,Task,Authors,Notability criteria,Notability criteria notes,Model accessibility,Link,Citations,Reference,...,Foundation model,Training compute lower bound,Training compute upper bound,Training chip-hours,Code accessibility,Dataset accessibility,Accessibility notes,Organization categorization (from Organization),Possibly over 1e23 FLOP,Cost
5,Llama 3-70B,Language,"Chat,Language modelling/generation,Code genera...",Aaditya Singh; Aaron Grattafiori; Abhimanyu Du...,Significant use,Will almost certainly be very influential and ...,Open access (restricted use),https://ai.meta.com/blog/meta-llama-3/\r\n\r\n...,,Introducing Meta Llama 3: The most capable ope...,...,,,,6400000.0,Open access (restricted use),,https://github.com/meta-llama/llama3/blob/main...,Industry,checked,8.778959e+06
22,MM1-30B,"Multimodal,Language,Vision","Chat,Image captioning","Brandon McKinzie, Zhe Gan, Jean-Philippe Fauco...",SOTA improvement,""" In particular, the pretrained model MM1 is S...",Unreleased,https://arxiv.org/abs/2403.09611,11.0,"MM1: Methods, Analysis & Insights from Multimo...",...,,,,,Unreleased,Unreleased,,Industry,,
23,Inflection-2.5,Language,Chat,,Significant use,one million daily users; six million monthly,Hosted access (no API),https://inflection.ai/inflection-2-5,,Inflection-2.5: meet the world's best personal AI,...,,,,,,,,Industry,,1.072551e+07
38,MegaScale (Production),Language,Language modelling/generation,"Ziheng Jiang, Haibin Lin, Yinmin Zhong, Qi Hua...",SOTA improvement,Improves SOTA in FLOP utilization for distribu...,Unreleased,https://arxiv.org/abs/2402.15627,1.0,MegaScale: Scaling Large Language Model Traini...,...,,,,,,,Code for MegaScale (also called veScale) train...,"Industry,Academia",,2.410008e+06
72,FunSearch,"Language,Search",Code generation,"Bernardino Romera-Paredes, Mohammadamin Bareka...","SOTA improvement,Historical significance",Improved SOTA for the cap set problem. Can pla...,Open source,https://www.nature.com/articles/s41586-023-069...,67.0,Mathematical discoveries from program search w...,...,,,,,,,Code to run FunSearch with an LLM of your choi...,Industry,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1475,Linear Decision Functions,,,W. Highleyman,"Historical significance,Highly cited",,,https://ieeexplore.ieee.org/document/4066882?d...,,"Linear Decision Functions, with Application to...",...,,,,,,,,Industry,,
1480,Perceptron (1960),Vision,Image classification,Frank Rosenblatt,Historical significance,,,https://www.semanticscholar.org/paper/Perceptr...,394.0,Perceptron Simulation Experiments,...,,,,,,,,Academia,,
1482,Samuel Neural Checkers,Games,Checkers,Arthur L. Samuel,Highly cited,,,https://ieeexplore.ieee.org/abstract/document/...,4509.0,Some studies in machine learning using the gam...,...,,,,,,,,Industry,,
1483,Pandemonium (morse),Language,Morse translation,OG Selfridge,Highly cited,,,https://aitopics.org/doc/classics:504E1BAC/,1453.0,Pandemonium: A Paradigm for Learning,...,,,,,,,,Academia,,


In [564]:
# Number of rows with a non-missing 'Cost' value.
cost_df['Cost'].notna().sum()

50

Exclusion

In [565]:
# Drop every system whose name contains one of the configured substrings.
#  - regex=False: keywords are matched literally, so names containing regex
#    metacharacters (e.g. the parentheses in 'PaLM (540B)') behave as expected.
#  - na=False: rows with a missing system name count as "no match" and are
#    kept. The previous `== False` comparison silently dropped them, because
#    NaN == False evaluates to False.
for kw in exclude_models_containing:
    cost_df = cost_df[~cost_df['System'].str.contains(kw, regex=False, na=False)]
list(cost_df['System'])

['Llama 3-70B',
 'MM1-30B',
 'Inflection-2.5',
 'MegaScale (Production)',
 'FunSearch',
 'Gemini Ultra',
 'Qwen-72B',
 'Inflection-2',
 'Grok-1',
 'Yi-34B',
 'ChatGLM3',
 'Falcon-180B',
 'Llama 2-70B',
 'Claude 2',
 'xTrimoPGLM -100B',
 'PaLM 2',
 'BloombergGPT',
 'PanGu-Σ',
 'GPT-4',
 'Falcon-40B',
 'LLaMA-65B',
 'ViT-22B',
 'GPT-3.5 (text-davinci-003)',
 'Galactica',
 'BLOOM-176B',
 'U-PaLM (540B)',
 'Flan-PaLM 540B',
 'BlenderBot 3',
 'GLM-130B',
 'AlexaTM 20B',
 'Minerva (540B)',
 'Parti',
 'UL2',
 'OPT-175B',
 'Flamingo',
 'PaLM (540B)',
 'Chinchilla',
 'ST-MoE',
 'LaMDA',
 'GPT-NeoX-20B',
 'AlphaCode',
 'ERNIE 3.0 Titan',
 'GLaM',
 'Gopher (280B)',
 'Yuan 1.0',
 'Megatron-Turing NLG 530B',
 'HyperCLOVA',
 'GOAT',
 'ByT5-XXL',
 'ProtT5-XXL',
 'Meta Pseudo Labels',
 'Switch',
 'DALL-E',
 'mT5-XXL',
 'iGPT-XL',
 'GPT-3 175B (davinci)',
 'Meena',
 'OpenAI Five',
 'AlphaStar',
 'T5-11B',
 'Megatron-BERT',
 'ResNeXt-101 32x48d',
 'AlphaZero',
 'AlphaGo Zero',
 'AlphaGo Master',
 'GNMT'

Use the cell below to check data availability for a specific system (uncomment it and set `system` to the model's name).

In [566]:
# Optional debugging helper, disabled by default: uncomment to print the cost
# estimate and the hardware-related inputs recorded for a single system.
# system = 'WizardLM-7B'
# row = cost_df.loc[cost_df['System'] == system]
# print('Cost:', row['Cost'].values[0])
# print('Training hardware:', row['Training hardware'].values[0])
# print('Training time (hours):', row['Training time (hours)'].values[0])
# print('Hardware quantity:', row['Hardware quantity'].values[0])
# print('Hardware utilization:', row['Hardware utilization'].values[0])

# Apply inflation adjustment

In [567]:
# Cost estimates before inflation adjustment (rows without an estimate omitted).
cost_df['Cost'].dropna()

5       8.778959e+06
23      1.072551e+07
38      2.410008e+06
82      6.126676e+07
98      1.171711e+07
184     9.362940e+06
223     9.938486e+05
235     1.646685e+06
278     9.684345e+06
311     3.332574e+05
319     3.739479e+07
320     2.882908e+05
335     5.180867e+05
341     5.689450e+05
379     4.222085e+06
385     5.260542e+05
390     8.022369e+05
405     5.941722e+06
407     5.917571e+06
430     1.303369e+06
431     7.312196e+05
432     2.386147e+05
453     6.361336e+06
459     6.876996e+05
493     2.539170e+05
496     6.531276e+05
497     3.670219e+05
506     5.906844e+06
528     3.770427e+05
530     1.618202e+05
557     1.077455e+06
561     1.024991e+06
602     3.250046e+06
609     5.145633e+05
629     1.419045e+05
651     1.547803e+05
659     1.436324e+05
697     9.048363e+04
713     2.343297e+05
716     1.037081e+05
778     8.527521e+04
785     1.820445e+06
825     3.480160e+05
852     2.091735e+05
856     1.255628e+05
870     6.613550e+05
1055    6.833213e+05
1071    1.036

In [568]:
# Adjust the 'Cost' column using the price-index file data/PCU518210518210.csv.
# The following cells read the result from 'Cost (inflation-adjusted)'.
# NOTE(review): '2023-12-01' is presumably the reference date the costs are
# re-expressed in (the plots label the result "2023 USD") -- confirm against
# adjust_column_for_inflation.
cost_df = adjust_column_for_inflation(cost_df, 'Cost', 'data/PCU518210518210.csv', '2023-12-01')

In [569]:
# Inflation-adjusted cost estimates (rows without an estimate omitted).
cost_df['Cost (inflation-adjusted)'].dropna()

5       8.723917e+06
23      1.070554e+07
38      2.400127e+06
82      6.126676e+07
98      1.174444e+07
184     9.420876e+06
223     9.995083e+05
235     1.656062e+06
278     9.783886e+06
311     3.368315e+05
319     3.779583e+07
320     2.913827e+05
335     5.246990e+05
341     5.762063e+05
379     4.306936e+06
385     5.366264e+05
390     8.183594e+05
405     6.061445e+06
407     6.036806e+06
430     1.329358e+06
431     7.457999e+05
432     2.433726e+05
453     6.481348e+06
459     7.006736e+05
493     2.579324e+05
496     6.634560e+05
497     3.730966e+05
506     6.004609e+06
528     3.897914e+05
530     1.672917e+05
557     1.111858e+06
561     1.057719e+06
602     3.352803e+06
609     5.317725e+05
629     1.464025e+05
651     1.600638e+05
659     1.485353e+05
697     9.349111e+04
713     2.431737e+05
716     1.076222e+05
778     8.895901e+04
785     1.910809e+06
825     3.656134e+05
852     2.228981e+05
856     1.338014e+05
870     7.053802e+05
1055    7.387380e+05
1071    1.120

In [570]:
# Sanity check: the adjusted column has exactly as many non-missing values as
# the original 'Cost' column.
assert cost_df['Cost (inflation-adjusted)'].notna().sum() == cost_df['Cost'].notna().sum()

# Regression

In [571]:
# Numeric version of the publication date, used as the regression predictor in
# the next cell.
cost_df['Publication date (float)'] = datetime_to_float_year(pd.to_datetime(cost_df['Publication date']))

In [572]:
# OLS fit of inflation-adjusted cost against publication date.
# NOTE(review): logy=True presumably regresses log10(cost); the plotting cell
# later applies 10** to the predictions -- confirm in fit_ols_regression.
reg_results = fit_ols_regression(cost_df, ['Publication date (float)'], 'Cost (inflation-adjusted)', logy=True)
reg_results.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.234
Model:,OLS,Adj. R-squared:,0.218
Method:,Least Squares,F-statistic:,14.64
Date:,"Fri, 17 May 2024",Prob (F-statistic):,0.000376
Time:,15:15:44,Log-Likelihood:,-46.378
No. Observations:,50,AIC:,96.76
Df Residuals:,48,BIC:,100.6
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-378.8804,100.585,-3.767,0.000,-581.120,-176.641
x1,0.1903,0.050,3.826,0.000,0.090,0.290

0,1,2,3
Omnibus:,5.321,Durbin-Watson:,1.651
Prob(Omnibus):,0.07,Jarque-Bera (JB):,3.163
Skew:,0.416,Prob(JB):,0.206
Kurtosis:,2.091,Cond. No.,2300000.0


In [573]:
# Save the growth-rate summary to results_dir, then print it again so that it
# also appears in the notebook output.
growth_rates_path = f'{results_dir}/regression_results.out'
with open(growth_rates_path, 'w') as report_file, redirect_stdout(report_file):
    print_growth_rates(reg_results)
print_growth_rates(reg_results)

N=50.0
R^2=0.23
0.19 OOMs/year (95% CI: 0.09, 0.29)
1.6x/year (95% CI: 1.2x, 2.0x)
doubling time of 19 months (95% CI: 12, 40)


In [574]:
# Time window over which the fitted trend is evaluated and plotted.
pred_start_year, pred_end_year = 2015, 2025
pred_start_date, pred_end_date = (
    f'{year}-01-01' for year in (pred_start_year, pred_end_year)
)

# 100 evenly spaced fractional years covering the window, endpoints included.
prediction_year_grid = np.linspace(pred_start_year, pred_end_year, 100)
pred_years = pd.DataFrame({'Publication date (float)': prediction_year_grid})
pred_years

Unnamed: 0,Publication date (float)
0,2015.00000
1,2015.10101
2,2015.20202
3,2015.30303
4,2015.40404
...,...
95,2024.59596
96,2024.69697
97,2024.79798
98,2024.89899


In [575]:

# Evaluate the fitted regression on the prediction grid, then add a datetime
# version of the date column so the predictions can share the plots' date axis.
predicted_cost_df = get_predictions(reg_results, pred_years, ['Publication date (float)'])
predicted_cost_df['Publication date'] = predicted_cost_df['Publication date (float)'].apply(float_year_to_datetime)
predicted_cost_df

Unnamed: 0,mean,mean_se,mean_ci_lower,mean_ci_upper,obs_ci_lower,obs_ci_upper,Publication date (float),Publication date
0,4.665243,0.354980,3.951507,5.378979,3.221099,6.109386,2015.00000,2015-01-01
1,4.684470,0.350116,3.980515,5.388424,3.245135,6.123804,2015.10101,2015-02-06
2,4.703696,0.345255,4.009514,5.397879,3.269116,6.138277,2015.20202,2015-03-15
3,4.722923,0.340400,4.038503,5.407343,3.293041,6.152805,2015.30303,2015-04-21
4,4.742150,0.335550,4.067482,5.416818,3.316910,6.167390,2015.40404,2015-05-28
...,...,...,...,...,...,...,...,...
95,6.491788,0.160105,6.169876,6.813701,5.195733,7.787843,2024.59596,2024-08-06
96,6.511015,0.164320,6.180628,6.841402,5.212829,7.809201,2024.69697,2024-09-12
97,6.530242,0.168579,6.191290,6.869193,5.229850,7.830634,2024.79798,2024-10-19
98,6.549468,0.172880,6.201870,6.897066,5.246796,7.852141,2024.89899,2024-11-25


# Export data

In [576]:
# Columns kept in the exported dataset: model metadata, the training and
# hardware inputs, and the two cost columns.
keep_cols = [
    'System',
    'Domain',
    'Task',
    'Model accessibility',
    'Reference',
    'Publication date',
    'Organization',
    'Parameters',
    'Training compute (FLOP)',
    'Training dataset size (datapoints)',
    'Epochs',
    'Training time (hours)',
    'Training hardware',
    'Country (from Organization)',
    'Base model',
    'Finetune compute (FLOP)',
    'Hardware quantity',
    'Hardware utilization',
    'Training cloud compute vendor',
    'Training data center',
    # Deliberately left out of the export for now:
    # 'Training time (chip hours)',
    'Cost',
    'Cost (inflation-adjusted)',
]
# Preview of the table that will be exported.
cost_df[keep_cols]

Unnamed: 0,System,Domain,Task,Model accessibility,Reference,Publication date,Organization,Parameters,Training compute (FLOP),Training dataset size (datapoints),...,Training hardware,Country (from Organization),Base model,Finetune compute (FLOP),Hardware quantity,Hardware utilization,Training cloud compute vendor,Training data center,Cost,Cost (inflation-adjusted)
5,Llama 3-70B,Language,"Chat,Language modelling/generation,Code genera...",Open access (restricted use),Introducing Meta Llama 3: The most capable ope...,2024-04-18,Meta AI,7.000000e+10,6.300000e+24,1.500000e+13,...,NVIDIA H100 SXM5,United States of America,,,16000.0,0.4,,,8.778959e+06,8.723917e+06
22,MM1-30B,"Multimodal,Language,Vision","Chat,Image captioning",Unreleased,"MM1: Methods, Analysis & Insights from Multimo...",2024-03-14,Apple,3.000000e+10,4.300000e+23,1.500000e+12,...,,United States of America,,,,,,,,
23,Inflection-2.5,Language,Chat,Hosted access (no API),Inflection-2.5: meet the world's best personal AI,2024-03-07,Inflection AI,,1.000100e+25,,...,NVIDIA H100 SXM5,United States of America,,,,,,,1.072551e+07,1.070554e+07
38,MegaScale (Production),Language,Language modelling/generation,Unreleased,MegaScale: Scaling Large Language Model Traini...,2024-02-23,"ByteDance,Peking University",5.300000e+11,1.200000e+25,,...,NVIDIA A100,"China,China",,,12288.0,,,,2.410008e+06,2.400127e+06
72,FunSearch,"Language,Search",Code generation,Open source,Mathematical discoveries from program search w...,2023-12-14,Google DeepMind,1.500000e+10,3.870000e+23,0.000000e+00,...,,Multinational,PaLM 2,0,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1475,Linear Decision Functions,,,,"Linear Decision Functions, with Application to...",1962-06-01,Bell Laboratories,,1.559250e+06,5.000000e+02,...,,United States of America,,,,,,,,
1480,Perceptron (1960),Vision,Image classification,,Perceptron Simulation Experiments,1960-03-30,Cornell Aeronautical Laboratory,1.000000e+03,7.200000e+08,5.000000e+03,...,,United States of America,,,,,,,,
1482,Samuel Neural Checkers,Games,Checkers,,Some studies in machine learning using the gam...,1959-07-01,IBM,1.600000e+01,4.284000e+08,5.300000e+04,...,,United States of America,,,,,,,,
1483,Pandemonium (morse),Language,Morse translation,,Pandemonium: A Paradigm for Learning,1959-02-01,Massachusetts Institute of Technology (MIT),,6.000000e+08,,...,,United States of America,,,,,,,,


In [577]:
# Write the selected columns to '<results_dir>price dataset.csv' without the
# DataFrame index. Plain concatenation works because results_dir is built with
# a trailing '/'.
cost_df[keep_cols].to_csv(results_dir + 'price dataset.csv', index=False)

# Plots

In [578]:
# Labelled scatter of every model's inflation-adjusted cost over time (log y).
fig = px.scatter(
    cost_df,
    x='Publication date',
    y='Cost (inflation-adjusted)',
    text='System',
    log_y=True,
)
fig.update_traces(textposition='top center')

# Axes: titles, plus a fixed date window on x.
fig.update_xaxes(
    title_text='Publication date',
    range=['2015-01-01', '2025-01-01'],
)
fig.update_yaxes(title_text='Cost (2023 USD, log scale)')

# Layout in one place: no legend, title, fixed 800x600 canvas, fonts, margins.
fig.update_layout(
    showlegend=False,
    title_text=get_cost_plot_title(estimation_method, compute_threshold_method, compute_threshold),
    title_font=dict(size=16),
    font=dict(size=14),
    autosize=False,
    width=800,
    height=600,
    margin=dict(l=10, r=10, t=40, b=10),
)

save_plot(fig, results_dir, 'cost_scatter')

fig.show()

In [579]:
# Systems that get a text label on the regression plot. Each entry must match
# the 'System' column exactly, because labels are selected with .isin(); a
# misspelt name is dropped silently. 'Falcon 180B' was previously listed, but
# the dataset spells it 'Falcon-180B', so that label never appeared.
label_systems = [
    'GNMT',
    'AlphaGo Master',
    'AlphaGo Zero',
    'AlphaZero',
    'DALL-E',
    'GPT-3 175B (davinci)',
    'PaLM (540B)',
    'Llama 2-70B',
    'Falcon-180B',
    'GPT-4',
    'Gemini Ultra',
    'Inflection-2',
]

# Split the models by whether the training-hardware string mentions a TPU.
# na=False sends rows with missing hardware to the non-TPU ("gpu") group.
tpu_mask = cost_df['Training hardware'].str.contains('TPU', na=False)
tpu_cost_df = cost_df.loc[tpu_mask]
gpu_cost_df = cost_df.loc[~tpu_mask]

# Base trace: non-TPU models.
fig = px.scatter(
    gpu_cost_df,
    x='Publication date',
    y='Cost (inflation-adjusted)',
    log_y=True,
)

# TPU models: open circles with a legend entry, except for the 'cloud' method,
# where they are drawn like every other point and get no legend entry.
is_cloud_estimate = estimation_method == 'cloud'
fig.add_scatter(
    x=tpu_cost_df['Publication date'],
    y=tpu_cost_df['Cost (inflation-adjusted)'],
    mode='markers',
    marker_symbol='circle' if is_cloud_estimate else 'circle-open',
    name='' if is_cloud_estimate else 'Using equivalent price of TPU',
    showlegend=not is_cloud_estimate,
)

# One text-only trace per group, carrying the labels of the selected systems.
for group_cost_df in (gpu_cost_df, tpu_cost_df):
    labelled_df = group_cost_df.loc[group_cost_df['System'].isin(label_systems)]
    fig.add_scatter(
        x=labelled_df['Publication date'],
        y=labelled_df['Cost (inflation-adjusted)'],
        text=labelled_df['System'],
        mode='text',
        showlegend=False,
    )

# Give every marker trace added so far the same colour.
fig.update_traces(
    marker=dict(color='rgb(0,100,200)'),
    selector=dict(mode='markers'),
)

# Regression overlays. The predictions are exponentiated with 10** before
# plotting. The invisible lower CI bound is added first so that the upper
# bound can fill down to it ('tonexty').
trend_dates = predicted_cost_df['Publication date']
fig.add_scatter(
    x=trend_dates,
    y=10**predicted_cost_df['mean_ci_lower'],
    mode='lines',
    line=dict(width=0),
    showlegend=False,
)
fig.add_scatter(
    x=trend_dates,
    y=10**predicted_cost_df['mean_ci_upper'],
    mode='lines',
    fill='tonexty',
    fillcolor='rgba(0,100,200,0.2)',
    line=dict(width=0),
    name='95% CI of mean',
)
annual_growth_factor = 10**reg_results.params[1]
fig.add_scatter(
    x=trend_dates,
    y=10**predicted_cost_df['mean'],
    mode='lines',
    line=dict(color='rgb(0,100,200)'),
    name=f'Regression mean (growth rate: {annual_growth_factor:.1f}x per year)',
)

# Must run after all traces exist so that the text traces are positioned too.
fig.update_traces(textposition='top center')

# Axes: titles and limits. The x window follows the prediction range; the y
# limits are given in log10 units because the axis is logarithmic.
fig.update_xaxes(
    title_text='Publication date',
    range=[pred_start_date, pred_end_date],
)
y_axis_log_range = [4, 10] if estimation_method == 'hardware-acquisition' else [1, 9]
fig.update_yaxes(
    title_text='Cost (2023 USD, log scale)',
    range=y_axis_log_range,
)

# Layout in one place: legend inside the axes (bottom right), title, fixed
# 800x600 canvas, fonts and tight margins.
fig.update_layout(
    legend=dict(x=0.45, y=0.05),
    title_text=get_cost_plot_title(estimation_method, compute_threshold_method, compute_threshold),
    title_font=dict(size=16),
    font=dict(size=14),
    autosize=False,
    width=800,
    height=600,
    margin=dict(l=10, r=10, t=40, b=10),
)

save_plot(fig, results_dir, 'cost_regression')

fig.show()

# Cost components

In [580]:
# Names of the per-component cost columns in component_cost_df, in the order
# in which they are stacked in the charts.
cost_component_names = [
    f'{component} cost'
    for component in (
        'AI accelerator chip',
        'Other server components',
        'Cluster-level interconnect',
        'Energy',
    )
]

In [581]:
# Express each component as a percentage of the total 'Cost', stored in a new
# '<component> (%)' column.
for component_name in cost_component_names:
    component_share = component_cost_df[component_name] / component_cost_df['Cost']
    component_cost_df[f"{component_name} (%)"] = component_share * 100
component_cost_df['AI accelerator chip cost (%)']

5       49.745134
22            NaN
23      49.454717
38      44.118938
72            NaN
          ...    
1475          NaN
1480          NaN
1482          NaN
1483          NaN
1484          NaN
Name: AI accelerator chip cost (%), Length: 73, dtype: float64

In [582]:
# Keep only the models that have every percentage column filled in, ordered
# chronologically for plotting.
cost_component_pc_names = [f'{name} (%)' for name in cost_component_names]
fully_costed_df = component_cost_df.dropna(subset=cost_component_pc_names)
filtered_component_cost_df = fully_costed_df.sort_values(by='Publication date')

In [583]:
# Stacked bar chart of cost components, using component_cost_df
fig = px.bar(
    filtered_component_cost_df,
    x='System',
    y=cost_component_pc_names,
    barmode='stack',
)

# axis labels
fig.update_xaxes(title_text='ML model')
fig.update_yaxes(title_text='% of hardware CapEx + OpEx')
fig.update_layout(
    legend=dict(
        title_text='Cost component',
        x=0.60,
        y=0.05,
    )
)
# limits 0 to 100
fig.update_yaxes(range=[0, 100])

fig.update_yaxes(tickvals=list(range(0, 101, 10)))

# size
fig.update_layout(
    autosize=False,
    width=800,
    height=600,
)

# margins
fig.update_layout(margin=dict(l=10, r=10, t=40, b=10))

save_plot(fig, results_dir, 'cost_component_percentage')

fig.show()

In [584]:
# Bar chart of the energy share of each model's cost.
fig = px.bar(
    filtered_component_cost_df,
    x='System',
    y='Energy cost (%)',
    barmode='stack',
)

# Axes: the y-axis is limited to 0-30 %.
fig.update_xaxes(title_text='System')
fig.update_yaxes(
    title_text='Energy cost (% of Hardware CapEx + OpEx)',
    range=[0, 30],
)

# Layout: fixed canvas and tight margins.
fig.update_layout(
    autosize=False,
    width=800,
    height=600,
    margin=dict(l=10, r=10, t=40, b=10),
)

save_plot(fig, results_dir, 'energy_percentage')

fig.show()

In [585]:
# Labelled scatter of each model's energy cost against its publication date.
fig = px.scatter(
    filtered_component_cost_df,
    x='Publication date',
    y='Energy cost',
    text='System',
)
# axis labels
# The x-axis shows the publication date. It was previously titled 'System',
# copied from the bar charts where x really is the system name.
fig.update_xaxes(title_text='Publication date')
fig.update_yaxes(title_text='Energy cost')
# log y
fig.update_yaxes(type='log')
# size
fig.update_layout(
    autosize=False,
    width=800,
    height=600,
)

# margins
fig.update_layout(margin=dict(l=10, r=10, t=40, b=10))

save_plot(fig, results_dir, 'energy_cost')

fig.show()

In [586]:
from energy import energy_price

# Energy cost divided by the energy price for the publication year, stored as
# 'Energy (kWh)' and plotted over time.
# NOTE(review): this assumes energy_price(year) returns a price per kWh in the
# same currency as 'Energy cost' -- confirm in energy.py.
filtered_component_cost_df.loc[:, 'Energy (kWh)'] = [
    energy_cost / energy_price(publication_date.year)
    for energy_cost, publication_date in zip(
        filtered_component_cost_df['Energy cost'],
        filtered_component_cost_df['Publication date'],
    )
]

fig = px.scatter(
    filtered_component_cost_df,
    x='Publication date',
    y='Energy (kWh)',
    text='System',
)

# Log-scaled y-axis.
fig.update_yaxes(type='log')

# Layout: fixed canvas and tight margins.
fig.update_layout(
    autosize=False,
    width=800,
    height=600,
    margin=dict(l=10, r=10, t=40, b=10),
)

save_plot(fig, results_dir, 'energy_kwh')

fig.show()

In [587]:
# The per-row hardware lookup below needs a hardware name, so drop rows
# without one.
filtered_component_cost_df = filtered_component_cost_df.dropna(subset=['Training hardware'])

# Energy cost / energy price, divided by the hours in a quarter of a year
# (HOURS_PER_YEAR/4, i.e. a 3-month run), then by server_TDP_fraction of the
# training hardware.
# NOTE(review): the exact meaning of server_TDP_fraction is not visible here --
# confirm where it is defined.
filtered_component_cost_df.loc[:, 'Power capacity for 3-month training run (kW)'] = [
    energy_cost
    / energy_price(publication_date.year)
    / (HOURS_PER_YEAR/4)
    / server_TDP_fraction(training_hardware)
    for energy_cost, publication_date, training_hardware in zip(
        filtered_component_cost_df['Energy cost'],
        filtered_component_cost_df['Publication date'],
        filtered_component_cost_df['Training hardware'],
    )
]

fig = px.scatter(
    filtered_component_cost_df,
    x='Publication date',
    y='Power capacity for 3-month training run (kW)',
    text='System',
)

# Log-scaled y-axis.
fig.update_yaxes(type='log')

# Layout: fixed canvas and tight margins.
fig.update_layout(
    autosize=False,
    width=800,
    height=600,
    margin=dict(l=10, r=10, t=40, b=10),
)

save_plot(fig, results_dir, 'power_capacity_kw')

fig.show()

In [588]:
# Numeric version of the publication date for the component table, used as the
# predictor in the power-capacity regression in the next cell.
filtered_component_cost_df['Publication date (float)'] = datetime_to_float_year(
    pd.to_datetime(filtered_component_cost_df['Publication date'])
)

In [589]:
# OLS fit of the estimated power capacity against publication date, using the
# same helper and logy=True setting as the cost regression.
energy_reg_results = fit_ols_regression(
    filtered_component_cost_df,
    ['Publication date (float)'],
    'Power capacity for 3-month training run (kW)',
    logy=True
)
energy_reg_results.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.354
Model:,OLS,Adj. R-squared:,0.341
Method:,Least Squares,F-statistic:,26.31
Date:,"Fri, 17 May 2024",Prob (F-statistic):,5.18e-06
Time:,15:15:48,Log-Likelihood:,-40.432
No. Observations:,50,AIC:,84.86
Df Residuals:,48,BIC:,88.69
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-455.3279,89.307,-5.098,0.000,-634.892,-275.764
x1,0.2266,0.044,5.129,0.000,0.138,0.315

0,1,2,3
Omnibus:,2.606,Durbin-Watson:,1.736
Prob(Omnibus):,0.272,Jarque-Bera (JB):,2.496
Skew:,0.505,Prob(JB):,0.287
Kurtosis:,2.579,Cond. No.,2300000.0
