# Setup

In [70]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [71]:
from contextlib import redirect_stdout
import numpy as np
import os
import pandas as pd
import plotly.express as px

from cost import *
from plotting import *
from prices import *
from imputation import *
from inflation import *
from regression import *
from utils import *

In [72]:
# --- Run configuration ------------------------------------------------------
# Which cost estimator to run; must be a key of `estimation_method_lookup`.
estimation_method = 'cloud'
# How models are thresholded by compute: 'top_n' or 'window_percentile'.
compute_threshold_method = 'top_n'
# Threshold value, e.g. 10 to select top 10; 75 to select top 25%.
compute_threshold = 10
# Label for this run; in the cells shown here it only feeds the results directory name.
variant = 'original'
# Keywords: systems whose name contains any of these are dropped after estimation.
exclude_models_containing = list()

# --- Estimator dispatch -----------------------------------------------------
# Maps each supported method name to the function that implements it.
estimation_method_lookup = dict([
    ('cloud', estimate_cloud_costs),
    ('amortized', estimate_amortized_hardware_costs),
    ('hardware-acquisition', estimate_hardware_acquisition_cost),
    ('hardware-capex-opex', estimate_hardware_capex_opex),
])
cost_estimation_function = estimation_method_lookup[estimation_method]

# --- Output location --------------------------------------------------------
# One directory per (method, threshold, variant) combination; created if missing.
results_dir = f'results/{estimation_method}-{compute_threshold_method}={compute_threshold}-{variant}/'
os.makedirs(results_dir, exist_ok=True)

# Load data

In [73]:
# Load the three input tables (models, hardware, prices) using the thresholding
# settings chosen in the configuration cell.
frontier_pcd_df, hardware_df, price_df = load_data_for_cost_estimation(
    compute_threshold_method=compute_threshold_method,
    compute_threshold=compute_threshold,
)

In [74]:
# Row counts of the model, hardware and price tables, in that order.
tuple(len(table) for table in (frontier_pcd_df, hardware_df, price_df))

(67, 5510, 142)

In [75]:
# Load the data on all models (not only the frontier subset loaded above).
# NOTE: this reads from the network, so results depend on the CSV served at run time.
ALL_SYSTEMS_URL = "https://epochai.org/data/epochdb/all_systems.csv"

# Systems removed by exact name.
EXCLUDED_SYSTEMS = [
    'λ-WASP',
    'DOC + Finetune∗ + Partial Shuffle (WT2)',
    'DOC + Finetune∗ + Partial Shuffle (PTB)',
    'PanGu-α',
    'PIXART-α',
]

# Training hardware removed by exact name (rows with missing hardware are kept).
# NOTE(review): presumably hardware without usable price data for this analysis — confirm.
EXCLUDED_TRAINING_HARDWARE = [
    'NVIDIA GeForce GTX 580',
    'NVIDIA GeForce GTX 780',
    'NVIDIA GeForce GTX 980',
    'NVIDIA GeForce GTX 1080',
    'NVIDIA GeForce GTX 1080 Ti',
    'NVIDIA GeForce RTX 2080 Ti',
    'NVIDIA GeForce RTX 3090',
    'NVIDIA Geforce GTX1080 Ti',
    'NVIDIA Geforce GTX 1080 Ti',
    'NVIDIA TITAN Xp',
    'NVIDIA GeForce GTX TITAN',
    'NVIDIA GeForce GTX TITAN X',
    'NVIDIA GTX Titan Black',
    'NVIDIA Titan V',
    'NVIDIA M40',
    'NVIDIA Quadro P600',
    'NVIDIA Quadro RTX 5000',
    'NVIDIA Quadro RTX 4000',
    'NVIDIA Quadro RTX 8000',
    'Huawei Ascend 910',
    'NVIDIA RTX A6000',
    'NVIDIA RTX A5000',
    'NVIDIA A10 PCIe',
    'NVIDIA A800',
    'AMD Instinct MI250X',
    'Cerebras CS-2',
    'Google TPU v5e',
    'Google TPU v5p',
]

all_models = pd.read_csv(ALL_SYSTEMS_URL, dtype={'Training compute (FLOP)': 'float64'})

# Parse the publication date once and derive an approximate fractional year:
# year + (month - 1) / 12 + (day - 1) / 365.
_publication_date = pd.to_datetime(all_models['Publication date'])
all_models['Decimal year'] = (
    _publication_date.dt.year
    + (_publication_date.dt.month - 1) / 12
    + (_publication_date.dt.day - 1) / 365
)
all_models = all_models.sort_values(by='Decimal year')

# Rows without training compute are deliberately kept (no dropna on 'Training compute (FLOP)').
# A single mask replaces the chain of one-value filters. As with the original `!=`
# comparisons, rows whose value is missing (NaN) in a filtered column are retained.
_keep = (
    (all_models['Confidence'] != 'Wrong')
    & ~all_models['System'].isin(EXCLUDED_SYSTEMS)
    & ~all_models['Training hardware'].isin(EXCLUDED_TRAINING_HARDWARE)
)
all_models = all_models[_keep]

# Replace frontier model df with all models
frontier_pcd_df = all_models.copy()

# Cost estimation

In [76]:
# Run the selected estimator over every system. The call prints per-system diagnostics
# to stdout (see the recorded output below); the next cell repeats the same call with
# stdout redirected to a log file.
cost_df = cost_estimation_function(frontier_pcd_df, hardware_df, price_df)

==== System: Theseus ====
No training time found, assuming 33.0625

Could not find hardware model for Theseus


==== System: SNARC ====
No training time found, assuming 33.0625

Could not find hardware model for SNARC


==== System: Genetic algorithm ====
No training time found, assuming 33.0625

Could not find hardware model for Genetic algorithm


==== System: Self Organizing System ====
No training time found, assuming 33.0625

Could not find hardware model for Self Organizing System


==== System: Sequence-based pattern recognition ====
No training time found, assuming 33.0625

Could not find hardware model for Sequence-based pattern recognition


==== System: Conditional probability machines ====
No training time found, assuming 33.0625

Could not find hardware model for Conditional probability machines


==== System: Perceptron Mark I ====
No training time found, assuming 33.0625

Could not find hardware model for Perceptron Mark I


==== System: Pandemonium (morse) ====
No train

In [77]:
# Repeat the estimation with its printed diagnostics captured in a log file under
# results_dir instead of the notebook output.
with open(f'{results_dir}/cost_estimation.out', 'w') as log_file, redirect_stdout(log_file):
    cost_df = cost_estimation_function(frontier_pcd_df, hardware_df, price_df)

In [78]:
# The per-component breakdown is only computed for the CapEx + OpEx method, so
# `component_cost_df` (used by the "Cost components" cells) exists only in that case.
if estimation_method == 'hardware-capex-opex':
    # The estimator is given a copy of the model table for this second run.
    frontier_pcd_df_copy = frontier_pcd_df.copy()
    with open(f'{results_dir}/component_cost_estimation.out', 'w') as log_file, redirect_stdout(log_file):
        component_cost_df = cost_estimation_function(
            frontier_pcd_df_copy,
            hardware_df,
            price_df,
            separate_components=True,
        )

In [79]:
# Preview the estimation result: the input model columns plus a 'Cost' column
# (NaN where no cost was produced for a system).
cost_df

Unnamed: 0,System,Domain,Organization,Publication date,Reference,Link,Parameters,Parameters notes,Training compute (FLOP),Training compute notes,...,Base model,Finetune compute notes,Training cloud compute vendor,Batch size notes,Finetune compute (FLOP),Training compute upper bound,Archived links,Benchmark data,Decimal year,Cost
1292,Theseus,Robotics,Bell Laboratories,1950-07-02,Mighty Mouse,https://www.technologyreview.com/2018/12/19/13...,4.000000e+01,The learned part is the maze configuration. Th...,4.000000e+01,"The ""training"" consists on the mouse running a...",...,,,,,,,,,1950.502740,
1291,SNARC,Robotics,Harvard University,1952-01-08,A Neural-Analogue Calculator Based upon a Prob...,https://en.wikipedia.org/wiki/Stochastic_neura...,4.000000e+01,The link below seems to suggest the SNARC had ...,,,...,,,,,,,,,1952.019178,
1290,Genetic algorithm,Other,Institute for Advanced Study,1954-07-02,Numerical testing of evolution theories,https://link.springer.com/article/10.1007/BF01...,,,,,...,,,,,,,,,1954.502740,
1289,Self Organizing System,Other,Massachusetts Institute of Technology (MIT),1955-03-01,Generalization of pattern recognition in a sel...,https://dl.acm.org/doi/10.1145/1455292.1455309,2.250000e+02,Figure 4 contains the learnt weight matrix,,,...,,,,,,,,,1955.166667,
1288,Sequence-based pattern recognition,Vision,Massachusetts Institute of Technology (MIT),1955-03-01,Pattern recognition and modern computers,https://dl.acm.org/doi/10.1145/1455292.1455310,,,,,...,,,,,,,,,1955.166667,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4,Llama 3-70B,Language,Meta AI,2024-04-18,Introducing Meta Llama 3: The most capable ope...,https://ai.meta.com/blog/meta-llama-3/\n\nhttp...,7.000000e+10,,6.300000e+24,direct calculation\n15000000000000 tokens*7000...,...,,,,,,,,,2024.296575,6.792973e+06
3,SenseNova 5.0,Language,SenseTime,2024-04-23,,https://zhidx.com/p/421866.html,,,,,...,,,,,,,,,2024.310274,
2,phi-3-medium 14B,Language,Microsoft,2024-04-23,Phi-3 Technical Report: A Highly Capable Langu...,https://arxiv.org/abs/2404.14219,1.400000e+10,14B,4.032000e+23,counting operations: 6×4.8×10^12×14×10^9 ≈ 4.0...,...,,,,,,,,,2024.310274,
1,Qwen 1.5 110B,Language,Alibaba,2024-04-25,Qwen1.5-110B: The First 100B+ Model of the Qwe...,https://qwenlm.github.io/blog/qwen1.5-110b/?re...,1.100000e+11,,,,...,,,,,,,,,2024.315753,


In [80]:
# Number of systems for which a cost estimate was produced (non-missing 'Cost').
cost_df['Cost'].count()

220

## Exclusion

Drop systems whose name contains any keyword listed in `exclude_models_containing`.

In [81]:
# Drop every system whose name contains one of the configured keywords.
# - regex=False: each keyword is matched as a literal substring, so names containing
#   regex metacharacters such as "(" or "+" are matched as written.
# - na=False: a missing system name counts as "no match" rather than NaN, so such rows
#   are no longer dropped as a side effect of the `== False` comparison.
for kw in exclude_models_containing:
    cost_df = cost_df[~cost_df['System'].str.contains(kw, regex=False, na=False)]
# Show the names of the systems that remain.
list(cost_df['System'])

['Theseus',
 'SNARC',
 'Genetic algorithm',
 'Self Organizing System',
 'Sequence-based pattern recognition',
 'Conditional probability machines',
 'Perceptron Mark I',
 'Pandemonium (morse)',
 'Samuel Neural Checkers',
 'Pattern recognition and reading by machine',
 'Perceptron (1960)',
 'ADALINE',
 'LMS',
 'Heuristic problem solving for AI',
 'PAPA',
 'Linear Decision Functions',
 'MADALINE I',
 'Print Recognition Logic',
 'STeLLA',
 'MENACE',
 'Heuristic Reinforcement Learning',
 'Samuel Neural Checkers II',
 'BOXES',
 'GLEE',
 'Graph-based structural reasoning',
 'Punish/Reward',
 'Naive Bayes',
 'Cognitron',
 'Continuous speech recognition by statistical methods',
 'TD(0)',
 'Internal functionality of visual invariants',
 'Neocognitron',
 'Kohonen network',
 'Hopfield network',
 'ASE+ACE',
 'Learnability theory of language development',
 'Error Propagation',
 'Learning past tenses',
 'PDP model for serial order',
 'Back-propagation',
 'Optimized Multi-Scale Edge Detection',
 'NetT

Uncomment the cell below to check data availability for a specific system.

In [82]:
# system = 'WizardLM-7B'
# row = cost_df.loc[cost_df['System'] == system]
# print('Cost:', row['Cost'].values[0])
# print('Training hardware:', row['Training hardware'].values[0])
# print('Training time (hours):', row['Training time (hours)'].values[0])
# print('Hardware quantity:', row['Hardware quantity'].values[0])
# print('Hardware utilization:', row['Hardware utilization'].values[0])

# Apply inflation adjustment

In [83]:
# Cost estimates before inflation adjustment, restricted to systems that have one.
cost_df.loc[cost_df['Cost'].notna(), 'Cost']

971    1.783296e+05
970    1.857600e+04
928    6.182400e+02
923    3.096000e+04
885    3.001965e+05
           ...     
24     8.590207e+04
20     1.150248e+07
18     2.990346e+06
9      3.346887e+05
4      6.792973e+06
Name: Cost, Length: 220, dtype: float64

In [84]:
# Adjust the 'Cost' column for inflation using the price index file; later cells read
# the result from a 'Cost (inflation-adjusted)' column.
# NOTE(review): in the recorded run this call raised
#   "IndexError: index 0 is out of bounds for axis 0 with size 0"
# and the cells that depend on 'Cost (inflation-adjusted)' were not executed afterwards.
# Possibly some rows of cost_df (it now holds all models, back to 1950) have no matching
# entry in the price index — TODO confirm inside adjust_column_for_inflation.
# NOTE(review): '2023-12-01' is presumably the target price date (plots are labelled
# "2023 USD") — confirm.
cost_df = adjust_column_for_inflation(cost_df, 'Cost', 'data/PCU518210518210.csv', '2023-12-01')

IndexError: index 0 is out of bounds for axis 0 with size 0

In [None]:
# Inflation-adjusted costs, restricted to systems that have one.
cost_df['Cost (inflation-adjusted)'].dropna()

In [None]:
# Sanity check: the adjustment must not lose or invent any cost values.
assert cost_df['Cost (inflation-adjusted)'].count() == cost_df['Cost'].count()

# Regression

In [None]:
# Publication date as a fractional year; this is the regressor of the fit below.
_publication_datetimes = pd.to_datetime(cost_df['Publication date'])
cost_df['Publication date (float)'] = datetime_to_float_year(_publication_datetimes)

In [None]:
# Fit an OLS regression of inflation-adjusted cost on publication year.
# NOTE(review): logy=True presumably fits log10(cost) — the regression plot raises 10 to
# the predicted values — confirm in fit_ols_regression.
reg_results = fit_ols_regression(cost_df, ['Publication date (float)'], 'Cost (inflation-adjusted)', logy=True)
# Display the fit summary as the cell output.
reg_results.summary()

In [None]:
# Write the growth-rate report to a file under results_dir ...
with open(f'{results_dir}/regression_results.out', 'w') as report_file, redirect_stdout(report_file):
    print_growth_rates(reg_results)
# ... and print the same report in the notebook.
print_growth_rates(reg_results)

In [None]:
# Prediction window (also used as the x-axis range of the regression plot).
pred_start_year, pred_end_year = 2015, 2025
pred_start_date, pred_end_date = (
    f'{year}-01-01' for year in (pred_start_year, pred_end_year)
)

# 100 evenly spaced fractional years across the window, endpoints included.
_year_grid = np.linspace(pred_start_year, pred_end_year, num=100)
pred_years = pd.DataFrame({'Publication date (float)': _year_grid})
pred_years

In [None]:

# Evaluate the fitted regression on the prediction grid. The regression plot reads the
# 'mean', 'mean_ci_lower' and 'mean_ci_upper' columns of the result.
predicted_cost_df = get_predictions(reg_results, pred_years, ['Publication date (float)'])
# Convert the fractional years back to datetimes so predictions share the plots' x axis.
predicted_cost_df['Publication date'] = predicted_cost_df['Publication date (float)'].apply(float_year_to_datetime)
predicted_cost_df

# Export data

In [85]:
# Columns written to the exported dataset, in output order.
# Commented-out entries are currently excluded from the export; note that
# 'Cost (inflation-adjusted)' is among them, so only the unadjusted 'Cost' is exported.
keep_cols = [
    'System',
    'Domain',
    # 'Task',
    'Model accessibility',
    'Reference',
    'Publication date',
    'Organization',
    'Parameters',
    'Training compute (FLOP)',
    'Training dataset size (datapoints)',
    'Epochs',
    'Training time (hours)',
    'Training hardware',
    'Country (from Organization)',
    'Base model',
    'Finetune compute (FLOP)',
    'Hardware quantity',
    'Hardware utilization',
    'Training cloud compute vendor',
    'Training data center',
    # 'Training time (chip hours)',
    'Cost',
    # 'Cost (inflation-adjusted)',
]
# Preview the export table.
cost_df[keep_cols]

Unnamed: 0,System,Domain,Model accessibility,Reference,Publication date,Organization,Parameters,Training compute (FLOP),Training dataset size (datapoints),Epochs,Training time (hours),Training hardware,Country (from Organization),Base model,Finetune compute (FLOP),Hardware quantity,Hardware utilization,Training cloud compute vendor,Training data center,Cost
1292,Theseus,Robotics,,Mighty Mouse,1950-07-02,Bell Laboratories,4.000000e+01,4.000000e+01,4.000000e+01,,,,United States of America,,,,,,,
1291,SNARC,Robotics,,A Neural-Analogue Calculator Based upon a Prob...,1952-01-08,Harvard University,4.000000e+01,,,,,,United States of America,,,,,,,
1290,Genetic algorithm,Other,,Numerical testing of evolution theories,1954-07-02,Institute for Advanced Study,,,,,,,United States of America,,,,,,,
1289,Self Organizing System,Other,,Generalization of pattern recognition in a sel...,1955-03-01,Massachusetts Institute of Technology (MIT),2.250000e+02,,2.560000e+02,,,,United States of America,,,,,,,
1288,Sequence-based pattern recognition,Vision,,Pattern recognition and modern computers,1955-03-01,Massachusetts Institute of Technology (MIT),,,,,,,United States of America,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4,Llama 3-70B,Language,Open access (restricted use),Introducing Meta Llama 3: The most capable ope...,2024-04-18,Meta AI,7.000000e+10,6.300000e+24,1.500000e+13,,,NVIDIA H100 SXM5,United States of America,,,16000.0,0.4,,,6.792973e+06
3,SenseNova 5.0,Language,Hosted access (no API),,2024-04-23,SenseTime,,,1.670000e+12,,,,Hong Kong,,,,,,,
2,phi-3-medium 14B,Language,Unreleased,Phi-3 Technical Report: A Highly Capable Langu...,2024-04-23,Microsoft,1.400000e+10,4.032000e+23,4.800000e+12,,,,United States of America,,,,,,,
1,Qwen 1.5 110B,Language,Open source,Qwen1.5-110B: The First 100B+ Model of the Qwe...,2024-04-25,Alibaba,1.100000e+11,,,,,,China,,,,,,,


In [86]:
# Export the selected columns. The file name carries the configured estimation method so
# that runs with different methods do not overwrite one another's export
# (for estimation_method == 'cloud' the path is the same as before).
cost_df[keep_cols].to_csv(f'results/price dataset {estimation_method}.csv', index=False)

# Plots

In [None]:
# Scatter of inflation-adjusted cost over time, every point labelled with its system name.
fig = px.scatter(
    cost_df,
    x='Publication date',
    y='Cost (inflation-adjusted)',
    text='System',
    log_y=True,
)
fig.update_traces(textposition='top center')

# Axis titles, plus the fixed date window on the x axis.
fig.update_xaxes(title_text='Publication date', range=['2015-01-01', '2025-01-01'])
fig.update_yaxes(title_text='Cost (2023 USD, log scale)')

# Figure-level styling in one layout update: no legend, title, size, fonts, margins.
fig.update_layout(
    showlegend=False,
    title_text=get_cost_plot_title(estimation_method, compute_threshold_method, compute_threshold),
    title_font=dict(size=16),
    font=dict(size=14),
    autosize=False,
    width=800,
    height=600,
    margin=dict(l=10, r=10, t=40, b=10),
)

save_plot(fig, results_dir, 'cost_scatter')

fig.show()

In [None]:
# Systems that get a text label on the plot.
label_systems = ['GNMT', 'AlphaGo Master', 'AlphaGo Zero', 'AlphaZero', 'DALL-E', 'GPT-3 175B (davinci)', 'PaLM (540B)', 'Llama 2-70B', 'Falcon 180B', 'GPT-4', 'Gemini Ultra', 'Inflection-2']

# Split systems by whether their training hardware mentions a TPU
# (rows with missing hardware fall into the non-TPU group).
tpu_mask = cost_df['Training hardware'].str.contains('TPU', na=False)
tpu_cost_df = cost_df.loc[tpu_mask]
gpu_cost_df = cost_df.loc[~tpu_mask]

# For every method except 'cloud', TPU points are drawn hollow and get a legend entry.
is_cloud = estimation_method == 'cloud'

fig = px.scatter(
    gpu_cost_df,
    x='Publication date',
    y='Cost (inflation-adjusted)',
    log_y=True,
)
fig.add_scatter(
    x=tpu_cost_df['Publication date'],
    y=tpu_cost_df['Cost (inflation-adjusted)'],
    mode='markers',
    marker_symbol='circle' if is_cloud else 'circle-open',
    name='' if is_cloud else 'Using equivalent price of TPU',
    showlegend=not is_cloud,
)

# Text labels for the highlighted systems: one text-only trace per group, with the
# labelled subset computed once per group.
for _group_df in (gpu_cost_df, tpu_cost_df):
    _labelled = _group_df.loc[_group_df['System'].isin(label_systems)]
    fig.add_scatter(
        x=_labelled['Publication date'],
        y=_labelled['Cost (inflation-adjusted)'],
        text=_labelled['System'],
        mode='text',
        showlegend=False,
    )

# Marker color (applies to the marker traces added so far)
fig.update_traces(
    marker=dict(
        color='rgb(0,100,200)',
    ),
    selector=dict(mode='markers'),
)

# Shade in CI: an invisible lower-bound line, then the upper bound filled down to it.
# The order of these two traces matters because 'tonexty' fills to the previous trace.
fig.add_scatter(
    x=predicted_cost_df['Publication date'],
    y=10**predicted_cost_df['mean_ci_lower'],
    mode='lines',
    line=dict(width=0),
    showlegend=False,
)
fig.add_scatter(
    x=predicted_cost_df['Publication date'],
    y=10**predicted_cost_df['mean_ci_upper'],
    mode='lines',
    fill='tonexty',
    fillcolor='rgba(0,100,200,0.2)',
    line=dict(width=0),
    name='95% CI of mean',
)

# Select the second parameter by position. np.asarray makes this work whether `params`
# is a label-indexed pandas Series or a NumPy array; plain `params[1]` on a
# label-indexed Series relies on deprecated positional fallback.
# Assumes, as the original code did, that the slope is the second parameter.
growth_rate = 10 ** np.asarray(reg_results.params)[1]
fig.add_scatter(
    x=predicted_cost_df['Publication date'],
    y=10**predicted_cost_df['mean'],
    mode='lines',
    line=dict(color='rgb(0,100,200)'),
    name=f'Regression mean (growth rate: {growth_rate:.1f}x per year)',
)

fig.update_traces(textposition='top center')

# Axis titles and limits. The x range follows the prediction window; the earlier
# hard-coded duplicate of the same range was removed because this one overrode it.
fig.update_xaxes(title_text='Publication date', range=[pred_start_date, pred_end_date])
# On a log axis plotly interprets the range in log10 units (e.g. [1, 9] is 10^1 to 10^9).
if estimation_method == 'hardware-acquisition':
    fig.update_yaxes(range=[4, 10])
else:
    fig.update_yaxes(range=[1, 9])
fig.update_yaxes(title_text='Cost (2023 USD, log scale)')

# Figure-level styling: legend on bottom-right of the axes, title, size, fonts, margins.
fig.update_layout(
    legend=dict(
        x=0.45,
        y=0.05,
    ),
    title_text=get_cost_plot_title(estimation_method, compute_threshold_method, compute_threshold),
    title_font=dict(size=16),
    font=dict(size=14),
    autosize=False,
    width=800,
    height=600,
    margin=dict(l=10, r=10, t=40, b=10),
)

save_plot(fig, results_dir, 'cost_regression')

fig.show()

# Cost components

In [None]:
# Names of the component columns in component_cost_df; the following cells express each
# one as a percentage of the 'Cost' column.
cost_component_names = [
    'AI accelerator chip cost',
    'Other server components cost',
    'Cluster-level interconnect cost',
    'Energy cost',
]

In [None]:
# Add one "<component> (%)" column per component: its share of the 'Cost' column, in percent.
for component_name in cost_component_names:
    pct_column = f"{component_name} (%)"
    component_cost_df[pct_column] = component_cost_df[component_name] / component_cost_df['Cost'] * 100
# Show the accelerator share as a quick check.
component_cost_df['AI accelerator chip cost (%)']

In [None]:
# Percentage column names, in the same order as the component names.
cost_component_pc_names = [f'{name} (%)' for name in cost_component_names]
# Keep only systems with a complete breakdown, ordered by publication date.
filtered_component_cost_df = (
    component_cost_df
    .dropna(subset=cost_component_pc_names)
    .sort_values(by='Publication date')
)

In [None]:
# Stacked bar chart of cost components, using component_cost_df
fig = px.bar(
    filtered_component_cost_df,
    x='System',
    y=cost_component_pc_names,
    barmode='stack',
)

# axis labels
fig.update_xaxes(title_text='ML model')
fig.update_yaxes(title_text='% of hardware CapEx + OpEx')
fig.update_layout(
    legend=dict(
        title_text='Cost component',
        x=0.60,
        y=0.05,
    )
)
# limits 0 to 100
fig.update_yaxes(range=[0, 100])

fig.update_yaxes(tickvals=list(range(0, 101, 10)))

# size
fig.update_layout(
    autosize=False,
    width=800,
    height=600,
)

# margins
fig.update_layout(margin=dict(l=10, r=10, t=40, b=10))

save_plot(fig, results_dir, 'cost_component_percentage')

fig.show()

In [None]:
# Energy share of total cost, one bar per system.
fig = px.bar(
    filtered_component_cost_df,
    x='System',
    y='Energy cost (%)',
    barmode='stack',
)

# Axis titles; the y axis is limited to 0-30 %.
fig.update_xaxes(title_text='System')
fig.update_yaxes(
    title_text='Energy cost (% of Hardware CapEx + OpEx)',
    range=[0, 30],
)

# Figure size and margins.
fig.update_layout(
    autosize=False,
    width=800,
    height=600,
    margin=dict(l=10, r=10, t=40, b=10),
)

save_plot(fig, results_dir, 'energy_percentage')

fig.show()

In [None]:
# Energy cost of each system over time (log scale), labelled with the system name.
fig = px.scatter(
    filtered_component_cost_df,
    x='Publication date',
    y='Energy cost',
    text='System',
)
# Axis labels. The x axis plots the publication date, so it is titled accordingly
# (it was previously mislabelled 'System').
fig.update_xaxes(title_text='Publication date')
fig.update_yaxes(title_text='Energy cost', type='log')

# Figure size and margins.
fig.update_layout(
    autosize=False,
    width=800,
    height=600,
    margin=dict(l=10, r=10, t=40, b=10),
)

save_plot(fig, results_dir, 'energy_cost')

fig.show()

In [None]:
from energy import energy_price

# Convert each system's energy cost into an energy amount by dividing it by the
# energy price looked up for the system's publication year.
filtered_component_cost_df.loc[:, 'Energy (kWh)'] = [
    record['Energy cost'] / energy_price(record['Publication date'].year)
    for _, record in filtered_component_cost_df.iterrows()
]

# Scatter of energy over time, labelled with the system name.
fig = px.scatter(
    filtered_component_cost_df,
    x='Publication date',
    y='Energy (kWh)',
    text='System',
)
# Log-scaled y axis.
fig.update_yaxes(type='log')
# Figure size and margins.
fig.update_layout(
    autosize=False,
    width=800,
    height=600,
    margin=dict(l=10, r=10, t=40, b=10),
)

save_plot(fig, results_dir, 'energy_kwh')

fig.show()

In [None]:
# The hardware name is needed below, so systems without one are dropped first.
filtered_component_cost_df = filtered_component_cost_df.dropna(subset=['Training hardware'])

# Energy cost -> energy (divide by the energy price for the publication year)
# -> average power over a quarter of a year (divide by HOURS_PER_YEAR / 4)
# -> divide by server_TDP_fraction for the training hardware.
# NOTE(review): server_TDP_fraction is presumably the fraction of rated server power
# drawn on average — confirm against its definition.
filtered_component_cost_df.loc[:, 'Power capacity for 3-month training run (kW)'] = [
    (
        record['Energy cost']
        / energy_price(record['Publication date'].year)
        / (HOURS_PER_YEAR/4)
        / server_TDP_fraction(record['Training hardware'])
    )
    for _, record in filtered_component_cost_df.iterrows()
]

# Scatter of the implied power capacity over time, labelled with the system name.
fig = px.scatter(
    filtered_component_cost_df,
    x='Publication date',
    y='Power capacity for 3-month training run (kW)',
    text='System',
)
# Log-scaled y axis.
fig.update_yaxes(type='log')
# Figure size and margins.
fig.update_layout(
    autosize=False,
    width=800,
    height=600,
    margin=dict(l=10, r=10, t=40, b=10),
)

save_plot(fig, results_dir, 'power_capacity_kw')

fig.show()

In [None]:
# Publication date as a fractional year; this is the regressor of the power-capacity fit.
_component_publication_datetimes = pd.to_datetime(filtered_component_cost_df['Publication date'])
filtered_component_cost_df['Publication date (float)'] = datetime_to_float_year(
    _component_publication_datetimes
)

In [None]:
# Fit an OLS regression of the implied power capacity on publication year.
# NOTE(review): logy=True presumably fits the logarithm of the target, as in the cost
# regression — confirm in fit_ols_regression.
energy_reg_results = fit_ols_regression(
    filtered_component_cost_df,
    ['Publication date (float)'],
    'Power capacity for 3-month training run (kW)',
    logy=True
)
# Display the fit summary as the cell output.
energy_reg_results.summary()