# Setup

In [55]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
# Re-running this cell on a live kernel only prints an "already loaded" notice.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [56]:
from contextlib import redirect_stdout
import numpy as np
import os
import pandas as pd
import plotly.express as px

from cost import *
from plotting import *
from prices import *
from inflation import *
from regression import *
from utils import *

In [57]:
# --- Run configuration -------------------------------------------------------
# Selection method for frontier models: top_n, window_percentile
compute_threshold_method = 'top_n'
# Threshold for the selection method, e.g. 10 to select top 10; 75 to select top 25%
compute_threshold = 10
# Whatever else distinguishes this run, e.g. 'excluding-AlphaGo'
variant = '2025-03-17_exclude_finetunes_at_threshold_stage'
# Model-name keywords to exclude, e.g. ['GNMT', 'AlphaZero', 'AlphaGo Master', 'AlphaGo Zero']
exclude_models_containing = []

# All three cost estimation methods are run. The dispatch table maps each method
# label to the function that implements it; the ordered list of labels is taken
# from the table's keys (dicts preserve insertion order).
estimation_method_lookup = {
    'hardware-capex-energy': estimate_hardware_capex_energy,
    'hardware-acquisition': estimate_hardware_acquisition_cost,
    'cloud': estimate_cloud_costs,
}
estimation_methods = list(estimation_method_lookup)

# Every artefact of this run is written below a directory named after the configuration.
results_dir = f'results/all-methods-{compute_threshold_method}={compute_threshold}-{variant}/'
os.makedirs(results_dir, exist_ok=True)

# Load data

In [58]:
# Load the three input tables, selecting frontier models with the configured threshold.
frontier_pcd_df, hardware_df, price_df = load_data_for_cost_estimation(
    compute_threshold=compute_threshold,
    compute_threshold_method=compute_threshold_method,
)

In [59]:
# Row counts of the three loaded tables, in load order.
tuple(map(len, (frontier_pcd_df, hardware_df, price_df)))

(89, 5775, 590)

# Cost estimation

In [60]:
# Run every configured estimation method. Whatever an estimator prints is
# redirected into a per-method log file under results_dir, keeping the notebook
# output short.
cost_dfs = {}
component_cost_df = None

for estimation_method in estimation_methods:
    print(f"\n=== Running {estimation_method} estimation ===")
    cost_estimation_function = estimation_method_lookup[estimation_method]

    # Each estimator receives its own copy of the frontier table.
    with open(f'{results_dir}/cost_estimation_{estimation_method}.out', 'w') as f, redirect_stdout(f):
        cost_df = cost_estimation_function(frontier_pcd_df.copy(), hardware_df, price_df)
    cost_dfs[estimation_method] = cost_df

    # The per-component breakdown is only produced for hardware-capex-energy.
    if estimation_method != 'hardware-capex-energy':
        continue

    frontier_pcd_df_copy = frontier_pcd_df.copy()
    with open(f'{results_dir}/component_cost_estimation.out', 'w') as f, redirect_stdout(f):
        component_cost_df = cost_estimation_function(
            frontier_pcd_df_copy, hardware_df, price_df, separate_components=True
        )

print("\nCost estimation completed for all methods")


=== Running hardware-capex-energy estimation ===

=== Running hardware-acquisition estimation ===

=== Running cloud estimation ===

Cost estimation completed for all methods


In [61]:
# Summarise data availability and the cost range for each estimation method.
for method, df in cost_dfs.items():
    # Rows that received a cost estimate; the availability counts below are
    # restricted to these rows.
    _has_cost = df['Cost'].notna()
    print(
        f"\n=== {method} results ===",
        f"Total models: {len(df)}",
        f"Models with cost estimates: {_has_cost.sum()}",
        f"Models with training time: {df.loc[_has_cost, 'Training time (hours)'].notna().sum()}",
        f"Models with hardware utilization: {df.loc[_has_cost, 'Hardware utilization'].notna().sum()}",
        f"Cost range: ${df['Cost'].min():.0f} - ${df['Cost'].max():.0f}",
        "",  # trailing blank line between methods
        sep="\n",
    )


=== hardware-capex-energy results ===
Total models: 89
Models with cost estimates: 61
Models with training time: 40
Models with hardware utilization: 22
Cost range: $185 - $301454771


=== hardware-acquisition results ===
Total models: 89
Models with cost estimates: 43
Models with training time: 37
Models with hardware utilization: 22
Cost range: $32363 - $5492577778


=== cloud results ===
Total models: 89
Models with cost estimates: 56
Models with training time: 36
Models with hardware utilization: 21
Cost range: $10175 - $1166400000



In [62]:
# Use hardware-capex-energy results as the base for further analysis.
# The bare name on the last line displays the frame as the cell output.
cost_df = cost_dfs['hardware-capex-energy']
cost_df

Unnamed: 0,Model,Domain,Task,Organization,Authors,Publication date,Reference,Link,Citations,Notability criteria,...,Organization categorization (from Organization),Training compute cost (2023 USD),Utilization notes,Numerical format,Training power draw (W),Training compute estimation method,Hugging Face developer id,Post-training compute (FLOP),Post-training compute notes,Cost
52,Llama 4 Behemoth (preview),"Multimodal,Language,Vision","Chat,Code generation,Visual question answering...",Meta AI,,2025-04-05,The Llama 4 herd: The beginning of a new era o...,https://ai.meta.com/blog/llama-4-multimodal-in...,,Training cost,...,Industry,,,,,Operation counting,,,,
87,GPT-4.5,"Language,Vision,Multimodal","Language modeling/generation,Question answerin...",OpenAI,"Foundational contributors\r\nAlex Paino, Ali K...",2025-02-27,Introducing GPT-4.5,https://openai.com/index/introducing-gpt-4-5/,,Training cost,...,Industry,,,,,Benchmarks,,,,
94,Claude 3.7 Sonnet,"Language,Vision,Multimodal","Language modeling/generation,Question answerin...",Anthropic,,2025-02-24,Claude 3.7 Sonnet,https://www.anthropic.com/news/claude-3-7-sonnet,,Training cost,...,Industry,,,,,,,,,
101,Grok-3,"Language,Vision,Multimodal","Chat,Language modeling/generation,Question ans...",xAI,,2025-02-17,Grok 3 Beta — The Age of Reasoning Agents,https://x.ai/blog/grok-3,,Training cost,...,Industry,,,,1.374358e+08,"Hardware,Comparison with other models",,,,3.014548e+08
206,Doubao-pro,Language,"Language modeling/generation,Question answerin...",ByteDance,,2024-10-28,Doubao General Model Pro (Doubao-pro),https://www.volcengine.com/docs/6360/1264663,,Training cost,...,Industry,,,,,Operation counting,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1632,ResNet-200,Vision,Image classification,Microsoft Research Asia,"Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun",2016-09-17,Identity Mappings in Deep Residual Networks,https://link.springer.com/chapter/10.1007/978-...,9621.0,Highly cited,...,Industry,,,,,Hardware,,,,
1660,AlphaGo Lee,Games,Go,DeepMind,"David Silver, Aja Huang, Chris J. Maddison, Ar...",2016-01-27,Mastering the game of Go with deep neural netw...,https://www.nature.com/articles/nature16961,16057.0,Highly cited,...,Industry,,,,,Comparison with other models,,,,
1664,ResNet-152 (ImageNet),Vision,Image classification,Microsoft,"Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun",2015-12-10,Deep Residual Learning for Image Recognition,https://arxiv.org/abs/1512.03385,175697.0,Highly cited,...,Industry,,,FP32,,"Operation counting,Third-party estimation",,,,
1665,DeepSpeech2 (English),Speech,Speech recognition,Baidu Research - Silicon Valley AI Lab,"Dario Amodei, Rishita Anubhai, Eric Battenberg...",2015-12-08,Deep Speech 2: End-to-End Speech Recognition i...,https://arxiv.org/abs/1512.02595,2853.0,Highly cited,...,Industry,$206.31,"""Overall the system sustains approximately 50 ...",FP32,8.463468e+03,"Operation counting,Third-party estimation",,,,1.854566e+02


In [63]:
# Number of models with a non-null cost estimate (hardware-capex-energy method).
cost_df['Cost'].notna().sum()

61

In [64]:
# Of the models with a cost estimate, how many also have a recorded training time.
cost_df.dropna(subset=['Cost'])['Training time (hours)'].notna().sum()

40

In [65]:
# Of the models with a cost estimate, how many also have a recorded hardware utilization.
cost_df.dropna(subset=['Cost'])['Hardware utilization'].notna().sum()

22

## Exclusion

In [66]:
# Last 15 rows (name and publication date) before any exclusions are applied,
# for comparison with the same view after the exclusion step.
cost_df[['Model', 'Publication date']].tail(15)

Unnamed: 0,Model,Publication date
1566,AlphaGo Master,2017-10-19
1567,AlphaGo Zero,2017-10-18
1572,Libratus,2017-08-19
1577,OpenAI TI7 DOTA 1v1,2017-08-11
1584,JFT,2017-07-10
1604,MoE-Multi,2017-01-23
1615,PolyNet,2016-11-17
1617,NASv3 (CIFAR-10),2016-11-05
1623,Xception,2016-10-07
1624,GNMT,2016-09-26


In [67]:
# Apply exclusions to all cost dataframes: drop every model whose name contains
# one of the keywords in exclude_models_containing.
#
# Keywords are matched as literal substrings (regex=False), so names containing
# regex metacharacters such as 'NASv3 (CIFAR-10)' match as written. Rows with a
# missing model name count as "no match" (na=False) and are kept, rather than
# being dropped as a side effect of comparing NaN with False.
#
# NOTE(review): only the frames in cost_dfs are filtered. component_cost_df is
# not touched here, so the component breakdown still includes excluded models —
# confirm whether that is intended.
for method in estimation_methods:
    for kw in exclude_models_containing:
        cost_dfs[method] = cost_dfs[method][
            ~cost_dfs[method]['Model'].str.contains(kw, regex=False, na=False)
        ]

# Show the models after exclusion (using hardware-capex-energy as reference)
cost_dfs['hardware-capex-energy'][['Model', 'Publication date']].tail(15)

Unnamed: 0,Model,Publication date
1566,AlphaGo Master,2017-10-19
1567,AlphaGo Zero,2017-10-18
1572,Libratus,2017-08-19
1577,OpenAI TI7 DOTA 1v1,2017-08-11
1584,JFT,2017-07-10
1604,MoE-Multi,2017-01-23
1615,PolyNet,2016-11-17
1617,NASv3 (CIFAR-10),2016-11-05
1623,Xception,2016-10-07
1624,GNMT,2016-09-26


Use the cell below to check data availability for a specific system.

In [68]:
# Debugging aid (disabled): uncomment and set `system` to a model name to print
# the cost estimate and the inputs recorded for that single model.
# system = 'WizardLM-7B'
# row = cost_df.loc[cost_df['Model'] == system]
# print('Cost:', row['Cost'].values[0])
# print('Training hardware:', row['Training hardware'].values[0])
# print('Training time (hours):', row['Training time (hours)'].values[0])
# print('Hardware quantity:', row['Hardware quantity'].values[0])
# print('Hardware utilization:', row['Hardware utilization'].values[0])

# Apply inflation adjustment

In [69]:
# Show costs before inflation adjustment (using hardware-capex-energy).
# Only models with a non-null cost are listed.
cost_dfs['hardware-capex-energy']['Cost'].dropna()

101     3.014548e+08
366     3.060541e+07
403     5.126034e+07
448     2.057972e+07
612     1.179460e+07
            ...     
1604    3.538189e+03
1615    5.635997e+02
1623    1.155451e+04
1624    1.774592e+05
1665    1.854566e+02
Name: Cost, Length: 61, dtype: float64

In [70]:
# Apply inflation adjustment to all cost dataframes.
# The helper is asked to adjust the 'Cost' column; the frame it returns is stored
# back into cost_dfs and carries the 'Cost (inflation-adjusted)' column read below.
# NOTE(review): 'data/PCU518210518210.csv' is presumably the price-index series and
# '2024-12-01' the date whose price level costs are expressed in — confirm against
# adjust_column_for_inflation.
for method in estimation_methods:
    cost_dfs[method] = adjust_column_for_inflation(cost_dfs[method], 'Cost', 'data/PCU518210518210.csv', '2024-12-01')

# Update the main cost_df reference so it points at the adjusted (and, if any
# exclusions were configured, filtered) hardware-capex-energy frame.
cost_df = cost_dfs['hardware-capex-energy']

In [71]:
# Inflation-adjusted costs for the hardware-capex-energy method (non-null only).
cost_df['Cost (inflation-adjusted)'].dropna()

101     3.008724e+08
366     3.049986e+07
403     5.104052e+07
448     2.052898e+07
612     1.180459e+07
            ...     
1604    3.874123e+03
1615    6.171107e+02
1623    1.265155e+04
1624    1.943081e+05
1665    2.068604e+02
Name: Cost (inflation-adjusted), Length: 61, dtype: float64

In [72]:
# Sanity check: the inflation adjustment must neither create nor lose estimates,
# i.e. both columns have an equal number of non-null values.
assert cost_df['Cost (inflation-adjusted)'].notna().sum() == cost_df['Cost'].notna().sum()

# Regression

In [73]:
# Add the publication date as a numeric column to serve as the regression predictor
# (presumably a fractional year, going by the helper's name — TODO confirm).
# NOTE(review): this writes into cost_df, which is the same object as
# cost_dfs['hardware-capex-energy'], so that frame gains the column too.
cost_df['Publication date (float)'] = datetime_to_float_year(pd.to_datetime(cost_df['Publication date']))

In [74]:
# Fit an OLS regression of inflation-adjusted cost on publication date.
# NOTE(review): logy=True presumably regresses log10(cost); the growth-rate
# printout further down reports the slope in orders of magnitude per year —
# confirm in fit_ols_regression.
reg_results = fit_ols_regression(cost_df, ['Publication date (float)'], 'Cost (inflation-adjusted)', logy=True)
reg_results.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.707
Model:,OLS,Adj. R-squared:,0.702
Method:,Least Squares,F-statistic:,142.1
Date:,"Fri, 04 Jul 2025",Prob (F-statistic):,2.36e-17
Time:,12:51:19,Log-Likelihood:,-62.231
No. Observations:,61,AIC:,128.5
Df Residuals:,59,BIC:,132.7
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-887.9037,74.944,-11.848,0.000,-1037.867,-737.940
x1,0.4421,0.037,11.923,0.000,0.368,0.516

0,1,2,3
Omnibus:,4.397,Durbin-Watson:,1.406
Prob(Omnibus):,0.111,Jarque-Bera (JB):,4.037
Skew:,0.63,Prob(JB):,0.133
Kurtosis:,2.962,Cond. No.,1730000.0


In [75]:
# Persist the unrounded growth rates to the results directory ...
# NOTE(review): the saved report does not pass `ci`, so it uses the helper's
# default level, whereas the on-screen report below asks for 90% — confirm the
# two are meant to agree.
with open(f'{results_dir}/regression_results.out', 'w') as f, redirect_stdout(f):
    print_growth_rates(reg_results, round_digits=None)

# ... and show a rounded version with a 90% confidence interval in the notebook.
print_growth_rates(reg_results, round_digits=5, ci=90)

N=61.0
R^2=0.71
0.44209 OOMs/year (90% CI: 0.38012, 0.50405)
2.76748x/year (90% CI: 2.3995x, 3.1919x)
doubling time of 8.17119 months (90% CI: 7.16669, 9.50317)


In [76]:
# Prediction window, as whole years and as the matching 1 January date strings.
pred_start_year, pred_end_year = 2015, 2025
pred_start_date, pred_end_date = f'{pred_start_year}-01-01', f'{pred_end_year}-01-01'

# 100 evenly spaced fractional years spanning the window (both endpoints
# included), in the column name the regression was fitted on.
pred_years = pd.DataFrame(
    {'Publication date (float)': np.linspace(start=pred_start_year, stop=pred_end_year, num=100)}
)
pred_years

Unnamed: 0,Publication date (float)
0,2015.00000
1,2015.10101
2,2015.20202
3,2015.30303
4,2015.40404
...,...
95,2024.59596
96,2024.69697
97,2024.79798
98,2024.89899


In [77]:

# Evaluate the fitted regression on the prediction grid, then attach a calendar
# date for each fractional year.
# NOTE(review): the prediction columns (mean, confidence bounds) look like log10
# dollars rather than dollars, given the log-scale fit — confirm before plotting.
predicted_cost_df = get_predictions(reg_results, pred_years, ['Publication date (float)'])
predicted_cost_df['Publication date'] = predicted_cost_df['Publication date (float)'].apply(float_year_to_datetime)
predicted_cost_df

Unnamed: 0,mean,mean_se,mean_ci_lower,mean_ci_upper,obs_ci_lower,obs_ci_upper,Publication date (float),Publication date
0,2.897729,0.244884,2.488505,3.306952,1.686120,4.109338,2015.00000,2015-01-01
1,2.942384,0.241388,2.539001,3.345767,1.732736,4.152032,2015.10101,2015-02-06
2,2.987039,0.237901,2.589484,3.384594,1.779322,4.194756,2015.20202,2015-03-15
3,3.031694,0.234421,2.639954,3.423434,1.825878,4.237510,2015.30303,2015-04-21
4,3.076349,0.230950,2.690410,3.462288,1.872405,4.280293,2015.40404,2015-05-28
...,...,...,...,...,...,...,...,...
95,7.139960,0.154196,6.882284,7.397636,5.970802,8.309117,2024.59596,2024-08-06
96,7.184615,0.157296,6.921758,7.447472,6.014305,8.354925,2024.69697,2024-09-12
97,7.229270,0.160424,6.961186,7.497354,6.057775,8.400765,2024.79798,2024-10-19
98,7.273925,0.163578,7.000571,7.547279,6.101212,8.446637,2024.89899,2024-11-25


In [78]:
# Save prediction dataset - this uses hardware-capex-energy method for regression.
# results_dir ends with '/', so plain string concatenation yields a valid path.
predicted_cost_df.to_csv(results_dir + 'predicted_cost_dataset_hardware_capex_energy.csv', index=False)

# Export data

In [79]:
# Create cost_dataset_3_estimates.csv with Model + 3 cost columns
# (one inflation-adjusted cost column per estimation method).
# The hardware-capex-energy frame supplies the model names and the row index;
# the other methods' columns are aligned to it by index label.
cost_comparison_df = cost_dfs['hardware-capex-energy'][['Model']].copy()

# Add inflation-adjusted costs from each method.
# The adjusted column was already computed when inflation adjustment was applied
# to every frame in cost_dfs, so it is reused here instead of being recomputed
# with a second hard-coded copy of the index file and target date. This keeps
# the export consistent with the values used for the regression.
for method in estimation_methods:
    method_df = cost_dfs[method]
    if 'Cost (inflation-adjusted)' not in method_df.columns:
        raise KeyError(
            f"'Cost (inflation-adjusted)' is missing for method '{method}'; "
            "run the inflation adjustment cell before exporting."
        )
    cost_comparison_df[f'{method.replace("-", "_")}_cost'] = method_df['Cost (inflation-adjusted)']

# Display the comparison (only models that have an estimate from all three methods)
print("Cost comparison across methods:")
print(cost_comparison_df.dropna().head(10))

# Save the 3-method comparison dataset
cost_comparison_df.to_csv(results_dir + 'cost_dataset_3_estimates.csv', index=False)
print(f"\nSaved cost_dataset_3_estimates.csv with {len(cost_comparison_df)} models")

# Also keep the original detailed export for the hardware-capex-energy method:
# these are the columns written to the detailed CSV.
keep_cols = [
    'Model',
    'Domain',
    'Task',
    'Model accessibility',
    'Reference',
    'Publication date',
    'Organization',
    'Parameters',
    'Training compute (FLOP)',
    'Training dataset size (datapoints)',
    'Epochs',
    'Training time (hours)',
    'Training hardware',
    'Base model',
    'Finetune compute (FLOP)',
    'Hardware quantity',
    'Hardware utilization',
    'Training cloud compute vendor',
    'Training data center',
    'Cost',
    'Cost (inflation-adjusted)',
]
cost_df[keep_cols]

Cost comparison across methods:
                      Model  hardware_capex_energy_cost  \
101                  Grok-3                3.008724e+08   
403          Llama 3.1-405B                5.104052e+07   
448         Nemotron-4 340B                2.052898e+07   
635  MegaScale (Production)                2.614019e+06   
725        Gemini 1.0 Ultra                2.830960e+07   
748            Inflection-2                1.299155e+07   
831            Amazon Titan                7.656705e+06   
856             Falcon-180B                1.036871e+07   
908             Llama 2-70B                1.102561e+06   
927        xTrimoPGLM -100B                1.823415e+06   

     hardware_acquisition_cost    cloud_cost  
101               5.481967e+09  1.164147e+09  
403               8.960449e+08  1.698280e+08  
448               3.366319e+08  6.552968e+07  
635               3.768262e+08  8.967297e+06  
725               5.979132e+08  1.919146e+08  
748               2.760095e+08  3.66

Unnamed: 0,Model,Domain,Task,Model accessibility,Reference,Publication date,Organization,Parameters,Training compute (FLOP),Training dataset size (datapoints),...,Training time (hours),Training hardware,Base model,Finetune compute (FLOP),Hardware quantity,Hardware utilization,Training cloud compute vendor,Training data center,Cost,Cost (inflation-adjusted)
52,Llama 4 Behemoth (preview),"Multimodal,Language,Vision","Chat,Code generation,Visual question answering...",Unreleased,The Llama 4 herd: The beginning of a new era o...,2025-04-05,Meta AI,2.000000e+12,5.184000e+25,3.000000e+13,...,,,,,32000.0,,,,,
87,GPT-4.5,"Language,Vision,Multimodal","Language modeling/generation,Question answerin...",API access,Introducing GPT-4.5,2025-02-27,OpenAI,,2.100000e+26,,...,,,,,,,Azure AI,,,
94,Claude 3.7 Sonnet,"Language,Vision,Multimodal","Language modeling/generation,Question answerin...",API access,Claude 3.7 Sonnet,2025-02-24,Anthropic,,3.350000e+25,,...,,,,,,,,,,
101,Grok-3,"Language,Vision,Multimodal","Chat,Language modeling/generation,Question ans...",Hosted access (no API),Grok 3 Beta — The Age of Reasoning Agents,2025-02-17,xAI,,4.640000e+26,,...,2400.0,NVIDIA H100 SXM5 80GB,,,100000.0,,,xAI Memphis Colossus,3.014548e+08,3.008724e+08
206,Doubao-pro,Language,"Language modeling/generation,Question answerin...",API access,Doubao General Model Pro (Doubao-pro),2024-10-28,ByteDance,5.000000e+11,2.505000e+25,8.350000e+12,...,,,,,,,,"There is no paper to reference, also no inform...",,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1632,ResNet-200,Vision,Image classification,Unreleased,Identity Mappings in Deep Residual Networks,2016-09-17,Microsoft Research Asia,,2.974164e+19,1.281167e+06,...,500.0,,,,,,,,,
1660,AlphaGo Lee,Games,Go,Unreleased,Mastering the game of Go with deep neural netw...,2016-01-27,DeepMind,,1.900000e+21,2.940000e+07,...,696.0,,,,,,,,,
1664,ResNet-152 (ImageNet),Vision,Image classification,,Deep Residual Learning for Image Recognition,2015-12-10,Microsoft,6.020000e+07,1.041408e+19,1.280000e+06,...,,,,,,,,,,
1665,DeepSpeech2 (English),Speech,Speech recognition,,Deep Speech 2: End-to-End Speech Recognition i...,2015-12-08,Baidu Research - Silicon Valley AI Lab,3.800000e+07,2.600000e+19,1.633392e+08,...,120.0,NVIDIA GeForce GTX TITAN X,,,16.0,0.4484,,,1.854566e+02,2.068604e+02


In [80]:
# Keep the detailed export for the hardware-capex-energy method:
# writes the keep_cols subset of cost_df, without the index, to the results directory.
cost_df[keep_cols].to_csv(results_dir + 'cost_dataset_detailed.csv', index=False)

# Plots

In [81]:
# Names of the per-component cost columns in component_cost_df; each is later
# expressed as a percentage of the 'Cost' column.
cost_component_names = [
    'AI accelerator chip cost',
    'Other server components cost',
    'Cluster-level interconnect cost',
    'Energy cost',
]

In [82]:
# Express each cost component as a percentage of the model's total cost.
# Rows without a cost estimate stay NaN.
for key in cost_component_names:
    component_cost_df[f"{key} (%)"] = component_cost_df[key].div(component_cost_df['Cost']).mul(100)

# Spot-check one of the new columns.
component_cost_df['AI accelerator chip cost (%)']

52            NaN
87            NaN
94            NaN
101     45.507920
206           NaN
          ...    
1632          NaN
1660          NaN
1664          NaN
1665    34.484219
1667          NaN
Name: AI accelerator chip cost (%), Length: 89, dtype: float64

In [83]:
# Names of the percentage columns derived from the component cost columns.
cost_component_pc_names = [f'{name} (%)' for name in cost_component_names]

# Keep only models with a complete component breakdown, ordered by publication date.
filtered_component_cost_df = (
    component_cost_df
    .dropna(subset=cost_component_pc_names)
    .sort_values(by='Publication date')
)

In [84]:
# Preview the first rows of the date-sorted table, i.e. the earliest-published
# models that have a complete component breakdown.
filtered_component_cost_df.head()

Unnamed: 0,Model,Domain,Task,Organization,Authors,Publication date,Reference,Link,Citations,Notability criteria,...,Post-training compute notes,AI accelerator chip cost,Other server components cost,Cluster-level interconnect cost,Energy cost,Cost,AI accelerator chip cost (%),Other server components cost (%),Cluster-level interconnect cost (%),Energy cost (%)
1665,DeepSpeech2 (English),Speech,Speech recognition,Baidu Research - Silicon Valley AI Lab,"Dario Amodei, Rishita Anubhai, Eric Battenberg...",2015-12-08,Deep Speech 2: End-to-End Speech Recognition i...,https://arxiv.org/abs/1512.02595,2853.0,Highly cited,...,,63.953273,40.930095,24.602271,55.971,185.456639,34.484219,22.0699,13.265781,30.180101
1624,GNMT,Language,Translation,Google,"Yonghui Wu, Mike Schuster, Zhifeng Chen, Quoc ...",2016-09-26,Google's Neural Machine Translation System: Br...,https://arxiv.org/abs/1609.08144,6483.0,Highly cited,...,,77894.044829,49852.188691,29965.165887,19747.833534,177459.232941,43.89405,28.092192,16.885662,11.128096
1623,Xception,Vision,Image classification,Google,François Chollet,2016-10-07,Xception: Deep Learning with Depthwise Separab...,https://arxiv.org/abs/1610.02357,13038.0,Highly cited,...,,5064.230483,3241.107509,1948.165702,1301.00256,11554.506253,43.829051,28.050593,16.860657,11.259698
1615,PolyNet,Vision,Image classification,Chinese University of Hong Kong (CUHK),"X Zhang, Z Li, C Change Loy",2016-11-17,PolyNet: A Pursuit of Structural Diversity in ...,https://arxiv.org/abs/1611.05725,282.0,SOTA improvement,...,,178.564122,114.281038,68.692074,202.062472,563.599706,31.682792,20.276987,12.188096,35.852125
1604,MoE-Multi,Language,"Language modeling,Translation","Jagiellonian University,Google Brain","N Shazeer, A Mirhoseini, K Maziarz, A Davis",2017-01-23,Outrageously Large Neural Networks: The Sparse...,https://arxiv.org/abs/1701.06538,2037.0,"Highly cited,SOTA improvement",...,,1519.646471,972.573741,584.594865,461.374341,3538.189418,42.949834,27.487894,16.52243,13.039843


In [85]:
# Save the per-model component breakdown. This is written before the
# training-hardware filter and the power-capacity column are applied further
# down, so the CSV does not reflect either of them.
filtered_component_cost_df.to_csv(results_dir + 'cost_components.csv', index=False)

In [86]:
# Average percentage for each component.
# This is an unweighted mean over models: each model counts equally,
# regardless of its total cost.
filtered_component_cost_df[cost_component_pc_names].mean()

AI accelerator chip cost (%)           45.618313
Other server components cost (%)       29.544045
Cluster-level interconnect cost (%)    17.630677
Energy cost (%)                         7.206965
dtype: float64

In [87]:
# List every column available in the component table.
filtered_component_cost_df.columns

Index(['Model', 'Domain', 'Task', 'Organization', 'Authors',
       'Publication date', 'Reference', 'Link', 'Citations',
       'Notability criteria', 'Notability criteria notes', 'Parameters',
       'Parameters notes', 'Training compute (FLOP)', 'Training compute notes',
       'Training dataset', 'Training dataset notes',
       'Training dataset size (datapoints)', 'Dataset size notes',
       'Training time (hours)', 'Training time notes', 'Training hardware',
       'Approach', 'Confidence', 'Abstract', 'Epochs', 'Benchmark data',
       'Model accessibility', 'Country (of organization)', 'Base model',
       'Finetune compute (FLOP)', 'Finetune compute notes',
       'Hardware quantity', 'Hardware utilization', 'Last modified',
       'Training cloud compute vendor', 'Training data center',
       'Archived links', 'Batch size', 'Batch size notes',
       'Organization categorization', 'Foundation model',
       'Training compute lower bound', 'Training compute upper bound',
  

In [88]:
# Restrict the component table to models with known training hardware.
# The explicit .copy() makes the result an independent frame, so the column
# assignments that follow are unambiguous and do not raise pandas'
# SettingWithCopyWarning.
filtered_component_cost_df = filtered_component_cost_df.dropna(subset=['Training hardware']).copy()

# Cluster power capacity per model, computed row by row from the training
# hardware, the hardware quantity and the organization, with hardware_df passed
# to the helper. The unit (kW) is taken from the column name.
power_col = 'Power capacity for final training run (kW)'
filtered_component_cost_df.loc[:, power_col] = [
    cluster_power_capacity(row['Training hardware'], row['Hardware quantity'], hardware_df, row['Organization'])
    for _, row in filtered_component_cost_df.iterrows()
]

In [89]:
# Add the numeric publication-date column to the component table, using the same
# conversion that was applied to cost_df before the regression.
filtered_component_cost_df['Publication date (float)'] = datetime_to_float_year(
    pd.to_datetime(filtered_component_cost_df['Publication date'])
)