# Setup

In [83]:
from collections import defaultdict
from datetime import datetime
import json
import numpy as np
import os
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from cost import *
from data import *
from energy import *
from inflation import *
from parameters import *
from plotting import *
from utils import *

Sample size for distributions

In [84]:
N = 10000

Flag to include equity in the R&D staff cost calculations

In [85]:
INCLUDE_EQUITY = True
rnd_staff_cost_name = 'R&D staff cost (including equity)' if INCLUDE_EQUITY else 'R&D staff cost (excluding equity)'

In [86]:
results_dir = f'results/development-costs-updated-gemini/' if INCLUDE_EQUITY else f'results/development-costs-updated-gemini-excluding-equity/'
os.makedirs(results_dir, exist_ok=True)

Cost of staff compensation

levels.fyi - Google Software Engineer in SF Bay Area: https://web.archive.org/web/20240515221234/https://www.levels.fyi/companies/google/salaries/software-engineer/locations/san-francisco-bay-area?dma=807
- "Software Engineer compensation in San Francisco Bay Area at Google ranges from $194K per year for L3 to $1.21M per year for L8. The median compensation in San Francisco Bay Area package totals $316K."
- Base salary: $148K (level 3) to $335K (level 8)
- Equity: $34K (level 3) to $760K (level 8)

aipaygrad.es: https://web.archive.org/web/20240415184855/https://aipaygrad.es/

- Median overall: $324,350/ year
- 134 data points. 5th percentile is ~7th lowest value, which is $191,800. 95th percentile is ~7th highest value, which is $632,250.
- There isn't much difference to the overall median for Facebook, Google, Microsoft, DeepMind. So going with the overall distribution seems reasonable.
- Base salary: Median $160,000, 5th percentile $132,000, 95th percentile $210,000
- Equity: Median $100,000, 5th percentile $37,000, 95th percentile $331,250


In [87]:
salary_low = (148000 * 132000)**0.5
salary_high = (335000 * 210000)**0.5
equity_low = (34000 * 37000)**0.5
equity_high = (760000 * 331250)**0.5

salary_test_distribution = lognorm_from_90_ci(salary_low, salary_high, N)
equity_test_distribution = lognorm_from_90_ci(equity_low, equity_high, N)
print_median_and_ci(salary_test_distribution)
print_median_and_ci(equity_test_distribution)


Median: 1.9e+05 [90% CI: 1.4e+05, 2.7e+05]
Median: 1.4e+05 [90% CI: 3.4e+04, 5e+05]


In [88]:
compensation_test_distribution = salary_test_distribution + equity_test_distribution
print_median_and_ci(compensation_test_distribution)
print_median_and_ci(compensation_test_distribution)

Median: 3.4e+05 [90% CI: 2.1e+05, 7e+05]
Median: 3.4e+05 [90% CI: 2.1e+05, 7e+05]


In [89]:
# Old values for total compensation
print_median_and_ci(lognorm_from_90_ci(2e5, 1.2e6, N))

Median: 4.9e+05 [90% CI: 2e+05, 1.2e+06]


Apply 1.25 - 1.4 overhead factor for benefits, payroll taxes, etc. https://www.sba.gov/blog/how-much-does-employee-cost-you 

In [90]:
compensation_overhead_low = 1.25
compensation_overhead_high = 1.4

In [91]:
cost_data = {}
cost_data_with_hardware_acq = {}

Default ratio of total experiment + training compute to final training run compute
- Smaller versions of GPT-3 made up ~4.5e22 FLOP (based on the formula compute = 6 * parameters * tokens), compared to 175B using ~3e23 FLOP. See Table 2.1 of the [paper](https://arxiv.org/pdf/2005.14165). That's a factor of ~1.14 to go from 175B to total FLOP. Certainly more than that was needed due to failures and other experiments. So 1.2x seems like a good low-end estimate.
- https://arxiv.org/pdf/2111.00364.pdf, p.3, Fig. 3 caption: "At Facebook, we observe a rough power capacity breakdown of 10:20:70 for AI infrastructures devoted to the three key phases — Experimentation, Training, and Inference". 10:20 ratio means a 1.5x multiplier from training to training+experiment compute. Note however that the "training" category is not just the final training run; it includes additional hyper-parameter tuning and retraining for some models. So the actual multiplier would be higher under our definition.
- BLOOM project - https://arxiv.org/abs/2211.02001, p.8, Table 5. ~63% of compute was spent training smaller models than 176B. That's a multiplier of ~2.7x.
- https://arxiv.org/abs/2205.01068: "[...] OPT-175B was developed with an estimated carbon emissions footprint (CO2eq) of 75 tons [...] With ablations, baselines and downtime, our own estimates of total cost is roughly 2× higher."
- We think the Facebook, BLOOM and OPT-175B cases are the more central examples as they account better for all experiments. A factor close to 2x seems like a reasonable median estimate.
- On the high end, it's plausible that several, full iterations of the full-scale training run are necessary before success - say, 4x.

In [92]:
experiment_factor = lognorm_from_90_ci(1.2, 4, N)
print_median_and_ci(experiment_factor)

Median: 2.2 [90% CI: 1.2, 4]


Uncertainty in energy prices

In [93]:
likely_datacenter_states = ['California', 'Nevada', 'Oregon', 'Washington']
energy_prices = [US_STATE_ENERGY_PRICES_PER_KWH[state] for state in likely_datacenter_states]
mean = np.mean(energy_prices)
np.max(energy_prices) / mean, np.min(energy_prices) / mean

(1.727914785226352, 0.6247539654972791)

In [94]:
energy_price_uncertainty_factor = lognorm_from_90_ci(0.6, 1.7, N)

In [95]:
dt2float = lambda x: x.year + x.month/12 + x.day/365.25
float2dt = lambda x: datetime(int(x), int((x-int(x))*12), int(((x-int(x))*12-int((x-int(x))*12))*365.25))

In [96]:
frontier_pcd_df, hardware_df, price_df = load_data_for_cost_estimation(
    compute_threshold_method='top_n', compute_threshold=10,
)

In [97]:
frontier_pcd_df['Publication date'] = pd.to_datetime(frontier_pcd_df['Publication date'])

Models

In [98]:
models = ['GPT-3 175B (davinci)', 'OPT-175B', 'GPT-4', 'Gemini 1.0 Ultra']

In [99]:
selected_model_df = frontier_pcd_df.loc[[s in models for s in frontier_pcd_df['System']]]

## Hardware

In [100]:
# Unset index
selected_model_df.reset_index(inplace=True)

In [101]:
def _estimate_chip_hours(row):
    return estimate_chip_hours(row, hardware_df)

selected_model_df['Training chip-hours'] = selected_model_df.apply(_estimate_chip_hours, axis=1)
selected_model_df['Training chip-hours']



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



0    132000000.0
1     57000000.0
2       812544.0
3      3552000.0
Name: Training chip-hours, dtype: float64

In [102]:
selected_model_df.loc[:, 'Original training chip-hours'] = selected_model_df.loc[:, 'Training chip-hours']
hardware_costs = defaultdict(dict)
cost_component_names = [
    'AI accelerator chip cost',
    'Other server components cost',
    'Cluster-level interconnect cost',
    'Energy cost',
]

for percentile in [2.5, 50, 97.5]:
    print(f'\n\n{percentile}th percentile of experiments time')
    experiment_factor_value = np.percentile(experiment_factor, percentile)
    selected_model_df.loc[:, 'Training chip-hours'] = selected_model_df.loc[:, 'Original training chip-hours'] * experiment_factor_value
    cost_df = estimate_hardware_capex_energy(selected_model_df, hardware_df, price_df, separate_components=True)
    cost_df = cost_df.set_index('System', inplace=False)

    for model in models:
        cost_components = {}
        for cost_component_name in cost_component_names:
            cost_components[cost_component_name] = cost_df.loc[model, cost_component_name]
        hardware_costs[model][str(percentile)] = cost_components

hardware_costs



2.5th percentile of experiments time
==== System: Gemini 1.0 Ultra ====
Estimated the value of Google TPU v4 server, available from 2021-05-20 00:00:00 and used from 2023-05-10 00:00:00: 4488.559125020449 per chip

==== System: GPT-4 ====
Estimated the value of NVIDIA A100 SXM4 40 GB server, available from 2020-08-12 00:00:00 and used from 2022-05-12 00:00:00: 14165.054738257799 per chip

==== System: OPT-175B ====
Estimated the value of NVIDIA A100 SXM4 80 GB server, available from 2021-02-14 00:00:00 and used from 2022-01-28 23:00:00: 18296.79402527096 per chip

==== System: GPT-3 175B (davinci) ====
Estimated the value of NVIDIA Tesla V100 DGXS 32 GB server, available from 2018-06-25 00:00:00 and used from 2019-10-01 00:00:00: 11463.61551009175 per chip

==== System: Gemini 1.0 Ultra ====
Estimated cost: {'AI accelerator chip cost': 14226418.824411752, 'Other server components cost': 9104908.04762352, 'Cluster-level interconnect cost': 5472780.377390989, 'Energy cost': 3135594.128



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

defaultdict(dict,
            {'GPT-3 175B (davinci)': {'2.5': {'AI accelerator chip cost': 948781.9646363092,
               'Other server components cost': 654659.5555990533,
               'Cluster-level interconnect cost': 376115.9121539739,
               'Energy cost': 118920.40249256055},
              '50': {'AI accelerator chip cost': 1945981.781722941,
               'Other server components cost': 1342727.429388829,
               'Cluster-level interconnect cost': 771425.6174212794,
               'Energy cost': 243909.50223678618},
              '97.5': {'AI accelerator chip cost': 3989522.1133041997,
               'Other server components cost': 2752770.2581798974,
               'Cluster-level interconnect cost': 1581525.3710888622,
               'Energy cost': 500047.000418024}},
             'OPT-175B': {'2.5': {'AI accelerator chip cost': 352673.1164007059,
               'Other server components cost': 232764.25682446582,
               'Cluster-level interconnect 

# GPT-3

## Project duration

Training end time
1. Shevlane (2022)
https://uploads-ssl.webflow.com/614b70a71b9f71c9c240c7a7/6262a1a55526a373cc93207d_Shevlane%20dissertation%20preprint.pdf
p.66 of the PDF: A senior member of OpenAI (specified anonymously on p.27 of the PDF) 
told the author "GPT-3 existed for a long time before the paper came out. We delayed the 
paper. [...] But it's months, it doesn't really count."
p.67 of the PDF: CAMERON said "Firstly, [the idea for a commercial API for GPT-3] 
started out as a research API. It probably was . . . early January 2020."

1. We think it plausibly could have been produced soon after the Microsoft deal was 
announced in July 2019. Supposing the announcement coincided with Microsoft giving 
OpenAI access to the necessary compute, and OpenAI already having almost everything 
about GPT-3 planned in advance, and it took less than 1 month to train, then GPT-3 could 
have been produced in August 2019.

1. So we estimate August to January as our 90% CI, and halfway between (October) as the
central estimate.

In [103]:
training_end_low = datetime(2019, 8, 1)
training_end_high = datetime(2020, 1, 1)


Project start time
- Lower bound on start time: when GPT-2 was published, 2019-Feb-14
  - It could have been earlier than this, but that seems unlikely unless GPT-2 was delayed for months
  - The scaling laws paper was only published in January 2020, and GPT-3 was based on the findings of that paper
- Upper bound on start time: 2019-Nov-01, two months before the upper bound on training completion
  - We think they’d need six weeks at the absolute minimum, to prepare training data and scale up the experiments successfully


In [104]:
project_start_low = datetime(2019, 2, 14)
project_start_high = datetime(2019, 11, 1)


In [105]:
publication_date = frontier_pcd_df.loc[frontier_pcd_df['System'] == 'GPT-3 175B (davinci)', 'Publication date'].values[0]
publication_date

numpy.datetime64('2020-05-28T00:00:00.000000000')

In [106]:
min_project_duration = 15 / 365  # training duration
experiments_duration = []
while len(experiments_duration) < N:
    project_start_sample = lognorm_from_90_ci(dt2float(project_start_low), dt2float(project_start_high), 1)[0]
    training_end_sample = lognorm_from_90_ci(dt2float(training_end_low), dt2float(training_end_high), 1)[0]
    experiment_duration_sample = training_end_sample - project_start_sample
    if experiment_duration_sample > min_project_duration:
        experiments_duration.append(experiment_duration_sample)

project_duration = []
while len(project_duration) < N:
    project_start_sample = lognorm_from_90_ci(dt2float(project_start_low), dt2float(project_start_high), 1)[0]
    # Paper was delayed by "months"
    publication_delay_sample = lognorm_from_90_ci(2*30, 4*30, 1)[0]
    project_end_sample = dt2float(publication_date - pd.to_timedelta(publication_delay_sample, unit='D'))
    project_duration_sample = project_end_sample - project_start_sample
    if project_duration_sample > min_project_duration:
        project_duration.append(project_duration_sample)

experiments_duration = np.array(experiments_duration)
project_duration = np.array(project_duration)
print_median_and_ci(experiments_duration * 12)  # convert years to months
print_median_and_ci(project_duration * 12)

Median: 4.3 [90% CI: 1.1, 8.9]
Median: 8.3 [90% CI: 4, 13]


## Energy

Total energy consumption of training: 1287 MWh ([Patterson et al. 2021](https://arxiv.org/abs/2104.10350), Table 4)

In [107]:
energy_consumption_kwh = 1287e3 * experiment_factor
price_per_kwh = energy_price(2020) * energy_price_uncertainty_factor
energy_cost = energy_consumption_kwh * price_per_kwh
print_median_and_ci(energy_cost)

Median: 1.9e+05 [90% CI: 8.6e+04, 4.2e+05]


## Personnel

In [108]:
num_contributors = 25  # counting listed contributors in paper

In [109]:
compensation = lognorm_from_90_ci(salary_low, salary_high, (N, num_contributors))
overhead = lognorm_from_90_ci(compensation_overhead_low, compensation_overhead_high, (N, num_contributors))
compensation *= overhead
if INCLUDE_EQUITY:
    equity = lognorm_from_90_ci(equity_low, equity_high, num_contributors)
    compensation += equity
ftes = lognorm_from_90_ci(0.05, 0.8, (N, num_contributors))
compensation *= ftes
total_compensation = compensation.sum(axis=1)
assert total_compensation.shape == (N,)

In [110]:
personnel_cost = total_compensation * project_duration

In [111]:
print_median_and_ci(personnel_cost, ci=[2.5, 97.5])

Median: 2e+06 [95% CI: 7.3e+05, 3.9e+06]


## Overall

In [112]:
hardware_costs['GPT-3 175B (davinci)']['2.5']

{'AI accelerator chip cost': 948781.9646363092,
 'Other server components cost': 654659.5555990533,
 'Cluster-level interconnect cost': 376115.9121539739,
 'Energy cost': 118920.40249256055}

In [113]:
def get_hardware_cost_dist(model, component, ci=[2.5, 97.5]):
    return lognorm_from_ci(
        hardware_costs[model][str(ci[0])][component],
        hardware_costs[model][str(ci[1])][component],
        ci[1] - ci[0],
        N
    )

In [114]:
cost_data['GPT-3 175B (davinci)'] = {
    'AI accelerator chip cost': get_hardware_cost_dist('GPT-3 175B (davinci)', 'AI accelerator chip cost'),
    'Other server components cost': get_hardware_cost_dist('GPT-3 175B (davinci)', 'Other server components cost'),
    'Cluster-level interconnect cost': get_hardware_cost_dist('GPT-3 175B (davinci)', 'Cluster-level interconnect cost'),
    'Energy cost': energy_cost,
    rnd_staff_cost_name: personnel_cost,
}

# OPT-175B

## Energy

https://arxiv.org/abs/2205.01068: "[...] OPT-175B was developed with an estimated carbon emissions footprint (CO2eq) of 75 tons [...] With ablations, baselines and downtime, our own estimates of total cost is roughly 2× higher."

- I haven't found numbers for carbon intensity, or how they calculate it
- According to https://arxiv.org/pdf/2104.10350.pdf (Table 1), Gross CO2e/KWh (kg/KWh) for the “Google Iowa Council Bluffs” data center in 2020 was 0.478 CO2e/kWh (kg/kWh)
- Energy use:
  - 992 A100 GPU units
  - 300W per unit (https://www.nvidia.com/en-us/data-center/a100/)
  - Training time 33 days (https://github.com/facebookresearch/metaseq/blob/d703cf1ae1e0faaff6c20629398dfbe02b98cf77/projects/OPT/chronicles/final_update.md)
  - Energy = 992 units * 0.3 kW/unit * 33 days * 24h/day ~= 236000 kWh
  - 75000 kg / 236000 kWh ~= 0.318 CO2e/kWh (kg/kWh)
  - Sanity check: this is not far from the Google data center gross carbon intensity of 0.478 reported by [Patterson et al. (2021)](https://arxiv.org/abs/2104.10350)

In [115]:
training_time_hours = 33 * 24
power_per_gpu = 0.3
num_gpus = 992
energy_consumption_kwh = training_time_hours * num_gpus * power_per_gpu * experiment_factor
price_per_kwh = energy_price(2022) * energy_price_uncertainty_factor
energy_cost = energy_consumption_kwh * price_per_kwh
print_median_and_ci(energy_cost, ci=[2.5, 97.5])

Median: 4.3e+04 [95% CI: 1.7e+04, 1.1e+05]


## Personnel

2021-Oct-20: First date in [OPT logbook](https://github.com/facebookresearch/metaseq/blob/main/projects/OPT/chronicles/OPT175B_Logbook.pdf)

2022-Jan-06: Date in the logbook where the final training run is stated to have completed

2022-May-02: Publication on arxiv

In [116]:
project_duration = 192 / DAYS_PER_YEAR

In [117]:
num_authors = 19
num_contributors = num_authors

In [118]:
compensation = lognorm_from_90_ci(salary_low, salary_high, (N, num_contributors))
overhead = lognorm_from_90_ci(compensation_overhead_low, compensation_overhead_high, (N, num_contributors))
compensation *= overhead
if INCLUDE_EQUITY:
    equity = lognorm_from_90_ci(equity_low, equity_high, num_contributors)
    compensation += equity
ftes = lognorm_from_90_ci(0.05, 0.8, (N, num_contributors))
compensation *= ftes
total_compensation = compensation.sum(axis=1)
assert total_compensation.shape == (N,)

In [119]:
personnel_cost = total_compensation * project_duration

In [120]:
print_median_and_ci(personnel_cost, ci=[2.5, 97.5])

Median: 1.2e+06 [95% CI: 7.7e+05, 1.9e+06]


## Overall

In [121]:
cost_data['OPT-175B'] = {
    'AI accelerator chip cost': get_hardware_cost_dist('OPT-175B', 'AI accelerator chip cost'),
    'Other server components cost': get_hardware_cost_dist('OPT-175B', 'Other server components cost'),
    'Cluster-level interconnect cost': get_hardware_cost_dist('OPT-175B', 'Cluster-level interconnect cost'),
    'Energy cost': energy_cost,
    rnd_staff_cost_name: personnel_cost,
}

# GPT-4

## Project duration

https://arxiv.org/abs/2303.08774
"This system card analyzes GPT-4 [...] Since it finished training in August of 2022 [...]"

In [122]:
training_end_low = datetime(2022, 8, 1)
training_end_high = datetime(2022, 8, 31)

Project start time
- We know they started training GPT-3.5 about one year before [announcement](https://openai.com/index/gpt-4-research/) in March 2023, and then trained GPT-4 after that. They built new infrastructure/codebase for and GPT-3.5 was a test run. Maybe some research experiments were going on for a few months in total prior to that.
- Indirect source that final training run of GPT-4 took about three months: [SemiAnalysis](https://www.semianalysis.com/p/gpt-4-architecture-infrastructure). So would have started at the end of May at the latest.


In [123]:
project_start_low = datetime(2022, 1, 1)
project_start_high = datetime(2022, 5, 31)

In [124]:
publication_date = frontier_pcd_df.loc[frontier_pcd_df['System'] == 'GPT-4', 'Publication date'].values[0]
publication_date

numpy.datetime64('2023-03-15T00:00:00.000000000')

In [125]:
min_project_duration = 90 / 365  # training duration
experiments_duration = []
while len(experiments_duration) < N:
    project_start_sample = lognorm_from_90_ci(dt2float(project_start_low), dt2float(project_start_high), 1)[0]
    training_end_sample = lognorm_from_90_ci(dt2float(training_end_low), dt2float(training_end_high), 1)[0]
    experiment_duration_sample = training_end_sample - project_start_sample
    if experiment_duration_sample > min_project_duration:
        experiments_duration.append(experiment_duration_sample)

project_duration = []
while len(project_duration) < N:
    project_start_sample = lognorm_from_90_ci(dt2float(project_start_low), dt2float(project_start_high), 1)[0]
    # Technical report said six months was spent on safety research, risk assessment, and iteration,
    # which covers the time from the end of training to publication
    project_duration_sample = dt2float(pd.to_datetime(publication_date)) - project_start_sample
    if project_duration_sample > min_project_duration:
        project_duration.append(project_duration_sample)

experiments_duration = np.array(experiments_duration)
project_duration = np.array(project_duration)
print_median_and_ci(experiments_duration * 12)  # convert years to months
print_median_and_ci(project_duration * 12)

Median: 5.2 [90% CI: 3.3, 7.6]
Median: 12 [90% CI: 9.5, 14]


## Energy

In [126]:
energy_cost = get_hardware_cost_dist('GPT-4', 'Energy cost') * energy_price_uncertainty_factor
print_median_and_ci(energy_cost, ci=[2.5, 97.5])

Median: 7.7e+06 [95% CI: 3e+06, 2e+07]


## Personnel

In [127]:
# Load file: data/gpt-4_contributions.json
with open('data/gpt-4_contributions.json') as f:
    gpt_4_contributions = json.load(f)
gpt_4_contributions

{'Pretraining': {'Core contributors': ['Christopher Berner',
   'Greg Brockman',
   'Trevor Cai',
   'David Farhi',
   'Chris Hesse',
   'Shantanu Jain',
   'Kyle Kosic',
   'Jakub Pachocki',
   'Alex Paino',
   'Mikhail Pavlov',
   'Michael Petrov',
   'Nick Ryder',
   'Szymon Sidor',
   'Nikolas Tezak',
   'Phil Tillet',
   'Amin Tootoonchian',
   'Qiming Yuan',
   'Wojciech Zaremba'],
  'Compute cluster scaling': ['Christopher Berner',
   'Oleg Boiko',
   'Andrew Cann',
   'Ben Chess',
   'Christian Gibson',
   'Mateusz Litwin',
   'Emy Parparita',
   'Henri Roussez',
   'Eric Sigler',
   'Akila Welihinda'],
  'Data': ['Sandhini Agarwal',
   'Suchir Balaji',
   'Mo Bavarian',
   'Che Chang',
   'Sheila Dunning',
   'Leo Gao',
   'Jonathan Gordon',
   'Peter Hoeschele',
   'Shawn Jain',
   'Shantanu Jain',
   'Roger Jiang',
   'Heewoo Jun',
   'Łukasz Kaiser',
   'Nitish Shirish Keskar',
   'Jong Wook Kim',
   'Aris Konstantinidis',
   'Chak Ming Li',
   'Todor Markov',
   'Bianca Ma

In [128]:
unique_contributors = set()
for category in gpt_4_contributions.values():
    for group in category.values():
        for contributor in group:
            unique_contributors.add(contributor)
len(unique_contributors)

284

In [129]:
num_contributors = len(unique_contributors)

In [130]:
compensation = lognorm_from_90_ci(salary_low, salary_high, (N, num_contributors))
overhead = lognorm_from_90_ci(compensation_overhead_low, compensation_overhead_high, (N, num_contributors))
compensation *= overhead
if INCLUDE_EQUITY:
    equity = lognorm_from_90_ci(equity_low, equity_high, num_contributors)
    compensation += equity
ftes = lognorm_from_90_ci(0.05, 0.8, (N, num_contributors))
compensation *= ftes
total_compensation = compensation.sum(axis=1)
assert total_compensation.shape == (N,)

In [131]:
personnel_cost = total_compensation * project_duration

In [132]:
print_median_and_ci(personnel_cost, ci=[2.5, 97.5])

Median: 3.6e+07 [95% CI: 2.6e+07, 4.6e+07]


## Overall

In [133]:
cost_data['GPT-4'] = {
    'AI accelerator chip cost': get_hardware_cost_dist('GPT-4', 'AI accelerator chip cost'),
    'Other server components cost': get_hardware_cost_dist('GPT-4', 'Other server components cost'),
    'Cluster-level interconnect cost': get_hardware_cost_dist('GPT-4', 'Cluster-level interconnect cost'),
    'Energy cost': energy_cost,
    rnd_staff_cost_name: personnel_cost,
}

# Gemini Ultra

See https://colab.research.google.com/drive/1XEKlSo-3DCFp686yGOwwfS6_DEHsFimd#scrollTo=yqWMux2iZL8L

## Hardware

In [134]:
amortized_hardware_cost = lognorm_from_90_ci(5.9e6, 1.1e8, N)
server_cost = amortized_hardware_cost / 1.15
ai_accelerator_chip_cost = server_cost / 1.64
other_server_components_cost = server_cost - ai_accelerator_chip_cost
cluster_level_interconnect_cost = amortized_hardware_cost - server_cost

## Project duration

The project duration for Google Gemini should be at least 7 months. This is based on the fact that on May 10, 2023, at the Google I/O 2023 keynote, Google CEO Sundar Pichai [announced](https://blog.google/technology/ai/google-io-2023-keynote-sundar-pichai/#ai-responsibility:~:text=This%20includes%20our,like%20PaLM%202.) that Google DeepMind is training a new AI model called Gemini. The model was publicy released on Dec 6, 2023 (https://www.metaculus.com/questions/17283/google-deepmind-gemini-release-date/), about 7 months later.

An upper bound seems harder to find, but it seems likely that the project did not get well underway until after PaLM was publicly announced in April 2022 (https://arxiv.org/abs/2204.02311), which would bound the project duration at about 20 months.

Based on this, we set the 5th percentile estimate for project duration at 7 months, and the 95th percentile estimate at 20 months.

In [135]:
# Lognormal distribution parameters for project duration
p_5th_project_duration_years = 7 / 12
p_95th_project_duration_years = 20 / 12

project_duration = lognorm_from_90_ci(p_5th_project_duration_years, p_95th_project_duration_years, N)

project_duration_hours = project_duration * HOURS_PER_YEAR

print_median_and_ci(project_duration * 12, ci=[5, 95])

Median: 12 [90% CI: 6.9, 20]


In [136]:
# Training time - from Gemini compute notebook
percentile_5th = 1*31*24*3600 # 5th percentile in seconds
percentile_95th = 6*31*24*3600  # 95th percentile in seconds
training_time_seconds = lognorm_from_90_ci(percentile_5th, percentile_95th, N)
print_median_and_ci(training_time_seconds)

experiments_duration_seconds = training_time_seconds * experiment_factor
experiments_duration_hours = experiments_duration_seconds / 3600
experiments_duration_years = experiments_duration_hours / (365.25 * 24)
print_median_and_ci(experiments_duration_years)

Median: 6.6e+06 [90% CI: 2.7e+06, 1.6e+07]
Median: 0.46 [90% CI: 0.16, 1.3]


## Energy

In [137]:
chips_per_pod = 4096
number_of_pods = np.random.uniform(12, 19, N)
number_of_chips = number_of_pods * chips_per_pod
print_median_and_ci(number_of_chips)

Median: 6.4e+04 [90% CI: 5.1e+04, 7.6e+04]


In [138]:
hardware_model = 'Google TPU v4'
organization = 'Google'
"""
https://cloud.google.com/blog/topics/systems/tpu-v4-enables-performance-energy-and-co2e-efficiency-gains
"Google's Cloud TPU v4 outperforms TPU v3 by 2.1x on average on a per-chip basis and improves performance/Watt by 2.7x."
TPU v3 performance per Watt: 123 TFLOPS / 450W = 0.273 TFLOPS/W
0.273 * 2.7 = 0.738 TFLOPS/W
TPU v4 is 275 TFLOPS => 275 / 0.738 = 373W
"""
chip_TDP_kw = 373 / 1000
# Adjust for whole server power draw (CPUs, memory, cooling)
server_TDP_kw = chip_TDP_kw * chip_to_server_power(hardware_model)
# Adjust for average power draw
server_power_kw = server_TDP_kw * server_TDP_fraction(hardware_model)
# Adjust for data center power distribution and cooling
adj_server_power_kw = server_power_kw * power_usage_effectiveness(organization)
cluster_kwh = adj_server_power_kw * number_of_chips * experiments_duration_hours

cost_per_kwh = energy_price(2023) * energy_price_uncertainty_factor
energy_cost = cluster_kwh * cost_per_kwh
print_median_and_ci(energy_cost)

Median: 5.6e+06 [90% CI: 1.7e+06, 1.9e+07]


## Personnel

Number of FTEs

The [technical report](https://storage.googleapis.com/deepmind-media/gemini/gemini_1_report.pdf) lists contributions, decomposed by role.

> The roles are defined as below:
> 
> - Lead: Individual(s) responsible for the sub-team throughout the project.
> - Core Contributor: Individual that had significant impact throughout the project.
> - Contributor: Individual that had contributions to the project and was partially involved with the
> effort.
> - Program Lead: Responsible for the organizational aspects of the Gemini effort
> - Overall Technical Lead: Responsible for the technical direction of the overall Gemini effort
> 
> Within each role, contributions are equal [...]

### Raw data (warning: long)

In [139]:
leads_raw = """
Rohan Anil, Co-Lead, Text
Sebastian Borgeaud, Co-Lead, Text
Yonghui Wu, Co-Lead, Text
Jean-Baptiste Alayrac, Co-Lead, MM Vision
Jiahui Yu, Co-Lead, MM Vision
Radu Soricut, Co-Lead, MM Vision
Johan Schalkwyk, Lead, MM Audio
Andrew M. Dai, Co-Lead, Data
Anja Hauth, Co-Lead, Data
Katie Millican, Co-Lead, Data
David Silver, Co-Lead, Fine-Tuning
Slav Petrov, Co-Lead, Fine-Tuning
Melvin Johnson, Lead, Instruction Tuning
Ioannis Antonoglou, Co-Lead, RL Techniques
Julian Schrittwieser, Co-Lead, RL Techniques
Amelia Glaese, Lead, Human Data
Jilin Chen, Lead, Safety
Emily Pitler, Co-Lead, Tool Use
Timothy Lillicrap, Co-Lead, Tool Use
Angeliki Lazaridou, Co-Lead, Eval
Orhan Firat, Co-Lead, Eval
James Molloy, Co-Lead, Infra
Michael Isard, Co-Lead, Infra
Paul R. Barham, Co-Lead, Infra
Tom Hennigan, Co-Lead, Infra
Benjamin Lee, Co-Lead, Codebase & Parallelism
Fabio Viola, Co-Lead, Codebase & Parallelism
Malcolm Reynolds, Co-Lead, Codebase & Parallelism
Yuanzhong Xu, Co-Lead, Codebase & Parallelism
Ryan Doherty, Lead, Ecosystem
Eli Collins, Lead, Product
Clemens Meyer, Co-Lead, Operations
Eliza Rutherford, Co-Lead, Operations
Erica Moreira, Co-Lead, Operations
Kareem Ayoub, Co-Lead, Operations
Megha Goel, Co-Lead, Operations
"""

In [140]:
core_contributors_raw = """
George Tucker
Enrique Piqueras
Maxim Krikun
Iain Barr
Nikolay Savinov
Ivo Danihelka
Becca Roelofs
Anaïs White
Anders Andreassen
Tamara von Glehn
Lakshman Yagati
Mehran Kazemi
Lucas Gonzalez
Misha Khalman
Jakub Sygnowski
Alexandre Frechette
Charlotte Smith
Laura Culp
Lev Proleev
Yi Luan
Xi Chen
James Lottes
Nathan Schucher
Federico Lebron
Alban Rrustemi
Natalie Clay
Phil Crone
Tomas Kocisky
Jeffrey Zhao
Bartek Perz
Dian Yu
Heidi Howard
Adam Bloniarz
Jack W. Rae
Han Lu
Laurent Sifre
Marcello Maggioni
Fred Alcober
Dan Garrette
Megan Barnes
Shantanu Thakoor
Jacob Austin
Gabriel Barth-Maron
William Wong
Rishabh Joshi
Rahma Chaabouni
Deeni Fatiha
Arun Ahuja
Ruibo Liu
Yunxuan Li
Sarah Cogan
Jeremy Chen
Chao Jia
Chenjie Gu
Qiao Zhang
Jordan Grimstad
Ale Jakse Hartman
Martin Chadwick
Gaurav Singh Tomar
Xavier Garcia
Evan Senter
Emanuel Taropa
Thanumalayan Sankaranarayana Pillai
Jacob Devlin
Michael Laskin
Diego de Las Casas
Dasha Valter
Connie Tao
Lorenzo Blanco
Adrià Puigdomènech Badia
David Reitter
Mianna Chen
Jenny Brennan
Clara Rivera
Sergey Brin
Shariq Iqbal
Gabriela Surita
Jane Labanowski
Abhi Rao
Stephanie Winkler
Emilio Parisotto
Yiming Gu
Kate Olszewska
Yujing Zhang
Ravi Addanki
Antoine Miech
Annie Louis
Laurent El Shafey
Denis Teplyashin
Geoff Brown
Elliot Catt
Nithya Attaluri
Jan Balaguer
Jackie Xiang
Pidong Wang
Zoe Ashwood
Anton Briukhov
Albert Webson
Sanjay Ganapathy
Smit Sanghavi
Ajay Kannan
Ming-Wei Chang
Axel Stjerngren
Josip Djolonga
Yuting Sun
Ankur Bapna
Matthew Aitchison
Pedram Pejman
Henryk Michalewski
Tianhe Yu
Cindy Wang
Juliette Love
Junwhan Ahn
Dawn Bloxwich
Kehang Han
Peter Humphreys
Thibault Sellam
James Bradbury
Varun Godbole
Sina Samangooei
Bogdan Damoc
Alex Kaskasoli
Sébastien M. R. Arnold
Vijay Vasudevan
Shubham Agrawal
Jason Riesa
Dmitry Lepikhin
Richard Tanburn
Srivatsan Srinivasan
Hyeontaek Lim
Sarah Hodkinson
Pranav Shyam
Johan Ferret
Steven Hand
Ankush Garg
Tom Le Paine
Jian Li
Yujia Li
Minh Giang
Alexander Neitz
Zaheer Abbas
Sarah York
Machel Reid
Elizabeth Cole
Aakanksha Chowdhery
Dipanjan Das
Dominika Rogozińska
Vitaly Nikolaev
Pablo Sprechmann
Zachary Nado
Lukas Zilka
Flavien Prost
Luheng He
Marianne Monteiro
Gaurav Mishra
Chris Welty
Josh Newlan
Dawei Jia
Miltiadis Allamanis
Clara Huiyi Hu
Raoul de Liedekerke
Justin Gilmer
Carl Saroufim
Shruti Rijhwani
Shaobo Hou
Disha Shrivastava
Anirudh Baddepudi
Alex Goldin
Adnan Ozturel
Albin Cassirer
Yunhan Xu
Daniel Sohn
Devendra Sachan
Reinald Kim Amplayo
Craig Swanson
Dessie Petrova
Shashi Narayan
Arthur Guez
Siddhartha Brahma
Jessica Landon
Miteyan Patel
Ruizhe Zhao
Kevin Villela
Luyu Wang
Wenhao Jia
Matthew Rahtz
Mai Giménez
Legg Yeung
Hanzhao Lin
James Keeling
Petko Georgiev
Diana Mincu
Boxi Wu
Salem Haykal
Rachel Saputro
Kiran Vodrahalli
James Qin
Zeynep Cankara
Abhanshu Sharma
Nick Fernando
Will Hawkins
Behnam Neyshabur
Solomon Kim
Adrian Hutter
Priyanka Agrawal
Alex Castro-Ros
George van den Driessche
Tao Wang
Fan Yang
Shuo-yiin Chang
Paul Komarek
Ross McIlroy
Mario Lučić
Guodong Zhang
Wael Farhan
Michael Sharman
Paul Natsev
Paul Michel
Yong Cheng
Yamini Bansal
Siyuan Qiao
Kris Cao
Siamak Shakeri
Christina Butterfield
Justin Chung
Paul Kishan Rubenstein
Shivani Agrawal
Arthur Mensch
Kedar Soparkar
Karel Lenc
Timothy Chung
Aedan Pope
Loren Maggiore
Jackie Kay
Priya Jhakra
Shibo Wang
Joshua Maynez
Mary Phuong
Taylor Tobin
Andrea Tacchetti
Maja Trebacz
Kevin Robinson
Yash Katariya
Sebastian Riedel
Paige Bailey
Kefan Xiao
Nimesh Ghelani
Lora Aroyo
Ambrose Slone
Neil Houlsby
Xuehan Xiong
Zhen Yang
Elena Gribovskaya
Jonas Adler
Mateo Wirth
Lisa Lee
Music Li
Thais Kagohara
Jay Pavagadhi
Sophie Bridgers
Anna Bortsova
Sanjay Ghemawat
Zafarali Ahmed
Tianqi Liu
Richard Powell
Vijay Bolina
Mariko Iinuma
Polina Zablotskaia
James Besley
Da-Woon Chung
Timothy Dozat
Ramona Comanescu
Xiance Si
Jeremy Greer
Guolong Su
Martin Polacek
Raphaël Lopez Kaufman
Simon Tokumine
Hexiang Hu
Elena Buchatskaya
Yingjie Miao
Mohamed Elhawaty
Aditya Siddhant
Nenad Tomasev
Jinwei Xing
Christina Greer
Helen Miller
Shereen Ashraf
Aurko Roy
Zizhao Zhang
Ada Ma
Angelos Filos
Milos Besta
Rory Blevins
Ted Klimenko
Chih-Kuan Yeh
Soravit Changpinyo
Jiaqi Mu
Oscar Chang
Mantas Pajarskas
Carrie Muir
Vered Cohen
Charline Le Lan
Krishna Haridasan
Amit Marathe
Steven Hansen
Sholto Douglas
Rajkumar Samuel
Mingqiu Wang
Sophia Austin
Chang Lan
Jiepu Jiang
Justin Chiu
Jaime Alonso Lorenzo
Lars Lowe Sjösund
Sébastien Cevey
Zach Gleicher
Thi Avrahami
Anudhyan Boral
Hansa Srinivasan
Vittorio Selo
Rhys May
Konstantinos Aisopos
Léonard Hussenot
Livio Baldini Soares
Kate Baumli
Michael B. Chang
Adrià Recasens
Ben Caine
Alexander Pritzel
Filip Pavetic
Fabio Pardo
Anita Gergely
Justin Frye
Vinay Ramasesh
Dan Horgan
Kartikeya Badola
Nora Kassner
Subhrajit Roy
Ethan Dyer
Víctor Campos
Alex Tomala
Yunhao Tang
Dalia El Badawy
Elspeth White
Basil Mustafa
Oran Lang
Abhishek Jindal
Sharad Vikram
Zhitao Gong
Sergi Caelles
Ross Hemsley
Gregory Thornton
Fangxiaoyu Feng
Wojciech Stokowiec
Ce Zheng
Phoebe Thacker
Çağlar Ünlü
Zhishuai Zhang
Mohammad Saleh
James Svensson
Max Bileschi
Piyush Patil
Ankesh Anand
Roman Ring
Katerina Tsihlas
Arpi Vezer
Marco Selvi
Toby Shevlane
Mikel Rodriguez
Tom Kwiatkowski
Samira Daruki
Keran Rong
Allan Dafoe
Nicholas FitzGerald
Keren Gu-Lemberg
Mina Khan
Lisa Anne Hendricks
Marie Pellat
Vladimir Feinberg
James Cobon-Kerr
Tara Sainath
Maribeth Rauh
Sayed Hadi Hashemi
Richard Ives
Yana Hasson
YaGuang Li
Eric Noland
Yuan Cao
Nathan Byrd
Le Hou
Qingze Wang
Thibault Sottiaux
Michela Paganini
Jean-Baptiste Lespiau
Alexandre Moufarek
Samer Hassan
Kaushik Shivakumar
Joost van Amersfoort
Amol Mandhane
Pratik Joshi
Anirudh Goyal
Matthew Tung
Andrew Brock
Hannah Sheahan
Vedant Misra
Cheng Li
Nemanja Rakićević
Mostafa Dehghani
Fangyu Liu
Sid Mittal
Junhyuk Oh
Seb Noury
Eren Sezener
Fantine Huot
Matthew Lamm
Nicola De Cao
Charlie Chen
"""

In [141]:
contributors_raw = """
Gamaleldin Elsayed
Ed Chi
Mahdis Mahdieh
Ian Tenney
Nan Hua
Ivan Petrychenko
Patrick Kane
Dylan Scandinaro
Rishub Jain
Jonathan Uesato
Romina Datta
Adam Sadovsky
Oskar Bunyan
Dominik Rabiej
Shimu Wu
John Zhang
Gautam Vasudevan
Edouard Leurent
Mahmoud Alnahlawi
Ionut Georgescu
Nan Wei
Ivy Zheng
Betty Chan
Pam G Rabinovitch
Piotr Stanczyk
Ye Zhang
David Steiner
Subhajit Naskar
Michael Azzam
Matthew Johnson
Adam Paszke
Chung-Cheng Chiu
Jaume Sanchez Elias
Afroz Mohiuddin
Faizan Muhammad
Jin Miao
Andrew Lee
Nino Vieillard
Sahitya Potluri
Jane Park
Elnaz Davoodi
Jiageng Zhang
Jeff Stanway
Drew Garmon
Abhijit Karmarkar
Zhe Dong
Jong Lee
Aviral Kumar
Luowei Zhou
Jonathan Evens
William Isaac
Zhe Chen
Johnson Jia
Anselm Levskaya
Zhenkai Zhu
Chris Gorgolewski
Peter Grabowski
Yu Mao
Alberto Magni
Kaisheng Yao
Javier Snaider
Norman Casagrande
Paul Suganthan
Evan Palmer
Geoffrey Irving
Edward Loper
Manaal Faruqui
Isha Arkatkar
Nanxin Chen
Izhak Shafran
Michael Fink
Alfonso Castaño
Irene Giannoumis
Wooyeol Kim
Mikołaj Rybiński
Ashwin Sreevatsa
Jennifer Prendki
David Soergel
Adrian Goedeckemeyer
Willi Gierke
Mohsen Jafari
Meenu Gaba
Jeremy Wiesner
Diana Gage Wright
Yawen Wei
Harsha Vashisht
Yana Kulizhskaya
Jay Hoover
Maigo Le
Lu Li
Chimezie Iwuanyanwu
Lu Liu
Kevin Ramirez
Andrey Khorlin
Albert Cui
Tian LIN
Marin Georgiev
Marcus Wu
Ricardo Aguilar
Keith Pallo
Abhishek Chakladar
Alena Repina
Xihui Wu
Tom van der Weide
Priya Ponnapalli
Caroline Kaplan
Jiri Simsa
Shuangfeng Li
Olivier Dousse
Fan Yang
Jeff Piper
Nathan Ie
Minnie Lui
Rama Pasumarthi
Nathan Lintz
Anitha Vijayakumar
Lam Nguyen Thiet
Daniel Andor
Pedro Valenzuela
Cosmin Paduraru
Daiyi Peng
Katherine Lee
Shuyuan Zhang
Somer Greene
Duc Dung Nguyen
Paula Kurylowicz
Sarmishta Velury
Sebastian Krause
Cassidy Hardin
Lucas Dixon
Lili Janzer
Kiam Choo
Ziqiang Feng
Biao Zhang
Achintya Singhal
Tejasi Latkar
Mingyang Zhang
Quoc Le
Elena Allica Abellan
Dayou Du
Dan McKinnon
Natasha Antropova
Tolga Bolukbasi
Orgad Keller
David Reid
Daniel Finchelstein
Maria Abi Raad
Remi Crocker
Peter Hawkins
Robert Dadashi
Colin Gaffney
Sid Lall
Ken Franko
Egor Filonov
Anna Bulanova
Rémi Leblond
Vikas Yadav
Shirley Chung
Harry Askham
Luis C. Cobo
Kelvin Xu
Felix Fischer
Jun Xu
Christina Sorokin
Chris Alberti
Chu-Cheng Lin
Colin Evans
Hao Zhou
Alek Dimitriev
Hannah Forbes
Dylan Banarse
Zora Tung
Jeremiah Liu
Mark Omernick
Colton Bishop
Chintu Kumar
Rachel Sterneck
Ryan Foley
Rohan Jain
Swaroop Mishra
Jiawei Xia
Taylor Bos
Geoffrey Cideron
Ehsan Amid
Francesco Piccinno
Xingyu Wang
Praseem Banzal
Petru Gurita
Hila Noga
Premal Shah
Daniel J. Mankowitz
Alex Polozov
Nate Kushman
Victoria Krakovna
Sasha Brown
MohammadHossein Bateni
Dennis Duan
Vlad Firoiu
Meghana Thotakuri
Tom Natan
Anhad Mohananey
Matthieu Geist
Sidharth Mudgal
Sertan Girgin
Hui Li
Jiayu Ye
Ofir Roval
Reiko Tojo
Michael Kwong
James Lee-Thorp
Christopher Yew
Quan Yuan
Sumit Bagri
Danila Sinopalnikov
Sabela Ramos
John Mellor
Abhishek Sharma
Aliaksei Severyn
Jonathan Lai
Kathy Wu
Heng-Tze Cheng
David Miller
Nicolas Sonnerat
Denis Vnukov
Rory Greig
Jennifer Beattie
Emily Caveness
Libin Bai
Julian Eisenschlos
Alex Korchemniy
Tomy Tsai
Mimi Jasarevic
Weize Kong
Phuong Dao
Zeyu Zheng
Frederick Liu
Fan Yang
Rui Zhu
Mark Geller
Tian Huey Teh
Jason Sanmiya
Evgeny Gladchenko
Nejc Trdin
Andrei Sozanschi
Daniel Toyama
Evan Rosen
Sasan Tavakkol
Linting Xue
Chen Elkind
Oliver Woodman
John Carpenter
George Papamakarios
Rupert Kemp
Sushant Kafle
Tanya Grunina
Rishika Sinha
Alice Talbert
Abhimanyu Goyal
Diane Wu
Denese Owusu-Afriyie
Cosmo Du
Chloe Thornton
Jordi Pont-Tuset
Pradyumna Narayana
Jing Li
Saaber Fatehi
John Wieting
Omar Ajmeri
Benigno Uria
Tao Zhu
Yeongil Ko
Laura Knight
Amélie Héliou
Ning Niu
Shane Gu
Chenxi Pang
Dustin Tran
Yeqing Li
Nir Levine
Ariel Stolovich
Norbert Kalb
Rebeca Santamaria-Fernandez
Sonam Goenka
Wenny Yustalim
Robin Strudel
Ali Elqursh
Balaji Lakshminarayanan
Charlie Deck
Shyam Upadhyay
Hyo Lee
Mike Dusenberry
Zonglin Li
Xuezhi Wang
Kyle Levin
Raphael Hoffmann
Dan Holtmann-Rice
Olivier Bachem
Summer Yue
Sho Arora
Eric Malmi
Daniil Mirylenka
Qijun Tan
Christy Koh
Soheil Hassas Yeganeh
Siim Põder
Steven Zheng
Francesco Pongetti
Mukarram Tariq
Yanhua Sun
Lucian Ionita
Mojtaba Seyedhosseini
Pouya Tafti
Ragha Kotikalapudi
Zhiyu Liu
Anmol Gulati
Jasmine Liu
Xinyu Ye
Bart Chrzaszcz
Lily Wang
Nikhil Sethi
Tianrun Li
Ben Brown
Shreya Singh
Wei Fan
Aaron Parisi
Joe Stanton
Chenkai Kuang
Vinod Koverkathu
Christopher A. Choquette-Choo
Yunjie Li
TJ Lu
Abe Ittycheriah
Prakash Shroff
Pei Sun
Mani Varadarajan
Sanaz Bahargam
Rob Willoughby
David Gaddy
Ishita Dasgupta
Guillaume Desjardins
Marco Cornero
Brona Robenek
Bhavishya Mittal
Ben Albrecht
Ashish Shenoy
Fedor Moiseev
Henrik Jacobsson
Alireza Ghaffarkhah
Morgane Rivière
Alanna Walton
Clément Crepy
Alicia Parrish
Yuan Liu
Zongwei Zhou
Clement Farabet
Carey Radebaugh
Praveen Srinivasan
Claudia van der Salm
Andreas Fidjeland
Salvatore Scellato
Eri Latorre-Chimoto
Hanna Klimczak-Plucińska
David Bridson
Dario de Cesare
Tom Hudson
Piermaria Mendolicchio
Lexi Walker
Alex Morris
Ivo Penchev
Matthew Mauger
Alexey Guseynov
Alison Reid
Seth Odoom
Lucia Loher
Victor Cotruta
Madhavi Yenugula
Dominik Grewe
Anastasia Petrushkina
Tom Duerig
Antonio Sanchez
Steve Yadlowsky
Amy Shen
Amir Globerson
Adam Kurzrok
Lynette Webb
Sahil Dua
Dong Li
Preethi Lahoti
Surya Bhupatiraju
Dan Hurt
Haroon Qureshi
Ananth Agarwal
Tomer Shani
Matan Eyal
Anuj Khare
Shreyas Rammohan Belle
Lei Wang
Chetan Tekur
Mihir Sanjay Kale
Jinliang Wei
Ruoxin Sang
Brennan Saeta
Tyler Liechty
Yi Sun
Yao Zhao
Stephan Lee
Pandu Nayak
Doug Fritz
Manish Reddy Vuyyuru
John Aslanides
Nidhi Vyas
Martin Wicke
Xiao Ma
Taylan Bilal
Evgenii Eltyshev
Daniel Balle
Nina Martin
Hardie Cate
James Manyika
Keyvan Amiri
Yelin Kim
Xi Xiong
Kai Kang
Florian Luisier
Nilesh Tripuraneni
David Madras
Mandy Guo
Austin Waters
Oliver Wang
Joshua Ainslie
Jason Baldridge
Han Zhang
Garima Pruthi
Jakob Bauer
Feng Yang
Riham Mansour
Jason Gelman
Yang Xu
George Polovets
Ji Liu
Honglong Cai
Warren Chen
XiangHai Sheng
Emily Xue
Sherjil Ozair
Adams Yu
Christof Angermueller
Xiaowei Li
Weiren Wang
Julia Wiesinger
Emmanouil Koukoumidis
Yuan Tian
Anand Iyer
Madhu Gurumurthy
Mark Goldenson
Parashar Shah
MK Blake
Hongkun Yu
Anthony Urbanowicz
Jennimaria Palomaki
Chrisantha Fernando
Kevin Brooks
Ken Durden
Harsh Mehta
Nikola Momchev
Elahe Rahimtoroghi
Maria Georgaki
Amit Raul
Sebastian Ruder
Morgan Redshaw
Jinhyuk Lee
Komal Jalan
Dinghua Li
Ginger Perng
Blake Hechtman
Parker Schuh
Milad Nasr
Mia Chen
Kieran Milan
Vladimir Mikulik
Trevor Strohman
Juliana Franco
Tim Green
"""

In [142]:
program_leads_raw = """
Demis Hassabis
Koray Kavukcuoglu
"""

In [143]:
overall_technical_leads_raw = """
Jeffrey Dean
Oriol Vinyals
"""

### Estimates

In [144]:
def raw_contributors_to_list(raw_contributors):
  lst = raw_contributors.split('\n')
  lst = lst[1:-1]  # remove empty lines at start and end
  return lst

In [145]:
contributors_dict = {
  'Leads': raw_contributors_to_list(leads_raw),
  'Core contributors': raw_contributors_to_list(core_contributors_raw),
  'Contributors': raw_contributors_to_list(contributors_raw),
  'Program Leads': raw_contributors_to_list(program_leads_raw),
  'Overall Technical Leads': raw_contributors_to_list(overall_technical_leads_raw),
}

In [146]:
num_total_contributors = 0
for role, contributors in contributors_dict.items():
  num_total_contributors += len(contributors)
  print(f"{role}: {len(contributors)}")
print(f"Total: {num_total_contributors}")

Leads: 36
Core contributors: 418
Contributors: 483
Program Leads: 2
Overall Technical Leads: 2
Total: 941


Top engineers / leads could be making \$3M (may not be credible):

http://www.itworld.com/article/2693353/man-or-myth-the-3-million-google-engineer.html

“Commenters on the story on BI and Reddit
have posited that the \$3 million Google engineer in question is Jeff Dean, a Senior Fellow in the Knowledge group who, SFGate.com wrote, is "widely credited by Googlers for the blazing speed of the company's famed search engine." It would make a lot of sense that, if any engineer at Google is making \$3 million - and is a 10x engineer - it would be Jeff Dean, who is clearly exceptional.”

Unverified Redditor claims Jeff Dean would make 8 figures: https://www.reddit.com/r/programming/comments/xtjiwd/comment/iqqqy2u/?utm_source=share&utm_medium=web3x&utm_name=web3xcss&utm_term=1&utm_content=share_button

In [147]:
# The default compensation distribution can't model the extreme compensation values well
# So we will manually input some numbers
np.percentile(compensation_test_distribution, 99.99)

2708181.854920513

In [148]:
np.percentile(salary_test_distribution, 99.99)

380534.5979053316

In [149]:
# Estimates per role

"""
FTEs are rough estimates based on the description of each role.
We need to consider not just FTE while they worked on the project,
but what time period they worked on the project for.
E.g. some people might just be running evaluations and were not involved
in the preliminary experiments.
Also, most contributors are probably split between multiple projects, especially
the most senior leadership.
"""

dict_costs_role = {
    'Leads': {
        'salary_low' : np.percentile(salary_test_distribution, 75),
        'salary_high': np.percentile(salary_test_distribution, 99),
        'equity_low': np.percentile(equity_test_distribution, 75),
        'equity_high': np.percentile(equity_test_distribution, 99),
        'FTE_low' : 0.3,  # they had one or more other projects
        'FTE_high' : 0.9,  # this was their only assigned project, but admin and other duties took up some time
    },
    'Core contributors': {
        'salary_low' : salary_low,
        'salary_high': salary_high,
        'equity_low': equity_low,
        'equity_high': equity_high,
        'FTE_low' : 0.01,  # they made an important contribution but only for a few days' work (per year)
        'FTE_high' : 0.8,  # this was their main project and they worked on it throughout
    },
    # We were informed that contributors barely worked on the project
    # This plausibly corresponds to somewhere between a few hours and a couple weeks
    # i.e. ~0.1% to 5% of a year-long project
    'Contributors': {
        'salary_low' : salary_low,
        'salary_high': salary_high,
        'equity_low': equity_low,
        'equity_high': equity_high,
        'FTE_low' : 0.001,
        'FTE_high' : 0.05,
    },
    # Program and technical leads are very senior and likely split their time between several projects and management duties
    'Program Leads': {
        'salary_low' : 5e5,
        'salary_high': 1e6,
        'equity_low': 1e6,
        'equity_high': 10e6,
        'FTE_low' : 0.05,
        'FTE_high' : 0.2
    },
    'Overall Technical Leads': {
        'salary_low' : 5e5,
        'salary_high': 1e6,
        'equity_low': 1e6,
        'equity_high': 10e6,
        'FTE_low' : 0.05,
        'FTE_high' : 0.2
    },
}

# Compute salaries per role
total_personnel_cost = np.zeros(N)
for role in contributors_dict:
    n_contributors = len(contributors_dict[role])

    compensation = lognorm_from_90_ci(dict_costs_role[role]['salary_low'], dict_costs_role[role]['salary_high'], (N, n_contributors))
    overhead = lognorm_from_90_ci(compensation_overhead_low, compensation_overhead_high, (N, n_contributors))
    compensation *= overhead

    if INCLUDE_EQUITY:
        equity = lognorm_from_90_ci(dict_costs_role[role]['equity_low'], dict_costs_role[role]['equity_high'], (N, n_contributors))
        compensation += equity

    ftes = lognorm_from_90_ci(dict_costs_role[role]['FTE_low'], dict_costs_role[role]['FTE_high'], (N, n_contributors))
    compensation *= ftes
    role_personnel_cost = np.sum(compensation, axis=1)
    total_personnel_cost += role_personnel_cost

# Adjust for project duration
total_personnel_cost *= project_duration
personnel_cost = total_personnel_cost
print_median_and_ci(personnel_cost)

Median: 6.1e+07 [90% CI: 3.5e+07, 1e+08]


## Overall

In [150]:
cost_data['Gemini 1.0 Ultra'] = {
    'AI accelerator chip cost': get_hardware_cost_dist('Gemini 1.0 Ultra', 'AI accelerator chip cost'),
    'Other server components cost': get_hardware_cost_dist('Gemini 1.0 Ultra', 'Other server components cost'),
    'Cluster-level interconnect cost': get_hardware_cost_dist('Gemini 1.0 Ultra', 'Cluster-level interconnect cost'),
    # 'AI accelerator chip cost': ai_accelerator_chip_cost,
    # 'Other server components cost': other_server_components_cost,
    # 'Cluster-level interconnect cost': cluster_level_interconnect_cost,
    'Energy cost': energy_cost,
    rnd_staff_cost_name: personnel_cost,
}

# Inflation adjustment

In [151]:
def adjust_value_for_inflation(value, path_to_price_index, publication_date, to_year_month):
    price_index = pd.read_csv(path_to_price_index)
    from_date = str(publication_date)
    from_year_month = from_date.rsplit('-', maxsplit=1)[0] + '-01'
    from_price_index = price_index[price_index['DATE'] == from_year_month]['PCU518210518210'].values[0]
    to_price_index = price_index[price_index['DATE'] == to_year_month]['PCU518210518210'].values[0]
    adjust_factor = to_price_index / from_price_index
    return value * adjust_factor

In [152]:
for model in cost_data:
    for cost_component in cost_data[model]:
        # Adjust for inflation
        publication_date = frontier_pcd_df.loc[frontier_pcd_df['System'] == model, 'Publication date'].values[0]
        value = cost_data[model][cost_component]
        cost_data[model][cost_component] = adjust_value_for_inflation(value, 'data/PCU518210518210.csv', publication_date, '2023-12-01')

# Export data

In [153]:
rows = []
for model in models:
    component_costs = {'System': model}
    for component in cost_component_names:
        y = np.median(cost_data[model][component])
        y_low = np.percentile(cost_data[model][component], 2.5)
        y_high = np.percentile(cost_data[model][component], 97.5)
        component_costs.update({
            component: y,
            component + ' (2.5th percentile)': y_low,
            component + ' (97.5th percentile)': y_high
        })
    rows.append(component_costs)
dev_cost_df = pd.DataFrame(rows)
dev_cost_df

Unnamed: 0,System,AI accelerator chip cost,AI accelerator chip cost (2.5th percentile),AI accelerator chip cost (97.5th percentile),Other server components cost,Other server components cost (2.5th percentile),Other server components cost (97.5th percentile),Cluster-level interconnect cost,Cluster-level interconnect cost (2.5th percentile),Cluster-level interconnect cost (97.5th percentile),Energy cost,Energy cost (2.5th percentile),Energy cost (97.5th percentile)
0,GPT-3 175B (davinci),2047648.0,1006119.0,4203899.0,1409573.0,686472.1,2911509.0,813069.0,394568.2,1679117.0,198105.6,77429.78,512640.6
1,OPT-175B,730191.2,357712.6,1517942.0,486026.4,236868.3,1008629.0,287324.6,140254.8,582671.6,43797.38,17118.25,113335.1
2,GPT-4,39405970.0,19258830.0,80190210.0,26153420.0,12573990.0,54185760.0,15351130.0,7531177.0,31678750.0,7823102.0,2994667.0,20208520.0
3,Gemini 1.0 Ultra,29106340.0,14342350.0,59717910.0,18612000.0,9203103.0,38301170.0,11231330.0,5489078.0,23368030.0,5637958.0,1337121.0,23681780.0


In [154]:
dev_cost_df.to_csv(results_dir + 'cost_dataset.csv', index=False)

# Plots

In [155]:
cost_component_names.insert(0, rnd_staff_cost_name)

In [156]:
# Create a grouped bar chart with error bars
fig = go.Figure()

# Group 1
for component in cost_component_names:
    y_values = [np.median(cost_data[model][component]) for model in models]
    y_lows = [np.percentile(cost_data[model][component], 2.5) for model in models]
    y_highs = [np.percentile(cost_data[model][component], 97.5) for model in models]
    print(component)
    print(y_values, y_lows, y_highs)
    fig.add_trace(go.Bar(
        x=models,
        y=y_values,
        error_y=dict(
            type='data',
            symmetric=False,  # Use asymmetric error bars
            array=[y_highs[i] - y_values[i] for i in range(len(y_values))],  # Upper bounds of the error bars
            arrayminus=[y_values[i] - y_lows[i] for i in range(len(y_values))],  # Lower bounds
            visible=True  # Make error bars visible
        ),
        name=component
    ))

# log y axis
fig.update_yaxes(type="log")

# y limits
fig.update_yaxes(range=[4, 8.5])

# Customize the layout
fig.update_layout(
    width=800,
    height=600,
    barmode='group',  # Group bars together
    title='Amortized hardware CapEx, energy, and R&D staff costs for training and experiments',
    xaxis=dict(
        title='Model'
    ),
    yaxis=dict(
        title='Cost (2023 USD, log scale)'
    ),
)

save_plot(fig, results_dir, 'total_amortized_model_development_costs')

# Show the plot
fig.show()


R&D staff cost (including equity)
[2119505.0343017457, 1198573.7898185719, 35995546.401590794, 60821885.44408378] [763591.4125779939, 780488.2392765866, 26278403.03055827, 31551016.503551062] [4092861.2992937034, 1947891.6252119814, 46552804.088732935, 116173325.97185512]
AI accelerator chip cost
[2047647.7961608092, 730191.1761124248, 39405972.39996795, 29106337.34917251] [1006118.9517891147, 357712.59218234406, 19258825.969149232, 14342346.016000899] [4203899.347474982, 1517942.4340617347, 80190207.07462236, 59717906.125355]
Other server components cost
[1409573.156760231, 486026.38653410284, 26153423.80866784, 18611995.582405828] [686472.1498954927, 236868.26783177312, 12573987.002455369, 9203103.307842841] [2911509.22900503, 1008628.851344573, 54185757.126703806, 38301168.84054058]
Cluster-level interconnect cost
[813069.0474954192, 287324.6023918593, 15351130.313273245, 11231327.318395447] [394568.2067614988, 140254.83481252994, 7531176.959507559, 5489077.80640556] [1679117.428928

In [157]:
total_costs = {}
for model in models:
    total_costs[model] = sum(np.median(cost_data[model][component]) for component in cost_component_names)
total_costs

{'GPT-3 175B (davinci)': 6587900.611260872,
 'OPT-175B': 2745913.3358023087,
 'GPT-4': 124729175.03755166,
 'Gemini 1.0 Ultra': 125409503.30501996}

In [158]:
staff_cost_ratio = {}
for model in models:
    compute_cost = sum(np.median(cost_data[model][component]) for component in cost_component_names if component != rnd_staff_cost_name)
    staff_cost = np.median(cost_data[model][rnd_staff_cost_name])
    staff_cost_ratio[model] = staff_cost / compute_cost
staff_cost_ratio

{'GPT-3 175B (davinci)': 0.474332452845218,
 'OPT-175B': 0.7746029583032252,
 'GPT-4': 0.40565845164820596,
 'Gemini 1.0 Ultra': 0.941695753124687}

In [159]:
for model in models:
    print(model)
    for percentile in [2.5, 50, 97.5]:
        print(f'{percentile}th percentile')
        print(sum(np.percentile(cost_data[model][component], percentile) for component in cost_component_names[1:]))

GPT-3 175B (davinci)
2.5th percentile
2164589.0873949663
50th percentile
4468395.576959127
97.5th percentile
9307166.585196262
OPT-175B
2.5th percentile
751953.9485594342
50th percentile
1547339.5459837366
97.5th percentile
3222577.996826298
GPT-4
2.5th percentile
42358656.99362213
50th percentile
88733628.63596086
97.5th percentile
186263229.6012877
Gemini 1.0 Ultra
2.5th percentile
30371648.3935441
50th percentile
64587617.86093618
97.5th percentile
145068882.2210432


In [160]:
len(price_df['Hardware model'].unique())

24

In [161]:
# Uncertainty in the proportion of R&D staff cost
total_costs_dist = {model: sum(cost_data[model][component] for component in cost_component_names) for model in models}
component = rnd_staff_cost_name
low_proportions = {model: np.percentile(cost_data[model][component] / total_costs_dist[model], 2.5) * 100 for model in models}
median_proportions = {model: np.percentile(cost_data[model][component] / total_costs_dist[model], 50) * 100 for model in models}
high_proportions = {model: np.percentile(cost_data[model][component] / total_costs_dist[model], 97.5) * 100 for model in models}
print(f'2.5th percentile {rnd_staff_cost_name} (%):', low_proportions)
print(f'Median {rnd_staff_cost_name} (%):', median_proportions)
print(f'97.5th percentile {rnd_staff_cost_name} (%):', high_proportions)

2.5th percentile R&D staff cost (including equity) (%): {'GPT-3 175B (davinci)': 13.1671943793917, 'OPT-175B': 28.521027649032497, 'GPT-4': 18.717620358974965, 'Gemini 1.0 Ultra': 28.71865348658175}
Median R&D staff cost (including equity) (%): {'GPT-3 175B (davinci)': 30.894167830358544, 'OPT-175B': 42.638058497096566, 'GPT-4': 27.75941168173761, 'Gemini 1.0 Ultra': 46.9098090278183}
97.5th percentile R&D staff cost (including equity) (%): {'GPT-3 175B (davinci)': 49.64356031822493, 'OPT-175B': 58.408554183776985, 'GPT-4': 38.71926355611089, 'Gemini 1.0 Ultra': 65.54935836965737}


In [162]:
# Creating the figure
fig = go.Figure()

for component in cost_component_names:
    proportions = [np.median(cost_data[model][component]) / total_costs[model] * 100 for model in models]
    fig.add_trace(go.Bar(
        name=component,
        x=list(models),
        y=proportions,
        text=[f'{np.around(pp/100, 1) * 100:.0f}%' for pp in proportions],
        textposition='auto',
    ))

# y ticks
fig.update_yaxes(range=[0, 100])

# Update the layout
fig.update_layout(
    barmode='stack',
    title='Proportions of cost for training and experiments',
    xaxis_title='Models',
    yaxis_title='Proportion (%)',
    legend_title='Cost components',
    width=600,
    height=400,
)

# Save plot
save_plot(fig, results_dir, 'cost_proportions_stacked')

# Show the figure
fig.show()

In [163]:
for component in cost_component_names:
    proportions = [np.median(cost_data[model][component]) / total_costs[model] * 100 for model in models]
    print(component)
    print(models)
    print(proportions)

R&D staff cost (including equity)
['GPT-3 175B (davinci)', 'OPT-175B', 'GPT-4', 'Gemini 1.0 Ultra']
[32.172692931627104, 43.649367013557594, 28.858962941712534, 48.49862557557005]
AI accelerator chip cost
['GPT-3 175B (davinci)', 'OPT-175B', 'GPT-4', 'Gemini 1.0 Ultra']
[31.081947299883527, 26.59192359030062, 31.593227797830114, 23.20903646223709]
Other server components cost
['GPT-3 175B (davinci)', 'OPT-175B', 'GPT-4', 'Gemini 1.0 Ultra']
[21.39639378212249, 17.699990025070996, 20.968168674886165, 14.840977032767514]
Cluster-level interconnect cost
['GPT-3 175B (davinci)', 'OPT-175B', 'GPT-4', 'Gemini 1.0 Ultra']
[12.341853580875503, 10.463717068037326, 12.307569827709957, 8.955722670457202]
Energy cost
['GPT-3 175B (davinci)', 'OPT-175B', 'GPT-4', 'Gemini 1.0 Ultra']
[3.00711240549136, 1.5950023030334544, 6.272070757861223, 4.495638258968141]


In [164]:
for model in models:
    proportions = [np.median(cost_data[model][component]) / total_costs[model] * 100 
        for component in ['AI accelerator chip cost', 'Other server components cost', 'Cluster-level interconnect cost']]
    print(sum(proportions))

64.82019466288152
54.75563068340894
64.86896630042624
47.00573616546181
