In [None]:
import squigglepy as sq
import numpy as np
import pandas as pd
from squigglepy.numbers import K, M, B

# One seed shared by squigglepy and NumPy's legacy global RNG so reruns draw the same samples.
SEED = 42
sq.set_seed(SEED)
np.random.seed(SEED)

# Raise FloatingPointError (not merely warn) on invalid floating-point operations,
# so NaN-producing arithmetic fails loudly instead of propagating silently.
np.seterr(invalid='raise')

# Number of Monte Carlo samples for the correlated (cumulative) simulation.
N_SAMPLES = 5000

from chip_estimates_utils import (
    estimate_chip_sales,
    estimate_cumulative_chip_sales,
)

In [2]:
# NVIDIA chip types covered by this analysis (order is reused for every table below)
CHIP_TYPES = [
    'A100',
    'A800',
    'H100/H200',
    'H800',
    'H20',
    'B200',
    'B300',
]

# Share of compute revenue that is hardware (as opposed to cloud/software),
# given as an 80%-credibility interval.
# This is our main source of revenue uncertainty for NVIDIA.
HARDWARE_SHARE = sq.to(0.96, 0.99, credibility=80)

In [3]:
# Revenue and price tables are two tabs (gid) of the same Google Sheet, exported as CSV.
REVENUE_CSV_URL = "https://docs.google.com/spreadsheets/d/1Yhu87Rw--9tviAuBwg_luL3OFAFkdHdVfli6tN215Xk/export?format=csv&gid=0"
PRICES_CSV_URL = "https://docs.google.com/spreadsheets/d/1Yhu87Rw--9tviAuBwg_luL3OFAFkdHdVfli6tN215Xk/export?format=csv&gid=1819303346"

# Revenue is indexed by quarter label, prices by year.
revenue_df = pd.read_csv(REVENUE_CSV_URL).set_index('Quarter')
prices_df = pd.read_csv(PRICES_CSV_URL).set_index('Year')

# Quarter labels in sheet order; this drives every per-quarter loop below.
QUARTERS = revenue_df.index.tolist()

# Quick sanity check of what was loaded.
print(f"Loaded {len(QUARTERS)} quarters of data")
print(revenue_df[['Compute revenue']].head(), "", prices_df.head(), sep="\n")

Loaded 15 quarters of data
         Compute revenue
Quarter                 
FY23Q1              2.81
FY23Q2              2.86
FY23Q3              2.87
FY23Q4              2.72
FY24Q1              3.35

      A100 low  A100 high  Notes  H100 low  H100 high  Geomean  Notes.1  \
Year                                                                      
2022     10000      15000    NaN     27000      35000  $30,741      NaN   
2023     10000      15000    NaN     27000      35000  $30,741      NaN   
2024     10000      15000    NaN     25000      32000  $28,284      NaN   
2025     10000      15000    NaN     22000      30000  $25,690      NaN   

      B200 low  B200 high Geomean.1  ...  Geomean.2  Notes.3  H20 low  \
Year                                 ...                                
2022       NaN        NaN       NaN  ...        NaN      NaN      NaN   
2023       NaN        NaN       NaN  ...        NaN      NaN      NaN   
2024   33000.0    42000.0   $37,229  ...        NaN   

In [4]:
# ==============================================
# BASE PRICE AND DEFLATION SETUP
# ==============================================

# Chips whose column prefix in the prices CSV differs from the name used in CHIP_TYPES
PRICE_COLUMN_MAP = {
    'H100/H200': 'H100',
}

# (low, high) price bounds to use when the CSV has no usable price for a chip
FALLBACK_PRICES = {
    'A100': (10*K, 15*K),
    'A800': (10*K, 15*K),
    'H100/H200': (20*K, 30*K),
    'H800': (20*K, 30*K),
    'H20': (10*K, 15*K),
    'B200': (33*K, 42*K),
    'B300': (33*K, 42*K),
}

# Build base price distributions (from first available year for each chip)
def get_price_dist_for_year(chip, year):
    """Build the price distribution for `chip` in `year`.

    Uses the chip's '<name> low' / '<name> high' columns of `prices_df` as an
    80%-credibility interval. If either column is absent, the year is not in
    the table, or either value is missing, the chip's FALLBACK_PRICES entry
    (or 20K-30K when the chip has none) is used instead.
    """
    column_prefix = PRICE_COLUMN_MAP.get(chip, chip)
    bound_columns = (f'{column_prefix} low', f'{column_prefix} high')

    columns_present = all(col in prices_df.columns for col in bound_columns)
    if columns_present and year in prices_df.index:
        low, high = (prices_df.loc[year, col] for col in bound_columns)
        if not (pd.isna(low) or pd.isna(high)):
            return sq.to(low, high, credibility=80)

    # No usable sheet data: fall back to the hard-coded bounds.
    fallback_low, fallback_high = FALLBACK_PRICES.get(chip, (20*K, 30*K))
    return sq.to(fallback_low, fallback_high, credibility=80)

# Find first year each chip has price data
def find_first_year_with_price(chip):
    """Find the earliest year in `prices_df` with a usable price for `chip`.

    A year counts only when BOTH the '<name> low' and '<name> high' cells are
    filled, because a price distribution needs both bounds. Requiring only the
    low bound could pick a base year whose price then falls back to the
    hard-coded defaults.

    Returns:
        The earliest such year, or the earliest year in the table when the
        chip has no complete price row (or no price columns at all).
    """
    csv_chip_name = PRICE_COLUMN_MAP.get(chip, chip)
    low_col, high_col = f'{csv_chip_name} low', f'{csv_chip_name} high'

    if low_col in prices_df.columns and high_col in prices_df.columns:
        for year in sorted(prices_df.index):
            has_low = pd.notna(prices_df.loc[year, low_col])
            has_high = pd.notna(prices_df.loc[year, high_col])
            if has_low and has_high:
                return year
    return min(prices_df.index)  # fallback to first year

# For every chip: its base year (first year with price data) and the price
# distribution of that year.
BASE_YEAR = {}
BASE_PRICES = {}
for chip in CHIP_TYPES:
    BASE_YEAR[chip] = find_first_year_with_price(chip)
    BASE_PRICES[chip] = get_price_dist_for_year(chip, BASE_YEAR[chip])

print("Base prices (first year available for each chip):")
for chip, base_dist in BASE_PRICES.items():
    # .x / .y are the low / high bounds the distribution was built from
    print(f"  {chip} ({BASE_YEAR[chip]}): ${base_dist.x:,.0f} - ${base_dist.y:,.0f}")

Base prices (first year available for each chip):
  A100 (2022): $10,000 - $15,000
  A800 (2022): $10,000 - $15,000
  H100/H200 (2022): $27,000 - $35,000
  H800 (2023): $25,000 - $35,000
  H20 (2024): $10,000 - $15,000
  B200 (2024): $33,000 - $42,000
  B300 (2025): $38,000 - $49,000


In [5]:
# ==============================================
# DEFLATION FACTORS
# ==============================================

def get_price_bounds(chip, year):
    """Look up the raw (low, high) price bounds for `chip` in `year`.

    Returns None when the chip's price columns are absent from `prices_df`,
    the year is not in the table, or either bound is missing.
    """
    prefix = PRICE_COLUMN_MAP.get(chip, chip)
    low_col = f'{prefix} low'
    high_col = f'{prefix} high'

    columns_missing = low_col not in prices_df.columns or high_col not in prices_df.columns
    if columns_missing or year not in prices_df.index:
        return None

    low = prices_df.loc[year, low_col]
    high = prices_df.loc[year, high_col]
    if pd.isna(low) or pd.isna(high):
        return None
    return (low, high)

def get_price_year_for_quarter(quarter):
    """Return the calendar year of the quarter's 'Start Date', used to pick its price row."""
    return pd.to_datetime(revenue_df.loc[quarter, 'Start Date']).year

def get_deflation_factor(quarter, chip):
    """Return the price of `chip` in `quarter` relative to its base-year price.

    The factor is 1.0 for quarters at or before the chip's base year, and also
    whenever price bounds are unavailable for either the base year or the
    quarter's pricing year. Otherwise it is the ratio of the two years'
    geometric-mean prices.
    """
    price_year = get_price_year_for_quarter(quarter)
    base_year = BASE_YEAR[chip]
    if price_year <= base_year:
        return 1.0

    base_bounds = get_price_bounds(chip, base_year)
    current_bounds = get_price_bounds(chip, price_year)
    if not (base_bounds and current_bounds):
        return 1.0

    base_low, base_high = base_bounds
    current_low, current_high = current_bounds
    # sqrt(low * high) is the geometric mean of an interval, so this is the
    # ratio of the current geometric-mean price to the base one.
    return np.sqrt((current_low * current_high) / (base_low * base_high))

# Print deflation factors for reference
print("Deflation factors by year (ratio to base year):")
years = sorted(prices_df.index)

# The first listed quarter of each pricing year stands in for that year.
first_quarter_in_year = {}
for q in QUARTERS:
    first_quarter_in_year.setdefault(get_price_year_for_quarter(q), q)

for chip in CHIP_TYPES:
    # Years that have no quarter in the revenue table are simply left out.
    factors = {
        year: round(get_deflation_factor(first_quarter_in_year[year], chip), 3)
        for year in years
        if year in first_quarter_in_year
    }
    if factors:
        print(f"  {chip}: {factors}")

Deflation factors by year (ratio to base year):
  A100: {2022: 1.0, 2023: 1.0, 2024: 1.0, 2025: 1.0}
  A800: {2022: 1.0, 2023: 1.0, 2024: 1.0, 2025: 1.0}
  H100/H200: {2022: 1.0, 2023: 1.0, 2024: 0.92, 2025: 0.836}
  H800: {2022: 1.0, 2023: 1.0, 2024: 1.0, 2025: 1.0}
  H20: {2022: 1.0, 2023: 1.0, 2024: 1.0, 2025: 0.931}
  B200: {2022: 1.0, 2023: 1.0, 2024: 1.0, 2025: 1.0}
  B300: {2022: 1.0, 2023: 1.0, 2024: 1.0, 2025: 1.0}


In [None]:
# ==============================================
# SAMPLING FUNCTIONS
# ==============================================

def sample_revenue(quarter):
    """Return the quarter's 'Compute revenue' figure scaled by `B`.

    Deterministic: no uncertainty is applied at this step.
    """
    compute_revenue = revenue_df.loc[quarter, 'Compute revenue']
    return compute_revenue * B

def sample_shares(quarter):
    """Return {chip: share} for the quarter, read from each chip's '<chip> share' column."""
    shares = {}
    for chip in CHIP_TYPES:
        shares[chip] = revenue_df.loc[quarter, f'{chip} share']
    return shares

def sample_base_price(chip):
    """Draw one sample from the chip's base-year price distribution."""
    base_dist = BASE_PRICES[chip]
    return base_dist @ 1

def sample_revenue_uncertainty():
    """Draw one sample of HARDWARE_SHARE, our main source of revenue uncertainty."""
    hardware_share_sample = HARDWARE_SHARE @ 1
    return hardware_share_sample

# Price distributions keyed by (chip, year), built lazily so each one is
# constructed only once rather than on every sample.
PRICE_DIST_CACHE = {}

def sample_price(quarter, chip):
    """Draw one price sample for `chip` in `quarter` (used by the uncorrelated model).

    The distribution is the one for the quarter's pricing year; it is created
    on first use and then reused from PRICE_DIST_CACHE.
    """
    key = (chip, get_price_year_for_quarter(quarter))
    try:
        dist = PRICE_DIST_CACHE[key]
    except KeyError:
        dist = PRICE_DIST_CACHE[key] = get_price_dist_for_year(*key)
    return dist @ 1

In [7]:
# Show the helper's docstring so the callback signatures it expects are visible before it is called.
print(estimate_cumulative_chip_sales.__doc__)


    Run Monte Carlo simulation to estimate cumulative chip volumes with correlated parameters.

    Similar to estimate_chip_sales, but presamples certain parameters to correlate them
    across quarters. Use this when estimating cumulative totals where you want price
    uncertainty (and optionally revenue multiplier) to compound rather than average out.

    Args:
        quarters: list of quarter identifiers (e.g., ['Q1_2023', 'Q2_2023', ...])
        chip_types: list of chip types (e.g., ['alpha', 'beta', 'gamma', ...])
        sample_revenue: fn(quarter) -> float, samples total chip revenue in dollars for a quarter
        sample_shares: fn(quarter) -> dict, samples {chip: share} for a quarter (should sum to 1)
        sample_base_price: fn(chip) -> float, samples the BASE price for a chip type
            (i.e., the price when the chip was first introduced). Called once per chip;
            subsequent quarters use this base price scaled by get_deflation_factor.
        get_defl

In [13]:
# ==============================================
# RUN CORRELATED SIMULATION
# ==============================================

cumulative_results = estimate_cumulative_chip_sales(
    # What to simulate
    quarters=QUARTERS,
    chip_types=CHIP_TYPES,
    n_samples=N_SAMPLES,
    # Per-quarter inputs
    sample_revenue=sample_revenue,
    sample_shares=sample_shares,
    sample_revenue_uncertainty=sample_revenue_uncertainty,
    # Pricing: a base price per chip, scaled per quarter by the deflation factor
    sample_base_price=sample_base_price,
    get_deflation_factor=get_deflation_factor,
)

print("Simulation complete.")

Simulation complete.


In [14]:
# ==============================================
# CUMULATIVE SUMMARY
# ==============================================

def print_cumulative_summary(cumulative_results, chip_types, title="Cumulative Production"):
    """Print a p5 / p50 / p95 table of cumulative chip counts, followed by a TOTAL row.

    Args:
        cumulative_results: mapping of chip name -> array-like of per-sample
            cumulative counts.
        chip_types: chips to report, in display order. A chip whose samples do
            not sum to more than zero is left out of the table and the total.
        title: heading printed above the table.

    The TOTAL row shows percentiles of the per-sample sum over the reported
    chips, not the sum of the per-chip percentiles. Nothing is returned.
    """
    def _format_row(label, samples):
        # One percentile call per row instead of three.
        p5, p50, p95 = np.percentile(samples, [5, 50, 95])
        return f"{label:<12} {int(p5):>12,} {int(p50):>12,} {int(p95):>12,}"

    print(f"\n{title}")
    print(f"{'Version':<12} {'p5':>12} {'p50':>12} {'p95':>12}")
    print("-" * 51)

    grand_total = None
    for chip in chip_types:
        arr = np.asarray(cumulative_results[chip])
        if arr.sum() > 0:
            if grand_total is None:
                # Always accumulate in float: inheriting the first array's dtype
                # makes the sum fail when an integer array is followed by floats.
                grand_total = np.zeros(arr.shape, dtype=float)
            grand_total = grand_total + arr
            print(_format_row(chip, arr))

    if grand_total is not None:
        print("-" * 51)
        print(_format_row('TOTAL', grand_total))

print_cumulative_summary(
    cumulative_results,
    CHIP_TYPES,
    title="Cumulative Nvidia Chip Sales (Correlated Model)",
)


Cumulative Nvidia Chip Sales (Correlated Model)
Version                p5          p50          p95
---------------------------------------------------
A100              687,284      896,350    1,155,578
A800               92,971      120,764      156,666
H100/H200       3,630,250    4,315,035    5,078,479
H800               93,368      116,313      144,810
H20             1,148,323    1,497,729    1,923,943
B200            1,366,105    1,597,211    1,876,533
B300              789,405      936,230    1,109,063
---------------------------------------------------
TOTAL           8,642,708    9,521,119   10,483,723


In [15]:
# ==============================================
# UNCORRELATED MODEL COMPARISON
# ==============================================

# Sample count for the uncorrelated comparison run, named so it is not an
# unexplained literal. It is deliberately not 10k; presumably kept small for
# runtime, since prices are re-sampled every quarter (TODO confirm).
N_SAMPLES_UNCORRELATED = 2000

# Run uncorrelated simulation (price sampled independently each quarter)
uncorrelated = estimate_chip_sales(
    quarters=QUARTERS,
    versions=CHIP_TYPES,
    sample_revenue=sample_revenue,
    sample_shares=sample_shares,
    sample_price=sample_price,
    n_samples=N_SAMPLES_UNCORRELATED,
)


In [None]:
# Sum each chip's per-quarter samples into a cumulative total per sample.
# The running totals start as 0-d zeros and are added out-of-place, so they
# broadcast to whatever sample count the simulation produced. No sample count
# is hard-coded here that would have to match the simulation cell.
cumulative_uncorrelated = {chip: np.zeros(()) for chip in CHIP_TYPES}
for quarter in uncorrelated:
    for chip in CHIP_TYPES:
        quarter_samples = np.asarray(uncorrelated[quarter][chip], dtype=float)
        cumulative_uncorrelated[chip] = cumulative_uncorrelated[chip] + quarter_samples

print_cumulative_summary(cumulative_uncorrelated, CHIP_TYPES, "Cumulative Nvidia Chip Sales (uncorrelated model)")


Cumulative Nvidia Chip Sales (uncorrelated model)
Version                p5          p50          p95
---------------------------------------------------
A100              833,622      931,252    1,046,702
A800              109,181      125,327      143,816
H100/H200       4,192,386    4,440,386    4,705,190
H800              104,295      119,984      139,099
H20             1,392,168    1,547,536    1,723,135
B200            1,519,193    1,647,631    1,786,075
B300              853,785      963,290    1,094,977
---------------------------------------------------
TOTAL           9,442,899    9,789,548   10,159,322
