In [28]:
import squigglepy as sq
import numpy as np
import pandas as pd
from squigglepy.numbers import K, M, B

sq.set_seed(42)
np.random.seed(42)
np.seterr(invalid='raise')  # Warn on operations involving NaN
N_SAMPLES = 5000

from chip_estimates_utils import (
    estimate_chip_sales,
    estimate_cumulative_chip_sales,
    aggregate_by_chip_type,
    interpolate_samples_to_calendar_quarters,
    compute_running_totals,
)

In [29]:
# NVIDIA chip types
CHIP_TYPES = ['A100', 'A800', 'H100/H200', 'H800', 'H20', 'B200', 'B300']

# Hardware share of compute revenue (vs cloud/software)
# This is our main source of revenue uncertainty for NVIDIA
HARDWARE_SHARE = sq.to(0.96, 0.99, credibility=80)

In [30]:
# Load revenue and price data from Google Sheets
revenue_df = pd.read_csv(
    "https://docs.google.com/spreadsheets/d/1Yhu87Rw--9tviAuBwg_luL3OFAFkdHdVfli6tN215Xk/export?format=csv&gid=0"
).set_index('Quarter')

prices_df = pd.read_csv(
    "https://docs.google.com/spreadsheets/d/1Yhu87Rw--9tviAuBwg_luL3OFAFkdHdVfli6tN215Xk/export?format=csv&gid=1819303346"
).set_index('Year')

QUARTERS = revenue_df.index.tolist()

print(f"Loaded {len(QUARTERS)} quarters of data")
print(revenue_df[['Compute revenue']].head())
print()
print(prices_df.head())

Loaded 15 quarters of data
         Compute revenue
Quarter                 
FY23Q1              2.81
FY23Q2              2.86
FY23Q3              2.87
FY23Q4              2.72
FY24Q1              3.35

      A100 low  A100 high  Notes  H100 low  H100 high  Geomean  Notes.1  \
Year                                                                      
2022     10000      15000    NaN     27000      35000  $30,741      NaN   
2023     10000      15000    NaN     27000      35000  $30,741      NaN   
2024     10000      15000    NaN     25000      32000  $28,284      NaN   
2025     10000      15000    NaN     22000      30000  $25,690      NaN   

      B200 low  B200 high Geomean.1  ...  Geomean.2  Notes.3  H20 low  \
Year                                 ...                                
2022       NaN        NaN       NaN  ...        NaN      NaN      NaN   
2023       NaN        NaN       NaN  ...        NaN      NaN      NaN   
2024   33000.0    42000.0   $37,229  ...        NaN   

In [31]:
# ==============================================
# BASE PRICE AND DEFLATION SETUP
# ==============================================

# Map chip types to their column names in the prices CSV
PRICE_COLUMN_MAP = {'H100/H200': 'H100'}

# Fallback prices if not found in CSV
FALLBACK_PRICES = {
    'A100': (10*K, 15*K), 'A800': (10*K, 15*K), 'H100/H200': (20*K, 30*K),
    'H800': (20*K, 30*K), 'H20': (10*K, 15*K), 'B200': (33*K, 42*K), 'B300': (33*K, 42*K)
}

# Build base price distributions (from first available year for each chip)
def get_price_dist_for_year(chip, year):
    """Get price distribution for a chip in a given year."""
    csv_chip_name = PRICE_COLUMN_MAP.get(chip, chip)
    low_col, high_col = f'{csv_chip_name} low', f'{csv_chip_name} high'
    
    if low_col in prices_df.columns and high_col in prices_df.columns:
        if year in prices_df.index:
            low = prices_df.loc[year, low_col]
            high = prices_df.loc[year, high_col]
            if pd.notna(low) and pd.notna(high):
                return sq.to(low, high, credibility=80)
    
    return sq.to(*FALLBACK_PRICES.get(chip, (20*K, 30*K)), credibility=80)

# Find first year each chip has price data
def find_first_year_with_price(chip):
    """Find the first year with price data for a chip."""
    csv_chip_name = PRICE_COLUMN_MAP.get(chip, chip)
    low_col = f'{csv_chip_name} low'
    
    if low_col in prices_df.columns:
        for year in sorted(prices_df.index):
            if pd.notna(prices_df.loc[year, low_col]):
                return year
    return min(prices_df.index)  # fallback to first year

# Build base prices dict
BASE_YEAR = {chip: find_first_year_with_price(chip) for chip in CHIP_TYPES}
BASE_PRICES = {chip: get_price_dist_for_year(chip, BASE_YEAR[chip]) for chip in CHIP_TYPES}

print("Base prices (first year available for each chip):")
for chip in CHIP_TYPES:
    dist = BASE_PRICES[chip]
    print(f"  {chip} ({BASE_YEAR[chip]}): ${dist.x:,.0f} - ${dist.y:,.0f}")

Base prices (first year available for each chip):
  A100 (2022): $10,000 - $15,000
  A800 (2022): $10,000 - $15,000
  H100/H200 (2022): $27,000 - $35,000
  H800 (2023): $25,000 - $35,000
  H20 (2024): $10,000 - $15,000
  B200 (2024): $33,000 - $42,000
  B300 (2025): $38,000 - $49,000


In [32]:
# ==============================================
# DEFLATION FACTORS
# ==============================================

def get_price_bounds(chip, year):
    """Get (low, high) price bounds for a chip in a given year, or None if unavailable."""
    csv_chip_name = PRICE_COLUMN_MAP.get(chip, chip)
    low_col, high_col = f'{csv_chip_name} low', f'{csv_chip_name} high'

    if low_col in prices_df.columns and high_col in prices_df.columns:
        if year in prices_df.index:
            low = prices_df.loc[year, low_col]
            high = prices_df.loc[year, high_col]
            if pd.notna(low) and pd.notna(high):
                return (low, high)
    return None

def get_price_year_for_quarter(quarter):
    """Get the calendar year to use for pricing a quarter."""
    start_date = revenue_df.loc[quarter, 'Start Date']
    return pd.to_datetime(start_date).year

def get_deflation_factor(quarter, chip):
    """Get deflation factor for a chip in a quarter (ratio of current price to base price)."""
    price_year = get_price_year_for_quarter(quarter)
    base_year = BASE_YEAR[chip]

    if price_year <= base_year:
        return 1.0

    base_bounds = get_price_bounds(chip, base_year)
    current_bounds = get_price_bounds(chip, price_year)

    if base_bounds and current_bounds:
        # For lognormal sq.to(low, high), geometric mean = sqrt(low * high)
        return np.sqrt((current_bounds[0] * current_bounds[1]) / (base_bounds[0] * base_bounds[1]))
    return 1.0

# Print deflation factors for reference
print("Deflation factors by year (ratio to base year):")
years = sorted(prices_df.index)
for chip in CHIP_TYPES:
    factors = {}
    for year in years:
        # Find a quarter in this year to test
        for q in QUARTERS:
            if get_price_year_for_quarter(q) == year:
                factors[year] = round(get_deflation_factor(q, chip), 3)
                break
    if factors:
        print(f"  {chip}: {factors}")

Deflation factors by year (ratio to base year):
  A100: {2022: 1.0, 2023: 1.0, 2024: 1.0, 2025: 1.0}
  A800: {2022: 1.0, 2023: 1.0, 2024: 1.0, 2025: 1.0}
  H100/H200: {2022: 1.0, 2023: 1.0, 2024: 0.92, 2025: 0.836}
  H800: {2022: 1.0, 2023: 1.0, 2024: 1.0, 2025: 1.0}
  H20: {2022: 1.0, 2023: 1.0, 2024: 1.0, 2025: 0.931}
  B200: {2022: 1.0, 2023: 1.0, 2024: 1.0, 2025: 1.0}
  B300: {2022: 1.0, 2023: 1.0, 2024: 1.0, 2025: 1.0}


In [33]:
# ==============================================
# SAMPLING FUNCTIONS
# ==============================================

def sample_revenue(quarter):
    """Return base revenue for a quarter (no uncertainty applied here)."""
    return revenue_df.loc[quarter, 'Compute revenue'] * B

def sample_shares(quarter):
    """Sample chip shares for a quarter."""
    return {chip: revenue_df.loc[quarter, f'{chip} share'] for chip in CHIP_TYPES}

def sample_base_price(chip):
    """Sample base price for a chip (from its first available year)."""
    return BASE_PRICES[chip] @ 1

def sample_revenue_uncertainty():
    """Sample hardware share (our main source of revenue uncertainty)."""
    return HARDWARE_SHARE @ 1

# Cache price distributions by (chip, year) to avoid recreating them on every sample
PRICE_DIST_CACHE = {}

def sample_price(quarter, chip):
    """Sample price for a chip in a quarter (for uncorrelated model)."""
    year = get_price_year_for_quarter(quarter)
    cache_key = (chip, year)
    if cache_key not in PRICE_DIST_CACHE:
        PRICE_DIST_CACHE[cache_key] = get_price_dist_for_year(chip, year)
    return PRICE_DIST_CACHE[cache_key] @ 1

In [34]:
print(estimate_cumulative_chip_sales.__doc__)


    Run Monte Carlo simulation to estimate cumulative chip volumes with correlated parameters.

    Similar to estimate_chip_sales, but presamples certain parameters to correlate them
    across quarters. Use this when estimating cumulative totals where you want price
    uncertainty (and optionally revenue multiplier) to compound rather than average out.

    Args:
        quarters: list of quarter identifiers (e.g., ['Q1_2023', 'Q2_2023', ...])
        chip_types: list of chip types (e.g., ['alpha', 'beta', 'gamma', ...])
        sample_revenue: fn(quarter) -> float, samples total chip revenue in dollars for a quarter
        sample_shares: fn(quarter) -> dict, samples {chip: share} for a quarter (should sum to 1)
        sample_base_price: fn(chip) -> float, samples the BASE price for a chip type
            (i.e., the price when the chip was first introduced). Called once per chip;
            subsequent quarters use this base price scaled by get_deflation_factor.
        get_defl

In [35]:
# ==============================================
# RUN CORRELATED SIMULATION
# ==============================================

quarterly_results = estimate_cumulative_chip_sales(
    quarters=QUARTERS,
    chip_types=CHIP_TYPES,
    sample_revenue=sample_revenue,
    sample_shares=sample_shares,
    sample_base_price=sample_base_price,
    get_deflation_factor=get_deflation_factor,
    sample_revenue_uncertainty=sample_revenue_uncertainty,
    n_samples=N_SAMPLES
)

# Aggregate to get cumulative totals by chip type
cumulative_results = aggregate_by_chip_type(quarterly_results)

print("Simulation complete.")

Simulation complete.


In [36]:
# ==============================================
# CUMULATIVE SUMMARY
# ==============================================

def print_cumulative_summary(cumulative_results, chip_types, title="Cumulative Production"):
    """Print formatted summary of cumulative chip counts with percentiles."""
    print(f"\n{title}")
    print(f"{'Version':<12} {'p5':>12} {'p50':>12} {'p95':>12}")
    print("-" * 51)

    grand_total = None
    for chip in chip_types:
        arr = cumulative_results[chip]
        if arr.sum() > 0:
            if grand_total is None:
                grand_total = np.zeros_like(arr)
            grand_total += arr
            print(f"{chip:<12} {int(np.percentile(arr, 5)):>12,} {int(np.percentile(arr, 50)):>12,} {int(np.percentile(arr, 95)):>12,}")

    if grand_total is not None:
        print("-" * 51)
        print(f"{'TOTAL':<12} {int(np.percentile(grand_total, 5)):>12,} {int(np.percentile(grand_total, 50)):>12,} {int(np.percentile(grand_total, 95)):>12,}")

print_cumulative_summary(cumulative_results, CHIP_TYPES, "Cumulative Nvidia Chip Sales (Correlated Model)")


Cumulative Nvidia Chip Sales (Correlated Model)
Version                p5          p50          p95
---------------------------------------------------
A100              691,651      897,432    1,169,575
A800               92,565      121,321      157,245
H100/H200       3,617,775    4,317,441    5,081,714
H800               93,213      115,405      143,436
H20             1,147,688    1,489,831    1,938,144
B200            1,363,400    1,598,070    1,872,270
B300              794,113      937,545    1,101,525
---------------------------------------------------
TOTAL           8,634,334    9,540,791   10,454,039


In [37]:
# ==============================================
# CUMULATIVE RUNNING TOTALS BY QUARTER
# ==============================================
# Shows how cumulative chip counts build up over time

# Compute running totals - for each quarter, sum all quarters up to and including it
running_totals = {}
cumulative_so_far = {chip: np.zeros(N_SAMPLES) for chip in CHIP_TYPES}

for quarter in QUARTERS:
    for chip in CHIP_TYPES:
        cumulative_so_far[chip] = cumulative_so_far[chip] + quarterly_results[quarter][chip]
    # Store a copy of current cumulative state
    running_totals[quarter] = {chip: cumulative_so_far[chip].copy() for chip in CHIP_TYPES}

# Display running totals
print("Cumulative Running Totals by Quarter")
print(f"{'Quarter':<10} {'Chip':<12} {'p5':>12} {'p50':>12} {'p95':>12}")
print("=" * 60)

for quarter in QUARTERS:
    quarter_has_data = False
    for chip in CHIP_TYPES:
        arr = running_totals[quarter][chip]
        if arr.sum() > 0:
            quarter_has_data = True
            print(f"{quarter:<10} {chip:<12} {int(np.percentile(arr, 5)):>12,} {int(np.percentile(arr, 50)):>12,} {int(np.percentile(arr, 95)):>12,}")
    
    # Grand total for this quarter
    if quarter_has_data:
        total = sum(running_totals[quarter][chip] for chip in CHIP_TYPES)
        print(f"{quarter:<10} {'TOTAL':<12} {int(np.percentile(total, 5)):>12,} {int(np.percentile(total, 50)):>12,} {int(np.percentile(total, 95)):>12,}")
        print("-" * 60)

Cumulative Running Totals by Quarter
Quarter    Chip                   p5          p50          p95
FY23Q1     A100              172,671      224,045      291,986
FY23Q1     TOTAL             172,671      224,045      291,986
------------------------------------------------------------
FY23Q2     A100              348,415      452,076      589,167
FY23Q2     TOTAL             348,415      452,076      589,167
------------------------------------------------------------
FY23Q3     A100              507,138      658,023      857,566
FY23Q3     H100/H200           7,649        9,128       10,744
FY23Q3     TOTAL             516,477      666,922      866,452
------------------------------------------------------------
FY23Q4     A100              558,952      725,252      945,183
FY23Q4     A800               31,478       41,257       53,473
FY23Q4     H100/H200          43,897       52,387       61,661
FY23Q4     TOTAL             653,888      819,346    1,039,826
------------------------

In [38]:
# ==============================================
# CALENDAR QUARTER INTERPOLATION (SAMPLE-BASED)
# ==============================================
# Interpolate per-quarter fiscal results to calendar quarters,
# then compute running totals from those

# Build quarter_dates from revenue_df
quarter_dates = {q: (revenue_df.loc[q, 'Start Date'], revenue_df.loc[q, 'End Date']) 
                 for q in QUARTERS}

# Step 1: Interpolate per-quarter samples to calendar quarters
calendar_quarterly = interpolate_samples_to_calendar_quarters(quarterly_results, quarter_dates)

# Step 2: Compute running totals from calendar quarters
calendar_running_totals = compute_running_totals(calendar_quarterly)

# Display calendar quarter running totals
print("Cumulative Running Totals by Calendar Quarter")
print(f"{'Quarter':<10} {'Chip':<12} {'p5':>12} {'p50':>12} {'p95':>12}")
print("=" * 60)

for cq in calendar_running_totals:
    quarter_has_data = False
    for chip in CHIP_TYPES:
        arr = calendar_running_totals[cq][chip]
        if arr.sum() > 0:
            quarter_has_data = True
            print(f"{cq:<10} {chip:<12} {int(np.percentile(arr, 5)):>12,} {int(np.percentile(arr, 50)):>12,} {int(np.percentile(arr, 95)):>12,}")
    
    if quarter_has_data:
        total = sum(calendar_running_totals[cq][chip] for chip in CHIP_TYPES)
        print(f"{cq:<10} {'TOTAL':<12} {int(np.percentile(total, 5)):>12,} {int(np.percentile(total, 50)):>12,} {int(np.percentile(total, 95)):>12,}")
        print("-" * 60)

Cumulative Running Totals by Calendar Quarter
Quarter    Chip                   p5          p50          p95
Q1 2022    A100              113,849      147,722      192,518
Q1 2022    TOTAL             113,849      147,722      192,518
------------------------------------------------------------
Q2 2022    A100              288,547      374,395      487,930
Q2 2022    TOTAL             288,547      374,395      487,930
------------------------------------------------------------
Q3 2022    A100              454,812      590,128      769,083
Q3 2022    H100/H200           5,127        6,119        7,202
Q3 2022    TOTAL             461,102      596,030      774,933
------------------------------------------------------------
Q4 2022    A100              542,440      703,827      917,261
Q4 2022    A800               21,446       28,109       36,432
Q4 2022    H100/H200          32,346       38,601       45,435
Q4 2022    TOTAL             609,856      770,650      983,857
---------------

In [39]:
# ==============================================
# CHRONOLOGICAL VIEW: FISCAL + CALENDAR INTERLEAVED
# ==============================================
# Pick a subset of quarters around a boundary to verify interpolation

from datetime import datetime

# Select a few interesting quarters to compare (around CY2024)
selected_fiscal = ['FY24Q4', 'FY25Q1', 'FY25Q2', 'FY25Q3', 'FY25Q4']
selected_calendar = ['Q1 2024', 'Q2 2024', 'Q3 2024', 'Q4 2024']

# Build timeline entries
timeline = []

for q in selected_fiscal:
    if q in QUARTERS:
        end_date = pd.to_datetime(revenue_df.loc[q, 'End Date'])
        timeline.append({
            'end_date': end_date,
            'label': q,
            'data': running_totals[q],
            'type': 'FISCAL'
        })

for cq in selected_calendar:
    if cq in calendar_running_totals:
        parts = cq.split()
        q_num, year = int(parts[0][1]), int(parts[1])
        end_dates = {1: (3, 31), 2: (6, 30), 3: (9, 30), 4: (12, 31)}
        end_date = datetime(year, *end_dates[q_num])
        timeline.append({
            'end_date': end_date,
            'label': cq,
            'data': calendar_running_totals[cq],
            'type': 'CALENDAR'
        })

# Sort chronologically
timeline.sort(key=lambda x: x['end_date'])

# Display with full chip breakdown
print("Chronological Comparison: Fiscal vs Calendar Quarter Running Totals")
print("=" * 80)

for entry in timeline:
    print(f"\n{entry['type']}: {entry['label']} (ends {entry['end_date'].strftime('%Y-%m-%d')})")
    print(f"  {'Chip':<12} {'p5':>12} {'p50':>12} {'p95':>12}")
    print(f"  {'-'*50}")
    
    total = np.zeros(N_SAMPLES)
    for chip in CHIP_TYPES:
        arr = entry['data'][chip]
        if arr.sum() > 0:
            total += arr
            print(f"  {chip:<12} {int(np.percentile(arr, 5)):>12,} {int(np.percentile(arr, 50)):>12,} {int(np.percentile(arr, 95)):>12,}")
    
    print(f"  {'-'*50}")
    print(f"  {'TOTAL':<12} {int(np.percentile(total, 5)):>12,} {int(np.percentile(total, 50)):>12,} {int(np.percentile(total, 95)):>12,}")

Chronological Comparison: Fiscal vs Calendar Quarter Running Totals

FISCAL: FY24Q4 (ends 2024-01-28)
  Chip                   p5          p50          p95
  --------------------------------------------------
  A100              691,651      897,432    1,169,575
  A800               92,565      121,321      157,245
  H100/H200         903,119    1,077,779    1,268,568
  H800               93,213      115,405      143,436
  --------------------------------------------------
  TOTAL           1,938,615    2,222,078    2,540,356

CALENDAR: Q1 2024 (ends 2024-03-31)
  Chip                   p5          p50          p95
  --------------------------------------------------
  A100              691,651      897,432    1,169,575
  A800               92,565      121,321      157,245
  H100/H200       1,242,303    1,482,560    1,745,003
  H800               93,213      115,405      143,436
  H20               106,857      138,713      180,454
  --------------------------------------------------
 

In [40]:
# ==============================================
# UNCORRELATED MODEL COMPARISON
# ==============================================

# Run uncorrelated simulation (price sampled independently each quarter)
uncorrelated = estimate_chip_sales(
    quarters=QUARTERS,
    versions=CHIP_TYPES,
    sample_revenue=sample_revenue,
    sample_shares=sample_shares,
    sample_price=sample_price,
    n_samples=1000 # not 10k because it was running slow
)


In [41]:
cumulative_uncorrelated = aggregate_by_chip_type(uncorrelated)

print_cumulative_summary(cumulative_uncorrelated, CHIP_TYPES, "Cumulative Nvidia Chip Sales (uncorrelated model)")


Cumulative Nvidia Chip Sales (uncorrelated model)
Version                p5          p50          p95
---------------------------------------------------
A100              834,473      933,077    1,051,962
A800              109,681      125,274      143,606
H100/H200       4,183,225    4,438,309    4,712,809
H800              104,354      120,163      138,695
H20             1,396,560    1,545,751    1,729,060
B200            1,518,916    1,644,106    1,789,360
B300              857,035      960,717    1,091,332
---------------------------------------------------
TOTAL           9,445,241    9,780,122   10,164,030
