In [16]:
# Load IPython's autoreload extension and switch it to mode 2, so edits to the
# imported project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [17]:
import numpy as np
import os
import pandas as pd
import plotly.express as px

from plotting import *
from regression import *
from utils import *

In [18]:
# Locate the two result sets to compare. Each name is a path relative to
# `results/` and may itself contain sub-directories (e.g. 'old-May-26/...').
results_name1 = 'hardware-capex-energy-top_n=10-original'
results_name2 = 'old-May-26/hardware-capex-energy-top_n=10-original'
results_dir1 = f'results/{results_name1}/'
results_dir2 = f'results/{results_name2}/'

# Outputs of this comparison go into one flat directory. Path separators inside
# the names are replaced so that a nested name does not split the '-vs-' label
# across two directory levels.
comparison_name = f'{results_name1}-vs-{results_name2}'.replace('/', '_')
results_dir = f'results/{comparison_name}/'
os.makedirs(results_dir, exist_ok=True)

In [19]:
# Read the cost dataset written by each run; both runs use the same file name
# inside their own results directory.
cost_df1, cost_df2 = (
    pd.read_csv(run_dir + 'cost_dataset.csv')
    for run_dir in (results_dir1, results_dir2)
)

In [20]:
# Name of the cost column that is compared between the two datasets below.
col = 'Cost (inflation-adjusted)'

In [21]:
# Relative error stats: row-wise cost ratio of dataset 2 over dataset 1.
# NOTE(review): Series division aligns on the row index, so this presumes that
# row i of both CSVs describes the same system — confirm.
ratios = cost_df2[col].div(cost_df1[col])
valid_ratios = ratios.dropna()

print(f'Ratio stats: {results_name2} / {results_name1}')
print_median_and_ci(valid_ratios, ci=[2.5, 97.5])
print(f'Mean: {ratios.mean():.2g}')

Ratio stats: cloud-top_n=10-original / hardware-capex-energy-top_n=10-original
Median: 2.2 [95% CI: 0.95, 3.2]
Mean: 2.2


In [22]:
# Number of rows for which a ratio could be computed (non-missing values).
ratios.dropna().size

42

In [23]:
# Spot-check a single system. NOTE: this is dataset 1 over dataset 2, i.e. the
# inverse of the direction used for `ratios` (dataset 2 / dataset 1).
is_inflection2_df1 = cost_df1['System'] == 'Inflection-2'
is_inflection2_df2 = cost_df2['System'] == 'Inflection-2'
cost_df1.loc[is_inflection2_df1, col] / cost_df2.loc[is_inflection2_df2, col]

1    1.058255
Name: Cost (inflation-adjusted), dtype: float64

In [24]:
# Costs from both datasets at the row position where the ratio is smallest.
smallest_ratio_pos = ratios.argmin()
tuple(frame.iloc[smallest_ratio_pos][col] for frame in (cost_df1, cost_df2))

(769668.1657085726, 324544.81231796124)

In [25]:
# Costs from both datasets at the row position where the ratio is largest.
largest_ratio_pos = ratios.argmax()
tuple(frame.iloc[largest_ratio_pos][col] for frame in (cost_df1, cost_df2))

(404710.7316718201, 1319585.503057254)

# Difference in regression

In [26]:
# Add a numeric publication-date column to both frames (modified in place); it
# is the regressor used by the tests below.
# Presumably `datetime_to_float_year` yields fractional years — confirm in the
# helper module it is star-imported from.
for cost_df in (cost_df1, cost_df2):
    publication_dates = pd.to_datetime(cost_df['Publication date'])
    cost_df['Publication date (float)'] = datetime_to_float_year(publication_dates)

In [27]:
# Compare slopes: regress cost on publication date in each dataset and test
# whether the two slopes differ. The call prints both slopes with standard
# errors and evaluates to (test statistic, p-value).
# `logy=True` presumably fits on the log of the cost column — confirm in the
# regression helper.
regression_slope_t_test(
    cost_df1,
    cost_df2,
    ['Publication date (float)'],
    col,
    logy=True,
)

Slope 1: 0.38 (SE: 0.05)
Slope 2: 0.39 (SE: 0.04)
Test statistic: -0.24
p-value: 0.81


(-0.24496676549606267, 0.8070725967080206)

In [28]:
# Compare slopes and intercepts together with a Chow test on the same
# regression of cost against publication date. The call prints the F-statistic
# and p-value and evaluates to that pair.
chow_test(
    cost_df1,
    cost_df2,
    ['Publication date (float)'],
    col,
    logy=True,
)

Chow Test F-statistic: 2.6364809222282624
p-value: 0.07746444962831234


(2.6364809222282624, 0.07746444962831234)

# Plots

In [29]:
# Tag every row with the name of the result set it came from (written onto the
# original frames), then stack the two frames for plotting.
labelled_frames = zip((cost_df1, cost_df2), (results_name1, results_name2))
for frame, source_name in labelled_frames:
    frame['Source'] = source_name

combined_cost_df = pd.concat([cost_df1, cost_df2])

In [30]:
# Scatter both datasets on a shared log-scale cost axis, one colour per source.
fig = px.scatter(
    combined_cost_df,
    x='Publication date',
    y=col,
    color='Source',
    hover_data=['System'],
    log_y=True,
)
# No `text=` is passed to px.scatter above, so this likely has no visible
# effect unless text labels are added later.
fig.update_traces(textposition='top center')

# Horizontal legend whose bottom-right corner is anchored at (0.95, 0.05).
legend_style = dict(
    orientation='h',
    yanchor='bottom',
    y=0.05,
    xanchor='right',
    x=0.95,
)

# Layout in one pass: untitled legend, empty figure title, fixed 800x600
# canvas, base font size, and margins.
fig.update_layout(
    legend_title_text='',
    legend=legend_style,
    title_text='',
    title_font=dict(size=16),
    autosize=False,
    width=800,
    height=600,
    font=dict(size=14),
    margin=dict(l=10, r=10, t=40, b=10),
)

# Axis titles, plus a fixed date window on the x axis.
fig.update_xaxes(
    title_text='Publication date',
    range=['2015-01-01', '2025-01-01'],
)
fig.update_yaxes(title_text='Cost (2023 USD)')

# Persist the figure into the comparison directory, then render it inline.
save_plot(fig, results_dir, 'cost_scatter_comparison')

fig.show()