# Comparing prod and dev

This notebook compares results between prod and dev titiler deployments. Running end-to-end benchmarks is documented in [https://github.com/developmentseed/tile-benchmarking/tree/main/03-e2e/README.md](https://github.com/developmentseed/tile-benchmarking/tree/main/03-e2e/README.md).

In [2]:
# Import libraries and configure plotting / warnings for the notebook
import os
import pandas as pd
# Imported for its side effect only; the name is not referenced in the cells shown here
# (presumably it enables hvplot plotting on pandas objects -- confirm against the hvplot docs).
import hvplot.pandas
import holoviews as hv
# Route pandas' built-in plotting through the holoviews backend.
pd.options.plotting.backend = 'holoviews'
import warnings
# Silence all warnings from this point on. The filter is installed after the imports
# above, so warnings raised while importing those libraries are still shown.
warnings.filterwarnings('ignore')
import sys
# Add the tile-benchmarking helpers directory (relative path) to the module search path.
sys.path.append('../../../tile-benchmarking/helpers/')
# Disabled: credential setup via the eodc_hub_role module
# (presumably found in the helpers directory added above -- verify before enabling).
# import eodc_hub_role
# credentials = eodc_hub_role.fetch_and_set_credentials()

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [4]:
%%capture
# %%capture silences the output of the copy commands in this cell.
# Identify the outputs of the benchmark runs to compare from
# https://github.com/developmentseed/tile-benchmarking/actions/runs
# Download the results from S3 (or copy them from the tile-benchmarking repo) into
# downloaded_prod_results/ and downloaded_dev_results/, which the parsing cell reads.
!aws s3 cp --recursive s3://nasa-eodc-data-store/tile-benchmarking-results/2023-10-17_19-17-24/ downloaded_prod_results/
!aws s3 cp --recursive s3://nasa-eodc-data-store/tile-benchmarking-results/2023-10-17_19-17-09/ downloaded_dev_results/


Parse the results and merge them into a single dataframe per environment (prod and dev).

In [5]:
# Load the benchmark stats CSVs of each deployment into `results`:
#   results[env]['all']               -> every row of every stats file for that env
#   results[env][f'Aggregated {env}'] -> only the rows whose Name is 'Aggregated'
results = { 'prod': {}, 'dev': {} }

# Only files whose name ends with this suffix are loaded; the part of the file
# name before the suffix is used as the dataset name.
suffix = "_urls_stats.csv"

for env in results.keys():
    # Directory populated by the download cell for this environment
    directory_path = f"downloaded_{env}_results/"

    # os.listdir returns entries in arbitrary order; sort so that the row order
    # of the merged dataframe is reproducible across runs and machines.
    files_with_suffix = sorted(
        f"{directory_path}{f}" for f in os.listdir(directory_path) if f.endswith(suffix)
    )
    if not files_with_suffix:
        # Fail with an actionable message instead of pandas' "No objects to concatenate"
        raise FileNotFoundError(
            f"No files ending with '{suffix}' found in '{directory_path}'; "
            "download or copy the benchmark results first."
        )

    dfs = []
    for file in files_with_suffix:
        df = pd.read_csv(file)
        # Remember which file each row came from
        df['file'] = file
        dfs.append(df)

    merged_df = pd.concat(dfs)
    # Dataset name = file name without its directory and without the suffix.
    # Derived from `suffix` (not a repeated literal) and independent of how deep
    # directory_path is nested.
    merged_df['dataset'] = merged_df['file'].map(
        lambda path: os.path.basename(path)[:-len(suffix)]
    )
    results[env]['all'] = merged_df
    # The "Aggregated" results represent aggregations across tile endpoints.
    results[env][f'Aggregated {env}'] = merged_df[merged_df['Name'] == 'Aggregated']

In [6]:
# Aggregated rows for each deployment
prod_df = results['prod']['Aggregated prod']
dev_df = results['dev']['Aggregated dev']

# Join the two deployments on dataset; columns present in both frames
# get a ' Prod' / ' Dev' suffix.
merged_df = prod_df.merge(
    dev_df,
    on='dataset',
    suffixes=(' Prod', ' Dev'),
)

# Show the median response times of both deployments side by side
merged_df[
    [
        'Median Response Time Prod',
        'Median Response Time Dev',
        'dataset',
    ]
]

Unnamed: 0,Median Response Time Prod,Median Response Time Dev,dataset
0,540.0,500.0,pr_day_ACCESS-CM2_historical_r1i1p1f1_gn_1950.nc
1,7700.0,7600.0,prod-giovanni-cache-GPM_3IMERGHH_06_precipitat...
2,510.0,480.0,aws-noaa-oisst-feedstock_reference
3,620.0,590.0,cmip6-pds_GISS-E2-1-G_historical_tas
4,560.0,520.0,power_901_monthly_meteorology_utc.zarr
