# Benchmarking data

This notebook generates the data used in the benchmarking suite. 

In particular, the `_metadata` file can take a little while to generate, so we don't want to do it for every benchmarking run.

In [None]:
import numpy as np

from hats.catalog import PartitionInfo, TableProperties
from hats.pixel_math import HealpixPixel
from pathlib import Path

## Large catalog

This contains 196_608 partitions at order 7 (pixel indices 0 through 196_607). This might seem like a silly number, and I guess it is, but it keeps the `_metadata` file under the GitHub size limit.

In [None]:
# Build one HealpixPixel per pixel index (0 through 196_607) at order 7.
pixel_list = [HealpixPixel(7, index) for index in np.arange(196_608)]
partition_info = PartitionInfo.from_healpix(pixel_list)

# Output directory, relative to the current working directory.
# exist_ok=True lets this cell be re-run without failing on an existing directory.
catalog_base_dir = Path("large_catalog")
catalog_base_dir.mkdir(exist_ok=True)

# Persist the partition listing, first as a CSV and then as the metadata files.
# NOTE(review): the metadata write is presumably the slow step that produces
# `_metadata` — confirm against the hats API.
partition_info.write_to_file(catalog_base_dir.joinpath("partition_info.csv"))
partition_info.write_to_metadata_files(catalog_base_dir)

# Catalog properties: total_rows is set to the same number as the partition
# count, and the ra/dec column names are left as empty strings.
table_properties = TableProperties(
    catalog_name="large_catalog",
    catalog_type="object",
    total_rows=196_608,
    ra_column="",
    dec_column="",
)
table_properties.to_properties_file(catalog_base_dir)

## Midsize catalog

This contains 30_000 partitions at order 6.

In [None]:
# Build one HealpixPixel per pixel index (0 through 29_999) at order 6.
pixel_list = [HealpixPixel(6, index) for index in np.arange(30_000)]
partition_info = PartitionInfo.from_healpix(pixel_list)

# Output directory, relative to the current working directory.
# exist_ok=True lets this cell be re-run without failing on an existing directory.
catalog_base_dir = Path("midsize_catalog")
catalog_base_dir.mkdir(exist_ok=True)

# Persist the partition listing, first as a CSV and then as the metadata files.
# NOTE(review): the metadata write is presumably the slow step that produces
# `_metadata` — confirm against the hats API.
partition_info.write_to_file(catalog_base_dir.joinpath("partition_info.csv"))
partition_info.write_to_metadata_files(catalog_base_dir)

# Catalog properties: total_rows is set to the same number as the partition
# count, and the ra/dec column names are left as empty strings.
table_properties = TableProperties(
    catalog_name="midsize_catalog",
    catalog_type="object",
    total_rows=30_000,
    ra_column="",
    dec_column="",
)
table_properties.to_properties_file(catalog_base_dir)