# read

> Read and query chronicle parquet files.

In most cases, you should prefer the scan interface over the read interface. Refer to `scan_chronicle()` for more information.

In [None]:
#| default_exp read

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
import polars as pl
import pyarrow.parquet as pq
# import pyarrow.dataset as ds
# from s3fs import S3FileSystem
# import pandas as pd
# from fastcore.basics import patch
# import re

## Using the read interface

In [None]:
#| hide
#| export
def read_chronicle(
        path: str, # Path to the chronicle dataset
        type: str = "", # Kind of data to read: `metrics` or `logs`
        date: str = "", # Date in the form `YYYY/MM/DD`
        version: str = "v1" # Dataset version; currently must be `v1`
    ) -> pl.DataFrame:
    "Load the chronicle parquet data under `path/version/type/date` into a polars dataframe."
    # The segments are interpolated verbatim, so an empty `type` or `date`
    # leaves an empty path segment in the resulting location.
    location = f"{path}/{version}/{type}/{date}"
    # Read with pyarrow first, then hand the arrow table over to polars.
    table = pq.read_table(location)
    return pl.from_arrow(table)



In [None]:
#| export
def read_chronicle_metrics(
        path: str, # Path to the chronicle dataset
        date: str = "", # Date in the form `YYYY/MM/DD`
        version: str = "v1" # Dataset version; currently must be `v1`
) -> pl.DataFrame:
    "Load chronicle metrics parquet data into a polars dataframe."
    # Thin wrapper: delegates to `read_chronicle` with the type fixed to "metrics".
    return read_chronicle(path=path, type="metrics", date=date, version=version)

def read_chronicle_logs(
        path: str, # Path to the chronicle dataset
        date: str = "", # Date in the form `YYYY/MM/DD`
        version: str = "v1" # Dataset version; currently must be `v1`
) -> pl.DataFrame:
    "Load chronicle logs parquet data into a polars dataframe."
    # Thin wrapper: delegates to `read_chronicle` with the type fixed to "logs".
    return read_chronicle(path=path, type="logs", date=date, version=version)


In [None]:
# Smoke test: metrics stored under ./data for 2023/04/03 load as a polars
# dataframe exposing the expected columns, in order.
z = read_chronicle_metrics("./data", "2023/04/03")
# isinstance is the idiomatic type check (rather than comparing type(z) exactly).
assert isinstance(z, pl.DataFrame)
assert z.columns == [
    'service',
    'host',
    'os',
    'attributes',
    'name',
    'description',
    'unit',
    'type',
    'timestamp',
    'value_float',
    'value_int',
    'value_uint',
    'value_column'
]

In [None]:

# Smoke test: logs stored under ./data for 2023/04/03 load as a polars
# dataframe exposing the expected columns, in order.
z = read_chronicle_logs("./data", "2023/04/03")
# Check against the public `pl.DataFrame` name rather than the internal
# `pl.dataframe.frame` module path, which is an implementation detail of polars.
assert isinstance(z, pl.DataFrame)
assert z.columns == [
    'service',
    'host',
    'os',
    'attributes',
    'body',
    'timestamp'
]