# core

> Read and query chronicle parquet files.

In [None]:
#| default_exp core

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import polars as pl
import pyarrow.dataset as ds
import pyarrow.parquet as pq
from s3fs import S3FileSystem
# import plotly

SyntaxError: invalid syntax (2808678947.py, line 6)

## read_chronicle

In [None]:
#| export
def read_chronicle(
        dataset: str # Path to dataset
    ) -> pl.DataFrame:
    "Load the chronicle parquet data found at `dataset` and return it as a polars dataframe."
    # pyarrow does the parquet read; polars then wraps the resulting arrow table.
    arrow_table = pq.read_table(dataset)
    return pl.from_arrow(arrow_table)

In [None]:
# Smoke test: both sample datasets must load as polars dataframes.
# isinstance is used instead of comparing against the internal
# `pl.dataframe.frame.DataFrame` path, which is not part of the public API.
z = read_chronicle("./data/v1/metrics")
assert isinstance(z, pl.DataFrame)

z = read_chronicle("./data/v1/logs")
assert isinstance(z, pl.DataFrame)

## describe_measures

In [None]:
#| export
def describe_measures(
        x: pl.DataFrame # metrics dataframe
    ) -> pd.DataFrame:
    "Summarise a metrics dataframe as a pandas dataframe: one row per (service, name) holding the unique descriptions and value columns of that metric"
    keys = ("service", "name")
    # polars renamed `groupby` to `group_by` and later dropped the old
    # spelling; prefer the new name and fall back on older releases.
    group = x.group_by if hasattr(x, "group_by") else x.groupby
    summary = group(*keys).agg(
        pl.col("description").unique(),
        pl.col("value_column").unique(),
    )
    # Grouping does not guarantee row order, so sort explicitly on the keys.
    return summary.sort(*keys).to_pandas()

In [None]:
# Example: summarise every metric found in the sample metrics dataset.
m = read_chronicle("./data/v1/metrics/")
describe_measures(m)

## get_metric_values

In [None]:
#| export
def get_metric_values(
        x:pl.DataFrame, # metrics dataframe
        name:str, # name of metric to extract
        alias:str # alias to use for new column
    ) -> pd.DataFrame:
    "Return host, timestamp and value (renamed to `alias`) for the metric called `name`, ordered by host then timestamp"
    matches_metric = pl.col("name") == name
    wanted_columns = [
        pl.col("host"),
        pl.col("timestamp"),
        pl.col("value_float").alias(alias),
    ]
    # Build the whole pipeline lazily and collect it once at the end.
    query = (
        x.lazy()
        .filter(matches_metric)
        .sort(["host", "timestamp"])
        .select(wanted_columns)
    )
    return query.collect().to_pandas()

In [None]:
# Example: extract the "cpu_percent" metric, exposing its values in a column named "cpu".
m = read_chronicle("./data/v1/metrics/")
get_metric_values(m, "cpu_percent", "cpu")

In [None]:
#| export
def plot_metric_values(
        x:pl.DataFrame, # metrics dataframe
        name:str, # name of metric to extract
        alias:str # alias to use for new column
    ) -> go.Figure: # Plotly figure with one line per host
    "Plot a selected metric using a Plotly line plot"
    # Reuse the extraction function so the plot shows the same rows it returns.
    dat = get_metric_values(x, name, alias)
    # get_metric_values renames the value column to `alias`, hence y=alias.
    return px.line(dat, x="timestamp", y=alias, line_group="host", color="host")

In [None]:
# Smoke test: plotting a known metric must yield a Plotly figure.
m = read_chronicle("./data/v1/metrics/")
p = plot_metric_values(m, "cpu_percent", "cpu")
# Check against the public Figure class rather than the repr of a private module path.
assert isinstance(p, go.Figure)

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()