In [6]:
# Enable IPython's autoreload extension so edits to imported modules are picked up live.
%load_ext autoreload
# Mode 2: reload all modules (except those excluded via %aimport) before executing code.
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
from windmark.core.structs import LevelSet

In [8]:
# Build a LevelSet from a name string and a list of three level labels.
# NOTE(review): the meaning of each positional argument is inferred from the
# values passed here — confirm against windmark.core.structs.LevelSet.
levelset = LevelSet("event_type", ["hello", "world", "oh_yeah"])

In [12]:
# Display the `mapping` attribute of the LevelSet created above.
levelset.mapping

<enum 'LevelEnum'>

In [21]:
from zlib import crc32

import polars as pl

import random
import string


def generate_random_string_list(size, str_length, rng=None):
    """Generate a list of random alphanumeric strings.

    Args:
        size (int): The number of random strings to generate.
        str_length (int): The length of each string.
        rng (random.Random, optional): Source of randomness. Pass a seeded
            ``random.Random`` instance for reproducible output that does not
            touch the global generator. Defaults to the module-level
            ``random`` generator.

    Returns:
        list: A list containing 'size' random strings, each 'str_length'
            characters long, drawn from ASCII letters and digits.
    """
    # Build the alphabet once instead of re-concatenating it for every string.
    alphabet = string.ascii_letters + string.digits
    # The ``random`` module exposes the same ``choices`` API as a ``Random`` instance.
    source = random if rng is None else rng
    return ["".join(source.choices(alphabet, k=str_length)) for _ in range(size)]


# Example usage: generate 100,000 random strings, each 10 characters long.
strings = generate_random_string_list(100_000, 10)

df = pl.DataFrame({"ham": strings})

# Hash every string with CRC32 (masked to an unsigned 32-bit value) and scale
# by 1 / 2**32 so the result lies in [0, 1); `describe()` then summarises the
# distribution of the scaled hashes.
# Float64 is used rather than Float32: a 24-bit significand cannot represent
# every 32-bit integer, so the largest hashes would round up to 2**32 and
# scale to exactly 1.0.
df.select(
    pl.col("ham")
    .map_elements(lambda x: float(crc32(x.encode()) & 0xFFFFFFFF), return_dtype=pl.Float64)
    .mul(1 / 2**32)
).describe()

statistic,ham
str,f64
"""count""",100000.0
"""null_count""",0.0
"""mean""",0.498969
"""std""",0.288691
"""min""",1.4e-05
"""25%""",0.248285
"""50%""",0.499331
"""75%""",0.748348
"""max""",0.999999


In [4]:
import torch
import torchmetrics


# Metric classes keyed by the short name they carry inside each collection.
metrics = {
    "ap": torchmetrics.AveragePrecision,
    "f1": torchmetrics.F1Score,
    "auc": torchmetrics.AUROC,
    "acc": torchmetrics.Accuracy,
}

# Instantiate every metric with task="multiclass" and num_classes=2, then
# bundle the instances into a single MetricCollection.
collection = torchmetrics.MetricCollection(
    dict((label, metric_cls(task="multiclass", num_classes=2)) for label, metric_cls in metrics.items())
)

# One independent clone of the collection per stage, held in a ModuleDict.
collections = torch.nn.ModuleDict(
    {f"{stage}_collection": collection.clone() for stage in ("train", "validate", "test")}
)




ModuleDict(
  (train_collection): MetricCollection(
    (acc): MulticlassAccuracy()
    (ap): MulticlassAveragePrecision()
    (auc): MulticlassAUROC()
    (f1): MulticlassF1Score()
  )
  (validate_collection): MetricCollection(
    (acc): MulticlassAccuracy()
    (ap): MulticlassAveragePrecision()
    (auc): MulticlassAUROC()
    (f1): MulticlassF1Score()
  )
  (test_collection): MetricCollection(
    (acc): MulticlassAccuracy()
    (ap): MulticlassAveragePrecision()
    (auc): MulticlassAUROC()
    (f1): MulticlassF1Score()
  )
)


In [5]:
# Print each metric's key followed by its repr, one per line.
for name, metric in collection.items():
    print(name, metric, sep="\n")

acc
MulticlassAccuracy()
ap
MulticlassAveragePrecision()
auc
MulticlassAUROC()
f1
MulticlassF1Score()
