In [1]:
import pandas as pd
import sys, os
# Make `<parent of the current working directory>/src` importable so the
# `components.*` imports below resolve. This depends on the directory the
# kernel was started in.
sys.path.append(os.path.join(os.path.dirname(os.getcwd()), "src"))

# Example for a custom detector (preliminary)

1. Import DetectorBase and ConfigBase
2. Define config class
3. Define detector class and train + detect methods

In [2]:
from components.Base.DetectorBase import DetectorBase
from components.Base.ConfigBase import ConfigBase

In [3]:
class MyConfig(ConfigBase):
    """Settings consumed by ``MyDetector``."""

    # Slack applied on both sides of the length range learned during training:
    # an entry is only flagged when its length lies outside
    # [min - tolerance, max + tolerance]. The default of 0 means no slack.
    tolerance: float = 0

class MyDetector(DetectorBase):
    """Example detector that flags log entries of unusual length.

    ``train`` records the shortest and longest entry length found in the
    training data. ``detect`` then reports every entry whose length falls
    outside that range widened by ``config.tolerance`` on both sides.
    """

    # define buffer_mode and buffer_size based on the requirement for a buffer or not (batch or window)
    def __init__(self, buffer_mode="no_buf", buffer_size=None, config: MyConfig | None = None) -> None:
        """Create the detector.

        Args:
            buffer_mode: Forwarded unchanged to ``DetectorBase``.
            buffer_size: Forwarded unchanged to ``DetectorBase``.
            config: Detector configuration; a default ``MyConfig()`` is used
                when none is given.
        """
        super().__init__(buffer_mode=buffer_mode, buffer_size=buffer_size, config=config or MyConfig())

    def detect(self, data):
        """Return the entries of ``data["logs"]`` whose length is out of range.

        Args:
            data: Object whose ``"logs"`` item is an iterable of sized entries
                (the notebook passes a DataFrame with a ``logs`` column).

        Returns:
            A list with one single-item dict ``{position: entry}`` per anomaly.
            ``position`` is the 0-based position of the entry within the
            ``data`` passed to this call, not the DataFrame index label.

        Raises:
            RuntimeError: If called before ``train``.
        """
        bounds = getattr(self, "min_max", None)
        if bounds is None:
            # Fail with an actionable message instead of an AttributeError on min_max.
            raise RuntimeError("MyDetector.detect() called before train(); call train() first.")

        # The accepted range does not change per entry, so compute it once.
        tolerance = self.config.tolerance
        lower = bounds[0] - tolerance
        upper = bounds[1] + tolerance

        anomalies = []
        for i, log in enumerate(data["logs"]):
            if not lower <= len(log) <= upper:
                anomalies.append({i: log})
        return anomalies

    def train(self, data) -> None:
        """Learn the minimum and maximum entry length from ``data["logs"]``.

        Args:
            data: Object whose ``"logs"`` item supports ``.apply(len)``
                (the notebook passes a DataFrame with a ``logs`` column).

        Raises:
            ValueError: If ``data["logs"]`` contains no entries.
        """
        # Compute the lengths once instead of once per bound.
        lengths = data["logs"].apply(len)
        if lengths.empty:
            raise ValueError("Cannot train MyDetector on empty data: 'logs' contains no entries.")
        self.min_max = (min(lengths), max(lengths))

## Test with data:

In [4]:
# Toy log lines in a single "logs" column; most entries have length 3.
data = pd.DataFrame(
    {
        "logs": [
            "abc",
            "abc",
            "abc",
            "abc",
            "abcd",  # one character longer: accepted once tolerance is 1
            "abc",
            "abcdefg",  # anomaly
            "abc",
        ]
    }
)
display(data)

Unnamed: 0,logs
0,abc
1,abc
2,abc
3,abc
4,abcd
5,abc
6,abcdefg
7,abc


We train the detector with the first three instances and then try to detect anomalies in the rest:

In [5]:
# Default configuration: tolerance stays at 0.
config = MyConfig()
detector = MyDetector(config=config)

# Fit on the first three rows, then check the remaining rows.
detector.train(data=data.iloc[:3])
anomalies = detector.detect(data=data.iloc[3:])
print("Anomalies detected:", anomalies)

Anomalies detected: [{1: 'abcd'}, {3: 'abcdefg'}]


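Since the `tolerance` configuration parameter defaults to 0, we get two anomalies. Note that the keys in the result are positions within the data passed to `detect` (here `data[3:]`), not the original DataFrame index.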

In [6]:
# Same experiment, but allow lengths to deviate by one character.
config = MyConfig(tolerance=1)
detector = MyDetector(config=config)

# Fit on the first three rows, then check the remaining rows.
detector.train(data=data.iloc[:3])
anomalies = detector.detect(data=data.iloc[3:])
print("Anomalies detected:", anomalies)

Anomalies detected: [{3: 'abcdefg'}]


In [7]:
# `_process` is not defined in this notebook, so it comes from DetectorBase.
# In the recorded run it returns the same result as `detect` on the same data.
# NOTE(review): presumably it wraps `detect` with the buffering logic; confirm in DetectorBase.
detector._process(data=data[3:])

[{3: 'abcdefg'}]

... and only one anomaly if we set tolerance to 1.

### Test type checking and extra definitions for the config class:

In [8]:
# Deliberate failure: `not_allowed` is not a field declared on MyConfig.
# The recorded output shows it is rejected with a ValidationError (extra_forbidden).
config = MyConfig(not_allowed="this param should produce an error")

ValidationError: 1 validation error for MyConfig
not_allowed
  Extra inputs are not permitted [type=extra_forbidden, input_value='this param should produce an error', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/extra_forbidden

In [None]:
# Deliberate failure: `tolerance` is declared as float and this string cannot be parsed as a number.
# The recorded output shows it is rejected with a ValidationError (float_parsing).
config = MyConfig(tolerance="this param should also produce an error")

ValidationError: 1 validation error for MyConfig
tolerance
  Input should be a valid number, unable to parse string as a number [type=float_parsing, input_value='this param should also produce an error', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/float_parsing