In [1]:
# Google Colab Only
# Installs DataEval only when the `google.colab` import succeeds; if the import
# (or anything else in the block) raises, the install step is skipped.
try:
    import google.colab  # noqa: F401

    # specify the version of DataEval (==X.XX.X) for versions other than the latest
    %pip install -q dataeval
except Exception:
    # Deliberate best-effort: any failure here is ignored so the notebook keeps running.
    pass

In [2]:
import logging
import os

import sklearn.datasets as dsets

from dataeval.core._ber import ber_knn, ber_mst

In [3]:
# Console handler whose records are rendered as "<logger name> - <level> - <message>"
console_formatter = logging.Formatter("%(name)s - %(levelname)s - %(message)s")
console_handler = logging.StreamHandler()
console_handler.setFormatter(console_formatter)

In [4]:
# Attach the console handler to the "dataeval" logger and let INFO
# (and more severe) records through
dataeval_logger = logging.getLogger("dataeval")
dataeval_logger.addHandler(console_handler)
dataeval_logger.setLevel(logging.INFO)

In [5]:
# Create sample dataset
# 100 samples around 3 blob centers with 5 features each; random_state=42 fixes the seed
embeddings, labels = dsets.make_blobs(n_samples=100, centers=3, n_features=5, random_state=42)

# Log records go through console_handler (a StreamHandler created without an
# explicit stream), so they may render apart from the print() output below.
print("Running ber_knn with INFO logging:\n")
result = ber_knn(embeddings, labels, k=3)
print(f"\nResult: {result}")

dataeval.core._ber - INFO - Starting ber_knn calculation with k=3


dataeval.core._ber - INFO - BER_knn complete: upper_bound=0.0000, lower_bound=0.0000, misclassified=0


Running ber_knn with INFO logging:


Result: {'upper_bound': 0.0, 'lower_bound': 0.0}


In [6]:
# Detach every handler currently registered on the dataeval logger
# (iterate over a snapshot because removeHandler mutates the list)
for attached in list(dataeval_logger.handlers):
    dataeval_logger.removeHandler(attached)

In [7]:
# Re-attach the console handler and lower the threshold so DEBUG records
# from the "dataeval" logger are shown as well
dataeval_logger = logging.getLogger("dataeval")
dataeval_logger.addHandler(console_handler)
dataeval_logger.setLevel(logging.DEBUG)

In [8]:
# Run ber_mst while the "dataeval" logger is set to DEBUG, so both INFO and
# DEBUG records reach the console handler.
print("Running ber_mst with DEBUG logging:\n")
result = ber_mst(embeddings, labels)
print(f"\nResult: {result}")

dataeval.core._ber - INFO - Starting ber_mst calculation


dataeval.core._ber - DEBUG - Number of classes: 3, Number of samples: 100


dataeval.core._mst - INFO - Starting minimum_spanning_tree calculation with k=15


dataeval.core._mst - DEBUG - Embeddings shape: (100, 5)


Running ber_mst with DEBUG logging:





dataeval.core._mst - INFO - MST calculation complete: 99 edges computed


dataeval.core._ber - INFO - BER_mst complete: upper_bound=0.0200, lower_bound=0.0101, mismatches=2



Result: {'upper_bound': 0.02, 'lower_bound': 0.010076146546926346}


In [9]:
# Pop handlers off the dataeval logger one at a time until none are left
while dataeval_logger.handlers:
    dataeval_logger.removeHandler(dataeval_logger.handlers[0])

In [10]:
# Send "dataeval" log records to a file
log_file = "dataeval_operations.log"

# File records carry a timestamp in addition to logger name, level and message
file_formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
file_handler = logging.FileHandler(log_file, mode="w")  # 'w' to overwrite, 'a' to append
file_handler.setFormatter(file_formatter)

# Register the file handler and restrict output to INFO and above
dataeval_logger = logging.getLogger("dataeval")
dataeval_logger.addHandler(file_handler)
dataeval_logger.setLevel(logging.INFO)

In [11]:
print(f"Running operations with logging to {log_file}...\n")

# Both calls log through the file handler, so nothing is echoed to the console
result1 = ber_mst(embeddings, labels)
result2 = ber_knn(embeddings, labels, k=5)

print(f"ber_mst result: {result1}")
print(f"ber_knn result: {result2}")
print(f"\nLogs have been saved to '{log_file}'")

# Echo what was written to the log file, if it was created
if os.path.exists(log_file):
    print("\n--- Log File Contents ---")
    with open(log_file) as log_stream:
        log_text = log_stream.read()
    print(log_text)

Running operations with logging to dataeval_operations.log...

ber_mst result: {'upper_bound': 0.02, 'lower_bound': 0.010076146546926346}
ber_knn result: {'upper_bound': 0.0, 'lower_bound': 0.0}

Logs have been saved to 'dataeval_operations.log'

--- Log File Contents ---
2025-12-03 08:18:05,444 - dataeval.core._ber - INFO - Starting ber_mst calculation
2025-12-03 08:18:05,444 - dataeval.core._mst - INFO - Starting minimum_spanning_tree calculation with k=15
2025-12-03 08:18:05,448 - dataeval.core._mst - INFO - MST calculation complete: 99 edges computed
2025-12-03 08:18:05,448 - dataeval.core._ber - INFO - BER_mst complete: upper_bound=0.0200, lower_bound=0.0101, mismatches=2
2025-12-03 08:18:05,448 - dataeval.core._ber - INFO - Starting ber_knn calculation with k=5
2025-12-03 08:18:05,454 - dataeval.core._ber - INFO - BER_knn complete: upper_bound=0.0000, lower_bound=0.0000, misclassified=0



In [12]:
# Clear previous handlers
# Every handler in this notebook was attached to the "dataeval" logger, not the
# root logger, so that is the logger that has to be cleared. Closing each
# handler also releases the log file held open by the FileHandler; without
# this, the earlier file handler would stay attached and keep receiving records.
dataeval_logger = logging.getLogger("dataeval")
for handler in dataeval_logger.handlers[:]:
    dataeval_logger.removeHandler(handler)
    handler.close()

In [13]:
# Logger itself passes everything from DEBUG upwards; each handler filters further
logger = logging.getLogger("dataeval")
logger.setLevel(logging.DEBUG)

# File handler: keeps DEBUG and above, with timestamps
log_file = "dataeval_detailed.log"
file_handler = logging.FileHandler(log_file, mode="w")
file_handler.setLevel(logging.DEBUG)
file_handler.setFormatter(logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s"))

# Console handler: shows INFO and above in a shorter format
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)
console_handler.setFormatter(logging.Formatter("%(levelname)s - %(message)s"))

# Register both handlers, file first and console second
for new_handler in (file_handler, console_handler):
    logger.addHandler(new_handler)

In [14]:
print("Running with dual logging (INFO to console, DEBUG to file):\n")
result = ber_knn(embeddings, labels, k=7)
print(f"\nResult: {result}")
print("\nNote: Console shows only INFO messages, but file contains DEBUG details too.")

# Echo what was written to the detailed log file, if it was created
if os.path.exists(log_file):
    print("\n--- Log File Contents ---")
    with open(log_file) as log_stream:
        log_text = log_stream.read()
    print(log_text)

INFO - Starting ber_knn calculation with k=7


INFO - BER_knn complete: upper_bound=0.0000, lower_bound=0.0000, misclassified=0


Running with dual logging (INFO to console, DEBUG to file):


Result: {'upper_bound': 0.0, 'lower_bound': 0.0}

Note: Console shows only INFO messages, but file contains DEBUG details too.

--- Log File Contents ---
2025-12-03 08:18:05,469 - dataeval.core._ber - INFO - Starting ber_knn calculation with k=7
2025-12-03 08:18:05,469 - dataeval.core._ber - DEBUG - Number of classes: 3, Number of samples: 100
2025-12-03 08:18:05,475 - dataeval.core._ber - INFO - BER_knn complete: upper_bound=0.0000, lower_bound=0.0000, misclassified=0



In [15]:
# Suppress every record whose severity is CRITICAL or lower, for all loggers
logging.disable(level=logging.CRITICAL)

In [16]:
# logging.disable(logging.CRITICAL) is in effect here, so this call is expected
# to produce no log records even though handlers are still attached.
print("Running with logging disabled:\n")
result = ber_mst(embeddings, labels)
print(f"Result: {result}")
print("(No log messages should appear above)\n")

Running with logging disabled:

Result: {'upper_bound': 0.02, 'lower_bound': 0.010076146546926346}
(No log messages should appear above)



In [17]:
# Lift the global override so logger and handler levels apply again
logging.disable(level=logging.NOTSET)

In [18]:
# The global disable was lifted with logging.disable(logging.NOTSET), so the
# handlers still attached to the "dataeval" logger emit records again.
print("Running with logging re-enabled:\n")
result = ber_mst(embeddings, labels)
print(f"Result: {result}")

INFO - Starting ber_mst calculation


INFO - Starting minimum_spanning_tree calculation with k=15




INFO - MST calculation complete: 99 edges computed


INFO - BER_mst complete: upper_bound=0.0200, lower_bound=0.0101, mismatches=2


Running with logging re-enabled:

Result: {'upper_bound': 0.02, 'lower_bound': 0.010076146546926346}


In [19]:
# Clean up log files created during the notebook execution
# Detach and close the handlers first: an open FileHandler keeps its log file
# locked on Windows (os.remove would raise PermissionError), and on other
# platforms it would leak the file handle and keep writing to a deleted file.
dataeval_logger = logging.getLogger("dataeval")
for handler in dataeval_logger.handlers[:]:
    dataeval_logger.removeHandler(handler)
    handler.close()

log_files = ["dataeval_operations.log", "dataeval_detailed.log"]
for log_file in log_files:
    # Only report files that actually existed and were deleted
    if os.path.exists(log_file):
        os.remove(log_file)
        print(f"Removed {log_file}")

print("\nCleanup complete!")

Removed dataeval_operations.log
Removed dataeval_detailed.log

Cleanup complete!
