In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import unittest


# Sample dataset for testing
def load_sample_data():
    data = {
        'id': [1, 2, 3, 4, 5],
        'name': ['Alice', 'Bob', 'Charlie', None, 'Eve'],
        'age': [25, 30, None, 22, 29],
        'timestamp': [
            datetime.now() - timedelta(hours=1),
            datetime.now() - timedelta(days=1),
            datetime.now() - timedelta(days=2),
            datetime.now() - timedelta(hours=23),
            datetime.now()
        ],
        'category': ['A', 'B', 'A', 'C', 'A'],
        'ref_category': ['A', 'B', 'A', 'B', 'A']  # Reference for consistency
    }
    return pd.DataFrame(data)


# ----------- Data Completeness SLA (>=95% fields filled) ----------------
def check_completeness(df: pd.DataFrame, threshold: float = 0.95) -> bool:
    try:
        if df.empty:
            raise ValueError("DataFrame is empty")
        
        total_fields = df.size
        filled_fields = df.count().sum()
        completeness_ratio = filled_fields / total_fields

        print(f"Completeness: {completeness_ratio:.2%}")
        return completeness_ratio >= threshold
    except Exception as e:
        print(f"Error in completeness check: {e}")
        return False


# ----------- Data Timeliness SLA (processed within 24 hours) -------------
def check_timeliness(df: pd.DataFrame, timestamp_col: str = 'timestamp') -> bool:
    try:
        if timestamp_col not in df.columns:
            raise KeyError(f"'{timestamp_col}' column is missing")
        
        now = datetime.now()
        within_24_hours = df[timestamp_col].apply(lambda t: (now - t).total_seconds() <= 86400)
        timeliness_ratio = within_24_hours.mean()

        print(f"Timeliness: {timeliness_ratio:.2%}")
        return timeliness_ratio == 1.0
    except Exception as e:
        print(f"Error in timeliness check: {e}")
        return False


# ----------- Data Consistency SLA (>=99% entries match reference) -------
def check_consistency(df: pd.DataFrame, col1: str = 'category', col2: str = 'ref_category', threshold: float = 0.99) -> bool:
    try:
        if col1 not in df.columns or col2 not in df.columns:
            raise KeyError(f"Required columns '{col1}' or '{col2}' are missing")
        
        matches = df[col1] == df[col2]
        consistency_ratio = matches.mean()

        print(f"Consistency: {consistency_ratio:.2%}")
        return consistency_ratio >= threshold
    except Exception as e:
        print(f"Error in consistency check: {e}")
        return False


# ----------------------------- Unit Tests ---------------------------------
class TestDataQuality(unittest.TestCase):

    def setUp(self):
        self.df = load_sample_data()

    def test_completeness(self):
        result = check_completeness(self.df)
        self.assertTrue(isinstance(result, bool))

    def test_timeliness(self):
        result = check_timeliness(self.df)
        self.assertTrue(isinstance(result, bool))

    def test_consistency(self):
        result = check_consistency(self.df)
        self.assertTrue(isinstance(result, bool))


# ----------------------------- Run Checks ---------------------------------
if __name__ == "__main__":
    df = load_sample_data()
    
    print("---- Running Data Quality SLA Checks ----")
    completeness_passed = check_completeness(df)
    timeliness_passed = check_timeliness(df)
    consistency_passed = check_consistency(df)

    print("\nSLA Results:")
    print(f"Completeness SLA passed: {completeness_passed}")
    print(f"Timeliness SLA passed: {timeliness_passed}")
    print(f"Consistency SLA passed: {consistency_passed}")
    
    print("\n---- Running Unit Tests ----")
    unittest.main(argv=['first-arg-is-ignored'], exit=False)


FFF
FAIL: test_completeness (__main__.TestDataQuality)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/tmp/ipykernel_6477/4172504733.py", line 84, in test_completeness
    self.assertTrue(isinstance(result, bool))
AssertionError: False is not true

FAIL: test_consistency (__main__.TestDataQuality)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/tmp/ipykernel_6477/4172504733.py", line 92, in test_consistency
    self.assertTrue(isinstance(result, bool))
AssertionError: False is not true

FAIL: test_timeliness (__main__.TestDataQuality)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/tmp/ipykernel_6477/4172504733.py", line 88, in test_timeliness
    self.assertTrue(isinstance(result, bool))
AssertionError: False is not true

------------------------------------------------------------------

---- Running Data Quality SLA Checks ----
Completeness: 93.33%
Timeliness: 60.00%
Consistency: 80.00%

SLA Results:
Completeness SLA passed: False
Timeliness SLA passed: False
Consistency SLA passed: False

---- Running Unit Tests ----
Completeness: 93.33%
Consistency: 80.00%
Timeliness: 60.00%
