In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import unittest

# ----------------- Simulate Financial Dataset -----------------

def generate_financial_data(n=1000, fraud_rate=0.05):
    np.random.seed(42)
    data = {
        'transaction_id': np.arange(n),
        'amount': np.round(np.random.exponential(scale=100, size=n), 2),
        'currency': np.random.choice(['USD', 'EUR', 'INR'], size=n),
        'is_fraud': np.random.choice([0, 1], size=n, p=[1 - fraud_rate, fraud_rate])
    }

    df = pd.DataFrame(data)

    # Simulate missingness and inaccuracy
    df.loc[np.random.choice(df.index, size=20, replace=False), 'amount'] = np.nan  # 2% missing
    df.loc[np.random.choice(df.index, size=10, replace=False), 'currency'] = 'INVALID'  # inaccurate
    return df


# --------------- SLA: Data Completeness Check -----------------

def check_completeness(df: pd.DataFrame, threshold: float = 0.98) -> bool:
    try:
        total_fields = df.size
        filled_fields = df.count().sum()
        completeness_ratio = filled_fields / total_fields
        print(f"Completeness Ratio: {completeness_ratio:.2%}")
        return completeness_ratio >= threshold
    except Exception as e:
        print(f"Error in completeness check: {e}")
        return False

# --------------- SLA: Data Accuracy Check ---------------------

def check_accuracy(df: pd.DataFrame, column: str = 'currency', valid_values=None, threshold=0.98) -> bool:
    try:
        if valid_values is None:
            valid_values = ['USD', 'EUR', 'INR']
        valid_entries = df[column].isin(valid_values)
        accuracy_ratio = valid_entries.mean()
        print(f"Accuracy Ratio for '{column}': {accuracy_ratio:.2%}")
        return accuracy_ratio >= threshold
    except Exception as e:
        print(f"Error in accuracy check: {e}")
        return False

# ---------------- Fraud Detection Model -----------------------

def train_fraud_model(df: pd.DataFrame):
    try:
        df_clean = df.dropna()
        df_clean = df_clean[df_clean['currency'].isin(['USD', 'EUR', 'INR'])]

        X = df_clean[['amount']]
        y = df_clean['is_fraud']

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
        model = IsolationForest(contamination=0.05, random_state=42)
        model.fit(X_train)

        y_pred = model.predict(X_test)
        y_pred = np.where(y_pred == -1, 1, 0)  # -1 => fraud

        print("\nFraud Detection Classification Report:")
        print(classification_report(y_test, y_pred))
        return model
    except Exception as e:
        print(f"Error training fraud detection model: {e}")
        return None

# -------------------------- Unit Tests ------------------------

class TestFraudSLA(unittest.TestCase):

    def setUp(self):
        self.df = generate_financial_data()

    def test_completeness(self):
        result = check_completeness(self.df)
        self.assertTrue(isinstance(result, bool))

    def test_accuracy(self):
        result = check_accuracy(self.df, column='currency')
        self.assertTrue(isinstance(result, bool))

    def test_model_training(self):
        model = train_fraud_model(self.df)
        self.assertIsNotNone(model)


# ---------------------- Main Execution ------------------------

if __name__ == "__main__":
    print("---- Real-World SLA Checks for Fraud Detection ----")

    df = generate_financial_data()

    print("\n1️⃣ Checking Data Completeness SLA:")
    completeness_passed = check_completeness(df)

    print("\n2️⃣ Checking Data Accuracy SLA:")
    accuracy_passed = check_accuracy(df)

    if completeness_passed and accuracy_passed:
        print("\n✅ Data Quality SLAs passed. Proceeding with model training...")
        train_fraud_model(df)
    else:
        print("\n❌ Data Quality SLAs failed. Aborting model training.")

    print("\n---- Running Unit Tests ----")
    unittest.main(argv=['first-arg-is-ignored'], exit=False)


FF

---- Real-World SLA Checks for Fraud Detection ----

1️⃣ Checking Data Completeness SLA:
Completeness Ratio: 99.50%

2️⃣ Checking Data Accuracy SLA:
Accuracy Ratio for 'currency': 99.00%

✅ Data Quality SLAs passed. Proceeding with model training...

Fraud Detection Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.96      0.96       277
           1       0.09      0.07      0.08        15

    accuracy                           0.92       292
   macro avg       0.52      0.52      0.52       292
weighted avg       0.91      0.92      0.91       292


---- Running Unit Tests ----
Accuracy Ratio for 'currency': 99.00%
Completeness Ratio: 99.50%


.
FAIL: test_accuracy (__main__.TestFraudSLA)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/tmp/ipykernel_8861/2760232105.py", line 91, in test_accuracy
    self.assertTrue(isinstance(result, bool))
AssertionError: False is not true

FAIL: test_completeness (__main__.TestFraudSLA)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/tmp/ipykernel_8861/2760232105.py", line 87, in test_completeness
    self.assertTrue(isinstance(result, bool))
AssertionError: False is not true

----------------------------------------------------------------------
Ran 3 tests in 0.182s

FAILED (failures=2)



Fraud Detection Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.96      0.96       277
           1       0.09      0.07      0.08        15

    accuracy                           0.92       292
   macro avg       0.52      0.52      0.52       292
weighted avg       0.91      0.92      0.91       292

