In [1]:
import requests
import json
import numpy as np
import joblib

# Smoke-test the prediction API with one attack row and one benign row taken
# from X_test.npy.
# NOTE(review): the recorded run of this cell labelled the attack row BENIGN,
# and the values in X_test.npy look already scaled; presumably the service
# expects raw (unscaled) features - confirm against the API's preprocessing.

# Endpoint of the locally running service, and a per-request timeout in
# seconds (requests waits forever by default, which would hang the kernel).
PREDICT_URL = "http://localhost:8000/predict/full"
REQUEST_TIMEOUT = 10

# Load test data
X_test = np.load('../data/processed/X_test.npy')
y_test = np.load('../data/processed/y_test.npy')

# Load feature names.
# NOTE: joblib.load unpickles the file, so only load artifacts you trust.
preprocessor = joblib.load('../models/preprocessor.pkl')
feature_columns = preprocessor['feature_columns']

# Row indices of each class (label 1 is treated as attack, label 0 as benign)
attack_indices = np.where(y_test == 1)[0]
benign_indices = np.where(y_test == 0)[0]

print(f"Found {len(attack_indices)} attacks and {len(benign_indices)} benign in test set")

# Fail with a clear message instead of a bare IndexError on `indices[0]` below
if len(attack_indices) == 0 or len(benign_indices) == 0:
    raise ValueError("Test set must contain at least one attack and one benign sample")

# Test with a real attack
attack_sample = X_test[attack_indices[0]]
attack_features = {name: float(value) for name, value in zip(feature_columns, attack_sample)}

print("\n" + "="*60)
print("Testing ATTACK sample:")
print("="*60)

response = requests.post(
    PREDICT_URL,
    json={"features": attack_features},
    timeout=REQUEST_TIMEOUT,
)
# Surface 4xx/5xx replies instead of printing an error body as a "prediction"
response.raise_for_status()

print(json.dumps(response.json(), indent=2))
print("Actual label: ATTACK")

# Test with benign
benign_sample = X_test[benign_indices[0]]
benign_features = {name: float(value) for name, value in zip(feature_columns, benign_sample)}

print("\n" + "="*60)
print("Testing BENIGN sample:")
print("="*60)

response = requests.post(
    PREDICT_URL,
    json={"features": benign_features},
    timeout=REQUEST_TIMEOUT,
)
response.raise_for_status()

print(json.dumps(response.json(), indent=2))
print("Actual label: BENIGN")

Found 25603 attacks and 19014 benign in test set

Testing ATTACK sample:
{
  "prediction": "BENIGN",
  "confidence": 0.84,
  "attack_probability": 0.16,
  "benign_probability": 0.84,
  "model_used": "Random Forest (99.99% accuracy)",
  "timestamp": "2025-11-15T16:52:42.329602",
}
Actual label: ATTACK

Testing BENIGN sample:
{
  "prediction": "BENIGN",
  "confidence": 0.89,
  "attack_probability": 0.11,
  "benign_probability": 0.89,
  "model_used": "Random Forest (99.99% accuracy)",
  "timestamp": "2025-11-15T16:52:42.362380",
}
Actual label: BENIGN


In [2]:
# Eyeball the leading feature values of the attack sample to judge whether
# the data has been scaled.
leading_values = attack_sample[:10]
print("Sample features (first 10):", leading_values, sep="\n")
print("\nAre these scaled? (should be around -2 to +2 if scaled, or real numbers if raw)")

Sample features (first 10):
[-0.44909874 -0.49560494 -0.12578615  0.02151883 -0.28432716  0.15335437
 -0.27995722 -0.1698555  -0.31084298 -0.2584322 ]

Are these scaled? (should be around -2 to +2 if scaled, or real numbers if raw)


In [None]:
import requests
import json
import numpy as np
import joblib

# End-to-end check of the prediction API using the unscaled test features:
# one known attack row, one known benign row, then a random-sample accuracy
# estimate.

# Endpoint of the locally running service, per-request timeout in seconds
# (requests waits forever by default), and sampling parameters for TEST 3.
PREDICT_URL = "http://localhost:8000/predict/full"
REQUEST_TIMEOUT = 10
N_RANDOM_SAMPLES = 100
SEED = 42  # fixed so the random-sample accuracy is reproducible across runs


def to_feature_dict(columns, sample):
    """Pair each column name with its value as a plain float (JSON-serialisable)."""
    return {name: float(value) for name, value in zip(columns, sample)}


def post_features(features, http=requests):
    """POST one feature mapping to the prediction API and return the response.

    Parameters
    ----------
    features : dict
        Mapping of feature name to float value.
    http : module or requests.Session
        Object providing ``post``; pass a Session to reuse one connection
        across many calls. Defaults to the ``requests`` module itself.

    Raises
    ------
    requests.HTTPError
        If the service answers with a 4xx/5xx status, so an error payload is
        never mistaken for a prediction.
    """
    reply = http.post(PREDICT_URL, json={"features": features}, timeout=REQUEST_TIMEOUT)
    reply.raise_for_status()
    return reply


def report_prediction(result, expected_label):
    """Print the API verdict for one sample next to its true label."""
    print(f"Prediction: {result['prediction']}")
    print(f"Confidence: {result['confidence']:.2%}")
    print(f"Attack Probability: {result['attack_probability']:.2%}")
    print(f"Actual Label: {expected_label}")
    print(" CORRECT!" if result['prediction'] == expected_label else " WRONG!")


# Load UNSCALED test data
X_test_unscaled = np.load('../data/processed/X_test_unscaled.npy')
y_test = np.load('../data/processed/y_test.npy')

# Indices derived from y_test are used to index X_test_unscaled, so the two
# arrays must describe the same rows.
if len(X_test_unscaled) != len(y_test):
    raise ValueError(
        f"Feature/label length mismatch: {len(X_test_unscaled)} vs {len(y_test)}"
    )

# Load feature names.
# NOTE: joblib.load unpickles the file, so only load artifacts you trust.
preprocessor = joblib.load('../models/preprocessor.pkl')
feature_columns = preprocessor['feature_columns']

# Find attack and benign samples (label 1 is treated as attack, 0 as benign)
attack_indices = np.where(y_test == 1)[0]
benign_indices = np.where(y_test == 0)[0]

# Fail with a clear message instead of a bare IndexError on `indices[0]` below
if len(attack_indices) == 0 or len(benign_indices) == 0:
    raise ValueError("Test set must contain at least one attack and one benign sample")

print("="*70)
print("TESTING API WITH UNSCALED DATA")
print("="*70)

# Test 1: Real Attack
print("\n[TEST 1] Real ATTACK sample:")
attack_sample = X_test_unscaled[attack_indices[0]]
attack_features = to_feature_dict(feature_columns, attack_sample)

response = post_features(attack_features)
result = response.json()
report_prediction(result, "ATTACK")

# Test 2: Real Benign
print("\n" + "="*70)
print("[TEST 2] Real BENIGN sample:")
benign_sample = X_test_unscaled[benign_indices[0]]
benign_features = to_feature_dict(feature_columns, benign_sample)

response = post_features(benign_features)
result = response.json()
report_prediction(result, "BENIGN")

# Test 3: Test multiple samples for accuracy
# Never ask for more distinct rows than the test set holds.
total = min(N_RANDOM_SAMPLES, len(X_test_unscaled))

print("\n" + "="*70)
print(f"[TEST 3] Testing {total} random samples:")
print("="*70)

# Seeded draw without replacement: reproducible, and no row is counted twice.
rng = np.random.default_rng(SEED)
sample_indices = rng.choice(len(X_test_unscaled), size=total, replace=False)

correct = 0

# One Session reuses the HTTP connection for all requests in the loop.
with requests.Session() as session:
    for sample_idx in sample_indices:
        sample = X_test_unscaled[sample_idx]
        actual_label = y_test[sample_idx]

        features = to_feature_dict(feature_columns, sample)
        response = post_features(features, http=session)
        result = response.json()

        predicted_label = 1 if result['prediction'] == 'ATTACK' else 0
        if predicted_label == actual_label:
            correct += 1

accuracy = (correct / total) * 100
print(f"Accuracy on {total} samples: {accuracy:.1f}%")
print("Expected: ~99.99%")
print(' API WORKING CORRECTLY!' if accuracy > 95 else ' Still has issues')

TESTING API WITH UNSCALED DATA

[TEST 1] Real ATTACK sample:
Prediction: ATTACK
Confidence: 100.00%
Attack Probability: 100.00%
Actual Label: ATTACK
✅ CORRECT!

[TEST 2] Real BENIGN sample:
Prediction: BENIGN
Confidence: 100.00%
Attack Probability: 0.00%
Actual Label: BENIGN
✅ CORRECT!

[TEST 3] Testing 100 random samples:
Accuracy on 100 samples: 100.0%
Expected: ~99.99%
✅ API WORKING CORRECTLY!
