# TensorFlow Autoencoder Anomaly Detection

In [None]:
import sys
sys.path.append('../src')

import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

from tensorflow_anomaly import AutoencoderAnomalyDetector, evaluate_anomaly_model

In [None]:
# Load the saved train/test arrays and labels from ../models
X_train, X_test, y_train, y_test = map(
    np.load,
    (
        '../models/X_train.npy',
        '../models/X_test.npy',
        '../models/y_train.npy',
        '../models/y_test.npy',
    ),
)

# Feature names are stored as a pickle alongside the arrays
with open('../models/feature_names.pkl', 'rb') as f:
    feature_names = pickle.load(f)

print(
    f"Training data: {X_train.shape}",
    f"Test data: {X_test.shape}",
    sep="\n",
)

In [None]:
# Fit on normal transactions only (label 0); the autoencoder is trained
# without using fraud examples (unsupervised)
normal_mask = y_train == 0
X_train_normal = X_train[normal_mask]

# Hold out 20% of the normal rows for validation, with a fixed seed
X_train_split, X_val_split = train_test_split(
    X_train_normal,
    test_size=0.2,
    random_state=42,
)

print(
    f"Normal training samples: {len(X_train_split)}",
    f"Validation samples: {len(X_val_split)}",
    sep="\n",
)

In [None]:
# Build the detector: input width follows the feature count of X_train
n_features = X_train.shape[1]
detector = AutoencoderAnomalyDetector(input_dim=n_features, encoding_dim=16)

# Train on the normal-only split, validating on the held-out normal rows
print("Training autoencoder...")
history = detector.train(
    X_train_split,
    X_val_split,
    epochs=50,
    batch_size=256,
)

print(f"Training completed. Threshold: {detector.threshold:.4f}")

In [None]:
# Plot training history: loss curves on the left, learning rate on the right
plt.figure(figsize=(12, 4))

plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Training History')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.subplot(1, 2, 2)
# The learning rate is only present when a callback logged it. Older tf.keras
# callbacks record it under 'lr', while Keras 3 uses 'learning_rate'; accept
# either so the panel is not silently empty on newer versions.
lr_key = next(
    (key for key in ('lr', 'learning_rate') if key in history.history),
    None,
)
# Fall back to an empty series (blank panel) when no rate was logged
plt.plot(history.history[lr_key] if lr_key is not None else [])
plt.title('Learning Rate')
plt.xlabel('Epoch')
plt.ylabel('Learning Rate')

plt.tight_layout()
plt.savefig('../reports/autoencoder_training.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Score the held-out test set with the trained detector
print("Evaluating on test set...")
test_auc, test_predictions, test_scores = evaluate_anomaly_model(
    detector, X_test, y_test
)

# Overlay the per-class reconstruction error histograms and mark the
# detector's decision threshold
plt.figure(figsize=(10, 6))
for class_value, class_label in ((0, 'Normal'), (1, 'Fraud')):
    plt.hist(
        test_scores[y_test == class_value],
        bins=50,
        alpha=0.7,
        label=class_label,
        density=True,
    )
plt.axvline(detector.threshold, color='red', linestyle='--', label='Threshold')
plt.title('Reconstruction Error Distribution')
plt.xlabel('Reconstruction Error')
plt.ylabel('Density')
plt.legend()
# Log-scale the y-axis so low-density bins stay visible
plt.yscale('log')
plt.savefig(
    '../reports/autoencoder_reconstruction_error.png',
    dpi=300,
    bbox_inches='tight',
)
plt.show()

In [None]:
# Save the trained model plus the test-set predictions and scores
# Single source of truth for the model location, so the save call and the
# message below cannot drift apart.
model_path = '../models/autoencoder_model'
detector.save_model(model_path)
np.save('../models/autoencoder_test_predictions.npy', test_predictions)
np.save('../models/autoencoder_test_scores.npy', test_scores)

print("Autoencoder training completed!")
print(f"Model saved to: {model_path}")
print(f"Test AUC: {test_auc:.4f}")