In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay

from pipeline import AnomalyDetectionPipeline

In [None]:
# Path of the gzip-compressed CSV (TOTF.PA book, 2015-01-02) that is handed to
# pipeline.load_data further down the notebook.
data = "data/TOTF.PA-book/2015-01-02-TOTF.PA-book.csv.gz"

In [None]:
# Create the single pipeline object that every later cell calls into.
pipeline = AnomalyDetectionPipeline(
    seq_length=25,
    batch_size=128,
)

In [None]:
# Load the data file into the pipeline.
# `nrows` is forwarded to load_data (presumably a cap on the number of rows
# read -- confirm in pipeline.py).
nrows = 100_000
pipeline.load_data(
    data,
    nrows=nrows,
)

In [None]:
# Feature engineering: list the feature sets to build, then hand the list to
# the pipeline.
selected_features = [
    'base',
    'tao',
    'hawkes',
    'poutre',
]
pipeline.engineer_features(feature_sets=selected_features)

In [None]:
# Preprocessing: run the pipeline's scale_and_sequence stage with the
# 'minmax' method.
pipeline.scale_and_sequence(
    method='minmax',
)

In [None]:
# --- Transformer + OC-SVM: train, evaluate, explain ---

# Fit the 'transformer_ocsvm' model on whatever the pipeline currently holds.
pipeline.train_model(model_type='transformer_ocsvm', epochs=10, lr=1e-3, nu=0.01)

# evaluate() yields a (metrics, confusion matrix) pair; both are reused by the
# comparison cells later in the notebook.
metrics_ae, cm_ae = pipeline.evaluate()

# Feature-importance table; its 'Feature' and 'Importance' columns are plotted
# below. Only the first 20 rows are kept for the chart.
importance_df = pipeline.get_feature_importance(n_repeats=3)
top_20 = importance_df.head(20)

# Horizontal bar chart; the y-axis is inverted so the first row is drawn on top.
fig, ax = plt.subplots(figsize=(10, 8))
ax.barh(top_20['Feature'], top_20['Importance'], color='#2a9d8f')
ax.set_xlabel('Importance Score')
ax.set_title('Top 20 Features driving Anomaly Detection - Transformer + OC-SVM')
ax.invert_yaxis()
plt.show()

In [None]:
# --- PNN: train, evaluate, explain ---

# Fit the 'pnn' model on whatever the pipeline currently holds.
pipeline.train_model(model_type='pnn', epochs=10, lr=1e-3, hidden_dim=64)

# evaluate() yields a (metrics, confusion matrix) pair; both are reused by the
# model-comparison cell later in the notebook.
metrics_pnn, cm_pnn = pipeline.evaluate()

# Feature-importance table for the PNN run (this rebinds importance_df and
# top_20 from the previous model's cell). Only the first 20 rows are charted.
importance_df = pipeline.get_feature_importance(n_repeats=3)
top_20 = importance_df.head(20)

# Horizontal bar chart; the y-axis is inverted so the first row is drawn on top.
fig, ax = plt.subplots(figsize=(10, 8))
ax.barh(top_20['Feature'], top_20['Importance'], color='#2a9d8f')
ax.set_xlabel('Importance Score')
ax.set_title('Top 20 Features driving Anomaly Detection - PNN')
ax.invert_yaxis()
plt.show()

In [None]:
# Side-by-side comparison of the two models trained above.

# One row of metrics per model.
model_names = ['Transformer + OC-SVM', 'PNN']
metrics_df = pd.DataFrame([metrics_ae, metrics_pnn], index=model_names)
print(metrics_df)

# One confusion-matrix panel per model, drawn left to right.
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
panels = [
    (cm_ae, 'Transformer + OC-SVM Confusion Matrix'),
    (cm_pnn, 'PNN Confusion Matrix'),
]
for ax, (matrix, panel_title) in zip(axes, panels):
    ConfusionMatrixDisplay(matrix, display_labels=["Normal", "Anomaly"]).plot(ax=ax, colorbar=False)
    ax.set_title(panel_title)

plt.tight_layout()
plt.show()

In [None]:
# Scaler experiment: redo preprocessing with the 'box-cox' method, then retrain
# and re-evaluate the Transformer + OC-SVM model with the same hyperparameters
# as the min-max run.
# NOTE(review): this presumably overwrites the pipeline's scaled data and model
# in place, so later cells would operate on the Box-Cox state -- confirm.
pipeline.scale_and_sequence(method='box-cox')
pipeline.train_model(model_type='transformer_ocsvm', epochs=10, lr=1e-3, nu=0.01)
metrics_boxcox, cm_boxcox = pipeline.evaluate()

In [None]:
# Side-by-side comparison of the Transformer + OC-SVM model under both scalers.

# One row of metrics per scaler.
scaler_names = ['Min-Max', 'Box-Cox']
results_df = pd.DataFrame([metrics_ae, metrics_boxcox], index=scaler_names)
print(results_df)

# One confusion-matrix panel per scaler, drawn left to right.
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
panels = [
    (cm_ae, 'Min-Max Scaler Confusion Matrix'),
    (cm_boxcox, 'Box-Cox Scaler Confusion Matrix'),
]
for ax, (matrix, panel_title) in zip(axes, panels):
    ConfusionMatrixDisplay(matrix, display_labels=["Normal", "Anomaly"]).plot(ax=ax, colorbar=False)
    ax.set_title(panel_title)

plt.tight_layout()
plt.show()

In [None]:
# Run the pipeline's spoofing detector. The result is a table with at least
# 'Index' and 'Expected_Gain' columns (both are read by the plotting cell).
# Parameter semantics are defined by pipeline.detect_spoofing.
spoofing_opportunities = pipeline.detect_spoofing(
    Q_spoof=50000, delta_ticks=5,
    maker_fee=0.0, taker_fee=0.0005,
)

In [None]:
# Visualise and rank the detected spoofing opportunities.
#
# The empty case is handled first: a frame with no rows may also lack the
# 'Index' / 'Expected_Gain' columns, in which case the column lookups below
# would raise KeyError; at best the cell would draw an empty chart.
if spoofing_opportunities.empty:
    print("No spoofing opportunities detected.")
else:
    # Scatter of expected gain against the 'Index' column, one dot per row.
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(
        spoofing_opportunities['Index'],
        spoofing_opportunities['Expected_Gain'],
        '.',
        color='red',
        label='Spoof Opportunity',
    )
    ax.set_title('Detected Spoofing Opportunities (Positive Expected Gain)')
    ax.set_xlabel('Time Step (Index)')
    ax.set_ylabel('Expected Gain')
    ax.legend()
    ax.grid(True, alpha=0.3)
    plt.show()

    # Rank by expected gain, highest first; the row count is spelled out so it
    # visibly matches the "Top 5" heading.
    top_opportunities = spoofing_opportunities.sort_values(
        by='Expected_Gain', ascending=False
    ).head(5)
    print("\nTop 5 Most Profitable Spoofing Opportunities:")
    print(top_opportunities)