# Example using DAQ plus AR Parameters plus Mahalanobis Distance

This notebook demonstrates an integrated approach combining:
- Simulated data acquisition (DAQ) from multiple sensor channels
- Autoregressive (AR) parameter extraction
- Mahalanobis distance-based outlier detection

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from shmtools.features import ar_model_shm
from shmtools.classification import learn_mahalanobis_shm, score_mahalanobis_shm

## Step 1: Simulate DAQ Data Acquisition

In [None]:
# Simulate data acquisition from multiple sensors.
#
# Produces:
#   time - 1-D time axis in seconds, n_samples points spaced exactly 1/fs apart
#   data - array of shape (TIME, CHANNELS, INSTANCES), the layout this notebook
#          passes to ar_model_shm
fs = 1000  # Sampling frequency (Hz)
duration = 1  # Duration (seconds) - reduced for numerical stability
n_channels = 4  # Number of sensor channels
n_instances = 20  # Number of data instances for feature extraction

# Build the time axis with endpoint=False so consecutive samples are exactly
# 1/fs apart. With the default endpoint=True the n_samples points would span
# [0, duration] inclusive, giving a spacing of duration/(n_samples - 1) and an
# effective sampling rate slightly below the advertised fs.
n_samples = int(fs * duration)
time = np.linspace(0, duration, n_samples, endpoint=False)

# Set random seed for reproducibility
np.random.seed(42)

data_instances = []
for instance in range(n_instances):
    # Base random signal with smaller amplitude to avoid numerical issues
    instance_data = 0.1 * np.random.randn(n_samples, n_channels)

    # Add a sinusoidal "structural response" to each channel; frequency drifts
    # slightly with the instance index and amplitude is randomly perturbed.
    for ch in range(n_channels):
        freq = 10 + ch * 5 + instance * 0.1  # Slight frequency variation per instance
        amplitude = 0.5 + np.random.normal(0, 0.05)  # Amplitude variation
        instance_data[:, ch] += amplitude * np.sin(2 * np.pi * freq * time)

    data_instances.append(instance_data)

# Stack the per-instance (TIME, CHANNELS) arrays along a new last axis
data = np.stack(data_instances, axis=2)

# Guard the layout that the downstream cells rely on
assert data.shape == (n_samples, n_channels, n_instances), data.shape

print(f"DAQ data shape: {data.shape}")
print(f"Sampling rate: {fs} Hz")
print(f"Number of channels: {n_channels}")
print(f"Number of instances: {n_instances}")

## Step 2: Extract AR Parameters

In [None]:
# Fit an autoregressive model to the simulated signals with ar_model_shm.
ar_order = 5  # Reduced order for numerical stability

# The data array is laid out as (TIME, CHANNELS, INSTANCES). The call yields
# five outputs; only the first (the AR-parameter feature vectors) is used by
# the rest of this notebook, the others are kept under their own names.
ar_outputs = ar_model_shm(data, ar_order=ar_order)
(
    ar_parameters_fv,
    rms_residuals_fv,
    ar_params,
    ar_residuals,
    ar_prediction,
) = ar_outputs

# Show the feature-matrix shape next to the expected per-instance width
# (one coefficient per AR lag per channel).
expected_feature_count = n_channels * ar_order
print(f"AR feature vector shape: {ar_parameters_fv.shape}")
print(f"Expected features per instance: {expected_feature_count}")

# Alias consumed by the outlier-detection step
ar_features = ar_parameters_fv

## Step 3: Apply Mahalanobis Distance Detection

In [None]:
# Train a Mahalanobis model on baseline features and classify the remaining
# instances against a threshold learned from the baseline itself.
#
# Produces:
#   mahalanobis_scores - 1-D scores for the test instances
#   threshold          - 95th percentile of the baseline (training) scores
#   predictions        - 1 where a test score exceeds the threshold, else 0

# Use first half as training (baseline) data, second half as test data
n_train = n_instances // 2
train_features = ar_features[:n_train]
test_features = ar_features[n_train:]

print(f"Training features shape: {train_features.shape}")
print(f"Test features shape: {test_features.shape}")

# A sample covariance estimated from n instances has rank at most n - 1, so it
# is singular whenever n <= number of features.
# NOTE(review): how learn_mahalanobis_shm copes with that is not visible here -
# confirm, or increase n_instances / reduce ar_order.
if train_features.ndim == 2 and train_features.shape[0] <= train_features.shape[1]:
    print(
        f"Warning: only {train_features.shape[0]} baseline instances for "
        f"{train_features.shape[1]} features; the sample covariance is "
        "rank-deficient, so Mahalanobis scores may be unreliable."
    )

# Learn Mahalanobis model from training data
mahalanobis_model = learn_mahalanobis_shm(train_features)

# Score the baseline data too: its score distribution defines what "normal" is
train_scores = score_mahalanobis_shm(train_features, mahalanobis_model).flatten()

# Score test data using the learned model
mahalanobis_scores_raw = score_mahalanobis_shm(test_features, mahalanobis_model)

# Flatten to 1D array for easier handling (score_mahalanobis_shm returns shape (N, 1))
mahalanobis_scores = mahalanobis_scores_raw.flatten()

# Threshold comes from the BASELINE scores. Taking the percentile of the test
# scores instead would flag the top ~5% of test samples by construction,
# regardless of whether any of them is actually anomalous.
threshold = np.percentile(train_scores, 95)  # 95th percentile of baseline scores
predictions = (mahalanobis_scores > threshold).astype(int)

print(f"Mahalanobis scores: {mahalanobis_scores}")
print(f"Threshold: {threshold:.3f}")
print(f"Outlier predictions (1=outlier): {predictions}")

## Step 4: Visualize Results

In [None]:
# Draw the pipeline as three stacked panels: raw signal, AR features, scores.
fig, axes = plt.subplots(3, 1, figsize=(12, 10))
ax_raw, ax_ar, ax_scores = axes

# --- Panel 1: raw DAQ signal, first instance, at most two channels ---
n_plot_channels = min(2, n_channels)
time_sample = time[:1000]  # First 1000 points for visualization
data_sample = data[:1000, :, 0]  # First instance, all channels
for ch in range(n_plot_channels):
    ax_raw.plot(time_sample, data_sample[:, ch], label=f'Channel {ch+1}')
ax_raw.set_title('Raw DAQ Data Sample (Instance 1)')
ax_raw.set_xlabel('Time (s)')
ax_raw.set_ylabel('Amplitude')
ax_raw.legend()
ax_raw.grid(True)

# --- Panel 2: AR feature vector of the first test instance ---
first_test_features = test_features[0]
feature_positions = range(len(first_test_features))
ax_ar.bar(feature_positions, first_test_features)
ax_ar.set_title('AR Parameters for First Test Instance')
ax_ar.set_xlabel('AR Feature Index')
ax_ar.set_ylabel('Value')
ax_ar.grid(True)

# --- Panel 3: Mahalanobis scores, red for flagged outliers, green otherwise ---
test_indices = range(len(mahalanobis_scores))
colors = []
for flag in predictions:
    colors.append('red' if flag == 1 else 'green')
ax_scores.bar(test_indices, mahalanobis_scores, color=colors)
ax_scores.set_title('Outlier Detection Results')
ax_scores.set_xlabel('Test Sample Index')
ax_scores.set_ylabel('Mahalanobis Score')
# Dashed horizontal line marks the decision threshold
ax_scores.axhline(y=threshold, color='r', linestyle='--', label=f'Threshold ({threshold:.3f})')
ax_scores.legend()
ax_scores.grid(True)

plt.tight_layout()
plt.show()

## Conclusion

This integrated example demonstrates:
1. Simulated data acquisition from multiple sensor channels
2. Feature extraction using AR model parameters
3. Outlier detection using Mahalanobis distance
4. Visualization of the complete pipeline

This approach is commonly used in structural health monitoring to detect anomalies or damage in structures by combining signal processing with statistical outlier detection methods.