In [1]:
import os
import random
import numpy as np
import tensorflow as tf
import pandas as pd
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from sklearn.preprocessing import StandardScaler

# --- Set seeds and environment for reproducibility ---
# NOTE(review): PYTHONHASHSEED is normally read when the interpreter starts, so
# assigning it here presumably only affects child processes, not this running
# kernel — confirm whether it needs to be set before launching Jupyter.
os.environ['PYTHONHASHSEED'] = '42'
# NOTE(review): this is assigned after `import tensorflow`; verify that the
# installed TensorFlow version still honours the variable at this point.
os.environ['TF_DETERMINISTIC_OPS'] = '1'  # Force deterministic GPU ops

# Seed each random number generator separately: Python's `random`, NumPy's
# legacy global generator, and TensorFlow's global seed. All use the same value.
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

# --- Load CSV data ---
df = pd.read_csv('ABN_MetroExodus_condor_calcs.csv')

# --- Filter where TestMarker == 0.1 ---
# Compare with a tolerance instead of `==`: a float parsed from text is not
# guaranteed to be bit-identical to the Python literal 0.1, and an exact
# comparison would then silently drop every row.
filtered_df = df[np.isclose(df['TestMarker'], 0.1)].reset_index(drop=True)

# Fail early with a clear message rather than letting the scaler raise on an
# empty matrix further down.
if filtered_df.empty:
    raise ValueError("No rows with TestMarker == 0.1 were found in the CSV.")

# --- Feature columns ---
# Order matters: column i of X / X_scaled corresponds to features[i].
features = [
    'GT Effective Freq', 'GPU_Busy', 'GTI_Busy',
    'PKG Reported Temp', 'PKG Reported Power',
    'GTI Rd BW', 'GTI Wr BW', 'GTI Total BW'
]

# --- Prepare feature matrix and scale ---
# X holds the raw values; X_scaled is standardised per column by StandardScaler,
# fitted on this same data.
X = filtered_df.loc[:, features].values
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# --- Build autoencoder ---
# One input unit per feature column; the hidden (bottleneck) layer has half as
# many units (integer division), with a floor of 1 when there is a single feature.
input_dim = X_scaled.shape[1]
encoding_dim = input_dim // 2 if input_dim > 1 else 1

# Single hidden layer: ReLU-activated bottleneck, then a linear layer that maps
# back to the input width.
input_layer = Input(shape=(input_dim,))
encoder = Dense(encoding_dim, activation="relu")(input_layer)
decoder = Dense(input_dim, activation="linear")(encoder)

# Mean squared error between input and output is the reconstruction loss.
autoencoder = Model(inputs=input_layer, outputs=decoder)
autoencoder.compile(optimizer='adam', loss='mse')

# --- Train autoencoder ---
# X_scaled is passed as both input and target, so the model learns to
# reproduce its own input. All rows are used for training; no validation split
# is passed.
autoencoder.fit(
    X_scaled, X_scaled,
    epochs=50,
    batch_size=32,
    shuffle=True,
    verbose=1
)

# --- Reconstruct the inputs and measure the per-feature residual ---
reconstructions = autoencoder.predict(X_scaled)
diff = X_scaled - reconstructions  # signed residual: actual minus reconstructed

# Column position of the frequency feature within the feature matrix
freq_idx = features.index('GT Effective Freq')

# The signed residual keeps the direction (dip vs. spike); its square is the
# magnitude used for thresholding.
freq_diff = diff[:, freq_idx]
freq_recon_error = np.square(freq_diff)

# Rows whose squared error exceeds the 85th percentile (top 15%) are anomalous
freq_threshold = np.percentile(freq_recon_error, 85)
freq_is_anomalous = freq_recon_error > freq_threshold

# Negative residual -> actual below reconstruction (dip); positive -> spike
dip_mask = freq_is_anomalous & (freq_diff < 0)
spike_mask = freq_is_anomalous & (freq_diff > 0)

# Helper function to get continuous index ranges
def get_ranges(indexes):
    """Collapse a collection of integer positions into inclusive consecutive runs.

    Parameters
    ----------
    indexes : array-like
        Positions in any order; duplicates are ignored and nested input is
        flattened by ``np.unique``.

    Returns
    -------
    list of (start, end) tuples
        One tuple per run of consecutive values, in ascending order, with both
        ends inclusive. An empty input gives an empty list.
    """
    # np.unique flattens, de-duplicates and returns the values sorted ascending
    uniq = np.sort(np.unique(indexes))
    if uniq.size == 0:
        return []

    # Position i is a break when uniq[i + 1] does not directly follow uniq[i]
    breaks = np.flatnonzero(uniq[1:] != uniq[:-1] + 1)
    first_positions = np.concatenate(([0], breaks + 1))
    last_positions = np.concatenate((breaks, [uniq.size - 1]))

    return [(uniq[lo], uniq[hi]) for lo, hi in zip(first_positions, last_positions)]

# np.where on a 1-D boolean mask returns a 1-tuple of index arrays; take
# element [0] in BOTH cases so each variable holds a plain 1-D array of row
# positions (previously spike_indexes was left as the tuple).
dip_indexes = np.where(dip_mask)[0]
spike_indexes = np.where(spike_mask)[0]

# Merge neighbouring row positions into inclusive (start, end) runs
dip_ranges = get_ranges(dip_indexes)
spike_ranges = get_ranges(spike_indexes)

# Format ranges as strings for printing
clean_dip_ranges = [f"{start}-{end}" for start, end in dip_ranges]
clean_spike_ranges = [f"{start}-{end}" for start, end in spike_ranges]

print("Dip ranges:", clean_dip_ranges)
print("Spike ranges:", clean_spike_ranges)

# --- Feature influence analysis ---
def feature_influence_per_range(df, feature_list, ranges, target_feature):
    """Rank the features that deviate most from their overall mean within each range.

    For every (start, end) range, the mean of each feature inside the range is
    compared with that feature's mean over the whole frame and expressed in
    units of the feature's overall standard deviation (a z-score of the range
    mean). The three largest deviations by absolute value, excluding
    ``target_feature``, are reported.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the columns in ``feature_list``.
    feature_list : list of str
        Columns to analyse; must include ``target_feature``.
    ranges : iterable of (start, end)
        Inclusive row *positions* (0-based), e.g. as produced from
        ``np.where`` on a mask aligned with ``df``'s rows.
    target_feature : str
        Feature to leave out of the ranking.

    Returns
    -------
    pandas.DataFrame
        Columns ``"Range"`` (``"start-end"``) and ``"Top Influencers"``
        (comma-separated ``"feature (+z.zz z)"`` entries). The columns are
        present even when ``ranges`` is empty.

    Raises
    ------
    KeyError
        If ``target_feature`` is not in ``feature_list``.
    """
    overall_mean = df[feature_list].mean()
    # A constant feature has std 0, which would give +/-inf or NaN z-scores;
    # mask it to NaN so the feature is dropped from the ranking below.
    overall_std = df[feature_list].std()
    overall_std = overall_std.where(overall_std > 0)

    results = []
    for start, end in ranges:
        # Ranges are positions, not index labels, so slice with iloc. The end
        # is inclusive, hence + 1. This stays correct for a non-default index.
        subset = df.iloc[int(start):int(end) + 1][feature_list]

        mean_diff = subset.mean() - overall_mean
        z_scores = mean_diff / overall_std

        # exclude the target feature itself and any undefined scores
        influencers = z_scores.drop(target_feature).dropna()

        # top 3 influencers by absolute z-score, reported with their sign
        top_influencers = influencers.abs().sort_values(ascending=False).index[:3]
        top_vals = influencers.loc[top_influencers].round(2)

        results.append({
            "Range": f"{start}-{end}",
            "Top Influencers": ", ".join(
                [f"{feat} ({val:+.2f} z)" for feat, val in zip(top_influencers, top_vals)]
            )
        })

    return pd.DataFrame(results, columns=["Range", "Top Influencers"])

# Build one summary table per anomaly type: for each dip / spike range, the
# non-target features whose in-range mean deviates most from the overall mean.
dip_influencer_table = feature_influence_per_range(filtered_df, features, dip_ranges, 'GT Effective Freq')
spike_influencer_table = feature_influence_per_range(filtered_df, features, spike_ranges, 'GT Effective Freq')

# Only the spike table is displayed (last expression of the cell);
# dip_influencer_table is computed but not shown in this cell.
spike_influencer_table

Epoch 1/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - loss: 1.2320
Epoch 2/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 1.1133
Epoch 3/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 1.0247
Epoch 4/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.9550
Epoch 5/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.8938
Epoch 6/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.8374
Epoch 7/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.7855
Epoch 8/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.7360
Epoch 9/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.6887
Epoch 10/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.6444
Epoch 11/

Unnamed: 0,Range,Top Influencers
0,0-44,"PKG Reported Temp (-2.74 z), PKG Reported Powe..."
1,46-46,"PKG Reported Temp (-2.10 z), PKG Reported Powe..."
2,48-50,"PKG Reported Temp (-2.44 z), PKG Reported Powe..."
3,52-52,"PKG Reported Temp (-1.77 z), PKG Reported Powe..."
4,54-54,"PKG Reported Temp (-2.10 z), PKG Reported Powe..."
5,57-57,"PKG Reported Temp (-1.43 z), PKG Reported Powe..."
6,59-59,"PKG Reported Temp (-1.43 z), PKG Reported Powe..."
7,61-62,"PKG Reported Temp (-1.77 z), PKG Reported Powe..."
8,64-64,"PKG Reported Temp (-1.77 z), PKG Reported Powe..."
9,66-67,"PKG Reported Temp (-1.60 z), PKG Reported Powe..."


In [2]:
# Inspect the signed residuals (X_scaled - reconstructions), one column per feature.
diff

array([[ 1.23311526,  1.10864367, -0.37002257, ..., -0.79417573,
        -0.61968795, -0.65917829],
       [ 1.28965664,  0.96706375, -0.2418516 , ..., -0.9061659 ,
        -0.56675284, -0.68923985],
       [ 1.24079091,  0.95856857, -0.3215478 , ..., -0.94129013,
        -0.57034433, -0.71380576],
       ...,
       [-2.68790472, -1.38787199,  0.89699926, ...,  0.45175662,
         0.8324735 ,  0.84474488],
       [-3.65351649, -1.22475936,  1.3588307 , ...,  0.38649479,
         0.76275774,  0.77116464],
       [-4.50516523, -1.33096198,  1.45191083, ...,  0.36526102,
         0.71402259,  0.73985699]], shape=(949, 8))

In [3]:
# Inspect the signed residual of the 'GT Effective Freq' column only.
freq_diff

array([ 1.23311526e+00,  1.28965664e+00,  1.24079091e+00,  1.30695478e+00,
        1.30901570e+00,  1.23160114e+00,  1.30964883e+00,  1.31980336e+00,
        1.35442035e+00,  2.92058591e+00,  3.29032119e+00,  1.02609751e+00,
        1.17336366e+00,  1.14292458e+00,  1.33319964e+00,  1.25483007e+00,
        1.26291013e+00,  1.01843155e+00,  1.13658363e+00,  1.37073527e+00,
        1.23031538e+00,  9.66543834e-01,  1.25102008e+00,  1.24354744e+00,
        9.74829117e-01,  1.29024176e+00,  1.19359026e+00,  9.78746435e-01,
        1.07513211e+00,  1.15696482e+00,  1.18045843e+00,  8.83398312e-01,
        1.20266362e+00,  1.16017076e+00,  1.18121490e+00,  8.54576060e-01,
        1.11554592e+00,  1.25988432e+00,  1.09412036e+00,  1.05533121e+00,
        9.67460138e-01,  1.19160073e+00,  1.10681934e+00,  6.93166889e-01,
        7.24227518e-01,  3.66806731e-01,  7.01700157e-01,  4.40289322e-01,
        1.80429198e+00,  1.24971090e+00,  7.26209960e-01,  3.15981712e-01,
        7.34831132e-01,  

In [4]:
# Inspect the squared residual of the 'GT Effective Freq' column.
freq_recon_error

array([1.52057324e+00, 1.66321424e+00, 1.53956209e+00, 1.70813081e+00,
       1.71352210e+00, 1.51684136e+00, 1.71518006e+00, 1.74188091e+00,
       1.83445448e+00, 8.52982205e+00, 1.08262135e+01, 1.05287610e+00,
       1.37678229e+00, 1.30627659e+00, 1.77742129e+00, 1.57459851e+00,
       1.59494199e+00, 1.03720283e+00, 1.29182235e+00, 1.87891519e+00,
       1.51367594e+00, 9.34206982e-01, 1.56505123e+00, 1.54641024e+00,
       9.50291808e-01, 1.66472381e+00, 1.42465771e+00, 9.57944584e-01,
       1.15590905e+00, 1.33856760e+00, 1.39348212e+00, 7.80392578e-01,
       1.44639977e+00, 1.34599619e+00, 1.39526863e+00, 7.30300242e-01,
       1.24444269e+00, 1.58730850e+00, 1.19709936e+00, 1.11372396e+00,
       9.35979118e-01, 1.41991230e+00, 1.22504906e+00, 4.80480335e-01,
       5.24505497e-01, 1.34547178e-01, 4.92383110e-01, 1.93854687e-01,
       3.25546954e+00, 1.56177734e+00, 5.27380906e-01, 9.98444423e-02,
       5.39976793e-01, 1.80769775e-01, 4.02590986e-01, 3.78852650e-01,
      

In [5]:
# 85th percentile of freq_recon_error; rows above it are treated as anomalous.
freq_threshold

np.float64(0.38249564793337903)

In [6]:
# Inspect the autoencoder's output for X_scaled (same scaled space as its input).
reconstructions

array([[ 0.6114506 , -0.91801953, -0.0819836 , ...,  0.27727875,
         0.3321184 ,  0.21547191],
       [ 0.5721002 , -0.7421323 , -0.18788913, ...,  0.3019379 ,
         0.19972014,  0.16006763],
       [ 0.62239534, -0.71397746, -0.1654971 , ...,  0.339853  ,
         0.22885287,  0.19518703],
       ...,
       [-5.625688  , -6.246138  , -4.1805096 , ..., -5.100473  ,
        -4.1832676 , -5.095217  ],
       [-5.5232053 , -6.1112103 , -4.0875154 , ..., -4.995352  ,
        -4.093359  , -4.9880958 ],
       [-5.514535  , -6.099795  , -4.0796475 , ..., -4.9864583 ,
        -4.0857525 , -4.979033  ]], shape=(949, 8), dtype=float32)

In [7]:
# Inspect the standardised feature matrix that was fed to the autoencoder.
X_scaled

array([[  1.84456587,   0.19062414,  -0.45200617, ...,  -0.51689698,
         -0.28756956,  -0.44370638],
       [  1.86175686,   0.22493144,  -0.42974073, ...,  -0.60422799,
         -0.36703269,  -0.52917222],
       [  1.86318625,   0.24459111,  -0.4870449 , ...,  -0.60143715,
         -0.34149146,  -0.51861873],
       ...,
       [ -8.31359279,  -7.63401008,  -3.2835103 , ...,  -4.6487163 ,
         -3.3507941 ,  -4.25047234],
       [ -9.17672177,  -7.33596971,  -2.72868465, ...,  -4.60885701,
         -3.33060125,  -4.21693113],
       [-10.01970018,  -7.43075684,  -2.62773671, ...,  -4.62119729,
         -3.3717299 ,  -4.23917601]], shape=(949, 8))

In [8]:
# Print the raw (unscaled) feature matrix taken from filtered_df.
print(X)

[[2.68932409e+03 9.76768402e+01 6.08937651e+00 ... 4.93639530e+01
  1.88938540e+01 6.82578070e+01]
 [2.69219307e+03 9.80956595e+01 6.12881220e+00 ... 4.83239660e+01
  1.84129790e+01 6.67369450e+01]
 [2.69243162e+03 9.83356624e+01 6.02731726e+00 ... 4.83572010e+01
  1.85675430e+01 6.69247440e+01]
 ...
 [9.94042906e+02 2.15465312e+00 1.07432565e+00 ... 1.59919000e-01
  3.56602000e-01 5.16521000e-01]
 [8.49996494e+02 5.79309392e+00 2.05701143e+00 ... 6.34586000e-01
  4.78800000e-01 1.11338600e+00]
 [7.09312981e+02 4.63594408e+00 2.23580653e+00 ... 4.87631000e-01
  2.29908000e-01 7.17539000e-01]]
