In [1]:
import os
import random
import numpy as np
import tensorflow as tf
import pandas as pd
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from sklearn.preprocessing import StandardScaler

# --- Set seeds and environment for reproducibility ---
SEED = 42

# NOTE: PYTHONHASHSEED is read when an interpreter starts, so assigning it here
# does not change hashing in the already-running kernel; it only propagates to
# processes spawned afterwards. Export it before launching Jupyter if hash
# determinism of this kernel matters.
os.environ['PYTHONHASHSEED'] = str(SEED)
os.environ['TF_DETERMINISTIC_OPS'] = '1'  # Force deterministic GPU ops

random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# --- Load both CSVs ---
calcs_df = pd.read_csv("ABN_MetroExodus_condor_calcs.csv")
raw_df   = pd.read_csv("ABN_MetroExodus_raw_condor.csv")

# --- Merge side-by-side (same size, same row order) ---
# pd.concat(axis=1) aligns on the index, so a row-count mismatch would be
# padded with NaN silently. Fail loudly instead.
if len(calcs_df) != len(raw_df):
    raise ValueError(
        f"Row count mismatch: calcs has {len(calcs_df)} rows, raw has {len(raw_df)} rows"
    )
merged_df = pd.concat([calcs_df, raw_df], axis=1)

# --- Filter where TestMarker == 0.1 (from calcs file) ---
# Tolerant comparison: exact float equality breaks if the marker was written
# with rounding noise (e.g. 0.10000000000000002).
TEST_MARKER = 0.1
marker_mask = np.isclose(merged_df["TestMarker"], TEST_MARKER)
filtered_df = merged_df[marker_mask].reset_index(drop=True)

# --- Feature columns ---
features = [
    'GT Effective Freq', 'GPU_Busy', 'GTI_Busy',
    'PKG Reported Temp', 'PKG Reported Power',
    'GTI Rd BW', 'GTI Wr BW', 'GTI Total BW'
]

# --- Prepare feature matrix and scale ---
X = filtered_df[features].values
# If a feature name exists in both CSVs, the side-by-side merge duplicates the
# column and the matrix columns no longer line up with `features`.
if X.shape[1] != len(features):
    raise ValueError(
        f"Expected {len(features)} feature columns but selected {X.shape[1]}; "
        "a feature name is probably duplicated across the merged CSVs"
    )
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# --- Build autoencoder ---
# Single hidden bottleneck of half the input width (at least 1 unit).
input_dim = X_scaled.shape[1]
encoding_dim = input_dim // 2 if input_dim > 1 else 1

input_layer = Input(shape=(input_dim,))
encoder = Dense(encoding_dim, activation="relu")(input_layer)
decoder = Dense(input_dim, activation="linear")(encoder)

autoencoder = Model(inputs=input_layer, outputs=decoder)
autoencoder.compile(optimizer='adam', loss='mse')

# --- Train autoencoder ---
# The network is trained to reproduce its own (scaled) input.
autoencoder.fit(
    X_scaled, X_scaled,
    epochs=50,
    batch_size=32,
    shuffle=True,
    verbose=1
)

# --- Predict and compute reconstruction errors ---
reconstructions = autoencoder.predict(X_scaled)
diff = X_scaled - reconstructions

# Index of 'GT Effective Freq'
freq_idx = features.index('GT Effective Freq')

# Frequency-specific reconstruction error and difference
freq_recon_error = np.square(diff[:, freq_idx])   # squared errors for freq feature
freq_diff = diff[:, freq_idx]                      # signed difference for dip/spike direction

# Threshold for anomaly: errors above the 85th percentile (i.e. the top 15%)
ANOMALY_PERCENTILE = 85
freq_threshold = np.percentile(freq_recon_error, ANOMALY_PERCENTILE)

# Detect dips and spikes based on freq error and direction:
# actual below reconstruction -> dip, actual above reconstruction -> spike.
dip_mask = (freq_recon_error > freq_threshold) & (freq_diff < 0)
spike_mask = (freq_recon_error > freq_threshold) & (freq_diff > 0)
# Helper function to get continuous index ranges
def get_ranges(indexes):
    """Collapse a collection of integer positions into consecutive runs.

    The input is de-duplicated and sorted, then split wherever two
    neighbouring values are not exactly one apart.

    Args:
        indexes: array-like of positions (any order, duplicates allowed).

    Returns:
        A list of inclusive ``(start, end)`` tuples, one per run of
        consecutive values; an empty list when no positions are given.
    """
    # np.unique flattens its input and returns the distinct values sorted.
    ordered = np.unique(indexes)
    if ordered.size == 0:
        return []

    # Positions where a new run begins: the gap to the previous value is not 1.
    run_starts = np.flatnonzero(np.diff(ordered) != 1) + 1
    return [(run[0], run[-1]) for run in np.split(ordered, run_starts)]

# np.where(mask) returns a tuple with one array per axis; element [0] is the
# 1-D array of flagged row positions. Both masks are unpacked the same way so
# that dip_indexes and spike_indexes have the same type.
dip_indexes = np.where(dip_mask)[0]
spike_indexes = np.where(spike_mask)[0]

# Collapse flagged row positions into inclusive (start, end) runs
dip_ranges = get_ranges(dip_indexes)
spike_ranges = get_ranges(spike_indexes)

# Format ranges as strings for printing
clean_dip_ranges = [f"{start}-{end}" for start, end in dip_ranges]
clean_spike_ranges = [f"{start}-{end}" for start, end in spike_ranges]

print("Dip ranges:", clean_dip_ranges)
print("Spike ranges:", clean_spike_ranges)

# --- Feature influence analysis ---
def feature_influence_per_range(df, feature_list, ranges, target_feature, top_n=3):
    """Rank the features that deviate most from their overall mean in each range.

    For every row range, the mean of each feature inside the range is compared
    with that feature's mean over the whole frame and expressed as a z-score
    (difference divided by the overall standard deviation). The target feature
    is excluded and the remaining features are ranked by absolute z-score.

    Args:
        df: DataFrame containing at least the columns in ``feature_list``.
        feature_list: column names to analyse (must include ``target_feature``).
        ranges: iterable of inclusive ``(start, end)`` row *positions*.
        target_feature: column to leave out of the ranking.
        top_n: number of top-ranked features to report per range (default 3).

    Returns:
        DataFrame with columns ``"Range"`` and ``"Top Influencers"``, one row
        per range. The columns are present even when ``ranges`` is empty.

    Raises:
        KeyError: if ``target_feature`` is not in ``feature_list``.

    Note:
        A feature whose overall standard deviation is zero produces NaN or
        infinite z-scores, because the division is left unguarded.
    """
    feature_frame = df[feature_list]
    overall_mean = feature_frame.mean()
    overall_std = feature_frame.std()

    results = []
    for start, end in ranges:
        # Ranges are row positions, so slice positionally (end inclusive).
        # Label-based .loc would select wrong or no rows on a non-default index.
        subset = feature_frame.iloc[start:end + 1]

        mean_diff = subset.mean() - overall_mean
        z_scores = mean_diff / overall_std

        # exclude the target feature itself
        influencers = z_scores.drop(target_feature)

        # top influencers by absolute z-score, keeping the signed value for display
        top_influencers = influencers.abs().sort_values(ascending=False).index[:top_n]
        top_vals = influencers.loc[top_influencers].round(2)

        results.append({
            "Range": f"{start}-{end}",
            "Top Influencers": ", ".join(
                [f"{feat} ({val:+.2f} z)" for feat, val in zip(top_influencers, top_vals)]
            )
        })

    # Explicit columns keep the schema stable when no ranges were supplied.
    return pd.DataFrame(results, columns=["Range", "Top Influencers"])

# Build one influencer table per anomaly type (dips first, then spikes),
# always excluding the frequency column that defines the anomaly.
dip_influencer_table, spike_influencer_table = (
    feature_influence_per_range(filtered_df, features, anomaly_ranges, 'GT Effective Freq')
    for anomaly_ranges in (dip_ranges, spike_ranges)
)

# Widen the column display so the influencer strings are not truncated
pd.set_option('display.max_colwidth', 200)

# Last expression of the cell: render the spike table
spike_influencer_table

Epoch 1/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - loss: 1.2320
Epoch 2/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 1.1133
Epoch 3/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 1.0247
Epoch 4/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.9550
Epoch 5/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.8938
Epoch 6/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.8374
Epoch 7/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.7855
Epoch 8/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.7360
Epoch 9/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.6887
Epoch 10/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.6444
Epoch 11/

Unnamed: 0,Range,Top Influencers
0,0-44,"PKG Reported Temp (-2.74 z), PKG Reported Power (+0.85 z), GTI_Busy (-0.70 z)"
1,46-46,"PKG Reported Temp (-2.10 z), PKG Reported Power (+1.34 z), GTI_Busy (-0.41 z)"
2,48-50,"PKG Reported Temp (-2.44 z), PKG Reported Power (+0.88 z), GTI_Busy (-0.65 z)"
3,52-52,"PKG Reported Temp (-1.77 z), PKG Reported Power (+1.11 z), GTI_Busy (-0.33 z)"
4,54-54,"PKG Reported Temp (-2.10 z), PKG Reported Power (+1.14 z), GTI_Busy (-0.29 z)"
5,57-57,"PKG Reported Temp (-1.43 z), PKG Reported Power (+1.40 z), GTI Rd BW (-0.27 z)"
6,59-59,"PKG Reported Temp (-1.43 z), PKG Reported Power (+1.38 z), GTI Rd BW (-0.23 z)"
7,61-62,"PKG Reported Temp (-1.77 z), PKG Reported Power (+1.20 z), GTI_Busy (-0.36 z)"
8,64-64,"PKG Reported Temp (-1.77 z), PKG Reported Power (+1.18 z), GTI_Busy (-0.54 z)"
9,66-67,"PKG Reported Temp (-1.60 z), PKG Reported Power (+1.28 z), GTI_Busy (-0.35 z)"


In [2]:
# Display the side-by-side concatenation of the calcs and raw CSVs (unfiltered)
merged_df

Unnamed: 0,Delta_Time,Time,Running GPU Ticks,GT Effective Freq,GPU_Busy,GTI_Busy,GT Unslice Request Freq,GT Slice Request Freq,GT Slice Freq,GT Unslice Freq,...,hw.gal.gtlimit_ineff_op,hw.gal.gtlimit_psys_crit,hw.gal.d_pkg_joules,hw.gal.d_pkg_temp,hw.gal.rc6_res,hw.gal.pg_req,hw.gal.pg_status,hw.gal.ia_pstate_limit,hw.md.RenderBasic.GPU_MEMORY_BYTE_READ_RATE,hw.md.RenderBasic.GPU_MEMORY_BYTE_WRITE_RATE\n
0,0.000000,0.000000,4160274125,0.000000,0.000000,0.000000,70834.75,70834.75,0,0,...,0,0,1645274663,67,55073,40,2,255,0.000000,0.000000
1,0.109792,0.109792,4240711140,732.630929,9.257747,7.118951,6666.80,6666.80,0,0,...,0,0,1645288336,65,55073,40,2,255,0.300303,0.567075
2,0.109787,0.219579,4284619146,399.938116,5.500120,3.320255,6666.80,6666.80,0,0,...,0,0,1645289872,65,55073,40,2,255,0.196828,0.387230
3,0.109332,0.328911,33562439,401.626139,2.068557,1.985510,6666.80,6666.80,0,0,...,0,0,1645291856,65,55073,40,2,255,0.076479,0.228825
4,0.109393,0.438304,77141007,398.367062,2.160304,1.841246,6666.80,6666.80,0,0,...,0,0,1645293712,65,55073,40,2,255,0.003272,0.158602
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3142,0.111845,348.125647,3762929347,399.982637,0.000000,0.000000,6666.80,6666.80,0,0,...,0,0,1815043272,67,55073,40,2,255,0.000000,0.000000
3143,0.112002,348.237649,3807732922,400.024776,0.000000,0.000000,6666.80,6666.80,0,0,...,0,0,1815045256,67,55073,40,2,255,0.000000,0.000000
3144,0.112066,348.349715,3852557349,399.982394,0.241576,0.241576,6666.80,6666.80,0,0,...,0,0,1815046728,67,55073,40,2,255,0.000000,0.000000
3145,0.111942,348.461657,3915083441,558.557932,13.925005,12.365297,70834.75,70834.75,0,0,...,0,0,1815047888,68,55073,40,2,255,0.000161,0.001548
