In [1]:
import os
import random
import numpy as np
import tensorflow as tf
import pandas as pd
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from sklearn.preprocessing import StandardScaler

# --- Set seeds and environment for reproducibility ---
os.environ['PYTHONHASHSEED'] = '42'
os.environ['TF_DETERMINISTIC_OPS'] = '1'  # Force deterministic GPU ops
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)

# --- Load CSV data ---
calcs_df = pd.read_csv('ABN_MetroExodus_condor_calcs.csv')
raw_df = pd.read_csv('ABN_MetroExodus_raw_condor.csv')

# --- Filter where TestMarker == 0.1 (calcs file is the driver here) ---
filtered_calcs = calcs_df[calcs_df['TestMarker'] == 0.1].reset_index(drop=True)

# --- Align raw_df to same index as filtered_calcs ---
# Assumes both files have same row counts and order; if not, we may need a join key
aligned_raw = raw_df.loc[filtered_calcs.index, [
    'hw.raw.RenderBasic.GPU_MEMORY_BYTE_READ',
    'hw.raw.RenderBasic.GPU_MEMORY_BYTE_WRITE'
]].reset_index(drop=True)

# --- Merge calcs and raw features ---
filtered_df = pd.concat([filtered_calcs, aligned_raw], axis=1)

# --- Feature columns ---
features = [
    'GT Effective Freq',
    'GPU_Busy',
    'GTI_Busy',
    'PKG Reported Temp',
    'PKG Reported Power',
    'hw.raw.RenderBasic.GPU_MEMORY_BYTE_READ',
    'hw.raw.RenderBasic.GPU_MEMORY_BYTE_WRITE',
]

# --- Prepare feature matrix and scale ---
X = filtered_df[features].values
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# --- Build autoencoder ---
input_dim = X_scaled.shape[1]
encoding_dim = input_dim // 2 if input_dim > 1 else 1

input_layer = Input(shape=(input_dim,))
encoder = Dense(encoding_dim, activation="relu")(input_layer)
decoder = Dense(input_dim, activation="linear")(encoder)
autoencoder = Model(inputs=input_layer, outputs=decoder)
autoencoder.compile(optimizer='adam', loss='mse')

# --- Train autoencoder ---
autoencoder.fit(
    X_scaled, X_scaled,
    epochs=50, batch_size=32,
    shuffle=True, verbose=1
)

# --- Predict and compute reconstruction errors ---
reconstructions = autoencoder.predict(X_scaled)
diff = X_scaled - reconstructions

# Index of 'GT Effective Freq'
freq_idx = features.index('GT Effective Freq')

# Frequency-specific reconstruction error and difference
freq_recon_error = np.square(diff[:, freq_idx])  # squared errors for freq feature
freq_diff = diff[:, freq_idx]  # signed difference for dip/spike direction

# Threshold for anomaly (top 20% error)
freq_threshold = np.percentile(freq_recon_error, 80)

# Detect dips and spikes based on freq error and direction
dip_mask = (freq_recon_error > freq_threshold) & (freq_diff < 0)
spike_mask = (freq_recon_error > freq_threshold) & (freq_diff > 0)

# Helper function to get continuous index ranges
def get_ranges(indexes):
    indexes = np.sort(np.unique(indexes))
    ranges = []
    if len(indexes) > 0:
        start = indexes[0]
        for prev, curr in zip(indexes, indexes[1:]):
            if curr != prev + 1:
                ranges.append((start, prev))
                start = curr
        ranges.append((start, indexes[-1]))
    return ranges

dip_indexes = np.where(dip_mask)[0]
spike_indexes = np.where(spike_mask)[0]

dip_ranges = get_ranges(dip_indexes)
spike_ranges = get_ranges(spike_indexes)

# Format ranges as strings for printing
clean_dip_ranges = [f"{start}-{end}" for start, end in dip_ranges]
clean_spike_ranges = [f"{start}-{end}" for start, end in spike_ranges]

print("Dip ranges:", clean_dip_ranges)
print("Spike ranges:", clean_spike_ranges)

# --- Feature influence analysis ---
def feature_influence_per_range(df, feature_list, ranges, target_feature):
    overall_mean = df[feature_list].mean()
    overall_std = df[feature_list].std()
    results = []
    for start, end in ranges:
        subset = df.loc[start:end, feature_list]
        mean_diff = subset.mean() - overall_mean
        z_scores = mean_diff / overall_std

        # exclude the target feature itself
        influencers = z_scores.drop(target_feature)

        # top 3 influencers by absolute z-score
        top_influencers = influencers.abs().sort_values(ascending=False).index[:3]
        top_vals = influencers.loc[top_influencers].round(2)

        results.append({
            "Range": f"{start}-{end}",
            "Top Influencers": ", ".join(
                [f"{feat} ({val:+.2f} z)" for feat, val in zip(top_influencers, top_vals)]
            )
        })
    return pd.DataFrame(results)

dip_influencer_table = feature_influence_per_range(filtered_df, features, dip_ranges, 'GT Effective Freq')
spike_influencer_table = feature_influence_per_range(filtered_df, features, spike_ranges, 'GT Effective Freq')

pd.set_option('display.max_colwidth', 200)
dip_influencer_table

Epoch 1/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - loss: 1.0300
Epoch 2/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.9404 
Epoch 3/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.8770
Epoch 4/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.8332
Epoch 5/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.7985
Epoch 6/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.7689
Epoch 7/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.7429
Epoch 8/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.7197
Epoch 9/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.6986 
Epoch 10/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.6769
Epoch 1

Unnamed: 0,Range,Top Influencers
0,108-108,"GTI_Busy (+2.21 z), PKG Reported Power (+1.41 z), PKG Reported Temp (-0.77 z)"
1,126-126,"GTI_Busy (+1.62 z), PKG Reported Power (+1.20 z), PKG Reported Temp (-1.10 z)"
2,129-131,"GTI_Busy (+1.71 z), PKG Reported Power (+1.18 z), PKG Reported Temp (-0.99 z)"
3,133-133,"GTI_Busy (+1.75 z), PKG Reported Power (+1.15 z), PKG Reported Temp (-1.10 z)"
4,136-136,"GTI_Busy (+1.57 z), PKG Reported Temp (-1.10 z), PKG Reported Power (+1.06 z)"
5,138-138,"GTI_Busy (+1.54 z), PKG Reported Temp (-1.10 z), PKG Reported Power (+1.09 z)"
6,142-142,"GTI_Busy (+1.64 z), PKG Reported Temp (-1.10 z), PKG Reported Power (+1.05 z)"
7,144-144,"GTI_Busy (+1.68 z), PKG Reported Temp (-1.10 z), PKG Reported Power (+1.05 z)"
8,147-147,"GTI_Busy (+1.88 z), PKG Reported Power (+1.02 z), PKG Reported Temp (-0.77 z)"
9,149-149,"GTI_Busy (+1.76 z), PKG Reported Power (+1.04 z), PKG Reported Temp (-0.77 z)"


In [2]:
spike_influencer_table

Unnamed: 0,Range,Top Influencers
0,0-73,"PKG Reported Temp (-2.34 z), PKG Reported Power (+0.99 z), hw.raw.RenderBasic.GPU_MEMORY_BYTE_WRITE (+0.72 z)"
1,620-620,"PKG Reported Temp (+0.91 z), hw.raw.RenderBasic.GPU_MEMORY_BYTE_WRITE (-0.59 z), hw.raw.RenderBasic.GPU_MEMORY_BYTE_READ (-0.45 z)"
2,635-635,"PKG Reported Temp (+0.91 z), GTI_Busy (-0.80 z), hw.raw.RenderBasic.GPU_MEMORY_BYTE_WRITE (-0.48 z)"
3,641-644,"GTI_Busy (-0.84 z), PKG Reported Temp (+0.74 z), hw.raw.RenderBasic.GPU_MEMORY_BYTE_WRITE (-0.56 z)"
4,650-650,"PKG Reported Temp (+0.91 z), GTI_Busy (-0.83 z), hw.raw.RenderBasic.GPU_MEMORY_BYTE_WRITE (-0.58 z)"
5,684-684,"GPU_Busy (-0.99 z), PKG Reported Temp (+0.91 z), PKG Reported Power (-0.63 z)"
6,688-688,"GTI_Busy (-1.12 z), PKG Reported Power (-0.78 z), PKG Reported Temp (+0.57 z)"
7,695-695,"PKG Reported Temp (+1.24 z), GTI_Busy (-1.06 z), hw.raw.RenderBasic.GPU_MEMORY_BYTE_WRITE (-0.59 z)"
8,702-702,"PKG Reported Temp (+1.24 z), GTI_Busy (-1.06 z), hw.raw.RenderBasic.GPU_MEMORY_BYTE_WRITE (-0.59 z)"
9,706-706,"GTI_Busy (-1.21 z), PKG Reported Temp (+0.91 z), hw.raw.RenderBasic.GPU_MEMORY_BYTE_WRITE (-0.59 z)"
