In [3]:
import pandas as pd

# Load the Alan Wake 2 capture (the path is resolved relative to the notebook's working directory)
df = pd.read_csv("ABN_AlanWake2_2.results.0.dx12-2160p-ultra-NewGameGameplay_._test_wishGranted.csv")

# Keep only the rows whose "Test Markers" value equals 0.1 and renumber them from 0
df_filtered = df.loc[df["Test Markers"] == 0.1].reset_index(drop=True)

# Rolling window: the current sample plus the (window_size - 1) samples before it
window_size = 3
spike_indexes = []
dip_indexes = []

freq = df_filtered["GT Effective Freq"]

# The global standard deviation does not depend on the loop index, so it is
# computed once here instead of once per row.
std_val = freq.std()

# Spike / dip threshold (adjust the 0.1 multiplier as needed)
threshold = 0.1 * std_val

# Start at the first row that has a full window behind it.
# NOTE(review): the last 10 filtered rows are never examined; the reason is not
# visible in this cell - confirm that this cut-off is intended.
for i in range(window_size - 1, len(df_filtered) - 10):
    # .loc label slices include both end points, so this is window_size values
    window_vals = freq.loc[i - (window_size - 1):i]
    mean_val = window_vals.mean()

    curr_val = freq.loc[i]

    if curr_val > mean_val + threshold:
        spike_indexes.append(i)
    elif curr_val < mean_val - threshold:
        dip_indexes.append(i)

print("Spike indices in filtered DataFrame:", spike_indexes)
print("Dip indices in filtered DataFrame:", dip_indexes)

Spike indices in filtered DataFrame: [218, 219, 280, 281]
Dip indices in filtered DataFrame: [217, 220, 279]


In [4]:
import pandas as pd

# Cyberpunk 2077 capture: keep only rows whose "Test Markers" value is 0.1, renumbered from 0
df = pd.read_csv("ABN_Cyberpunk2077_2.results.0.dx12-1440p-ultra-GuiBenchmark_._test_wishGranted.csv")
df_filtered = df[df["Test Markers"] == 0.1].reset_index(drop=True)

# Companion raw condor export (same file stem with a "_raw" suffix), filtered the same way
raw_condor = pd.read_csv("ABN_Cyberpunk2077_2.results.0.dx12-1440p-ultra-GuiBenchmark_._test_wishGranted_raw.csv")
raw_condor_filtered = raw_condor[raw_condor["Test Markers"] == 0.1].reset_index(drop=True)

# The two GPU memory counters taken from the raw condor file
gpu_mem_features = [
    "hw.md.RenderBasic.GPU_MEMORY_BYTE_READ_RATE",
    "hw.md.RenderBasic.GPU_MEMORY_BYTE_WRITE_RATE",
]

# Attach the counters column-wise; rows pair up through their reset 0..n-1 index.
# NOTE(review): assumes both filtered frames have the same number of rows in the
# same order - confirm, otherwise the extra rows are padded with NaN.
df_filtered = pd.concat(
    [df_filtered, raw_condor_filtered[gpu_mem_features]],
    axis="columns",
)

# Spike / dip scan: compare each sample with the mean of itself and the two
# samples before it, using 1.5 global standard deviations as the margin.
global_std = df_filtered["GT Effective Freq"].std()
spike_indexes, dip_indexes = [], []

gt_freq = df_filtered["GT Effective Freq"]
for i in range(2, len(df_filtered) - 10):
    curr_val = gt_freq.loc[i]
    mean_val = gt_freq.loc[i - 2:i].mean()  # label slice includes both ends: 3 values

    if curr_val > mean_val + 1.5 * global_std:
        spike_indexes.append(i)
        continue
    if curr_val < mean_val - 1.5 * global_std:
        dip_indexes.append(i)

# Columns examined as possible influencers of a spike or dip
features = [
    "GPU_Busy", "GTI_Busy",
    "PKG Reported Temp", "PKG Reported Power",
    "hw.md.RenderBasic.GPU_MEMORY_BYTE_READ_RATE",
    "hw.md.RenderBasic.GPU_MEMORY_BYTE_WRITE_RATE",
]

# Global mean and standard deviation of every feature (both are needed for z-scores)
feature_means, feature_stds = df_filtered[features].mean(), df_filtered[features].std()

def get_top_influencers(row):
    """Format the three features of `row` with the largest absolute z-score.

    Each z-score is (row value - global mean) / global std, using the
    notebook-level `features`, `feature_means` and `feature_stds`.

    Returns a string such as "GPU_Busy (+1.87 z), ..." ordered from the
    largest to the smallest absolute z-score.
    """
    z_by_feature = {
        name: (row[name] - feature_means[name]) / feature_stds[name]
        for name in features
    }
    # Rank by magnitude only; the sign is kept for display
    ranked = sorted(z_by_feature.items(), key=lambda item: abs(item[1]), reverse=True)
    return ", ".join(f"{name} ({score:+.2f} z)" for name, score in ranked[:3])

# One summary string per detected row, computed before the tables are assembled
spike_influencers = [get_top_influencers(df_filtered.loc[idx]) for idx in spike_indexes]
dip_influencers = [get_top_influencers(df_filtered.loc[idx]) for idx in dip_indexes]

# Two-column tables: row index of the event and its top-3 influencer summary
spike_table = pd.DataFrame({"spike": spike_indexes, "influencers": spike_influencers})
dip_table = pd.DataFrame({"dip": dip_indexes, "influencers": dip_influencers})

# Show the first ten rows of each table
print("Spike Table:\n", spike_table.head(10))
print("\nDip Table:\n", dip_table.head(10))

Spike Table:
    spike                                        influencers
0      5  PKG Reported Temp (-1.75 z), PKG Reported Powe...
1      8  PKG Reported Temp (-2.67 z), PKG Reported Powe...
2    233  GPU_Busy (-0.87 z), hw.md.RenderBasic.GPU_MEMO...
3    330  GPU_Busy (-1.92 z), hw.md.RenderBasic.GPU_MEMO...
4    421  GPU_Busy (+1.87 z), hw.md.RenderBasic.GPU_MEMO...
5    897  GPU_Busy (-2.04 z), hw.md.RenderBasic.GPU_MEMO...
6    898  GPU_Busy (-1.09 z), hw.md.RenderBasic.GPU_MEMO...
7    925  hw.md.RenderBasic.GPU_MEMORY_BYTE_READ_RATE (-...
8   1036  GPU_Busy (+1.70 z), hw.md.RenderBasic.GPU_MEMO...

Dip Table:
    dip                                        influencers
0    6  PKG Reported Temp (-2.67 z), PKG Reported Powe...
1  115  PKG Reported Temp (-1.75 z), PKG Reported Powe...
2  151  GTI_Busy (-0.61 z), GPU_Busy (-0.60 z), hw.md....
3  234  PKG Reported Power (+1.09 z), PKG Reported Tem...
4  323  PKG Reported Power (+1.31 z), GPU_Busy (-1.28 ...
5  324  GPU_Busy (-2.02 z

In [5]:
import pandas as pd
import numpy as np

# --- Column lists used below ---
# GPU memory counters pulled from the raw condor file
gpu_mem_features = [
    "hw.md.RenderBasic.GPU_MEMORY_BYTE_READ_RATE",
    "hw.md.RenderBasic.GPU_MEMORY_BYTE_WRITE_RATE",
]

# Columns examined as possible influencers of a spike or dip
features = [
    "GPU_Busy", "GTI_Busy",
    "PKG Reported Temp", "PKG Reported Power",
    "hw.md.RenderBasic.GPU_MEMORY_BYTE_READ_RATE",
    "hw.md.RenderBasic.GPU_MEMORY_BYTE_WRITE_RATE",
]

# --- Cyberpunk capture, restricted to rows whose "Test Markers" value is 0.1 ---
df = pd.read_csv("ABN_Cyberpunk2077_2.results.0.dx12-1440p-ultra-GuiBenchmark_._test_wishGranted.csv")
df_filtered = df[df["Test Markers"] == 0.1].reset_index(drop=True)

# --- Raw condor export (same file stem with a "_raw" suffix), filtered the same way ---
raw_condor = pd.read_csv("ABN_Cyberpunk2077_2.results.0.dx12-1440p-ultra-GuiBenchmark_._test_wishGranted_raw.csv")
raw_condor_filtered = raw_condor[raw_condor["Test Markers"] == 0.1].reset_index(drop=True)

# --- Column-wise merge; rows pair up through their reset 0..n-1 index ---
# NOTE(review): assumes both filtered frames have the same length and order - confirm.
df_filtered = pd.concat(
    [df_filtered, raw_condor_filtered[gpu_mem_features]],
    axis="columns",
)

# --- Standard deviation of the frequency over all filtered rows ---
global_std = df_filtered["GT Effective Freq"].std()


# --- Detect spikes and dips ---
def detect_spikes_dips(spike_weight, dip_weight):
    """Classify rows of the notebook-level `df_filtered` as spikes or dips.

    For every row from index 2 up to (but excluding) len(df_filtered) - 10, the
    "GT Effective Freq" value is compared with the mean of itself and the two
    preceding values. A row is a spike when it exceeds that mean by more than
    spike_weight * global_std, otherwise a dip when it falls below the mean by
    more than dip_weight * global_std.

    Returns (spike_idxs, dip_idxs): two lists of row indexes in ascending order.
    """
    freq = df_filtered["GT Effective Freq"]
    upper_margin = spike_weight * global_std
    lower_margin = dip_weight * global_std

    spike_idxs, dip_idxs = [], []
    for pos in range(2, len(df_filtered) - 10):
        trailing_mean = freq.loc[pos - 2:pos].mean()  # label slice includes both ends
        current = freq.loc[pos]

        if current > trailing_mean + upper_margin:
            spike_idxs.append(pos)
            continue
        if current < trailing_mean - lower_margin:
            dip_idxs.append(pos)
    return spike_idxs, dip_idxs


# --- Binary search for weight given a target ---
def find_weight_for_target(target_count, mode="spike", max_iter=50):
    """Bisect the threshold weight so the number of detections approaches a target.

    The interval [0.1, 5.0] is halved `max_iter` times. At each midpoint
    `detect_spikes_dips(mid, mid)` is run and the spikes (mode == "spike") or
    the dips (any other mode) are counted.

    Returns (weight, count) for whichever final bound has a count closer to
    `target_count`; the lower bound wins ties.
    """
    def detections_at(weight):
        spikes, dips = detect_spikes_dips(weight, weight)
        return len(spikes) if mode == "spike" else len(dips)

    low, high = 0.1, 5.0
    for _ in range(max_iter):
        mid = (low + high) / 2
        if detections_at(mid) > target_count:
            # Too many detections: the threshold is too permissive, so raise it
            low = mid
        else:
            # At or below the target: try a smaller weight
            high = mid

    # Evaluate both ends of the final interval and keep the closer one
    count_low = detections_at(low)
    count_high = detections_at(high)
    if abs(count_low - target_count) <= abs(count_high - target_count):
        return low, count_low
    return high, count_high


# --- Desired number of detections, used for spikes and dips alike ---
target_count = 30

# --- Search a weight for each kind of event separately ---
spike_weight, actual_spikes = find_weight_for_target(target_count, "spike")
dip_weight, actual_dips = find_weight_for_target(target_count, "dip")

print(f"Target={target_count}, Spikes={actual_spikes}, Weight={spike_weight:.6f}")
print(f"Target={target_count}, Dips={actual_dips}, Weight={dip_weight:.6f}")

# --- Final detection with the two weights combined ---
# Rows are tested for a spike first, so counts may differ from the separate searches above.
spike_indexes, dip_indexes = detect_spikes_dips(spike_weight, dip_weight)


# --- Global mean / std of each influencer feature, used for z-scores ---
feature_means, feature_stds = df_filtered[features].mean(), df_filtered[features].std()


def get_top_influencers(row):
    """Format the three features of `row` with the largest absolute z-score.

    Each z-score is (row value - global mean) / global std, using the
    notebook-level `features`, `feature_means` and `feature_stds`.
    """
    z_by_feature = {
        name: (row[name] - feature_means[name]) / feature_stds[name]
        for name in features
    }
    # Rank by magnitude only; the sign is kept for display
    ranked = sorted(z_by_feature.items(), key=lambda item: abs(item[1]), reverse=True)
    return ", ".join(f"{name} ({score:+.2f} z)" for name, score in ranked[:3])


# --- One summary string per detected row ---
spike_influencers = [get_top_influencers(df_filtered.loc[idx]) for idx in spike_indexes]
dip_influencers = [get_top_influencers(df_filtered.loc[idx]) for idx in dip_indexes]

# --- Two-column tables: event row index and its top-3 influencer summary ---
spike_table = pd.DataFrame({"spike": spike_indexes, "influencers": spike_influencers})
dip_table = pd.DataFrame({"dip": dip_indexes, "influencers": dip_influencers})

# --- First ten rows of each table ---
print("\nSpike Table:\n", spike_table.head(10))
print("\nDip Table:\n", dip_table.head(10))


Target=30, Spikes=30, Weight=0.987949
Target=30, Dips=30, Weight=1.163745

Spike Table:
    spike                                        influencers
0      5  PKG Reported Temp (-1.75 z), PKG Reported Powe...
1      8  PKG Reported Temp (-2.67 z), PKG Reported Powe...
2    114  hw.md.RenderBasic.GPU_MEMORY_BYTE_READ_RATE (+...
3    150  PKG Reported Temp (-0.82 z), GTI_Busy (-0.70 z...
4    233  GPU_Busy (-0.87 z), hw.md.RenderBasic.GPU_MEMO...
5    236  PKG Reported Power (+1.03 z), hw.md.RenderBasi...
6    330  GPU_Busy (-1.92 z), hw.md.RenderBasic.GPU_MEMO...
7    347  GPU_Busy (-1.28 z), hw.md.RenderBasic.GPU_MEMO...
8    357  GTI_Busy (+2.65 z), hw.md.RenderBasic.GPU_MEMO...
9    361  hw.md.RenderBasic.GPU_MEMORY_BYTE_READ_RATE (-...

Dip Table:
    dip                                        influencers
0    6  PKG Reported Temp (-2.67 z), PKG Reported Powe...
1  115  PKG Reported Temp (-1.75 z), PKG Reported Powe...
2  151  GTI_Busy (-0.61 z), GPU_Busy (-0.60 z), hw.md....
3  234

In [6]:
import pandas as pd
import numpy as np

# --- Cyberpunk capture, restricted to rows whose "Test Markers" value is 0.1 ---
df = pd.read_csv("ABN_Cyberpunk2077_2.results.0.dx12-1440p-ultra-GuiBenchmark_._test_wishGranted.csv")
df_filtered = df[df["Test Markers"] == 0.1].reset_index(drop=True)

# --- Raw condor export (same file stem with a "_raw" suffix), filtered the same way ---
raw_condor = pd.read_csv("ABN_Cyberpunk2077_2.results.0.dx12-1440p-ultra-GuiBenchmark_._test_wishGranted_raw.csv")
raw_condor_filtered = raw_condor[raw_condor["Test Markers"] == 0.1].reset_index(drop=True)

# --- GPU memory counters, attached column-wise through the reset 0..n-1 index ---
# NOTE(review): assumes both filtered frames have the same length and order - confirm.
gpu_mem_features = [
    "hw.md.RenderBasic.GPU_MEMORY_BYTE_READ_RATE",
    "hw.md.RenderBasic.GPU_MEMORY_BYTE_WRITE_RATE",
]
df_filtered = pd.concat(
    [df_filtered, raw_condor_filtered[gpu_mem_features]],
    axis="columns",
)

# --- Columns examined as possible influencers of a spike or dip ---
features = [
    "GPU_Busy", "GTI_Busy",
    "PKG Reported Temp", "PKG Reported Power",
    "hw.md.RenderBasic.GPU_MEMORY_BYTE_READ_RATE",
    "hw.md.RenderBasic.GPU_MEMORY_BYTE_WRITE_RATE",
]

# --- Standard deviation of the frequency over all filtered rows ---
global_std = df_filtered["GT Effective Freq"].std()

# --- Detect spikes and dips using moving average + global std ---
def detect_spikes_dips(spike_weight, dip_weight):
    """Classify every row of the notebook-level `df_filtered` as spike, dip or neither.

    Each "GT Effective Freq" value is compared with the mean of itself and up
    to two preceding values (fewer at the start of the frame). A row is a
    spike when it exceeds that mean by more than spike_weight * global_std,
    otherwise a dip when it falls below it by more than dip_weight * global_std.

    Returns (spike_idxs, dip_idxs): two lists of row indexes in ascending order.
    """
    freq = df_filtered["GT Effective Freq"]
    upper_margin = spike_weight * global_std
    lower_margin = dip_weight * global_std

    spike_idxs, dip_idxs = [], []
    for pos in range(len(df_filtered)):
        # Clamp the window start so the first rows use a shorter window
        first = max(0, pos - 2)
        trailing_mean = freq.loc[first:pos].mean()  # label slice includes both ends
        current = freq.loc[pos]

        if current > trailing_mean + upper_margin:
            spike_idxs.append(pos)
            continue
        if current < trailing_mean - lower_margin:
            dip_idxs.append(pos)
    return spike_idxs, dip_idxs

# --- Binary search for weight given a target ---
def find_weight_for_target(target_count, mode="spike", max_iter=50):
    """Bisect the threshold weight so the number of detections approaches a target.

    The interval [0.1, 5.0] is halved `max_iter` times. At each midpoint
    `detect_spikes_dips(mid, mid)` is run and the spikes (mode == "spike") or
    the dips (any other mode) are counted.

    Returns the final bound whose count is closer to `target_count`; the lower
    bound wins ties.
    """
    def detections_at(weight):
        spikes, dips = detect_spikes_dips(weight, weight)
        return len(spikes) if mode == "spike" else len(dips)

    low, high = 0.1, 5.0
    for _ in range(max_iter):
        mid = (low + high) / 2
        if detections_at(mid) > target_count:
            low = mid   # too many detections: raise the weight
        else:
            high = mid  # at or below the target: try a smaller weight

    # Evaluate both ends of the final interval and keep the closer one
    miss_low = abs(detections_at(low) - target_count)
    miss_high = abs(detections_at(high) - target_count)
    if miss_low <= miss_high:
        return low
    return high

# --- Search one weight per event kind for the same target, then detect with both ---
target_count = 30
spike_weight, dip_weight = (
    find_weight_for_target(target_count, kind) for kind in ("spike", "dip")
)
spike_indexes, dip_indexes = detect_spikes_dips(spike_weight, dip_weight)

# --- Compute moving-average z-scores for influencers ---
def get_top_influencers_moving_avg(idx):
    """Format the three features with the largest absolute windowed z-score at `idx`.

    For each entry of the notebook-level `features`, the mean over rows
    max(0, idx - 2) .. idx of `df_filtered` is compared with that feature's
    global mean and divided by its global standard deviation.

    Returns a string such as "GPU_Busy (-0.96 z), ..." ordered from the
    largest to the smallest absolute z-score.
    """
    first = max(0, idx - 2)
    window_mean = df_filtered.loc[first:idx, features].mean()
    overall_mean = df_filtered[features].mean()
    overall_std = df_filtered[features].std()

    z_by_feature = {
        name: (window_mean[name] - overall_mean[name]) / overall_std[name]
        for name in features
    }

    # Rank by magnitude only; the sign is kept for display
    ranked = list(z_by_feature.items())
    ranked.sort(key=lambda pair: abs(pair[1]), reverse=True)
    return ", ".join(f"{name} ({score:+.2f} z)" for name, score in ranked[:3])

# --- Two-column tables: event row index and its top-3 influencer summary ---
spike_table = pd.DataFrame({
    "spike": spike_indexes,
    "influencers": list(map(get_top_influencers_moving_avg, spike_indexes)),
})
dip_table = pd.DataFrame({
    "dip": dip_indexes,
    "influencers": list(map(get_top_influencers_moving_avg, dip_indexes)),
})

# --- First ten rows of each table ---
print("\nSpike Table:\n", spike_table.head(10))
print("\nDip Table:\n", dip_table.head(10))



Spike Table:
    spike                                        influencers
0      5  PKG Reported Temp (-2.36 z), PKG Reported Powe...
1      8  PKG Reported Temp (-2.36 z), PKG Reported Powe...
2    114  hw.md.RenderBasic.GPU_MEMORY_BYTE_READ_RATE (+...
3    150  PKG Reported Temp (-0.82 z), GTI_Busy (-0.68 z...
4    233  GPU_Busy (-0.96 z), PKG Reported Temp (-0.52 z...
5    236  PKG Reported Power (+0.74 z), GPU_Busy (-0.70 ...
6    330  GPU_Busy (-1.69 z), GTI_Busy (-0.41 z), PKG Re...
7    347  GPU_Busy (-1.38 z), hw.md.RenderBasic.GPU_MEMO...
8    357  hw.md.RenderBasic.GPU_MEMORY_BYTE_READ_RATE (-...
9    361  hw.md.RenderBasic.GPU_MEMORY_BYTE_READ_RATE (-...

Dip Table:
    dip                                        influencers
0    6  PKG Reported Temp (-2.05 z), PKG Reported Powe...
1  115  hw.md.RenderBasic.GPU_MEMORY_BYTE_READ_RATE (+...
2  151  GTI_Busy (-0.65 z), PKG Reported Temp (-0.52 z...
3  234  GPU_Busy (-0.81 z), PKG Reported Power (+0.65 ...
4  247  GPU_Busy (-1.0

In [7]:
# --- Influencer columns; df_filtered is expected to exist already and to contain them ---
features = [
    "GPU_Busy", "GTI_Busy",
    "PKG Reported Temp", "PKG Reported Power",
    "hw.md.RenderBasic.GPU_MEMORY_BYTE_READ_RATE",
    "hw.md.RenderBasic.GPU_MEMORY_BYTE_WRITE_RATE",
]

# Function to compute z-scores for a specific index using moving average of past 2 + current
def compute_moving_avg_zscores(idx, df_filtered, features):
    """Compute a windowed z-score for every feature at row `idx`.

    The window is the label slice idx-2 .. idx of `df_filtered` (both ends
    included). Its per-feature mean is compared with the feature's mean over
    the whole frame and divided by the feature's standard deviation over the
    whole frame.

    Returns a dict mapping each name in `features` to its z-score, in the
    order given by `features`.
    """
    window_mean = df_filtered.loc[idx - 2:idx, features].mean()
    overall_mean = df_filtered[features].mean()
    overall_std = df_filtered[features].std()

    return {
        name: (window_mean[name] - overall_mean[name]) / overall_std[name]
        for name in features
    }

# Function to generate one-hot positive table (only positive labels: 0 or 1)
def generate_one_hot_table(df_table, df_filtered, features):
    """Build a 0/1 table telling which features have a positive windowed z-score.

    df_table: spike_table or dip_table; its first column holds the row indexes
        of the events ('spike' or 'dip').
    df_filtered, features: forwarded to `compute_moving_avg_zscores`.

    Returns a DataFrame with a "row" column followed by one
    "positive_<feature>" column per feature (name lower-cased, spaces replaced
    by underscores), holding 1 when the z-score is > 0 and 0 otherwise.
    """
    index_column = df_table.columns[0]  # should be 'spike' or 'dip'

    records = []
    for event_idx in df_table[index_column]:
        scores = compute_moving_avg_zscores(event_idx, df_filtered, features)
        flags = {
            f"positive_{name.lower().replace(' ', '_')}": int(score > 0)
            for name, score in scores.items()
        }
        records.append({"row": event_idx, **flags})

    return pd.DataFrame(records)

# --- One-hot tables for the detected spikes and dips ---
spike_one_hot, dip_one_hot = (
    generate_one_hot_table(events, df_filtered, features)
    for events in (spike_table, dip_table)
)


In [8]:
# Display the one-hot table of the spike rows (bare last expression of the cell)
spike_one_hot

Unnamed: 0,row,positive_gpu_busy,positive_gti_busy,positive_pkg_reported_temp,positive_pkg_reported_power,positive_hw.md.renderbasic.gpu_memory_byte_read_rate,positive_hw.md.renderbasic.gpu_memory_byte_write_rate
0,5,1,0,0,0,1,1
1,8,1,0,0,0,1,1
2,114,1,0,0,0,1,1
3,150,1,0,0,0,1,1
4,233,0,0,0,1,0,1
5,236,0,0,1,1,0,1
6,330,0,0,1,1,0,0
7,347,0,0,1,1,0,0
8,357,0,1,1,1,0,0
9,361,0,0,1,1,0,0


In [9]:
# Display the one-hot table of the dip rows (bare last expression of the cell)
dip_one_hot

Unnamed: 0,row,positive_gpu_busy,positive_gti_busy,positive_pkg_reported_temp,positive_pkg_reported_power,positive_hw.md.renderbasic.gpu_memory_byte_read_rate,positive_hw.md.renderbasic.gpu_memory_byte_write_rate
0,6,1,0,0,0,1,1
1,115,1,0,0,0,1,1
2,151,1,0,0,0,1,1
3,234,0,0,1,1,0,1
4,247,0,1,1,1,1,1
5,298,0,0,1,0,1,1
6,323,0,0,1,1,0,0
7,324,0,0,1,1,0,0
8,340,0,0,1,1,0,0
9,351,0,1,1,1,0,0


In [10]:
# Function to generate one-hot positive table with an extra "signature" column
def generate_one_hot_table_with_signatures(df_table, df_filtered, features):
    """Build the 0/1 positive-z-score table plus a per-row signature string.

    df_table: spike_table or dip_table; its first column holds the row indexes
        of the events ('spike' or 'dip').
    df_filtered, features: forwarded to `compute_moving_avg_zscores`.

    Returns a DataFrame with a "row" column, one "positive_<feature>" column
    per feature (1 when the z-score is > 0, else 0) and a "signature" column
    that concatenates those bits in `features` order, e.g. '101011'.
    """
    index_column = df_table.columns[0]  # 'spike' or 'dip'

    records = []
    for event_idx in df_table[index_column]:
        scores = compute_moving_avg_zscores(event_idx, df_filtered, features)
        bits = [int(scores[name] > 0) for name in features]

        record = {"row": event_idx}
        record.update(
            (f"positive_{name.lower().replace(' ', '_')}", bit)
            for name, bit in zip(features, bits)
        )
        # Bits are joined in the same order as the one-hot columns
        record["signature"] = "".join(map(str, bits))
        records.append(record)

    return pd.DataFrame(records)

# --- One-hot tables with signatures for the detected spikes and dips ---
spike_one_hot, dip_one_hot = (
    generate_one_hot_table_with_signatures(events, df_filtered, features)
    for events in (spike_table, dip_table)
)

# Signatures alone, as Series indexed by the event's row number
spike_signatures, dip_signatures = (
    table.set_index("row")["signature"] for table in (spike_one_hot, dip_one_hot)
)


In [11]:
# Count how many spike rows share each signature string
spike_one_hot[["signature"]].value_counts()

signature
001100       9
011100       5
100011       5
111011       3
011001       1
001101       1
000101       1
011110       1
100010       1
011111       1
111000       1
111100       1
Name: count, dtype: int64

In [12]:
# Count how many dip rows share each signature string
dip_one_hot[["signature"]].value_counts()

signature
001100       12
100011        5
011100        3
111011        3
001011        1
001101        1
011111        1
101011        1
110000        1
111010        1
111100        1
Name: count, dtype: int64

In [13]:
import pandas as pd
import numpy as np

# --- Furmark capture, restricted to rows whose "Test Markers" value is 0.1 ---
df = pd.read_csv("ABN_Furmark2_3_0_1.results.0.ogl-1440p-high-Benchmark_ (version 1).xlsb.csv")
df_filtered = df[df["Test Markers"] == 0.1].reset_index(drop=True)

# --- Raw condor export, filtered the same way ---
# NOTE(review): this file name contains "(version 1)" twice while the main file
# has it once - confirm that both files come from the same run.
raw_condor = pd.read_csv("ABN_Furmark2_3_0_1.results.0.ogl-1440p-high-Benchmark_ (version 1) (version 1).xlsb_raw_condor.csv")
raw_condor_filtered = raw_condor[raw_condor["Test Markers"] == 0.1].reset_index(drop=True)

# --- GPU memory counters, attached column-wise through the reset 0..n-1 index ---
# NOTE(review): assumes both filtered frames have the same length and order - confirm.
gpu_mem_features = [
    "hw.md.RenderBasic.GPU_MEMORY_BYTE_READ_RATE",
    "hw.md.RenderBasic.GPU_MEMORY_BYTE_WRITE_RATE",
]
df_filtered = pd.concat(
    [df_filtered, raw_condor_filtered[gpu_mem_features]],
    axis="columns",
)

# --- Columns examined as possible influencers of a spike or dip ---
features = [
    "GPU_Busy", "GTI_Busy",
    "PKG Reported Temp", "PKG Reported Power",
    "hw.md.RenderBasic.GPU_MEMORY_BYTE_READ_RATE",
    "hw.md.RenderBasic.GPU_MEMORY_BYTE_WRITE_RATE",
]

# --- Standard deviation of the frequency over all filtered rows ---
global_std = df_filtered["GT Effective Freq"].std()

# --- Detect spikes and dips using moving average + global std ---
def detect_spikes_dips(spike_weight, dip_weight):
    """Classify every row of the notebook-level `df_filtered` as spike, dip or neither.

    Each "GT Effective Freq" value is compared with the mean of itself and up
    to two preceding values (fewer at the start of the frame). A row is a
    spike when it exceeds that mean by more than spike_weight * global_std,
    otherwise a dip when it falls below it by more than dip_weight * global_std.

    Returns (spike_idxs, dip_idxs): two lists of row indexes in ascending order.
    """
    freq = df_filtered["GT Effective Freq"]
    upper_margin = spike_weight * global_std
    lower_margin = dip_weight * global_std

    spike_idxs, dip_idxs = [], []
    for pos in range(len(df_filtered)):
        # Clamp the window start so the first rows use a shorter window
        first = max(0, pos - 2)
        trailing_mean = freq.loc[first:pos].mean()  # label slice includes both ends
        current = freq.loc[pos]

        if current > trailing_mean + upper_margin:
            spike_idxs.append(pos)
            continue
        if current < trailing_mean - lower_margin:
            dip_idxs.append(pos)
    return spike_idxs, dip_idxs

# --- Binary search for weight given a target ---
def find_weight_for_target(target_count, mode="spike", max_iter=50):
    """Bisect the threshold weight so the number of detections approaches a target.

    The interval [0.1, 5.0] is halved `max_iter` times. At each midpoint
    `detect_spikes_dips(mid, mid)` is run and the spikes (mode == "spike") or
    the dips (any other mode) are counted.

    Returns the final bound whose count is closer to `target_count`; the lower
    bound wins ties.
    """
    def detections_at(weight):
        spikes, dips = detect_spikes_dips(weight, weight)
        return len(spikes) if mode == "spike" else len(dips)

    low, high = 0.1, 5.0
    for _ in range(max_iter):
        mid = (low + high) / 2
        if detections_at(mid) > target_count:
            low = mid   # too many detections: raise the weight
        else:
            high = mid  # at or below the target: try a smaller weight

    # Evaluate both ends of the final interval and keep the closer one
    miss_low = abs(detections_at(low) - target_count)
    miss_high = abs(detections_at(high) - target_count)
    if miss_low <= miss_high:
        return low
    return high

# --- Search one weight per event kind for the same target, then detect with both ---
target_count = 30
spike_weight, dip_weight = (
    find_weight_for_target(target_count, kind) for kind in ("spike", "dip")
)
spike_indexes, dip_indexes = detect_spikes_dips(spike_weight, dip_weight)

# --- Compute moving-average z-scores for influencers ---
def get_top_influencers_moving_avg(idx):
    """Format the three features with the largest absolute windowed z-score at `idx`.

    For each entry of the notebook-level `features`, the mean over rows
    max(0, idx - 2) .. idx of `df_filtered` is compared with that feature's
    global mean and divided by its global standard deviation.

    Returns a string such as "GPU_Busy (+0.69 z), ..." ordered from the
    largest to the smallest absolute z-score.
    """
    first = max(0, idx - 2)
    window_mean = df_filtered.loc[first:idx, features].mean()
    overall_mean = df_filtered[features].mean()
    overall_std = df_filtered[features].std()

    z_by_feature = {
        name: (window_mean[name] - overall_mean[name]) / overall_std[name]
        for name in features
    }

    # Rank by magnitude only; the sign is kept for display
    ranked = list(z_by_feature.items())
    ranked.sort(key=lambda pair: abs(pair[1]), reverse=True)
    return ", ".join(f"{name} ({score:+.2f} z)" for name, score in ranked[:3])

# --- Two-column tables: event row index and its top-3 influencer summary ---
spike_table = pd.DataFrame({
    "spike": spike_indexes,
    "influencers": list(map(get_top_influencers_moving_avg, spike_indexes)),
})
dip_table = pd.DataFrame({
    "dip": dip_indexes,
    "influencers": list(map(get_top_influencers_moving_avg, dip_indexes)),
})

# --- First ten rows of each table ---
print("\nSpike Table:\n", spike_table.head(10))
print("\nDip Table:\n", dip_table.head(10))

# --- Influencer columns; df_filtered is expected to exist already and to contain them ---
features = [
    "GPU_Busy", "GTI_Busy",
    "PKG Reported Temp", "PKG Reported Power",
    "hw.md.RenderBasic.GPU_MEMORY_BYTE_READ_RATE",
    "hw.md.RenderBasic.GPU_MEMORY_BYTE_WRITE_RATE",
]

# Function to compute z-scores for a specific index using moving average of past 2 + current
def compute_moving_avg_zscores(idx, df_filtered, features):
    """Compute a windowed z-score for every feature at row `idx`.

    The window is the label slice idx-2 .. idx of `df_filtered` (both ends
    included). Its per-feature mean is compared with the feature's mean over
    the whole frame and divided by the feature's standard deviation over the
    whole frame.

    Returns a dict mapping each name in `features` to its z-score, in the
    order given by `features`.
    """
    overall_mean = df_filtered[features].mean()
    overall_std = df_filtered[features].std()
    window_mean = df_filtered.loc[idx - 2:idx, features].mean()

    scores = {}
    for name in features:
        offset = window_mean[name] - overall_mean[name]
        scores[name] = offset / overall_std[name]
    return scores

# Function to generate one-hot positive table (only positive labels: 0 or 1)
def generate_one_hot_table(df_table, df_filtered, features):
    """Build a 0/1 table telling which features have a positive windowed z-score.

    df_table: spike_table or dip_table; its first column holds the row indexes
        of the events ('spike' or 'dip').
    df_filtered, features: forwarded to `compute_moving_avg_zscores`.

    Returns a DataFrame with a "row" column followed by one
    "positive_<feature>" column per feature (name lower-cased, spaces replaced
    by underscores), holding 1 when the z-score is > 0 and 0 otherwise.
    """
    index_column = df_table.columns[0]  # should be 'spike' or 'dip'

    records = []
    for event_idx in df_table[index_column]:
        scores = compute_moving_avg_zscores(event_idx, df_filtered, features)
        flags = {
            f"positive_{name.lower().replace(' ', '_')}": int(score > 0)
            for name, score in scores.items()
        }
        records.append({"row": event_idx, **flags})

    return pd.DataFrame(records)

# --- One-hot tables for the detected spikes and dips ---
spike_one_hot, dip_one_hot = (
    generate_one_hot_table(events, df_filtered, features)
    for events in (spike_table, dip_table)
)




Spike Table:
    spike                                        influencers
0     24  hw.md.RenderBasic.GPU_MEMORY_BYTE_READ_RATE (+...
1     28  GPU_Busy (+0.69 z), GTI_Busy (+0.54 z), hw.md....
2     36  GTI_Busy (-0.93 z), hw.md.RenderBasic.GPU_MEMO...
3     40  hw.md.RenderBasic.GPU_MEMORY_BYTE_WRITE_RATE (...
4     80  GPU_Busy (+1.14 z), PKG Reported Temp (+1.07 z...
5     84  PKG Reported Temp (+1.52 z), hw.md.RenderBasic...
6     96  PKG Reported Temp (+1.52 z), PKG Reported Powe...
7    100  PKG Reported Temp (+1.52 z), GPU_Busy (+0.55 z...
8    108  PKG Reported Temp (+1.97 z), GPU_Busy (+1.49 z...
9    116  PKG Reported Temp (+1.97 z), PKG Reported Powe...

Dip Table:
    dip                                        influencers
0   14  PKG Reported Temp (-1.61 z), PKG Reported Powe...
1   29  GPU_Busy (+0.75 z), hw.md.RenderBasic.GPU_MEMO...
2   38  GTI_Busy (-1.32 z), hw.md.RenderBasic.GPU_MEMO...
3   82  PKG Reported Temp (+1.07 z), GTI_Busy (+0.52 z...
4   85  PKG Reported T

In [14]:
# Display the one-hot table of the spike rows (bare last expression of the cell)
spike_one_hot

Unnamed: 0,row,positive_gpu_busy,positive_gti_busy,positive_pkg_reported_temp,positive_pkg_reported_power,positive_hw.md.renderbasic.gpu_memory_byte_read_rate,positive_hw.md.renderbasic.gpu_memory_byte_write_rate
0,24,1,0,0,0,1,1
1,28,1,1,0,0,1,0
2,36,0,0,0,0,1,0
3,40,1,0,1,0,0,0
4,80,1,1,1,1,1,1
5,84,0,1,1,1,0,0
6,96,0,0,1,1,1,1
7,100,1,1,1,1,1,1
8,108,1,1,1,1,1,1
9,116,1,1,1,0,0,1


In [15]:
# Function to generate one-hot positive table with an extra "signature" column
def generate_one_hot_table_with_signatures(df_table, df_filtered, features):
    """Build the 0/1 positive-z-score table plus a per-row signature string.

    df_table: spike_table or dip_table; its first column holds the row indexes
        of the events ('spike' or 'dip').
    df_filtered, features: forwarded to `compute_moving_avg_zscores`.

    Returns a DataFrame with a "row" column, one "positive_<feature>" column
    per feature (1 when the z-score is > 0, else 0) and a "signature" column
    that concatenates those bits in `features` order, e.g. '101011'.
    """
    index_column = df_table.columns[0]  # 'spike' or 'dip'

    records = []
    for event_idx in df_table[index_column]:
        scores = compute_moving_avg_zscores(event_idx, df_filtered, features)
        bits = [int(scores[name] > 0) for name in features]

        record = {"row": event_idx}
        record.update(
            (f"positive_{name.lower().replace(' ', '_')}", bit)
            for name, bit in zip(features, bits)
        )
        # Bits are joined in the same order as the one-hot columns
        record["signature"] = "".join(map(str, bits))
        records.append(record)

    return pd.DataFrame(records)

# --- One-hot tables with signatures for the detected spikes and dips ---
spike_one_hot, dip_one_hot = (
    generate_one_hot_table_with_signatures(events, df_filtered, features)
    for events in (spike_table, dip_table)
)

# Signatures alone, as Series indexed by the event's row number
spike_signatures, dip_signatures = (
    table.set_index("row")["signature"] for table in (spike_one_hot, dip_one_hot)
)


In [16]:
# Count how many spike rows share each signature string
spike_one_hot[["signature"]].value_counts()

signature
111111       3
010000       2
000110       2
011000       2
101111       2
110111       2
000010       1
001111       1
001000       1
000111       1
000100       1
000000       1
011110       1
010111       1
011100       1
100110       1
100100       1
100011       1
101000       1
110010       1
111001       1
111011       1
111110       1
Name: count, dtype: int64

In [17]:
# Count how many dip rows share each signature string
dip_one_hot[["signature"]].value_counts()

signature
010001       5
011100       4
110111       3
000100       2
000110       2
111110       2
010011       1
010100       1
010000       1
001000       1
110000       1
010110       1
110100       1
110010       1
111001       1
111011       1
111100       1
111111       1
Name: count, dtype: int64

In [18]:
import pandas as pd
# Load the Fortnite CSV (path relative to the working directory) and preview its first five rows
hfs_frame_fortnite = pd.read_csv("FortniteGroupedByFrameData.csv")
hfs_frame_fortnite.head()

Unnamed: 0,sw.intercept.QueryHW.data.Triggers.FrameNumber,FrameRate,Mean(HWContext),Mean(GPUBusyTicksDelta),Mean(SliceFreq),Mean(UnsliceFreq),Mean(hw.gpu.eu_count),Mean(hw.gpu.eu_threads),Mean(hw.gpu.info.timestamp_freq),Mean(hw.gpu.symbols.GpuTimestampFrequency),...,Mean(hw.md.BMG_AMX_20_WW30.XVE_INST_EXECUTED_XMX_INT8),Mean(hw.raw.BMG_AMX_20_WW30.XVE_THREADS_OCCUPANCY_ALL_CYCLES),Mean(hw.raw.BMG_AMX_20_WW30.XVE_THREADS_OCCUPANCY_ASYNC_CS_CYCLES),Mean(hw.raw.BMG_AMX_20_WW30.XVE_THREADS_OCCUPANCY_CS_CYCLES),Mean(hw.raw.BMG_AMX_20_WW30.XVE_THREADS_OCCUPANCY_PS_CYCLES),Mean(hw.raw.BMG_AMX_20_WW30.XVE_THREADS_OCCUPANCY_VS_CYCLES),Mean(ReportReason),Mean(Context),Min(GPUTimestamp),Max(GPUTimestamp)
0,0,33.783099,2298.377488,0,0,2800,160,8,19200000,19200000,...,0,3778818.0,226923.87646,2393526.0,1047945.0,6535.691146,17.69046,577593600.0,41055511961,41055807967
1,1,19.594396,21354.403292,0,0,2800,160,8,19200000,19200000,...,0,2592270.0,114644.95233,1783075.0,625584.5,20832.099108,30.123457,1008018000.0,41055807968,41056318318
2,2,16.555798,21169.647553,0,0,2800,160,8,19200000,19200000,...,0,2624828.0,101348.24647,1883479.0,575837.9,18506.952267,27.062444,902067500.0,41056318320,41056922338
3,3,17.142808,21350.912962,0,0,2800,160,8,19200000,19200000,...,0,2797864.0,106489.45085,2045858.0,583299.3,18312.552599,26.661553,888207600.0,41056922340,41057505675
4,4,19.94909,21476.364664,0,0,2800,160,8,19200000,19200000,...,0,2719027.0,121865.03852,1857695.0,666641.6,20866.926502,29.472438,985500900.0,41057505677,41058006953
