In [23]:
!pip install numpy




In [5]:
import numpy as np
import pickle

# Function to extract bursts
def extract_bursts(timestamps, directions):
    """Group consecutive same-direction entries into bursts.

    Each direction is reduced to a sign: values greater than zero become +1,
    everything else becomes -1. A burst is a maximal run of consecutive
    entries that share the same sign.

    Args:
        timestamps: Sequence of per-entry timestamps.
        directions: Sequence of per-entry direction values, aligned with
            ``timestamps``; only the sign is used.

    Returns:
        A list of ``(sign, start_time, end_time)`` tuples, one per burst, in
        order of appearance. Empty input yields an empty list.
    """
    count = len(timestamps)
    polarity = np.where(np.array(directions) > 0, 1, -1)

    result = []
    head = 0  # index of the first entry of the burst currently being scanned
    for pos in range(1, count + 1):
        # Close the current burst at the end of the data or on a sign flip.
        if pos == count or polarity[pos] != polarity[head]:
            result.append((polarity[head], timestamps[head], timestamps[pos - 1]))
            head = pos
    return result

# Function to extract burst features for one trace
def extract_burst_features_for_trace(timestamps, directions, max_first_bursts=10):
    """Build a fixed-length burst feature vector for one trace.

    The vector holds the total number of bursts followed by the duration
    (end time minus start time) of each of the first ``max_first_bursts``
    bursts. Missing durations are filled with ``0.0`` so the result always
    has ``1 + max_first_bursts`` entries.

    Args:
        timestamps: Sequence of per-entry timestamps.
        directions: Sequence of per-entry direction values.
        max_first_bursts: How many leading bursts contribute a duration.

    Returns:
        A NumPy array ``[n_bursts, d_1, ..., d_k]`` padded with zeros.
    """
    burst_list = extract_bursts(timestamps, directions)
    durations = [stop - begin for _, begin, stop in burst_list[:max_first_bursts]]
    # A non-positive multiplier yields an empty list, so no padding is added
    # once enough durations are present.
    padding = [0.0] * (max_first_bursts - len(durations))
    return np.array([len(burst_list)] + durations + padding)

# Flatten nested traces
def flatten_trace(trace):
    """Flatten one level of list nesting in ``trace``.

    Elements that are ``list`` instances are spliced into the result; every
    other element (including tuples and deeper nested lists' contents) is
    kept as-is.

    Args:
        trace: Iterable whose items are either values or lists of values.

    Returns:
        A new flat list with list items expanded by one level.
    """
    flattened = []
    for item in trace:
        flattened += item if isinstance(item, list) else [item]
    return flattened


In [6]:
import pickle
import numpy as np  # if you also need NumPy

# Load dataset
# NOTE(review): unpickling can execute arbitrary code; only load this file
# if it comes from a trusted source.
DATASET_PATH = "/content/mon_standard.pkl"

with open(DATASET_PATH, "rb") as dataset_file:
    data = pickle.load(dataset_file)

print("Loaded dataset!")


# Build one feature row per dataset entry:
# burst features followed by in/out counts and in/out fractions.
features_list = []

for key, trace in data.items():
    flat_trace = flatten_trace(trace)

    # Each value's magnitude is used as the timestamp and its sign as the
    # direction. The 512 scale is presumably a fixed cell size (TODO confirm);
    # everything below only looks at the sign.
    time_seq = [abs(c) for c in flat_trace]
    dir_seq = [512 if c > 0 else -512 for c in flat_trace]

    # Extract burst features
    burst_feat = extract_burst_features_for_trace(
        time_seq, dir_seq, max_first_bursts=10
    )

    # Extract in/out features. Every direction is +512 or -512, never zero,
    # so the incoming count is whatever is not outgoing.
    total = len(dir_seq)
    out_count = sum(d > 0 for d in dir_seq)
    in_count = total - out_count
    if total > 0:
        in_frac = in_count / total
        out_frac = out_count / total
    else:
        in_frac = out_frac = 0

    # Combine features
    features_list.append([*burst_feat, in_count, out_count, in_frac, out_frac])

# Convert to NumPy array
features_array = np.array(features_list)
print("Features shape:", features_array.shape)
print("First 5 feature rows:\n", features_array[:5])


Loaded dataset!
Features shape: (950, 15)
First 5 feature rows:
 [[2.48400000e+03 1.40000000e-01 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 1.30000000e-01
  0.00000000e+00 0.00000000e+00 0.00000000e+00 1.83940000e+04
  1.94000000e+03 9.04593292e-01 9.54067080e-02]
 [5.93400000e+03 9.30000000e-01 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 1.00000000e-01
  0.00000000e+00 0.00000000e+00 0.00000000e+00 5.11120000e+04
  2.10730000e+04 7.08069544e-01 2.91930456e-01]
 [8.55100000e+03 1.50000000e-01 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 1.10000000e-01
  0.00000000e+00 0.00000000e+00 0.00000000e+00 8.59320000e+04
  8.36400000e+03 9.11300585e-01 8.86994146e-02]
 [3.92200000e+03 1.00000000e-01 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 1.00000000e-01
  0.00000000e+00 0.00000000e+00 0.00000000e+00 4.08970000e+04
  3.07100000e+03 9.30153748e-01 6.98462518e-02]