# In [None]:
import pandas as pd
import numpy as np
import onnxruntime as ort
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Params
# Input CSV; the script reads its "text", "embedding", "startsec", "start"
# and "end" columns.
INPUT_PATH = "./progress_added2.csv"
# Path to the ONNX model file used for inference.
MODEL_PATH = "./progress_lstm_model.onnx"
# Presumably the length of each vector in the "embedding" column -- TODO
# confirm. The value is not referenced anywhere else in the visible script.
EMBED_DIM = 384

# Load and prepare data
df = pd.read_csv(INPUT_PATH)

# Keep only rows whose text is present and not just whitespace. The explicit
# .copy() detaches the filtered frame from the one read from disk, so the
# column assignment below writes to an independent DataFrame instead of a
# slice (which triggers pandas' SettingWithCopyWarning).
has_text = df["text"].notnull() & df["text"].str.strip().astype(bool)
df = df[has_text].copy()

# Each "embedding" cell is a string that is evaluated as a Python expression
# and converted to a float32 vector.
# SECURITY: eval() executes whatever code the CSV cell contains. Only run this
# on trusted files; for untrusted input switch to ast.literal_eval or
# json.loads (kept as-is here to avoid changing which cell formats parse).
df["embedding"] = df["embedding"].apply(lambda x: np.array(eval(x), dtype=np.float32))

# Sort rows by time (not grouped, just one file)
df = df.sort_values("startsec")

# Build progressive sequences: sample i is the list of the first i + 1
# embeddings, taken in the current row order of df.
sequence = list(df["embedding"])
# Slicing yields a fresh list per prefix, so the samples do not alias each
# other (the vectors themselves are shared, not copied).
X = [sequence[:end] for end in range(1, len(sequence) + 1)]

# Pad: "post" adds the fill value after the real steps so that every sample
# ends up with the same number of time steps.
X_padded = pad_sequences(X, padding="post", dtype="float32")

# Load ONNX model
# Use the MODEL_PATH constant instead of repeating the file name, so the model
# location is configured in exactly one place.
session = ort.InferenceSession(MODEL_PATH, providers=["CPUExecutionProvider"])
# The model is fed through its first declared input and read from its first
# declared output.
input_name = session.get_inputs()[0].name
output_name = session.get_outputs()[0].name

# Predict: run the model once on the whole padded batch and request only the
# selected output.
feed = {input_name: X_padded}
outputs = session.run([output_name], feed)
scores = outputs[0]
# Index of the largest value along axis 1 -- presumably the class axis
# (TODO confirm against the model's output shape).
y_pred = scores.argmax(axis=1)

# Attach one prediction per row (assigned positionally, in df's current order)
# and show it next to the segment's timing and text.
df["predicted_progress_class"] = y_pred
report_columns = ["start", "end", "text", "predicted_progress_class"]
print(df[report_columns])