In [1]:
from setup_env import setup_environment

# Project-specific bootstrap, run before anything else in the notebook.
# NOTE(review): presumably prepares paths/config so the later `project.*`
# import resolves — confirm against setup_env.
setup_environment()

In [2]:
import pandas as pd
import numpy as np

from project.config import HIDDEN_CELL

In [3]:
# Raw generation-history export to process. The same file name is reused for
# the derived feature CSV written by the last cell of the notebook.
file = "20260124_163324.csv"
df = pd.read_csv(f"data/history/raw/{file}")

# Fixed seed so the preview shows the same rows on every Restart & Run All.
df.sample(3, random_state=0)

Unnamed: 0,generation_uid,generation_success,step_number,action_type,action_x,action_y,num_possible_patterns,chosen_pattern_uid,chosen_pattern_is_walkable,cell_0_0_entropy,...,cell_14_10_entropy,cell_14_10_is_walkable,cell_14_11_entropy,cell_14_11_is_walkable,cell_14_12_entropy,cell_14_12_is_walkable,cell_14_13_entropy,cell_14_13_is_walkable,cell_14_14_entropy,cell_14_14_is_walkable
10944,2aeb3884-3de8-4067-bb07-2181825c8f46,True,144,PLACE,2,11,3,1,1,27,...,3,-1,0,1,0,0,0,0,0,0
1504,3ec43f08-2960-4bc3-8800-1a605cf6d77d,True,154,PLACE,4,12,3,12,1,27,...,0,1,0,1,0,1,7,-1,27,-1
220,19739913-12ec-4e39-84e2-d4b6281b9f81,True,220,PLACE,1,14,1,12,1,11,...,0,0,0,0,0,0,0,0,0,0


In [5]:
# Number of recorded steps (rows) in the raw history frame.
df.shape[0]

30015

In [6]:
# Split the per-cell columns by suffix: one entropy and one walkability
# column is expected for every grid cell.
column_names = list(df.columns)
entropy_cols = [name for name in column_names if name.endswith("_entropy")]
walkability_cols = [name for name in column_names if name.endswith("_is_walkable")]

# The grid side length is derived from the number of entropy columns.
inferred_grid_size = int(np.sqrt(len(entropy_cols)))
print("Grid size (inferred):", inferred_grid_size)

Grid size (inferred): 15


In [7]:
def extract_grid_features(
    row: pd.Series, entropy_cols: list[str], walkability_cols: list[str]
) -> pd.Series:
    """Summarise one grid snapshot as a flat vector of scalar features.

    Cells whose value equals ``HIDDEN_CELL`` are treated as missing (NaN) and
    are excluded from every statistic. A snapshot with no visible cell yields
    ``n_cells == 0`` and NaN for every other feature, without emitting NumPy
    "empty slice" / divide-by-zero warnings.

    Args:
        row: One record holding a value for every name in ``entropy_cols``
            and ``walkability_cols``; each value must be accepted by ``float``.
        entropy_cols: Per-cell entropy column names. The values are reshaped
            into a square grid in the order given, so their count must be a
            perfect square.
        walkability_cols: Per-cell walkability column names, matched to
            ``entropy_cols`` by position.

    Returns:
        A Series with the keys ``entropy_mean``, ``entropy_std``,
        ``entropy_max``, ``entropy_min``, ``entropy_premax``, ``n_cells``,
        ``p_collapsed``, ``p_uncollapsed``, ``p_walkable_collapsed``,
        ``p_entropy_1``, ``p_entropy_2``, ``p_entropy_3``,
        ``entropy_gradient_mean`` and ``entropy_gradient_max``.

    Raises:
        ValueError: If the number of entropy cells is not a perfect square.
    """
    entropies = np.array([float(value) for value in row[entropy_cols].values])
    walkable = np.array([float(value) for value in row[walkability_cols].values])

    # Hidden cells become NaN so the NaN-aware reductions below skip them.
    entropies[entropies == HIDDEN_CELL] = np.nan
    walkable[walkable == HIDDEN_CELL] = np.nan

    visible_mask = ~np.isnan(entropies)
    visible_entropies = entropies[visible_mask]
    n_cells = np.sum(visible_mask)
    has_visible_cells = n_cells > 0

    def fraction_of_visible(mask: np.ndarray) -> float:
        """Share of visible cells selected by ``mask`` (NaN if none are visible)."""
        if not has_visible_cells:
            return np.nan
        return np.count_nonzero(mask) / n_cells

    features: dict[str, float] = {}

    # The NaN-aware reductions warn on all-NaN input and nanmax raises on
    # empty input, so they are only called when something is visible.
    if has_visible_cells:
        features["entropy_mean"] = np.nanmean(entropies)
        features["entropy_std"] = np.nanstd(entropies)
        features["entropy_max"] = np.nanmax(entropies)
    else:
        features["entropy_mean"] = np.nan
        features["entropy_std"] = np.nan
        features["entropy_max"] = np.nan

    sorted_entropies = np.sort(visible_entropies)
    if sorted_entropies.size >= 2:
        # NOTE(review): "entropy_min" is the SECOND-smallest visible value,
        # mirroring "entropy_premax" (second-largest). Kept as-is because
        # existing training data depends on it; confirm this is intended.
        features["entropy_min"] = sorted_entropies[1]
        features["entropy_premax"] = sorted_entropies[-2]
    else:
        features["entropy_min"] = np.nan
        features["entropy_premax"] = np.nan

    features["n_cells"] = n_cells
    features["p_collapsed"] = fraction_of_visible(visible_entropies == 0)
    features["p_uncollapsed"] = fraction_of_visible(visible_entropies > 1)

    # Entropy and walkability are paired by position; if the two lists differ
    # in length only the common prefix is compared.
    min_len = min(len(entropies), len(walkable))
    collapsed_mask = entropies[:min_len] == 0  # NaN == 0 is False
    n_collapsed = np.sum(collapsed_mask)
    if n_collapsed > 0:
        walkable_collapsed = walkable[:min_len][collapsed_mask]
        features["p_walkable_collapsed"] = (
            np.nansum(walkable_collapsed == 1) / n_collapsed
        )
    else:
        features["p_walkable_collapsed"] = np.nan

    for k in [1, 2, 3]:
        features[f"p_entropy_{k}"] = fraction_of_visible(visible_entropies == k)

    grid_n = int(np.sqrt(len(entropy_cols)))
    if grid_n * grid_n != entropies.size:
        raise ValueError(
            f"Expected a square grid, got {entropies.size} entropy cells"
        )
    grid = entropies.reshape((grid_n, grid_n))

    # Absolute differences between vertically / horizontally adjacent cells;
    # any pair that involves a hidden cell is NaN and is ignored.
    grad_y = np.abs(np.diff(grid, axis=0))
    grad_x = np.abs(np.diff(grid, axis=1))
    gradients = np.concatenate([grad_x.flatten(), grad_y.flatten()])

    if np.isnan(gradients).all():  # also true when there are no pairs at all
        features["entropy_gradient_mean"] = np.nan
        features["entropy_gradient_max"] = np.nan
    else:
        features["entropy_gradient_mean"] = np.nanmean(gradients)
        features["entropy_gradient_max"] = np.nanmax(gradients)

    return pd.Series(features)

In [30]:
# Label the final N recorded rows of every failed generation with 0
# ("should not continue"); every other row gets 1.
N = 10

# A generation counts as failed if any of its rows has generation_success False.
failed_gids = df.loc[df["generation_success"].eq(False), "generation_uid"].unique()

# 0 for the last row of its generation, 1 for the one before it, and so on.
# This uses the stored row order, i.e. it assumes each generation's rows are
# already in step order (not verified here). One pass over the frame replaces
# a full rescan per failed generation.
rows_from_end = df.groupby("generation_uid").cumcount(ascending=False)

is_terminal_failure = df["generation_uid"].isin(failed_gids) & rows_from_end.lt(N)
df["should_continue"] = (~is_terminal_failure).astype("int64")

In [None]:
# Build the feature table: extract_grid_features is called once per row of df
# and each returned Series becomes one row of feature_df.
feature_df = df.apply(
    extract_grid_features,
    axis=1,
    args=(entropy_cols, walkability_cols),
)

In [31]:
# Attach the target label to the features; values are matched on the row index.
feature_df = feature_df.assign(should_continue=df["should_continue"])

In [32]:
# Class balance of the target label.
label_counts = feature_df["should_continue"].value_counts()
label_counts

should_continue
1    29035
0      980
Name: count, dtype: int64

In [37]:
# Persist the labelled features under the same file name as the raw export.
output_path = f"data/training/judge/{file}"
feature_df.to_csv(output_path, index=False)