In [1]:
from setup_env import setup_environment

# Project-local bootstrap imported from setup_env; what it configures is not
# visible in this notebook — NOTE(review): confirm it must run before the other imports.
setup_environment()

In [2]:
import pandas as pd
import numpy as np

from project.config import HIDDEN_CELL

In [19]:
# Stem of the history export to analyse; the export cell at the end of the
# notebook reuses `file` to name its output.
file = "20260124_171024"
history_csv = f"data/history/{file}.csv"
df = pd.read_csv(history_csv)

# Show three randomly drawn rows as a quick sanity check of the schema.
df.sample(3)

Unnamed: 0,generation_uid,generation_success,step_number,action_type,action_x,action_y,num_possible_patterns,chosen_pattern_uid,chosen_pattern_is_walkable,cell_0_0_entropy,...,cell_14_10_entropy,cell_14_10_is_walkable,cell_14_11_entropy,cell_14_11_is_walkable,cell_14_12_entropy,cell_14_12_is_walkable,cell_14_13_entropy,cell_14_13_is_walkable,cell_14_14_entropy,cell_14_14_is_walkable
23027,f8e9416c-02f4-47fc-9d72-18cdd6354594,False,30,PLACE,3,4,2,8,0,27,...,27,-1,27,-1,27,-1,27,-1,27,-1
16326,57b49aac-58f2-402f-8aa9-17fde7363cc2,True,126,PLACE,13,10,2,19,0,27,...,2,-1,1,-1,7,-1,14,-1,27,-1
10488,1d3b4bd0-5030-4f7f-95af-97c0079b7b50,True,138,PLACE,10,10,3,1,1,27,...,27,-1,27,-1,27,-1,27,-1,27,-1


In [4]:
# Number of rows in the loaded history frame.
df.shape[0]

29085

In [20]:
# Per-cell columns are recognised purely by their name suffix.
entropy_cols = list(filter(lambda name: name.endswith("_entropy"), df.columns))
walkability_cols = list(
    filter(lambda name: name.endswith("_is_walkable"), df.columns)
)

# One entropy column per cell, so the side length is the square root of the count.
grid_side = int(np.sqrt(len(entropy_cols)))
print("Grid size (inferred):", grid_side)

Grid size (inferred): 15


In [21]:
def _nan_reduce(reducer, values: np.ndarray) -> float:
    """Apply a NaN-aware reducer, returning NaN when there is nothing to reduce.

    Avoids the RuntimeWarning / ValueError numpy emits for all-NaN or empty input.
    """
    if values.size == 0 or np.isnan(values).all():
        return np.nan
    return float(reducer(values))


def extract_grid_features(
    row: pd.Series,
    entropy_cols: list[str],
    walkability_cols: list[str],
    hidden_value: "float | None" = None,
) -> pd.Series:
    """Summarise one row's per-cell entropy / walkability columns into scalar features.

    Args:
        row: A single row holding one value per name in ``entropy_cols`` and
            ``walkability_cols``.
        entropy_cols: Names of the per-cell entropy columns, in the order that
            ``reshape`` should lay them out as a square grid (row-major).
        walkability_cols: Names of the per-cell walkability columns, positionally
            paired with ``entropy_cols``.
        hidden_value: Sentinel that marks a hidden cell; matching values are
            treated as NaN. Defaults to the project constant ``HIDDEN_CELL``.

    Returns:
        A Series with, in order: ``entropy_mean``, ``entropy_std``,
        ``p_collapsed``, ``p_uncollapsed``, ``p_walkable_collapsed``,
        ``p_entropy_1``, ``p_entropy_2``, ``p_entropy_3``,
        ``entropy_gradient_mean``, ``entropy_gradient_max``.
        All ``p_*`` values except ``p_walkable_collapsed`` are fractions of the
        total number of grid cells (hidden cells count in the denominator only).

    Raises:
        ValueError: If ``entropy_cols`` is empty or its length is not a perfect
            square, so no square grid can be formed.
    """
    if hidden_value is None:
        # Looked up at call time so the project-wide sentinel remains the default.
        hidden_value = HIDDEN_CELL

    # copy=True: the arrays are modified below and must not alias the row's data.
    entropies = row[entropy_cols].to_numpy(dtype=float, copy=True)
    walkable = row[walkability_cols].to_numpy(dtype=float, copy=True)

    entropies[entropies == hidden_value] = np.nan
    walkable[walkable == hidden_value] = np.nan

    # Denominator for the proportions is the number of cells. The previous code
    # divided by the grid side length, which inflated every proportion.
    n_cells = entropies.size
    grid_n = int(round(np.sqrt(n_cells)))
    if n_cells == 0 or grid_n * grid_n != n_cells:
        raise ValueError(
            f"entropy_cols must describe a non-empty square grid, got {n_cells} columns"
        )

    features: dict[str, float] = {}

    features["entropy_mean"] = _nan_reduce(np.nanmean, entropies)
    features["entropy_std"] = _nan_reduce(np.nanstd, entropies)

    # Comparisons against NaN are False, so hidden cells never count as matches.
    features["p_collapsed"] = np.count_nonzero(entropies == 0) / n_cells
    features["p_uncollapsed"] = np.count_nonzero(entropies > 1) / n_cells

    # Pair entropy and walkability positionally over their common prefix.
    min_len = min(len(entropies), len(walkable))
    collapsed_mask = entropies[:min_len] == 0
    n_collapsed = np.count_nonzero(collapsed_mask)
    if n_collapsed:
        walkable_collapsed = walkable[:min_len][collapsed_mask]
        features["p_walkable_collapsed"] = (
            np.count_nonzero(walkable_collapsed == 1) / n_collapsed
        )
    else:
        features["p_walkable_collapsed"] = np.nan

    for k in [1, 2, 3]:
        features[f"p_entropy_{k}"] = np.count_nonzero(entropies == k) / n_cells

    # Absolute entropy differences between vertically / horizontally adjacent
    # cells; a difference involving a hidden cell is NaN and is ignored.
    grid = entropies.reshape((grid_n, grid_n))
    gradients = np.concatenate(
        [
            np.abs(np.diff(grid, axis=1)).ravel(),
            np.abs(np.diff(grid, axis=0)).ravel(),
        ]
    )
    features["entropy_gradient_mean"] = _nan_reduce(np.nanmean, gradients)
    features["entropy_gradient_max"] = _nan_reduce(np.nanmax, gradients)

    return pd.Series(features)

In [22]:
# Label rows for the judge: 1 everywhere, except the last N rows of every
# generation that contains a failed row, which are labelled 0.
N = 20

# .eq(False) keeps the original `== False` semantics without assuming a bool dtype.
failed_gids = df.loc[df["generation_success"].eq(False), "generation_uid"].unique()

# One grouped pass instead of re-filtering the whole frame once per failed
# generation. tail() is positional, so this relies on each generation's rows
# already being in step order — NOTE(review): confirm the export guarantees that.
rows_of_failed = df[df["generation_uid"].isin(failed_gids)]
last_steps_idx = rows_of_failed.groupby("generation_uid", sort=False).tail(N).index

df["should_continue"] = 1
if len(last_steps_idx):
    df.loc[last_steps_idx, "should_continue"] = 0

In [23]:
# Row-wise feature extraction: every row of df yields one row of features.
feature_df = df.apply(
    extract_grid_features,
    axis=1,
    args=(entropy_cols, walkability_cols),
)

In [24]:
# Attach the label column; pandas aligns it to the features on the shared index.
feature_df = feature_df.assign(should_continue=df["should_continue"])

In [25]:
# The filename suffix is the labelling window N, so the name always matches the
# labels actually written (it was hard-coded as "_20", duplicating N = 20).
feature_df.to_csv(f"data/training/judge/{file}_{N}.csv", index=False)