In [31]:
import pandas as pd
from sympy import symbols
from sympy.logic.boolalg import And, Or, Not, simplify_logic, BooleanTrue, BooleanFalse

A, R, E = symbols('A R E')


def dnf_expr_from_8bit(vector_str: str):
    """
    Return a simplified SymPy DNF *expression* for an 8-bit vector.

    Bit order: DB_mean, DB_nice, DG_mean, DG_nice, CB_mean, CB_nice, CG_mean, CG_nice
    Index mapping: i = 4*A + 2*R + E  with A=0/1(Defect/Cooperate), R=0/1(Bad/Good), E=0/1(Mean/Nice)

    Args:
        vector_str: Text holding the vector. It is converted with ``str()``,
            stripped, and every character other than '0'/'1' is discarded
            before the length check.

    Returns:
        The expression over the module-level symbols A, R, E produced by
        ``simplify_logic(..., form='dnf', force=True)``. The SymPy constant
        ``S.false`` is returned when no bit is set, and also when the cleaned
        input is not exactly 8 bits long (invalid input is not raised on).
    """
    # ``BooleanFalse`` imported at file level is the *class*; returning it
    # would make ``str(result)`` render "<class '...BooleanFalse'>" instead
    # of "False". ``S.false`` is the actual expression value.
    from sympy import S

    cleaned = ''.join(ch for ch in str(vector_str).strip() if ch in '01')
    if len(cleaned) != 8:
        return S.false

    # One minterm per set bit; position i encodes (A, R, E) as a 3-bit number
    # with A as the most significant bit.
    minterms = [
        And(
            A if (i >> 2) & 1 else Not(A),
            R if (i >> 1) & 1 else Not(R),
            E if i & 1 else Not(E),
        )
        for i, bit in enumerate(cleaned)
        if bit == '1'
    ]
    if not minterms:
        return S.false
    return simplify_logic(Or(*minterms), form='dnf', force=True)

def count_literals_expr(expr) -> int:
    """
    Count literal occurrences in a SymPy boolean expression in DNF.
    A literal is a Symbol or its negation (~Symbol).

    Args:
        expr: A SymPy boolean expression (or boolean constant).

    Returns:
        0 for the constant False, 1 for the constant True (which a fully
        set 8-bit vector can simplify to), otherwise the number of Symbol /
        Not(Symbol) occurrences found by walking ``expr.args`` recursively.
    """
    # BooleanFalse / BooleanTrue are classes, while expressions carry their
    # singleton *instances*, so an identity test against the class alone never
    # matches a real expression. isinstance() handles the instances; the
    # identity test is kept so a caller passing the class object itself still
    # gets the same answer as before.
    if expr is BooleanFalse or isinstance(expr, BooleanFalse):
        return 0
    if expr is BooleanTrue or isinstance(expr, BooleanTrue):
        return 1

    def count_in(node) -> int:
        # A bare symbol and a negated symbol each count as exactly one literal;
        # the negation is not descended into, so ~A is not counted twice.
        if node.is_Symbol:
            return 1
        if isinstance(node, Not) and node.args and node.args[0].is_Symbol:
            return 1
        # Any other node (And / Or / ...) contributes the sum of its children.
        return sum(count_in(arg) for arg in getattr(node, 'args', ()))

    return count_in(expr)

# ---------- config ----------
INPUT_CSV  = "data/all_norms_16variants_L.csv"          # adjust path if needed
OUTPUT_CSV = "data/all_norms_16variants_L_with_dnf.csv" # output path
# ----------------------------

# ---- Load, compute, save ----
# Read both bit-vector columns as text so leading zeros are not lost to
# numeric parsing.
df = pd.read_csv(INPUT_CSV, dtype={"4bit_orig": str, "8bit_vector": str})

# Normalise: drop every character that is not 0/1, then left-pad with zeros to
# the expected width.
# NOTE(review): a missing cell may become the text "nan" under astype(str),
# which is then stripped to "" and padded to all zeros -- confirm that rows
# with missing vectors should be kept as zero vectors rather than dropped.
df["8bit_vector"] = df["8bit_vector"].astype(str).str.replace(r"[^01]", "", regex=True).str.zfill(8)
df["4bit_orig"]   = df["4bit_orig"].astype(str).str.replace(r"[^01]", "", regex=True).str.zfill(4)

# Keep only rows whose cleaned vector is exactly 8 bits (zfill never truncates,
# so this removes over-long entries). Take an explicit copy so the column
# assignments below write to an independent frame instead of a filtered view,
# which avoids pandas' SettingWithCopyWarning when rows are actually removed.
df = df[df["8bit_vector"].str.len() == 8].copy()

# Compute DNF expressions
dnf_exprs = df["8bit_vector"].apply(dnf_expr_from_8bit)

# Add columns
df["DNF"] = dnf_exprs.astype(str)                 # human-readable DNF
df["DNF_literals"] = dnf_exprs.apply(count_literals_expr)

df.to_csv(OUTPUT_CSV, index=False)
print(f"Saved with DNF + literal counts -> {OUTPUT_CSV}")

Saved with DNF + literal counts -> data/all_norms_16variants_L_with_dnf.csv
