In [1]:
include("../src/SymmetryMiner.jl")
using .SymmetryMiner
using DataFrames, Plots, StatsBase
using DataFrames, CSV
using Printf
using IterTools: product
using Dates
import .SymmetryMiner: compute_symmetries

In [2]:
# Notebook-level scratch state (presumably for incremental result saving; none of
# it is consumed by the code visible in this section).
counter = 0
results_dict = Dict{String,Vector{Int}}()
batch = DataFrame(; BinaryKey=Vector{String}(), MatrixData=Vector{Vector{Int}}())
save_interval = 50;  # Save every 50 entries

In [3]:
# Tally of how many 2x2 matrices produce each symmetry signature
symmetry_counts = Dict{String, Int}()

# Enumerate every 2x2 matrix with entries in 0–3. `Iterators.product` varies its
# first component fastest, so the tuple is destructured in reverse to visit the
# matrices in the same order as a nested a/b/c/d loop (d innermost).
for (d, c, b, a) in Iterators.product(0:3, 0:3, 0:3, 0:3)
    flags = compute_symmetries([a b; c d])  # one flag per symmetry (presumably Bool)

    # Signature string made of one 0/1 digit per flag
    signature = join(Int(flag) for flag in flags)

    symmetry_counts[signature] = get(symmetry_counts, signature, 0) + 1
end

# One row per distinct signature, then persist to disk
df = DataFrame(; BinaryKey=[k for k in keys(symmetry_counts)],
               Count=[v for v in values(symmetry_counts)])

CSV.write("../data/symmetry_counts_2x2.csv", df);

In [4]:
"""
    format_seconds(s::Real)

Round `s` to the nearest whole second and return it as a zero-padded `HH:MM:SS`
string. The hours field is not wrapped at 24, so long durations simply produce a
wider hours field (e.g. `100:00:00`).
"""
function format_seconds(s::Real)
    whole = round(Int, s)
    hours = whole ÷ 3600
    minutes = mod(whole, 3600) ÷ 60
    seconds = mod(whole, 60)
    return @sprintf("%02d:%02d:%02d", hours, minutes, seconds)
end;

In [5]:
valrange = 0:8
n = 3
batch_size = 100_000
output_file = "../data/symmetry_counts_3x3.csv"

# Number of n×n matrices over `valrange`; with the values above: 9^9 = 387,420,489
total_combinations = length(valrange)^(n^2)

# Report progress about every 1% of the run. Derived from the total so it stays
# correct when `valrange` or `n` change (3_874_205 for the 9^9 case).
progress_step = cld(total_combinations, 100)

"""
    save_symmetry_counts(path, counts)

Overwrite the CSV at `path` with one `BinaryKey,Count` row per entry of `counts`
and return the `DataFrame` that was written.
"""
function save_symmetry_counts(path, counts)
    df = DataFrame(BinaryKey = collect(keys(counts)),
                   Count = collect(values(counts)))
    CSV.write(path, df; append=false, writeheader=true)
    return df
end

"""
    tally_symmetries!(counts, combos, n, total; t0, batch_size, progress_step, output_file)

Reshape every tuple of `combos` into an `n`×`n` matrix, compute its symmetry
signature with `compute_symmetries`, and increment that signature's entry in
`counts`. Prints a progress line on the first iteration and every `progress_step`
iterations, and rewrites `output_file` with a full snapshot of `counts` every
`batch_size` iterations. Returns the number of combinations processed.

The loop lives in a function so it does not run on non-constant globals.
"""
function tally_symmetries!(counts, combos, n, total;
                           t0, batch_size, progress_step, output_file)
    counter = 0
    for tup in combos
        mat = reshape(collect(tup), n, n)
        sym = compute_symmetries(mat)
        key = join(Int.(sym), "")

        counts[key] = get(counts, key, 0) + 1
        counter += 1

        # Show progress every ~1% (and once right at the start)
        if counter % progress_step == 0 || counter == 1
            elapsed = time() - t0
            eta = (elapsed / counter) * (total - counter)
            perc = counter / total * 100
            et_str  = format_seconds(elapsed)
            eta_str = format_seconds(eta)
            println("Elapsed Time: $et_str | ETA: $eta_str | $counter/$total ($(round(perc, digits=2))%)")
        end

        # Checkpoint: the file always holds the cumulative counts so far
        if counter % batch_size == 0
            save_symmetry_counts(output_file, counts)
        end
    end
    return counter
end

# Progress
t0 = time()

# A single dictionary holds the cumulative counts. `batch_counts` was never cleared
# between saves, so it always mirrored `global_counts`; it is kept only as an alias.
global_counts = Dict{String, Int}()
batch_counts = global_counts

# Remove previous CSV
isfile(output_file) && rm(output_file)

# General iterator
all_combos = product(fill(valrange, n^2)...)

counter = tally_symmetries!(global_counts, all_combos, n, total_combinations;
                            t0=t0, batch_size=batch_size,
                            progress_step=progress_step, output_file=output_file)

# Final snapshot (covers the iterations after the last checkpoint)
if !isempty(global_counts)
    df = save_symmetry_counts(output_file, global_counts)
end

println("✅ Process completed. Total: $counter combinations.")

Elapsed Time: 00:00:00 | ETA: 21523:21:56 | 1/387420489 (0.0%)
Elapsed Time: 00:00:07 | ETA: 00:11:51 | 3874205/387420489 (1.0%)
Elapsed Time: 00:00:13 | ETA: 00:10:33 | 7748410/387420489 (2.0%)
Elapsed Time: 00:00:19 | ETA: 00:10:05 | 11622615/387420489 (3.0%)
Elapsed Time: 00:00:25 | ETA: 00:09:52 | 15496820/387420489 (4.0%)
Elapsed Time: 00:00:31 | ETA: 00:09:41 | 19371025/387420489 (5.0%)
Elapsed Time: 00:00:36 | ETA: 00:09:28 | 23245230/387420489 (6.0%)
Elapsed Time: 00:00:42 | ETA: 00:09:20 | 27119435/387420489 (7.0%)
Elapsed Time: 00:00:48 | ETA: 00:09:12 | 30993640/387420489 (8.0%)
Elapsed Time: 00:00:54 | ETA: 00:09:05 | 34867845/387420489 (9.0%)
Elapsed Time: 00:01:00 | ETA: 00:08:59 | 38742050/387420489 (10.0%)
Elapsed Time: 00:01:06 | ETA: 00:08:53 | 42616255/387420489 (11.0%)
Elapsed Time: 00:01:12 | ETA: 00:08:46 | 46490460/387420489 (12.0%)
Elapsed Time: 00:01:18 | ETA: 00:08:39 | 50364665/387420489 (13.0%)
Elapsed Time: 00:01:23 | ETA: 00:08:33 | 54238870/387420489 (14.

In [6]:
using Combinatorics

"""
    find_minimal_feature_subset(code_matrix, max_features=10)

Find a smallest set of column indices whose values give every row of `code_matrix`
a unique key.

# Arguments
- `code_matrix`: vector of equally long rows of integers (`Bool` or `Int`).
- `max_features`: largest subset size to try.

# Returns
- The first subset (in the order produced by `combinations`, smallest sizes first)
  that distinguishes all rows, as a vector of column indices.
- `Int[]` when `code_matrix` is empty (there is nothing to distinguish).
- `nothing` when no subset of at most `max_features` columns works, e.g. when two
  rows are identical.
"""
function find_minimal_feature_subset(
    code_matrix::AbstractVector{<:AbstractVector{<:Integer}},
    max_features::Integer=10,
)
    isempty(code_matrix) && return Int[]

    num_features = length(first(code_matrix))
    # Subsets larger than the number of columns cannot exist, so do not try them.
    for r in 1:min(max_features, num_features)
        for combo in combinations(1:num_features, r)
            # `allunique` stops at the first pair of rows that collide on `combo`.
            if allunique(row[combo] for row in code_matrix)
                return combo
            end
        end
    end
    return nothing
end;

In [19]:
# Read the keys as strings. Left to type inference, CSV parses a key such as
# "000101" as the integer 101 and drops the leading zeros, which then have to be
# restored by padding to an assumed width.
rows = CSV.read(
    "../data/symmetry_counts_3x3.csv", DataFrame; types=Dict(:BinaryKey => String)
).BinaryKey

# One vector of 0/1 digits per key
code_matrix = [parse.(Int, collect(key)) for key in rows]

minimal = find_minimal_feature_subset(code_matrix, 6)
println("Minimal subset of bits that distinguishes all classes: $(minimal)")

Minimal subset of bits that distinguishes all classes: [1, 2, 3, 4, 6]
