In [166]:
from polars import DataFrame, Series, read_csv, col, struct, lit, max as plmax, when, concat
from itertools import product, starmap, cycle, chain, repeat
from functools import reduce, cmp_to_key, partial
from operator import itemgetter, ne
from collections import OrderedDict
from typing import Any


def prepare_table(
    df: DataFrame, input_var_cols: list[str], output_var_cols: list[str]
) -> DataFrame:
    """Encode a raw decision table with integer codes.

    Every input column is re-coded by replacing each distinct value with its
    position among that column's sorted unique values.  The output columns
    are collapsed into one ``output`` column: all combinations from the
    Cartesian product of the per-column sorted unique values are numbered in
    ``itertools.product`` order, and each row receives the number of its own
    combination.

    Args:
        df: Source table containing every listed column.
        input_var_cols: Names of the input (condition) columns.
        output_var_cols: Names of the output (decision) columns.

    Returns:
        A new frame holding the re-coded input columns, ``output``, a
        ``filtered`` column filled with ``False`` and a ``row_nr`` counter.
    """

    def sorted_unique(name: str) -> list:
        # Distinct values of a single column, in ascending order.
        return sorted(df.select(col(name).unique()).to_dict()[name])

    output_domains: list[tuple] = [
        tuple(sorted_unique(name)) for name in output_var_cols
    ]
    output_codes: dict[tuple, int] = {
        combination: code
        for code, combination in enumerate(product(*output_domains))
    }

    def encode_output(row) -> int:
        # The values of the struct element, in column order, form the lookup key.
        return output_codes[tuple(row.values())]

    input_codes: dict[str, dict[Any, int]] = {
        name: {value: code for code, value in enumerate(sorted_unique(name))}
        for name in input_var_cols
    }

    # Insertion order fixes the column order: inputs, then output, then filtered.
    columns: dict = {
        name: df.get_column(name).replace(input_codes[name])
        for name in input_var_cols
    }
    encoded_output = struct(output_var_cols).map_elements(encode_output)
    columns["output"] = df.with_columns(encoded_output.alias("output")).get_column(
        "output"
    )
    columns["filtered"] = map(lambda _: False, range(df.height))

    return DataFrame(columns).with_row_count("row_nr")

In [167]:
# def main():
# Read the sample table and encode it; later cells reuse these names.
table_path: str = "./sample_solution_table.csv"
input_var_cols: list[str] = [f"i{i}" for i in range(5)]
output_var_cols: list[str] = [f"o{i}" for i in range(1)]
old_df: DataFrame = read_csv(table_path)
df: DataFrame = prepare_table(old_df, input_var_cols, output_var_cols)
print(df)

shape: (9, 8)
┌────────┬─────┬─────┬─────┬─────┬─────┬────────┬──────────┐
│ row_nr ┆ i0  ┆ i1  ┆ i2  ┆ i3  ┆ i4  ┆ output ┆ filtered │
│ ---    ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ ---    ┆ ---      │
│ u32    ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64    ┆ bool     │
╞════════╪═════╪═════╪═════╪═════╪═════╪════════╪══════════╡
│ 0      ┆ 0   ┆ 0   ┆ 1   ┆ 0   ┆ 2   ┆ 2      ┆ false    │
│ 1      ┆ 1   ┆ 0   ┆ 0   ┆ 0   ┆ 2   ┆ 3      ┆ false    │
│ 2      ┆ 2   ┆ 0   ┆ 1   ┆ 1   ┆ 1   ┆ 0      ┆ false    │
│ 3      ┆ 2   ┆ 1   ┆ 2   ┆ 2   ┆ 1   ┆ 1      ┆ false    │
│ 4      ┆ 2   ┆ 1   ┆ 2   ┆ 2   ┆ 2   ┆ 1      ┆ false    │
│ 5      ┆ 1   ┆ 0   ┆ 0   ┆ 1   ┆ 0   ┆ 1      ┆ false    │
│ 6      ┆ 1   ┆ 0   ┆ 0   ┆ 1   ┆ 0   ┆ 0      ┆ false    │
│ 7      ┆ 2   ┆ 0   ┆ 2   ┆ 0   ┆ 1   ┆ 1      ┆ false    │
│ 8      ┆ 2   ┆ 0   ┆ 2   ┆ 0   ┆ 1   ┆ 2      ┆ false    │
└────────┴─────┴─────┴─────┴─────┴─────┴────────┴──────────┘


In [168]:
def select_indices(df: DataFrame, indices: list[int]):
    """Return the rows of ``df`` whose ``row_nr`` is listed in ``indices``.

    Rows come back in the frame's own order, and the result is capped at
    ``len(indices)`` rows.
    """
    matching = df.filter(col("row_nr").is_in(indices))
    return matching.limit(len(indices))


def select_index(df: DataFrame, index: int):
    """Return at most one row of ``df`` whose ``row_nr`` equals ``index``."""
    is_wanted = col("row_nr") == index
    return df.filter(is_wanted).limit(1)

def select_ordered_indices(df: DataFrame, indices: list[int]):
    """Stack the rows selected by ``row_nr`` in the order given by ``indices``.

    Each index is looked up separately with ``select_index`` and the
    single-row results are concatenated, so the caller's ordering (and any
    repetition) is kept.
    """
    return concat(select_index(df, index) for index in indices)

def indiscernibility_partition(
    df: DataFrame, input_var_cols: list[str]
) -> list[list[int]]:
    """Partition the rows of ``df`` into indiscernibility classes (U/IND(A)).

    Two rows belong to the same class when they hold equal values in every
    column of ``input_var_cols``.  The partition is refined one column at a
    time: each current class is stably counting-sorted on the column being
    processed and then cut wherever two neighbouring rows differ on the
    columns processed so far.

    The function looks rows up both through the ``row_nr`` column
    (``select_indices`` / ``select_ordered_indices``) and by position
    (``df.row``), so ``row_nr`` must equal the row position.  Column values
    are used as list indices, so they must be non-negative integers.

    Progress is printed to stdout while the partition is computed.

    Args:
        df: Encoded decision table with a ``row_nr`` column.
        input_var_cols: Names of the columns that define indiscernibility.

    Returns:
        A list of classes, each a list of row numbers.  An empty frame
        yields an empty list.
    """
    # Algorithm 1. Computing U/IND(A) algorithm
    if df.height == 0:
        # No rows, no classes; the column maxima would be null below.
        return []
    # Step 2
    current_orders: list[list[int]] = [list(range(df.height))]
    maxs = df.select(input_var_cols).max().row(0)
    for col_pos, (col_name, mx) in enumerate(zip(input_var_cols, maxs)):
        # Rows are compared only on the columns refined so far.  Comparing on
        # columns that have not been sorted yet would split rows that are
        # identical but not yet adjacent, and they could never be re-joined.
        processed_cols = input_var_cols[: col_pos + 1]
        print("---\nprocessing", col_name)
        print('current_orders', current_orders)
        new_orders: list[list[int]] = []
        for order in current_orders:
            print('old_order', order)
            local_series = select_indices(df, order)
            local_order = list(range(local_series.height))
            # 2.2
            # Seed every value below the column maximum with a zero count so
            # the counts end up indexed by value, then overlay the real counts.
            value_counts: list[int] = list(
                OrderedDict(
                    chain(
                        zip(range(mx), cycle([0])),
                        local_series.select(col(col_name)).get_column(col_name).value_counts().rows()
                    )
                ).values()
            )
            # 2.3
            # Running totals: value_counts[v] becomes the end position (exclusive)
            # of the block of rows whose value is v.
            value_counts = reduce(
                lambda acc, x: [*acc, acc[-1] + x], value_counts[1:], [value_counts[0]]
            )
            print('value_counts: ', value_counts)
            temp_order = local_order.copy()
            reversed_order = list(reversed(local_order))
            print('reversed_order', reversed_order)
            reversed_global_order = [order[i] for i in reversed_order]
            print('reversed global order', reversed_global_order)
            # Fetch the rows once and reuse them for both the log and the sort.
            reversed_rows = select_ordered_indices(df, reversed_global_order)
            print('select_ordered_indices', reversed_rows)
            # Walking backwards and filling each block from its end keeps the
            # sort stable.
            for j, (item,) in zip(reversed_order, reversed_rows.select(col_name).rows()):
                print(f"item: {item} j:{j}")
                local_order[value_counts[item] - 1] = temp_order[j]
                value_counts[item] -= 1
            order = [order[i] for i in local_order]
            # Step 3
            # Group neighbouring rows that are indiscernible on the processed columns.
            print('starting reducing')
            groups: list[list[int]] = []
            previous_row = None
            for i in order:
                row = df.row(i, named=True)
                if previous_row is None or any(
                    previous_row[name] != row[name] for name in processed_cols
                ):
                    groups.append([i])
                else:
                    groups[-1].append(i)
                previous_row = row
            new_orders += groups
            print("new_orders: ", new_orders)
        current_orders = new_orders
        print("current_orders", current_orders)
    return current_orders


def simplified_decision_table(
    df: DataFrame, indiscernibility_parts: list[list[int]]
) -> DataFrame:
    """Mark duplicate rows and conflicting classes in a decision table.

    For every class with more than one row, all rows except the first are
    flagged ``filtered = True``.  If the rows of such a class carry more than
    one distinct ``output``, the first row's ``output`` is replaced by one
    shared value: the table's maximum ``output`` plus one.

    Rows are read by position (``df.row``) and updated through ``row_nr``,
    so ``row_nr`` must equal the row position.  The classes are expected to
    be disjoint, as in a partition; all updates are applied in one pass.

    The received partition is printed to stdout.

    Args:
        df: Encoded decision table with ``row_nr``, ``output`` and
            ``filtered`` columns.
        indiscernibility_parts: Classes of row numbers.

    Returns:
        The updated frame; the input frame is not modified.
    """
    print(indiscernibility_parts)
    # Scalar maximum of the decision column (None when the column is empty).
    max_output = df.get_column("output").max()

    redundant_rows: list[int] = []
    conflicting_rows: list[int] = []
    for group in indiscernibility_parts:
        if len(group) > 1:
            # Drop the redundant rows: every row after the representative.
            redundant_rows.extend(group[1:])
            # Several distinct outputs inside one class make it conflicting.
            outputs = {df.row(i, named=True)["output"] for i in group}
            if len(outputs) > 1:
                conflicting_rows.append(group[0])

    if redundant_rows:
        df = df.with_columns(
            when(col("row_nr").is_in(redundant_rows))
            .then(True)
            .otherwise(col("filtered"))
            .alias("filtered")  # originally: blocked
        )
    if conflicting_rows:
        # A conflicting class has at least two distinct outputs, so the
        # column maximum is a real value here.
        df = df.with_columns(
            when(col("row_nr").is_in(conflicting_rows))
            .then(max_output + 1)
            .otherwise(col("output"))
            .alias("output")
        )
    return df


def positive_region(df: DataFrame, indiscernibility_parts: list[list[int]]):
    """Count the classes whose unfiltered rows share at most one ``output``.

    For each class, the rows are selected by ``row_nr``, rows with
    ``filtered`` set are discarded, and the class is counted when the
    remaining rows form no more than one ``output`` group.
    """

    def has_single_output(part: list[int]) -> bool:
        kept_rows = select_indices(df, part).filter(col("filtered") == False)
        output_groups = kept_rows.group_by("output").all()
        return output_groups.height <= 1

    return sum(1 for part in indiscernibility_parts if has_single_output(part))


def core_attributes(df: DataFrame, input_var_cols: list[str]):
    """Collect the core attributes of a decision table (unfinished).

    The function computes the indiscernibility partition over all of
    ``input_var_cols``, simplifies the table and takes the positive-region
    count as the reference value.  The per-attribute test that should fill
    ``core_cols`` has not been written yet, so the function raises as soon
    as there is an attribute to examine.

    Args:
        df: Encoded decision table.
        input_var_cols: Names of the input columns to examine.

    Returns:
        The list of core column names (empty when ``input_var_cols`` is empty).

    Raises:
        NotImplementedError: If ``input_var_cols`` is not empty.
    """
    core_cols = []
    indiscernibility_parts = indiscernibility_partition(df, input_var_cols)
    simple_table = simplified_decision_table(df, indiscernibility_parts)
    common_pr = positive_region(simple_table, indiscernibility_parts)
    for col_name in input_var_cols:
        # TODO(review): decide whether `col_name` is a core attribute,
        # presumably by comparing the positive region obtained without it
        # against `common_pr`, and append it to `core_cols` -- confirm the
        # intended criterion before implementing.
        raise NotImplementedError(
            f"core attribute test is not implemented (column {col_name!r})"
        )
    return core_cols


In [169]:
# Split the encoded rows into classes of rows with equal input values.
indiscernibility_parts = indiscernibility_partition(df, input_var_cols)
print("indiscernibility_parts", indiscernibility_parts)
# Flag duplicate rows inside each class and re-label conflicting classes.
simple_table = simplified_decision_table(df, indiscernibility_parts)
print(simple_table)
# Count the classes whose unfiltered rows carry at most one distinct output.
pcd = positive_region(simple_table, indiscernibility_parts)
print(pcd)
# if __name__ == "__main__":
#     main()

---
processing i0
current_orders [[0, 1, 2, 3, 4, 5, 6, 7, 8]]
old_order [0, 1, 2, 3, 4, 5, 6, 7, 8]
value_counts:  [1, 4, 9]
reversed_order [8, 7, 6, 5, 4, 3, 2, 1, 0]
reversed global order [8, 7, 6, 5, 4, 3, 2, 1, 0]
select_ordered_indices shape: (9, 8)
┌────────┬─────┬─────┬─────┬─────┬─────┬────────┬──────────┐
│ row_nr ┆ i0  ┆ i1  ┆ i2  ┆ i3  ┆ i4  ┆ output ┆ filtered │
│ ---    ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ ---    ┆ ---      │
│ u32    ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64    ┆ bool     │
╞════════╪═════╪═════╪═════╪═════╪═════╪════════╪══════════╡
│ 8      ┆ 2   ┆ 0   ┆ 2   ┆ 0   ┆ 1   ┆ 2      ┆ false    │
│ 7      ┆ 2   ┆ 0   ┆ 2   ┆ 0   ┆ 1   ┆ 1      ┆ false    │
│ 6      ┆ 1   ┆ 0   ┆ 0   ┆ 1   ┆ 0   ┆ 0      ┆ false    │
│ 5      ┆ 1   ┆ 0   ┆ 0   ┆ 1   ┆ 0   ┆ 1      ┆ false    │
│ 4      ┆ 2   ┆ 1   ┆ 2   ┆ 2   ┆ 2   ┆ 1      ┆ false    │
│ 3      ┆ 2   ┆ 1   ┆ 2   ┆ 2   ┆ 1   ┆ 1      ┆ false    │
│ 2      ┆ 2   ┆ 0   ┆ 1   ┆ 1   ┆ 1   ┆ 0      ┆ false    │
│ 1      ┆ 1