In [None]:
from dataclasses import dataclass
from functools import cached_property
import hexaly.optimizer
import opt_note.scsp as scsp

In [None]:
import marimo as mo
import nbformat

# WMM_HEXALY モデルを max 演算で考えてみる

元の `WMM_HEXALY` モデルは `WMM` をパラメータ化しただけだったため,
一部のパラメータを動かしただけでは解が更新されにくかった. Majority Merge の部分を「最も大きい Weight を持つ文字を採用する」にすれば Weight の変化に解が追従しやすくなるのではないか.

In [None]:
@dataclass
class Model:
    instance: list[str]
    solution: str | None = None
    best_bound: float = 0.0

    @cached_property
    def chars(self) -> str:
        return "".join(sorted(list(set("".join(self.instance)))))

    @cached_property
    def indices_1d_to_2d(self) -> list[tuple[int, int]]:
        ans: list[tuple[int, int]] = []
        counter = 0
        for s in self.instance:
            ans.append((counter, counter + len(s)))
            counter += len(s)
        return ans

    def priorities_1d_to_2d[T](self, priorities1d: list[T]) -> list[list[T]]:
        return [priorities1d[start:end] for start, end in self.indices_1d_to_2d]


    def wmm(self, priorities2d: list[list[int]]) -> str:
        max_len = len(self.instance) * max(len(s) for s in self.instance)
        indices = tuple(0 for _ in self.instance)
        solution = ""

        # while not all(idx == len(s) for idx, s in zip(indices, self.instance)):
        for _ in range(max_len):
            if all(idx == len(s) for idx, s in zip(indices, self.instance)):
                break

            counts = [
                max(
                    [0]
                    + [
                        priorities2d[sidx][idx]
                        for sidx, (idx, s) in enumerate(zip(indices, self.instance))
                        if idx < len(s) and s[idx] == c
                    ]
                )
                for c in self.chars
            ]
            next_char = self.chars[counts.index(max(counts))]

            solution += next_char
            indices = tuple(
                idx + 1 if idx < len(s) and s[idx] == next_char else idx
                for idx, s in zip(indices, self.instance)
            )

        return solution

    def objective(self, priorities1d: list[int]) -> int:
        priorities2d = self.priorities_1d_to_2d(
            [priorities1d[i] for i in range(len(priorities1d))]
        )
        solution = self.wmm(priorities2d)
        return len(solution)

    def solve(
        self,
        time_limit: int | None = 60,
        log: bool = False,
        initial_weights: list[list[int]] | None = None,
        *args,
        **kwargs
    ) -> str | None:
        # 重みの最大値は初期重みが与えられた場合は初期重みの最大値の 2 倍,
        # 初期重みが与えられなかった場合は文字種数とする.
        max_weight = (
            max(max(w, len(s)) for s, ws in zip(self.instance, initial_weights) for w in ws)
            if initial_weights
            else len(self.chars)
        )

        with hexaly.optimizer.HexalyOptimizer() as hxoptimizer:
            hxmodel = hxoptimizer.model
            hxparam = hxoptimizer.param

            priorities1d = [
                hxmodel.int(1, max_weight) for s in self.instance for cidx, _ in enumerate(s)
            ]

            func = hxmodel.create_int_external_function(self.objective)
            func.external_context.lower_bound = 0
            func.external_context.upper_bound = sum(len(s) for s in self.instance)

            indices_1d_to_2d: list[tuple[int, int]] = []
            counter = 0
            for s in self.instance:
                indices_1d_to_2d.append((counter, counter + len(s)))
                counter += len(s)

            hxmodel.minimize(func(*priorities1d))
            hxmodel.close()

            if initial_weights:
                priorities2d = self.priorities_1d_to_2d(priorities1d)
                for ps, ws in zip(priorities2d, initial_weights):
                    for p, w in zip(ps, ws):
                        p.set_value(w)

            if time_limit is not None:
                hxparam.time_limit = time_limit
            hxparam.verbosity = 1 if log else 0
            hxoptimizer.solve()

            solution = hxoptimizer.solution
            status = solution.status
            if status in {
                hexaly.optimizer.HxSolutionStatus.OPTIMAL,
                hexaly.optimizer.HxSolutionStatus.FEASIBLE,
            }:
                priorities1d_value: list[int] = [p.value for p in priorities1d]
                priorities2d_value = self.priorities_1d_to_2d(priorities1d_value)
                self.solution = self.wmm(priorities2d_value)

        return self.solution

In [None]:
# Run the shared benchmark helper with this Model on the 4-string instance file.
scsp.util.bench(Model, example_filename="uniform_q26n004k015-025.txt")

--- Condition (with 25 chars) ---
str1: tkgnkuhmpxnhtqgxzvxis
str2: iojiqfolnbxxcvsuqpvissbxf
str3: ulcinycosovozpplp
str4: igevazgbrddbcsvrvnngf

--- Solution (of length 62) ---
 Sol: ultcikgenykcosouhmvajiqfozpxplnhtqgbxzrddxbcvxsuqpvissbxrvnngf
str1: --t--kg-n-k----uhm--------px--nhtqg-xz------vx-----is---------
str2: ----i-------o-------jiqfo----ln----bx----x-cv-suqpvissbx-----f
str3: ul-ci---ny-coso---v-----ozp-pl-------------------p------------
str4: ----i-ge----------va-----z--------gb--rdd-bc--s---v-----rvnngf

example file name: 'uniform_q26n004k015-025.txt'
best objective: 62
best bound: 0.0
wall time: 59.396025s


In [None]:
# Run the shared benchmark helper with this Model on the 8-string instance file.
scsp.util.bench(Model, example_filename="uniform_q26n008k015-025.txt")

--- Condition (with 26 chars) ---
str1: tkgnkuhmpxnhtqgxzvxis
str2: iojiqfolnbxxcvsuqpvissbxf
str3: ulcinycosovozpplp
str4: igevazgbrddbcsvrvnngf
str5: pyplrzxucpmqvgtdfuivcdsbo
str6: pbdevdcvdpfzsmsbroqvbbh
str7: enbczfjtvxerzbrvigple
str8: rxwxqkrdrlctodtmprpxwd

--- Solution (of length 105) ---
 Sol: ipbdtkgevnkulahmzgbrdcvinyzdbcopfjtisvxnhtqfolgervonbnwxxczvxsuqckpmsbroqvigtdfprlsuivctodtmpesbobhrpxwfd
str1: ----tkg--nku--hm---------------p------xnhtq---g--------x--zvx-------------i-------s----------------------
str2: i-----------------------------o--j-i------qfol-----nb--xxc-v-suq--p------vi-------s-----------sb-----x-f-
str3: -----------ul--------c-iny---co-----s-------o----vo-------z-------p------------p-l----------p------------
str4: i-----gev----a--zgbrd------dbc------sv----------rv-n-n---------------------g--f--------------------------
str5: -p-----------------------y-----p-------------l--r---------z-x-u-c-pm----qv-gtdf----uivc--d----sbo--------
str6: -pbd---ev-----------dcv

In [None]:
# Run the shared benchmark helper with this Model on the 16-string instance file.
scsp.util.bench(Model, example_filename="uniform_q26n016k015-025.txt")

--- Condition (with 26 chars) ---
str01: tkgnkuhmpxnhtqgxzvxis
str02: iojiqfolnbxxcvsuqpvissbxf
str03: ulcinycosovozpplp
str04: igevazgbrddbcsvrvnngf
str05: pyplrzxucpmqvgtdfuivcdsbo
str06: pbdevdcvdpfzsmsbroqvbbh
str07: enbczfjtvxerzbrvigple
str08: rxwxqkrdrlctodtmprpxwd
str09: kkqafigqjwokkskrblg
str10: lxxpabivbvzkozzvd
str11: krifsavncdqwhzc
str12: qaxudgqvqcewbfgijowwy
str13: rsxqjnfpadiusiqbezhkohmg
str14: iwshvhcomiuvddm
str15: htxxqjzqbctbakn
str16: xusfcfzpeecvwantfmgqzu

--- Solution (of length 153) ---
  Sol: kriwshtxxuokqjaznfpbadigeqfjwolkcinbkycoplrzxxusfhcpfaqkrdbigjzmqpxvgnctbvxedqwrazhkcotvodpfzsmiuiqgspbrviddbgplecstvrwaodqvsbbhntnfmgqpijowrpxzvxiswfyud
str01: ------t----k-----------g----------n-k---------u--h-------------m-px--n------------h---t-----------qg------------------------------------------xzvxis-----
str02: --i-------o--j--------i--qf--ol---nb--------xx----c----------------v-------------------------s--u-q--p--vi--------s---------sb----------------x------f--

In [None]:
# Run the shared benchmark helper with this Model on the 10-string, 5-character instance file.
scsp.util.bench(Model, example_filename="uniform_q05n010k010-010.txt")

--- Condition (with 5 chars) ---
str01: dcbccdbcce
str02: bddbeeeebd
str03: cacdeecebe
str04: aeddddebdd
str05: acbeecabce
str06: bbabebdcba
str07: bbaeaebada
str08: eeeecbdbee
str09: ccdeedadcd
str10: bdabdbeaad

--- Solution (of length 28) ---
  Sol: baedcbacdbeeecdabdcebadeaced
str01: ---dcb-c-----cd-b-c------ce-
str02: b--d----dbeee------eb-d-----
str03: ----c-acd-ee-c-----eb--e----
str04: -aed----d-----d--d-eb-d----d
str05: -a--cb----ee-c-ab-ce--------
str06: b----ba--be-----bdc-ba------
str07: b----ba---e----a---ebad-a---
str08: --e-------eeec--bd--b--e--e-
str09: ----c--cd-ee--da-dc---d-----
str10: b--d--a--b----d-b--e-a--a--d

example file name: 'uniform_q05n010k010-010.txt'
best objective: 28
best bound: 0.0
wall time: 59.974849s


In [None]:
# Run the shared benchmark helper with this Model on the 50-string, 5-character instance file.
scsp.util.bench(Model, example_filename="uniform_q05n050k010-010.txt")

--- Condition (with 5 chars) ---
str01: dcbccdbcce
str02: bddbeeeebd
str03: cacdeecebe
str04: aeddddebdd
str05: acbeecabce
str06: bbabebdcba
str07: bbaeaebada
str08: eeeecbdbee
str09: ccdeedadcd
str10: bdabdbeaad
str11: ededaaaeaa
str12: aaeaabeeac
str13: eaabcaccdb
str14: bdeeadeade
str15: caedadeeed
str16: ebcadbabbe
str17: ddceeabdea
str18: dabcddeaec
str19: aadceedaab
str20: aeecceeeaa
str21: bbdaecaade
str22: dacedaedab
str23: aaeabbbbce
str24: dedbcbcaab
str25: dbdaaebbcb
str26: debedbebac
str27: ceebcdcbde
str28: dbedaadaab
str29: cccdcbebdc
str30: aeeacdbcbd
str31: dacbeacccd
str32: ecebccdbdb
str33: ddbbcedabb
str34: aaeabaaeba
str35: ecbbcaadcd
str36: debccecdbc
str37: daacbaeebc
str38: adabeaacce
str39: daecdbacaa
str40: dacbbdcedc
str41: dedbeebbde
str42: cdadcdcdaa
str43: ceedcbaeed
str44: ceaecaaaca
str45: dcccebbbad
str46: baeeaebbde
str47: dbdebaccdb
str48: ebcbeedaea
str49: aeeebbdbca
str50: dbdabcecbb

--- Solution (of length 36) ---
  Sol: adcebdabcedabceadbcaebdceba

In [None]:
# Run the shared benchmark helper with this Model on the 10-string nucleotide instance file.
scsp.util.bench(Model, example_filename="nucleotide_n010k010.txt")

--- Condition (with 4 chars) ---
str01: ATGGGATACG
str02: ATACCTTCCC
str03: CACGAATTGA
str04: TAAAATCTGT
str05: AGGTAACAAA
str06: TTCCTAGGTA
str07: TTGTAGATCT
str08: TGGGAAGTTC
str09: TTCCACAACT
str10: TCTAAACGAA

--- Solution (of length 24) ---
  Sol: ATGGTGACCTACGAATCTGATACC
str01: ATGG-GA--TACG-----------
str02: AT----ACCT-----TC-----CC
str03: -------C--ACGAAT-TGA----
str04: -T----A---A--AATCTG-T---
str05: A-GGT-A---AC-AA----A----
str06: -T--T--CCTA-G-----G-TA--
str07: -T--TG---TA-GA-TCT------
str08: -TGG-GA---A-G--T-T----C-
str09: -T--T--CC-AC-AA-CT------
str10: -T-----C-TA--AA-C-GA-A--

example file name: 'nucleotide_n010k010.txt'
best objective: 24
best bound: 0.0
wall time: 59.973164s


In [None]:
# Run the shared benchmark helper with this Model on the larger nucleotide instance file
# (the file name suggests 50 strings of length 50 — TODO confirm).
scsp.util.bench(Model, example_filename="nucleotide_n050k050.txt")

--- Condition (with 5 chars) ---
str01: TAGTAGTAGACTCCGGAAGTGACAAACCCTGAAAAGAATGGATAAATATA
str02: GGATAAACACTCCCGAAAATAATTTGACTTAAACAACGCGACAGTTCAAG
str03: ATACCTTCCTAGGTAACAAACCAACCAACTTTTGATCTCTTGTAGATCTG
str04: TAAATTATAATCTTATACTAGTAAAAAATAGGGTGTAACCGAAAACGGTC
str05: TTAAAACAGCCTGTGGGTTGCACCCACTCACAGGGCCCACTGGGCGCAAG
str06: ATGACTTCCAATGGATCCCAACCTCAAGCTTCCACCCCAATGGTTTCAGC
str07: AACAAACCAACCAACTTTTGATCTCTTGTAGATCTGTTCTCTAAACGAAC
str08: ATGAAAACGAAAATTATTATCAAGGGTATGGAAGTGGAAGCTGACGAAAT
str09: ACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTA
str10: TTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTC
str11: GCAGAGCATTTTCTAATATCCACAAAATGAAGGCAATAATTGTACTACTC
str12: ATGAGCCAAGATCCGACGAAGAGCCCCAAGGAGGAGAAGGAGGGACCCCC
str13: TCTCACAGTTCAAGAACCCAAAGTACCCCCCATAGCCCTCTTAAAGCCAC
str14: AGGTTTATACCTTCCTAGGTAACAAACCAACCAACTTTCGATCTCTTGTA
str15: AGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTA
str16: TAAAACAACTCAATACAACATAAGAAAATCAACGCAAAAACACTCACAAA
str17: CCGCCCATTTGGGCGGCTCTCGAGCGATAGCT

In [None]:
# Run the shared benchmark helper with this Model on the 10-string protein instance file.
scsp.util.bench(Model, example_filename="protein_n010k010.txt")

--- Condition (with 19 chars) ---
str01: MALSYCPKGT
str02: MQSSLNAIPV
str03: MPLSYQHFRK
str04: MEEHVNELHD
str05: MSNFDAIRAL
str06: MFRNQNSRNG
str07: MFYAHAFGGY
str08: MSKFTRRPYQ
str09: MSFVAGVTAQ
str10: MESLVPGFNE

--- Solution (of length 45) ---
  Sol: MAEPQSSKLSNFDVEHYCAIPGVTRNQHAFGNELSRNPKGYTQHD
str01: MA------LS------YC--P-----------------KG-T---
str02: M---QSS-L-N-------AIP-V----------------------
str03: M--P----LS------Y---------QH-F-----R--K------
str04: M-E-----------EH------V--N------EL---------HD
str05: M----S----NFD-----AI----R---A----L-----------
str06: M----------F------------RNQ----N--SRN--G-----
str07: M----------F----Y-A--------HAFG--------GY----
str08: M----S-K---F-----------TR----------R-P--Y-Q--
str09: M----S-----F-V----A--GVT----A-------------Q--
str10: M-E--S--L----V------PG-------F-NE------------

example file name: 'protein_n010k010.txt'
best objective: 45
best bound: 0.0
wall time: 59.898153s


In [None]:
# Run the shared benchmark helper with this Model on the larger protein instance file
# (the file name suggests 50 strings of length 50 — TODO confirm).
scsp.util.bench(Model, example_filename="protein_n050k050.txt")

--- Condition (with 20 chars) ---
str01: MRHLNIDIETYSSNDIKNGVYKYADAEDFEILLFAYSIDGGEVECLDLTR
str02: MERRAHRTHQNWDATKPRERRKQTQHRLTHPDDSIYPRIEKAEGRKEDHG
str03: MEPGAFSTALFDALCDDILHRRLESQLRFGGVQIPPEVSDPRVYAGYALL
str04: MGKFYYSNRRLAVFAQAQSRHLGGSYEQWLACVSGDSAFRAEVKARVQKD
str05: FFRENLAFQQGKAREFPSEEARANSPTSRELWVRRGGNPLSEAGAERRGT
str06: MDPSLTQVWAVEGSVLSAAVDTAETNDTEPDEGLSAENEGETRIIRITGS
str07: MAFDFSVTGNTKLDTSGFTQGVSSMTVAAGTLIADLVKTASSQLTNLAQS
str08: MAVILPSTYTDGTAACTNGSPDVVGTGTMWVNTILPGDFFWTPSGESVRV
str09: MNTGIIDLFDNHVDSIPTILPHQLATLDYLVRTIIDENRSVLLFHIMGSG
str10: MFVFLVLLPLVSSQCVNLRTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHS
str11: MDSKETILIEIIPKIKSYLLDTNISPKSYNDFISRNKNIFVINLYNVSTI
str12: MLLSGKKKMLLDNYETAAARGRGGDERRRGWAFDRPAIVTKRDKSDRMAH
str13: MNGEEDDNEQAAAEQQTKKAKREKPKQARKVTSEAWEHFDATDDGAECKH
str14: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGT
str15: MRYIVSPQLVLQVGKGQEVERALYLTPYDYIDEKSPIYYFLRSHLNIQRP
str16: MPRVPVYDSPQVSPNTVPQARLATPSFATPTFRGADAPAFQDTANQQARQ
str17: MFVFLVLLPLVSSQCVNLRTRTQLPLAYTNS

元の `WMM_HEXALY` よりもだいぶ悪い結果となった.