In [None]:
import copy
import math
import functools

In [None]:
import marimo as mo
import nbformat
import util

# `IBS_SCS` アルゴリズム

参考: Sayyed Rasoul Mousavi, Fateme Bahri, Farzaneh Sadat Tabataba, 2012, An enhanced beam search algorithm for the Shortest Common Supersequence Problem,
Engineering Applications of Artificial Intelligence,
Volume 25, Issue 3, Pages 457-467, https://www.sciencedirect.com/science/article/pii/S0952197611001497

- 計算量: $O(k^2 \log_2 q + L^* (n \kappa \beta q + \beta q \log_2 \left( \beta q \right)))$
    - (補足) $L^*$: このアルゴリズムによって返される解の長さ. $O(nk)$
    - (補足) $\kappa$, $\beta$: ビームサーチのパラメータ.
- 近似精度: ?

In [None]:
@functools.cache
def probability(len_sub: int, len_super: int, num_alphabet: int) -> float:
    """
    Probability that a random string y is a supersequence of a random string w.

    Both strings are assumed to be drawn uniformly at random over an alphabet
    of ``num_alphabet`` symbols; w has length ``len_sub`` and y has length
    ``len_super``.

    Args:
        len_sub: Length of w (the candidate subsequence).
        len_super: Length of y (the candidate supersequence).
        num_alphabet: Number of symbols in the alphabet.

    Returns:
        The probability as a float in [0, 1].

    Results are memoized with ``functools.cache``, so the recursion only
    evaluates each (len_sub, len_super, num_alphabet) triple once.
    """
    # The empty string is a subsequence of every string.
    if len_sub == 0:
        return 1.0

    # A string can never be embedded into a strictly shorter one.
    if len_sub > len_super:
        return 0.0

    shorter_super = len_super - 1

    # Consume one symbol of y: with probability 1/q it matches the pending
    # symbol of w (both strings shrink), otherwise only y shrinks.
    p_match = 1.0 / num_alphabet * probability(len_sub - 1, shorter_super, num_alphabet)
    p_skip = (num_alphabet - 1) / num_alphabet * probability(len_sub, shorter_super, num_alphabet)

    return p_match + p_skip

In [None]:
class State:
    """
    A partial solution: how far each input string has been consumed so far.

    Attributes:
        instance: The input strings (stored by reference, not copied).
        positions: ``positions[i]`` is the index of the next unconsumed
            character of ``instance[i]``; it equals ``len(instance[i])`` once
            that string is fully consumed.
        solution: The sequence of characters accumulated so far.
    """

    def __init__(self, instance: list[str]):
        self.instance = instance
        self.positions = [0] * len(instance)
        self.solution = ""

    def is_feasible(self) -> bool:
        """Return True when every string has been consumed to its very end."""
        for s, pos in zip(self.instance, self.positions):
            if pos != len(s):
                return False
        return True

    def is_usable(self, c: str) -> bool:
        """Return True if ``c`` is the next unconsumed character of some string."""
        return any(
            pos < len(s) and s[pos] == c
            for pos, s in zip(self.positions, self.instance)
        )

    def dominate(self, other: "State") -> bool:
        """
        Return True if this state strictly dominates ``other``.

        That is: no position of this state is behind the corresponding
        position of ``other``, and at least one position differs.
        """
        strictly_ahead = False
        for mine, theirs in zip(self.positions, other.positions):
            if mine < theirs:
                # Behind on one string: domination is impossible.
                return False
            if mine > theirs:
                strictly_ahead = True
        return strictly_ahead

In [None]:
def solve(instance: list[str], beta: int = 100, kappa: int = 7) -> str:
    """
    Build a common supersequence of ``instance`` with the IBS_SCS beam search.

    Each iteration extends every state in the beam by every usable character,
    scores the resulting candidates with a probabilistic heuristic, discards
    candidates dominated by one of the ``kappa`` best, and keeps the ``beta``
    best survivors as the next beam. The first candidate that consumes all
    strings is returned.

    Args:
        instance: The strings to be covered.
        beta: Beam width, i.e. the number of states kept after each iteration.
            Must be at least 1.
        kappa: Number of top-ranked candidates used as reference states for
            dominance pruning.

    Returns:
        A string that is a supersequence of every string in ``instance``.
        The empty string is returned when there is nothing to cover (no
        strings, or only empty strings).

    Raises:
        ValueError: If ``beta`` is smaller than 1 (the beam would be empty).
    """
    if beta < 1:
        # An empty beam would otherwise surface later as an opaque
        # "max() arg is an empty sequence" error.
        raise ValueError("beta must be a positive integer")

    # Nothing to cover: without this guard the alphabet is empty and
    # math.log2(0) below raises "math domain error".
    if not any(instance):
        return ""

    chars = sorted(set("".join(instance)))
    log2_num_chars = math.log2(len(chars))  # loop-invariant factor of k
    beam: list[State] = [State(instance)]

    while True:
        # Step 1: Extension
        candidates: list[State] = []
        for state in beam:
            for char in chars:
                if not state.is_usable(char):
                    continue

                # Build the child directly instead of deep-copying the parent
                # (which would also copy the whole instance list every time).
                child = State(instance)
                child.positions = [
                    pos + 1 if pos < len(s) and s[pos] == char else pos
                    for s, pos in zip(instance, state.positions)
                ]
                child.solution = state.solution + char

                if child.is_feasible():
                    return child.solution
                candidates.append(child)

        # Step 2: Evaluation of candidate solutions
        # k is the supersequence length assumed by the heuristic: the longest
        # remaining suffix over all candidates, scaled by log2(alphabet size).
        longest_remaining = max(
            len(s) - pos
            for state in candidates
            for s, pos in zip(instance, state.positions)
        )
        k = round(log2_num_chars * longest_remaining)

        heuristics = []
        for state in candidates:
            score = 1.0
            for s, pos in zip(instance, state.positions):
                score *= probability(len(s) - pos, k, len(chars))
            heuristics.append(score)

        # Step 3: Dominance pruning
        # Stable sort by descending heuristic: ties keep generation order.
        order = sorted(
            range(len(candidates)),
            key=heuristics.__getitem__,
            reverse=True,
        )
        ranked = [candidates[idx] for idx in order]
        kappa_best = ranked[:kappa]
        # The kappa best are always kept; every other candidate survives only
        # if none of the kappa best dominates it.
        survivors = kappa_best + [
            state
            for state in ranked[len(kappa_best):]
            if not any(better.dominate(state) for better in kappa_best)
        ]

        # Step 4: Selection
        beam = survivors[:beta]

In [None]:
# Run solve() with its default parameters on the instance parsed from
# uniform_q26n004k015-025.txt, then display the instance, the solution and
# the feasibility check reported by util.
_instance = util.parse("uniform_q26n004k015-025.txt")
util.show(_instance)

_solution = solve(_instance)
util.show(_instance, _solution)

_feasible = util.is_feasible(_instance, _solution)
print(f"solution is feasible: {_feasible}")

--- Condition (with 25 chars) ---
str1: tkgnkuhmpxnhtqgxzvxis
str2: iojiqfolnbxxcvsuqpvissbxf
str3: ulcinycosovozpplp
str4: igevazgbrddbcsvrvnngf



--- Solution (of length 73) ---
 Sol: itkgnkuojiqfolevnazbcghxbimxcnprdvyxdcnshboucstqovpgvorxzvisnpsbxnpgfilps
str1: -tkgnku---------------h---m---p----x--n-h-----tq---g---xzv------x----i--s
str2: i------ojiqfol--n--b---x---xc----v-----s---u---q--p-v-----is--sbx---f----
str3: ------u------l------c----i---n----y--c----o--s--ov---o--z----p----p---lp-
str4: i--g----------ev-az--g--b------rd---d----b--cs---v----r--v--n----n-gf----

solution is feasible: True


In [None]:
# Run solve() with its default parameters on the instance parsed from
# uniform_q26n008k015-025.txt, then display the instance, the solution and
# the feasibility check reported by util.
_instance = util.parse("uniform_q26n008k015-025.txt")
util.show(_instance)

_solution = solve(_instance)
util.show(_instance, _solution)

_feasible = util.is_feasible(_instance, _solution)
print(f"solution is feasible: {_feasible}")

--- Condition (with 26 chars) ---
str1: tkgnkuhmpxnhtqgxzvxis
str2: iojiqfolnbxxcvsuqpvissbxf
str3: ulcinycosovozpplp
str4: igevazgbrddbcsvrvnngf
str5: pyplrzxucpmqvgtdfuivcdsbo
str6: pbdevdcvdpfzsmsbroqvbbh
str7: enbczfjtvxerzbrvigple
str8: rxwxqkrdrlctodtmprpxwd



--- Solution (of length 117) ---
 Sol: pigenboyjplrzxucidqtevfwkxolgnqkdcazvrdgbpfrujhmpxqtvxlcingtdvzsycodehtmsbfuroqpvizbrvcsgvoxzrvpxisbgplndsnwgxfbdehop
str1: -------------------t----k---gn-k------------u-hmpx-------n-----------ht-------q---------g--xz-v-xis------------------
str2: -i----o-j-------i-q---f---ol-n----------b--------x---x-c-----v-s-----------u--qpvi-----s----------sb---------xf------
str3: --------------u------------l-----c----------------------in------yco-----s----o--v---------o-z--p-----pl-------------p
str4: -ige-----------------v------------az---gb--r----------------d------d-----b------------cs-v---rv--------n--n-g-f------
str5: p------y-plrzxuc-------------------------p-----m--q-v-----gtd-------------fu-----i---vc-----------------ds-----b---o-
str6: p----b-----------d--ev----------dc--v-d--pf-------------------zs-------msb--roq-v--b---------------b--------------h--
str7: ---enb---------c-------------------z------f--j-----tvx--------------e-------r-----zbrv-------

In [None]:
# Run solve() with its default parameters on the instance parsed from
# uniform_q26n016k015-025.txt, then display the instance, the solution and
# the feasibility check reported by util.
_instance = util.parse("uniform_q26n016k015-025.txt")
util.show(_instance)

_solution = solve(_instance)
util.show(_instance, _solution)

_feasible = util.is_feasible(_instance, _solution)
print(f"solution is feasible: {_feasible}")

--- Condition (with 26 chars) ---
str01: tkgnkuhmpxnhtqgxzvxis
str02: iojiqfolnbxxcvsuqpvissbxf
str03: ulcinycosovozpplp
str04: igevazgbrddbcsvrvnngf
str05: pyplrzxucpmqvgtdfuivcdsbo
str06: pbdevdcvdpfzsmsbroqvbbh
str07: enbczfjtvxerzbrvigple
str08: rxwxqkrdrlctodtmprpxwd
str09: kkqafigqjwokkskrblg
str10: lxxpabivbvzkozzvd
str11: krifsavncdqwhzc
str12: qaxudgqvqcewbfgijowwy
str13: rsxqjnfpadiusiqbezhkohmg
str14: iwshvhcomiuvddm
str15: htxxqjzqbctbakn
str16: xusfcfzpeecvwantfmgqzu



--- Solution (of length 163) ---
  Sol: krixuwslxqpkfchtxojiqfanypfxlrzpeabdigevudcosvaqjzgbrlnbxcdqvzkwognkuhcpmfqvdbctozsmipxqjbewtvanhtzcvsbuqgxfrpvdtmgideksrzfburviphkoqvrblgpxncjdsbowhmngwzxfildesuy
str01: ---------------t----------------------------------------------k--gnkuh--m------------px--------nht------qgx--------------z----v------------x----------------i---s--
str02: --i--------------ojiqf---------------------o---------lnbx-----------------------------x------------cvs-uq----pv----i---s------------------------sb--------xf-------
str03: ----u--l-----c-----i---ny-----------------cos-------------------o----------v----oz---p-----------------------p--------------------------l-p------------------------
str04: --i----------------------------------gev------a--zgbr-----d-----------------dbc---s----------v--------------r-v-----------------------------n---------ng---f-------
str05: ----------p-------------yp--lrz-------------------------x-----------u-cpm-qv-----------------------------

In [None]:
# Run solve() with its default parameters on the instance parsed from
# uniform_q05n010k010-010.txt, then display the instance, the solution and
# the feasibility check reported by util.
_instance = util.parse("uniform_q05n010k010-010.txt")
util.show(_instance)

_solution = solve(_instance)
util.show(_instance, _solution)

_feasible = util.is_feasible(_instance, _solution)
print(f"solution is feasible: {_feasible}")

--- Condition (with 5 chars) ---
str01: dcbccdbcce
str02: bddbeeeebd
str03: cacdeecebe
str04: aeddddebdd
str05: acbeecabce
str06: bbabebdcba
str07: bbaeaebada
str08: eeeecbdbee
str09: ccdeedadcd
str10: bdabdbeaad



--- Solution (of length 28) ---
  Sol: bdcbacedbeeecdabddecbadceade
str01: -dcb-c------cd-b---c---ce---
str02: bd-----dbeee------e-b-d-----
str03: --c-ac-d-ee-c-----e-b---e---
str04: ----a-ed-----d--dde-b-d---d-
str05: ----ac--bee-c-ab---c----e---
str06: b--ba---be-----bd--cba------
str07: b--ba-e-------a---e-bad--a--
str08: ------e--eeec--bd---b---e--e
str09: --c--c-d-ee--da-d--c--d-----
str10: bd--a---b----d-b--e--a---ad-

solution is feasible: True


In [None]:
# Run solve() with its default parameters on the instance parsed from
# uniform_q05n050k010-010.txt, then display the instance, the solution and
# the feasibility check reported by util.
_instance = util.parse("uniform_q05n050k010-010.txt")
util.show(_instance)

_solution = solve(_instance)
util.show(_instance, _solution)

_feasible = util.is_feasible(_instance, _solution)
print(f"solution is feasible: {_feasible}")

--- Condition (with 5 chars) ---
str01: dcbccdbcce
str02: bddbeeeebd
str03: cacdeecebe
str04: aeddddebdd
str05: acbeecabce
str06: bbabebdcba
str07: bbaeaebada
str08: eeeecbdbee
str09: ccdeedadcd
str10: bdabdbeaad
str11: ededaaaeaa
str12: aaeaabeeac
str13: eaabcaccdb
str14: bdeeadeade
str15: caedadeeed
str16: ebcadbabbe
str17: ddceeabdea
str18: dabcddeaec
str19: aadceedaab
str20: aeecceeeaa
str21: bbdaecaade
str22: dacedaedab
str23: aaeabbbbce
str24: dedbcbcaab
str25: dbdaaebbcb
str26: debedbebac
str27: ceebcdcbde
str28: dbedaadaab
str29: cccdcbebdc
str30: aeeacdbcbd
str31: dacbeacccd
str32: ecebccdbdb
str33: ddbbcedabb
str34: aaeabaaeba
str35: ecbbcaadcd
str36: debccecdbc
str37: daacbaeebc
str38: adabeaacce
str39: daecdbacaa
str40: dacbbdcedc
str41: dedbeebbde
str42: cdadcdcdaa
str43: ceedcbaeed
str44: ceaecaaaca
str45: dcccebbbad
str46: baeeaebbde
str47: dbdebaccdb
str48: ebcbeedaea
str49: aeeebbdbca
str50: dbdabcecbb



--- Solution (of length 34) ---
  Sol: daebcdabecdaebceadcbaedcbeabdceabd
str01: d---c--b-c----c--d-b---c-----ce---
str02: ---b-d----d--b-e-----e---e----e-bd
str03: ----c-a--cd-e--e--c--e--be--------
str04: -ae--d----d------d----d--e-bd----d
str05: -a--c--be---e-c-a--b---c-e--------
str06: ---b---b---a-b-e---b--dcb-a-------
str07: ---b---b---ae---a----e--b-a-d--a--
str08: --e-----e---e--e--cb--d-be----e---
str09: ----c----cd-e--e-d--a-dc----d-----
str10: ---b-dab--d--b-ea---a-d-----------
str11: --e--d--e-da----a---ae----a----a--
str12: -a----a-e--a----a--b-e---ea--c----
str13: --e---a----a-bc-a-c----c----d---b-
str14: ---b-d--e---e---ad---e----a-d-e---
str15: ----c-a-e-da-----d---e---e----e--d
str16: --ebc-a---d--b--a--b----be--------
str17: d----d---c--e--ea--b--d--ea-------
str18: da-bcd----d-e---a----e-c----------
str19: -a----a---d---ce-----ed---a----ab-
str20: -ae-----ec----ce-----e---ea----a--
str21: ---b---b--dae-c-a---a-d--e--------
str22: da--c---e-dae----d--a---b---------
st