In [None]:
import marimo as mo
import nbformat
import util

# Weighted Majority Merge アルゴリズム

- 計算量: ?
- 近似精度: なし

与えられた文字列たちの先頭の文字を調べ, 優先度の高い順に採用していく流れは Majority Merge と同じ.
Weighted Majority Merge では出現するか否かの代わりに残された文字列長を考えた重み付き出現回数を採用する.

- 解 $\mathrm{sol}$ を空文字列で初期化する.
- 各文字 $c$ に対し重要度 $\sum_{1 \le i \le n,\ s_i \ne \varepsilon,\ s_i[0] = c} |s_i|$ (先頭の文字が $c$ である空でない文字列の残り長さの総和) を計算し, 重要度が最大である $c$ を求める. 重要度が同点の場合は文字の順序で最小のものを選ぶ.
- $\mathrm{sol}$ の後ろに $c$ を追加する.
- 各文字列 $s_1, s_2, \dots, s_n$ に対し, 先頭の文字が $c$ である場合は先頭 1 文字を削除する.
- $s_1, s_2, \dots, s_n$ が全て空文字列になれば終了.

In [None]:
def solve(instance: list[str]) -> str:
    """Build a common supersequence with the Weighted Majority Merge heuristic.

    At every step each not-yet-exhausted string votes for its current front
    character with a weight equal to its remaining length. The character with
    the largest total weight is appended to the solution and consumed from
    every string whose front character it is. Ties are broken in favour of the
    smallest character (the same result as scanning the sorted alphabet from
    left to right and keeping the first maximum).

    Args:
        instance: The strings to merge. An empty list and empty strings are
            both accepted.

    Returns:
        A string that contains every element of ``instance`` as a subsequence;
        the empty string when there is nothing to cover.
    """
    # positions[k] is the index of the first unconsumed character of instance[k].
    positions = [0] * len(instance)
    # Collect characters in a list and join once, rather than growing a string.
    merged: list[str] = []

    while True:
        # A single pass over the strings accumulates the weight of every front
        # character, instead of rescanning all strings once per alphabet symbol.
        weights: dict[str, int] = {}
        for pos, s in zip(positions, instance):
            if pos < len(s):
                front = s[pos]
                weights[front] = weights.get(front, 0) + len(s) - pos

        if not weights:
            # Every string has been fully consumed.
            break

        # Highest weight wins; among equal weights the smallest character wins.
        next_char = min(weights, key=lambda ch: (-weights[ch], ch))
        merged.append(next_char)

        # Advance every string whose front character was just emitted.
        for k, s in enumerate(instance):
            pos = positions[k]
            if pos < len(s) and s[pos] == next_char:
                positions[k] = pos + 1

    return "".join(merged)

In [None]:
# Run the heuristic on one benchmark file and report the outcome.
_instance = util.parse("uniform_q26n004k015-025.txt")
util.show(_instance)  # show the input strings

_solution = solve(_instance)
util.show(_instance, _solution)  # show the solution against each input string

_feasible = util.is_feasible(_instance, _solution)
print(f"solution is feasible: {_feasible}")

--- Condition (with 25 chars) ---
str1: tkgnkuhmpxnhtqgxzvxis
str2: iojiqfolnbxxcvsuqpvissbxf
str3: ulcinycosovozpplp
str4: igevazgbrddbcsvrvnngf

--- Solution (of length 75) ---
 Sol: iojiqtfgkegolnvakublzcghxbimxcnprdvxycdnsbhoucsqtovpqgviorxzvsnpsbxnpgfilps
str1: -----t--k-g--n--ku-----h---m---p---x---n--h-----t---qg----xzv-----x----i--s
str2: iojiq-f----oln----b-----x---xc----v-----s---u--q---p--vi-----s--sbx---f----
str3: -----------------u-l-c----i---n-----yc-----o--s--ov-----o--z---p----p---lp-
str4: i------g-e----va----z-g--b------rd----d--b---cs---v------r--v-n----n-gf----

solution is feasible: True


In [None]:
# Run the heuristic on one benchmark file and report the outcome.
_instance = util.parse("uniform_q26n008k015-025.txt")
util.show(_instance)  # show the input strings

_solution = solve(_instance)
util.show(_instance, _solution)  # show the solution against each input string

_feasible = util.is_feasible(_instance, _solution)
print(f"solution is feasible: {_feasible}")

--- Condition (with 26 chars) ---
str1: tkgnkuhmpxnhtqgxzvxis
str2: iojiqfolnbxxcvsuqpvissbxf
str3: ulcinycosovozpplp
str4: igevazgbrddbcsvrvnngf
str5: pyplrzxucpmqvgtdfuivcdsbo
str6: pbdevdcvdpfzsmsbroqvbbh
str7: enbczfjtvxerzbrvigple
str8: rxwxqkrdrlctodtmprpxwd

--- Solution (of length 128) ---
 Sol: pioyjpbilrdeqtxfgknwzxubcevgolndcqkazbfpruvdghmpxjbrinqtvxcflcdgtdvyzscoebhmsrucdtqfzbrovpsviguimprvoxzpcqvxsdsbnnplbgiwxfdehops
str1: -------------t---k---------g--n---k------u---hmpx----n--------------------h------tq----------g-------xz---vx----------i--------s
str2: -io-j--i----q--f------------oln------b----------x--------xc-------v--s--------u---q------p-vi---------------s-sb--------xf------
str3: ----------------------u------l--c-------------------in-------------y--co----s----------ov-----------o-zp----------pl----------p-
str4: -i--------------g--------ev--------az-------g-----br----------d--d-------b-----c----------sv------rv------------nn---g---f------
str5: p--y-p--lr----------

In [None]:
# Run the heuristic on one benchmark file and report the outcome.
_instance = util.parse("uniform_q26n016k015-025.txt")
util.show(_instance)  # show the input strings

_solution = solve(_instance)
util.show(_instance, _solution)  # show the solution against each input string

_feasible = util.is_feasible(_instance, _solution)
print(f"solution is feasible: {_feasible}")

--- Condition (with 26 chars) ---
str01: tkgnkuhmpxnhtqgxzvxis
str02: iojiqfolnbxxcvsuqpvissbxf
str03: ulcinycosovozpplp
str04: igevazgbrddbcsvrvnngf
str05: pyplrzxucpmqvgtdfuivcdsbo
str06: pbdevdcvdpfzsmsbroqvbbh
str07: enbczfjtvxerzbrvigple
str08: rxwxqkrdrlctodtmprpxwd
str09: kkqafigqjwokkskrblg
str10: lxxpabivbvzkozzvd
str11: krifsavncdqwhzc
str12: qaxudgqvqcewbfgijowwy
str13: rsxqjnfpadiusiqbezhkohmg
str14: iwshvhcomiuvddm
str15: htxxqjzqbctbakn
str16: xusfcfzpeecvwantfmgqzu

--- Solution (of length 176) ---
  Sol: iprxuskwxqklrojiqfaxcfigenbdyczpevfolrsazxudcpgahvtdqbjinvbxxqcekvwgnkuhmpscovqtvxfzklrdcgtdobfuiqwenhsmzpvcsbrdtizjavzqbgrvniesksmptzcfhkorbxjmgqpzvluxotbwvdahngbdmfikweghnpsy
str01: --------------------------------------------------t-------------k--gnkuhmp-------x------------------nh----------t------q-g-------------------x-----zv--x--------------i-------s-
str02: i------------ojiqf-----------------ol-------------------n-bxx-c--v--------s--------------------u-q-------p

-----q--------ax----------------------ud--g-----q----v---qce--w--------------------------bf--------------------------g---i----------------j---------o--w------------w------y
str13: --r--s--xq----j----------n--------f----------p-a---d---i--------------u---s---------------------iq-----------b----------------e------z--hko--------------------h----m-----g-----
str14: i------w------------------------------s---------hv---------------------h---co--------------------------m---------i------------------------------------u-----vd-----dm-----------
str15: ------------------------------------------------h-t--------xxq-----------------------------------------------------j--zqb-------------c------------------tb---a--------k----n---
str16: ---xus-----------f--cf--------zpe------------------------------e-----------c-v--------------------w-----------------a-------n-------t--f-------mgq-z--u-------------------------

solution is feasible: True


In [None]:
# Run the heuristic on one benchmark file and report the outcome.
_instance = util.parse("uniform_q05n010k010-010.txt")
util.show(_instance)  # show the input strings

_solution = solve(_instance)
util.show(_instance, _solution)  # show the solution against each input string

_feasible = util.is_feasible(_instance, _solution)
print(f"solution is feasible: {_feasible}")

--- Condition (with 5 chars) ---
str01: dcbccdbcce
str02: bddbeeeebd
str03: cacdeecebe
str04: aeddddebdd
str05: acbeecabce
str06: bbabebdcba
str07: bbaeaebada
str08: eeeecbdbee
str09: ccdeedadcd
str10: bdabdbeaad

--- Solution (of length 32) ---
  Sol: bdcabcdebeaecdaebcdebdcdbeabdade
str01: -dc-bc------cd--bc----c--e------
str02: bd----d-be-e---e---ebd----------
str03: --ca-cde-e--c--eb--e------------
str04: ---a---e-----d----d--d-d-e-bd-d-
str05: ---a-c--be-ec-a-bc-e------------
str06: b---b-----a-----b--ebdc-b-a-----
str07: b---b-----ae--aeb---------a-da--
str08: -------e-e-e---e-c--bd--be-----e
str09: --c--cde-e---da---d---cd--------
str10: bd-ab-d-bea---a---d-------------

solution is feasible: True


In [None]:
# Run the heuristic on one benchmark file and report the outcome.
_instance = util.parse("uniform_q05n050k010-010.txt")
util.show(_instance)  # show the input strings

_solution = solve(_instance)
util.show(_instance, _solution)  # show the solution against each input string

_feasible = util.is_feasible(_instance, _solution)
print(f"solution is feasible: {_feasible}")

--- Condition (with 5 chars) ---
str01: dcbccdbcce
str02: bddbeeeebd
str03: cacdeecebe
str04: aeddddebdd
str05: acbeecabce
str06: bbabebdcba
str07: bbaeaebada
str08: eeeecbdbee
str09: ccdeedadcd
str10: bdabdbeaad
str11: ededaaaeaa
str12: aaeaabeeac
str13: eaabcaccdb
str14: bdeeadeade
str15: caedadeeed
str16: ebcadbabbe
str17: ddceeabdea
str18: dabcddeaec
str19: aadceedaab
str20: aeecceeeaa
str21: bbdaecaade
str22: dacedaedab
str23: aaeabbbbce
str24: dedbcbcaab
str25: dbdaaebbcb
str26: debedbebac
str27: ceebcdcbde
str28: dbedaadaab
str29: cccdcbebdc
str30: aeeacdbcbd
str31: dacbeacccd
str32: ecebccdbdb
str33: ddbbcedabb
str34: aaeabaaeba
str35: ecbbcaadcd
str36: debccecdbc
str37: daacbaeebc
str38: adabeaacce
str39: daecdbacaa
str40: dacbbdcedc
str41: dedbeebbde
str42: cdadcdcdaa
str43: ceedcbaeed
str44: ceaecaaaca
str45: dcccebbbad
str46: baeeaebbde
str47: dbdebaccdb
str48: ebcbeedaea
str49: aeeebbdbca
str50: dbdabcecbb

--- Solution (of length 37) ---
  Sol: daecbdeacbedacebadecbadebac

In [None]:
# Run the heuristic on one benchmark file and report the outcome.
_instance = util.parse("nucleotide_n010k010.txt")
util.show(_instance)  # show the input strings

_solution = solve(_instance)
util.show(_instance, _solution)  # show the solution against each input string

_feasible = util.is_feasible(_instance, _solution)
print(f"solution is feasible: {_feasible}")

--- Condition (with 4 chars) ---
str01: CTTTTGATCT
str02: CTGCATGCTT
str03: TTGTAGATCT
str04: ATAACTAATT
str05: AGGTTTATAC
str06: CTTTTGATCT
str07: TTGTAGATCT
str08: TTGTAGATCT
str09: TTGTAGATCT
str10: TAATATTACC

--- Solution (of length 25) ---
  Sol: TTGCTAGTATCATGTATCTATACTT
str01: ---CT--T-T--TG-ATCT------
str02: ---CT-G---CATG---CT-T----
str03: TTG-TAG-ATC-T------------
str04: -----A-TA--A-----CTA-A-TT
str05: -----AG------GT-T-TATAC--
str06: ---CT--T-T--TG-ATCT------
str07: TTG-TAG-ATC-T------------
str08: TTG-TAG-ATC-T------------
str09: TTG-TAG-ATC-T------------
str10: T----A--AT-AT-TA-C----C--

solution is feasible: True


In [None]:
# Run the heuristic on one benchmark file and report the outcome.
_instance = util.parse("nucleotide_n050k050.txt")
util.show(_instance)  # show the input strings

_solution = solve(_instance)
util.show(_instance, _solution)  # show the solution against each input string

_feasible = util.is_feasible(_instance, _solution)
print(f"solution is feasible: {_feasible}")

--- Condition (with 4 chars) ---
str01: CTTTTGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGT
str02: CTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCG
str03: TTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTC
str04: ATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTG
str05: AGGTTTATACCTTCCTAGGTAACAAACCAACCAACTTTCGATCTCTTGTA
str06: CTTTTGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGT
str07: TTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTC
str08: TTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTC
str09: TTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTC
str10: TAATATTACCTGATGGCCGCGCCCCTCAAAAAGTGGGCCCTTGGACAGAT
str11: TAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTT
str12: ACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAA
str13: TTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTC
str14: ACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAA
str15: GTTAACAATAATCACACCATCACCGTTTTTTCAAGCGGGAAAAAATAGCC
str16: TTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTC
str17: AGATCTGTTCTCTAAACGAACTTTAAAATCTG

-CGA-ACTT-TA-AA-ATCTGT---G-TG-----GCTG--T-CACT--C--------------------------------------------------------------------
str25: -AT---CG-T--G---CT--T---T--T-G-T-C--GT-C----T-T-T---G----A-CA-CT---T----T--CGA--CT-A--A--C--TA-A--A--G-A---T-GA--T--T--T---CT-------------------
str26: -AT-TA--A-----A------G-------G-T--TT--A-----T-----A--C--C-T----T--C--C-C--A-G-----G-T-A-AC-A-A-A-C-C---A-AC-C-AACT--T--TTGA-T-CT--C-------------
str27: TA---A--A---G--------G--T--T---TA-T---AC-----CT-TC---CT--A---G--G--TA-AC--A--A-AC----CA-AC----CA--ACT-T----T-----TG--A-T---CT-CT----T-----------
str28: -A----C--T----A---A---A------G------GT------T-T---AT-----A-C--CT---T-C-C---C-A----G-------G-TA-A-CA----A-AC-C-AAC--C-A----ACT--T----T-TGATC---TC
str29: ------C---C---A---ACT---T--TCGAT-CT----C----T-TGT-A-G----ATC---TG--T----T--C--T-CT-A--A-ACGA-AC-T---T-TA-A----AA-T-CT---------------------------
str30: -AT-TA--A-----A------G-------G-T--TT--A-----T-----A--C--C-T----T--C--C-C--A-G-----G-T-A-AC-A-A-A-C-C---A-AC-C-AACT-

In [None]:
# Run the heuristic on one benchmark file and report the outcome.
_instance = util.parse("protein_n010k010.txt")
util.show(_instance)  # show the input strings

_solution = solve(_instance)
util.show(_instance, _solution)  # show the solution against each input string

_feasible = util.is_feasible(_instance, _solution)
print(f"solution is feasible: {_feasible}")

--- Condition (with 9 chars) ---
str01: MESLVPGFNE
str02: MESLVPGFNE
str03: MFVFLVLLPL
str04: MESLVPGFNE
str05: MESLVPGFNE
str06: MESLVPGFNE
str07: MFVFLVLLPL
str08: MFVFLVLLPL
str09: MESLVPGFNE
str10: MFVFLVLLPL

--- Solution (of length 18) ---
  Sol: MESLFVPFGLVFLLNPEL
str01: MESL-VP-G--F--N-E-
str02: MESL-VP-G--F--N-E-
str03: M---FV-F-LV-LL-P-L
str04: MESL-VP-G--F--N-E-
str05: MESL-VP-G--F--N-E-
str06: MESL-VP-G--F--N-E-
str07: M---FV-F-LV-LL-P-L
str08: M---FV-F-LV-LL-P-L
str09: MESL-VP-G--F--N-E-
str10: M---FV-F-LV-LL-P-L

solution is feasible: True


In [None]:
# Run the heuristic on one benchmark file and report the outcome.
_instance = util.parse("protein_n050k050.txt")
util.show(_instance)  # show the input strings

_solution = solve(_instance)
util.show(_instance, _solution)  # show the solution against each input string

_feasible = util.is_feasible(_instance, _solution)
print(f"solution is feasible: {_feasible}")

--- Condition (with 20 chars) ---
str01: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGT
str02: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGT
str03: MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHS
str04: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGT
str05: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGT
str06: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGT
str07: MFVFLVLLPLVSSQCVNLITRTQSYTNSFTRGVYYPDKVFRSSVLHSTKD
str08: MFVFLVLLPLVSSQCVNLRTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHS
str09: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGT
str10: MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHS
str11: MFVFLVLLPLVSSQCVMPLFNLITTTQSYTNFTRGVYYPDKVFRSSVLHL
str12: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGT
str13: MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHS
str14: MDPIINGSSANVYLTDSYLKGVISFSECNALGSYLFNGPYLKNDYTNLIS
str15: MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHS
str16: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGT
str17: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGF

-------------------------------------------------------------------------------------------------------
str03: M------F-----V----------------FLVL-LP-LVSSQ-CVN--L-------T-TRTQLPPAY-TNSF--TRGVYY---PDK------------V----F------------------RS-S-----V---L------------------------HS----------------------------------------------------------------------------------------------------------------------------------------------------
str04: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGF---G--D--S--V---EE-VLSEAR--Q---------H----LK----------D----G-T--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
str05: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGF---G--D--S--V---EE-VLSEAR--Q---------H----LK----------D----G-T--------------------------------------------------------------------------------------------------------------------------------------------------------------

---V----------------FLVL-LP-LVSSQ-CVN--L-------T-TRTQLPPAY-TNSF--TRGVYY---PDK------------V----F------------------RS-S-----V---L------------------------HS----------------------------------------------------------------------------------------------------------------------------------------------------
str16: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGF---G--D--S--V---EE-VLSEAR--Q---------H----LK----------D----G-T--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
str17: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGF---G--D--S--V---EE-VLSEAR--Q---------H----LK----------D----G-T--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
str18: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGF---G--D--S--V---EE-VLS

-------------------------------------------------------------------------------------------------------------------------
str25: M------F-----V----------------FLVL-LP-LVSSQ-CVN--L-----R-T--RTQLPPAY-TNSF--TRGVYY---PDK------------V----F------------------RS-S-----V---L------------------------HS----------------------------------------------------------------------------------------------------------------------------------------------------
str26: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGF---G--D--S--V---EE-VLSEAR--Q---------H----LK----------D----G-T--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
str27: M-----------------------D-----------P-------------------I-------------------------------ING--S--SANV--YL----T-DSYLK---G-V-I-SFSEC--------NA--L-GS------------Y------------L--------------FN---G------PYL-------------------K---ND--------

-S---T------A--------LFD-----------------------------AL-----------------------C---D---D--I--L--H-----R--------RLE-----S-Q-L----R---FG----------G-----V-Q----IP----PE----------VS--D-----P-----RVY------A----G--------YAL-------L-----
str38: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGF---G--D--S--V---EE-VLSEAR--Q---------H----LK----------D----G-T--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
str39: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGF---G--D--S--V---EE-VLSEAR--Q---------H----LK----------D----G-T--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
str40: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGF---G--D--S--V---EE-VLSEAR--Q---------H----LK----------D----G-T--------------------------------

--------------------------------------------------------------------------------
str47: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGF---G--D--S--V---EE-VLSEAR--Q---------H----LK----------D----G-T--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
str48: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGF---G--D--S--V---EE-VLSEAR--Q---------H----LK----------D----G-T--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
str49: M------F-----V----------------FLVL-LP-LVSSQ-CVN--L------IT--RTQ--------S-------Y-TNS-------FT--R----G--------V--Y----Y-P-D---------KVFR---------S-------S-VL-----HS---T------------------------Q---D------------------------------------------------------------------------------