In [None]:
from collections import Counter

import marimo as mo
import nbformat

import util

# Majority Merge アルゴリズム

- 計算量: $O(qkn)$. この実装ではもっとかかる.
- 近似精度: なし

与えられた文字列たちの先頭を調べ, 最も出現頻度が高い文字を採用し,
文字列たちの先頭から削除する操作を全ての文字列が空になるまで繰り返す.

- 解 $\mathrm{sol}$ を空文字列で初期化する.
- 空でない各文字列の先頭の文字 $s_1[0], s_2[0], \dots, s_n[0]$ を調べ, 最も多い文字を $c$ とする. 同数の場合は文字コードが最小の文字を選ぶ.
- $\mathrm{sol}$ の後ろに $c$ を追加する.
- 各文字列 $s_1, s_2, \dots, s_n$ に対し, 先頭の文字が $c$ である場合は先頭 1 文字を削除する.
- $s_1, s_2, \dots, s_n$ が全て空文字列になれば終了. そうでなければ先頭の文字を調べる手順に戻る.

In [None]:
def solve(instance: list[str]) -> str:
    """Build a common supersequence of ``instance`` with Majority Merge.

    At every step the current front character of each string that still has
    unread characters is inspected. The most frequent front character is
    appended to the solution, and every string whose front equals that
    character advances by one position. The loop ends once every string has
    been read completely.

    Ties between equally frequent front characters are resolved in favour of
    the smallest character (string comparison order), so the result is
    deterministic.

    Args:
        instance: The input strings. Empty strings are allowed; an empty list
            (or a list of empty strings) yields the empty string.

    Returns:
        A string that contains every element of ``instance`` as a subsequence.
    """
    positions = [0] * len(instance)
    merged: list[str] = []

    while True:
        # One pass over the strings counts the fronts of all unfinished ones;
        # an empty counter means every string has been consumed.
        front_counts = Counter(
            s[pos] for s, pos in zip(instance, positions) if pos < len(s)
        )
        if not front_counts:
            break

        # Highest count wins; among equal counts the smallest character wins.
        next_char = min(front_counts, key=lambda c: (-front_counts[c], c))
        merged.append(next_char)

        # Advance every string whose front matches the chosen character.
        for i, s in enumerate(instance):
            pos = positions[i]
            if pos < len(s) and s[pos] == next_char:
                positions[i] = pos + 1

    # Join once at the end instead of growing a string inside the loop.
    return "".join(merged)

In [None]:
# Benchmark run on "uniform_q26n004k015-025.txt": load the instance, print it,
# solve it with Majority Merge, print the solution (the recorded output shows
# each input string aligned under it), then report the feasibility check.
_instance = util.parse("uniform_q26n004k015-025.txt")
util.show(_instance)
_solution = solve(_instance)
util.show(_instance, _solution)
print(f"solution is feasible: {util.is_feasible(_instance, _solution)}")

--- Condition (with 25 chars) ---
str1: tkgnkuhmpxnhtqgxzvxis
str2: iojiqfolnbxxcvsuqpvissbxf
str3: ulcinycosovozpplp
str4: igevazgbrddbcsvrvnngf

--- Solution (of length 74) ---
 Sol: igeojiqfolnbtkgnkuhlcimnpxnhtqgxcvazgbrddbcsvruqpvinngfssbxfisycosovozpplp
str1: ------------tkgnkuh---m-pxnhtqgx---z--------v-------------x-is------------
str2: i--ojiqfolnb-------------x-----xcv---------s--uqpvi----ssbxf--------------
str3: -----------------u-lci-n--------------------------------------ycosovozpplp
str4: ige------------------------------vazgbrddbcsvr---v-nngf-------------------

solution is feasible: True


In [None]:
# Benchmark run on "uniform_q26n008k015-025.txt": load the instance, print it,
# solve it with Majority Merge, print the solution with the inputs aligned
# under it, then report the feasibility check.
_instance = util.parse("uniform_q26n008k015-025.txt")
util.show(_instance)
_solution = solve(_instance)
util.show(_instance, _solution)
print(f"solution is feasible: {util.is_feasible(_instance, _solution)}")

--- Condition (with 26 chars) ---
str1: tkgnkuhmpxnhtqgxzvxis
str2: iojiqfolnbxxcvsuqpvissbxf
str3: ulcinycosovozpplp
str4: igevazgbrddbcsvrvnngf
str5: pyplrzxucpmqvgtdfuivcdsbo
str6: pbdevdcvdpfzsmsbroqvbbh
str7: enbczfjtvxerzbrvigple
str8: rxwxqkrdrlctodtmprpxwd

--- Solution (of length 148) ---
 Sol: ipbdegevadcnbczfgbrddbcjtvxdekgnkuhlcimpfnycosjiovrzboplrvzxnhigplengfplpqfolnbsmsbroqtqgxucpmqvbbghtdfuivcdsbowxcqkrdrlctodtmprpvsuqpvissbxfwdzvxis
str1: ------------------------t----kgnkuh---mp-------------------xnh------------------------tqgx-----------------------------------------------------zvxis
str2: i-------------------------------------------o-ji-------------------------qfolnb----------x----------------------xc---------------vsuqpvissbxf-------
str3: ---------------------------------u-lci---nycos--ov---o----z-----p-----plp---------------------------------------------------------------------------
str4: i----geva-----z-gbrddbc----------------------s---vr------v--n------ngf-----

---smsbroq---------vbb-h------------------------------------------------
str7: ----e------nbczf-------jtvx-e---------------------rzb---rv----igple---------------------------------------------------------------------------------
str8: ------------------r-------x------------------------------------------------------------------------------------wx-qkrdrlctodtmprp----------x-wd-----

solution is feasible: True


In [None]:
# Benchmark run on "uniform_q26n016k015-025.txt": load the instance, print it,
# solve it with Majority Merge, print the solution with the inputs aligned
# under it, then report the feasibility check.
_instance = util.parse("uniform_q26n016k015-025.txt")
util.show(_instance)
_solution = solve(_instance)
util.show(_instance, _solution)
print(f"solution is feasible: {util.is_feasible(_instance, _solution)}")

--- Condition (with 26 chars) ---
str01: tkgnkuhmpxnhtqgxzvxis
str02: iojiqfolnbxxcvsuqpvissbxf
str03: ulcinycosovozpplp
str04: igevazgbrddbcsvrvnngf
str05: pyplrzxucpmqvgtdfuivcdsbo
str06: pbdevdcvdpfzsmsbroqvbbh
str07: enbczfjtvxerzbrvigple
str08: rxwxqkrdrlctodtmprpxwd
str09: kkqafigqjwokkskrblg
str10: lxxpabivbvzkozzvd
str11: krifsavncdqwhzc
str12: qaxudgqvqcewbfgijowwy
str13: rsxqjnfpadiusiqbezhkohmg
str14: iwshvhcomiuvddm
str15: htxxqjzqbctbakn
str16: xusfcfzpeecvwantfmgqzu

--- Solution (of length 198) ---
  Sol: ikrpxulswxqkaxbcifshtavncdeqypafbigvbcojnbczfpevadiqjuswhzcokgvdozpfzbrddbcsmivrlctodvnksuxerzbrvxqbcddeghmplnbxgfigpjzqvbctbaknhkoqcewabfghmgijlentfmgqgxzucvprbbhlgmopxwdisquqpvgissbtdfuivcdsbowxfy
str01: --------------------t---------------------------------------kg------------------------nk-u---------------hmp---x---------------nh------------------t---qgxz--v----------x--is-------------------------
str02: i-------------------------------------oj----------iq----------

----vn----------------------n--gf------------------------------------------------------------------------------------
str05: ---p------------------------yp--------------------------------------------------l-----------rz---x---------------------------------------------------------uc-p------m-------q---vg----tdfuivcdsbo----
str06: ---p----------b----------de--------v-------------d--------c---vd--pfz------sm-----------s-----br----------------------------------oq-------------------------v--bbh-----------------------------------
str07: --------------------------e-------------nbczf-------j-----------------------------t--v----xerzbrv-----------------igp---------------------------le----------------------------------------------------
str08: --r-x---wxqk----------------------------------------------------------rd-------rlctod--------------------------------------t----------------m-----------------pr-------pxwd---------------------------
str09: -k---------k---------------q--af-ig----------------

In [None]:
# Benchmark run on "uniform_q05n010k010-010.txt": load the instance, print it,
# solve it with Majority Merge, print the solution with the inputs aligned
# under it, then report the feasibility check.
_instance = util.parse("uniform_q05n010k010-010.txt")
util.show(_instance)
_solution = solve(_instance)
util.show(_instance, _solution)
print(f"solution is feasible: {util.is_feasible(_instance, _solution)}")

--- Condition (with 5 chars) ---
str01: dcbccdbcce
str02: bddbeeeebd
str03: cacdeecebe
str04: aeddddebdd
str05: acbeecabce
str06: bbabebdcba
str07: bbaeaebada
str08: eeeecbdbee
str09: ccdeedadcd
str10: bdabdbeaad

--- Solution (of length 32) ---
  Sol: bdacbaecdebecaebdadcbdeabebdcced
str01: -d-cb--c----c---d---b-------cce-
str02: bd------d-be--e-------e--ebd----
str03: ---c-a-cde-ec-eb------e---------
str04: --a---e-d-------d-d--de-b--d---d
str05: --acb-e--e--ca-b---c--e---------
str06: b---ba----be---bd--cb--a--------
str07: b---bae------aeb-ad----a--------
str08: ------e--e-e--e----cbd--be----e-
str09: ---c---cde-e----dadc-d----------
str10: bda-b---d-be-a---ad-------------

solution is feasible: True


In [None]:
# Benchmark run on "uniform_q05n050k010-010.txt": load the instance, print it,
# solve it with Majority Merge, print the solution with the inputs aligned
# under it, then report the feasibility check.
_instance = util.parse("uniform_q05n050k010-010.txt")
util.show(_instance)
_solution = solve(_instance)
util.show(_instance, _solution)
print(f"solution is feasible: {util.is_feasible(_instance, _solution)}")

--- Condition (with 5 chars) ---
str01: dcbccdbcce
str02: bddbeeeebd
str03: cacdeecebe
str04: aeddddebdd
str05: acbeecabce
str06: bbabebdcba
str07: bbaeaebada
str08: eeeecbdbee
str09: ccdeedadcd
str10: bdabdbeaad
str11: ededaaaeaa
str12: aaeaabeeac
str13: eaabcaccdb
str14: bdeeadeade
str15: caedadeeed
str16: ebcadbabbe
str17: ddceeabdea
str18: dabcddeaec
str19: aadceedaab
str20: aeecceeeaa
str21: bbdaecaade
str22: dacedaedab
str23: aaeabbbbce
str24: dedbcbcaab
str25: dbdaaebbcb
str26: debedbebac
str27: ceebcdcbde
str28: dbedaadaab
str29: cccdcbebdc
str30: aeeacdbcbd
str31: dacbeacccd
str32: ecebccdbdb
str33: ddbbcedabb
str34: aaeabaaeba
str35: ecbbcaadcd
str36: debccecdbc
str37: daacbaeebc
str38: adabeaacce
str39: daecdbacaa
str40: dacbbdcedc
str41: dedbeebbde
str42: cdadcdcdaa
str43: ceedcbaeed
str44: ceaecaaaca
str45: dcccebbbad
str46: baeeaebbde
str47: dbdebaccdb
str48: ebcbeedaea
str49: aeeebbdbca
str50: dbdabcecbb

--- Solution (of length 36) ---
  Sol: daecbdeabcedacbeadcbedabeac

------
str23: -a-----a--e-a-b----b---b---b---c--e-
str24: d-e--d--bc----b---c---a--a-b--------
str25: d---bd-a----a--e---b---b--cb--------
str26: d-e-b-e----d--be---b--a---c---------
str27: ---c--e---e---b---c--d----cbde------
str28: d---b-e----da---ad----a--a-b--------
str29: ---c-----c---c---dcbe--b----d--c----
str30: -ae---ea-c-d--b---cb-d--------------
str31: da-cb-ea-c---c----c--d--------------
str32: --ec--e-bc---c---d-b-d-b------------
str33: d----d--b-----b---c-edab---b--------
str34: -a-----a--e-a-b-a-----a-e--b--a-----
str35: --ecb---bc--a---adc--d--------------
str36: d-e-b----c---c-e--c--d-b--c---------
str37: da-----a-c----b-a---e---e--b---c----
str38: -a---d-ab-e-a---a-c-------c--e------
str39: daec-d--b---ac--a-----a-------------
str40: da-cb---b--d-c-e-dc-----------------
str41: d-e--d--b-e----e---b---b----de------
str42: ---c-d-a---d-c---dc--da--a----------
str43: ---c--e---ed-cb-a---e---e---d-------
str44: ---c--ea--e--c--a-----a--ac---a-----
str45: d--c-----c---c-e--

In [None]:
# Benchmark run on "nucleotide_n010k010.txt": load the instance, print it,
# solve it with Majority Merge, print the solution with the inputs aligned
# under it, then report the feasibility check.
_instance = util.parse("nucleotide_n010k010.txt")
util.show(_instance)
_solution = solve(_instance)
util.show(_instance, _solution)
print(f"solution is feasible: {util.is_feasible(_instance, _solution)}")

--- Condition (with 4 chars) ---
str01: CTTTTGATCT
str02: CTGCATGCTT
str03: TTGTAGATCT
str04: ATAACTAATT
str05: AGGTTTATAC
str06: CTTTTGATCT
str07: TTGTAGATCT
str08: TTGTAGATCT
str09: TTGTAGATCT
str10: TAATATTACC

--- Solution (of length 31) ---
  Sol: TTGTAGATCTATTACTGATCTATATACGCTT
str01: --------CT-TT--TGATCT----------
str02: --------CT------G--C-AT----GCTT
str03: TTGTAGATCT---------------------
str04: ----A--T--A--ACT-A---AT-T------
str05: ----AG----------G-T-T-TATAC----
str06: --------CT-TT--TGATCT----------
str07: TTGTAGATCT---------------------
str08: TTGTAGATCT---------------------
str09: TTGTAGATCT---------------------
str10: T---A-AT--ATTAC----C-----------

solution is feasible: True


In [None]:
# Benchmark run on "nucleotide_n050k050.txt": load the instance, print it,
# solve it with Majority Merge, print the solution with the inputs aligned
# under it, then report the feasibility check.
_instance = util.parse("nucleotide_n050k050.txt")
util.show(_instance)
_solution = solve(_instance)
util.show(_instance, _solution)
print(f"solution is feasible: {util.is_feasible(_instance, _solution)}")

--- Condition (with 4 chars) ---
str01: CTTTTGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGT
str02: CTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCG
str03: TTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTC
str04: ATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTG
str05: AGGTTTATACCTTCCTAGGTAACAAACCAACCAACTTTCGATCTCTTGTA
str06: CTTTTGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGT
str07: TTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTC
str08: TTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTC
str09: TTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTC
str10: TAATATTACCTGATGGCCGCGCCCCTCAAAAAGTGGGCCCTTGGACAGAT
str11: TAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTT
str12: ACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAA
str13: TTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTC
str14: ACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAA
str15: GTTAACAATAATCACACCATCACCGTTTTTTCAAGCGGGAAAAAATAGCC
str16: TTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTC
str17: AGATCTGTTCTCTAAACGAACTTTAAAATCTG

TTTA----T------A--C----C-CT--T-C-C--CAGGTA-ACAA---AC--C-A--AC-CAACTT---T-T-GATCTC-T-------------------------------------------------------
str23: T-TGTA-GATCTGT-TCTCTAAACGAACTTTAA-AATCTGTGT-GGCTG-TCACTC--------------------------------------------------------------------------------------------------------------
str24: T-TGTA-GATCTGT-TCTCTAAACGAACTTTAA-AATCTGTGT-GGCTG-TCACTC--------------------------------------------------------------------------------------------------------------
str25: -AT---CG-T--G---CT-T--------TT---------GT-----C-G-TC--T--TT-G--ACA----C----T----T--T----C-------GACTA---A-CT---A----AA-G-AT--GA----T---T--TC-T------------------------
str26: -AT-TA--A-----A---------G--------------GT-T----T-AT-AC-C-TTC-C--CAGGTA-ACAA---AC--C-A--AC-CAACTT---T-T-GATCTC---------------------------------------------------------
str27: TA---A--A---G-----------G---TTTA----T------A--C----C--T--T-C-CTA--GGTA-ACAA---AC--C-A--AC-CAACTT---T-T-GATCTC-T-T---------------------------------------------

---C--T--G-AT--------G---T--A-C-----G---AG----AT---TCGT-----ACA--TAC-C--T----T----GTAC-------GA--A---AT-TCGTA-C--AAC--A-C----C-AT----------------------------------
str47: TA---A--A---G-----------G---TTTA----T------A--C----C--T--T-C-C--CAGGTA-ACAA---AC--C-A--AC-CAACTT---T--CGATCTC-T-T-----------------------------------------------------
str48: -A----C-A-----A---------GA-------------G---A-----AT-AC---------A-AGG---------GA------G----------GAC---C--T---G---C-T---G---CT-A----TACG-C-----G-C-TGCCT-C-------TTTCGC
str49: ------C--T-T-T-T--------GA--T----C--TCT-TGTAG----ATC--T-GTTC--T-C---TA-A-A--CGA--ACT--T--T-AA----A--ATC--T---GT---GT--------------------------------------------------
str50: T-TGTA-GATCTGT-TCTCTAAACGAACTTTAA-AATCTGTGT-GGCTG-TCACTC--------------------------------------------------------------------------------------------------------------

solution is feasible: True


In [None]:
# Benchmark run on "protein_n010k010.txt": load the instance, print it,
# solve it with Majority Merge, print the solution with the inputs aligned
# under it, then report the feasibility check.
_instance = util.parse("protein_n010k010.txt")
util.show(_instance)
_solution = solve(_instance)
util.show(_instance, _solution)
print(f"solution is feasible: {util.is_feasible(_instance, _solution)}")

--- Condition (with 9 chars) ---
str01: MESLVPGFNE
str02: MESLVPGFNE
str03: MFVFLVLLPL
str04: MESLVPGFNE
str05: MESLVPGFNE
str06: MESLVPGFNE
str07: MFVFLVLLPL
str08: MFVFLVLLPL
str09: MESLVPGFNE
str10: MFVFLVLLPL

--- Solution (of length 18) ---
  Sol: MESLVPGFNEVFLVLLPL
str01: MESLVPGFNE--------
str02: MESLVPGFNE--------
str03: M------F--VFLVLLPL
str04: MESLVPGFNE--------
str05: MESLVPGFNE--------
str06: MESLVPGFNE--------
str07: M------F--VFLVLLPL
str08: M------F--VFLVLLPL
str09: MESLVPGFNE--------
str10: M------F--VFLVLLPL

solution is feasible: True


In [None]:
# Benchmark run on "protein_n050k050.txt": load the instance, print it,
# solve it with Majority Merge, print the solution with the inputs aligned
# under it, then report the feasibility check.
_instance = util.parse("protein_n050k050.txt")
util.show(_instance)
_solution = solve(_instance)
util.show(_instance, _solution)
print(f"solution is feasible: {util.is_feasible(_instance, _solution)}")

--- Condition (with 20 chars) ---
str01: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGT
str02: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGT
str03: MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHS
str04: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGT
str05: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGT
str06: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGT
str07: MFVFLVLLPLVSSQCVNLITRTQSYTNSFTRGVYYPDKVFRSSVLHSTKD
str08: MFVFLVLLPLVSSQCVNLRTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHS
str09: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGT
str10: MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHS
str11: MFVFLVLLPLVSSQCVMPLFNLITTTQSYTNFTRGVYYPDKVFRSSVLHL
str12: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGT
str13: MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHS
str14: MDPIINGSSANVYLTDSYLKGVISFSECNALGSYLFNGPYLKNDYTNLIS
str15: MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHS
str16: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGT
str17: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGF

----------------------------------------------------------------------------------
str07: M------F-----V----------------F-------L-----------VLLPLVSSQCVNLITRT--Q-------S-----Y-------------TNSFTRGVYYPDKVFRSSVLHST-----K----------D----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
str08: M------F-----V----------------F-------L-----------VLLPLVSSQCVNL--RTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHS--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
str09: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGT----------------------------------------------------------------------------------------------------------------------------------

---------------------------------------------------------------------------------------------------------------------
str21: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGT-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
str22: M--L----N--------------R---------------------------------------I-----Q-----T------------------L-----------------------------MK---------T--------------A-------N-----N-Y------ET---IEIL------R--N--------Y--L----RL-----------------Y-IILA-RNEEG-------------------RGILIYDDNIDSV--------------------------------------------------------------------------------------
str23: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGT-----------------------------------------------------------------------------------------------

---------------------------------------------------------------------------------------
str29: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGT-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
str30: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGT-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
str31: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGT-----------------------------------------------------------------------------------------------------------------------------

----------------------------------------------------------
str37: ME---PG----------------------------------A------------------------------------F------------S-----T------------------------A-----------L----------F-------------------D------A--------LCDDILHR-----RLES--------Q--L------R-FGG----V-----------------QIPPEVS----D--PR-----------V---------------------------------YAGYALL----------------------------------------------
str38: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGT-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
str39: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGT----------------------------------------------------------------------------------------------------------------------------------------------------------

-------
str48: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGT-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
str49: M------F-----V----------------F-------L-----------VLLPLVSSQCVNLITRT--Q-------S-----Y-------------TNSFTRGVYYPDKVFRSSVLHSTQD-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
str50: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGT-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------