In [None]:
import opt_note.scsp as scsp

In [None]:
import marimo as mo
import nbformat

# ベンチマーク

## 注意点

Dual bound を表示しているが, これはアルファベットアルゴリズムで構築した解の部分配列のうち, 共通超配列になっているものの中で最短のものを求める問題の dual bound であり, 与えられた SCSP そのものに対する dual bound ではないことに注意.

最適性に関しても同様で, `OPTIMAL` と出ている場合は, アルファベットアルゴリズムで構築した解の部分配列である共通超配列の中では最短であるというだけであり, 与えられた SCSP の最短共通超配列であるとは限らない.

実際に簡単なケースで実験をする. `ba`, `cb` の最短共通超配列は `cba` である:

In [None]:
# Reference run: solve the two-string instance with the `scsp.model.didp` model.
_instance = ["ba", "cb"]
_model = scsp.model.didp.Model(_instance).solve()
_solution = _model.to_solution()

# Show the instance on its own, then the instance aligned against the solution.
scsp.util.show(_instance)
scsp.util.show(_instance, _solution)

# Report the optimality flag and bound exposed by the solved model, using the same
# "best bound" label as the other cells so the outputs can be compared directly.
print(f"solution is optimal: {_model.solution.is_optimal}")
print(f"best bound: {_model.solution.best_bound}")

--- Condition (with 3 chars) ---
str1: ba
str2: cb

--- Solution (of length 3) ---
 Sol: cba
str1: -ba
str2: cb-

solution is optimal: True
bset bound: 3


一方, アルファベットアルゴリズムに基づく方法 (`dr_alphabet_cpsat`) では, 同じインスタンスに対して長さ 4 の共通超配列が `OPTIMAL` と判定されてしまう:

In [None]:
# Same two-string instance as the previous cell, now solved with the
# `scsp.model.dr_alphabet_cpsat` model.
_instance = ["ba", "cb"]
_model = scsp.model.dr_alphabet_cpsat.Model(_instance).solve()
_solution = _model.to_solution()
# Show the instance on its own, then the instance aligned against the solution.
scsp.util.show(_instance)
scsp.util.show(_instance, _solution)

# Status and bound are read from the model's `cpsolver` attribute
# (presumably an OR-Tools CP-SAT solver -- TODO confirm). As explained in the
# notes above, they refer to the restricted problem, not to the original SCSP.
print(f"solution status: {_model.cpsolver.status_name()}")
print(f"best bound: {_model.cpsolver.best_objective_bound}")

--- Condition (with 3 chars) ---
str1: ba
str2: cb

--- Solution (of length 4) ---
 Sol: bcab
str1: b-a-
str2: -c-b

solution status: OPTIMAL
best bound: 4.0


## 本題

In [None]:
def bench(instance: list[str]) -> None:
    """Solve ``instance`` with the ``dr_alphabet_cpsat`` model and print a report.

    The report consists of the instance itself, then either the solution
    together with a feasibility check or a "not found" notice when the model
    yields no solution, and finally the solver status, objective value and
    best bound read from the model's ``cpsolver`` attribute.

    Args:
        instance: The strings that make up the SCSP instance.
    """
    solved = scsp.model.dr_alphabet_cpsat.Model(instance).solve()
    answer = solved.to_solution()

    scsp.util.show(instance)
    if answer is None:
        print("--- Solution not found ---\n")
    else:
        scsp.util.show(instance, answer)
        feasible = scsp.util.is_feasible(instance, answer)
        print(f"solution is feasible: {feasible}")

    # Solver statistics are printed whether or not a solution was found.
    cpsolver = solved.cpsolver
    print(f"solution status: {cpsolver.status_name()}")
    print(f"best objective: {cpsolver.objective_value}")
    print(f"best bound: {cpsolver.best_objective_bound}")

In [None]:
# Benchmark the example instance loaded from "uniform_q26n004k015-025.txt".
# The file name presumably encodes alphabet size (q26), number of strings (n004)
# and string-length range (k015-025) -- TODO confirm against the data set docs.
bench(scsp.example.load("uniform_q26n004k015-025.txt"))

--- Condition (with 25 chars) ---
str1: tkgnkuhmpxnhtqgxzvxis
str2: iojiqfolnbxxcvsuqpvissbxf
str3: ulcinycosovozpplp
str4: igevazgbrddbcsvrvnngf

--- Solution (of length 62) ---
 Sol: tuklcignycekosuhjmovaiqfozpxplnhtqgbrxzdvxdbcvsuvqprvinsnsbgxf
str1: t-k---gn---k--uh-m--------px--nhtqg--xz-vx-----------i-s------
str2: -----i------o---j----iqfo----ln----b-x---x--cvsu-qp-vi-s-sb-xf
str3: -u-lci-nyc--os----ov----ozp-pl--------------------p-----------
str4: -----ig---e--------va----z--------gbr--d--dbc-s-v--rv-n-n--g-f

solution is feasible: True
solution status: OPTIMAL
best objective: 62.0
best bound: 62.0


In [None]:
# Benchmark the example instance loaded from "uniform_q26n008k015-025.txt".
bench(scsp.example.load("uniform_q26n008k015-025.txt"))

--- Condition (with 26 chars) ---
str1: tkgnkuhmpxnhtqgxzvxis
str2: iojiqfolnbxxcvsuqpvissbxf
str3: ulcinycosovozpplp
str4: igevazgbrddbcsvrvnngf
str5: pyplrzxucpmqvgtdfuivcdsbo
str6: pbdevdcvdpfzsmsbroqvbbh
str7: enbczfjtvxerzbrvigple
str8: rxwxqkrdrlctodtmprpxwd

--- Solution (of length 102) ---
 Sol: igpybdepuvalortxzjkwxcginqbkrcduyzcfhjtvdmopxerflnzbchstmqvgostxdtxbcmprvfnosunqzpvxisvwcdgpsblpxbefho
str1: --------------t---k---g-n--k---u----h----m-px----n---h-t-q-g---x----------------z-vxis----------------
str2: i-----------o----j-----i-q---------f------o-----ln-b-----------x--x-c---v---su-q-pv-is------sb--x--f--
str3: --------u--l---------c-in-------y-c-------o-----------s-----o-----------v--o----zp---------p--lp------
str4: ig----e--va-----z-----g---b-r-d---------d----------bc-s---v------------rv-n---n-----------g--------f--
str5: --py---p---l-r--z---x----------u--c--------p------------mqvg--t-d--------f---u------i-v-cd--sb-------o
str6: --p-bde--v--------------------d---c----vd

In [None]:
# Benchmark the example instance loaded from "uniform_q26n016k015-025.txt".
bench(scsp.example.load("uniform_q26n016k015-025.txt"))

--- Condition (with 26 chars) ---
str01: tkgnkuhmpxnhtqgxzvxis
str02: iojiqfolnbxxcvsuqpvissbxf
str03: ulcinycosovozpplp
str04: igevazgbrddbcsvrvnngf
str05: pyplrzxucpmqvgtdfuivcdsbo
str06: pbdevdcvdpfzsmsbroqvbbh
str07: enbczfjtvxerzbrvigple
str08: rxwxqkrdrlctodtmprpxwd
str09: kkqafigqjwokkskrblg
str10: lxxpabivbvzkozzvd
str11: krifsavncdqwhzc
str12: qaxudgqvqcewbfgijowwy
str13: rsxqjnfpadiusiqbezhkohmg
str14: iwshvhcomiuvddm
str15: htxxqjzqbctbakn
str16: xusfcfzpeecvwantfmgqzu

--- Solution (of length 145) ---
  Sol: hkrstxikloqwxagjnpquxybdefikpqrsuvadforlnrzbcghimpqvxzbfjnqrtuvxyzcdhopsteimqwdbeovcfgkotzdfrsuhkmqsxzbprvxikrjoqsuvcdswabgpxbhwdlntfkmngpqzefouy
str01: ----t--k------g-n----------k----u-------------h-mp--x----n----------h---t---q--------g--------------xz---vxi-----s-------------------------------
str02: ------i--o-----j----------i--q------fo-ln--b--------x----------x--c---------------v----------su---q----p-v-i-----s----s--b--x-------f------------
str03: ---------------

In [None]:
# Benchmark the example instance loaded from "uniform_q05n010k010-010.txt".
bench(scsp.example.load("uniform_q05n010k010-010.txt"))

--- Condition (with 5 chars) ---
str01: dcbccdbcce
str02: bddbeeeebd
str03: cacdeecebe
str04: aeddddebdd
str05: acbeecabce
str06: bbabebdcba
str07: bbaeaebada
str08: eeeecbdbee
str09: ccdeedadcd
str10: bdabdbeaad

--- Solution (of length 29) ---
  Sol: abedcdebeacdeabcebdbceabdacde
str01: ---dc--b--c----c--dbc-----c-e
str02: -b-d-d-be---e---e----e-bd----
str03: ----c----acde---e---ce-b----e
str04: a-ed-d-----d------d--e-bd--d-
str05: a---c--be---e--c------ab--c-e
str06: -b-----b-a----b-ebd-c--b-a---
str07: -b-----b-a--ea--eb----a-da---
str08: --e---e-e---e--c-bdb-e------e
str09: ----c-----cde---e-d---a-d-cd-
str10: -b-d-----a----b---db-ea--a-d-

solution is feasible: True
solution status: OPTIMAL
best objective: 29.0
best bound: 29.0


In [None]:
# Benchmark the example instance loaded from "uniform_q05n050k010-010.txt".
bench(scsp.example.load("uniform_q05n050k010-010.txt"))

--- Condition (with 5 chars) ---
str01: dcbccdbcce
str02: bddbeeeebd
str03: cacdeecebe
str04: aeddddebdd
str05: acbeecabce
str06: bbabebdcba
str07: bbaeaebada
str08: eeeecbdbee
str09: ccdeedadcd
str10: bdabdbeaad
str11: ededaaaeaa
str12: aaeaabeeac
str13: eaabcaccdb
str14: bdeeadeade
str15: caedadeeed
str16: ebcadbabbe
str17: ddceeabdea
str18: dabcddeaec
str19: aadceedaab
str20: aeecceeeaa
str21: bbdaecaade
str22: dacedaedab
str23: aaeabbbbce
str24: dedbcbcaab
str25: dbdaaebbcb
str26: debedbebac
str27: ceebcdcbde
str28: dbedaadaab
str29: cccdcbebdc
str30: aeeacdbcbd
str31: dacbeacccd
str32: ecebccdbdb
str33: ddbbcedabb
str34: aaeabaaeba
str35: ecbbcaadcd
str36: debccecdbc
str37: daacbaeebc
str38: adabeaacce
str39: daecdbacaa
str40: dacbbdcedc
str41: dedbeebbde
str42: cdadcdcdaa
str43: ceedcbaeed
str44: ceaecaaaca
str45: dcccebbbad
str46: baeeaebbde
str47: dbdebaccdb
str48: ebcbeedaea
str49: aeeebbdbca
str50: dbdabcecbb

--- Solution (of length 34) ---
  Sol: adeabcdebdacebcdeacebdeabcd

In [None]:
# Benchmark the example instance loaded from "nucleotide_n010k010.txt".
bench(scsp.example.load("nucleotide_n010k010.txt"))

--- Condition (with 4 chars) ---
str01: ATGGGATACG
str02: ATACCTTCCC
str03: CACGAATTGA
str04: TAAAATCTGT
str05: AGGTAACAAA
str06: TTCCTAGGTA
str07: TTGTAGATCT
str08: TGGGAAGTTC
str09: TTCCACAACT
str10: TCTAAACGAA

--- Solution (of length 24) ---
  Sol: TATGCGTACGACTATCGTACGTAC
str01: -ATG-G---GA-TA-CG-------
str02: -AT----AC--CT-TC---C---C
str03: ----C--ACGA--AT--T--G-A-
str04: TA-----A--A--ATC-T--GT--
str05: -A-G-GTA--AC-A----A---A-
str06: T-T-C---C---TA--G---GTA-
str07: T-TG--TA-GA-T--C-T------
str08: T--G-G---GA--A--GT---T-C
str09: T-T-C---C-AC-A----AC-T--
str10: T---C-TA--A--A-CG-A---A-

solution is feasible: True
solution status: OPTIMAL
best objective: 24.0
best bound: 24.0


In [None]:
# Benchmark the example instance loaded from "nucleotide_n050k050.txt".
bench(scsp.example.load("nucleotide_n050k050.txt"))

--- Condition (with 5 chars) ---
str01: TAGTAGTAGACTCCGGAAGTGACAAACCCTGAAAAGAATGGATAAATATA
str02: GGATAAACACTCCCGAAAATAATTTGACTTAAACAACGCGACAGTTCAAG
str03: ATACCTTCCTAGGTAACAAACCAACCAACTTTTGATCTCTTGTAGATCTG
str04: TAAATTATAATCTTATACTAGTAAAAAATAGGGTGTAACCGAAAACGGTC
str05: TTAAAACAGCCTGTGGGTTGCACCCACTCACAGGGCCCACTGGGCGCAAG
str06: ATGACTTCCAATGGATCCCAACCTCAAGCTTCCACCCCAATGGTTTCAGC
str07: AACAAACCAACCAACTTTTGATCTCTTGTAGATCTGTTCTCTAAACGAAC
str08: ATGAAAACGAAAATTATTATCAAGGGTATGGAAGTGGAAGCTGACGAAAT
str09: ACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTA
str10: TTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTC
str11: GCAGAGCATTTTCTAATATCCACAAAATGAAGGCAATAATTGTACTACTC
str12: ATGAGCCAAGATCCGACGAAGAGCCCCAAGGAGGAGAAGGAGGGACCCCC
str13: TCTCACAGTTCAAGAACCCAAAGTACCCCCCATAGCCCTCTTAAAGCCAC
str14: AGGTTTATACCTTCCTAGGTAACAAACCAACCAACTTTCGATCTCTTGTA
str15: AGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTA
str16: TAAAACAACTCAATACAACATAAGAAAATCAACGCAAAAACACTCACAAA
str17: CCGCCCATTTGGGCGGCTCTCGAGCGATAGCT

In [None]:
# Benchmark the example instance loaded from "protein_n010k010.txt".
bench(scsp.example.load("protein_n010k010.txt"))

--- Condition (with 19 chars) ---
str01: MALSYCPKGT
str02: MQSSLNAIPV
str03: MPLSYQHFRK
str04: MEEHVNELHD
str05: MSNFDAIRAL
str06: MFRNQNSRNG
str07: MFYAHAFGGY
str08: MSKFTRRPYQ
str09: MSFVAGVTAQ
str10: MESLVPGFNE

--- Solution (of length 46) ---
  Sol: MPQSAENSFLRSVYCNPQADEGHKVAFGNSTEIRALNRGHKPVYDQ
str01: M---A----L-S-YC-P------K---G--T---------------
str02: M-QS---S-L-----N--A-------------I--------PV---
str03: MP-------L-S-Y---Q----H---F------R------K-----
str04: M----E--------------E-H-V---N--E---L---H----D-
str05: M--S--N-F----------D-----A------IRAL----------
str06: M-------F-R----N-Q----------NS---R--N-G-------
str07: M-------F----Y----A---H--AFG----------G----Y--
str08: M--S-------------------K--F---T--R---R---P-Y-Q
str09: M--S----F---V-----A--G--V-----T---A----------Q
str10: M----E-S-L--V---P----G----F-N--E--------------

solution is feasible: True
solution status: OPTIMAL
best objective: 46.0
best bound: 46.0


In [None]:
# Benchmark the example instance loaded from "protein_n050k050.txt".
bench(scsp.example.load("protein_n050k050.txt"))

--- Condition (with 20 chars) ---
str01: MRHLNIDIETYSSNDIKNGVYKYADAEDFEILLFAYSIDGGEVECLDLTR
str02: MERRAHRTHQNWDATKPRERRKQTQHRLTHPDDSIYPRIEKAEGRKEDHG
str03: MEPGAFSTALFDALCDDILHRRLESQLRFGGVQIPPEVSDPRVYAGYALL
str04: MGKFYYSNRRLAVFAQAQSRHLGGSYEQWLACVSGDSAFRAEVKARVQKD
str05: FFRENLAFQQGKAREFPSEEARANSPTSRELWVRRGGNPLSEAGAERRGT
str06: MDPSLTQVWAVEGSVLSAAVDTAETNDTEPDEGLSAENEGETRIIRITGS
str07: MAFDFSVTGNTKLDTSGFTQGVSSMTVAAGTLIADLVKTASSQLTNLAQS
str08: MAVILPSTYTDGTAACTNGSPDVVGTGTMWVNTILPGDFFWTPSGESVRV
str09: MNTGIIDLFDNHVDSIPTILPHQLATLDYLVRTIIDENRSVLLFHIMGSG
str10: MFVFLVLLPLVSSQCVNLRTRTQLPPAYTNSFTRGVYYPDKVFRSSVLHS
str11: MDSKETILIEIIPKIKSYLLDTNISPKSYNDFISRNKNIFVINLYNVSTI
str12: MLLSGKKKMLLDNYETAAARGRGGDERRRGWAFDRPAIVTKRDKSDRMAH
str13: MNGEEDDNEQAAAEQQTKKAKREKPKQARKVTSEAWEHFDATDDGAECKH
str14: MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGT
str15: MRYIVSPQLVLQVGKGQEVERALYLTPYDYIDEKSPIYYFLRSHLNIQRP
str16: MPRVPVYDSPQVSPNTVPQARLATPSFATPTFRGADAPAFQDTANQQARQ
str17: MFVFLVLLPLVSSQCVNLRTRTQLPLAYTNS