From 8c17bb088b2cacb3f670264251bddb88ff16d884 Mon Sep 17 00:00:00 2001 From: Ilya Shlyakhter Date: Thu, 14 Feb 2019 12:51:34 -0500 Subject: [PATCH] let scaffolder use ambiguous alignments when no unambiguous ones exist (#904) When scaffolding contigs to a reference, if no unambiguous alignments cover a part of reference, let the scaffolder consider ambiguous ones, when all alignments mostly agree on the sequence. The ambiguity can be the result of a misassembly, where the same reference part appears twice in a contig. Also, in case of a repeat, dropping all ambiguous alignments could leave a hole in the assembly. --- .../contigs.ebov.ambig.fasta.gz | Bin 0 -> 39442 bytes .../expected.ebov.ambig.fasta | 311 ++++++++++++++++++ .../expected.lasv.ambig.fasta | 8 +- .../ref.ebov.makona_C15.fasta | 273 +++++++++++++++ test/unit/test_assembly.py | 17 + tools/mummer.py | 80 +++-- 6 files changed, 656 insertions(+), 33 deletions(-) create mode 100644 test/input/TestOrderAndOrient/contigs.ebov.ambig.fasta.gz create mode 100644 test/input/TestOrderAndOrient/expected.ebov.ambig.fasta create mode 100644 test/input/TestOrderAndOrient/ref.ebov.makona_C15.fasta diff --git a/test/input/TestOrderAndOrient/contigs.ebov.ambig.fasta.gz b/test/input/TestOrderAndOrient/contigs.ebov.ambig.fasta.gz new file mode 100644 index 0000000000000000000000000000000000000000..a53fd24da4701e28e4c75f592dd37543e09309a2 GIT binary patch literal 39442 zcmeFYV~}Kzw+W zmq0F;T-KgJloIJ_ZVy8p8hSk*cYPB~9F2(bGkovHsw4j zobG-IdFOn8f3M&D?*Edpl<=aY_~HKoynBz>Wx9R;Q2cm!uYY;427J$C+HEc{ynYpO z_%r{chxq5}{siFp?jN;O?fHIkKMQCdUsB5dBjB?nb?Vn=E9AHQY;lZt^7ioDxBJPF z_j;z4;xdNmq5yLk}VJj;6^QFQ8m z_|pgF*MPfub}*BH-SyBF1%!`_Egvai+WtLvCRV_BVFQ|HXx%q`7I(6n>;LM;)W?*$ z1H3=)J(D~QF1VgQ%TwGva)Umx+CwX2JU%l^@RI2m0PJ4A40{7-q4TQmTYjCaaUHo8`P7Er7+qH?{WzAGs-rUB$?Sw_NPc6Z0e7QbU2wogQx5-K+A9;F*xAA&Mi!o4&eOp?8c=TrS#Z6r?}P8f zmpcd3GUw^=p8e#FIs|LhtE7w3f31(>si9Nu;h71@l3S3{U4O)GbbOz_`BPDd?Plo7 z&l37{Qx=!6$m#q{b*5n6f~ObC?(H=}kxoA0?u3+9dNy%<005{oIV|6n~?pz_UYpfX%XF8tD0@~ zyhAph<5f6NT^0$35Za+vEW^64Z;8kfC@IuVM}P*b$!C@o#yS#=hHb+c)usbA{%;-zn92lJ5_= zfa07T?riiI(s;q<6*|Ez$}{9r!tydKe;zecmYPOJcZEJ}HJRh91;m~(dX)@FX7=px zpSs~}WMrSC7U1#20lk((DjACG3XmOA009?3aD=uYoyF7V^GkZ*<^G?!1Qh;5dv9+K zx5CEq;}KikY~aI2*Y{nr7vGi?iCU~Z9EYyt&6ylcMnGZP@A=?G74f{%xIh9+!{41B z7Y5l}pO%yHcyHxn9rWCNH^yKB(V~-0gh}c#8UZ}*Q zMX&k z0m@7JsXjn=dC}e4nd3c6QH>tXQ+& zhx&aeNk?< zwDhC@?EP!{XzI*H3uE{uu7THzPLalRI9JujA{2|d+Fj}zj{woi5|itEMI5d@^UKV& znFPTZeJwYrTYSXhpVjrG6UO?c90UEE5?)7tM}vVS1>{~0f+--~%ok<4GS~N(%xBSi z6M%y?X3fyd+BmHRO_RIn0{I++$2BH^ciKo^Xh%cub>MUeS?1g>?w;)Hi~qohGR^Wm z8}PX-TyuhNs-e@2Gi?RXd-NJy_HrVa?j5RXC#V7}{29ejT5u~=Uq_;B)A zK~VGhGY3OiX>DgWCkPI_t04%zrTA(h7=pK6(1ZuW)x!!}Gaq{~(1opa=E6a9JpZ_^J20D$p-t{^<=lk8&>b&@*qE+*d_kkH`f3G|cqVWETU+%s#?)~GQ8gVzf zTs3!Yal6>A4x7+0?nO|2@5L=dq^zr{(T%AY?B{Zxr`*+;e!{eauc3!uQt7^lH-SA0 z(6?~8kS*(pQA3F`fiMLnIz4lahfS4hlQvE^-ws+uglmwF-uhq{O7OR3*O4Y zX zOwKdQ$XQ*BbD+G{-9OX;`8?u@`#=xBabjTH<6hbPlESL?7@rxcP^iUwm}fmfn3q~1 ztsJnhn%|CPXmE?QkG-Z6ySlg8EzdV`JgJf%%W+Q9DL5Mo6wa2NmiAYWh$q?`E%j&^ zLhxJNsea?zjzHlTkz3q2J881@4{uRf_`&5kZgW?;QzxPZfSS+Fk#kI+4Ad3GITqtV zMaPcZq;g@;tP#e^_L1AP#M6hz^-;{hN0|1x1W(%PfCY*1$5AAv=E54%?G9q85 z3%0gFt*Qposm5(OyITxiyF>Gq46jFXO+QBcyl^{`E;pQ&hem6Pz*vskBg*O04tk}t~2O`OO+i^IC9H}WCjBw zF3HE9oWtZOpWpZZ+l?bU|5jdigRedf$6=t|VCNIgVwXHStZq@l3z>+e>sLkkM?^`z zD%JE$O`DdTc46_KW#kA#s8MfVW3JEo54d|G;&bzma_=LbHBq}ioyJUd_hwz<1o-Pf z`9z*7bns??tWiBxCM}26D(9a2rn>({&eFon2Yfh@Ey8ii1tHfKtH-hkB>jcA9bqGF z+WU*QX7e3rSP>VpjtzZlVr$BD0ZpOITwDQk91M4t^?Rkw>5pdHz@J}{hJv81a|#|n zefHHs&zBi|TVE@{Sc>&7^Ltb;<_ci1;&s3SfUuNsjOgk{F*x;Iv~kA42Mgf50W33P zxFd=oK?mhQLBb|(GjVK%xkP~D{mKOV1@U7wYy`}lCz-0*;Ef1r%JYh@d~l8W=0z>n zRC_Lv5y5g^HV%yYAuW|K-@rI)57t;V(6g+>;xijyp7RaL$gVRREQSS=;r$-m2~KM7 z6d^bCi1!*gCxZGq?ykLZya&{DRoOC5#I?7#V=>+jZtnd{cCq3ThJ?vbwX^OVs(SAi zJR`w};YyN6iqqI{;D+IZ=<{uw)-|J6>Tt0St&y9h12DO#Q*|X*X3P_^>{ z;qZXxH`|9Cli{5?x0?CD%|Ukjf;Cl!HY7An;v5w)P0+c>Gh!q}b(AY_c?#-W+YL99 z;XH5jc@5#Zc$hdoFQqQFIi34b1fy>CLsaxzejnNhUs42IUpBRh{v`ukzNi;d8(xQ z>~WSqJp&C9=kQCiK#K6LEf=Yb0F(<}%pKqE!*H8mN-u+b4REVF9Q;AISqZsT+!5uH&zH;Lns zKX;buQUq%y$>#2F++Pl7FV1t%*O($Br`LKg6F40_?5EC|B-hXdt8dj50JU(W;^uuX zrX($kS?0MoB9~905?5kHwQ)u$qx(=~V5S2fV1@;C|BYaM?Ym?#q7NB>`b6&^HUSbfYLp(m1~}Vnv*}Q(BUfxT6JEA1?Ik z8ZO8P0~Q#wnIfW)-fBlwXB;6Ps`+y}J*zf`?zw-bm?h>@u$IzDR|S%9_9&@^)k10* zZr3_kONs+&ajIeQp9+)0b{63aRH$jQbN+|qnGBIRiicR8erDEpeseQDSr&zjwLM7W zaJ^9|sDc=;-hwqAw!QK}VfVOM??d8<2$+7ocDhiIpa``~Ot=ox0x;WeeI?oAiRlsC zC0U!c$(h~)1P>vi6CJv4s9$`5tzkYz4?GZsZoiFY#Bd4R_a*A&lF<|Q`k~Muhpf$( zU0811GPwrwbHZJV?vehK^!o2mr4<5pITxalg!u%xwLDTVzWv^3xDZxAtqr7tO4&JZ zI)yFREdwoQ>$m8JRiqxJG%Y&2gnE5g5^R4QnAVEndMWgTS~#%PjAMv2>wU+|;J@11 z(5k^MqykTA#b*z`*ufIPFV^lKst)L%xI^ST<>c~j&vSK`I(aLd!6R)t6F=~+$85{G z1AW1(TI`tK)4VpzdTT&~b34w;u}D6R8Pv0u98-!Kye*&~k;`FVK6)Jw1F2C3+bkGs zsgpSj$V`sOkobD{cCVEj^PPw(K+sTV$MAdk&x2=@&TrC;gvl`xKM(%jgdbhxSXvMe zRQY=ppabiq#3&tp%EH9=w}pXw5a9%vDAS+^DxjcG&zXV3`~x9i!I+*fFqR^&>cY$i zNT36ed=L<>;(qGFh0l*7ph4Hb&xce}Xh;ZwYZMfD!vz)40UKI(ZQ;VapM`glF(&JOZ{9g?=4Q`(cf*y!~4%8sTe^!FOe(HhuP!=vsS``5m z86^M%^GG%W3*Nvbz(A2VJ^B>&S2x3_MCnpb<$q)Oiwc(M=bycg;{!8EA^P#w0Fa!n`MEvPw^W*2ROt!y#=P zgPDq5>fcX^!R+VRq&@mnL-@CmE#$M2MF$BEM)%Xi9$n$vhlfYT_x>>;u;6=v(Okit zn$Yc+LJIp~OE!mQxj{Kg{3bcLAp)t(wwPmQJP0T~)e{1^(_CFWyM_+voCOjef zTuSI0#1+iv*7NbV9NfVkq5JV<))5x+5&vDqKUPog?jgF0#PuK){tsmRQ!K{&rv?9L z{zsTj;4ZX@nm<7B-#z(J{y)Nh!2S{HP@5GOW(f= zwhN>GPv!rx@4pKFo0b3HS#sSnps{Xzsfec^hB*4qgK!0kSWebvF74qBF$-geS+ zKi|lTr7=Hukey_75M z8MABn&5fTki1$OoGqESDzF*@=#W_8(*RO1WwnyLg;`ola< zuzS5P(2?VA;BY(AcmYB7r_G3QAvycwj!=jIRD3_ArRS;bRvS7ei$hJIZGY7&t8~B# zg~f8e(iOY}-odwNUEHK>kNYt3Tt*%4op8Cpwm@yy2TS&HdNUexlG12;4Z;X7d#~V-w5D11lOcORhIFPZaK9u%%)L45^NUF4p~N(MAuPfb-NU22k5? zeLoYHh;sRbhl7n(X&UUE_$qsB5VzEfF#=CgQ!E=V`wnW6i3UTkmx~I zqSJ5Nqbr3b`~$b&1(HpO;2!XLGrTy^WHO56VY)jXf;DjYF(UbfR+n#kAeOTwHmdwD zD%9Ca)hcaeUFdX*Ru%V4PGd5gE*_x_++DfW1duVMAfSX);YRnBL62g_%;x2*!P5a4 zC)gNJ(gcFiv}fSdqy}j$xi{OlINiC3f|9P~qJAS1aZBpVr3J*2?#`yY?$PUFt%K-f z5dwoL;Nm^7Ke|_rnqa`ceKQ`O>}2k$6f9>ar)JX#rH0`=X1~fl0PA|AzP1x}1_`FKP=aTyFsnV2BF`sGT(Yng) zICm<%(D~GKC!=sfTUB~d3uIc@fk{@z$fEs3r4im2vPAT7VIhh971GRSX}%E%#n-0p zP-K0&I6!D_xib%nrxd(CD3Qk%BW4}Mve~)o-jOnuhEcsKuWg6Rm({dPA7p}1^YK7a zW`uFD3Br3f?7m;P=0vJJ^zHU|*I*nG2+W=$ZqL*IiA|`J9kmDtgR2O^d?eKgC)`?F zQ;Jif)1@vhP$WE4`i;?zMh#l*rwX~CK`6Pih#n^-KN-mjTU-#HJHD%zn0#;#?y?Qf@l=%wHG(*5 z)XV|L%MnwBVyhmP1B3;FS-y)lND6w3*m&J8tCA@ucPK-yRNAE@a0uvy0o4MHl**J6gw_%hV))DGOor}+LNW3brv<&P$S~Q%QEH8HwP`W8FmS@T%T>*W3HRi>WHoRy{qZZaEOzhhxT+=eQt z6N%VYnb(I7OK4y>r&D3qNZ~8TkuW;+S=9IxB9yTE_Loyc*kw(P;cqr;l8?Pe<>$Q4 zW%c0%zLnZQ^yk7$mTw|-_y2lXbp$#Cf|pT+&BjP%3U^ zD;i>bgdjn3C#79kKN`N{slGHTPB6g|8{39E770aV)fs=)^a$eFlD23ARVf<>`LK>c z4SU#-T!|E=6#rRXc#+PIc3H|@NZx69w~`VpOE{7?*Lp!=ph$HAyEQEpBbgjofir+t}Wa3GUT1m&7;rCp*8vIZ|7 zi-t7w50{pDD|_^tMRlI0iypK1F+al01po3IUCFXTZRyueO-W{FZH{H*TAdotP`~3k zbZk|}6vHeEsd3w@Ro@Mh@2nRRmXO?+^{ldG@$3zI((s-G&!7S2;gvc1dP9@+0$=NI zs|+W>$g|12>STJUk6(TnU+mFkNiTl$Xnn7(xX|okQtRi&@H=;pN zc_liQdP%IpB2sz7!AyrT8l&ZVn0Bi|zFJ;+U<7|J*)z72D%5!%jYi_j(t!_ zkRfLeq{lM&4Ot#O0H<`(#s*?$AEBg4m0mIRj6`YvW%$Puw1TEyC=16+GT7Blvfh|oqrz)G29 zVO1hps6r{cdRq*2HGWgiP$6i0VyC!*ORr;{ad*TSm$g_8W%<}M%xa7O2u-0)W4)l&W0AfKwkgB-%EK8zw~ij# zF`#V`(^dvKV??)xQeS}0kfB+RlVqE}Vm&&7j_hbV12QO~;gB+fF#XFKLL=O3rkuTY zf2|JBoiKeYb|DH_6Iv4`=ZxJ_bz$hrISs8Ae2VZJbm^wG@*Dvo<_`E2Gsj3#i91lpUg zs?qQ@L@!IFf<`Y!{c@f&PSo5pO+R!u;1k5OGbiVTZagm%g5~RXZDC#it8H z(MHQ6Wl~F(ttA^jj@h31q4Bgos+Nsl#zWZ#&L5CjS&9rtyEMStp+hVdNWPSVSay1d zkjU|2f@7~h!rLt(Wn)(80qT%VqElRqZ?q|0299m}PGidE?Dnamc?r(i* zU~6f|fozIuo3Z4NVV%GpyR-&22BPB9Q432Bf&?gl7f2k@*9^)zXE?Lt7{!hUBV2Ki zR07V|_Tm?UAS}s$5!F8D-hR$=m`MM8q11auji1ucTMX}6f(Z?UgZc++OVI|$Q1j z<2`aS$90+w64UkM&wR$ZLTKx&AKGk1fUKaXCE()lW2<(W9I#){BS5x6sL`LBRim|H%FuADj0)txOm{>QLrpCxidIfs@{&>B62fV-jnfq?2Xx{Sfv|no`FWR;oY=hG_o7g7} zvF_5;A)Q(QSKiu8rjThx?1nn8ofoyHxUK%K6%GYgeVg;J83 z-C3JAT4gE_pHQ_jKVlnKmxTo0+G3p|2&AS;5rIeEfo{^2eJm4USA_Z;1Cwd*hqtNa z1%Lz12AU5R)|}8}gKOHUnouJGGe=CVhC-yj>q*mJNR-u!x%r_fVV=1cL&2Lk3oyy**Qx~?3};;$+} zTBEiSVtY#Rly^vI$P0}3CRCc1v+!v5{4^y)4I@It$4(Q>ZWOEu3{&!7F?bLOB%q+) z>e&pGxXRZtB=#wDj!6akpg&69x!FV6&vq8WJ8E@Bi)J6K;NT!r_%l4HOj!DVJ&e#M zpG^C)hM+Ztx8?TXvidlgv(Fx&0XEx;2_r@6~V9`i<5K}CgiKHc(KTvMQd*7hs$zCHw7R*VEXYQI81|(_Bz?2 zdQRbB=~mVTW^;}H3WhZS3RAQpk;W`BvP+_(6+oXC+t?h|E%VmyC-qq?v*#FG z2I5AY%GlR6Er#%$Z443A3W|29P;X}5>vIxb!&59TmbhVYy`Q)AiUA-(@g8gH^n#kK z{W*BQ@1Zyz<;tc&LF)sWbj%f01<{7b@yk%~rpSA1&YPKPkIdGPshO&JY|W8@^y z>$K~~snJHOP=E0uf(< zOk_xii>7i7VNJNR{=IaiunK3NvCS!N+`J{0G#)OtQl)taqDK*r;m|(z`2ynim0HrO zJ~mHFZQb^Tvmve1Y+zMFb?&1bpLw%L8I1+Xoq{Un2ZZ2y|E!nJ6@JuF>=6yAL`9v) zX1R0b;-(a$Y!1gI?AiRIvRJ25p4XK#Agi}fq1uOq@pjLQl#y-=E}l#VuxTs8HiIok zZANQ0U?Y8-8(2Vt#f(JoDE-rE|JNK#-%VNTc+C~5W+4;nwh0bKgaacqoFw}h=ZC|Y zX&z`NPELx_Dm;WkNh$Zb4T;dtQ-zF~wqd*h$Gq@NEthhh%MOc&+Da65P*6GQt7C1) zthTk5qaR;S%_@XaUqgpfFvaF)i@)88q})7eUDe5);w}>M=kCg@OjlV56ZS<} zFI+3w8%I{lC5ZTM;yDxQi7{gO;hb6K-E;xr!Weq{>h1Fq7Qqn!*OV(dcn*dvXSfN-q>{)MrO?p}>KQgNi1@5~!`&^#DQa9c_Y- zh_-`n8##`rP6o-6Z7kd!>-1nL43j6b_%VTBTz|$Fz^&hnT`Aclm6|z}UEnC?8lO`v z?u4_2GAXzzg=gh&leX)WV=iPihMo@855W&^5f^k_^2^@L6%Biu!_oi|sE%S85*UDy zesQ>6bb43RKU*bf-dphe)KF6@xSW(L*%3W`Ij4U*UO1zF6*ex*CAy?EVX4Z~Jr=UU z;cV&VLcd$0Cpgl>@lp!=vFwG>JE4#KEI-Gxmx4AJOOF?w6j6$R&FEfWo*i*2ai)NLIjM8ayRkHj=YK@^CdFEmF7q_sgx z?d9zt0s6d~>^eBp>i77J9u3T8d@p#+MTS3y%5R~kLUXE>*}xi9W%1*}&HfmBLb`=~ zt#pV^ciD+alCNT`$fp&>B@}jJ;E&N6t!aThEw`7Da}mX)I5&GMr{8T;nPfJxUIzx! zX!d_#*Dg+)cWyKakUWg$|LIF>c0(kG$?S~%T5;c~kB!72&8Fx1D^PCiCKe_`PO0vaj1!4xm3`0w(5#TZ)R!?lWtX{*qemZ&lSN2s8skqlDq+i z)aURtB0meMCEMchOS%lEBjpT5PMJ*^<{4c)i8u`k^%y<>XsqJ=G*>R9Y(R{ zyQW`fbbk^oRh9x4keO573Ihn3bHfI>t(U5c@@`YiUet8eH{XJ zMWrI%q*W=fDy))Kn*MC^GR=3CeX9{*0Zaw^F!bDBRx|spQOvRrT}B$4NGGN5#H&aZ z3lurD?17kG_=h{i#uf8=r=WAi?bmCBsA0k-kW@w`kK_#@8A-D>fuMSDjCgkgh~goK zCi{7xG%eegZu>_E2%3M>Te=?5E?jZgJh*edx;%sA)h3bZ%0gP6@N&cCbJ-CVjfMJ? zL7_fUx5*cRGc#*|>P6BEn=7sr7aUPE-5kWV>{0haA5~SHFjNHCa%R>t#Ff%$dDu@j z=_vQc-H+u`FF}ipP6w7#+NVipOt&|34+eiP{mAn&VZ2(ea1|cv*47?u_GKz@f4bCI$|Ag$B;e9z#)hljxfN>0(g1JV~xe;Hc^Bli;$duJuQFm3kaEcX1C$njLm z#`mX6bnzlx)qs%t0aFs_JFFh25 z4zxYGzXOXdJy>-AhS&@)^xXylSf4l!)5uSp?(W~jgEIb<<67|FglWt0XQM{f-?WeU z-!!*SyhVG=!k5n;Jg=UJrY()uPsk&vr5|;-!LHzE5nedvvlHItMA(B*2KE!DK{WD7 zvI6#(BxLdLYK}~wxQicuS9|&SneOEi{VDCi2eR>rOT+$)OVIfo#+7xb?9q)9|F>np z@iU=ygYn7dKK*y8e@XBd_*=wf`{c6&{N)QVZxY+MI(PZh$LE^-nXqIaoOE2P{aef4 z|7$~$#NPo5!hiYR2|oG$}}3ke*=DQA+{)6Va;nKPRUDwE>G4|1&M& z_}8QOOTN_Yy4Sys{5w<7U#4rnzqk^Lf7zAMyw(1>F?89{E)GLL%)+m`XM~cYhpRc@ z*cKfjXFamyRLH_F;}(8whW|{Mz3^+u-U5$}f-$Go-mHAsqB+fBI<>>Tttl?+OU8_5 z8;jaO)5avHrK~=u-o~hW)RZONRwj+h#(y>aUt+}3ZWfIZJloOLjd^*GKyS~xm(RPe z|L4teMoa)V3X7jeeKbk!A$%#46Sx!heZ!k+p!I=ZKC37zDn;@cw%W^>iN`H?tGD^k zQ_Kng;}uFkt>7K%YIS%49@k-XzWb`plUo-LHVeGGyaYa8va}KsbW(dfo<+ksbiPvQ zq1pTs3s-GKsDiRh@mXBD`6N2XT&PAMh|ZE(8ub_#%P)#dOC*RaD2*P@+%I3&PL{Nu zo5*amyLW^t9@4W@BxDsUfg8DNZKB8+QL@H{1NL!#fw)Y33l4b;NrXR_HrR$z9%`y7 zukxLd>z=7;-Kp$0nw@a47WlgO?C|jj?&t0ndATs(SafF={j^#9N`K*(3*PB-MJo77 z)_KtujRxZ{%qF&cIcm~ib@Eg##BXd)Yd(6L+QC2Are+L_Qqx>jg`?u+3^7nvRRy=> zdeA*1q!bH?ws+_DguW9IK9z5(ct8}o9s#%X)`ohHseN^Ej{b_PAC@J&y_z-Zu0&+3DyU0=ylIyl9?!30RPzgSu3QJX z1QN}6@c|Gdc@?Js+ehDb_9Ok+BP)i?F?&Z{{M$}-um#K{-OSvL zz)o1oBPpfb{M2Uh=(Vx3l1?~4M7J0?0}f?m)|9`^c4b?$lp%MRfF!1vE!_N$U&eUa znjYd;Mo@x)RPnW;ybQ$;swIkcczUG*IOtWG(sZ;XSJ->3e}>vII+s?;z%E0cAbqo? zJG_nBGZmW0F5gdIVk;ViYm>fQj@~?R+3}@>!(eV7xyr^AlfHVj6cw=9G}z^XYkM{n z_=fV4pCnqPGO4m&L3?1Bsg=Z8^9_;Af0j; zgaf~Gk11!6F@QtO`o`?M_}6}bGRU>N@R~bc?+o*v4gJ?O9+dn(Z?JxSyG$vmyBzUG zj|lk_+6fHw$kxojUsE>u%)QaLG`9JJ`6h@+DPN&>TQs!Wp}9?`BFFRd9okoEC0BCW zvH{NYYxY_RqRa@|eHwCNZuyl%r}JF5%5$-zgphSCzYzzv%7tL^Iri%}p1yvY%pWNY z4OmG|su9i`XcE#o7&NFk~TsV&lD121S_E-&o?ka&1YQ6?{6jA)LgY z0^Jq6T`}CcXfGOb`<6$G%mEe6{-XUVLsu@zHqsrK5bS;ur@o?xq3zn$V|P^Z^ITKk z1+h@kxKsaH#LY1|J&fP;CL*rpNQQUM9=pn! zIMch;mE8N%K~DGhP)sZ5MGAdGbLi`1|Lh)Z)_2`9K7wn$-?6-BB)r~9KG4%`8;sU! zir+>{RoJx8<(D|ZDj)^YnLTJ%I>pR|(eKA2SOkxPy)p$54{RpjR&ygGZzyuU-XYR6 z%!@6_^(gCCTCbu+enE)Rmkm7@KGUZ*B{VEB?B|#e4G}p{ESBWTr{iw14Z>5OHf~o; zsmbzhaLQWX9~G6fDlxImrG&j(tWwysV~1W~is&11=2a`7O${?SV@?aaS+iRmkXPy=&*zB z)OX|Jq}$FrOGV%Xr|>rCx`m+dT81pg+~?0~Spw}pE$-B6q#_^VOSBNlCkN}lSXeP% z-QWJnw?do{(f7)()rd*@eNEbq-W-z>#TGKi5-PL$H}Z(Q$r&qJlOgFqi@de%$Wy^B zb*ML+#)~NAEno_CO|rKlD!+NGVHKKm^s8Xo=bwuAA@GzP3KspV+DXO@Vn7>}BZkZ3 zprITcumDnD1{mEiejkPTdd``(2>AqqqI{9()cW=vxdx&E2(hGJwxBQ$^Fo5i)+y0P zB(t`=k)LHguyRrS4Mrgxwc@T)Z_-QHt@*|JYz4Vw?+#)VfrbDB#6TWmxkap~j0wSE zo<}~_Me6hW-4p6jF}Y^I26`yi0M>ECnT&h?GRCUoBILz2ORcmLU!}_)F6r*+@3!ZK_fn%LU#nCP9M zJnG|~FZv;rreBVOkoTlL3-kSbUb$G-glXQf}?N z7ubL=Qab;lu}qgx$q747DKZvRl)YIq;ry#g8b<7Vn@5Aq;W0; z&tEM236+`+KPRRP9@1OS2pk{wFe%w<6tQX%c}H0>uk(A&{pdVkgtx=PhbJqv7l2vz zRs)KJ&DMwb5+qOk1Q}S=JYCyir08-E6#-{Z-@8s%1{hYm3blj(%#XN8v8_lc*wqw9 z?|AhIAQ4J565MtyW!Atl+s;yg)6CSnF)XSurg~I|biD?dA8_d4hMdIP1W$fHkn{d^ zkR_RSgUj7-Y!CID$Ep_7SWCfS()z%yn*HX2Khss|*E1q~i;s}_$4`|aac%n#p91mp zj@XHc^roi102k(En@Tql)gx-jtyASe0>2M08kD0fL%sIBPJkVg}*vj#i4OH7EoMQ3h)a9djx`#fPCNu2hxRsFeP^`NS|_dxhd&eBv*Z1iJ&rI`RiK zcD24S)-fyJs{}hZ)#g2{k@aDgALH8h$;P%iu_q?5s9)?sfY>@+O$!-E%+qhnJ)R6V zt7w8FI1SExz$$hFlW$BXHnJ5me+HLKnBPr8597PEflFj1U+OlnNTaLTnf1--MzIBkzp~ljyBPiv?RE>&nIk*bC6z1bI)MiO^Tf z8fVV+{YbPM$aH*Byw;0F=QAfiPlJycdC6?*+~qDDO=cYwkT(w3^ifPZ@XWL&{RmG-KFokqG#7_ zmQ^$XXC<@;#_yaP`WlG+NDgyQ4m`??e9G&_R|t(RHOWS-NK$78n9p!*Bwf;NvBpsF zadV>th2xZP(P-jjB1`-XV&Wsx)YNlI$A!NBetwEblT#N{z=C?f-1mB>kGBh(t`gw` z=%m2_Clyp=IUUV&I`6vS?MYW1I4bdVON379uz(v>Lnvhqe)^ZDXKnbV8)cH5kT-rD z2VNZoMF`L>^=$zO2V(s+Mqd50w1-#?P$5aUm z0+Q{PBn+F4v6x_$XPXs0%@gvCuFehV#@JM;0MP@Kj?>@Ss$UA;1dsX=RcN9Q7yK^Z zoR<>9;}6A)jSEAV=9Hno7Ut7yud*@QvlG!gCSoH{v^X&bGz$i~G&w5oeFaan=Pze5 z4YUCNS|5aTnOe6%A(o~U_nCli>a!>YQpCFs~Emm-jFy!vi#z=wPkH$?fKi; zuDp^-D1k(-#G}U%_NXP!Azm%_EnYtN=nz3QY-i_@t@{;7+6kwA?pc{fh4YU3eambxV3^dI*Oe}IP43Xgspl}0v)BuhUQE%vINiR=R>(-Y%4et zKA?}%eeDPOGO->k))pQ?14LOD;zPs$o$&Z)`rHH9?X;RB07PqQdi|Waka(Pd1|jKe zq;c}NnaByrK@<6ODh(k+ED0fYgU0kY1B6N`t6~;i8T{0F?eg|7d9B-06+Bo9f{}u| znci&eE{7nu^~dvsxG=>-4!8zS%;#s(_Ac_WTXE0FGyFlJ0`{Xhyi4{Piu6cs;TJ7i zba(_OfMOeGLAP-#nK?5L(ZTe{_^qU?4*PfwsK`Ij_KoW#`}D@=orq9f(2&!z#>HIC zgzw+t$~_uX)UXPAL1epiU7RZQNjRUJUD#`gM_wl_<2kO)DWa=@6n%3Ywkz zoOenNnP!XNEY4V<#I?4ts?)xh59(sm`D4ZdL&*pOVE5sd|) zrYg;czAOWMn4Mv}O`BtbPPC&&V>)bK?4dZSnwXu4iGCNHj}7*O;yJJ zpdfxuqp%x&E)39CyL);f4q<45cW?9XKs0iR)Y&0ITf|-R(miGV>7DB(F?tc)DH!M~ zW6W|`i-HnDZga<2;OSPc2=}P3@9@`6(2{^jxpJ? zCbPss30&n+zSWACkkBkhuH zF}0YPnVFfHnHgKm%*@Qp%*<+Giy2FdEwz{#TaxvUXY84{C&Kpi_*_5C%Z|)kE1~kC zzNoME&MogYXl8|qFL1Z}Q#d>h=SG7D;W(_esv9bbEJj5&U|t)2YYKDX4qInz#_LT3 zgJzEFO*$V|T(cXM=iCC+yy}lhbD|6oOc|*zJMj^Cx`yiDM$`P3OHS{mf2JxqGwP9d zYr|m4cLjn*$)+8`?k-w7Ht<6?MQ1ONC>udTYHhE%|q`6H*$Wjac>b=5G( zxXBBu@zfSsYua=&FA7KcCAfIxTf*cQs7!)103Zhz?_qfVxyDkAiW&PT{g%Znis0gU zx*nUF&Sz5ZqDmjcEA&J4wMhoOg2CP@hyirfp5(OU&7iC#v*^A4vDHELE;>tkL|oEo za;jQg;0(t%aE@c0!ApApO`VTDgm3l@owRVl4}GyrE)*x2N21UXBq_KJD;@hhe2j|L z=-u}=hl2%Yv(YE=*h1rnUoR#tSyaFJd(dgML(G{Vzs7-N;?ETmN>5zkjB@o!OEJ@= zg#Z}n@%?cW`Z_%R*z&&b?cKEB+($=`?u`7KfQOq=!i85v)>$`(UxiE$yR0UT6qW?& z_?^2&NJ!wtn(+)`J_ybPC`~!u?lguM;G!yU`Fa%{(GN-_SjF$NFzcUBFr$$xPXY|~ z3|9)X0LQ(9UCtYa91l);UR(=WMK=2!0 z&i|f=^&Os=ujGUy{UxDH;P&sPM0bn75heOW0JFi4vVSA@X=#VEzRzFI-AiNulJ;1w_H zW9*Che*}I13fl6|hJR%FD}nLmR|2=MaDTa3{Ig;G%YgZh68=i~55wPF{$9bq2KEdu z>wCN)`FsAari^xcmHxN<{ww!=w3MH30MQqN9ew@JZvH;sZvOYX`Xm33-*j%T;Lmi} zxMq`syrQvcQ+oj8>26VJ6d?jCvm5R-xyF?BXNn!UVw;AP*F~}^Ajj4NvO@0*r8NE*I?*fiYSP8q#sS1l2#@0X1!zm zB(|+%cy9G;fw)qhpgpfO|9AC07vZ31=I21MXZ`%;n0Kt(7p>)sX1tmBg#A0Nd&RI5c@XXmc{p<&pDkNL)U-d-2-f#U=8XJP}x zcv=gtRXZauf_A^c1w3*sx2$7o7E_+Ia8fRvevZS#mQaYT{|MXN0ehZS-Yk8r1bFwD0L%dm|oxrH2j^9n7c!xF_U#6JYt-@qqOhf^@j3U5I zKM@>E3qmj(c52teeAzIh+UI5zF)qoV7;xE}$oy%545;vbOo6=4n@s3~*=&*PjJ>XM0QoG`j7YC}N*s>2qQM}*XQ2V>v_Ps|M4aL0Ko||yAY#(;s{?i+YqI&J-5|&v zEWV1;0OQr{hq0(K!8a+Pp{7AB--u`PxMb*!Ev4St=g&$>2b71Wunbw zQA1LWZk0(*@0)aKY5%pMrUj3f;mLWc-JlVS*C9_i!;m&<+$2azJ^VK@16UQl@z(I% zrcilzg}94-Hk;GJcSuaH?e60m_j}f~s)s#yOVJd!ZN@|e`8h(n)1UJ>9@*8kEYm(% z(V=@&kten5KLWRu5=SJc9L{}|JZ{lW@w*#W9dd6CyBK`4LE+?U0pf>h{67yH z>s_#hijEURo6kiRH#TxhlF(4ZCVD)c{|E4+6%{ z=_VBVdzorWjK&@L&(ls+7QrK!Y+4sCz>1dQ#q5}T9sA07`f(@IcLX08_d*7GPM3_N1?x6zPY1R zc;_jYholG66mveg$~S%FtONX38Id97H!ztc4Q>`BX5YPGhGzA(k)02@MVx$gj?u9= zg~&LZilIq6HE3gd4geNr+0_m>1f^SyhtFswWEY@GQ*_oat#ql@tkC!Tt)z$?D8z(~ zaMReBezk6|OX37w?*FE>SwnrErjf_n6s-rj>wm2>91*d#MpJ#9x|-Y56r~^eALW1S z0nOdZU)^EOkN@hojgf0wc66XEqiJ_K-u}>8VJ#LM{y!=kFpGGfPuBtBC*a zA)G?4j*7z|=vdnNiiSIlB{mR8okwOd1%8bz>{V-XwmXSrWB1%abSmB{ydqcgc4dnQ z4)K+FyrXTgxj;hAucuPznBBjql*GyPnPrL?UP;+!r< zcq4_68F4$Ii)YC|oAX2@p?a>|bxn?9V8r=k$Djktx$w>Q$!G2sZ8Giv{M5B}~34_AUfc97ujL-Wznxu7J%p8_{{Cr0?x zvg=5gt$E)ug*{m>-D|EcYfi(`EtMzpq>i_uQb=kO!@d|2GNV)l<;(fW)1#1*GQNk{ z8nHE!Vdgv0Ww`n@uU-t7m2QeD8jQ%$UzF-#tdzGG)HO#)ci&hq9BAW0Y2**_q_Qf~ z)>H?Y%w}zaWTdbH`k8G_VjI4fVeZ~oZHDxY{QAK7zN1~CjP8xPokPuxOUYbgK)jqv z)G(~NwXFk(QS5YQ#Rk3bYx4kqq}M_3LVG_j{cZm+R+dQxMg+`v-s*~x4{!rf{HG)A z@I3>oge>d<^2Bmn3D+bBrz=);Fia7WH)#&}yk7I{5%aQT;Z>W2)?)Z(>hz%|tA!*} z5C6U-5bv5k>&X3~C2PcF!%|tt>qob2U8G)olx1WlH9sz4E~X6ZTO=Vidx)W3)pTGN zT`*=UhsxnQLJ~4-nPJ{5kNZfrBgb@sP7n+SCQO%pBC9ax*9O*3cmhy8?W7gbJM>Yu zmnbBx{V+(y`K0fv8&-k`wh4UQ6VYQ-bKcS0(Q(CYJ60y*ETZw5( zayc+syZJ7VhPsfhX!4QINBPto#-KIyUDY!V%oh_lNCrR*yhvhgn4vakI4v9=4TGs7 z7QGwIAyZ~L&y(j2QXCSovZsGIo8&UJK~g#1oRnF6CA-q{02dqxYm6=NzQLNo`&W=K zI!{l;{hXS&_UR!oga17xVR)!H9^*DRWt8&*IfXlt8UOR|;s@;)Wiok*cY5gT% zA3B5n-Gy<+=O3a2!GC4_C&KmW^FIQ+EuMeH`6uk(B*u}kx(fVD@&EKW{w6k(?f=1^ z!qpL;sqdfq?@*^ZTys8S?$h-(KDZ-~a6TeK-jCcjOCf&JtW6 zAw1oMzrFkq=Ff-D{Le0L*Uh&2-b;PF_KWKJE{##!x6`xBCC824*UtYy|I52!{dS5l zSWiG;PY`5~pRfPn)6?72<1hRC@cUaI|F0E@Kd%7>1lMOEe!f5J5uP9Z#pw3@pqyi6 z15x6)r__j-@ER#G>?U|hfseNFtuo(3^*_M>bW5yaC4ABGA8x66PhZY1z792P9336K zx#Yfk3{~>)sS-F=#d9uwg8x>fF=0@r^6nCO{GQs>&hx)kDeYdn@WZT5>DjsrZc1cgC6kg`#rk{^62N0^}W3Q`Ez&k@#po=`Ms%5geRyEWBz% z_9Y_%e$Azm921JxF&dWyyvng{q;jxaCi!$b@eB)ILZJ31g8MG# z+T}%=MgGo!5Q7$UK^ERg-$~q3^R}#dj-C7DWFcDvW%LL5-vm<7L$ik=G|c!%X5ZyT zK{;Rp%GZ;054;@1si1y*CX$itd0^4x(uHz41Y+L9MkzM}hBGaO4T44DM29;gl&%k` z$)Q!Dc`X-hzBREY?=Ll-(^ekWT_WRsO}5A~zG0mVfR|;c6CU73H-jH^<_OPnMc%+Z zt{?lB)71})M&Nz#YiCQRgcw<6=%(^9K+Q2HP_3;uW)UU%W*_D)tQZp|dI2Vg^k?|E zDG0IJ>Qa1hu!@(Y^klrfO$sFo@Ar08IB#Dw&xV;>*6=XQD7(_EVhSN@Hc^M!NxL#U z2{N+&Dh8RGc8I*NcEf8+A42&vvyT*Dmih#7&NZWtXgGw*{WLTdtB_M}mifL5A7+@+ z&B8s05eK*Ap|rE1#I{2cdNw8z67~@DbqX>kUqMNPqY<9K|ENgSNH+koS8 z6E|MC*EaQT6;mP|G_<(O9R{fi3}H-u+0vB8)Q4kc|AvE9U~Kn%BbS@zZ^!;uZlZJ6 zB!epJeoQ5qj&pQ)oG{%#T$1_O8u+;}Fgm&9h*$e8h}D%Ph)`@A!9o~A(nmmRWQxB<$1oP1)-W7up+B7{LSRTLt>G>}r0E(D z>=tyt^i48XON^+!s3**FUw1|YzHnsIdT;>UuL3~ zf4L{;nxeyNtG^NYm9_Egjs7t9p~=tf9XMQj3!X@p>6*lDuIr)cKqq+(D<0`v6UH0D zr}Mcyr8y4msw#&adwyP|kRYq~;H&efH;09WhZoL?66k?^s*_2tYGIH5=DdvQ_Q=`^ znBZ?lQxti+^em;EPbzYm1g-&PB<4fhvCJ&+zbHeVSaUX7K9R$eOeLxidL*w0BdV9) zjbRP!wQ3;gV_fbvU%1rb(}5e?GhTl8Cy#tTd+ zCzl%>B1fgL%B}_$shZ0P+V-#@KJW#cgWm@oq%BTF&xquuj>JdRk_p9Z* z$6ZUr`$JYNh^VYeN^a%&ba9@jePWp8T721L` zklRfBm}3^0kSo8e)54R@K}?!JWLhk^=PYj?REv7F%9JJ$+PL{RU)@TfhJ=wbK&0*^x&Un|hsxWiG_XaL>7K!3+`4<&I%u^<2`NW2l7@ zM#FXd=EjhB*}?@_cFQpDTLuu8Dqc)S>5NzRK~9yRLc~sqH74mc49^l7pb!B+539VZ zo86W3HE{f#Ug6S;D`(7L0A4!J;02D9p5jZ9HtNi`ggjW*7{Tm!^X}s!aE*%0-24}!f36d}ywroe z!?&izyFB^KG(~Bry7F?&GA5P;NgTnpdpO*shF=WK6SN~U0g_arpSmyhakn8dfF(08 zQJRSo21>hz=LthFRa@WM?p>=flfM4*5e`JVAWQjM{f1eB2HE9>;9Ab97CJ7676trGXiFiRjUX20SV(hgo2GpR)h>Onwc9(WZRpL1lJI*P)keyXW*6=rr)z&9c*lQfm#9BmImtfG5RL z!v3Hi7A)c})-JTuTvLva`u>+Srxhf%3PshJqr3;F>ocleS~=_j_`3|m?Lw{gC9_6y zIzSGR0|ZaEn1NhS&(FMouSLY^6`0Q0M;3iD~zGD)k&w z+P%WDUr-JQBN-EH=LM(gi8aPi+&GyL;g3_ zO@L@f)~r{C9mq(@DrQH~apSyK0m6mRhv5@98`C_?0K78*ng{SiVLyc?0r7l1{?=-T zZF*Z1+oZo(hyI9W%$*H%w)D(~s)qGqDab`0zzq}bnU@$za=my4Z`2>27UIGlEhzC5(u_zd0v<(}xhZVk z4zfy>B^>&1Fs9#5Koo7C$C#d4B^NR#R}->thsdLg#j&$ALUup$$cNKel$!^UNR%wN zrdlDWOJG2&zfaYhgPg$tY&2S;7G*5{P@ODx)nay%rSLA5h7pTd;-(kylI= z>PS>wco)>hgg)2!#I2-CgZmp_;6M(R9a zvwIq4-xX96Ib8yyK%ziHwzERCGK`K&)LVsFJa1e&CM6Pm%K^15WZ^Me;YDO}>B}%k zTZXBYA8hu#m(i_1C=<$S?wc#^Qy07;vZ=Vc?SPRT~#=Tg`rL8?weO2Fo%=*STk zfRlJ2!wD=|3fWBQ=q1BW#-GOMT$9Ue)b0E}8N~E9aEB%?qzDrUnYPAlW9*?`f{r|C zf96N0i`GA0SR>^^Wuwn-VH|#f%)HtJ;I{ReXZ7Q=YEs!wtg``tpgvk#S4u7!`RvAy z%Ly3o;48<+u~7^BF);?R`n}J57w$6`oHQkB)Z|*NR0|0L5N;?^LEiWUOFavI-sU>z zt~jRTW%>efOfJr%1THNROTsuddbV&0HWQxtE)G=V5Om6=%L`I5#eMnFG8g6_%-q8h zQjuwG@>4~Qj!fc7Q>v|XRb~UT!*f9>`J|^VjDD3sVK3MuxB4j!z=8aGdcE$Qzf*PL5s|6$> z*nY|H3!zNaMyf;16*gIC$3-ZHVfc327jxGv00qD5@-Pu8=Iqn%8n(geoJJ(=Z0K10 z(AsrBl^j=NN7TJMqV%c2`A1_oR?w@M#-NCNZ{SMCHWSSgolQhVgU!U$D@#2GIjfd~2T#}LR7 zW`sDE?dciqjj3MVk)Aj@|50%IlGuz~wU=m8!~rnY3iE zvIdcM_*NP38hWA=nx6U&ybkY3XARmgmxSn2!Pgi|rJjKb)^i_12+20}pn<*A87Cj# zPg)h5iVxJ0PYvlYo3}a+u9@S)isRadAzxh@*^_z$PtQ1H9hEE(%D|k{KQ&*f)_$Py zfa#_RNgV_f($hAR-exh9z!?CQ)&J0fYUF1<$V+Xem zRd$qIF2#taN5|WD1f$dOLQb~+EW*f7QxPr1I9QNZHGrJKZw<{t7X(oqxwJfcQ9@e8 zwqCQZhGoZyqO;P;sn~Q_C}b|ff1cqya7nq2IF}X_I1?j$f848#QTJRb?#$o)xi^*7 zX30GlI!X>+e_vFKVNf5ZJ)YL|O8H_NnjWtJ93EALjm?(ftp)B}ui){~%lrP~_4?}d z#~MmL%M_qhGR-|Mb@?gy%?U zNy7S@=PS62dw3Fi(SzF0#=hlv=|1!Zu4Q>qlzXX98dxRFLo7Rq8;#Mfel;s+_k$6 z+MMcw7$c-s7g0qIUQm3j^+5}!L?;+X#3a!lnQX1Az==EA0921dUhT(uYK-vCJ5Vrpi6dv zB6n6)uRg}J>{C6q#gd0 zEZ%|Qwl^4N9XSdcqA3SGF;&U~w98nF0p8%g_CNOq5)+}IFD8=4NlCh z`Upd>b-r$4v7kG4K-fT5G-$GLS8yn*q@Wt%d~#q2x!qvG=;`00}PUKvolLkQo5F09bFzC;N=Ib8BC6s`&>- zGP_RMzGmq!%x9%#H#*rnZwya4?5GC*Ie;~CeLq>oMY?ieKW+WA!_Yo#uA1m0Yk4#w z0=0;ugVVCw>q+jk1`rcx(bB{>9ctnui%a0u!q)h32J(UIWf~P_y!=S~X`g$uvSk-8 zW&*@aJ#?_K1nQOfOZsAUY1oz6T51gW92SfVZus2lB?>4ReN&gaAywx@o2JoFvzU5A zQqK-}K-Uo47+GVO;iGBL-YU>Y5;A$*VT8MzkhRwIwcC>1A2y{B&aG-qQ%kmRL#*G0 z`uJqnCYp;%2#nkAqV%=^!VxP(y%fLaCF>cn1w1t#^eZl|0W#gYG(Qo9N!?`FXyI>L z9tlyvN=-9iWcihZe1tEm-ew3TY9;mA*0izK{)m-Lsqgvk6R(UK@XbT(VIhgu2cp57 zWInKCj2gc+e&2FtGGMnwQn)*l<}L)9l^kojW}oX^8k?I%lRH?f+vhOxxFyhHw+V16 zmDl5w+RZP=*IChfcmZ4H1 zi>dfN7dySIagI#8RBb3NO!k@~X{&ingZF~hK^SWW9+8GkZL}r!@?Y|mD-+)ok=02xRN@ttms<(%H4zru%D#PEe z)7AK`INoxyOOsnK;Nkr6N_B{^_pNPORtGtR%aVI9l~?;nmP1PGH{3aNz_IdWBPP(g zjG`hu6w)>~faRj``4N1TX0*h%#HF88Ov$ z9|+UdME@WLp1aP4J{a9ocyg8Ux$_zB`vaPrf3T)+=+=&)W(+Q8PcICzWOLU(!BMWu z>Lp9l;Y~~^k6euSn;rFTMhkS9U}JVQZ}4tH;uT!f{IX7LoBp;b)YI*sKQ~A3FDEB2 zmf)3tVt4(v@mj>^dU!!uX!glv2+2g*a=hnhQtk14?7@OPc{3D=#iI{TQ?9iQTI|*f zLaUDlote1uwJ3dQKl;~{za zcrDSxX;D>k@!0wEm`q@PuyQG5@Kg8|s0V5K2%`x?%Mcx9UAi>?x#sw$)Id-#=i|+3 zGkgfKemw!3Q{2WyH+(R?=``5S6ea*J+|pQNyk5TQo`qs6EtKu73$u^?HBPRWA$nRa z=SpR5wOgW9{{^NL#mrwWtq;lrF=ZFz;2O@(fmU2cwXdg>A#r=0Gxq@#L>UpI>=CA6 z&528-`9T{FaGk!1f-o{_t{wOT1u#5H9R)!aJ%l644M1`}M+z_C2t%+-~cG zlPOi3yL7J;SpZ0H6kH`^Dj`SCC3k6@3nolbV=n=nk~klPA}BS4nlZaV+BQHQ9CY$7 zJe&11%Y|26>2Vh)bswJm?1{Qod3spSl7!5~F0?f*sF3qxr>=2~AP&kmE+E`f8rC3? z0aJ$#MzQ0SX+yU9fK5ma0(6%tYt$)o^sjv{W`yhCrUB!tdzQmnNhVEo9h4kv`%av zuKoFEJ&VmP+#N_-qh5EdYrMs!1~=>vkt2VmS|VRIGX_oUywzJ>rM-a|lcXb}V2+j} ziY&Mo5b+B3i7>^`*nF@OI|VUpsU?t!bG$!6bz%Q89BFAf|3=7L4PwacUBj_Qhiz(H z3k!Vls$BGvEDzQ)4~l8H$}MhE{P0yps;f}=*r|t$^+kc?uUvF6Q=PFC_4ZvQ(0Aij zv6AKh;5agu)Ace9ogu@7^9%Mmd>IuKqA(zD%$F2QRyGut+!jIW)vLk@k}3RM$^_rt5e&DFHb z1f@Pjs+I+aAmJHnWa0IvXXOM2&3{!(rf^}EuVA(+*SHbKc0FI+?_N*o&diy z;HV-i{30qF5Dn6UKeS;12La}N)Mid@uO~~wC1icmCrFyNus?IuyNsGi3%alHl+fYX ztp|fg_D0#Ar2e!ZY?!Q~5oXI<)s%pY!qb8y=2R$7AK7R(Uc`jk@OrQqpazSod`LjXpE(i{yRxaq3rjA z3qI6kc3LY6-#DOU~hBf7L?K)&)^&kE!I%eyVS$OnJV z*K4q6A79`1BZKC0bJ#yU>?U_0q^n=8o zOoXV~G#blkL3y?y%eHU9jbbSmC$a6AQ-`&*2 zGNxE1lEPFOO2sc@Ocx66!<1^(hC3Xgk1b#>g1L3Gz3jGfiaWu%j}nu}4=rF;ZuM;7 z!pzAM+Pt~Mr=DdnG!0v{jVoLd!|tWORWdt7B#B@76&68UG6+Hh+1aVJdeQBk917PLn zm1zltQbwHR#m=+9+g}beYuYKTv|;x?yU#0~oiF=!l35~FjIZGOpfjW^hAjuuIyU~zr{qXGC$Y6*J}eZuvCU>e%NrFOM8}?7KRqYdG59- zaZ|qK&xvoD;z!=)hxMXbU%=^2lZ}{Ky7mOc>^)`y1JdEmIt-p3O$_uaSdi=KV{h`; zM0-U%cWTGxW{RaLSBu&AEJoqD<71F;y6i+>6gY09y+xRDiAw7vS@nx7%h!8 zw&r~{-3S%{S~rikL7q)8!M>y#8=ECRDHldk&8%5RKBAL5aqxV+T^A;<%m>lxIi(FJ zUXx{QPS14-?Zr%Mo6%|mpVbkf^Q!*r<^7BiNrc_p*v#6ptnsyOHnS_N-aIcEQq9Iu zV`lM=(sIPEQXJL+fkP3y>xNzetZ8$iNg(XgBwKFF-E{YC36m3kSN<;zUo+MFlDnUw z=YzZ}jC@-$M^$tS*0@mj)hLNJkrH~EJF&}K zp4Zd;vYI}nzX-w=zdLBz5~(?pApm|>+bm#XCMUYeGZZ&-T(}pySZ+ocRhvLHyea9i zGKfzjDHCm*2nwq-5jSy#4%1Hq$9ARAt^A>tHLY|zFc zHA`^(UFYlAPeq!>TWd>0L?hmc(N1A&0I<%{hMp2XWGc@fsnBw1$#Ypg)6a93^8~K- zc9tBNflBW~fXImuFrr6uRwhRy$W!h1a2~9I1%T1fe&>)wg@)Wlm5ty&MKpOafpNpbD)32_1K(92&V(&6B7vqu3Bd8$PR{Q z&E3c^$G18mIdogmu-xqqL3E34DsflByjl^nn(7z+c=_`fxZ=8obQUmg#dZA+1v}5`ePt~`yDg6K|kR%V{CGh1e*B|nC@#J z-7vhv2}Bzz&(BhVRpoP_bU#CxMv#9ufZ$LW-kuDBSuW;7ltiN_!=S zuIkx}yv8+&5k(MT1IhpCuwOVGOy9Xvhh~F8CPSwb;Up3mK6AxiS&DG1D~%1G{*Iq_ zWUebAc*G9rRT11b={c=;pE-uCWl_j{2s0KFRws}J?-9mhfori=%I=uHMF3O~B=4JV z`T!4lWP4Xu-=&rQ*M-978h0;`uSaKBD=TM!b1QxPp%VzlSD_p~#BzfFQ*=iz*I+x< zip1uzUTj8Yb6c)3C$l=QQJR%rT-7ShORZ>Z7U!c;GB%I(QZE{r!G5R_jm`asLq6H5 zMry^Cf8X`>hsWF9?bGj<6Km!}{@;qYGHBnPJ=18tJLu(bK#vn_EOF*9E4^H1V+olO z044~qqItoquEI=I27`}e@ZUzqCX{u+d293D#M{PW1?N z+xv>zK6X{0Ech}0Oaj5od$|C`5bVY^*cWbs^Mp~Z`gpm3k)33>nL=xkvPSmNWa15? z8!H$YY(no1M-&P7M+>Eeh+(G!KJ=3Zt<)dja=>l*tg0!s`AzSOXfz4xq|L#ep3w&( zXju-WobeyekQ6J%ICzo{qKwaNw8PKD4UqS_f3w$K3UxG(R`5Qc8kO*RTj_?-`gLg}U)qzyhR_f!$8?%3SeTU1zT2vI1L|ev1x85g zR~<)`P~9;>9V-mZj~Q^dI>r5<>pc#}?P~K&CHjrchfFj~gF5<&uB#}y=*I*KAuHPj z%Q~$tB!q~|o9ew1Z=DjpKCD`2?fRl9*iGjupD=M~W3W4tOxJXH9$W!>o&5po@bTL2qO7}CiHPDOz@hQdD?d4BTYjFTYbhdAkb;}m7VWTDPE1gZ zT?!}eBcoUuN@yL8#ZBD0iN&29)uJs`IW?6#WREMS&Yw04ac8E7 zTdCfw*dulvcrtDBGK3w$A^(7k8k9#YA-PVVkq+2%Oq^kT3!WR8(_ssrX8yz+2FGI$ z*Zz~sD#M)Ev=Umk_c63oeQm~d38b6esw_r_iT<#nkgELZZ8ft}Ud6T3tFP+Z>(le@ z>F59PcFQ!))`x$Xh2T6L{^M{^_pZ<0d0GzK7~{|I_vZdpF#pC?8%DQ&16p>R7`;7m1`>z8Y&4gc5a8*O%qpp>Z_^AgzXD*fr&~}T7eg0+> z@DzJ)QVsD(qkDh6z5V*``T85axRfp_ecOY1l1GL`^Qk%mKcW8=>Lx z2I}VkstP5TOTQfb+h}*@UZt^P)=sFFcUEjzNIg0Rk+LUpHL#=H2i`$w;!wJtPVxIL zoNw%jWXfVLEu{QpNhbyl>pZ=3f_QUBM9E6caaWVoRocja>e6Wz8LRkRhJFk!ImzO% z*g!ZINM@384dw@T`K`jIjD85|YsXypfk+w|6afJV}uG~Q8ivQCYvttxs0WyOo#ukp& zhFsHni=Vx&{5`=H{^#YubO0@ifr^LEfkVJFe<1cSyesW7r5ue*LfkFCDQgP6T>#!` z!!dsq-@X_+twTno_IL5^Lfr2aWL{kEC2&fN_X&fg?+M57&NoF9>tktzTHbwf`EU}r z>=J0eV8Xq>*Fyfl?2ybwE81YHPeDuB>*M-4p~cyrCVWdTwkUSOB&M1G4N+z8+OYK2S_H>;V5x zdobYMTv=eH#7wVuO{P%HO{;s@;{F|Ld3>k!Dxn#^Fg1yo^3{?-9tqm)!s{DT;cfPX zBU|TefsI--TzHQ)PsF9)SPUCL+wu(xG0=P6b5m|g>Zqs*80vE51H6|Chw7m|moZ2j zF%RqNq#NZ?lyhLBZ=c3h??POdxCYzaa&VtLk+Z=-u#6@?&;eoJJ+({yCg0JMS%QEE zz60xauX$xnSuN3j>2&tpB=vWq+NqQvR~-U1x-N7ted(`B1L^+yS%G@{dVhglRp?$` z>3#{SXG$=F(qH~F$_^qz7^qC)(4B%8jui#bq_!`TLKtK#T#9qUl*y~_V0ynizU(kK zMYvSQ!V*u7LG%km#}Lq+n2*d~95FI5e9GgZ$72vMz14xcFIV|>e&sna)9f$vfWEH; zG#@PonDABXTL-G&L~UXJA3#$c9<{6!M_*Ui z(-Y`_bo=%Ab&snZTqyDLPJ~OmT;~`0WQ2d$zeKJ@RV-lf0z@kh@=| zr2eVJq`F9ZqQ(fzHMzf(LVx!{V;L~2Ln17)w@P!}ITvxUS1_v7Z#*r*RRM><;zB8g zze}Z7t->Yu@Sp=mv!Gq~8l$Ppwwd4B$swA)dRG(Ad?2~N;KWG*5ssXXJ3R##<*fOr zS_$~6=`bTXI-x2vkG}rR9coo7H5U?cIUTWSWc!?n+Rw>UzpMy+XFxGU78%r7#t-j! z3r><-wFvEzMCV%lfe*?AC@yr3jiUzsH!kO!VwI2s*7Ogy+BF~pRmiZ;Jn&{QiMvp{ z1BwL)6_m#etzL?)lMq~5B3iGSy&3TbPt)hFV*}`TopdbA+MoQ-0MaZyQchSL>tX_1 z2Pv;lay6;#M=M7H4D~x=4@Ux@UT;SNdHA&ZpW<3$E5kq-mBx)qunvjb=s5pZ3itP& z3GT~5aiUp!1AqP<0e=2Io%N>|#HuRXsTSvpq`HmYCI*6_;1qN8+I&rz9agEX7{9m} z6Wa=DJMZ{kDoWU$EO7;vzoi_sU2;k?$1in-RND+PoRoNO&Zt=R7GEd<4^ ztj3@rDgbB3l?b#|3dgLrMTf9fVmeAe;!+0|Pc8G~CAzIMyesDXc}00Qoh#Zbg6)xP zh@bK}s~|-~;#q?_o_jvsG>CTxXIG>7@hj!&?zQ(vpWkV6AG&;BMrQwSueQsR4+l8m zTHN+zzx>}kN-?}%pYI;;x3GJ@hEv4@#NSog+d~g(1tMZ4*yN^Or0{U!PopVZZFs8~ z@)jF!DtbMXM@NcZ_x=f3p>Q34+|FBq_4n=h+N<;C#RK;0SEc=-AhfVP)sbzxt2Cl_ zMF-DvDQ^imisf!Ew(9@>2XXiE<@fK|qr0=Sl~IAuSDyR>+n3o-{LdQn0bbT*Jfm~w zOvikSdScU5j|F^Kg&efGY`s|RyK@^J8Xk1C*tp)xkY+P6x`|V$kt0dFgw!_@1`p^5L-(3nGz8^;hvkL!P z00aU1{`1RwJXKrv93$D?N`Y9aLb_`OXSfGbks{RT5#nVUSw0P)Foks!eK9A+HBjc7 zbz1YHwvXzbksPv>BdYP2T%&rr$A6yQ-#-3+@2cM$?e~}L=?G(iI0T9f%WS>HXpY9N zPlF!pCbaKpYR#fVYDsm|JdA3jh=j#u-M0rE!R!GRUM}%LO+mS?rbNzd=(?tHhgPv8IZ{S9CBq$bcr z^T}-pOP6pxY|ktTtddDOF)q#UnovYDc^4dV2>%358IXa+W*ij2>+J2UWX<@00lN^4 I7xR1r068R23IG5A literal 0 HcmV?d00001 diff --git a/test/input/TestOrderAndOrient/expected.ebov.ambig.fasta b/test/input/TestOrderAndOrient/expected.ebov.ambig.fasta new file mode 100644 index 000000000..39bd9e9fa --- /dev/null +++ b/test/input/TestOrderAndOrient/expected.ebov.ambig.fasta @@ -0,0 +1,311 @@ +>KJ660346.2_contigs_ordered_and_oriented +GAGGAAGATTAATAATTTTCCTCTCATTGAAATTTATATCGGAATTTAAATTGAAATTGT +TACTGTAATCATACCTGGTTTGTTTCAGAGCCATATCACCAAGATAGAGAACAACCTAGG +TCTCCGGAGGGGGCAAGGGCATCAGTGTGCTCAGTTGAAAATCCCTTGTCAACATCTAGG +CCTTATCACATCACAAGTTCCGCCTTAAACTCTGCAGGGTGATCCAACAACCTTAATAGC +AACATTATTGTTAAAGGACAGCATTAGTTCACAGTCAAACAAGCAAGATTGAGAATTAAC +TTTGATTTTGAACCTGAACACCCAGAGGACTGGAGACTCAACAACCCTAAAGCCTGGGGT +AAAACATTAGAAATAGTTTAAAGACAAATTGCTCGGAATCACAAAATTCCGAGTATGGAT +TCTCGTCCTCAGAAAGTCTGGATGACGCCGAGTCTCACTGAATCTGACATGGATTACCAC +AAGATCTTGACAGCAGGTCTGTCCGTTCAACAGGGGATTGTTCGGCAAAGAGTCATCCCA +GTGTATCAAGTAAACAATCTTGAGGAAATTTGCCAACTTATCATACAGGCCTTTGAAGCT +GGTGTTGATTTTCAAGAGAGTGCGGACAGTTTCCTTCTCATGCTTTGTCTTCATCATGCG +TACCAAGGAGATTACAAACTTTTCTTGGAAAGTGGCGCAGTCAAGTATTTGGAAGGGCAC +GGGTTCCGTTTTGAAGTCAAGAAGTGTGATGGAGTGAAGCGCCTTGAGGAATTGCTGCCA +GCAGTATCTAGTGGGAGAAACATTAAGAGAACACTTGCTGCCATGCCGGAAGAGGAGACG +ACTGAAGCTAATGCCGGTCAGTTCCTCTCCTTTGCAAGTCTATTCCTTCCGAAATTGGTA +GTAGGAGAAAAGGCTTGCCTTGAGAAGGTTCAAAGGCAAATTCAAGTACATGCAGAGCAA +GGACTGATACAATATCCAACAGCTTGGCAATCAGTAGGACACATGATGGTGATTTTCCGT +TTGATGCGAACAAATTTTTTGATCAAATTTCTTCTAATACACCAAGGGATGCACATGGTT +GCCGGACATGATGCCAACGATGCTGTGATTTCAAATTCAGTGGCTCAAGCTCGTTTTTCA +GGTCTATTGATTGTCAAAACAGTACTTGATCATATCCTACAAAAGACAGAACGAGGAGTT +CGTCTCCATCCTCTTGCAAGGACCGCCAAGGTAAAAAATGAGGTGAACTCCTTCAAGGCT +GCACTCAGCTCCCTGGCCAAGCATGGAGAGTATGCTCCTTTCGCCCGACTTTTGAACCTT +TCTGGAGTAAATAATCTTGAGCATGGTCTTTTCCCTCAACTGTCGGCAATTGCACTCGGA +GTCGCCACAGCCCACGGGAGCACCCTCGCAGGAGTAAATGTTGGAGAACAGTATCAACAG +CTCAGAGAGGCAGCCACTGAGGCTGAGAAGCAACTCCAACAATATGCGGAGTCTCGTGAA +CTTGACCATCTTGGACTTGATGATCAGGAAAAGAAAATTCTTATGAACTTCCATCAGAAA +AAGAACGAAATCAGCTTCCAGCAAACAAACGCGATGGTAACTCTAAGAAAAGAGCGCCTG +GCCAAGCTGACAGAAGCTATCACTGCTGCATCACTGCCCAAAACAAGTGGACATTACGAT +GATGATGACGACATTCCCTTTCCAGGACCCATCAATGATGACGACAATCCTGGCCATCAA +GATGATGATCCGACTGACTCACAGGATACGACCATTCCCGATGTGGTAGTTGACCCCGAT +GATGGAGGCTACGGCGAATACCAAAGTTACTCGGAAAACGGCATGAGTGCACCAGATGAC +TTGGTCCTATTCGATCTAGACGAGGACGACGAGGACACCAAGCCAGTGCCTAACAGATCG +ACCAAGGGTGGACAACAGAAAAACAGTCAAAAGGGCCAGCATACAGAGGGCAGACAGACA +CAATCCACGCCAACTCAAAACGTCACAGGCCCTCGCAGAACAATCCACCATGCCAGTGCT +CCACTCACGGACAATGACAGAAGAAACGAACCCTCCGGCTCAACCAGCCCTCGCATGCTG +ACCCCAATCAACGAAGAGGCAGACCCACTGGACGATGCCGACGACGAGACGTCTAGCCTT +CCGCCCTTAGAGTCAGATGATGAAGAACAGGACAGGGACGGAACTTCTAACCGCACACCC +ACTGTCGCCCCACCGGCTCCCGTATACAGAGATCACTCCGAAAAGAAAGAACTCCCGCAA +GATGAACAACAAGATCAGGACCACATTCAAGAGGCCAGGAACCAAGACAGTGACAACACC +CAGCCAGAACATTCTTTTGAGGAGATGTATCGCCACATTCTAAGATCACAGGGGCCATTT +GATGCCGTTTTGTATTATCATATGATGAAGGATGAGCCTGTAGTTTTCAGTACCAGTGAT +GGTAAAGAGTACACGTATCCGGACTCCCTTGAAGAGGAATATCCACCATGGCTCACTGAA +AAAGAGGCCATGAATGATGAGAATAGATTTGTTACACTGGATGGTCAACAATTTTATTGG +CCAGTAATGAATCACAGGAATAAATTCATGGCAATCCTGCAACATCATCAGTGAATGAGC +ATGTAATAATGGGATGATTTAATCGACAAATAGCTAACATTAAATAGTCAAGGAACGCAA +ACAGGAAGAATTTTTGATGTCTAAGGTGTGAATTATTATCACAATAAAAGTGATTCTTAG +TTTTGAATTTAAAGCTAGCTTATTATTACTAGCCGTTTTTCAAAGTTCAATTTGAGTCTT +AATGCAAATAAGCGTTAAGCCACAGTTATAGCCATAATGGTAACTCAATATCTTAGCCAG +CGATTTATCTAAATTAAATTACATTATGCTTTTATAACTTACCTACTAGCCTGCCCAACA +TTTACACGATCGTTTTATAATTAAGAAAAAACTAATGATGAAGATTAAAACCTTCATCAT +CCTTACGTCAATTGAATTCTCTAGCACTAGAAGCTTATTGTCTTCAATGTAAAAGAAAAG +CTGGCCTAACAAGATGACAACTAGAACAAAGGGCAGGGGCCATACTGTGGCCACGACTCA +AAACGACAGAATGCCAGGCCCTGAGCTTTCGGGCTGGATCTCTGAGCAGCTAATGACCGG +AAGGATTCCTGTAAACGACATCTTCTGTGATATTGAGAACAATCCAGGATTATGCTACGC +ATCCCAAATGCAACAAACGAAGCCAAACCCGAAGATGCGCAACAGTCAAACCCAAACGGA +CCCAATTTGCAATCATAGTTTTGAGGAGGTAGTACAAACATTGGCTTCATTGGCTACTGT +TGTGCAACAACAAACCATCGCATCAGAATCATTAGAACAACGCATTACGAGTCTTGAGAA +TGGTCTAAAGCCAGTTTATGATATGGCAAAAACAATCTCCTCATTGAACAGGGTTTGTGC +TGAGATGGTTGCAAAATATGATCTTCTGGTGATGACAACCGGTCGGGCAACAGCAACCGC +TGCGGCAACTGAGGCTTATTGGGCTGAACATGGTCAACCACCACCTGGACCATCACTTTA +TGAAGAAAGTGCGATTCGGGGTAAGATTGAATCTAGAGATGAGACTGTCCCTCAAAGTGT +TAGGGAGGCATTCAACAATCTAGACAGTACCACTTCACTAACTGAGGAAAATTTTGGGAA +ACCTGACATTTCGGCAAAGGATTTGAGAAACATTATGTATGATCACTTGCCTGGTTTTGG +AACTGCTTTCCACCAATTAGTACAAGTGATTTGTAAATTGGGAAAAGATAGCAATTCATT +GGACATTATTCATGCTGAGTTCCAGGCCAGCCTGGCTGAAGGAGACTCCCCTCAATGTGC +CCTAATTCAAATTACAAAAAGAGTTCCAATCTTCCAAGATGCTGCTCCACCTGTCATCCA +CATCCGCTCTCGAGGTGACATTCCCCGAGCTTGCCAGAAGAGCTTGCGTCCAGTCCCACC +ATCACCCAAGATTGATCGAGGTTGGGTATGTGTTTTTCAGCTTCAAGATGGTAAAACACT +TGGACTCAAAATTTGAGCCAATCTCTTTTCCCTCCGAAAGAGGCAACTAATAGCAGAGGC +TTCAACTGCTGAACTATAGGGTATGTTACATTAATGATACACTTGTGAGTATCAGCCCTA +GATAATATAAGTCAATTAAACAACCAAGATAAAATTGTTCATATCCCGCTAGCAGCTTTA +AAGATAAATGTAATAGGAGCTATACCTCTGACAGTATTATAATTAATTGTTATTAAGTAA +CCCAAACCAAAAATGATGAAGATTAAGAAAAACCTACCTCGACTGAGAGAGTGTTTTTTC +ATTAACCTTCATCTTGTAAACGTTGAGCAAAATTGTTAAAAATATGAGGCGGGTTATATT +GCCTACTGCTCCTCCTGAATATATGGAGGCCATATACCCTGCCAGGTCAAATTCAACAAT +TGCTAGGGGTGGCAACAGCAATACAGGCTTCCTGACACCGGAGTCAGTCAATGGAGACAC +TCCATCGAATCCACTCAGGCCAATTGCTGATGACACCATCGACCATGCCAGCCACACACC +AGGCAGTGTGTCATCAGCATTCATCCTCGAAGCTATGGTGAATGTCATATCGGGCCCCAA +AGTGCTAATGAAGCAAATTCCAATTTGGCTTCCTCTAGGTGTCGCTGATCAAAAGACCTA +CAGCTTTGACTCAACTACGGCCGCCATCATGCTTGCTTCATATACTATCACCCATTTCGG +CAAGGCAACCAATCCGCTTGTCAGAGTCAATCGGCTGGGTCCTGGAATCCCGGATCACCC +CCTCAGGCTCCTGCGAATTGGAAACCAGGCTTTCCTCCAGGAGTTCGTTCTTCCACCAGT +CCAACTACCCCAGTATTTCACCTTTGATTTGACAGCACTCAAACTGATCACTCAACCACT +GCCTGCTGCAACATGGACCGATGACACTCCAACTGGATCAAATGGAGCGTTGCGTCCAGG +AATTTCATTTCATCCAAAACTTCGCCCCATTCTTTTACCCAACAAAAGTGGGAAGAAGGG +GAACAGTGCCGATCTAACATCTCCGGAGAAAATCCAAGCAATAATGACTTCACTCCAGGA +CTTTAAGATCGTTCCAATTGATCCAACCAAAAATATCATGGGTATCGAAGTGCCAGAAAC +TCTGGTCCACAAGCTGACCGGTAAGAAGGTGACTTCCAAAAATGGACAACCAATCATCCC +TGTTCTTTTGCCAAAGTACATTGGGTTGGACCCGGTGGCTCCAGGAGACCTCACCATGGT +AATCACACAGGATTGTGACACGTGTCATTCTCCTGCAAGTCTTCCAGCTGTGGTTGAGAA +GTAATTGCAATAATTGACTCAGATCCAGTTTTACAGAATCTTCTCAGGGATAGTGATAAC +ATCTTTTTAATAATCCGTCTACTAGAAGAGATACTTCTAATTGATCAATATACTAAAGGT +GCTTTACACCATTGTCTCTTTTCTCTCCTAAATGTAGAGCTTAACAAAAGACTCATAATA +TACCTGTTTTTAAAAGATTGATTGATGAAAGATCATGACTAATAACATTACAAACAATCC +TACTATAATCAATACGGTGATTCAAATGTCAATCTTTCTCATTGCACATACTCTTTGTCC +TTATCCTCAAATTGCCTACATGCTTACATCTGAGGACAGCCAGTGTGACTTGGATTGGAG +ATGTGGAGGAAAAATCGGGGCCCATTTCTAAGTTGTTCACAATCTAAGTACAGACATTGC +TCTTCTAATTAAGAAAAAATCGGCGATGAAGATTAAGCCGACAGTGAGCGTAATCTTCAT +CTCTCTTAGATTATTTGTCTTCCAGAGTAGGGGTCATCAGGTCCTTTTCAATTGGATAAC +CAAAATAAGCTTCACTAGAAGGATATTGTGAGGCGACAACACAATGGGTGTTACAGGAAT +ATTGCAGTTACCTCGTGATCGATTCAAGAGGACATCATTCTTTCTTTGGGTAATTATCCT +TTTCCAAAGAACATTTTCCATCCCGCTTGGAGTTATCCACAATAGTACATTACAGGTTAG +TGATGTCGACAAACTAGTTTGTCGTGACAAACTGTCATCCACAAATCAATTGAGATCAGT +TGGACTGAATCTCGAGGGGAATGGAGTGGCAACTGANNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN +NNNNNNTTCCTCAATCAGATAATGAGGAAGCTTCAACCAACCCGGGGACATGCTCATGGT +CTGATGAGGGTACCCCTTAATAAGGCTGACTAAAACACTATATAACCTTCTACTTGATCA +CAATACTCCGTATACCTATCATCATATATTTAATCAAGACGATATCCTTTAAAACTTATT +CAGTACTATAATCACTCTCATTTCAAATTGATAAGATATGCATAATTGCCTTAATATATA +AAGAGGTATGATATAACCCAAACATTGACCAAAGAAAATCATAATCTCGTATCGCTCGCA +ATATAACCTGCCAAGCATACCTCTTGCACAAAGTGATTCTTGTACACAAATAATGTTTGA +CTCTACAGGAGGTAGCAACGATCCATCTCATCAAAAAATAAGTATTTTATGATTTACTAA +TGATCTCTTAAAATATTAAGAAAAACTGACGGAACATAAATTCTTTCTGCTTCAAGTTGT +GGAGGAGGTCTATGGTATTCGCTATTGTTATATTACAATCAATAACAAGCTTGTAAAAAT +ATTGTTCTTGTTTCAGGAGGTATATTGTGACCGGAAAAGCTAAACTAATGATGAAGATTA +ATGCGGAGGTTTGATGAGAATAAACCTTATTATTCAGATTAGGCCCCAAGAGGCATTCTT +CATCTCCTTTTAGCAAAATACTATTTCAGGATAGTCCAGCTAGTGACACGTCTTTTAGCT +GTATACCAGTTGCCCCTGAGATACGCCACAAAAGTGTCTCTGAGCTAAAGTGGTCTGTAC +ACATCTCATACATTGTATTAGGGGCAATAATATCTAATTGAACTTAGCCATTTAAAATTT +AGTGCATAAATCTGGGCTAACTCCACCAGGTCAACTCCATTGGCTGAAAAGAAGCCCACC +TACAACGAACATTACTTTGAGCACCCTCACAATTAAAAAATAAGAGCGTCGTTCCAACAA +TCGAGCGCAAGGTTACAAGGTTGAACTGAGAGTGTCTAGACAACAAAATATCGATACTCC +AGACACCAAGCAAGACCTGAGAAAAAACCATGGCCAAAGCTACGGGACGATACAATCTAA +TATCGCCCAAAAAGGACCTGGAGAAAGGGGTTGTCTTAAGCGACCTCTGTAACTTCTTAG +TTAGTCAAACTATTCAAGGGTGGAAAGTTTATTGGGCTGGTATTGAGTTTGATGTGACTC +ACAAAGGAATGGCCCTATTGCATAGACTGAAAACTAATGACTTTGCCCCTGCATGGTCAA +TGACAAGGAACCTATTTCCCCATTTATTTCAAAATCCGAATTCCACTATTGAATCACCGC +TGTGGGCACTGAGAGTCATCCTTGCAGCAGGGATACAGGACCAGTTAATTGACCAGTCTT +TGATTGAACCCTTAGCAGGAGCCCTTGGTCTGATCTCTGATTGGCTGCTAACAACCAACA +CTAACCATTTCAACATGCGAACACAACGTGTCAAGGAACAATTGAGCCTAAAAATGCTGT +CGTTGATTCGATCCAATATTCTCAAGTTTATTAACAAATTGGATGCTCTACATGTCGTGA +ACTACAATGGATTATTGAGCAGTATTGAAATTGGAACTCAAAATCATACAATCATCATAA +CTCGAACTAACATGGGTTTTCTGGTGGAGCTCCAAGAACCCGACAAATCGGCAATGAACC +GCAAGAAGCCTGGGCCGGCGAAATTTTCCCTCCTTCATGAGTCCACACTGAAAGCATTTA +CACAAGGGTCCTCGACACGAATGCAAAGTTTAATTCTTGAATTCAATAGCTCTCTTGCTA +TCTAACTAAGATGGAATACTTCATATTGGGCTAACTCATATATGCTGACTCAATAGTTAA +CTTGACATCTCTGCCTTCATAATCAGATATATAAGCATAATAAATAAATACTCATATTTC +TTGATAATTTGTTTAACCACAGATAAATCCTCACTGTAAGCCAGCTTCCAAGTTGACACC +CTTACAAAAACCAGGACTCAGAATCCCTCAAATAAGAGATTCCAAGACAACATCATAGAA +TTGCTTTATTATATTAATAAGCATTTTATCACTAGAAATCCAATATACGAAATGGTTAAT +TGTAACTAAACCCGCAGGTCATGTGTGTTAGGTTTCACAAATTATATATATTACTAACTC +CATACTCGTAACTAACATTAGATAAGTAGGTTAAGAAAAAAGCTTGAGGAAGATTAAGAA +AAACTGCTTATTGGGTCTTTCCGTGTTTTAGATGAAGCAGTTGACATTCTTCCTCTTGAT +ATTAAATGGCTACACAACATACCCAATACCCAGACGCCAGGTTATCATCACCAATTGTAT +TGGACCAATGTGACCTTGTCACTAGAGCTTGCGGGTTGTATTCATCATACTCCCTTAATC +CGCAACTACGCAACTGTAAACTCCCGAAACATATATACCGTTTAAAATATGATGTAACTG +TTACCAAGTTCTTAAGTGATGTACCAGTGGCGACATTGCCCATAGATTTCATAGTCCCAA +TTCTTCTCAAGGCACTATCAGGCAATGGGTTCTGTCCTGTTGAGCCGCGGTGCCAACAGT +TCTTAGATGAAATTATTAAGTACACAATGCAAGATGCTCTCTTCCTGAAATATTATCTCA +AAAATGTGGGTGCTCAAGAAGACTGTGTTGATGACCACTTTCAAGAAAAAATCTTATCTT +CAATTCAGGGCAATGAATTTTTACATCAAATGTTTTTCTGGTATGACCTGGCTATTTTAA +CTCGAAGGGGTAGATTAAATCGAGGAAACTCTAGATCAACGTGGTTTGTTCATGATGATT +TAATAGACATCTTAGGCTATGGGGACTATGTTTTTTGGAAGATCCCAATTTCACTGTTAC +CACTGAACACACAAGGAATCCCCCATGCTGCTATGGATTGGTATCAGACATCAGTATTCA +AAGAAGCGGTTCAAGGGCATACACACATTGTTTCTGTTTCTACTGCCGATGTCTTGATAA +TGTGCAAAGATTTAATTACATGTCGATTCAACACAACTCTAATCTCAAAAATAGCAGAGG +TTGAGGACCCATTTTGCTCTGATTATCCCAATTTTAAGATTGTGTCTATGCTTTACCAGA +GCGGAGATTACTTACTCTCCATATTAGGGTCTGATGGGTATAAAATCATTAAGTTTCTCG +AACCATTGTGCTTGGCTAAAATTCAATTGTGCTCAAAGTACACCGAGAGGAAGGGCCGAT +TCTTAACACAAATGCATTTAGCTGTAAATCACACCCTGGAAGAAATTACAGAAATACGTG +CACTAAAGCCTTCACAGGCTCACAAGATCCGTGAATTCCATAGAACATTGATAAGGCTGG +AGATGACGCCACAACAACTTTGTGAGCTATTTTCCATACAAAAACACTGGGGGCATCCTG +TGCTACATAGTGAAACAGCAATCCAAAAAGTTAAAAAACATGCTACGGTGCTAAAAGCAT +TACGCCCTATCGTGATTTTCGAGACATATTGTGTTTTTAAATATAGCATTGCAAAACATT +ATTTTGATAGTCAAGGATCTTGGTACAGTGTTACCTCAGATAGAAATCTAACACCAGGTC +TTAATTCTTATATCAAAAGAAATCAATTCCCTCCGTTGCCAATGATTAAAGAACTGCTAT +GGGAATTTTACCACCTTGACCATCCTCCACTTTTCTCAACCAAAATTATTAGTGACTTAA +GTATTTTTATAAAAGACAGAGCTACTGCAGTAGAAAGGACATGCTGGGATGCAGTATTCG +AGCCTAATGTTCTGGGATATAATCCACCTCACAAATTCAGTACCAAACGTGTACCGGAAC +AATTTTTAGAGCAAGAAAACTTTTCTATTGAGAATGTTCTTTCCTACGCGCAAAAACTCG +AGTATCTACTACCACAATATCGGAATTTTTCTTTCTCATTGAAAGAGAAAGAGTTGAATG +TAGGTAGAACTTTCGGAAAATTGCCTTATCCGACTCGCAATGTTCAAACACTTTGTGAAG +CTCTGTTAGCTGATGGTCTTGCTAAAGCATTTCCTAGCAATATGATGGTAGTTACGGAAC +GTGAACAAAAAGAAAGCTTATTGCATCAAGCATCATGGCACCACACAAGTGATGATTTCG +GTGAGCATGCCACAGTTAGAGGGAGTAGCTTTGTAACTGATTTAGAGAAATACAATCTTG +CATTTAGGTATGAGTTTACAGCACCTTTTATAGAATATTGCAACCGTTGCTATGGTGTTA +AGAATGTTTTTAATTGGATGCATTATACAATCCCACAGTGTTATATGCATGTCAGTGATT +ATTATAATCCACCGCATAACCTCACACTGGAAAATCGAAACAACCCCCCTGAAGGGCCTA +GTTCATACAGGGGTCATATGGGAGGGATTGAAGGACTGCAACAAAAACTCTGGACAAGTA +TTTCATGTGCTCAAATTTCTTTAGTTGAAATTAAGACTGGTTTTAAGTTGCGCTCAGCTG +TGATGGGTGACAATCAGTGCATTACCGTTTTATCAGTCTTCCCCTTAGAGACTGATGCAG +GCGAGCAGGAACAGAGCGCCGAGGACAATGCAGCGAGGGTGGCCGCCAGCCTAGCAAAAG +TTACAAGTGCCTGTGGAATCTTTTTAAAACCTGATGAAACATTTGTACATTCAGGTTTTA +TCTATTTTGGAAAAAAACAATATTTGAATGGGGTCCAATTGCCTCAGTCCCTTAAAACGG +CTACAAGAATGGCACCATTGTCTGATGCAATTTTTGATGATCTTCAAGGGACCCTGGCTA +GTATAGGTACTGCTTTTGAGCGATCCATCTCTGAGACACGACATATCTTTCCTTGCAGAA +TAACCGCAGCTTTCCATACGTTCTTTTCGGTGAGAATCTTGCAATATCATCACCTCGGAT +TTAATAAAGGTTTTGACCTTGGACAGTTAACACTCGGCAAACCTCTGGATTTCGGAACAA +TATCATTGGCACTAGCGGTACCGCAGGTGCTTGGAGGGTTATCCTTCTTGAATCCTGAGA +AATGTTTCTACCGGAATCTAGGAGATCCAGTTACCTCAGGTTTATTCCAGTTAAAAACTT +ATCTCCGAATGATTGAGATGGATGATTTATTCTTACCTTTAATTGCGAAGAACCCTGGGA +ACTGCACTGCCATTGACTTTGTGCTAAATCCTAGCGGATTAAATGTTCCTGGGTCGCAAG +ACTTAACTTCATTTCTGCGCCAGATTGTACGTAGGACTATCACCCTAAGTGCGAAAAACA +AACTTATTAATACCTTATTTCATGCATCAGCTGACTTCGAAGACGAAATGGTTTGTAAGT +GGCTCTTATCATCAACTCCTGTTATGAGTCGTTTCGCAGCCGATATATTTTCACGCACGC +CGAGCGGGAAGCGATTGCAAATTCTAGGATACTTGGAAGGAACACGCACATTATTAGCCT +CTAAGATCATCAACAATAATACAGAGACGCCGGTTTTGGACAGACTGAGGAAGATACATT +GCAAAGGTGGAGTCTATGGTTTAGTTATCTTGATCATTGTGATAATATCCTGGCGGAGGC +TTTAACCCAAATAACTTGCACAGTTGATTTAGCACAGATCCTGAGGGAATATTCATGGGC +ACATATTTTAGAGGGGAGACCTCTTATTGGAGCCACACTCCCATGTATGATTGAGCAATT +CAAAGTGGTTTGGCTGAAACCCTACGAACAATGTCCGCAGTGTTCAAATGCCAAGCAACC +TGGTGGGAAACCATTCGTGTCAGTAGCAGTCAAGAAACATATTGTTAGTGCATGGCCAAA +TGCATCCCGAATAAGCTGGACTATCGGGGATGGAATCCCATACATTGGATCAAGGACAGA +AGATAAGATAGGGCAACCTGCTATTAAACCAAAATGTCCTTCCGCAGCCTTAAGAGAGGC +CATTGAATTGGCGTCCCGTTTAACATGGGTAACTCAAGGCAGTTCGAACAGTGACTTGCT +AATAAAACCATTTTTGGAAGCACGAGTAAATTTAAGTGTTCAAGAAATACTTCAAATGAC +CCCTTCACATTACTCGGGAAATATTGTTCATAGGTACAACGATCAATACAGTCCTCATTC +TTTCATGGCCAATCGTATGAGTAACTCAGCAACGCGATTGATTGTTTCTACAAACACTTT +AGGTGAGTTTTCAGGAGGTGGCCAATCGGCACGCGACAGCAATATTATTTTCCAGAATGT +TATAAATTATGCAGTTGCACTGTTCGATATTAAATTTAGAAACACTGAGGCTACAGATAT +CCAGTATAATCGTGCTCACCTTCATCTAACTAAGTGTTGCACCCGGGAGGTACCAGCTCA +GTACTTAACATACACATCTACATTGGATTTAGATTTAACAAGATACCGAGAAAATGAATT +GATTTATGACAATAATCCTCTAAAAGGAGGACTCAATTGCAATATCTCATTTGATAACCC +ATTTTTCCAAGGCAAACAGCTGAACATTATAGAAGATGACCTTATTCGACTGCCTCACTT +ATCTGGATGGGAGCTAGCTAAGACCATCATGCAATCAATTATTTCAGATAGCAATAATTC +GTCTACAGACCCAATTAGCAGTGGAGAAACAAGATCATTCACTACCCATTTCTTAACTTA +TCCCAAAATAGGACTTCTGTACAGTTTTGGGGCCTTTGTAAGTTATTATCTTGGCAATAC +AATTCTTCGGACTAAGAAATTAACACTTGACAATTTTTTATATTACTTAACTACCCAAAT +TCATAATCTACCACATCGCTCATTGCGAATACTTAAGCCAACATTCAAACATGCAAGCGT +TATGTCACGATTAATGAGTATTGATCCCCATTTTTCTATTTACATAGGCGGTGCTGCAGG +TGACAGAGGACTCTCAGATGCGGCCAGGTTATTTTTGAGAACGTCCATTTCATCTTTTCT +TACATTTGTAAAGGAATGGATAATTAATCGCGGAACAATTGTCCCTTTATGGATAGTATA +TCCATTAGAGGGTCAAAATCCAACACCTGTTAATAATTTCCTCCATCAGATCGTAGAACT +GCTGGTGCATGATTCATCAAGACACCAGGCTTTTAAAACTACCATAAATGATCATGTACA +TCCTCACGACAATCTTGTTTACACATGTAAGAGTACAGCCAGCAATTTCTTCCATGCGTC +ATTGGCGTACTGGAGGAGCAGGCACAGAAACAGCAACCGAAAAGACTTGACAAGAAACTC +TTCAACTGGATCAAGCACAAACAACAGTGATGGTCATATTAAGAGAAGTCAAGAACAAAC +CACCAGAGATCCACATGATGGCACTGAACGGAGTCTAGTCCTGCAAATGAGCCATGAAAT +AAAAAGAACGACAATTCCACAAGAGAACACGCACCAGGGTCCGTCGTTCCAGTCATTTCT +AAGTGACTCTGCTTGCGGTACAGCAAACCCAAAACTAAATTTCGATAGATCGAGACACAA +TGTGAAATCTCAGGATCATAACTCAGCATCCAAGAGGGAAGGTCATCAAATAATCTCACA +TCGTCTAGTCCTACCTTTCTTTACATTATCTCAAGGGACACGCCAATTAACGTCATCCAA +TGAGTCACAAACCCAAGATGAGATATCAAAGTACTTACGGCAATTGAGATCCGTCATTGA +TACCACAGTTTATTGTAGGTTTACCGGTATAGTCTCGTCCATGCATTACAAACTTGATGA +GGTCCTTTGGGAAATAGAGAATTTTAAGTCGGCTGTGACGCTGGCAGAGGGAGAAGGTGC +TGGTGCCTTACTATTGATTCAGAAATACCAAGTTAAGACCTTATTCTTCAACACGCTAGC +TACTGAGTCCAGTATAGAGTCAGAAATAGTATCAGGAATGACTACTCCTAGGATGCTTCT +ACCTGTTATGTCAAAATTCCATAATGACCAAATTGAGATTATTCTTAACAACTCAGCAAG +CCAAATAACAGACATAACAAATCCTACTTGGTTTAAAGACCAAAGAGCAAGGCTACCTAG +GCAAGTCGAGGTTATAACCATGGATGCAGAGACGACAGAGAATATAAACAGATCGAAATT +GTACGAAGCTGTACATAAATTGATCTTACACCATGTTGATCCCAGCGTGTTGAAAGCAGT +GGTCCTTAAAGTCTTTCTAAGTGATACCGAGGGTATGTTATGGCTAAATGATAATCTAGC +CCCGTTTTTTGCCACTGGGTATTTAATTAAGCCAATAACGTCAAGTGCCAGGTCTAGTGA +GTGGTATCTTTGTCTGACGAACTTCTTATCAACTACACGTAAGATGCCACACCAAAACCA +TCTCAGTTGTAAGCAGGTAATACTTACGGCATTGCAACTGCAAATTCAACGGAGCCCATA +CTGGCTAAGTCATTTAACTCAGTATGCTGACTGCGATTTACATTTAAGCTATATCCGCCT +TGGTTTTCCATCATTAGAGAAAGTACTATACCACAGGTATAACCTTGTCGATTCAAAAAG +AGGTCCACTAGTCTCTGTCACTCAGCACTTAGCACATCTTAGGGCAGAGATTCGAGAATT +GACCAATGATTATAATCAACAGCGACAAAGTCGGACTCAAACATATCACTTTATTCGTAC +TGCAAAAGGACGAATCACAAAACTAGTCAATGATTATTTAAAATTCTTTCTTATTGTACA +AGCATTAAAACATAATGGGACATGGCAAGCTGAGTTTAAGAAATTACCAGAGTTGATTAG +TGTGTGCAATAGGTTCTATCATATTAGAGATTGTAATTGTGAAGAACGTTTCTTAGTTCA +AACCTTATATTTACATAGAATGCAGGATTCTGAAGTTAAGCTTATTGAAAGGCTGACAGG +GCTTCTGAGTTTATTTCCAGATGGTCTCTACAGGTTCGATTGAATAACCGTGCATAGTAT +TTTGATACTTGTAAAGGTTGGTTATCAACATACAGATTATAAAAAACTCATAAATTGCTC +TCATACATCATCTTGATCTGATTTCAATAAATAACTATTTAGATAACGAAAGGAGTCCTT +ACATTATACACTATATTTGGCCTCTCTCCCTGCGTGATAATCAAAAAATTCACAATACAG +CATGTGTGACATATTACTGCTGCAATGAGTCTAACGCAACATAATAAACTCCGCACTCTT +TATAATTAAGCTTTAACGATAGGTCTGGGCTCATATTGTTATTGATATAGTAATGTTGTA +TCAATATCTTGCCAGATGGAATAGTGCTTTGGTTGATAACACGACTTCTTAAAACAAAAC +TTAATCTTAAAGATCAGTTTT diff --git a/test/input/TestOrderAndOrient/expected.lasv.ambig.fasta b/test/input/TestOrderAndOrient/expected.lasv.ambig.fasta index f811a8e53..bcca56396 100644 --- a/test/input/TestOrderAndOrient/expected.lasv.ambig.fasta +++ b/test/input/TestOrderAndOrient/expected.lasv.ambig.fasta @@ -101,10 +101,10 @@ CTGTCTTTAGCTCTCTTCCTACTGTCTATCCACCGTTTNNNNNNNNNNNNNNNNNNNNNN NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN -NNNNNNNNNTCAGTTTTAACAAATTGACTTTCTATGTTCCCTTGCCTTAGTTTGTTCCTA +NNNCAACACTGCATCTGCTCACTTTGAGAGTCAGCAGCCCTTTCTTCACCAGAATCTAAA +TCTGCATATAAGAACTTAAGGACTGGAGAGGCTCTTCGGAACTGGTATGTTAACTGCTCA +ATATTGTCATCACAAGTGTCTATTTTATCACTATATAGCTTTCTGAAATCCCCTAACAGC +TTCATCTTATCAGTTTTAACAAATTGACTTTCTATGTTCCCTTGCCTTAGTTTGTTCCTA AAGATTTGATACTCCTCTTCAATTAAACTCTTGACTTCATGAGAGGTGAGCTTGTTATTA ATGCCCTGATGACAACAAGAAATTATTTCCTCAAAGTGTTTTGCACGCTTGTCAGTTAGA ACATTAATGCTTTCAATCCCGGAGAGCCTCCCAGACGTTAAGGCTAAAGATTCACAAAGT diff --git a/test/input/TestOrderAndOrient/ref.ebov.makona_C15.fasta b/test/input/TestOrderAndOrient/ref.ebov.makona_C15.fasta new file mode 100644 index 000000000..3ec1a60e7 --- /dev/null +++ b/test/input/TestOrderAndOrient/ref.ebov.makona_C15.fasta @@ -0,0 +1,273 @@ +>KJ660346.2 Zaire ebolavirus isolate H.sapiens-wt/GIN/2014/Makona-Kissidougou-C15, complete genome +CGGACACACAAAAAGAAAGAAGAATTTTTAGGATCTTTTGTGTGCGAATAACTATGAGGAAGATTAATAA +TTTTCCTCTCATTGAAATTTATATCGGAATTTAAATTGAAATTGTTACTGTAATCATACCTGGTTTGTTT +CAGAGCCATATCACCAAGATAGAGAACAACCTAGGTCTCCGGAGGGGGCAAGGGCATCAGTGTGCTCAGT +TGAAAATCCCTTGTCAACATCTAGGCCTTATCACATCACAAGTTCCGCCTTAAACTCTGCAGGGTGATCC +AACAACCTTAATAGCAACATTATTGTTAAAGGACAGCATTAGTTCACAGTCAAACAAGCAAGATTGAGAA +TTAACTTTGATTTTGAACCTGAACACCCAGAGGACTGGAGACTCAACAACCCTAAAGCCTGGGGTAAAAC +ATTAGAAATAGTTTAAAGACAAATTGCTCGGAATCACAAAATTCCGAGTATGGATTCTCGTCCTCAGAAA +GTCTGGATGACGCCGAGTCTCACTGAATCTGACATGGATTACCACAAGATCTTGACAGCAGGTCTGTCCG +TTCAACAGGGGATTGTTCGGCAAAGAGTCATCCCAGTGTATCAAGTAAACAATCTTGAGGAAATTTGCCA +ACTTATCATACAGGCCTTTGAAGCTGGTGTTGATTTTCAAGAGAGTGCGGACAGTTTCCTTCTCATGCTT +TGTCTTCATCATGCGTACCAAGGAGATTACAAACTTTTCTTGGAAAGTGGCGCAGTCAAGTATTTGGAAG +GGCACGGGTTCCGTTTTGAAGTCAAGAAGCGTGATGGAGTGAAGCGCCTTGAGGAATTGCTGCCAGCAGT +ATCTAGTGGGAGAAACATTAAGAGAACACTTGCTGCCATGCCGGAAGAGGAGACGACTGAAGCTAATGCC +GGTCAGTTCCTCTCCTTTGCAAGTCTATTCCTTCCGAAATTGGTAGTAGGAGAAAAGGCTTGCCTTGAGA +AGGTTCAAAGGCAAATTCAAGTACATGCAGAGCAAGGACTGATACAATATCCAACAGCTTGGCAATCAGT +AGGACACATGATGGTGATTTTCCGTTTGATGCGAACAAATTTTTTGATCAAATTTCTTCTAATACACCAA +GGGATGCACATGGTTGCCGGACATGATGCCAACGATGCTGTGATTTCAAATTCAGTGGCTCAAGCTCGTT +TTTCAGGTCTATTGATTGTCAAAACAGTACTTGATCATATCCTACAAAAGACAGAACGAGGAGTTCGTCT +CCATCCTCTTGCAAGGACCGCCAAGGTAAAAAATGAGGTGAACTCCTTCAAGGCTGCACTCAGCTCCCTG +GCCAAGCATGGAGAGTATGCTCCTTTCGCCCGACTTTTGAACCTTTCTGGAGTAAATAATCTTGAGCATG +GTCTTTTCCCTCAACTGTCGGCAATTGCACTCGGAGTCGCCACAGCCCACGGGAGCACCCTCGCAGGAGT +AAATGTTGGAGAACAGTATCAACAGCTCAGAGAGGCAGCCACTGAGGCTGAGAAGCAACTCCAACAATAT +GCGGAGTCTCGTGAACTTGACCATCTTGGACTTGATGATCAGGAAAAGAAAATTCTTATGAACTTCCATC +AGAAAAAGAACGAAATCAGCTTCCAGCAAACAAACGCGATGGTAACTCTAAGAAAAGAGCGCCTGGCCAA +GCTGACAGAAGCTATCACTGCTGCATCACTGCCCAAAACAAGTGGACATTACGATGATGATGACGACATT +CCCTTTCCAGGACCCATCAATGATGACGACAATCCTGGCCATCAAGATGATGATCCGACTGACTCACAGG +ATACGACCATTCCCGATGTGGTAGTTGATCCCGATGATGGAGGCTACGGCGAATACCAAAGTTACTCGGA +AAACGGCATGAGTGCACCAGATGACTTGGTCCTATTCGATCTAGACGAGGACGACGAGGACACCAAGCCA +GTGCCTAACAGATCGACCAAGGGTGGACAACAGAAAAACAGTCAAAAGGGCCAGCATACAGAGGGCAGAC +AGACACAATCCACGCCAACTCAAAACGTCACAGGCCCTCGCAGAACAATCCACCATGCCAGTGCTCCACT +CACGGACAATGACAGAAGAAACGAACCCTCCGGCTCAACCAGCCCTCGCATGCTGACCCCAATCAACGAA +GAGGCAGACCCACTGGACGATGCCGACGACGAGACGTCTAGCCTTCCGCCCTTAGAGTCAGATGATGAAG +AACAGGACAGGGACGGAACTTCTAACCGCACACCCACTGTCGCCCCACCGGCTCCCGTATACAGAGATCA +CTCCGAAAAGAAAGAACTCCCGCAAGATGAACAACAAGATCAGGACCACATTCAAGAGGCCAGGAACCAA +GACAGTGACAACACCCAGCCAGAACATTCTTTTGAGGAGATGTATCGCCACATTCTAAGATCACAGGGGC +CATTTGATGCCGTTTTGTATTATCATATGATGAAGGATGAGCCTGTAGTTTTCAGTACCAGTGATGGTAA +AGAGTACACGTATCCGGACTCCCTTGAAGAGGAATATCCACCATGGCTCACTGAAAAAGAGGCCATGAAT +GATGAGAATAGATTTGTTACACTGGATGGTCAACAATTTTATTGGCCAGTAATGAATCACAGGAATAAAT +TCATGGCAATCCTGCAACATCATCAGTGAATGAGCATGTAATAATGGGATGATTTAATCGACAAATAGCT +AACATTAAATAGTCAAGGAACGCAAACAGGAAGAATTTTTGATGTCTAAGGTGTGAATTATTATCACAAT +AAAAGTGATTCTTAGTTTTGAATTTAAAGCTAGCTTATTATTACTAGCCGTTTTTCAAAGTTCAATTTGA +GTCTTAATGCAAATAAGCGTTAAGCCACAGTTATAGCCATAATGGTAACTCAATATCTTAGCCAGCGATT +TATCTAAATTAAATTACATTATGCTTTTATAACTTACCTACTAGCCTGCCCAACATTTACACGATCGTTT +TATAATTAAGAAAAAACTAATGATGAAGATTAAAACCTTCATCATCCTTACGTCAATTGAATTCTCTAGC +ACTAGAAGCTTATTGTCTTCAATGTAAAAGAAAAGCTGGCCTAACAAGATGACAACTAGAACAAAGGGCA +GGGGCCATACTGTGGCCACGACTCAAAACGACAGAATGCCAGGCCCTGAGCTTTCGGGCTGGATCTCTGA +GCAGCTAATGACCGGAAGGATTCCTGTAAACGACATCTTCTGTGATATTGAGAACAATCCAGGATTATGC +TACGCATCCCAAATGCAACAAACGAAGCCAAACCCGAAGATGCGCAACAGTCAAACCCAAACGGACCCAA +TTTGCAATCATAGTTTTGAGGAGGTAGTACAAACATTGGCTTCATTGGCTACTGTTGTGCAACAACAAAC +CATCGCATCAGAATCATTAGAACAACGCATTACGAGTCTTGAGAATGGTCTAAAGCCAGTTTATGATATG +GCAAAAACAATCTCCTCATTGAACAGGGTTTGTGCTGAGATGGTTGCAAAATATGATCTTCTGGTGATGA +CAACCGGTCGGGCAACAGCAACCGCTGCGGCAACTGAGGCTTATTGGGCTGAACATGGTCAACCACCACC +TGGACCATCACTTTATGAAGAAAGTGCGATTCGGGGTAAGATTGAATCTAGAGATGAGACTGTCCCTCAA +AGTGTTAGGGAGGCATTCAACAATCTAGACAGTACCACTTCACTAACTGAGGAAAATTTTGGGAAACCTG +ACATTTCGGCAAAGGATTTGAGAAACATTATGTATGATCACTTGCCTGGTTTTGGAACTGCTTTCCACCA +ATTAGTACAAGTGATTTGTAAATTGGGAAAAGATAGCAATTCATTGGACATTATTCATGCTGAGTTCCAG +GCCAGCCTGGCTGAAGGAGACTCCCCTCAATGTGCCCTAATTCAAATTACAAAAAGAGTTCCAATCTTCC +AAGATGCTGCTCCACCTGTCATCCACATCCGCTCTCGAGGTGACATTCCCCGAGCTTGCCAGAAGAGCTT +GCGTCCAGTCCCACCATCACCCAAGATTGATCGAGGTTGGGTATGTGTTTTTCAGCTTCAAGATGGTAAA +ACACTTGGACTCAAAATTTGAGCCAATCTCTTTTCCCTCCGAAAGAGGCAACTAATAGCAGAGGCTTCAA +CTGCTGAACTATAGGGTATGTTACATTAATGATACACTTGTGAGTATCAGCCCTAGATAATATAAGTCAA +TTAAACAACCAAGATAAAATTGTTCATATCCCGCTAGCAGCTTTAAAGATAAATGTAATAGGAGCTATAC +CTCTGACAGTATTATAATTAATTGTTATTAAGTAACCCAAACCAAAAATGATGAAGATTAAGAAAAACCT +ACCTCGACTGAGAGAGTGTTTTTTCATTAACCTTCATCTTGTAAACGTTGAGCAAAATTGTTAAAAATAT +GAGGCGGGTTATATTGCCTACTGCTCCTCCTGAATATATGGAGGCCATATACCCTGCCAGGTCAAATTCA +ACAATTGCTAGGGGTGGCAACAGCAATACAGGCTTCCTGACACCGGAGTCAGTCAATGGAGACACTCCAT +CGAATCCACTCAGGCCAATTGCTGATGACACCATCGACCATGCCAGCCACACACCAGGCAGTGTGTCATC +AGCATTCATCCTCGAAGCTATGGTGAATGTCATATCGGGCCCCAAAGTGCTAATGAAGCAAATTCCAATT +TGGCTTCCTCTAGGTGTCGCTGATCAAAAGACCTACAGCTTTGACTCAACTACGGCCGCCATCATGCTTG +CTTCATATACTATCACCCATTTCGGCAAGGCAACCAATCCGCTTGTCAGAGTCAATCGGCTGGGTCCTGG +AATCCCGGATCACCCCCTCAGGCTCCTGCGAATTGGAAACCAGGCTTTCCTCCAGGAGTTCGTTCTTCCA +CCAGTCCAACTACCCCAGTATTTCACCTTTGATTTGACAGCACTCAAACTGATCACTCAACCACTGCCTG +CTGCAACATGGACCGATGACACTCCAACTGGATCAAATGGAGCGTTGCGTCCAGGAATTTCATTTCATCC +AAAACTTCGCCCCATTCTTTTACCCAACAAAAGTGGGAAGAAGGGGAACAGTGCCGATCTAACATCTCCG +GAGAAAATCCAAGCAATAATGACTTCACTCCAGGACTTTAAGATCGTTCCAATTGATCCAACCAAAAATA +TCATGGGTATCGAAGTGCCAGAAACTCTGGTCCACAAGCTGACCGGTAAGAAGGTGACTTCCAAAAATGG +ACAACCAATCATCCCTGTTCTTTTGCCAAAGTACATTGGGTTGGACCCGGTGGCTCCAGGAGACCTCACC +ATGGTAATCACACAGGATTGTGACACGTGTCATTCTCCTGCAAGTCTTCCAGCTGTGGTTGAGAAGTAAT +TGCAATAATTGACTCAGATCCAGTTTTACAGAATCTTCTCAGGGATAGTGATAACATCTTTTTAATAATC +CGTCTACTAGAAGAGATACTTCTAATTGATCAATATACTAAAGGTGCTTTACACCATTGTCTCTTTTCTC +TCCTAAATGTAGAGCTTAACAAAAGACTCATAATATACCTGTTTTTAAAAGATTGATTGATGAAAGATCA +TGACTAATAACATTACAAACAATCCTACTATAATCAATACGGTGATTCAAATGTCAATCTTTCTCATTGC +ACATACTCTTTGTCCTTATCCTCAAATTGCCTACATGCTTACATCTGAGGACAGCCAGTGTGACTTGGAT +TGGAGATGTGGAGGAAAAATCGGGGCCCATTTCTAAGTTGTTCACAATCTAAGTACAGACATTGCTCTTC +TAATTAAGAAAAAATCGGCGATGAAGATTAAGCCGACAGTGAGCGTAATCTTCATCTCTCTTAGATTATT +TGTCTTCCAGAGTAGGGGTCATCAGGTCCTTTTCAATTGGATAACCAAAATAAGCTTCACTAGAAGGATA +TTGTGAGGCGACAACACAATGGGTGTTACAGGAATATTGCAGTTACCTCGTGATCGATTCAAGAGGACAT +CATTCTTTCTTTGGGTAATTATCCTTTTCCAAAGAACATTTTCCATCCCGCTTGGAGTTATCCACAATAG +TACATTACAGGTTAGTGATGTCGACAAACTAGTTTGTCGTGACAAACTGTCATCCACAAATCAATTGAGA +TCAGTTGGACTGAATCTCGAGGGGAATGGAGTGGCAACTGACGTGCCATCTGCGACTAAAAGATGGGGCT +TCAGGTCCGGTGTCCCACCAAAGGTGGTCAATTATGAAGCTGGTGAATGGGCTGAAAACTGCTACAATCT +TGAAATCAAAAAACCTGACGGGAGTGAGTGTCTACCAGCAGCGCCAGACGGGATTCGGGGCTTCCCCCGG +TGCCGGTATGTGCACAAAGTATCAGGAACGGGACCATGTGCCGGAGACTTTGCCTTCCACAAAGAGGGTG +CTTTCTTCCTGTATGATCGACTTGCTTCCACAGTTATCTACCGAGGAACGACTTTCGCTGAAGGTGTCGT +TGCATTTCTGATACTGCCCCAAGCTAAGAAGGACTTCTTCAGCTCACACCCCTTGAGAGAGCCGGTCAAT +GCAACGGAGGACCCGTCGAGTGGCTATTATTCTACCACAATTAGATATCAGGCTACCGGTTTTGGAACTA +ATGAGACAGAGTACTTGTTCGAGGTTGACAATTTGACCTACGTCCAACTTGAATCAAGATTCACACCACA +GTTTCTGCTCCAGCTGAATGAGACAATATATGCAAGTGGGAAGAGGAGCAACACCACGGGAAAACTAATT +TGGAAGGTCAACCCCGAAATTGATACAACAATCGGGGAGTGGGCCTTCTGGGAAACTAAAAAAACCTCAC +TAGAAAAATTCGCAGTGAAGAGTTGTCTTTCACAGCTGTATCAAACGGACCCAAAAACATCAGTGGTCAG +AGTCCGGCGCGAACTTCTTCCGACCCAGAGACCAACACAACAAATGAAGACCACAAAATCATGGCTTCAG +AAAATTCCTCTGCAATGGTTCAAGTGCACAGTCAAGGAAGGAAAGCTGCAGTGTCGCATCTGACAACCCT +TGCCACAATCTCCACGAGTCCTCAACCTCCCACAACCAAAACAGGTCCGGACAACAGCACCCATAATACA +CCCGTGTATAAACTTGACATCTCTGAGGCAACTCAAGTTGGACAACATCACCGTAGAGCAGACAACGACA +GCACAGCCTCCGACACTCCCCCCGCCACGACCGCAGCCGGACCCTTAAAAGCAGAGAACACCAACACGAG +TAAGAGCGCTGACTCCCTGGACCTCGCCACCACGACAAGCCCCCAAAACTACAGCGAGACTGCTGGCAAC +AACAACACTCATCACCAAGATACCGGAGAAGAGAGTGCCAGCAGCGGGAAGCTAGGCTTAATTACCAATA +CTATTGCTGGAGTAGCAGGACTGATCACAGGCGGGAGAAGGACTCGAAGAGAAGTAATTGTCAATGCTCA +ACCCAAATGCAACCCCAATTTACATTACTGGACTACTCAGGATGAAGGTGCTGCAATCGGATTGGCCTGG +ATACCATATTTCGGGCCAGCAGCCGAAGGAATTTACACAGAGGGGCTAATGCACAACCAAGATGGTTTAA +TCTGTGGGTTGAGGCAGCTGGCCAACGAAACGACTCAAGCTCTCCAACTGTTCCTGAGAGCCACAACTGA +GCTGCGAACCTTTTCAATCCTCAACCGTAAGGCAATTGACTTCCTGCTGCAGCGATGGGGTGGCACATGC +CACATTTTGGGACCGGACTGCTGTATCGAACCACATGATTGGACCAAGAACATAACAGACAAAATTGATC +AGATTATTCATGATTTTGTTGATAAAACCCTTCCGGACCAGGGGGACAATGACAATTGGTGGACAGGATG +GAGACAATGGATACCGGCAGGTATTGGAGTTACAGGTGTTATAATTGCAGTTATCGCTTTATTCTGTATA +TGCAAATTTGTCTTTTAGTCTTTCTTCAGATTGTTTCACGGCAAAACTCAACCTCAAATCAATGAAACTA +GGATTTAATTATATGAATCACTTGAATCTAAGATTACTTGACAAATGATAACATAATACACTGGAGCTTC +AAACATAGCCAATGTGATTCTAACTCCTTTAAACTCACAGTTAATCATAAACAAGGTTTGACATCAATCT +AGCTATATCTTTAAGAATGATAAACTTGATGAAGATTAAGAAAAAGGTAATCTTTCGATTATCTTTAGTC +TTCATCCTTGATTCTACAATCATGACAGTTGTCTTTAATGAAAAAGGAAAAAAGCCTTTTTATTAAGTTG +TAATAATCAGATCTGCAAACCGGTAGAATTTAGTTGTAACCTAACACACACAAAGCATTGGTAAAAAAGT +CAATAGAAATTTAAACAGTGAGTGCAGACAACTCTTAAATGGAAGCTTCATATGAGAGAGGACGCCCCCG +AGCTGCCAGACAGCATTCAAGGGATGGACACGACCACCATGTTCGAGCACGATCATCATCCAGAGAGAAT +TATCGAGGTGAGTACCGTCAATCAAGGAGCGCCTCACAAGTGCGCGTTCCTACTGTATTTCATAAGAAGA +GAGTTGAACCATTAACAGTTCCTCCAGCACCTAAAGACATATGTCCGACCTTGAAAAAAGGATTTTTGTG +TGACAGTAGTTTTTGCAAAAAAGACCACCAGTTAGAAAGTTTAACTGATAGGGAATTACTCCTACTAATC +GCCCGTAAGACTTGTGGATCAGTAGAACAACAATTAAATATAACTGCACCCAAGGACTCGCGCTTAGCAA +ATCCAACGGCTGATGATTTCCAGCAAGAGGAAGGTCCAAAAATTACCTTGTTGACACTGATCAAGACGGC +AGAACACTGGGCGAGACAAGACATCCGAACCATAGAGGATTCCAAATTAAGGGCATTGTTAACTCTATGT +GCTGTGATGACGAGGAAATTCTCAAAATCCCAGCTGAGTCTTTTGTGTGAGACACACCTAAGGCGCGAAG +GGCTTGGGCAAGATCAGGCAGAACCCGTTCTCGAAGTATATCAACGATTACACAGTGATAAAGGAGGCAG +TTTTGAAGCTGCACTATGGCAACAATGGGACCGACAATCCCTAATTATGTTTATCACTGCATTCTTGAAT +ATCGCTCTCCAGTTACCGTGTGAAAGTTCTGCTGTCGTTGTTTCAGGGTTAAGAACATTGGTTCCTCAAT +CAGATAATGAGGAAGCTTCAACCAACCCGGGGACATGCTCATGGTCTGATGAGGGTACCCCTTAATAAGG +CTGACTAAAACACTATATAACCTTCTACTTGATCACAATACTCCGTATACCTATCATCATATATTTAATC +AAGACGATATCCTTTAAAACTTATTCAGTACTATAATCACTCTCATTTCAAATTGATAAGATATGCATAA +TTGCCTTAATATATAAAGAGGTATGATATAACCCAAACATTGACCAAAGAAAATCATAATCTCGTATCGC +TCGCAATATAACCTGCCAAGCATACCTCTTGCACAAAGTGATTCTTGTACACAAATAATGTTTGACTCTA +CAGGAGGTAGCAACGATCCATCTCATCAAAAAATAAGTATTTTATGATTTACTAATGATCTCTTAAAATA +TTAAGAAAAACTGACGGAACATAAATTCTTTCTGCTTCAAGTTGTGGAGGAGGTCTATGGTATTCGCTAT +TGTTATATTACAATCAATAACAAGCTTGTAAAAATATTGTTCTTGTTTCAGGAGGTATATTGTGACCGGA +AAAGCTAAACTAATGATGAAGATTAATGCGGAGGTCTGATGAGAATAAACCTTATTATTCAGATTAGGCC +CCAAGAGGCATTCTTCATCTCCTTTTAGCAAAATACTATTTCAGGATAGTCCAGCTAGTGACACGTCTTT +TAGCTGTATACCAGTTGCCCCTGAGATACGCCACAAAAGTGTCTCTGAGCTAAAGTGGTCTGTACACATC +TCATACATTGTATTAGGGGCAATAATATCTAATTGAACTTAGCCATTTAAAATTTAGTGCATAAATCTGG +GCTAACTCCACCAGGTCAACTCCATTGGCTGAAAAGAAGCCCACCTACAACGAACATTACTTTGAGCGCC +CTCACAATTAAAAAATAAGAGCGTCGTTCCAACAATCGAGCGCAAGGTTACAAGGTTGAACTGAGAGTGT +CTAGACAACAAAATATCGATACTCCAGACACCAAGCAAGACCTGAGAAAAAACCATGGCCAAAGCTACGG +GACGATACAATCTAATATCGCCCAAAAAGGACCTGGAGAAAGGGGTTGTCTTAAGCGACCTCTGTAACTT +CTTAGTTAGTCAAACTATTCAAGGGTGGAAAGTTTATTGGGCTGGTATTGAGTTTGATGTGACTCACAAA +GGAATGGCCCTATTGCATAGACTGAAAACTAATGACTTTGCCCCTGCATGGTCAATGACAAGGAACCTAT +TTCCCCATTTATTTCAAAATCCGAATTCCACTATTGAATCACCGCTGTGGGCACTGAGAGTCATCCTTGC +AGCAGGGATACAGGACCAGTTAATTGACCAGTCTTTGATTGAACCCTTAGCAGGAGCCCTTGGTCTGATC +TCTGATTGGCTGCTAACAACCAACACTAACCATTTCAACATGCGAACACAACGTGTCAAGGAACAATTGA +GCCTAAAAATGCTGTCGTTGATTCGATCCAATATTCTCAAGTTTATTAACAAATTGGATGCTCTACATGT +CGTGAACTACAATGGATTATTGAGCAGTATTGAAATTGGAACTCAAAATCATACAATCATCATAACTCGA +ACTAACATGGGTTTTCTGGTGGAGCTCCAAGAACCCGACAAATCGGCAATGAACCGCAAGAAGCCTGGGC +CGGCGAAATTTTCCCTCCTTCATGAGTCCACACTGAAAGCATTTACACAAGGGTCCTCGACACGAATGCA +AAGTTTAATTCTTGAATTCAATAGCTCTCTTGCTATCTAACTAAGATGGAATACTTCATATTGGGCTAAC +TCATATATGCTGACTCAATAGTTAACTTGACATCTCTGCCTTCATAATCAGATATATAAGCATAATAAAT +AAATACTCATATTTCTTGATAATTTGTTTAACCACAGATAAATCCTCACTGTAAGCCAGCTTCCAAGTTG +ACACCCTTACAAAAACCAGGACTCAGAATCCCTCAAATAAGAGATTCCAAGACAACATCATAGAATTGCT +TTATTATATTAATAAGCATTTTATCACTAGAAATCCAATATACGAAATGGTTAATTGTAACTAAACCCGC +AGGTCATGTGTGTTAGGTTTCACAAATTATATATATTACTAACTCCATACTCGTAACTAACATTAGATAA +GTAGGTTAAGAAAAAAGCTTGAGGAAGATTAAGAAAAACTGCTTATTGGGTCTTTCCGTGTTTTAGATGA +AGCAGTTGACATTCTTCCTCTTGATATTAAATGGCTACACAACATACCCAATACCCAGACGCCAGGTTAT +CATCACCAATTGTATTGGACCAATGTGACCTTGTCACTAGAGCTTGCGGGTTGTATTCATCATACTCCCT +TAATCCGCAACTACGCAACTGTAAACTCCCGAAACATATATACCGTTTAAAATATGATGTAACTGTTACC +AAGTTCTTAAGTGATGTACCAGTGGCGACATTGCCCATAGATTTCATAGTCCCAATTCTTCTCAAGGCAC +TATCAGGCAATGGGTTCTGTCCTGTTGAGCCGCGGTGCCAACAGTTCTTAGATGAAATTATTAAGTACAC +AATGCAAGATGCTCTCTTCCTGAAATATTATCTCAAAAATGTGGGTGCTCAAGAAGACTGTGTTGATGAC +CACTTTCAAGAAAAAATCTTATCTTCAATTCAGGGCAATGAATTTTTACATCAAATGTTTTTCTGGTATG +ACCTGGCTATTTTAACTCGAAGGGGTAGATTAAATCGAGGAAACTCTAGATCAACGTGGTTTGTTCATGA +TGATTTAATAGACATCTTAGGCTATGGGGACTATGTTTTTTGGAAGATCCCAATTTCACTGTTACCACTG +AACACACAAGGAATCCCCCATGCTGCTATGGATTGGTATCAGACATCAGTATTCAAAGAAGCGGTTCAAG +GGCATACACACATTGTTTCTGTTTCTACTGCCGATGTCTTGATAATGTGCAAAGATTTAATTACATGTCG +ATTCAACACAACTCTAATCTCAAAAATAGCAGAGGTTGAGGACCCAGTTTGCTCTGATTATCCCAATTTT +AAGATTGTGTCTATGCTTTACCAGAGCGGAGATTACTTACTCTCCATATTAGGGTCTGATGGGTATAAAA +TCATTAAGTTTCTCGAACCATTGTGCTTGGCTAAAATTCAATTGTGCTCAAAGTACACCGAGAGGAAGGG +CCGATTCTTAACACAAATGCATTTAGCTGTAAATCACACCCTGGAAGAAATTACAGAAATACGTGCACTA +AAGCCTTCACAGGCTCACAAGATCCGTGAATTCCATAGAACATTGATAAGGCTGGAGATGACGCCACAAC +AACTTTGTGAGCTATTTTCCATACAAAAACACTGGGGGCATCCTGTGCTACATAGTGAAACAGCAATCCA +AAAAGTTAAAAAACATGCTACGGTGCTAAAAGCATTACGCCCTATCGTGATTTTCGAGACATATTGTGTT +TTTAAATATAGCATTGCAAAACATTATTTTGATAGTCAAGGATCTTGGTACAGTGTTACCTCAGATAGAA +ATCTAACACCAGGTCTTAATTCTTATATCAAAAGAAATCAATTCCCTCCGTTGCCAATGATTAAAGAACT +GCTATGGGAATTTTACCACCTTGACCATCCTCCACTTTTCTCAACCAAAATTATTAGTGACTTAAGTATT +TTTATAAAAGACAGAGCTACTGCAGTAGAAAGGACATGCTGGGATGCAGTATTCGAGCCTAATGTTCTGG +GATATAATCCACCTCACAAATTCAGTACCAAACGTGTACCGGAACAATTTTTAGAGCAAGAAAACTTTTC +TATTGAGAATGTTCTTTCCTACGCGCAAAAACTCGAGTATCTACTACCACAATATCGGAATTTTTCTTTC +TCATTGAAAGAGAAAGAGTTGAATGTAGGTAGAACTTTCGGAAAATTGCCTTATCCGACTCGCAATGTTC +AAACACTTTGTGAAGCTCTGTTAGCTGATGGTCTTGCTAAAGCATTTCCTAGCAATATGATGGTAGTTAC +GGAACGTGAACAAAAAGAAAGCTTATTGCATCAAGCATCATGGCACCACACAAGTGATGATTTCGGTGAG +CATGCCACAGTTAGAGGGAGTAGCTTTGTAACTGATTTAGAGAAATACAATCTTGCATTTAGGTATGAGT +TTACAGCACCTTTTATAGAATATTGCAACCGTTGCTATGGTGTTAAGAATGTTTTTAATTGGATGCATTA +TACAATCCCACAGTGTTATATGCATGTCAGTGATTATTATAATCCACCGCATAACCTCACACTGGAAAAT +CGAAACAACCCCCCTGAAGGGCCTAGTTCATACAGGGGTCATATGGGAGGGATTGAAGGACTGCAACAAA +AACTCTGGACAAGTATTTCATGTGCTCAAATTTCTTTAGTTGAAATTAAGACTGGTTTTAAGTTGCGCTC +AGCTGTGATGGGTGACAATCAGTGCATTACCGTTTTATCAGTCTTCCCCTTAGAGACTGATGCAGGCGAG +CAGGAACAGAGCGCCGAGGACAATGCAGCGAGGGTGGCCGCCAGCCTAGCAAAAGTTACAAGTGCCTGTG +GAATCTTTTTAAAACCTGATGAAACATTTGTACATTCAGGTTTTATCTATTTTGGAAAAAAACAATATTT +GAATGGGGTCCAATTGCCTCAGTCCCTTAAAACGGCTACAAGAATGGCACCATTGTCTGATGCAATTTTT +GATGATCTTCAAGGGACCCTGGCTAGTATAGGTACTGCTTTTGAGCGATCCATCTCTGAGACACGACATA +TCTTTCCTTGCAGAATAACCGCAGCTTTCCATACGTTCTTTTCGGTGAGAATCTTGCAATATCATCACCT +CGGATTTAATAAAGGTTTTGACCTTGGACAGTTAACACTCGGCAAACCTCTGGATTTCGGAACAATATCA +TTGGCACTAGCGGTACCGCAGGTGCTTGGAGGGTTATCCTTCTTGAATCCTGAGAAATGTTTCTACCGGA +ATCTAGGAGATCCAGTTACCTCAGGTTTATTCCAGTTAAAAACTTATCTCCGAATGATTGAGATGGATGA +TTTATTCTTACCTTTAATTGCGAAGAACCCTGGGAACTGCACTGCCATTGACTTTGTGCTAAATCCTAGC +GGATTAAATGTTCCTGGGTCGCAAGACTTAACTTCATTTCTGCGCCAGATTGTACGTAGGACTATCACCC +TAAGTGCGAAAAACAAACTTATTAATACCTTATTTCATGCATCAGCTGACTTCGAAGACGAAATGGTTTG +TAAGTGGCTCTTATCATCAACTCCTGTTATGAGTCGTTTCGCAGCCGATATATTTTCACGCACGCCGAGC +GGGAAGCGATTGCAAATTCTAGGATACTTGGAAGGAACACGCACATTATTAGCCTCTAAGATCATCAACA +ATAATACAGAGACGCCGGTTTTGGACAGACTGAGGAAGATAACATTGCAAAGGTGGAGTCTATGGTTTAG +TTATCTTGATCATTGTGATAATATCCTGGCGGAGGCTTTAACCCAAATAACTTGCACAGTTGATTTAGCA +CAGATCCTGAGGGAATATTCATGGGCACATATTTTAGAGGGGAGACCTCTTATTGGAGCCACACTCCCAT +GTATGATTGAGCAATTCAAAGTGGTTTGGCTGAAACCCTACGAACAATGTCCGCAGTGTTCAAATGCCAA +GCAACCTGGTGGGAAACCATTCGTGTCAGTAGCAGTCAAGAAACATATTGTTAGTGCATGGCCAAATGCA +TCCCGAATAAGCTGGACTATCGGGGATGGAATCCCATACATTGGATCAAGGACAGAAGATAAGATAGGGC +AACCTGCTATTAAACCAAAATGTCCTTCCGCAGCCTTAAGAGAGGCCATTGAATTGGCGTCCCGTTTAAC +ATGGGTAACTCAAGGCAGTTCGAACAGTGACTTGCTAATAAAACCATTTTTGGAAGCACGAGTAAATTTA +AGTGTTCAAGAAATACTTCAAATGACCCCTTCACATTACTCGGGAAATATTGTTCATAGGTACAACGATC +AATACAGTCCTCATTCTTTCATGGCCAATCGTATGAGTAACTCAGCAACGCGATTGATTGTTTCTACAAA +CACTTTAGGTGAGTTTTCAGGAGGTGGCCAATCGGCACGCGACAGCAATATTATTTTCCAGAATGTTATA +AATTATGCAGTTGCACTGTTCGATATTAAATTTAGAAACACTGAGGCTACAGATATCCAGTATAATCGTG +CTCACCTTCATCTAACTAAGTGTTGCACCCGGGAGGTACCAGCTCAGTACTTAACATACACATCTACATT +GGATTTAGATTTAACAAGATACCGAGAAAATGAATTGATTTATGACAATAATCCTCTAAAAGGAGGACTC +AATTGCAATATCTCATTTGATAACCCATTTTTCCAAGGCAAACAGCTGAACATTATAGAAGATGACCTTA +TTCGACTGCCTCACTTATCTGGATGGGAGCTAGCTAAGACCATCATGCAATCAATTATTTCAGATAGCAA +TAATTCGTCTACAGACCCAATTAGCAGTGGAGAAACAAGATCATTCACTACCCATTTCTTAACTTATCCC +AAGATAGGACTTCTGTACAGTTTTGGGGCCTTTGTAAGTTATTATCTTGGCAATACAATTCTTCGGACTA +AGAAATTAACACTTGACAATTTTTTATATTACTTAACTACCCAAATTCATAATCTACCACATCGCTCATT +GCGAATACTTAAGCCAACATTCAAACATGCAAGCGTTATGTCACGATTAATGAGTATTGATCCCCATTTT +TCTATTTACATAGGCGGTGCTGCAGGTGACAGAGGACTCTCAGATGCGGCCAGGTTATTTTTGAGAACGT +CCATTTCATCTTTTCTTACATTTGTAAAGGAATGGATAATTAATCGCGGAACAATTGTCCCTTTATGGAT +AGTATATCCATTAGAGGGTCAAAATCCAACACCTGTTAATAATTTCCTCCATCAGATCGTAGAACTGCTG +GTGCATGATTCATCAAGACACCAGGCTTTTAAAACTACCATAAATGATCATGTACATCCTCACGACAATC +TTGTTTACACATGTAAGAGTACAGCCAGCAATTTCTTCCATGCGTCATTGGCGTACTGGAGGAGCAGGCA +CAGAAACAGCAACCGAAAAGACTTGACAAGAAACTCTTCAACTGGATCAAGCACAAACAACAGTGATGGT +CATATTAAGAGAAGTCAAGAACAAACCACCAGAGATCCACATGATGGCACTGAACGGAGTCTAGTCCTGC +AAATGAGCCATGAAATAAAAAGAACGACAATTCCACAAGAGAACACGCACCAGGGTCCGTCGTTCCAGTC +ATTTCTAAGTGACTCTGCTTGCGGTACAGCAAACCCAAAACTAAATTTCGATAGATCGAGACACAATGTG +AAATCTCAGGATCATAACTCAGCATCCAAGAGGGAAGGTCATCAAATAATCTCACATCGTCTAGTCCTAC +CTTTCTTTACATTATCTCAAGGGACACGCCAATTAACGTCATCCAATGAGTCACAAACCCAAGATGAGAT +ATCAAAGTACTTACGGCAATTGAGATCCGTCATTGATACCACAGTTTATTGTAGGTTTACCGGTATAGTC +TCGTCCATGCATTACAAACTTGATGAGGTCCTTTGGGAAATAGAGAATTTTAAGTCGGCTGTGACGCTGG +CAGAGGGAGAAGGTGCTGGTGCCTTACTATTGATTCAGAAATACCAAGTTAAGACCTTATTTTTCAACAC +GCTAGCTACTGAGTCCAGTATAGAGTCAGAAATAGTATCAGGAATGACTACTCCTAGGATGCTTCTACCT +GTTATGTCAAAATTCCATAATGACCAAATTGAGATTATTCTTAACAACTCAGCAAGCCAAATAACAGACA +TAACAAATCCTACTTGGTTTAAAGACCAAAGAGCAAGGCTACCTAGGCAAGTCGAGGTTATAACCATGGA +TGCAGAGACGACAGAGAATATAAACAGATCGAAATTGTACGAAGCTGTACATAAATTGATCTTACACCAT +GTTGATCCCAGCGTATTGAAAGCAGTGGTCCTTAAAGTCTTTCTAAGTGATACCGAGGGTATGTTATGGC +TAAATGATAATCTAGCCCCGTTTTTTGCCACTGGGTATTTAATTAAGCCAATAACGTCAAGTGCCAGGTC +TAGTGAGTGGTATCTTTGTCTGACGAACTTCTTATCAACTACACGTAAGATGCCACACCAAAACCATCTC +AGTTGTAAGCAGGTAATACTTACGGCATTGCAACTGCAAATTCAACGGAGCCCATACTGGCTAAGTCATT +TAACTCAGTATGCTGACTGCGATTTACATTTAAGCTATATCCGCCTTGGTTTTCCATCATTAGAGAAAGT +ACTATACCACAGGTATAACCTTGTCGATTCAAAAAGAGGTCCACTAGTCTCTGTCACTCAGCACTTAGCA +CATCTTAGGGCAGAGATTCGAGAATTGACCAATGATTATAATCAACAGCGACAAAGTCGGACTCAAACAT +ATCACTTTATTCGTACTGCAAAAGGACGAATCACAAAACTAGTCAATGATTATTTAAAATTCTTTCTTAT +TGTACAAGCATTAAAACATAATGGGACATGGCAAGCTGAGTTTAAGAAATTACCAGAGTTGATTAGTGTG +TGCAATAGGTTCTATCATATTAGAGATTGTAATTGTGAAGAACGTTTCTTAGTTCAAACCTTATATTTAC +ATAGAATGCAGGATTCTGAAGTTAAGCTTATCGAAAGGCTGACAGGGCTTCTGAGTTTATTTCCAGATGG +TCTCTACAGGTTCGATTGAATAACCGTGCATAGTATTTTGATACTTGTAAAGGTTGGTTATCAACATACA +GATTATAAAAAACTCATAAATTGCTCTCATACATCATCTTGATCTGATTTCAATAAATAACTATTTAGAT +AACGAAAGGAGTCCTTACATTATACACTATATTTGGCCTCTCTCCCTGCGTGATAATCAAAAAATTCACA +ATACAGCATGTGTGACATATTACTGCTGCAATGAGTCTAACGCAACATAATAAACTCCGCACTCTTTATA +ATTAAGCTTTAACGATAGGTCTGGGCTCATATTGTTATTGATATAGTAATGTTGTATCAATATCTTGCCA +GATGGAATAGTGCTTTGGTTGATAACACGACTTCTTAAAACAAAACTGATCTTTAAGATTAAGTTTTTTA +TAATTGTCATTGCTTTAATTTGTCGATTTAAAAATGGTGATAGCCTTAATCTTTGTGTAAAATAAGAGAT +TAGGTGTAATAACTTTAACATTTTTGTCTAGTAAGCTACTATTCCATTCAGAATGATAAAATTAAAAGAA +AAGACATGACTGTAAAATCAGAAATACCTTCTTTACAATATAGCAGACTAGATAATAATCTTCGTGTTAA +TGATAATTAAGGCATTGACCACGCTCATCAGAAGGCTCACTAGAATAAACGTTGCAAAAAGGATCCCTGG +AAAAATGGTCGCACACAAAAATTTAAAAATAAATCTATTTCTTCTTTTTTGTGTGTCCA + diff --git a/test/unit/test_assembly.py b/test/unit/test_assembly.py index a32e0af5e..71be96d51 100644 --- a/test/unit/test_assembly.py +++ b/test/unit/test_assembly.py @@ -401,6 +401,23 @@ def get_seqs(fasta): return [str(s.seq) for s in Bio.SeqIO.parse(fasta, 'fasta')] self.assertEqual(get_seqs(outFasta), get_seqs(expected)) + def test_ambig_align_ebov(self): + inDir = util.file.get_test_input_path(self) + contigs_gz = os.path.join(inDir, 'contigs.ebov.ambig.fasta.gz') + contigs = util.file.mkstempfname('.fasta') + with util.file.open_or_gzopen(contigs_gz, 'rb') as f_in: + with open(contigs, 'wb') as f_out: + shutil.copyfileobj(f_in, f_out) + expected = os.path.join(inDir, 'expected.ebov.ambig.fasta') + outFasta = util.file.mkstempfname('.fasta') + assembly.order_and_orient( + contigs, + os.path.join(inDir, 'ref.ebov.makona_C15.fasta'), + outFasta) + def get_seqs(fasta): + return [str(s.seq) for s in Bio.SeqIO.parse(fasta, 'fasta')] + self.assertEqual(get_seqs(outFasta), get_seqs(expected)) + def test_obscure_mummer3_bug(self): inDir = util.file.get_test_input_path(self) outFasta = util.file.mkstempfname('.fasta') diff --git a/tools/mummer.py b/tools/mummer.py index 4129f4bdc..3ac678f92 100644 --- a/tools/mummer.py +++ b/tools/mummer.py @@ -209,7 +209,8 @@ def scaffold_contigs_custom(self, refFasta, contigsFasta, outFasta, aligner='nucmer', extend=None, breaklen=None, maxgap=None, minmatch=None, mincluster=None, min_contig_coverage_diff=0.0, - min_pct_id=0.6, min_pct_contig_aligned=None, min_contig_len=200): + min_pct_id=0.6, min_pct_contig_aligned=None, min_contig_len=200, + ambig_max_aligns=2, ambig_max_lens=1, ambig_max_frac=.01): ''' Re-implement a less buggy version of MUMmer's pseudomolecule feature to scaffold contigs onto a reference genome. ''' @@ -282,17 +283,35 @@ def scaffold_contigs_custom(self, refFasta, contigsFasta, outFasta, # (# assembled segments) continue + def n_diff_vals(*vals): return len(set(vals)) + def n_diff_lens(seqs): return n_diff_vals(*map(len, seqs)) + def frac_unambig(seqs): + """Given a list of seqs of the same length, return the fraction of positions on which they all agree""" + util.misc.chk(n_diff_lens(alt_seqs_f) == 1, 'ambig_max_lens>1 not currently supported') + n_tot = len(seqs[0]) + n_unambig = list(map(n_diff_vals, *seqs)).count(1) + return float(n_unambig) / float(n_tot or 1.0) + # construct scaffolded sequence for this chromosome seq = [] for _, left, right, n_features, features in fs.get_intervals(c): # get all proposed sequences for this specific region alt_seqs = [] - for f in features: - try: - alt_seqs.append(alnReaders[(c, f[-1][0])].retrieve_alt_by_ref(left, right, aln_start=f[1], aln_stop=f[2])) - except AmbiguousAlignmentException: - log.warn("dropping ambiguous alignment to ref seq {} at [{},{}]".format(c, f[1], f[2])) - pass + for consider_ambig_aligns in (False, True): + for f in features: + alt_seqs_f = alnReaders[(c, f[-1][0])].retrieve_alts_by_ref(left, right, aln_start=f[1], aln_stop=f[2]) + if len(alt_seqs_f) == 1: + alt_seqs.append(alt_seqs_f[0]) + elif consider_ambig_aligns: + if len(alt_seqs_f) <= ambig_max_aligns and n_diff_lens(alt_seqs_f) <= ambig_max_lens and \ + frac_unambig(alt_seqs_f) > (1.0 - ambig_max_frac): + alt_seqs.append(alt_seqs_f[0]) + log.info("using ambiguous alignment to ref seq {} at [{},{}]".format(c, f[1], f[2])) + else: + log.warning("dropping ambiguous alignment to ref seq {} at [{},{}]".format(c, f[1], f[2])) + if alt_seqs: + # if have a non-unambiguous alignment, don't consider ambiguous ones + break # pick the "right" one and glue together into a chromosome ranked_unique_seqs = contig_chooser(alt_seqs, right-left+1, "%s:%d-%d" % (c, left, right)) @@ -541,8 +560,8 @@ def get_ref_seq(self, start, stop): ''' return str(self.reference_seq.seq[start-1:stop]) - def retrieve_alt_by_ref(self, start, stop, aln_start=None, aln_stop=None): - ''' Retrieve a sub-sequence from the alternate (2nd) sequence in the + def retrieve_alts_by_ref(self, start, stop, aln_start=None, aln_stop=None): + ''' Retrieve sub-sequence(s) from the alternate (2nd) sequence in the alignment using coordinates relative to the reference sequence. No gaps will be emitted. Required: start-stop interval must be wholly contained within @@ -550,49 +569,52 @@ def retrieve_alt_by_ref(self, start, stop, aln_start=None, aln_stop=None): ''' # grab the one alignment that contains this window - aln = list(a for a in self.alignments if a[1]<=start and a[2]>=stop) + alns = list(a for a in self.alignments if a[1]<=start and a[2]>=stop) if aln_start is not None and aln_stop is not None: # if specified, restrict to a specific alignment that comes from show-tiling # (sometimes show-aligns is more promiscuous than show-tiling) - new_aln = [] - for a in aln: + new_alns = [] + for a in alns: if a[1] > aln_start or a[2] < aln_stop: log.debug("dropping undesired alignment: %s(%s):%s-%s to %s(%s):%s-%s (%s:%s-%s requested)", self.seq_ids[0], a[0], a[1], a[2], self.seq_ids[1], a[3], a[4], a[5], self.seq_ids[0], aln_start, aln_stop) else: - new_aln.append(a) - aln = new_aln - if len(aln) != 1: - log.error("invalid %s:%d-%d -> %s specified, %d alignments found that contain it", - self.seq_ids[0], start, stop, self.seq_ids[1], len(aln)) - for x in aln: - log.debug("alignment: %s", str(x[:6])) - raise AmbiguousAlignmentException() - aln = aln[0] + new_alns.append(a) + alns = new_alns + if len(alns) != 1: + log.warning("invalid %s:%d-%d -> %s specified, %d alignments found that contain it", + self.seq_ids[0], start, stop, self.seq_ids[1], len(alns)) + for aln in alns: + log.debug("alignment: %s", str(aln[:6])) + + return [self._aln_to_alt_seq(aln, start, stop) for aln in alns] + + def _aln_to_alt_seq(self, aln, start, stop): + """Given an alignment of a contig to ref, return the contig sequence aligned to a given stretch of ref""" ref_l, ref_r, ref_seq, alt_seq = (aln[1], aln[2], aln[-2], aln[-1]) # convert desired start/stop relative to this reference window # such that 0 <= start <= stop <= ref_r-ref_l+1 - start = start - ref_l - stop = stop - ref_l + aln_start = start - ref_l + aln_stop = stop - ref_l # travel down alignment until we've reached the left edge # (because of gaps, you must check each position one by one) - # end loop when ref_seq[:i_left] contains {start} bases + # end loop when ref_seq[:i_left] contains {aln_start} bases n_ref_bases = 0 i_left = 0 - while n_ref_bases < start: + while n_ref_bases < aln_start: if ref_seq[i_left] != '-': n_ref_bases += 1 i_left += 1 # travel down alignment until we've reached the right edge # (because of gaps, you must check each position one by one) - # end loop when ref_seq[:i_right] contains {stop} bases + # end loop when ref_seq[:i_right] contains {aln_stop} bases i_right = i_left - while n_ref_bases < stop: + while n_ref_bases < aln_stop: if ref_seq[i_right] != '-': n_ref_bases += 1 i_right += 1 @@ -601,6 +623,6 @@ def retrieve_alt_by_ref(self, start, stop, aln_start=None, aln_stop=None): i_right += 1 # grab the alternate sequence and strip gaps - alt_seq = alt_seq[i_left:i_right+1].replace('-','') - return alt_seq + return alt_seq[i_left:i_right+1].replace('-','') +