From a18bda07584b4331c1ed4f78773198be0fa6b42b Mon Sep 17 00:00:00 2001 From: Benjamin Lefaudeux Date: Fri, 12 Nov 2021 15:48:17 -0800 Subject: [PATCH] flipping the seeds so that it drops down from the top using less seeds tiling + vertical seeds --- .gitignore | 2 +- docs/plots/strided_sum/Strided_sum_fp16.png | Bin 0 -> 51159 bytes docs/plots/strided_sum/Strided_sum_fp32.png | Bin 0 -> 51839 bytes tests/test_triton_basics.py | 50 ++- tests/test_triton_dropout.py | 8 +- .../benchmarks/benchmark_triton_dropout.py | 4 +- .../benchmarks/benchmark_triton_stride_sum.py | 71 +++++ xformers/benchmarks/utils.py | 10 +- xformers/components/__init__.py | 3 +- xformers/triton/dropout.py | 81 +++-- xformers/triton/k_activations.py | 9 +- xformers/triton/k_dropout.py | 292 ++++++++++++------ xformers/triton/k_fused_matmul_bw.py | 4 +- xformers/triton/k_sum.py | 66 ++++ xformers/triton/sum_strided.py | 48 +++ 15 files changed, 504 insertions(+), 144 deletions(-) create mode 100644 docs/plots/strided_sum/Strided_sum_fp16.png create mode 100644 docs/plots/strided_sum/Strided_sum_fp32.png create mode 100644 xformers/benchmarks/benchmark_triton_stride_sum.py create mode 100644 xformers/triton/k_sum.py create mode 100644 xformers/triton/sum_strided.py diff --git a/.gitignore b/.gitignore index a8541986e..3364ebe5d 100644 --- a/.gitignore +++ b/.gitignore @@ -51,4 +51,4 @@ examples/data # Hydra default output dir multirun -outputs \ No newline at end of file +outputs diff --git a/docs/plots/strided_sum/Strided_sum_fp16.png b/docs/plots/strided_sum/Strided_sum_fp16.png new file mode 100644 index 0000000000000000000000000000000000000000..e8fd201f60e04b1e4ee5d5d09fc4340a3757cc82 GIT binary patch literal 51159 zcmc$`bx@UY_dR+5MNtt!0ck}*X%LVGgHENprMsjtKpF(4OQi&)q#J3Zq#LBWr2Fm% z-}{X_^Sk%Y>kQ5S59fSx@3q%j`}jSV5yiejd)KnJTiKhN=-qLAX=`UVy};vKCXdL!oeVkuS7wf@vlwRFjan&=UoxxRtS2P6~>rO`ChY2HiI$ z7O3xh@y@V+?te!(Plu4CEJCqZX~O#n|jGGxA3p z{Hm-Lq-4dte1Rr9dS=UooFbZce*Bb 
zHT8!ug3Z>$vMWKLN8=TZUrhV7?K~eEci&vx830;NjCI3eTC@NNvN_;lkVRpR`6Wmadg8 z-)7aTZ>b^+ReLfiMiqGcx0PALc{ZHgwAc44bHxXjouxdZPI9Gejg}FsYE356%D=zp zuaP|ZE;qT58f!Vl?0F!m_({twORF;G?OSxq^3_L&+lzKjo19h)2Od0laEFmG=;UZm zd2simtia^D=NYg2(e7+8?^=y%;g6TC1iLFkc29E{pS&y&TZ+bRb(ZWY}`P zBmQ_R@odJQKKsw<(YoN|z|Hhk`}R<=+e20EyjkkS0t~m&dei08wmZ|)({B+F{2ek% zg#D6$t=JoPW!`9V5RT=vn(N?QpDE~(nf}qrM8Bt@N*?0ld3H3hz0?F%)E5NvCz$=|5}=7%~pC?1EUdTGuvAxB3?@0}nr-U!#V(o3&d z^&3AOxx%qwND>LsYmed3ux+8XU+VRH@(yzomWIMHt}%#2KTjoih=Rgx1I?p4am2D5 z`!#H$R1e>r2c=wshwv?|dqE_eh6jIV%|}Z@A6X2O7LMJN<@w$nDDEr{i&-WLBztvmSZXvPMf&0m1J$aYjX$DQsmsuvjy7nlT62qw@-$H1yJx3o;c!Mk<+4xR&nQ-XI?baH!Ba5_i7Q)k?bCku6Li+cE9;w@?1o-OvU(gl zeOEdWV6f%86)aTSTsWZBx-+2NSvsE}3G1~mUdao`A=Riefr~V_+c8NrWbTU(`B#&k z)If6nL*FLTks?M}{v8GHoMMj?7i0mM%2ytqZiaZGtYL0;aF>7R*1fm2NOYbfI`$CM z_Bf#S^YcS#xGbiO+BA_>Y}8&w_E|_oY?U;_8#2+`S|-_2k7_DURbJf#ZUULA?V5!inXCo~VH zujRVR0Y|jyH5x_~n~_XPutN)VBx!zoh4U8bIxnLGoRe@@^B9_Z!*;#jpPxjfiW?dl zE_%Y5xyQ^LQfM}SPTe!S+?Uy;Y80d%eSW?0T=eQoCD1GI6SDZt?>TEX7 zQg8ZA)O?(sOzCnSTqm!A<(IIj_m9;a%j02ECEIyDi!*TD=KlPAmvW*=TpjlAGv#0Tq#+?8QQ8y@d1`JB**=%OHO;l^>XTKg6pQO* z`OaH&KAO*ZAZku$pM2PyZu%G**_5kW$91%l?|i)3q+0Fao)RLjRQDdcm0#f2r{E&~ z`}gO)v@<mcfph1#NiNK;*bNe{%Dug`q0hYO5<;nj((bbqUR+wE{Mjf#ru z(|cpw^e7P|)Fg!n_>Zj4wXtyG9kR$t%MtWhi)GkFUN3+~9keI_C@?S_r1z(xt>Mw}5LvT}dE!HcxWFi<;YlR6l z>%9rJ#9+f(T3U1gKrpBk_@+okqEoZ=AlYfHbe_CFTT`;qWoI8Ewz5m5)XP9p?l5wG zm)~EWUBbh|Q>k>}m~h*|R@Sf)Hi?Dwv@sWJ_0V#Zt1*zM?&rHp2Y=q-7>|{4T)cQO z+@t+*hIX}pe5#Z#hlwG1~8{Pb9TCZ7R7Bx z15sQbA{yikVh+>#myw!{kW3ZU<~|^J0K(CoPJtr^VPWC8SL4p%*F{kX<`9>Xnt=s2 z=!oM+a=5a#dw7qmV5LTRnZxqmE>Vh2BsIJ4nvZ*)opd`)0Lselc4UeS;GA&#C`#_^ z1ux~(+$`21aAb~o?RJgBfTnW#Fr=;4R}=0{wcb}Zo%j#{hINyM#d5Ty`%#u!;nZdm z#r(m1;(T5cd6L6&Ki_8kO}Cvs<&-y!DpS+b0Ps%h;0*loq3{qJD|2w(_<0E@wv-cM ztpLP_?k3ndefT+w-T3!sc2ZRa0xh>q;RN?xRl~NZpAfJm01i6;ZMn}4VK8Go$j^&A zfJj35roV45o_

H9dVmPSqjZV2W>Z;zGg}llg{(g$FN5`;5>a;D3T|rsiy_9(LBingqWBHAXZ+LyOeYYkWH1a*Yi5pMYKsVs*U}>8fBlVOSDLtZ*P!_%ZZI+d zA)(}J-v8zU1BZXVp8sbolVbn>{t``Ra*2PXo#C1z(UY|EYQ$iUU~?{TXLN5#@1d#l zW4ZHzR%sU9tk&&LH9^M?`}$T><~bl~U*BF^Z(*$}?Yyh6Co5wvmd55P`!7J!!J@P^?gMb4MeuH@FfSFc!sKa@g3~mam1bEwmdSZV^5Sacud? zbf3zr*QD^CxR@BKW_gEvPv9y6Lk-Kd4xZ)jw#Y)P>!wY>-U@^z=Xt za(#1ib2fM+Ly;cx<+Q0Q1u|#L=fyPJi-V8TdVXDQv1Yga^B9VZtB=o4Pn?(1(gj`2icVd;L&R+)>MvsOL z8tf&rJbPax4G*W?B?=xcvbg#f;~#mQ;?>U!bO_xgs?=5~kaJ(BR6QHG~MT+XUxHwzg2mJi;pFVxMvdbqR zp#A4;^Wa1zoV2(|&J#1?kR4g4lv6&B^4KA;VUMg|g2CYO^#-@p1;# z*WRwc2z4O2$Jzd(1;CB|J4ajayAnOXQ96srk00yaUc7pnMdxv0FC!%-UWOqPEs^y? zgj&dqT9ra`s8?@u7i010Q@DrX($drqABF)QVuMl-E+NrFgUjWPt5a|{UPAc+*A|9M ziflCi^*RL4g!FuVf`Q~sD8}COnIbnxS?we3pPof3z$8&ow~-?N+1Q4Q6i`?wt$gb5 zYR@yC^V28IL|R-7Em!|UsffBd*&I_o_4SpI*lG6bqMw?$EZeh(Ur!DyMR;EG^1 z#cdNgxdNtU0+`6}uxNFuvHh@RyB@i)w;Iw5=W#EcI;J;dGI3z{PFhV8ub%Q9zQ$s> zo>X@j2(DtW;)=SDKWP5?r=0fI>aaqPqBj8&<3ou%Sfu81My2zbJll$d1uMLbPH=90 zSAUKj?oIRW0n39_j0$dj{xc_iT9J!exo*4zey6HSz+NZ2@`QKcA8Xi|Eh*cM0){F9n3^p<_)#R_Rp(x7|12`V04li95KBu2J zdOJ=Iz8|jMA<$9jG!(T^t~i!#`^xtn3)`lELIno*;NReuVkbm{%F_Dpac?$XZ6oF; zavEsEyz_M+Mdf?h#jgmVC5CMfLqneU=Hl(~<%O$l1k4GT?3d5+qrRepUl5~=F9zBwYY=`|K#!CU=zFLtAcHUnk?;)fZ$6(jnP?OuYm|bvC zqDOBA@ko@7RPRb7*)#dQYB}{sjG?tFBTk%cP0gcN9S>kq%F^N9m;Md4etC;+Hq&AB z=$GE3wPpc5k{)^A`83(rVd+N^Y61>to&T2cgSXkkX2y*VHavN@kGIUd;S!1fohvUyVNF4!=+Q>@1c2p znp!I7$SD2pSIUrM{jvOGNvT&%5is##ob0)b+K_B+z)e{Cs?H6^WgItq*hBU!Ld$VF zmtg%uE17=k^^`Q{opY96c~%jY^DQ_^8y(++MBS_sjm@J)co+BPC`HR9@ zb6KOZTI*+^)|A;JG^q4Cq=eZ`MTX!E1`Ke z`jpvWijpf!^Tf6@nw+yi(EZwDq2ewTO;N4aO2JadhEcVVn!Qo&^v{t@U2CGAUSOfR zP5qwZ{OquN3|9_p(7VgiEpsNw$YM6L)|Br;^Ev(Kz63uFjMZ?dUi9z64*yWBs?#8P zeuJdog=(1c?qJL-ESWdDGUta^x7jrBcdm?msYMk($wtbV!b6AZ$kesOe>aKc2Sk59 ztux&b>7LI{U9Kpu;e_BUnVu!4tj2$9c9VB^cI)_lX36}l@GWPz*@P_%4PP4_5-trc zS$;t={Qhg_8?380x6alpV}hr>2C+h2XtNvEth1~4xKNDY{b6>i>frgN*B$*hPlg1ejMja zwU{scgTD{0gj>+_J;^!fw2*5evYN}eP7wIll=OVvZgB2JYTj}^VNWM~))e8%z_Q#r 
z=*8?#t8@2jaq8M7|MR!=9f_R9)N#r&OhIdA$M*MhBSeXgpbajuUsZGG2wMAqq_nAQ zvY}vI{+O`1TZ(1v#fzeW80tT9)64+dCbd=8U~`ghg9^IC#87VE{FG-pRd>5W_IcEn9?_{T3Yo#c8(;9Bc;X? zv5c2I6wj{%))SIf2Kg^#nUpfUA4%VoC?;e+xqPf;jxC>8yg*)oZkGGuiYUU)0S%1M ze@G^SoQ6<-p^=fw*-$;YH$kEEOihg#*pg|Wrny&CfaC>&JPwF~?i5MF?9w^n?w+2^ z?w(X~Lv9UM)Zb+kpZwURAEu{aB5+Z2>ZM%~HcPJHCq zRfkJ;ZzCW`e;X=gPE@QWtJiz;g+B%zPDKDg<)zp63I&VG+Ua?`;=De@WIb}(Ua$>u z-Cgkmit5cJa)gbfx__U|`L6*&3=5te$f5x7Hw2UMI-MTOb1#3-M>RJ$|CtUEoTE6~ zzog$7h=!2#Z!j+X{dtMP`ebK7d7b1YE^fNlvm}u*`Q?vsDi7T!icN4EPZ?ndvOT>g z8Mox_iHe4O_<(vnu-q`cnG4cJ_YM1IU)+29tHl$8rFQpG=jb`mb`>iB1*jr`=*=k_ z78Vks%N#||=a;lxF?I=%%;rFk(-hSWlDMqoT=-P%{k`f5SsNQxz__!B3IfOzC^9cV zZ__(2!2Hk-R4HaEp;88JrKF_+MMVaMFOFH-GSqEUpDZR~eZ~wKi)+3c&2CSmtqn8` z*%*JI|9*Ha)qt%|C2v>&!fQ<4D2%sN0{XPAxbAE0PkA%c^&}MpS zN@67L8UW|$Zo~YDqv|-)|Hii*S%R`=iBd-q;xna{gRA{;0_f8)ZuHILk%opUYCV%) z?)e3%b9*T>XUegBsKS};<9*pVz)3RzsxLdLqExa7@NOnyc#M{CDc6Lk#-$_e11^LaMgdVn4O<1&DjSdspbTMK z4+iFsj%vK_?lHEpv6-2lr*IsFe|0j2iV?a|Py#3Pvi|z7$7h7ul zm8`Vd(hGE7tdbs%62oLujWPBqXiWks5g4i|#g1Uh1roPeh8K z+}cXP0jt(2zrD!mTy^KasRH#;2e+ik36IGseMSTCrHB>|DB!!te7Z&Uh|s`CG#tT# zy@`ZTj9Q1d`}~8%1wY6C7Ti%*<~=7QF8GxXHjTGEWgxiSpG14=UOhQdaXI%!*U=Bt z%CykqyUFKJCS6(s*YWfuQY(4AB}MCPKyh>?p^BuJd#U*M|=SlmCL7|%Q7#I13 zl_bmV3Fhu)N_GdqZ|6wy?VsW?ZYR&`!J%z@`HGoms5ng#y&#$I#>(k>JXrt>MMPVsNa z_!6fZ4t8KeM zBi?zh*aqW;wr}5D>Hf!2g_{0wR=$pURz{_Ne9Q_^=Zl#90mRgd7V3AhUu2swsVq+j z*v{@mEKo_0;XENmz+t+Gw4Ph8BaJm=PoWR=VS@jLr+3NDzqx}M^YcCoUOBMPgMKB; z1+(-aH(tCF68!ZNJI;a<``4$-1iA`FN>Hzx1z_|U|3?4&4JoH&|7slzw%R%^cf6t1 z7>W9c(vxhdU_?NG@%&y%bkvKYxko})9~yoA(S2NgrfZI>a%krOgf47-_o3&k<;p%$ zJteY1Izi%BY+uUZ{`>92y~ucgCxO}JM6NHC0AP01z0-^!{RhnPH=It~j<#f`7`(S% za-(qh8(~_v&#_oKx;oq62fO>+f*u@?>fRN4T-oifd<%d1uhgLX4GfvA)@T${e-KJ& zx9VDeu%Khzo6`tnysu4S1|vMZF5Yxq)*1{`zf?}eEmPKs6iTS*)RtiP19k?L5JCN< z)HR>GK?uEe;rHR{5Z_^=-md4KHYzUOM?Wb2=1fNNzwJdgZVa=C!z~R~xko5zPubCN z@2zH-LfAR7dc86j*;&L`&ec;${MwhCP|pTtQ1TDfe2A7TV(l)B6AQpGR6?uA5ESk<{2uY_kLE(%yCnL 
z0lI}ZyX58f>Y|Z%ZWjL$CD0LaK=j(T;~u<7+xfMOqv~85igLb`D@1*xOBkL#uWOpc z2g`XeI{z}i2Aq2DoknOJ!};diG`LN(TU{ONpO|^5iWBx@ z+!#`q-!r>%!srjM3023E%Y1e@^Wtu!Lrq_S1@id$RJqYcB2FI5+YxH@NSr)TzEDnz zb?5vYBp#k8x>fbMhi}%f@f=F$z5w>G4z7OFDI$uue+iMze z)g34u&DzD@oXFcV{#3tL&aE6&aZVBL{$mdsNhZ7se%^bh8LaT{{7ugYmSC(OI-}~b zZwFLbn!}~Y_}pU(;j8H`caiIA{u!}nN31haV5R??&#$QYYW{=H8l#lX1ElFNc81f=+_-_LM>Q~!ww zz=#HYKhf>&7`dF4x{%P*xL2#iQO-xr9FzwQ&U8s5{Z}2u<-DKLQA;K8MJFdGcQ*m4 zO2f+=3-b88SgR_c%)%K49SsfAe*|j;vvvotTTuXYPY%Pcxr6o=hI>zh1;hrCz}qiU z)}H}Sdc7`Qa+CQC<4eCWqZM7bE<5l)MDBnEvxRXBiP1vGA;Yi-*(1~x$3iUbP? z`iIz9N%NJ-y#$ajjKE!SUo&!yyhO?*U$yl_TPFCwxYq9(OR_!eHq0IrQXdt^=T<)B zzS=e>{+)>SC8Nkwnj-(ve{*Q~3#SZ&1%WH^WJFhCeGt<5m75sM!;zU1p(K03mF-Nk-o1Oiy%kx zvmMzcy?j-_0K3rG<;HbRbjQ(Q3D8^stO7t#Qru2k+zaJh#s<<0yN*EKM_%s^ede7z zO*jt!&A^sgD<~+`V{?u^*3u#erIUARF;5?Wyl&T5;iy#AwX!7eUi?m#j{TV3HGlOu zGJ3*Fp~c3G{=Xsjn-y6IP`8JOqHy!LctSD?J!K1b*oftDJLb1ob!#s`iRKJWBQY5n zfjI$?<=xIs_TU(VTavKopt!lYL8km>KLY1KrPz`TB-?LcP=q8KviSdfa9V1xbXw3B z%(t`X!l&g4hhPb*YY@AG-F(L^qD+9p1fO2%lfXcgyQ@-`T7y|mWn;8ag7|b}&`_}z zg;m8S=F#5zJlH$vEk+8>l;J8jx$liN@$Zeo6}SK(@fz5Su95MuUtX62&B5DfX#Ni^ zRg-FBLv)t$B`&g)^|~eN$a|+C1{GhnaVhz;Ieh{Jp&&L=!S6O{%+La zAfkcoY9fes})z=X$x7mb5Z{ zVpnqVLT>3DUcQW52ue65%iLJkz)<2n8=Js=J;@(CP^Lq5p0%RhhfE2tW8|iJSQQbL zeIuc0NmtV;QbPF7P_@o-OC}K+Oaf(6?YV+WIw~QC@Vkiaz7~W{jut++b#}~+>X7f8 zFM7nU`ooerttg9Pw3f;kfgj0A^rr`G5149VdT)s?p%|3t-Ft{J%NejJg9!b7#j<8Q zhrM5Ew)DDd4_xgUeR3Z2(;NH=JyEG|Dup?<)D1U^l?n27U6pcM!|otxC{(Sb*c~6H z^3!zs@vjH{mMt?nmbl_Jp+A|V-<~H`Q?j0o7ZoKynk7v~OKHzz{wXNhNss(<#n1!~ z{d`dUo7u4ep_P{;4H%qhpB*d?xTDnQgiuK8N`HCE=iu4@3*&aGxDSCJyVVVn(J3k_ zgq9P_k~?la^GGb4@R~BL+}EA8M0G5sytqeu?mjGhFKEU&UTBB%U2rFA^=z%3W;0G8 zl{}jBdy*~l)Y}rBmXfkiWU=>TPBV|wvh#H#Fj==!{RpDM<&Z6qqu+P@UR$k?uVNvz zT~=a$L@SN=R{4_`6@u_Wi96YkZ!sUDP@+C}qw;oIUb(*02=}5pAMhr`$A;$dAZp6@ zzLxVFTklUsce$7FhJIw^I^HiUn7r4UzGIdSczrj?$F>DpJy*}zY z_`ryz_2Z3mwvj`rYYL}kTi>jo$n|5ed26%&YJ}8xf~$2hX2dIhse};xA{I0=Qi^%D 
z8?KXwJG6|4GGt;T;^4{BbYC)kqso2dd^R?~VOHb3PO8%ZrkwV{-yd$%9bD?3{CYv`Qpniv*zi~!<2S15AHZKBlq|v;aCPH&n-+ze zmf%B8S+-qTKX|!gA%nSVJnGEI8LDF;dzU*st%7#on0A zy_heAbq^UkUVq!v(}(lnqZ_PWkDOH~^-(Iti~&I2+3%}+2heJ9-+cY8DpEJ^bW?w|?8U0_ z1ZC)T#;~`F30}0wI$}vyWwp}A9MS3E4V*BTxARPH5v6qUt~`=i|5~mqISYOqNaAFm zMU>1}TuyzHFRi_FjYRoSBecm!H|oiEnc8y%>xF+d-d_Ue9(&6HR6O8O?dnpWOXpyTj9f0`lx0ma_HO;)=-UB4_Jvzaj! zO&g!Wts`thhJN&XyxWJ{P`-#glvF@}ac{km*nMW%H^OSP^VFP^mFbdu zPeIN1IMQ|`Bz(|6#em4OxUWLc`1rl>&Cc=GvNdrt`KnbImXr!po zqqg0U9*njBZk^caF$Y+V3APB>{E+{H~f0R&Jrf zYEE2SZXQ6*lbUQKubX?CCXI(GYj-CY#yXS*EieAbn6%)oRKE#YY(|+>I`vc2nQ#9K zkT(NUJXq#H59U|q0mFJ^_O6gx94ZZ-`h=H9?~WOwn7H`8pNz#SI0wm<1JvOO17_DV+1&husRzlXtg#Tp+d(x8XcCnn?{Z5 zg3YCFb}S4>N&3XSZP0t+sq|)eJnb$?Fky~H#2zo?a~O-sz_w-;r9CQh1f#3qv6p+#gw956S4P6 zFxc$sQy1XQqu5RG`X>gO!CiGpB6V>fQ#nWD=tpN$LqjMy&M zm2!I)+5kMKB_Ow>d4@ZtL;BidwVZgBpC6o&UM7M zR_={UdU$x0lJFK;O^~Pcv}6^J|I&OlCJaW81=xD1?v23GgqUGdEWoxQQSEV3vR-vm zGN0fI($OEN7w&R!gnxPFgE<-R=i6&}x-wRhCVL@-ME%{Pl8y@efOebS-BNGzgqWhY z?@Ak}?w6IEOfd-wlUG)X-HEs68m@HZEPu5=*DEi}I97xhqdiX?_{eb(Uw`ul|1^Ws zA+QXh?Sli*ZqN?gj$fe614HMP;*s`GwDL~CFPjYgphn#DP<1m7oI=aPEVu}#LGG!a z-|7O}%3AfA$I9-9U+;jzO$Y_A4CIx7zq$Pu!OP#VR7i0}E!LTKH`{#ZhwO4c0Pa~U zOQzrK>M;GTV#*uf{5GI#6iP{?n*uDI=+=K~&>$ea8Ca$UOUb)SIEXo^uOyVXdg2Zn zoAoQ*!jUW}Wj}^1KBw$ zDV9tZx51qXS^`+Z;RS)GnTLLmB-MOFRGjl%G~ZSm+kNdX2omX;PPJ~7zpYx1P-q-~ z&vHfXliO8CU{9AUhh^V>FTRC|e~C6U!?yOGCP$c;xZ z{(%0KU6ZgTyBgKqNw2!D_*dgCP$MIjKQR6rPn?}-FG}C$C&xju^Gbf_b(Jpw28X5U zWV>E5Pfz3#ugGZBt=A|O3ws~6Y5a)KA+*5I85!sEzK4Z%pEP)>KdTiwl8~Yw7^oCZ zoeM12-W89#1b{K_!ucjx{3lbZMA{RO>pKd*4&lAmMLkSBu2a}s?xt}-#4%k{2du=S- z(BWVCEAka)h8Q=T54lJg%asQ`qz|8Ho#-*rSi3}u1ac=SawyQ+9@9CfS%6}@?{#7=NiBkJ!zDk~C5sCYCC%nzSFDJbs1W?ZJ#VkHAg70q{nmq0YO3crX`8ic9h zX-+9b0LyzO-^zG{3Ga%`{`DcBmC@3V>(yro;Q7yk&VdRoW=M`R+dZ=L@Rg?Dhq;?Tx*nxF-^LxwrK~cY(PZM1@PsF-L zX$P>EHJqVC$Y{H zJEY?VXmDGWC*EJzwK1ef$uOt+R^MJ3F#<2^Ag^Fgj{Loj#gV6JT~sp{*-7#7@n5K@ zsJOIu(Wb@K(xxf!h06nP;$t^UjUVQyQ>80-d&FNC;mBG-; 
z1%TlrsDE=!we!tn-S}Sx%1asn4Yu2C7~aAH3DHGifo)I2rYK96rJBkp5>JUUq_G>~>&Xh8Gz_eZ}23@y@g11#+`aLZ7&9ppPsCkWv3S(;-H z{~>mvCy1T?w(9QjE>b|du90l0gwlM}<^>^fzh8Evzp#u0eYXp5j^=9tiJ-SgN zsLPd39B=rH{j$*KF{@<9;teO}=5gB4Dc$M5egZ7qL2ePW4U~+nePoao5_;PJ*1V0( z`z%*~6^qHLln?cF!XWf)P@H*)DEoxRBKgP#nKL$P%1>FQG zBiCse82A>apo%{PpAk$z*I=~~FJ7#-`K*pOUS(2%jlfF3CC9aNdWxxDn48|59&d?` zEEws#+6eJHP8uzO9*><|$5a#3ZDg6Fs`N(L5`S{#;!7HsxA?x$sJyZlU93k);eJMf zLiAoDO=4xybV|ik=#qreAw7c!H1;OTa_h>TYP`n>TJ7w*3mpsHzE@>r(-ZWBGgMM! zGgpW5FJ<4(gnxCmhnvmN;@0); z#XV?e`%#w=3&9s~iY`!{NYfCorfe3&1+VexC@J5}tM1Z{7fztxaBg(qQnMGF69ALX zNTdDNS3a1dZd#%%MG_|uaIu`>gsFiRZ8}(7&on`epP(zc zu^8PXQ}6$&JCM$vUP`yKC?g|d2#ju1pCkM)s$mB@-`l40StI{B>5Da8x|e|z&B=Ao z8|d7k$T2t?chgHE9rbuY2+O=lw}S=Uc+5!4-X$FJ@3~`0Qw_B9BHndzeDET?nI0*9 z|CRgWPxf4JYY8Q`(#3hKZ;?oyx?z*Nn)~CQYXgwL_=Ek5w~=tIW=d@rv@Zs9&QF7# z#Sj{jMa}bFc1*#ZWD2Y^Se&FE``^M1{WuNwG1vYro&DTmzVK$W0i8V36VcbYQ5Cy= zJV;ak+7F^aPsNjltDXM6x5CTjz5zbvJK63*IuljN#c;IHLdx06Slmf^(q)wa*4 zbfua+-MK}NhYp*u3u)*8+Hj(vWS;Om&b8B5tyVx#9rbd!7mdIDfKlXW(B?cH~*4y~N7O z3T6@d+kU=YH>rD8DAMA^$N)2Qbz@oIgOK=erxM&PHnBV>7PoXI|I%+^Ct8EV$3mcH}m_GRrm(rG3BOqNxi zWZLaX73brOAHgk`7Q_=m-h%3Tbn26?ih)gw6Qb{q8qcii`u}{=Uz^WWC*u4{mM(qs zwqPn7Zm_@A6G5jK0IK0fvb`K4l%8Kh z@8O2B%pNS>tXQ5y5m0uPTo({zGZv|Qo?o}HBoWS3w=v)GdO`{X#*MBm%DB}_ii6|x zoKB1R9OlE(@91kgv^y+A6z_>pisA#Zg^sl#5Osdo&Pu=y5Q5Sg%teZ&k3dI$3C-g^ ztGw=q_G*Quiicd#)7nJt8faC0LXa~c3H>0YfQZwzt3AT2ad1O3rp8J>OnIMOtQp!d zkBb*o7#wGj(mFCF%k06KDo~HM*1aD?-J}FbNtYhzFDT`sIIYOzd0#cMTx(0ZcI3F+ z9|V2lX`@9*3$!aA5KoV+#@m4Ag2MR%(gKIjT#0Ufu^e5+DlKqBr>Dk#1b%hSLHo1d zL~|aMDotXJU-e`D=Q?V4%1(cx!9p&M)7A7(EXKd@ocpPuoNkTbFbf`Jf_@Js%`(y* zuKzbe>+94+0UZ1wt9fGoIVsGZ`tejj+)8HtxrW|}(~6JI^p5&y*@ET#kZ>~6R6fL!0u&{b?T(QN^Z9|dLu!3SIOmu5Hsj0G_3-&?avxKtNXIh!tYy#zC0 zc5ouno%d0)^hc*`KKgL-)v!#c6=LoJ4-h;6c2J~|A3Eea;(6_$;n!x^BrRMZ@jq16 zQ5GFFwfFY+_7an_(2Umsj(G$BG|`4@E{F6z+uuPd!CQna-^6&&Ua4|2VgX61OBgJ} zyUH#SLM|OZz&HpxB3Kz`p~Y&l`s{=mQqjdboScz^gLTYYfgvG+OX+35az`*Y{}>*f 
zIG#KjD-yi(AJ9L&D(^YRG}?3j?LbiF3p!S#pr$EnrIOC6LL!5CLOZ2S|45v4@oggP z3gD~YT^mCvwAVI?jN*QNdy(X{m=X!GQ6jv$s?YQ;D=W{Rrl0`3i^oxg(f=vPUnIKZ zKO8*i`xqsBAjEDiT-UnRM2xFAxfyX%7O8w%OaLt(8LD{v!&I^I8V6oE^iUTemz7n2Ghp<)RFN#3sEeS^O7D63 z+S{y7<3D}<0@xv!mEY9XK1F)!fuxow#s(f4@gFj(DU?|NjS1yOFw}q-lBA#pdiXy= zG2VZLV%O^g8{0ng)`Umm7BMx_^N#i8G}Z9|UKt;VxoykG+G06zUL)`%F|iZKE;ML< zy98E-3iuQVgoj|a9F2kx)IfT1ay`#H65tAVPb0iIFvB(z?t6$Y3?Pf`jIrPkjSrA+O@?yM!BLu z!C0ENWYIhD<#dMBX}O99A{PK$tp7u}PJbiS^VxX3H%!N%aGKb0g!SDEKvb^zoN_8N zjAKf%??S~C5gd+%4%Usc4Q;nrj|&0#10SmOtVXM9-Q7Ii5elle7YnY>c*`nNIM`U? za&sf0|H+4)lk!_R(HJVeq)z<11e){!H)-UnG@4?zrQ)vB&*K<7v;nx9sb5&)#|<@| zJ3Wx}k8s0$EL2TzQ?RC~>)OMP0O;ca4~j>b-s&BaNe&-BmVe3eB}GInF%DMgo6rjZ zG9(r%dpuIdN^m3U$o=@ls!h9;B;cYYR~iPH{E~u0I%Q$Z`SZa4_&YQm< z0+*jCBquHT-wL%w@@IxGv;Ixt2|gdyxDaX{tO32sH3b=H&pqUgb#r@>WTvqKrGcWlh9h;l@V%~;-|m~T2v_N zA*{(?<_!k==nN~XuK4CRoH?! zfbvkDGJn-*v+4@oym}m^59_<#U!O2cH`&)N{&q2;{0X09KxrEm7XP?k;3~qHP77I@|cGSLNn4bPr!cP0z_G)yF}NXe9eZwl{4Loq&5CEotZ5Xx57JQ zbu4JPY7DRycZCqAZMe=M->r66vI(mIG;2p9zpa9+!Db@F2)I9-Rf9wy;;@KaYUos+ z!JsT8^BRs^v(3ZN(p&8@$nDM@^j-194=%!>a|jHnJ_|~zUaPar|6Ts<9bJZkkPOnV z(rmL~w2lO#wu+NKfwc~EjDsY_IlZhNLnVnojbT_frHm~OQlcexe)G2#o;mkSkiN6p z%PBFEMwh)CG3ZkAej=Fh%ZrrYJm$;~>Dp&kYE=1SyQN8v4x(+7s* zXbd(jLLnX~*QBER_6!h3%;qrr&0A)|&$1fk2khNLt-pf9QEu6}zcXpyr}!thDt^Cj z{!*+L#JAa_Gr>I35a!^Tr^cYcnNi79Lx9BwGm|rmPJLsv*yN-nbE#&$j59P%JD_2X zYf|5G=ws`V-6J%Xxj>}?lb9R6a>B{P;KmTkGC1_7JzP_CQ}@KI89LF38xw&K7?RnK zg3dA;&nnEsiR5Kk2crHYlh42KLrsz1r^4VDQJN3$S)38%4*T()sF9{^bC>GJ(8yOG zhGTFGgX(QC+U$5Azd(q}38tK^v}v?f&%Z^~cKl9-QqPyvg?{&a`h9ne^>wr4Aqyo- z0nVUr%{K)(?o<9m>Y2j#R!yd(6IGYTDH%4geR-B0zi+mnaXWg|HD(be=lx%ty$Ljy zZTmh-B?*ZNDbyoLh6eLo5lRyxQ<*Xk8A>vjIc3U}F;Ob>Ov(^KgE3L&5F+zD?&I!V z|NYzlwf5d?ul=s|t@W+<<#9jvbzSFmp2vBd$6=^j3dT0W?|+{>nYI#$n!&?N&!U~N z?P;N&|0A)Hen#gt(ZA{#DrF2fi?+a4Y!8rM1Zf^dUZ^Zt-bXfOwaoBigN3)?$;Pq( z&66RS8)sb}J>l_v!7%de+8f1-&)n-sDuz$H4IY*{@9ilV1Yt&s-J~sdvqR$S5Ou-i zXaO&2zi|C{%LAV~v*AZob`9(1_mFuRok8(+rMGaIP 
zA*0|3VYx*7WaviucRVPD%XDmue7bzn-BOREjrWuR6eJ;oLn=JrY$>)Mzqku?r382C zc(%}ppGECgjaX&1X{U~b-%}W_B#Z6Hwb&~lN<^>iCT8FBZuP_i zpkmGLQebzjf_M+K3HoF0>-EPwi+%I!{%^SrZ``O!DrW%+PJP%=c{l7@Uez>CqhoUM zRgT^O|NM!i86n(psDGG$q?`?Z7wG2|4_XCKVuS8?_lVhX_4GiOzvcfgq=ugt(OrcG z*)$l#CxE%2hx-AH;h=`0RG+(mjcyXFbB}T2?F1P${d@ky&wJF=PO7n=mw(kAwa$ZS z+C$Z6ZPtGL%~o$36QQS)I*85)Mz?78p4{R=XhT7yYybD(0L2I~Ep&3pJpjXG+xE*7 zS_v45+f+V1_*pYcj~sQ-3ly|udH*|~Sn8Id+@ky9A{9kqsW9=2t^g$EAvG^}K8+zcKV#JON z-I~i_Gb3~6vw9L5dIoLVC>eLRUTUV0qkTPP-X9v){X>d(1@OebvR_vptk~B3?b|Yq zR5~{kt@%ZCsnynNKY})YfFuWzLlpzG*;^118b#4 z@blVyAyfvzQLt~mv*>bvqh@GfpPJo&OlGwoDe~Rxh!L746>Rq{CLg!Q+^wDvAP6a;_}6R3NIk|L4@)S*iEfkxqer_lud|imvP_1VIdOnYRmKbVmjxUjMkZ- zJ{3H=X`C1rdg9ciBRxCdevct!k_l{N>%tiH0Lil~+W4nuW|Tay1hGkh&i)IkQ-c{L z~0^FH!^|4#oX=CGxbF4|c$odTo1T{4&^6iFCHUiK< z@opY{I6hprUvv2cS%Gn!Eo~`P0LRG#H8N~s6m0ABpGXfS-K@n6Ia2b|b*(bf|7Ra? zbBkMMh;4suvwO$xz%wz}nwz>(UiK(VJt#!pM^2_tBj7)LhdaWsb4$U|F)@wuPhTm; zQcZHrU9Y=d`lln5FSQ@@976Q3e-H%_(J^1SmZjl0Y%f;7L-v9C0-9w)S|v*Bmm>?9W{q$+L)81P88l{U?cm?Sr9v1&=)q zVZ6SUBwRwbd-D4iT4FRj_AOQZOnn*2j+l~G;k?jN1wjDG&(aj--S2opJguvK`MsLg zf{@0IWfxiyP5(x7*;$uG)O4SdGv`CGL?zlX@xpXHW>J`LOL@}>FUd(~ci3JtNJ-9% zjQ((a=CR1k2*FGv+XO2- zpYJvJ7_C{lLKaO;8qLw2BT@@;!-1PuM3JGzzULHZ_F3KI+>-a~`)o(X{(n?3%xgc; z3h?tHrn0EE^~BQ_nMl!9Q)n3c=|y&CdG6wa>x^xIrV0z+mT#M@aY1lOF^b4Qp8W2b zS!0m7zSqpiHTO1UjDxX&^-YQAB-YdEXXQQwp%97PNxYfv(M927o63LO+fvVF zn5FT+`PL8UcPuTg^c=tNL|2&YnQn^D>2!tve&uSka+pHXfBn2_d89bD1yWM&Mj_h0 zeS;h66~`|+6y|r-~k_w?wX4mOb;$AJa;*AiMeMiYSnNxTNhKOCVZKy{JjQehD6fboK%8Ad- z9VT(f_pp)dbRM>Cc&x0nRZJSD_q@3&AI-zxZbY7C{P}dLPSG9@B&E$oH!j#vBjX|n z#d}U(^!zf>lVsD!Mb@tBM0DIV#uDvQ%pzMZow3{k+&=NL!gUWm5|;uVKk(D6GxwtP zQS4Eo81$aWoAH$9_}ic*6gZSpv`)Igy}>FW6*;)lD_&fG{?=V42z7~f1$ z!LJ}^&J31&h>HBU^!74027y~L2)8;M;~#I7nPq)GT7&7$rCgz+fVJ4>L-8}0FHA@QNM{kW{9zoEKYO`*pn}x1pyZ zrs66Rvi(0iZSY9hX`!NuKT(B!N&I|&v=jl$P)|nMiw86B>!h$gn{15nhu4EH66$^N zbDJXG0_RCW_+tl zg(m9E)?_{A!|JJwVS7T}B=PJ$YreNdko(5^o@(muO&9k6rKA^|^a-YwA1yTMn_E(1 
zM|(<9jrhc~7g~}X>jWjC&+)H^Im;SQ%@VCkF;ZfG&-~~1^|c<|BS)sUmh7hoopk2f zSt8C%s;*YXIEt*2f%UFr>%hV|@FtMI=daQ9kj3P_!*_mh_&;oH3-`N9=i1@-ZPUvJ z*`~Xhz5fJOfX6c`n49uZszm5V6HUY&zZsQfDys82e(Glr-;T!bcq7NJpDnU&aAMmS z*R3$;LIWDTVmb%SJ<-{Y`_SF;>oOkvJtNMr=XK^$`q$FSAL(CLe0lCdP@%KksYOZu zI^nh}*gA=G9XX4)-NebR(OAjm4zwS*hN=;>g^ILB117&frr>ZEiST3oM|YCRYda`# z;mKX&TUCO@xb5u3x8v=c{)&9NqgAiU4J0#*?cMD*l^LqHJI-%3cIAvTGdU3;_qF(U zvxe@K5L9d1>#ukPt-NHPDfV_B=Z~sU<2t-kb)3|-AHUN&Xx5}-i0S9N4XA=VrdpO= zzbxJ3BI}pWPeBz}yT{)z3=y767Mw7YHw5*ncTAYYWWm1D_>Z%^@xfacHuiYXzmINt zk)I*J@ULEDFuqW!@UJ7dOz+_*9oawTXdImSCe~6$xp%iGh6d$9K=YZK^$r? z`j(-JwZBS>g_*$aY6UVXZO6iI)bXQF27 zj@nU6Rq$h~~}urVrWfZ6*4rF>&*`a|)#~x-si!Twc_qwzK7YPTcA{eHlR5G4&TYJ`Qd} zbS3TCUeaP-lVe!3gODW#xBjyg`w4?BXn}p`Z<^b|u#C&@#boM6*p?(GKHE5ZU5%|t zUrX#3O~=6@XZsc%CZAZgEyGW--6shZbW_H42`FNJ;mLgWpl5>efRq%gM+D#J9*Gyw zgn*VODnI4F`46x9+#0n5qP^)BeS!R6Kb9qT4GzPg_lACTJhR79IWN*a0RnDfvv8PD zVIL^+>N3lRT8b@{eVdEzO#u6#iNZ>>Kf%!-L=lfXmeQlBXu2%%AFKR{K3(tfW#O3o z&W*tVFK2hgOkEG>aSJ^bQ7s3KgDx&iy z_05?_R>x_{48&=ZyK7^zG-SE1TrIe~Ygf{}@%sA%157QiZd}--wx|2Uls(HQBDvOH z5ghx_6hgx;9Sa~JPS<@cd~wl{YlkmH;U z@|{ihnCGP4+)0-Z*`gwX@7WlXy~Re z9p~G_4Zds=`*(oc;nbYjH$$<`p77t~XR6Hz#$}J(Smf`%V^3xi?<5_VVWMhBb=|bp zUD7o={MwhKTgMFLGJn(j`7Ub7_<=UUW+~Q(8uOz6wd3y6fR0@Qt`Y_|@#*KWyP)y< zTi*&=%8y(2c8-e3?KnZpe%4-hO^R95Z}=`JeOEnI$EiLMnj1?!EIHE;pO)E)NSEy$ z3zd_*q}cv=Kg59pLUWycPbFz^bKC~pKM-N{7YR*{a2tp6A%CMRyy>0+(0pFFnc07)h8 z1ONVfU>?xj_ImV)hNkivipNff6g^^eFkhQA#8v zAy0NWz$QRv@4Xf7RVR}1_zm6+Q6xPd)KfHxzX}cqAoawoc7;@VTOH9XwJar|>1McZ zTFVpisWCSdMWG3`rwHBy`x0EnSY>0s8uwwAvhEYl@0a!x26Ru%-Xc_>;ey#x*OA@z zj@utn|3?zEPHXb7;O9Aj2|x}fArrl}k42=}J0PrkmVT9c6MU;pyt7A=veY1LPQ{PK zjWVKkvP^sSzNJXE@+DF%r-<{_;|w9uay_ObwW@!7sta2yeba57b!2M{ZRNI>q&-m| zn40>c0Hl())9ExzoqzPx!z7^qWCD98GZHR0(cSVeGawpF6(R=NVh^r=KPPbUNnzQK z@_0}0sv1%M+kbd4uFmF~>%S!m%3KebE^jUm5%=(0zvlgIQx2D5_rosU8zoGs)&Gzx z_H>E*0L8B;R?v+y`vn>bR7N(HH<^jjOC8(ZF?ed-@^idS_lg4t8B=WhnV6SU6?|xp zJ;ZIc#e-1WbJ;hM*@KJH_8|Rr2m!nz`*dt}@&i}u)Y8Sbc^RLx;`?0P57UPbzYu$< 
zMHd;te;QvKYN8!+^kX@j%>OW1{7Crw689f&;z})9`ARvtK)F7}fkRQ<2A#QFeV`wv zVefW(B&h}IMeeb=Msw?P{N^Qso!u1D=dNGuC0u!Ya>X^=^j6}lTAv!#M?JUH^^Nis zj8?uG=NjgJkBuomia&oKx5xt-vffx1jRRMPz^&?TLB4h)iYlnVFs5}^KC`~~DsuSw zrFQ|&{1ZM6CR<7bUsqgcf%xaHLW#2G6*f`5>f^IZG|8XlUw_^stD~dxxTszQ&$)%! z`v)c|s{ZK;KKJRl&sThh*o5N-?4Q3+7*_m|0_A4&spOWA!|U8oCovC@tvGV{Fnx-^ za}}AJR>A@Dk2j4uu6#qS7;i8=lS}3ol&RYKLQM?W&iRve3(BZUppuYX^vm|~J8Er} z798L+7g}!i2U;?w{jAKPUE!WzCePA%X82iL@NyunowtfMvrm~Ib9Ggfnv;7;*PD>9 zWT_Vt#N+7628K-N_r);Jr?ex%I9icVym?_ZN-VK?C!G=goG3<1}^-dJCpC-wg7f+U@ucQ~T8ZZLsB~vFr+~*L%<3AK zC7hJl9R^md*N2eW6OJvBDzjZYdW!@oKKeyFTl9?cuuNfjWst>5jVvcR;22@nnY)-y zL+h~0><3*)@Is&OL{pxb0AlmC^vsUTp4S*7v4Z=ETI=%LMC`=mP4T*ADd<;`?Q%X( zN1|M(2;iID__12R<4ZSWy|rAwu=)K(spUK71m^Pgev)F;;$-ClvLD@iUk=0&u{Xc= z5O@+jSzv+5&Zdd;yaz=$8xEDaVB5(WSZ=9w3gaCO`Tk}HQR*sdVU;3`dGR7eUbe7yS&=d&+EzwwzgWOeKA{si`oQKS;!-#a}i7Y5&? z@*B;MJsq4Ex#-EQdIQ4`z9;Pl^w!rVd~Hj*)LM-WDxmFa6y)CJ%Etx2NoR`NB<>5VKV7Y7|W-|jJed2+PFCs6mes!^*?n3M$vs+mZHGfgj ztRn^pec4MB;6L$bpY^LYH`z+5rd3wB$KQ#1DmeJ>1V_h-I8SA)egTX$V zS}2pL3S{lLH}Yag@xp~)m_(K8lcpj>p2`H)ZV-TdPlSX|W{U4B_Fc2H?~XC^ zO_?Ro&Mrr@$VR%bXWg9)jsT3lmFM7|!YZ+z&8v*b(FB*1kCl$GelD^VA!|%hIF^%} zd$0d-o9II4iFQlQL}+D9MV57b>8+4eT5w*eplX!L%*%d}cjMv>D+rg|2@ekk=$ z4av8ss}rX%cOZ)8jxI>&hYlZpDag#~YsG*J7aq(yw%9mhQ zoiwRW+rIGn-0r~S?*akS+_6i7A4_j%DfF^9?3@4iH?=JAq|&|BJbm4fTV>Sh6Wp{4 zeEUA4o*j%--$`ZiWI}X#+jzn;3YYQk>k3vYLp_}F?_M|97N)0pc~w;waB^~HTXis! zVVvUwMoFthdcO1`!Sy}pc+@z{-mXElA|r#rM>nCscrw< z%-80>Xf*n+{W$xg<1_!@L6w-ZqCp!!6W*9>YySA5>1*}*%a`O!aiMyU^r~x2G&$kA z%mjxHZ!{q=B-~XaX|1DEcT7Q3^Xa$Lg_`EGnwpgE?$V{VJ3{0m`ulf@LPvbC`vTu? 
z@s?Y1Z=QzaMt(T9j;Zi}tl*)}YgQGtf*TqmKdp05-Ws%?H({dh%RBMRUUt0Xxx_|k zDHn%xVIzwPN&g5GQ82&zy0agUKRh&Qnys+3Z0I)LY)(!wYfjz^N-5u6nv%82rf7%1 z;+VjBDslHx@n4iteE$CD1g(CieRz9GDxb2)UB>sr2MhwWIhNQ{r{A_nDu zh5G+TNK;#mexY;^8N#xa=Bm98-Iy>B0b?ykdgZ99j$RwNl}l2Qf_e>fQ`opgw7&{o zwnNT*j&H)XR({;0>0nOvYBIZ2-o~X2v>*~^6xch6LblQ zegR7ue5&IOrx7tc=DE)i++f2WLU;=e`mVCe+;ef($#7EZYPVQv@C+ZGD_Az77yL9`SKQJ&&PqfOehf^oBXD_ zQ8BU5f;$$v|9?)spJS;oh+yrBG@M?cJt?qRWWQOV_PGm>4)dgvB}~+|TpnCxO_g!& zxYifHM!=Jh@Dmy6apzBJvkKzL?U8WyaL>Sl6@i~DfJJHBK&ni~6Qek6N+ z$CJ?K**`w?o3lOMe=fbR>s{3IVpT%*<0lnS%^1*)%}L%!xL>&rHUB*^O8hPt%yui3 z<@soRl9eOVD05@wxyElY_-POmnE?QP*O4}uQ#8DtIC5iB7ZB)MCP1Kos-lA2PlrTu zj?U-kwX%Ua@q0N}$$E1muUJID)`G+7YOE}6>CsKx^i-GIuHWSZKOc5fcNE@HC2fIV z@i$YX=$h-C@kR-odQD}b{7NGYvtv6OGxVdI4yop!+wN7 z*jl*{M_ECNQhm<5mP%DLl;txep9a!CTK|Sk2^Em`)zU+SUu&W7Awa<%FYkkx5qX|7zLFx>#@(cVnC;az)PyKf>fXvnNiN{zN< zN@UoCMgNR|9f#-dui6pT&i#+Mi*%H?E1XVFpwX`vr2X=IRVMD^r;5e3xAArz0n^V; zj?!qSxyR;+H=Cu&CUPBRi&buQN?;FayPVw?RAbRQwKipvpx^vFJE5pH)WVzP`{S3- zY871ao^-SBYZOUOjF{dT@=W03F3dFCzD>g_@sgrs|I)4iGxgsoFa8NuQIJb?hh&6s z$3@>5e=wNCAvKseRFU@O{5MA4jP_6)Rfnrwe-_GB9jdra{!WhkQCrJfHREia~MNnAV(S^=p z;7BdHeC6w3_;wqymg_Foyym0n$-c{9$v|)4ZkA@3IJ7WCV43&&+~!Io?Fb8 zogu0WiqosRO_rw|?WY!VkgS^p?>?KkgVrNdsQR#L&YR@)22sl`Dc5HXS&MB1^*msU zeVcc?D_h2*gEvt}RO_9?u_fyX`VMi{L6gT~?qy5QyNo$M=vR+q_1$jwv(d=bW*upL zIx@!Wdm4p+iQCt;g$?!~Dd7;ewM)mOxh(l118aUc@m&Lw~td2Ru z4aZlJ4{D)YHCe)~;NIQ$pjLG31eJ!)6f=GH*wHN2n~y(O`M)gx^}PHMspnC0EoFL4 zRZE#F2if+3carPf(>;|+JJ@b9vL&!5l-+!mb=UVMW=}R>VQ`FhabHp%#Xjq5wt+gW zMlNUW?DO9;_DqphfvP=lr#>vp-MUOhx9UPKUCq7W#3QeD|bu$$RA{ zU-zn%&-<*L@t33D$)Hes$3EKTTLUB^UkqBhO&xpmKE_kslW%Fa&QRywPV(2b{jfSa z6dIB}l+|K%?_M!VAxT#8)f3y-hiq<}LQ`^IScm(}?~9lph%6aj-Ul%Xast0U$YDsA zStk8LhmZv{YEHg(?wlC~VV%=o(^LC1-(tA4`14fRl2)3$Z&Huj%45*f!UjJ_?qX&A ziqQk23vkI~(AP5c0Vj#YJ;C-%T;T&=-7T7>+ooxl?inAt=R?gdy!Ecdhe6p6D6r^$ zalfIV@q1f!*~YaG$G_m+0+0?Y~j>tb!X#X$j^#1Uo~@&OG}sP9DnmDoJYy+ zKC7UqV)0+kt#Nh9=Pvg48W?y<)pLvWdS_6)iiLYJ_ZL#M3s9$Nl(I0TI*(}BIb8^q+gKN^; 
zXPl4Qidzcu2r%^9Xkcczo|EHv;B=BOeV`1ZGDVgKujI*$A=|c2k@P;QQf-V%nPh9& z#M$@Q#;K>~SOgxurXIii(;pYyA0_ zlVgJIGUu%{?SR&Hn^`m;2unMdvYt9~W)m5vkzn-|MM$%t2h_{BSW;w4d8}J1m{dp^ z(|QKmtx~_*$;~0691=jUM&t)dhm+qtFXQs`|Zf%F6)|pZvMh_)f9fRFL5hY#eeOG z+&*)sBu50YoYVW(9+CvE98&QIc7zZmx)E#`Z#n(uJZ|l`sPvAb&cTfE6AY};vnO16 zy$0vRx(5_9Bb~)O5JrTFUd!7#ZUoX(Hyr!+J`4yzSP+9Ui{960HOcvQRkay;^K6!V zf$Ng9j*5`}!7w`49ZSf&Z?^UwU)ssP;I{3~|B}3(wehk2#qryBYX@J%r>>!bHxN;Y zhSl2iSng%0&Zs&iB=z5^VO^N)wBH5Xe+of8vU_M43)2UM@Bs`RW3FjHS^?fxqHD{v z+ZLTgpGxQd;3Z!Z+e;%fe>j*`{C00ahjl+;np^xA`Yq3)bWg|)V|7WBj`AYx=;M)IC}Mr{cTJr>_iyi)h6Br!!x9PqLcX=RR;+r|(azRhm=W%IFz5 z6oe}LXJ7Z+GGN{+Cgxq#wO$z?6kzZD)zs`UtpbMUe6MSV!D|xzDJYnTy*9j7eOtM# zr|}~7X2t&{2)bS|Av1N=e%QD0fpTx`g@ky;a$2E2`o2qq5c?t&z3RSx{mUg1vv>E_<0TTRYjmlt4_nQXULCQ?z5l`q0u%=MKeMzdWwgEx zgOf#1MaMbn$r&8u9K7m6dCBDFi*arf=7=ZJQ-^&ex}}9K?g~ord;WaWuU~(@`8ksV z?uO`^P4V+7r+~VK2J#%9{LEJK>f;}rmmOcd)=AK^DK@}=P0f4xhnNr{9jq|O-pQ`( z+upuqb-llAzScN*Cp$Z&84~4#HYMk{n&r*Z)`NK5(kAZhL{F;h+C>%@s8SE($!W0j+^Pq>|Z( zevCmb&Zz0-lgLHfOW>tKKF5$bU>|Igjw zQ^eFN?9T{~%aSmbess9E_Ck6o32HdxQUy}a6rNuXiHH!v%4)G zghfV$ajq>qWUs)HSTAz-JoLMek$fDqYkGIjo|iFTlZpqSV?<21js46IK!r`1Gr}k% zLvQy1NG((2Lgb^9oFvdJVQ^e4__@c3_8=e+J1FV4K&91#UAIpWdCNX{dZuJ$1*fI) zx-Irm!c=SjSk8y5*|A3%H@uw98qvj9aAsSnWFh>a00@X2s`Y$nW zw_TpxhOqs$>JTOrsQ{xAOle3#vjI+ies`kVLw@9v`svf;WV^U7RrOCDH~zwmAM(^u zTXe!t5mq12dm_5xqH)mtml2Lh&-+>z=yY^{tKCAUJzYoPtsw~D9D+J9G(ZT!Il`uv zFm2U~WO870>?J2i)L;Qzh9CiCCHinI`v~(8K&9Iq?&*&Z;(cH}BcBz5GitV8#inlf zGY}F$a5H5aoBr?<`VLE2lj8nBiD7(KGQ7HXm<=8L$;})WB`fo61@75^u3=={hd@Dj z<&-!TNTr7{R15sN-$s;BqhoiSrH7UWP#9luA0mc_ho3mndM9aF!V;VCrnc?>*o1~X z8752-qYerStC#VqtH8k+(9|y|azDciB|Oh@1K|Ilz7Ki+oPtNxpK2ap0IL4&$v{H;q zp6p#%!B*oE5ZHBO$sn)e?^sV+0vgAW2W5S1FyTwsn}@IHA5w_G{3uky2q7@coL8IY z8$=%_c3ExkLEh_c!@%tQ<2!c_*eu7MGIH)Py5<0s`NJ zS)4xei?(}pQ|N!f2}BjP!`rE;iwVgys8tifO0Y`ba0Jt3uZ){Z)@K@OUukE^7u{@# zm$K}78xHvA0o-Oz8hzSLuww`{Gx+>|fd&-}oPSff%N*I?YJfi&3kQrZaT1Z}AcW!9 z;e;xRKc|4hi;l?*h|jbbDJ6Oy)DFc50sdjv?-jOWhC2u?;cu6!o^H@tWV>I|F&Gl) 
zF;MVgho&;z^PnSXou#zn9_jLM)C&T8EMt;u^!6<3O#?u+1x8d)KxC{K>9zyek zs5`b=w#$;;jeCQ8-rk)EY&AC`Y#rf~>;lhMFp_xNjeOH&W&1EBainmqG13t4m0T44 z8`u)o7-XCNT(Tv!4wnFrGRwG0%KFe$9H#+*e+I9a41AztevcpXiL9h(7h31Rni8g$ zl&NDIwMI-vxj|cyyHDz#M9e6LsLQG6EoH~7Q}{18%AC0@4dfq zXRE*5gRwD7Q*(1Ws~0a`Tv1o|dGUf1qu)giBYpb*a+|U>9zA%lsi0b&Du`X?cRj<% z@G#jC`##0XGE&GkXzlwJ7mLM9yJ{U|=~d!7e0VET*xs3?tnBR5 zc#Zn{`u73(R($?UEpn^7DuC%2VFfkmC>a(Japvq<3Uzh$qT3@3nATC%KkGGRi~oYC zAbW`6^tp3X`232Bik<#)le4o|^X}z`;wmG@B13$px1RZYVO{C^vc(Z@ZrHYOd-LWE zUcm2h9$sFr=x7$gAIRfuhMIR~<}o%&M-M2K6W1iO`0XQOV`HLx&a=b!U%fhV)5OFO zL6kVs@?r6rAY0$qsWReh51 z$&(*&sY95i`l2*6b>vWZfwD*KVU?=(5m!;j)2II627UPcoe>j6XK=G)=!eSC&`|LZ z15N=8B11JK2yFiT6iS0@C{Fp-ga%&?UU}S?PYn+d4xy6+b-Re~gm6{_90=bRo!OUq zQ@^SSZAm1!o23(i%F9o!BEl>p+iA?WE&&O=U$|3YHpafb=Ey1k-UPokFgA7pJMuYb zT@M2T58K<@BLfcCDT74olT!x{$jHc?zi=TLx~to_Zzm}!DZO=?d-EVI(-i z2;zEJbo6-`0NgY)JBJD?boB1Zywyn&kra%EHDi;HKx-Gh$HTbV_f>!Vc(iu;XC0J0 z6c6kSK6T(31vR71Uk3l==5%J9nN! zn=l#vwhaDAaBnY+Uq5-`#KFWXN7~vtI$|C@dUOQW*ZK43kFN3wJA8&Kz+o;fE^(V) zvNrWAS584Q_yr1BNEdhxL(NuB?y8p7`N(6sFHd`J4SD>SR#Q_`8NwABk6cEkh^7q6dy%n%df`Dk`C!y;QWcoOn!}-hts^MIwr@JsJy8LQ{?#+x0m$ z6Mt5Ik-zEG%%Va!5!>EF;U@n29VXwZ%8j$<97_>(;I9baY%; zxM`g2jB77dQ3yhIDFD9UO2k`amH0Sk^tLo9FE7l`4^FwmxL&xPpRB3<^5qfSq0S1J z)F;EdF7*9-DJxvFcnhzv-7m$8p1yJ8#-ZKYNAT6m

i?HQ3U0q#XjVv-qNTp>}9}ZDO)65Uo!%bsjlBg&% zZswIX6Y4(#%~mW}2yCEU_-_%%Zh8K103P-9_3NyVE5RtBG)~lJS65efk}zS2&Bo3y zbVd6@+e5grZ!0J$7;Z_SgR=$MyA~Y<1xlP51ZW*M0Tho|{&V{C-}hWqQ^VHY)%ot; zF8uzLYu9|`NcRzx@gkljC3zz5=wofSQBjd!VeHacKegh{lDqLU-|!n14Bso#jcc76 zkuHA2Me){U>15l-?(H&P5JZRjDt+Y+WnRP9_|e|p2#M$-;g>_cA?8;#G-~q);iv+& zo~KAC(+`Am!BqHpQc`hQ<*b`;P*6~3gdF@1AG1nCNJ>hYwrA<*!SR_yD&9kjGZHOe z5_o1T{WER@D1mG69wz-N_>L5!wxZ&+wPo&!PsI9f2y3_LfQ zpj24b+8Q1&L(_lq`DNk9gNw!s*W?*h)m_*W zjjgTK*kB>p_(;ECpw5caJKZ5B%KmBtGHuW7>=Q7MyD}&n=y!lIfRM+tHUQdEfj9}} zMUU3hYxcT}$Kp_jVscpaQp5CWfCUc} zA@?C7si|{CiYv-hS!LgIcIFTi+>NJcFS6xG;kJL+eZpJ3sLEd?PgJbUqCPIJrvA=m)CTE&YOtC4o;TUdM!*FLfl z%t1QB!;=J2LB#t_R~WamGBelid1gcg=2SUqxZ`)nbsp^(V~}LaRPj~CV*%NONssJ} z8*CmqlzoKymEqMutYQ+DI59C%OXYiT{ioBM@@MScfm3d)3rp))nGH>XDQL099|Rk>fJ4rcnYh;mm~#G%P~qTh!EA z8w~c@2mYa2UtceBnmY=kb9@m>{A~ICKsP?55C%(Y}6iCnv)0y)CORBuSeg}bf zK$-LagMaV9K-I-xEG#Tnl$B50*c`tUFX@jG-7YneOnYjJ2Zk-SZP^55Myrl+hrSnWwC6}xdDRzC78Jhi0+n%`!otJ*Mn?P*Hh$r|6Wipxl2WnNC*%oz zxS=K&7BFTm$t-KNwVH7}uEWal(^*S-dW~#VP)7q!Cwf7+p+_qzf zv98Wu009IEP7l}b@#Du&;pHGG`yg2-a1Hz5PfSv+EiIm*p$rc^J^TLr`7|3=(cR64 zYO@9^dOKNJA7e*9VwGUTRkQVqI!!Br5bCY1WX0jh*wd8_4ME^Mcz)X)eH z3Zg^v@S#Pd@!L{|zbtTFvw8Z{|5LNd&g^WSL6~;fm2q)${!8X@2G7$6~*zjfeK>(9%Ts?-CJ}j10jnqn5Y4+w^A0 z;oHc7?`!b-L_{!Rxr_HCBljme(lXPSC`(2rARv%2d9ZvojMVjANLbh>GLngqU;y(@ zAD5JA%S&3aQ@Dl^BYlt@TKANG?V|MHssF4lB+WK*-WJ=Oi1x8!_FIvZUd_|Ivm;dV zDFIKSrR0rl-mZp;%*NY@ViS2@e*V*izO4O%MpN{q1&Bhk^?AZZc%^bP$H1hIU-k@)%A^W_`ZFNe4hE0-LTwI*2n=jh} z_zun!fLO`g1R(>ey>4=~-ivD2z{4>H`jjd7l$Xf9(uJC)bxpkL{%QI&<)D=jJ2M=N4vXt6={@&%p42 z2ph;WkR*{p6c5sWJ6~1GeTU^oEgP9#$kpce9bdj&Aew=YfCC6|8s`;|`LiQWEQAK< z!p^e%g%0y$6z3fL{5wb?ej`id$UQsko$YAhjpoqD{}p zNX@JaudNUG*XgTQ!#WeKd?b-BSL2+Kg$;WD%+lZN@$g;(@Yx5buM+tr{s?C@tyVDN_!lJ$?fg!Hm>WdGncn{pTTujUROtjqgjK5 zEz6{V4gV5K3c2!D#+MVsAB}lVVMLAWfJmZZq_dEakUk)FgdHz$Z+&#MIuk8FO5B`V z1;TwrMTK#&$!BRU{A>MZdILI*C{hf(2BXbZogyaMVYFw_% z@rOtbqFE0L}8o%h_bgH2KqRU`@DM%?z-7W!`jJ%`SRnWD74AB=1cp%Uh#iN<>*|*NAz_D+j+NOzj{&xPPB~2dj8h_!ZkbcdA1a 
zlbV`@_-oozDh0=Po(wu?wpv-a<3d7b5u1Q4QK7W>5-miH?**{09H*qXv{}m?>52}c ztGB6EQU-Is49CjH;oraI3G|_x`2`QlV9)*+`s-tlUy|69hSpXV2FF9xPntNX*QW$F z{rY8)kt5bJyY}uf>WiJI;O!Cz9&O0uedF#3e)@FpS9oPuxFfGik?8LydJQP{?8=aV z5XeexQXpelCNFp_-E3&h*YrVYJgn2IV+uG6^kS2KiJAd>c&OE?WJW#xBTPq5K(;0cHAnnE&Ch@ z275|qdOBZA0SkA+p~BTaM!0>>%FC-DGk=Yo%F=tB;}x&6HR>f=R`D=+I4OSo_;EKh zr{(0_SEne8i;8T{`CDxMBKF1`i02d3$VFonIYq_AA^G`2g?trkkJvv}R(cX>47xO_ zHo%I_0F~9vuFADDueqRE1DeN+r?`OiqyQoE; zSJ&7Wte&C6EwTje4`?l)+dZdti6L6xif)8DD1q?kIHSm?*s{gO$;Zd%q?8PUu)NGM zwAS)a*I~(TBM~XKXxmwX` zg2pOG8)P<;5txHZN=iDnht5(TmHKj&mea(ASy(=m{fGY`E{8muz7N}Xgeo3-j%?)* zgmzozWhS{tcHSs+yNJmBYt}eWdGOtYPVCtI(NE^C--S^>e-38V$$4@aS63TYDfZDQ zq@0hJQkuyEY&&X?;$Vu zMK~lNI@88y~D<7I6lJPxfjSS~274L*zd?p6jYWQCm zLId}>prD+h;#D`zFcrgS812zV}(sLAH%*bYeq1nv+QA2N#iBNWjtZ{>yph>O$#=0x7PSH;E zYMq>%Oq-rl)W=IvBWlCBRUh4SLWCB%*YR83lI=f!^Z`BfMf&{wVrD+R@x8j*+Hwfy z6FCp2e9M3Q*o(YS4#YT!OM!PqhGR**ZJf{#DD^b5`-AjQLuBRVRsta=uVo6lV{c!H zr0L-9?W3!QZal$pq$F^q!a@S%9LOjxDe1@Y#NsQwZww2kQ$`BBg(zHbw@vTuM0M6(r;*8wzt(ODs~NQa4TqP{sBqdOX`cu~)`EK3x&8sy@`Mu4mE2z*81G|d` zqwv~du-`DA5E%7qBmxB7jw7*U6FCn_lR`ymqO(|b$BrGd;8!(4&7f!${DIqxllnvu zrQLhdt3`mH|B~|IHm7#x|D&xljq7py!v2p`GNcemLQzywq@#|KW<$ypC6sx}Nv24H z(jZeE6q%AlqeSLtB2q~*oY09Tg^ZDi=es%o&-1)^yh{Cs``-87YhCNQuC;e+A5p&m zZdjLW(C^UXLJ}CPr6m?+F=34V(@r?QWRo8ebVW## zYW(GhzBZBRYdb!0;3na|b|FM+Khle|16lY2bFDZsF6#c(YgUOFOEX>C?=A>}iQczQ z30ub&h+8SK8QyM^i&Vr|$6M-JmAj`qptHeFv^#R-h?q3YuF?yC%>yt6G<@BbZs0PK zwJ4-@Nuj#l#0e9aa&l_B`zfY|BNV;BB?_nyqGki{3QGnDbko--kAAt;ihXgdw-1Q4 z(`Tkh%+btxuJ9gq6FQ3nsl{^7pKrjs+}+#)-19<&{K@>2k+ih5n5#z$PJl&iowaT1 zqI=6TyCCkewy6wqii^7fI0C5v2^UHpE{Cm>>On;5WIPBpu@IR);YO(?xi^;`mwfU5^I`Nixwe!f}f))}3l9rYx$u1i>J!hZJ z%yiG^76`bx&=1T-)AOo{&fcG1 zDnhc9UibHxcA355qE~IF#U+R2C{?^SZahX(d74SBGsgR!1OYV=DIZQY*?eMhw>ni{ znJ8W7KtXE(RD>e?^yyPK50BKPXB%}nwm?`S#g3WcqR2VnMu{aODKxEYZSzq3(K_9A z#y+@3tz-Z(hYA&Vw_an&kd83)`%LUIYIU{09e&@ikWv`Vvt#GZd3=ukk`h%(fZd?> zNAE^|b2e!+IQb*bu3tQ3x`4^=CI7c>v-Pyk+S7_r(KO7=)HG~&ZX)k 
zn}6nFNNVyy_&rHB=H@fL-_}F)O|RJ9@bx`QSt9w~*7lxkaeL8~eZk9GmnkU1<{e8- z?W&`rLjl|a88M$?6t7dwr$Oo^ikH#HB>DxXR3dD?k;H_Wuw}~@i3IUun^kUGm%a8? z>wLSP9=ZuT0cC}#E(w>m)MO{x8n-79$!t)esHQD-=Ur39y;K0v58<_)VL|t^Ogp{u z%r(xtSl!M3e8d;|hM*#rlz17H63cNLuCzj9yXjG76OYCLJ~F#~i5tq&sxzbIl0us+z1{z)_4nm#xpT zcsQgQLAX)F*&Le*qyEOWK;cgm^PHCp-JF}9EfI?g z#&9(>7nOHYD;<$?4h{|}&Sz!JpUn$TnH;Z@iTr>4=Z`vNJI^DL;KI;XT$0mE`G zvQTh&`E<_SbGq06xD)SkTe`F(WBoMCr}xh<{y#$*EOE*j!rm=FBy_llBZ4pG8H+-e zCA0c%rD?CZbLBulGBPqtpIsh>DIp!_Ad|L3hYsd5W+REU9~Vv;HJ?Yr(GA&oc~WAD zar%K`@7|#VXLr)-XrIrz0D&Fkwbgeb-&ADg0RrAph`?@G;Z#^ZaE|*l+$eCpMCW!A zaf&M_RPc%~=wUZMJvS>hC8bO3>z%%^NvIY%%!1=~AhR+#!3d%{zUW>*Z0=R%rluxg z2=)8+<`2}iHQkCyQW*xVY6$o3rn^qrV>d{!MP#M2?lCbjATy+s+7iw2T8y|3#jB7g zMy>MqNVs@W8EQo0_`K+0LEMQGH;ipoPE;1ak3cHRJ+!J`ylBr@GU)+y?zM&cL`W3mpY z9ym^$78DdDg~S=aoZuYUY!){7_IWEIJ`f3NQ1^iZZk}}My?gB>FJV(*1b-U8zHa7; zQ4W4O7FUe^$dNQqR~x=TPF5T1SJbS;Jx-k(h>K?RY6BPsA+t+X?r^#Zzk_#rH?BIm z6#A}OwF(QC>!5O;+@;hJ_+Jh|!HT4dab76JJj$HH!cLOdw6tAhXq;Hqk^K{$ge>&* z%7$(*nlGPS`GfF3_1NpXCqJC{OOE^p`t1Ye-}BAyXYa;jf?I2J%A6~`zP$an0Ja5r zay!9ASe(#)gVWOVFkvXDr~nK*!_RHGTW_h`O9QVasnNBqPeboEbmM97w5ICvnAv{d zmp2R^kz+Xf^JuCyw+r7GsZ`jnnBjD#8?j>4$?xEvVBi#N;jyG7r0IgfpjchkpS+0O zVFP#Vl2=t#O<#IXee!(}@xT!yy5nsTIt^#o?mFt%DhDHgN+YZ2PNBs(t=ti{fS-Ud zwg4mHhwdOPJrI*M-&3o8Z0)*rXV^xUborbDrUiaLAUF}~G2Ow^?fZ7H$Y4_efFmL@ zvK`(Ep<==tiAv%8O6TC)`%hT5jK;Gle9yvOR{H2sXiSXDKHoblh8&b(qtQF9=N{z$ zwp7W@o;{m7S(u7(FPpQB-Ig!!1l;>=uaN7l=W5R4F9lojH3OJzj56ab?dYG?cC$i?16f6i5O=%yNQi&6$%Rrxn2DcBVL@> z>GQVb-=9B1A;;-{RukaVZcr{WLlo(cD=Knfo++sg@|ZXRPh^~2>P<;$md^%;{TQ=e zclF}Ic+fi!pL;c%>@ILLi|*Xgd+n)8Q}mZbJA}HyJ*gg_fB>EgO4eWhdKUC^002Wn zLkfc+&@~7vB)R?4TG^>xyY)BQF%VDin|agE^*1ovhNX;!sbK2;F2G8i7zIO%54jIW z^LDeTS53!%Dk{pz(}41gHBC*ESu7{mSxYAuQh_2D`C%JV^G4>0goLR)=LV`bFW;cR zKW>C=0{olb9=-F;il*(bAm3vg;|VlowV`q5|Xg*dj3k^Ap7QzaSq>D%`{bngCy z{yF6QWt8dXm}tgXRNc2KE03T6*R*n^UKwNblEJph|MXosYWTd#+;Y^4?cw3-fBf;s z<<-?PqVOTZ)~1C92S;K@!)y02K5<x{*e#X5V3-|G7Pt;mj)g29Z7$@pW&~d1@!I5 
zwvV*iN+8R=cQ0~wW{1M|I`NA8vabv}D9`})o$y0Zu$f`2+2EKJ8*AP)<|*M}H7tps zl~EP4S<8_e@i}KngF>YP#i_7ILcYY3E67}yEV7GdJrSOSRPWc;Jag6Sn1cuN9z0Nz zlargFxh?f>x<{sixxIZ?xcT(V%vw#ghOI(TB@4%rj71iUxKE9_ zbnV(zxVGRBwiHn4WWbf4 zdUMwI+gmG@pwR8g%DJnYr*7K$-VelltE}vAwsl;}KKa6hNq0wW8ZuT8+U-*O zE1k||tk>w-Nidq89o*hhF6 z&zv~}vAd%LvtLz}8x-nkgC{FS;`i~oPQE(~5Em>B4xjwY-bzY}Q~r5)5XHak$G5ju zpH{i{XMJ_=(PPD}#tNy~#;qM$7Z>lhvREXQS+#aQJz(p=fXG%IsY$7;>0>6&%-r!j zGXnlXon05P@L=GRk`O=VgI5QrtD0`=xqSI@_{EwggFpVT4VCZr?!}GH)(7S!A29Mz zVL0D?elt7mRvp+3lVKo4nX1`lWfCDly1gRVI4>us9pWqL!YREu^B`>x>IMdi!hjJR zES-{)q7MakDZ;B^q;LcP2#{QdaeP2-Uf%P!Z+Am`M~yz)DM>p3yzrO69QYW7WuCcG z+tg$G4Z)W_nm2y@Hj>{@0TXaxJstOoJRph)xWCYoC$-QSBy^{~E#N9Inn#fz1=ERN zOj23wQDz=EyzBb9m!`V2&v#);wI49=oMW$szm0bnSaMY6%AIaWkoMEH8pP)7FsX?YD;=%ZTu($uw+WN&o)fm!Cv&wVG8$;**F4Y$T6@q;YirK z0Eo}KkDidZM{@)IA0gH%C@Y7tsxoxnynXH~9kH$BZ%0Z2k^AB`u0}|rSk&hXbhsPc zq-=j+gftBq0vC|A*hr_UsR?Fuy@^FdQ*1d=7;aJ8wn3Gaf(XCXoR6*JWKw;-byjHwh|JBmqO9 z9RS%#EYnf1w01_c@Wux6jIcrjAIizdWS0<((uWzUT`=#0;nSOYRrs3eAKGT;cZi~K z52@p@@tTmZFnY>{=<4oZ40Nnc8x#r$GIY5K82wSy;luXAfeawK2P-j@L;k|TgaJ2< z17-vH=-ZDTMe{)$PFtzFPg!0YvFk2O{r#jF$wLec_won1V2i@rwQ>oEGt?knC};_5#EGBSMW6?SXv{ldcSW^L9>`QW}fb9vT=rZ{51pjkF**8Jy2wysTI1%-V%!uY&4(aoww4DXFQVu_n^4 zgTMrTRjT6P6P+-;M~eB&%I$?=wsyi}{la6X1}9fc+O+AiYrZ(g(Eg}dR&KA^Uxyf> zBWkMVy-JFFuo%CO zSL7)kx;jM!gTKesaPa+)ZNkmKGnzyl=XLKIDW;~eaRTz`!oU7 zpY*iqKz)r-*o|Z2Tj=Pby95HGBV2$!UJ?3zNQ$D%)n`FU2l(7ee`FCch0RW(Q}C>; zO``-|FL(qzT~FJMv#GTkwfEVsy=e>EFM8qu6wjj}MG|W0+2*pzpc8~$p1`j@ z6p9$;+Unqq5AeFuCyHD31tyZGre*5oW!H9$ATkBV#}Ar3j~&pO0@eXnBaR#?X--X% zWPDP)`Dc90ggx?om6hEn{Dd_Un1I@~8YxEbpTv3_wlWbp6C6BUuR-s)*W>dQJ{0Q~ z=#wD0(XzLxWdXn)pn{>J(rG54)RMt&>$-pJx9&4{O^>D)h$Be4#`l!a<0-6zfST|T zrzbl;c>MRO_tXzkf^H#I>tkcqiwoa*JIVR{%04vSY~z>E3ag()62J>E6$>{M*az2N zH;~Dpyzy2L4eq1b%*q#PIce$jBQq}iz^{VQf8fLPc(zvwjl75PC_v*`8rE~HQVxG(*!Lw z=Yozy{+vV?Mu*Oww@-Mx;OcR058$}rs8I$q!VB#cnJfT*-~IQd-Wav%UD0uUjXOy6 zC~YhzcwgMMnZ+)^CHdhPOa(?t%7)Xqvld^qjFER|1}-g&mm^b>dpZhhC|L{QWr1|W 
z*vLY|XT&xfalvq?xv)5RaRn!b;bViI0mTRMXL@?N+l`H5Wu!goA52(ycgfzd8kV0{ z-Tj6Xx^Te)OR`*NtwrCSD{VY_B%^*Ibc=BP&^|`vq0gwr^XAl8F4!CP5<9Ko{QQn8 zbOvG*OiGxPyylRZZ_G*$k7b;kYq+klywj3Si$=~rnUL_e%!M(l-4JG zbu`(EbKMbdw&EFN_dB76VA#>o^DUwZcK4q~%0Cbj<22-D@hR0h)i(cyT8F8Zj?k&4 zGu=;9TW4@pbv4#|8`nzZ^(`6d{gdmVTN`MJ)Z|-CfZ+Bqv+7%B*>4L2u#BfX1v1(n zn0bQqN#ygIN^SDn#ArQw z-umrSGmUxWPc60SW8F@Rm8m+LcO(e6_8WUS8Mv~ta-oNZ{V(g3(c-Z(b*hoy z<41BmjZkK7^psjzd1aP`h2cK6!nAb^l2Ji%%?|DA1gpf9d^yR;-N`D({dCsz*_U^T zn>2BmKulU%O}d-ZGTr`8o}uqpHut$d3+9|ws7LgTgx=9BYVkHR>W7UmT)jA@`IcGV zpZ4K$H&1@F{?m4!y$P7t%F5Nmr=gQ#b>8OiBP(-%j7AnUj_Mt>91ke}8`^+E6<_MBiz`G26t;*de5=kOok=LO80m!kyj)bIG}-Weh>XP8Cs+`Q z4cm$~E0#=gbc}c^=lhRrHBD_Vkyujo8*bT1>DRb#avD47eoT<{38&#)Ld}a87DT~# zK$i;?$hwBIV9{gWe98Z(nNX>LdYFs0(aJuQ9(a!7odh8ZnX2Z5gGck8=#|NgQzD_F2`Mdg6y)W~vc&cYOPccx zQ;iq)6HO@WzoPc%`qu&G1B7E8Eq^ByB@PK&A~7`;2dKxw!WS7N1!5mxK;so5dwg(r zZ4#L!9QMpF6FVsiQ*Uc)tJj-Jvp2>o^~Ri`sH7B36c&9V5VQgjTx(Qh zM$Zx2wl1+3ASb*()^&wF#1pieiqn!?fJPXHKH|Q3@y!V{eKi{oCoxZ-_I5)4xv~YaDKdX%(noW z=N7gdUS3;ySu{bz@6Hz;6etWl>aDbkh;0g9Y58tnay6*IxeczZKVyCoFMPlT7fvr@ z23JF+(F=h-_3-q(ZUjQ>m^n)lm+MZrP}Z{9>_YqQ*tvlrd8MW4H(w`jzqMSO840y7 z+z_bdJRT^}0=72T%C-a9H*d-yjoU6>oX~z)T0wc$^S5^k4~$%L3gKD!M!;K!z%>{A z1j1TI%{bJ^C`wERL|4e$HuM6~gvusvh$o)5rZi@b+?u3>NWK*eWDBCYts#t8u zfu5VrhFvdAoG@)!#!O&r1t_7n5?5x2X<^i=`e=(RXJhhBHp@e_<8hjutWbryM7loY zXjG-026W~IA0xqtAmc_$JZ%wl_gGDyylehg^I`uRwpxuJ3uo4X16F@}IYqfopMi9y z;Gwhr-X^>7g0|MA{Q*D|dg^aC;{X}AN7MdS2hP{8>_EH|TlZp}qca>2BmhSxl9@AS z?ocyT0W2!RxL&ADIDNVsCLPJ^E|2PCE3J~3EnVuy+LB061Ig-~L(?7r{`kUQ{SO^J ze2>O;fp&T5pln-k>P4wDFly6Y%br^FJ2W<1>b^K_^L78)jzki8K!KEK=@TtZxL=llu=&TB5gvfF_*A6mPIp>H`KyB%TCeNJDi~ z(|+(Y$?0G0b+xQc{P&IKBNY7w}s|yH8E$PCWs`Gb8=PyaAma7k+y!9ykZX!(W}u`4!@S?qXp;tjFTQH!cVbTgnv~ zVuw&y7ai%a?ZQyCCgO*5io{Dq&p*p&*N55peK=seCLyjk@#F8SI+>&X!)*9}FF9Dg zqCV`@$`4|H79ka&5Gi>~J{pUm1OOsP*?5{ioW~___;YOdP4eZv6Ny z=g%970l%#}WMZg38|sLhky7%nk6X+`-)qYCVL5K)!{gU=eM}Cx{W@sKkY->zS7!%Ut|V{ZVVF+~sDpa43ns0@^O0Ui0mX^w8eX=~GX) 
z@v_N*O6F-mrRL6css_qCOm>!!gG%YeYLSI6!ylDwOjK?9_j)(UXgte-fq}wjh*uzv z+J^Hkvk^bvx4d6K!)`Yb$SBc;y$tb9Qq-W!=>5n!|3JBmO&0qQ^p}-a4Zso)&)w5w za_V4;UYJ`NG6o??N$7bH5*P{uP2K@mx+r(MF{?#Os^lH8em=y*udj6EA+e-u?!QGlQ z{;YO2cO}e^TycjoK{+{{2FwKQU6x%VzFE9tq1%%G`(ufs_?0qyr~G=l;uR6NsDHmw zd4#^09{gFxl8S*V6(4Xb`;f-34^r;Z=lA9D>w!}L&*%UDKJ@v|cIMj@(%Narf?gz& N@z#^A(k$Ep{}1oC=iUGS literal 0 HcmV?d00001 diff --git a/docs/plots/strided_sum/Strided_sum_fp32.png b/docs/plots/strided_sum/Strided_sum_fp32.png new file mode 100644 index 0000000000000000000000000000000000000000..5e1c1f22ad946c4f486fb07395917fc39299a599 GIT binary patch literal 51839 zcmc$`WmuMN*DZP@ilT&cNGKqnNJ+N=f~0f`5~6e?4GKyMA|M@-A}!rWH_{E#BHayp zUihx{y=xu&&;GSN4j&)o=8E&2=bU4XF($sxWW}+sl3zukP}ol;L={k|3*zwG>Iw$@ zi^L0J_zQ~9R!q%S(ZbO7jjr`el#H&erKyFjsgd4I`Mi%BAESxMH_inznwY9Y2 zXJs|}-ydMHuztnb9w*ub7s0fYP`5#$aCMR2Xx~KAj8LeS<)@+#l^kN0$KN=FzT7?A z*b9|DOzyt*0RQIA%BR=dA9@i;Rqvj=bOSDyLGf;p%-34)r{} z_g#Tx_rBayul**U^k*oK7Cu5qNO+N-4v!DV1fC!`YB*OHT`^5|ZXw=%{`XH$I(qt^ z@5)RVxK!;WRx@`SE7xnU6&UyX-Fp1tBAZ4_Tbq8Z7iNL^c$7Dl)0OCsC$_fRPerOT zGXun_T|>D{26Er5X;)~orl+R|PzgmbWtY73@$vD?YYS&xND303_NI2b$-v<4wDY&S z{EfwG)sZ&wxJuXNtq?3EMQI9$q6 zy&I_QvLW^X`-V!=^q-7nv!xxfMb?ApH zyzgvBu;Z{_C1OAO=01KWji?+t|RN>BFOQobN2LTEr-$RbThpASEcit_W4Nx z`oq1oE@jo{4egN}rSKfG8_#|EnE&#@LI~I|Cn#sAC|{-Z2n`MObAM%MsGy)wdv>}f z0z2E%-i}=EvfCQdNQEPZ`{`Ev;&48Ls+N^lN>kvxEx+UOM$_VWInVQQ`?i`xtg zX(Ri)E2B+1r<)|OLQ>v1l)P&dTMlcLd*1`vT3V)dM=jE%!dbQ#d%c^e-6_&~)n{Bv zv{>StSNT5RP)arhQZDvqkc8{F@wI(n&LHJ5A1jgMFcu$kKRdyq5-geC&3;}EVK^J6 zsy)@gGda7Q-<4{!(5dBqva>v1PEyAf!EbjL9Rn)?f^E!x)Qm#dWxr(O;|-p`qt$X; z4|e@Vr9*d89&-v>@j!!>kwPu|;g`05`+R?TUfzOdi|=!`7%8~(AmubAPRLp1Zb0Bz zctnJRz5VV}N>x!#8#;W2C~mXXzx^4*C04X%BL#f(B@FQ{pM41!deRjcSW0K{QBgb= z+D7fLFEakb%q6hiY*;f9O@~W5XRQ#u_07Swivw9VCf$zA3ezTVs05pDhikoSF_>;; z%Aw=qi@YuMWgTKBEHRNLyK+|};h3|VYf!6hqSBcR9vhpS^LHJt$n2;^b(WPdvTR7G zHU?5krOC!J-o2{_?{vJ`#=5;UnA6yVzP~>cWL2sa0PkPtjk^V3;LxqT=(M-m;YY+& zvX~k@ROKpwM2#;)+>4&%r%i$bS;Z5Q8XDyJh8@${<*SM6`G#Sq)SFvdr3drTWWp}I 
z3$bsgw&pu#ro5@?X=vUdE6V?+cQN_fw=3RtKSv5p9C!LvjbIU(H41oFkM6Lrq?VLK zBqt{;wt7oEPo9>b^FnrO%(hoX%VFw0viPgx<-KWg)Novt)7jtjeUleFUaw0|O}%g2 z_suw{zsPJ<^5ky6dbOJnwd*z>gx@9iqtQuJ$!06l!B+fPJDk+JA6*X@(>5FgT_7Zh zM0{AQkF8I)(?mALY`Uz5Uxwom5jAA0=MNQ`-3VcjGu&UFLTOpI+-?fka;Q4|%Y=bL z(L4!JPcE{jhQ#l?OV{qodOB8WBdgom+8V*}O18nMsiHzq_;g>N%5fIYlv}f~>F;0; z_0jH%{y?TW7Ac!YAfL@V1*%fH!ou^>v~$|77D4i1jzWn9CP!<`D)(=p3BY~(KQ zuaOM=Q44==KaAOQPrHINq17neB)c@V$E=P`E%yZ)d`lx677iu9(gp()lYedPBe#P& zk^~oZXSfjG-k1$j95p57bM`uA0{ik6xKAD_^V9|J#?2MJ*g7?0Ohu9YbUH(J)Z-ku(IV!)ob*C4X$DPIVk>)&4s&4nxbm z@i$vPFH`^euAC9VFALEp%WJr|Iv&w6e>ZKmr?;1BT-f%{^-Gs7wMTLJLr^L^XVQLb zDSxw;D&|idA)P82l$Mebt28EFzLZ^FaLs7Xk2h4y zT$7*i51bqv96*f4^_XzI{^b!75h1x(XgZ9~*rgcG^8D^c1nZk+=JyLySGZkT!v6fWyJY@X{R zBu!GRm7-c&S`YLHF_odf*FJ#s>#<2itYZwvQ9CD`Qovp*iurJ+HEnj*Ku=$P!>N*C ztS!x`H`R2k1h0dwc-c;I!SEOU6w)@0RlAF9Ep+9SqtCDHAVr3a1qG%wy!r?}rE6rFvbJ<-Uu33A}9=fJ#x7ERuZ!syX4TsfF z(UwMmBMPaFK|;HvDJdy2Hl2LQX=yXorEp2=2p$V6I5Kn7{;b)gqgAfXUs!cIp_&=} zN?b_l z13^8<$1cbbMK0AE#`JbG%*dkZfF4c+b@lP)wVkEG1xWdHckh00JV2@`?^Zj9(PWj7 zudna%@m811?(i!uyMARN{l);Yx&ShclBo|jNI49#N=r*CwmJonJ4{bclUJv*B9RWe zo#(t~u2E##GFEDHursK0a5QUy`&#)uvecN0j zU?DB51|p;shjDvXoShyy{`u-1A?SGj<;#~^w%uY}EP*I0_hYNipFbnT3WEAprPIzd zBpAKlHE+Wj`!iI7hVx%>*!@)!hRiwPcC_NRR5cDKg=i)*DJha&AH)6ZXfl$|hF)G? zKF0m@0LhUzEGqJzApNjn?X2itAJci0Lo7Y4&1C=~QyV6N;``6 z>-FCcAt@Re86k%~UFqVZg81C)ec;_{D+02DXND^d8upA=%) z!(0ZW7}_}&U7}GP^zqdBEQ-Rea(p{NbA833a+6zjvK@wsDNA=SIEci#W_QRb_-D(d zaMZF4KY!M{!^rq^W%qEo#4c=e(YZB!?M_-%ip!z4*(@g?AF=;!PCwdM6#{59`2756 z;g8^?Z@l>b|NS-iYMkfj9fvhtzrma==UoqqtwOt%xZ3ZPJ9l8r@&j)Iw(~CN zB5E+le|_G(iID8^qb|I+I1De|74hC>VKJ=nKr=8j+}!FCQFStfe1Ua?I{<3D_0LNr zxv#qu?{=}?yEpsxGS%;Pj;>^r!R#@+!RO19)ss3?{ysilgce3vr>`_wq2& z0vRpgx`%7-XZ}qy>DFgy@ouf2xWdirQ+1KtX5`PyY;}+KH;&;Dgf+@Uu)T$--B_=~ z-Re`6%d=ZjF)v%BA2TnGpLpxzBk{*FI|jQGzx@ugkEsanuc?bl&xpk}i%h2gy-jxt ztYvNvA(8F45ZmO1N$>|CdXVrTBjkSrD9J?nDmmI6aJCPcL{5MD;L-9PE$2JVN4^?@ zChZ#Wz4Zl6q-Au9bB(q#+b8$jH)R?@Q0EINeWOxXs6!Dc_9A&*d#IZ+0Gt5X+E}kwJH0zdEMQ&ce)$9nzv? 
zq~+w~)WD83v3vBUrrn~uWpwcVm!Qq$)i5%;a|TcgX>xr0{WCTlpz%W4msj?jh?`S8 zRwN{N?C|LkwoXk*nW#bEfxr?E_ z33mhI+)o{$J0cZwvQ2o*N1s0|s-TeB6=YH@esmmR7x`b`KQQ+8B?lYD?IH5iZpI}S zX+x=$rX^!p&s5%#N@pJE{q910l9HQy-2>8|U$wu#|5FJ@9Thb-H3`6IOmUjeo}r#? zl#H7*Ku6VTGP|o;Nlo%8VF!xkgTnb*YrWaUCp@7^g0Qss07YO3CeuS$zKjI;i$ z*}9N!wP2dMp2mviv+PXdz14P?D&u3!$V&Gu4gw<1hHmJeEK9Y`TN-+o!b*!V8e4+BKPBDA^xp)K!pkx zdZHhgUC|7j4|@xyJl!RllACz2ge~NJ)^vajq@%e%0wVS;DBwgIu`>G=4WqHymXJtJ zBYa$;?d#^n(V3a|=w)Mk`pK;=)I~+>U&Sy@9+@#LE75e4DlM`H4YqAjA3t)-^Q_h$ z^rwF+kLGtjSNel4>02C{PzOP69Ll@9!yB0@98aI-&CU@GwrrJErT!mFo^Qc%n_3v- z!S*HhhLLY}b4``n#MvVawbb?;2Xe2btjb)z^v=$zaT!2Dl2|`Kc$CAKG;e2C)-EM}@_sWW*pniC9Sg79$Tx)$( z8@O!6J|EHQe7Yj6*8Vsr)1c#v=i4i+f6g;LrcGy9{!jU1O@iIul0jr2ehOUU$vb+% z?(aTEkZ{G*AE^$(Z1cm_mj(}SqeOSh%UW+gavO9 zDgkam54V^^zxm@;KfS4UZ(ZNnNkH%1lw7#|t*ktu>$(_19Z2o=!dz(G3mo-2Su7rH zVY0nWX_TUVGK3N(CI7cfXkDkvlW%IXQ!$TzPbtjj458JecxvM5W{o5>&;9!dFS3%u z8*sPh1!TodQls;}wkAnWmS^Oxkms1zS|XVd%H4^O8Coh{$TO}(lcq1xlei(RQRc37 z{-Tf6Qs|edNR*9gcb%{&4ik?Ljb7QAN#P&T*a+_&5P0~pUqs2KIyXIb%GS?BZkKu2xhtU&6>bo zC-Wmmkbw64?iR)KXM3l0;|oPGrgPU%J^HRwU!lwX=+~dW{`WP#fFCpzOs~+zqjCwD zM9fJm@+}EWie@)YKQF`bjQo3d(k9HPT=$XBZ6jIUxf;>|l%aSYR~|>72f=lP39m}h zZ&jG1D;t}gwVNqN3)|8ux(Wf4`dD`VUffHUqTg%$yIi5nkWS`E!)b*AzMfgYa&ONX z`v>&%D7;l$c>0Zgc_LiJQLwuCr+8b@1vaM_7~R1dU+wSydj^kInIRGtqgo8LTw{ip zvbfAQ8hVTMX+^|Ig_RWiV2e>rZ^U_YantSF->Wayi;uqUzlWBe9dF-y{{0vEy544$ zxCz-4?Be06a4hT^uXg>a{VbaS(Q%QzZe5{h+b(d>tCFXfw76;M?00P_blRe#?}i{c z<@>pH1!Ab;oEF3rO^H{@$9+Gvz_ynCi@;=9K}bsI-zVW9te(f$Z)@kLMdF8iwd+9E zh>Frl@KrhE*2#PEr61BSdwNSnR$E_2h2xluc@-XN)1{N0uffA_3Iz5lJ2FFSZiSb^ zNB;$w;^gG54+yhKL-R1T0`q8JK-Xd@xas@O5qsqC&0A({MvGNl|6=EUHnz@;&?nb+ zdefZ2-&w4!Iq~)?;i<{oK3i{mbY%bvL)vF=^|Q?0hy1zP&!bBcR_gQd2R?z(ToOk}oY|B5*CJvn#TS*W3ZQdA+FfACWDxEWfDw|-p`|6#U#&iKgF=yUtp3B`X^K8y ztZbaaG$s(Z;y@Ubtd=b`cXsaXX+qlJyTj0nY(_+}@T7!QWZN0qz^#XN8Y_VG5W351 zrZG4oqB&J2I^zDztI^zM)BUPCS)}1nQLTz{LduJ!fO^8rMRteuk1`6krIB;C3XC{Z!7?i8tTeJVf~-cTrms!vudpl`%L2(0_Bf8CsLT%ZKF 
znX2K83tPu!cVA`*PSp?lrCaw536g&Fc|DJQ_qRa94xfMk&9YS1?W)ycPl`jO_Bnyn z1k{yISg!?q=i7O;ol^^$A(;%bT6^b;iu_!c#nWGL>73F%JH5t=&pIe5r#2ic%g0S8 zDk9Sr)9yR0zlh>8QQ1H^U>9Oke{|~KHM23$`hVQNZ__B0Nd9o893^UBeci&~!i4d(NVSEOmN~9E zW%TbIQU~^9V*PJ(eX6hv{2GRa^w4?$Ei#iF9}*YW32)re)s>@!^%8bIE>8P~E+or7 zAN~F#(wFb6xpZ;)=6M0Vn0Oa&=^Nm0Z)u90Y!k4AXDB@0?FP0D=%O^cr2%+cv`=AS z?}tqdf4;j!Gy`n5dYSDVkO|hM#vo3crulNS5-|mSOe*)UdOd%(y!}RPGddxPk6gq} zkdoi_CI?5zw{MICIXYcTITerKAs>Ih(gk)B;dx^KZWg@meg`BDkP~JD+0WZRD!Uul z`%Rt}A!I`2gnfriQ{muM>`(jGH#H$!cxj!|&qw04hr|2BaU(_{(cOnr7yH-I~lI%+=_C0RJ4Ws>KjJ;UaUe z4V(cIVOLXJ$X|*PRv(cB{CGs;tmBu}yM9+6XFhy$(1^*hv~tW%_p(!g+3!7G6WX+v zmb_c}8qA!DgaFLY5v*`4v5v1lFtYOXHJ0M=q7H(^IQ7XcysPIGVdGlY&!p(wCgoAD zN?mcP$*jqsmW$!Bp=n zl&Y-kQBCH{9ogt&^-ZrA-EJDqLlsOW=BtXfsij}0rO7melXF5CN%0TNYE1K$U=QXr z!F{4N+iTa}Ol@H7abL9aJ+IDv@BHXZ>uRF9Ge+P~HV5Y^`T;{~nuTYWs&T#asNh!W z|6IO=hehB!X8EC%YSu77mF353?ergPb56+&!wnN^ra{>tS;;i`T@1C7m9%qKSzD8kRl8R|`nvB0H4*0=gRx(g%% z2m8@kr~s-6S=(d{4^Q8JA7RI0ec2W`>4m+1I?Wu)FIe!JP_8=g;|WDdjE|%;oKb2k z3#|`tCg*3kbU8j{6^8wb|gf7^XCC6Uc+>LBYWf}Fm z=Vw-~nA$N}V&A9pTPZ4~W0oI5UxA=u04s*ZG~*mgM;xMjm(JHvRvus~&f^Tq{QrqO zT%(Uq)B4`U{ky=k6wFbDLd?-6E{7xk&80N#<(|-a?3oktq!{w^(B!iHAv08tOPqgZ zd51k%g=IN@et~gL$%sG#Vh*~&!+F3cGs|AFBR2z|^6VK7zSUc1=N5UwcuxC)$WI!m zM={xXp=Q90aK2=ACQ#1DpY9e*5A|2ypg#rBkV#oxdrA z<+killS%EVUO&q&<=vlD1V0a4H4oVzZQTC%_1|hcA^|<`AW;4**dE#@6MK2zJ^w`0 zRLl5=GlBi&-#4`w`;(vlcf84dJRwWmk2qg)eKsugrXE@W-M#ve%oFw|d+8kUN-xH2 ziXIHW$;3UJ3k-mVu)?BqSw&WdRwH*7d-yN;t8d|al|A!>9N!O^PxuGH?xiNvT5hbQ z>B+kRwVtOW(3E_i+CTN&q+j-WGQ^LQ;TF(Vm&Box_j<>?LOW@M9pCAEC63=Ryt$mM zQ-unwCfGeD5*Vt=`Bd6gJhSTnkJv|Z{vP7&STo;ZbA!gcVxx5$D=do9Ef;^Q4AI;> z#Zoj5K7Wg(5*}AihF7#|W=&%NxOY{ft;#A5Qt5ky@~fz6Lq~{}8oH+&{7e6S&p=2! 
zs6D@-*c*Z}OGr$tLcRw&Kf*r#%F`D|6cXT?zf-h<#!^op_bW$7q|A1)7C1<8d;6m5 z38byKS*LETM6k=)ZgrJ&UV`uGG|Y%DkRnn~1;VM}4thSUl;;$M3e~z|9Pqa#Oji zKLofZUiSeT0X>T#p&?u)3Un<)FiQvm1dnlEEk(`F&LVhkqqzFyBZ$r@WDPK|$?FBy zD(Yl-#%CvaK#qRc6;i$hy_o)*SEbRQuKg+#4S!GTd-NV8ke&kIRAyjj$3um;z3*K# zXz(K<68nWm`y}86XnM;dh1Wa)7EkXuF@2PhmQL2Gb_3F~-XhQ6N8&ZoF+k%{K&2LK z$2&jNKL1qE)=Vz@#ssl*Ku3_gUp2tDS-`Tq6o~pR9m^jE(4{w1ox^^4=xy5^jZmd^ zN?dR-+OuxO(AZcRNy)1~*DE)suR&5fMS7?BnKdu$Uy1U9LL=Pkj}7pzAPs}hLr;u6 zP^M6bgQNQF!~zA-1ub`UC9ILU)OB{_-Mb6ZGZ}H{MWZW3`m)~tuWY7W*#RfTZNe-E z5|2MrNx8RGsy9vDwK1u39?pagDPH6B>f7mh4eT*%Djlm6f3pTFOQN{j7U5@)=uWJ`B>L} z-gDmaS6jW9Tn2sp=eGc=_)-G!aJ|k@QS^kpWkQzbEB~tNxwf3QvZbHJ)>F(9tztk5hug) zvMGX@*ykmvyMwipx3_J2OvS?+q)(f)sf+>F!%PAt!!+c}<7 z4N(K=iZdMFU8_cCUe3S!51$Yx^u6=Ax*rzdSGeUrgz69am@Wv$pq|IO?y9G-gK&78 zmev#g`M;s6{(nJLlVf0NV#cDwrX zfV_DigK9g@5h3K}eS<5g;&J;Mi0KJe_&T{c@71W&emiJFZ1zTts>IMu=s zUvJ6{67yrk`7k{*Q@Z>^-{KN04UL`W)V-e@D*~vYGHuKD*xYM=u`UIsOeloIp(Ilh z;ZFH-vH#2()cpv7H!MgrebD_?V5A7%ls0UhF^>+Fs&LUDDj}rj1q;S}<=#XRsDpyg z=7L9{q9+fCQVCPjdk7hgm_{^<&86doD@T+~X=oIRc~x9JmFxz8eV(dD|=8U$A)L z;7et-X2s(Yv$tX|d}w&Xy)2m{EBf{!+uz?RH{!eh7jt^?Mje~wJ=df}%p-^Qcs1)2 z%%nppnC$ZXtZU)xgygqlfFH$;F$KC*iL_q`fe^{EY%wXkRk4>Ssxe2WHrX6^<&Mi% zK8e(Y+&p*S{k_<KJ??I=4$`zJ!{G+Mub5CXqHvi}Uw3>obGY;3pkKO*u#X92 zFA5+EVsP%v#OBIVF5Lwpe#SdC2kiGT&oLQYCTXVvMcX9WIVK%B-Z`F2u~3hWUCLRf zyLb+Wyuw1vYPIukhjVFK1CHsF3fR^6!e2h8xpb4P33kJv@}IL1mISDUZHW|l>wrnU z(BJ>o=21WWvTPH@(cE5nGoR8sA^La00Ag4rFfozsq`OzbO<4#@x-Ez=J7K8ByR8V# za-#-IFR@RTCJdpPgFh|Ug+ANt0A%}ITrV)^O?%$i5r4FTcQi5fOv~S)A)Q;2SI$w_ zE!MTAH|&x)`agK;2ORg=o5()A9W;4yfrYwy-od-AF2J$+he%F+ki6u#9)u19p%N}4l{zzAih+{yv?bTE z7@DP--&38cLQd<&LjHQ7V(1o?hclqfZK%{Es@x7`R4VX z`!0qY>{HU@SC0hruB6E1Uvw%m`yFI?yZ(Gx6JC;Htje@89?f@1=8F}{*0Du0Gq{{K z#(JwCES*k%SuOsVtWL}KZEza*zJ?78Ip)YLJ(_~Jo0J+v-5CV7$$=d1rP zrPVZe*e|O|Y=}SBSs`Cc+@+kwoASudC$PY}+hbcJK&|%Ud5Dvz3253DP(@-lGh1FK zdD%dvzh(Cgzw!J@$S1MpL#cVq7T>d(9Qk ze1Y+<9Ug&Ezg_Qa==uA2Fvc?<&KFN4(&mei=q+&`hraShX`dQUI=;urrtX%N8M2l< 
z2b`120-BubPwus(ackPvM=_UX4uAG_+3sZ~Wiw^n|Iib8`oED_wDou^TLPdS&8Locem|fG>3(DN! zDGe%{wC9P`{#?Pi>Jq;>XxfdZ;vf06cu&6}O2~jSZ?M%wV*EVwxzxtzi=oPK2!r*4 zXeNy74DJUH3*r@zE^Q44moWfq34Vr+ZgJ|3Hr8rj*&cw060vJ0i3c?r+>NKFg#Tw! z&Cc8exp3igaIm~pd2a3}s3RK~ z4T}C?+-vDDp4(A5EeldzIF&s5(lX}ybV0)rJH7nfcit#)lOXOMFugL|x#J5ioDY|& zT|ULcP&5Wn>#%1-O&WVM=^7R{nyEg=S_5{HTyXzVx$W!H(9%YNsfB0UmH>3dpT^kV zi%%_@`znhjXWz#{ zA567ij5`_JtZg>^Ibg%dP9m&k^1gdF<`GqFo}W(f0v?&})PVd7nX?L0mUM^)o)U+6 z{{bwg(NOL!D=RCay)3QL&n(Z&es4JZ0p%GMlSjyTH`75-Kp+|{1%ARQv|#pY0VZ8@ znTBSnaWY@*ncLl!_qce-)S)JC<9Agtl%!+6(>Z- zWlV^O%3#6#U%gDlM&k|Jl@TJ3e^^=Dm+#iCw}mEyarRok8`F>7BeqGYp4(@amhwDW0$yaKdYFL zCoyhmjA%zs0tP@9MnEhwo)kV8@smGv@(2*G&iw2ccasWr? z3-J6Q-dw~a$Z0kb3Wh8W*F#$z3f`J?rJxA99fOxIH)_yvEhk;~@W{woV8}oQVUWFX zuxkVe@G#w)qM5(Tc>7Zv@41@{R|rrOuzhzYr#&jFH|@FaiWvaL6L8yM)QFrNblnYN z)++e~hZEe0hy|3-WuL9PyBp#=Oq|NmOE#8YnOwwi(|}$ks{S#qkk{x6J(wN9oNoNa z6fal=D3rbVxw&$!vjI(mCGM-5;dl}4g=a!Gi&2%P6R~o($zjo5BA%UDfBO$j{3wE2 z*sy4SJ@mp4YHIuXxXR>F!T->}T}y{i|JgM~*^F3batGQkp(C-(-w8bjf3LicChe&t z!w%6mK0bcY`l{sH#~6Bsrth~jL6Z#EB`qm-ksAIH=@!pyT`J^KWX(|5DSf-v_@^ni z)f``1wc3+ij9H8oHgeNy9jqx0n)6Ve5QbrEMJm=51Si)6Mg6RhBNM{je# zQ*Hckjf32}W4%c9iJI|Ap_nX~e?jRdY93Ccz3La6Ic$ABzoq`R=Y8I^SH>w!Bt93< zy$ZVYzW{7xgp+n;k=WOm-tW#9`>dm*Ije+}OibE|&&QL7{#BQ2VtQf#7Hy822J4a{ zOC$5Qq2~_mC68U^kdw_3nkuA-(qSn@GT%Sp*6izd&lkf<^IqI?dpVbL9FSwMs7^z_@sA$^+T-9wrx1QXDS zQo?)Qi}syjyxMGvSJO3ITCqRH3Z;KY-wYx0N0R9eS9Zi)5cnr?}su3*0`ENLO#kT#a@up?e{jKu%J5`-q%xAAWx)u^pvv*3tpQ^aZv2 zxtm??``2J&44moSX!QjbG}>1;-I}&Qg7n=3whm&-U{Vs_DX!O}g29@s%N{~c!Y>4u zJwzVRqqHcqK5u&`A%Ypm51?L99XA?nCd0;oKg01?KO%WLZq=&e=)AkE3k45M+DX$@ z;1J$|k~3|~PX^8;(55z?euCoJtf}x|ahj~Ui}-fhmo-hogyF>4BY%gfx{1mOv`!Wp+tP$q0ZysLQxivv9G-IcvNdH`)*nHe|B8Utb@)5@kSB%BWEs z@XoQ9t@SFpu;#^Fy>a2xt|OwfwzhJAZkw~PakfZlG+!~gU$O@omXI8^TzC43ibeK+ zqr()%{%nZc>w!!%aB|Ut3GO=*BuS(oATc{*4}MwmsOchhtk9k~GQJj{HHP`%sKCIy zG@kUTA!);nZJWPVPli^S|cA_Lk zl6K<6ve=-){Ifk~{>0q+mTT0fwK=0(DAMloOC-1wIe+-@(=%ibnC^oo8~_Lkpk@us 
zVl)FQp}A4gH1l)CKHkj>+OSSGEW{k6#!XOR-j0cFBzUMTnMgiu?~IPGsQuNmX>vbq z?A?7D+Iw2=+5%30@86`UF(MRqJ=&f7$TM-1g(Z-PN#zA82CSof!%Q@fMXHs)86iE5 zUG$VaxHhoIZd7B`j3hEyJvP7fL*7+#(+{0kRWIfv@yF>rHR*ur655WxO_%Nw2fwgs zZRn0aJAPGUKHdt><%clccedw#CJsJ2lx$iEz-B9;Pmk_B!wY^k!#pInxS+T(Z+o(>Xy8(3sj?l}rvhtHsQ|r9cQc@xWI&en@S>yV3 zC#%!-Tb(FW zqF2d?Oioq<#U8yVnJ4BPpvFa*oZushF%?hh{-G#tC4rRWmVV%#xNN-h*(+v^z+mn&D`rXA`us7Vq(%i*qj4q+pEE!1jab1V3@55 z9apdhjKD5C_t8?O8B5L$3{()J)BA?>**j9GT+TJ=?SyD{u|}#r$J6G;50sA5CzBfH z6vKhCWIM)lT<_isCsnBq`qRdD<~$J-V+R)rkn2h7&r4B@z2V#ScAEx9!%q5z7%E-gtWnU=T7<` zKI8E+t}M;s-8m`nJjFOKbcMPdJUY04(##8CNMw>1W_Hk+;hLY!zam;S23sTf4gg>!FVkW|*i+`a0r-qQUdf4Jr*w$?prq z%o<8_YT%<1T(3co0kb)RS<6?;`~fi!GE(8;4K}8Py6lranL?A^*zqOfVow03)z9t- zQ7#=SFe1y#Ww7CId?oLon46zhDbpSacIScTXdMFZub`o!k@Ui_Jp!>ulmbqF z3QrzwQ8fiaWpv!qyX9vBRs?~H8&%G?p=e?Z}qYRIFPj;KM* zH!$hr8xUZyIopDaDZ<$7OG6`6IOZxi5ac%N@3PDr)Rq z%vkIadHOcX158Vo45}#2moEh&&&;ZOUUuh*?1Ho)Z4$i}n7x zE)KP@G~$eJXlNkXqdZUqQ~S(|@8!!9VAh5i(!}IsuULUMKg>qCKj2csEGuLNH86t9 zrV)6+bgGh8&#VuqBQWaO^!c`wF-5pfALZ)v>Sw^H&%Kv7xYu2`cI(H{H;+{vPWJ{J zs`vvCH^;QBs;|-pk0{P;Ng>0V`=gVmDvMPh-sQLQ*0H(V?>{=(n?!U6Fth9fVeocS zJt66(fdOzjL!~whV2LvT3(%J@Dswx%=LCW9N`xrgG3f`Ag`oIptsP-fjCm(!AFBIQ z`IT-pde*!`((!vtOzzBU;rDFHHV`NUX}&Q1NkeyV6}7v1p)nCiE_0CV(Z5Bc|3D-) z%Kt5?&16up{x?nF<|+6i%;}qHI@K~v%4A&Q+u`@SqrQ)yZI*$STX;i4`pl}ZNczmc zx(qa#uBze>fU|NVWVHASuwT8gNIgw7?t9<$t-60F2E)uMG$f=BCM_T5Xjhhi0smyT zj6sh(T0(O31jEF>TqSD%Gc;CHQHo7fX3NfJIwf1yZ0a-;WcIYX*^iGmN19p}_m=iH zav~=cwf@qu3x7V!S$%r6O;}w|*jl^8K`vp(mBuBURRk0A?S?iDSGiy+7sNc}ms+5( z%9U=E!W2Zbd6{p0y(lu&*L^YW(lh_G3?LUE*S7 z{N^aqpQm=uaC!GN$Z;DEwm<88k?|kig}yvpvfKV?o}<2eRaAL(jWb4{WVPegSXiFN z&de2*v^Rw07E2*3gj%Y^Wmx;k0$nDsTtA}|D+(X*HbQHVlD{!U$I z?Ew$Lj{_i@fhjuelrNrY3~wT%Y2YEOfe~Y4!hq2gL-_Fp5UFg=m&hh4eH zADc76Unl;%^Thc^gHwK^<0E|wQ8uIaQ+20L@c10LqWCoO^e!#L*vO_-1_nwzefkw% zo~T$nkfH_H5%r`O$k+uW;d(e_a`N(cFm|lpp#Lhy3Ii-;a0){pZHGhZ>jk?gk`}>+ 
z8Np{`*f0e1BA>&;8jy_vM}WA20iB%42`4CwWih@Il=Bmnfj{1N>4y@%WB%~xA&a1nLye!Y2%!CksxDxzNwqRAf`x&fOp#dcoE)yOI^8~rjg)GE^oJ%T=< zd9JI5!#`f4kfA%6U8;S5%?SDgn9)xzC>p-PEer3Cqv4{P+Go0`z6c#m5(arMEH zp_N*Js16-tUx~v0j9n+_oCi=Bz6WqbN-H6TQ8D9LWZ`k+S5164 zGyJ^D!4rgigWCXRdlpBo2IuY&SNHyLCyoZ9`E9i}^*D8r+p+klMHTHK_hp*fu$ zIcC01E$A0qjjL#SpPfX|V@`z0C!^zw7Lk97?pnEL=ve&3P@x2^65gs4aUffP7^D*la zBQ#M_0L(uM1v#q+%gh6T0V~3S!min&0m`L!!w}X_bp| zT3RuFGeMBNr&TR2o?r4PRHjbE(7N&K<)ORMUrUayt&gYz<1aqy&Sbt9g;snR zX>PTI6np>ZO8YHT0Zl8-k(O*d*{jPgTqD$#)x6Oak1k{5CzU4Xv_0^qrC~rA0xyce zFJYm~ZkNVzr1PGN;+CRI)v3J2$?#(?-#XazevS$d;33fTx?O|k+uVW(Qn%{X%d9dZ zJ}xbE0gg8k?;9@G4aBLsy zQ%cPRydkH+MgyGA;fOAk)fanj^`;YPVNhqi!`Qbu5BQJZgoVywiQxrBcFC<Z$C)Dw0;~3q^9I=G49R=T@yi5EPcT;kx?39Hv-|j+dE_ZI5ut$NpZ+kcKVuPFN2z$a*l}%N zAh3H?vwbcSsqVfBJzkTqaCByCpu=}PJsIMUA=n1!DWGGX2ug2MN9o1xxiQFIh>-m7 z=em<+X3R&E4j0tDj~wtcO>`LCxgiDI0^t_!!`>|y2eB;t#rlIKnvrKhTVQTUGo9LU z*v-CtGqJZ+G>#7=gpadIj4y^t`^*#_^|&nU^8oMu?sNR$5EAl9M^QRcM{92R!v>{* z&vkQc+m|OEh&pvW@K&a!W*Kk&d3;}CIarJHCiAZ;pJt=;JLr;PWb0JbOClj1Am z!~aU~OEUdioUA8B3xu1)Zv;>!#;@t+xm`7hFzpWS65~~KXyLS0#RWQV6{FO8o$R{LQCkXP z{EY7Jy+;903XDHL@M5{kgwKr-wv#1Au5!bP;XjrA?u^LxeGZHOUu&M`_tj@ zpJ35=@%o|6aOZEn{Ncv&61yR)cG_9%0q13vJZ)Tk&t)X{v|F{#c6EHz8Y8*qHBrEhS#A zfl>J4fJ7pOww&mboCn@^8h(<`!A<%E^<>xV;iF7uI37MZ2zeI|6eA1(Te@t~I|xM( zkmK2P#;Sde#^5jL<#z5iyeg^1nNHH?Gr@d;GSH{*0>tmM;)l_P!-F|f6WiFRfiL5} z{7a3Wv{KK+VRmX`C#n*qWn)gS;?GYYT8p^;Hpnbat6+D$?ccSC&PM9{{^1lf2w$Y` znA=?+u?WAymtmr)!6Sub`hZ5k1X)J8lH#0H+it1f$I!LGVKwVv`K5c%-m0^g5AYJx zin|f3{R-@lfcj0dk8XJ$P*!CxZM^0QCc8B(_z54a&dkk;UOHT5O z)kap;ip_>Pwq3oWxql&Uyc%`A+wDqlC2O4lX0-(~P&V4HII(O=X{Ikv?NLgL9@plJ zL_<#2Bb^`jJ8ZqXmf zzJzGD;oAJy4v-_ka12v7Qm)TMWRg42Kim*f9sl{DYr10`jh>(=E-BBB>TL^Qob!oG z^%P3PRL}zmZNNi%+}@}57ip}mk(I#=Gd+!~#iiHhm1GlZ2xslJaC(wZ_nj*RhsVNA zyDF`_v@)tU^P!dpE&e~!w@JC|FYTsE)#d+(v^NjOvTfVN?4l*m{rQpuPpQz9}Z zg^;nLlvz|l$}B@tWGa**LYe0&^U#0{k$Ily`L|!s^M31F+uGLptv}ZD&%15U>$&gy zy3X@F&g0mRecun4EGNhR4USINK z<`U=?E-~?so;^1nY);`NjcGq^KK~Lw99`&Ec=CMCo!&_>#h?yi6BmzwXF%+a5D14- 
ztoy2tBZag-b>KjO#gIOvDX=equz(w?-r0%HD8h;8&Dkf~&a7-*g-a#n^lsaE%GiS( zoBDd`Je&=;?xABYnj1(f89BWf5`?YtV1VK~jHbHFY{%OVd~WtJyu4BS@xL0~<7Tk7 zdDn*0)Bv78ge{NE`SX0}FcE6>{+iGyjSQD9bdC%t@T+j$prJzwJse5A;3m6bCjJA_=WW`Ix|LlF^3ByPDVk%Y(VwFHc>A2U4!I zhQ@vpOci{gCx_(8lJKOJ3uA z6hUQ7^CA&*eeB-V?I5|-PfFCLESi$7I24P=twSfQcdh?SD2~NjB_(n zVOF71H`&-x*uWnaL_>F0FVb0~7yo*T?9y!B|EL@f>4VUV;{l<#G5z^>`7rB2E<*+F7XuK4Fli$^o!W(f{1Qyee8>A`k8n0_{K>XM?RIdFq4!oUi*RoK+S#+LIe{+_OI-)v#mB^Jl~+f*`MwQ-M1`KLe5YBCb; z6QBp~t;r~&tl~!*>OR`wc}d2xZt4${?~lzj8Z&tA$1u(SijDd*~EpV**G0?lA1V_iCP){P)b5-p0(VbZr8mp+Vda2l$~$a+(l5+ z3JY7aS)16$HZxyJJaOD^YQ9*R6yBM!4RM+X&zj(mlYka$V8b6 zLU0IoD*lrwfE-FB3R^{pKlIEvMpq6H*>lv6yx;Vp!ItSK5YR5ozj_qnVpf0G=d=ui zwCoYfuSwTpcN4d#`SzFR?|N2@vJP6VM(Y^~P+e;a0;%SEe9O=^30S+35g+>0tc?m8 z0$Gq-#V0eK(BV1?1pv6{U~dzpKrhv9ASXti`DSKSmB}-b&VWtI5g|%TnKTRNr|*rP zAZ~aXSKN-jHu+uIC^;talrvu<-JEW#Xt?&k97C??f+oS_=_}~nigcZv7q786eBkdr zT{Fq;2je@1m6|F@a+g>3Cw3^$M{d4DeX~_ibtiHW8Y6t>Q(nl-M-ihZjg%+6((cFY zmE1Cf3pL$ks zB2S=s4~i(631|QEEtznudv8iU+8w=Ne$y$R+NCQ`r{Pon%Wp!VvTGt;|6Wld0XMWb_{8V_S=Bpjlf$DL}2W5+WyY9UdGTcHv=3(%$nJ;p?*(s2v)3_pd1 zGb|0t*gl|YLl|=$W!JHFIUN7Urr=KaQ_D7_o zkFpE~-2T(iZX~gElhNG&tWydv*lCh(wr4Ov=7FQKi#DM{2fVhqVSZaxr@RSgUAF7$mv|?)c`C&}zI5;V5W1L@4`3tDVSB zvpH8bOIRb1N(zMuuUs3kD}aHu+usn9*f0k#=MD=htJvMdk=e}DnY+z=d%t_4r|Fr4 z4PA@McP>BGh(p;&I2H!jSm-}{*tTGhx~Fl2LFxVEda7Nvm}7>Mq`To((S6h(V~SUD zKFNC$<>$-Vt%Xgu=wJ5!B4UYzcKS0kkE>_1Mz$a&{iTK%=o-?qvSGNd&&XS`sOj!y z;y9gIpyNe5fs~#8k?yZ4*odfqX=25)&RDJGlK$~1Vq+tIK5)L9H9G#yJcE%}qmmN; zxeh+YU|Vkm5b|>3)^&%J_j84UDeL|~G2l{C}07$J{?XYvwf+ZUv;!qWz+lOSN?|qm#Fwm8*2+wd)iwhF} zMoM_JWcCuTw`0|jh^ZRNdm&o5@TP=1Q1sM?c@YLN2Ocm*HvL7f#RM$RE8qT6T0zXL z6vDH~1$Lufta{GBSBa-Q+`Q z3%-2OX&YO%mX8BpQl?mM(vC($>(+_nAY<)s_%PwgnHzr_Prw$DlZ02CSYN2U)0>(h!|FP*-K_I} znlB&Q(O7>g8O@K+g4VY)GS(1&Ciae?R~%h&lA3T`u(#p?{r$ibX*}Z4*A08I=!}h?${%hbC1!% zLL2I>TJajYc;4!8yYnbx6&3+1t6#rD~MHKj1ouKED3YI{4o|U>XZ=3lfC<2hpHL z6`<+c^4~;Hr)O7t9SDb*&ipMdi7VSe#=0I=QhYX?Twa|6`6Ghw(qK#aSptyw$o4@c 
zA^7FXRM>xz?oD(k1D%#VWD85Q=i%Ws@ID7NE{%ooL&FzH5pX>7UEr<3Oxf?=& z{k$req<$45IRD^X5{(n_P%sUkf7YUFZ}LpK&xYYRQ3w9R@4TJ%+LI}4W?;!-=AN;) zHwUo7$GgivGaTLCyspf~=1eH10Oa+k9W(; z^u{7xnL1p;b4+KR%pqLs_1BWtYzrbe<$*yUSeB|Xb=DRg6Y;I=*n@5aaes0xnsY$3Kn?Wdok_-SWV~z~1QC+e zVfUS~xoT;b>7TbWbclz64b`IM&_5Ub&WB141^Q^^;YPb1`acScc%i17V_Q&QtW10S z$!ynaO_Ptd@BjcVClh=AYkiz#W7&NdxrcK`)yX z(tQbth#GN!vvU2ZdR%hjNSPd=vmx3U|9t;q$UuWixS7NjR2Nr5{=LFmQ$7cv)v(o| zy+JAZa9Z6t8EFDSW95S>fqns*`*yQxb1fZPN~CmXPlsR$^-A8z-F%e&HHi-Su8hh* ze{G$p?`q$9)6XN}FBEK3n-<3HGKILeEA>5O2rYTY`S7gQmmhbpvB}63!&@Qn=XD2NyhLUfPku7v zTYEYbTYRc@#b&D%^p*$d6sp>ZT~IoJg$ZW|vB9W!z`$Dt3;dmUfLgt;p{_<8Z40zC zhK(6}sL3Ul>)5Rs0MJ_|t#5Vthe1@f70Z$a29csu>#jmnM^}Kg8eZsZkx>Do7~f2p zYmTnBfwClj!=c9Gacw1LGeRVuWW~MU_mT-7qz=@^3FzIICB$9+WkB;E&XC=2p`*^J zcNlT>yo?;Xw7jTD*)gpZ3@o?Y$G>z8x8n=EdGp7PFZ9PlO*_Z=xa&Rhk0QS4uQNy0kZF zNr7vhi>{MzaL4#14)YGZlz1W9Zs@#xQAP%3((S^@>}u|lyR_P7%Ugx{3#l7f;2c%1 zy#0iExy)5K!ENJVqo&@#x?S2*cY4=k|5)+919I0V6gMLVw`3J~I_IW%n+$yFTA^Io zLhJ>7w~ZwdHS5%V7R0CRM10z3bJ@{gOZ>J|86N%xEpQ$a&0CB1T|t-M5r)qtU1SV3f7)=`qJnR?WT`djDLV$oz_XY1BSEKqSp zqU>;Qb?ePtfmfTKUQ?fjXV`R6v(6^GUQpog4t^KR&EVKWEwUeHiNiox(F)xOQhMFt zYZ~Tz88rlqNsEIw9slcOZt3F7G0|OVHYVq0d&z(5l!%S@Jh5O@y#f(<}L#!uT*~ zA1nErVtJWfA^dDVbib+>1o$Y>h)Wh6L20Q{T~zWecfEzI$y|J^6i^5S=5c})CsTzA zUxchVn|q4o_s(=vv^7yjsDw6~B~Pwg%Ply<=u!Qo(dg!74Mq2e7N!JQod_?_RWwTi zZ#($RSUN5c=8NS9eizUbiK~FXtyd?3kVd!&*3x7mp=5|FB@^bO4*W}NT_BhbvKPQG zjU~b<^$LR!ED68|JC9KUS+FtJ`f94?!uIlFuh#nh%qpH{RgUOCZ=^hIsz8q#QXa)> z64>zu3Lemd*f{p2^2@<8m~U#TVB%ss(tU4d=l`VR?GHv0XUG49A@SgIT6O4foKc3~ z0*#C5(HEKapNb;Zm_urio}$C|)yd;`?wP3H$IN7@ZPhkx-RG5#f&}rm0uFfRD)!5b zwPkRKh-9^vM*=W~TdNp&XC$eKjyz(h0K6Coosg67CFy5G%V5-w#3ma4Tx~AYr1H;rx_&Cu^I6Ayrkwc;m z4UCmpYg>Blo3{=7S>jJ%cEQ_E!^?;eOH{AQkf{$$WamBZt5{sM1idp7%P&x(&b zT0&z256eA&^-2y*i_G5VibQ)IG$7!WTLQquy5)HOJQ+Bw=te+^N9`GKwBr8mv2RO1 z2`Tkq)T~JR-ub|CRl?waT&UVY@gtY^rLnznuVJorDSPxZ zkC@EYQ3czC$TaFm)iKeFrLjW!aml4_m0MB3)(t*BAR+1Y9$eV}m=^yFivxa-L#ala 
z@g+6Y)7Q5*ty!-JR^^HR0mhe{()|AGdxMXG7cRDS8nK}V;<E*u*KSb2`vJUb8$MQ8_6Y{hDW#6I2#I^{x`gVYzrvA%xazvx&(k&?XA0@X) zuN1%S))^Ns=;aS9^MV-eC#)5dzl&3H3#-baav%JKLoM=_KK7SeO&u z2;do!PD1hoo#3m4&-vNi0q*ILO(KWgtV;OOLSsDTxprlB9|N6I#HK$WDCkM^$UFEg6XaCP%1AP*!ITb8E-vCA z5~f?R@7@uEB)&s8NIHM22wPk?cS$59dJG+X_J*#@(~bT)_l?(4hDflx644%#b_gLJ z?^Xuw*-h6fk@zfJVWPQ`Etf)!!OvAa-mWH0p*RgLYt6Zcz*SHJA8+Z&-n1K1t)e91y1|;Q?G9#ZINsZQQ>$b5x z-ad-OH+?>Rr@HD>2H`gAnHEB7_;cvZhhtPOPoKVM-JRIFt5mt}#<> z?o5Va+11*fDN?~kKyZUi??zpZm9e15e5C7fKUh2Df)eFK$&;Ku5%CvlCINox(04SfGE66ufEUn~$DM%}7K7Y=$v)BY=x;5_T z78D*TCL zcOBap1H0WbyaH{>vgtm>we{?)SH&Gh5`|XRvx{W-3%Jd z*QYIwh=k~q6&~OU;`thIUgq{+_}jm?=sdYRJ5|5HwG`ofnA|MsZf8$NK7-PJwl7IV z*W>?G{($83%YJjCmR2SbI4Hi1O)pgrye;^81(|t?+WG6T^QvbKz{lRO+VW+$);y7` zl%VmD*w}pHq!r&(AMHt z*CYPl0|FnYGr1N&^+FVAjOG-jk=Tzh<+0TrwomZ+w$qMvIH%bkp_dALkQJAJ9=p zZ4XMEs^!|8&|X&7`kK(sQv;IzxcJt~mj?7JK3%n#{$qav5l^+uMq+Ii8)8CUTU!*6 zANP9aMoyR?bB<-p4b4sH5Au`Vt2}atZKnqUVkFv=V6W>V^$D7EX!m7#bgOJi_Y|GK z7+;{`sVr9=^Qbs34nm7-;ksF`UY|npK)hv9i(Ly&Q^mWA}&3g%%Ya5Xp z&<|^-8S4mo54<&d7->{`@nS$d?7mgA?YSk-zdH3NYq5JK_+tk^I70FwLvod`JRiKS`cC!*xeYHSy8b{5O7T9>ak; zGfvj3@)6BwtHr>mx)u|vM2Gdm{z|2{pZ&$VKO`u)+a*#wK~JCW=sOlTPf@j-bHjV; zM$fl7NQsL@)tQFQWIkms8F_nsih_sY(~yag@>uUo^KZEQ1z9Ba&n+<$ZL4n;5I0oJu5-lwZEqPb{gOHiiX?%_NCc&W>z_W zC>R7{f2=_vw%dbh0Fb4h!oo&M=n9NqdmtW9I_B%w=f6$8`sFPq;pC0XdUB4}BF~f6 zCrbt#=1tDY2te1gFqHC#g?mPG^l!|KHp_>(kfxe8^O0}a5j9pX<61rA%-~Q{zU~n0 zZ8ARUT$AG59y6(*F(*EhaymF97?lBXiqP%j{>J8H+V28=ZxX1l~r(t7|g#4hioG! 
z?{`U;1wN%L87X>Xuw7O^SjbRrp6QuL`moRk^)y63{{BRhf%WMV?)LJE{lkW#gkMXG znWy}f!1SPrwU3D@r|pnC1+PFLC8MBl-w=wlvWB+pV@X&PwpOnwAc;if$)!6*> zNL^nabF|YM8STPAU*G!t;zXWb%F#Rr6bQyGC`4Wy_8cAO z&P?@>k*VI_aCzs%Ky5-X-!ih6xpYvy|5Fjq!Be8H)%E7e6Z4cMw{sk+ry`=G*-&TX zc&nSq%F2GyPJ8ad&7#BGzdFO+y|!S&f1}D7!bTCCvqW2pgtXy1$~pI#L32K-{WSm% zwi@jlaBd5D@j~hL?U|l$jWOk+*jv&NfYj}+yVQ8&xZjZv2lS{qRv$Uo*#8xueR$~l zKFzg63X<6oHDw*+5ux3{%H7PH*}NW*tHlak_&fbY$Jn28%;&hb{8(z+-bK)D^=_rT zHl)X}sfoB{;H&3y?wJr8C(CLss2O6A+&FHm9f#aOr@LoNz`Xq_`2Xkq^Iv3iJYdFv zy_I=pup16UIZH%69A9##t^HkY{l`vd3bDne?XHkDkpH0Pm;CRxrE;D)(F5Ab)O{9d zD4)q0I9^jOA69pK1Y-@JIDzu;b_zw{BhMM1hcl__>G7J|&z;avId2u}!|nXL_I{p@ zfnb=YZ}MbAnz+|9Z{a>;Z}~rg&z~3H&G=IWmr@E$bYkP;ih$5@XJuvZf;c0O!5&5- zbA!dnz$<=MGqv`U7@!eUus$cDpx})_UchTqy3}q}{s+#gGlptg5BZ-E>0 z#Rm6&3x3@i4o9C zBj>4RE?$ygeDP!rOVO3$YjB%w6(SgpVE}dYrt43k9I26~oI{Wi>q|<}j zZ%P6s0fk6dX@5D(SF}gl_$Zv2@b+A+ zdM2c&tgFie0jP0W7>|Mh5;KOCM}nr?BipZ?6=$^N>sWW(yUY3aHYsi6GtHD1ieJde zI}a+$pw?b)_D~hRIrQU78mnhGAFZ6VQk($ei{}q62ig9;`Q&!A^T0R9nQFcBbBf03 zrPti#s;8s=Sbdp^{O^;NH!u1_x<|Iv-f5V+Q_p$bOl6aa-L0Kca1s@>dng%wc&_gt z8{2We;c)dR%H_2eAtGCEK8f#vVW{blhJ;Sm-XL!0J@3cHb_xFEhHt)oIZLh9ccQG=KyXk-p-MM5$CRG{(4?Sf z?Qy^P4h~+6^%r8iod4vj$UBjjFMcJ5Qi@WebsY0K(a2eE z>CX2zNy-OoG~89*dU;k@4aLa>{J7IF^d+R0V!ZW*tmJrCcGuloL9givxbn!>55hY) zM?RO<&1vcpO}_T!Y&jY~w2GsM>5>+e?>^MXT73-b=A%1$qtBV{RaTaxmd@+u69r!@ z0$4s$3^!d46Y=)!Jb87R7TWE<-9F`!mZMaXpD@19yboa~?rs}D2})Kvk(rtfIAdg` zp9?m=Xthtuz4=^GctKExh2q?n;yZ#BJQT>Ry%no{LXL&GxmWhd`j!lJmE6!d!E$0_ zWx;vDw9SQ4?xl0jq_tN848B+gk7Qc^4xGl7Kg!ZcTN#)Yg@x~}PL`3BD2a_jadfdo}fq(|z zwD4BECZOQ=_S}P|D{+8Fm7EshrZ*2{SarlC*|cYzi}7|XzwQdtG^Ux;K8n|9H_}0N z`g!uPd|2>r(-SFb)TQUIdOhhWCTezxoeV(42+$&fm zf%$$N5fV4nCiqF{&hD}uY1#rcmm;@vpAYx`i&|PzGyPnUYJ`D&ckx{_1YFDu?K93u zpILzbokaqPk)8UcZRrt`>eN3jGi?j0q+VzT*3+JF&6ckussI=8bvZhM&mocbc}bZXp$& zyFz{s?tmNe2^Rlk?aY~OzfzgCb%#4IXmw=f;U)6O1{1Z|Rh_(Hcm5a{-uGm~!(hW{4lQx# z@PAI?%_e!p6Ow)NgE4zCd{#uL)A3AgSO!MPsD=tpZG;F6ai*q{X)yIP zyw#1o-_MzpJFQJvWKWe1IQ)J5@czOdsub-x5cEiuh`;Flx0juu? 
zbUfH}eX`27TWg_Kr*7Jj5L#(}1IAxvw?(En3zF^*M_rPTk)bGg(;MUb_fLk10WU#) zUv}NDk}ZYddG@ z%(;)Bk8vhA-DnLMuRZR4_zSsHU0ve6@Y>t`AMUXj#T>2}{*@Ws@bTCKru&IEh}3& zC@sRa*yPpB3)_+Rb3LB^Da;u>F#T&VNIM_ibn+Q~&yelq?9b|Pr${HYXJy~5&~Yuh zHYM_(xcRJ9sQ5v@DGX)_r%PfyFbvxXoivCX$t}P537<70JFAIUp(e7gU%&d{(bP1! z#HAgZ0(co~s9{Kr1_XT0tyj;ySQ z7cSgVl98!3SujgHl(ReGmWt}Eb4WOoPv4Pr7jm+<)P0=9G_gm^vmcN7lAuTSYaY1) zso>*X_^)JKwSVuIYiVf#6I(O8-aJFS!0A!y7ymqwXjpRox@*{GGh5#0CxI5-s?hYr1cZ4%D(xfyJWUR&ve(*K^@5OcGbP}Kf2P3zVbD6PT7 zZyPJSlXO)@>HT#012Mf?rlpFHy}!ohVNhII&;9P}FZDjF9C6X>JAdPAV49XNyPtOd zerrrkJf0_i`U>L-HcSW8&|dtHyB23$0l)9`jIf?XBWpZ|lX)Lp&mt65{8u{!L^Pn;;alWws4 zNrL#(v6}BUUR?u+J-3LMkpcHFU<)e_FqegUY_-aL|HB|0F!wcYSB!mg@Ktp5jyvfe z7_Ln=jv(ze9j~(uj_vvbr?c%uI*+cNSqFN z`N}PU*W}BDK#@RW0J9dkV6f&Gq5h?1A7AzA6=;26l`Tv7b^bq$X8*taw?|?RxOe^R zA%11x@mxJHDfWoGg`|OGz!&QHKsTss#2UR!DvFZHC85&nQH*ILSZ$bhf$8iNFX-fN zesS`ccY|5TPk1>1a@oSH@O#mDV`5{IB_`Q^$AGpdmbT%aF&8S9K_?e+m^_HLK@9!G zB*or5Oj#oabNiGPV~d=JIUbnR6~rWP48xI;llvJhyGw~ZXy*~uEX?f1x!#2WkR3D7 ziJb^n^A9}Jfv|3vfxF`*6uQ>1xG~OX19_q%2Q7l%X=ZeUQzH0lGfPufn8;Dho&s@u$Tnhr^I(H?6KFT(gi{Ho%n>8y z!MXva3gRx+>QcS)O$>%1+|f~B5)O>SP*(`HRnXZW)&7V`!hl>-*Yum+Q#o!5L&iKAKf4HrU) z_(MVCa&TT94h_J!f*2N0u<`^jwiPn*Q#qr2a6lIaHkU5tLH8X0D@b(u2*-CNB?|EK zZeZZZFAPkDiUu+zx37dT6xMS(PIAo)jJ_=f8EqJtdj6f&abK0%K-LCa4}^6NuDB_@ zYtjcPQA!v~9fG0)#_w3gBK$7mH{o){0B@j|&3hRcA8LPo+x=m88{Rz%^InWoeFI7q ziHNb0k(ffC>!kDPZEmg@aF0^hQ{8G#Vg^S0^yyYY$8hNq&A;&b=x{fuTOcK<5513yoEh^YF(wSQ`NVeVUn>Ns>G8z}nh+(m@O-CHdA*_EUMY zmoTFmUhPbxww{=bVGsD+{*zjT zgpGFdNaEnk^B@ygN-R3Rv+uxAM;90|Y-4IqYe!d&GZf2%uz6+|nYE3L2n5=p z!d7f(6mzP-C+4O~c`(f)AI}_&_Q}Nk;OXa=G^+)29Xiyoc|<7 zV*x7P2qpaJF~xBbjtVn_G5*sSlgbYr8nMYxBt;0Z7Y1* zZ0M^t^GkqJpuJXNK$b&qpxhwzU>M=f4-Hk!!-a7Y@ivG#_PGdOD6V-6skd{((;7Yr z7#n^F4^5cqlFOq({ zai-+|8Wy7NO5mWwv?8yhAWI$k?u$r=R)HNNhlRXgmxzwP%5 zUpjTmQ5kvxR-+GJv(xYJzpmhWdDfV`iTx;EaH$MrJQ_Qb%H zzv#q_x!!`vKz{wmqq_TfdKwxU@+vC*t?xWMj?$1Lzd@t>_kNwDVvdUeXB1--5_Y|* z`M5AORRn*D$@wvz+2KZ?5loH38QT8U{0{?^MjOio)!{MRo`A?~x|VTaxW>mHXJTS9 
zxOtN-Q2r@G&D+dOdLXT2Bu^PO0kbBG{rmSjtS(x)nYwUCWM&HErMT1&lr%N%Pt;Cx zefRDtCRy!ZWo6Yxd0ZbQDTkn1)H8KaTYKk~D_691bb8=6;V{#$>_$udt(^FObag2L z<+s906rcap-Cg@Q05d&&tZW z<$O*~-ZMNrOsoTGf0hRA`ucjN{RGQ+N1hKNurU@bIU{4>>nNt1K{ZFTo>2oE!~0`$ zW~Ky_2qtl1J@MoL!DMpjj`mAF_ymbLtXV{;CcvrnZD ztuOXaX=!U0mzHiN3CpSL=!CU-ul(YzUgvf^R1+!A2-AIO@ToiUANe3HGXiV7p|R2J z<;(pG{nxa$y)pLy)bq`b60C@L4lDCq`AhwbFE!7*hw__re~*!?sI8UN(_^WrsrfrL z_6#g{X=PF!&LV+3QEe*n5%wJs}d$87%E)yu>7^w zNq7AHi{Rj_?fN(z81?H3OXKFREq1Q(Yd#24Io=cxFYl(PdJp*{@jNgeWW-g-j2=T5 zdlbkThdDXPVOK_0nBI8Y7|u-aLz2dm8JL)O;37SE`0y4k?XNCgUgSq;$hFgMyTJq1 z0FRn!-M16hwI6IF9@h_?JGnmwS4<7>bHk3@`wp{%)VX zh^_3~x94td+Y+@>q?0-^xctJwJr|{rIIX=tdGaJg3dTqF?ZlTiUbILGyiCyyi9(6{*gU9Dk>|z$SCNgAXkm;Dfd6Ixc8TytE;Oi z>~miP1W2*LHr~^`Ghg}r`}g>ozGeLLJSBy<3>F_3E?jUcDEnr9fRmH6cXU(<*;7v| z!oVOBzF(L>#sHycg!sdko}PdUhTw!J0ROwxw6vpUW@bb_79niq>baixzD6W|fViL0 zO+(C8v>rpUaRi$}K}F>yqgYp07x6P&TVF$K`T&C|C`uqE@TUe-eVvM}=`i~|d;>pv~(|boo6rroUZfJN`PfxFX zuFCc}e6S+LA3o2?IYYj6v#53Nj^tp5w73{>@mOqIhQ(g$pXq#XGQzA4wXg6)#)xtDX#3YN zE_s5y?Hq=$=;~Nm@&3uN_s8ap;Rzer3}>7e+_7pv0I2gGlEI;bnDId@D~2Q=!@Q6j z!kLZOSg(|qdA31%WJ-fPis;W-Ax3BLloYZN5G%8Yk?H?G};_zzqF5L2X`1p#!%6jwuy*mc= z#2{%@QazxZe#Z+ih9OR3Lvyp-6Ee!b6BGV$p)`b|fHwU%PT|g7y8;p4FasG;G#dMB zWBJb6N9VJZjg5_;puPwfyHA6alO;GpMrAuqO-{v0_2Ze)YSCA)7skES@l)XBJrXzG&CetMe9_vnr^5X&+F&h^A#-u!zZ(m;wqIW zM*blsRifk8>uvq5)8%m=vEomGbChTA9JU6Vj*gBfa6uAG-?}lILR#7hjdIJvL-K=-LOw|TwIPJp`n#&)YN*p4x5;nBVO}=^*h-31rmbb zq>CTo<6X;p4q+Ra1<4=5#r+LXs(R0S(>)_&W1_HAfn^w@b+dqgz~ABFo&TtSa&vPt zna+rah&ZgTiYO^5WljqQuWc+o4@D?}ulrU_O-%}B(S1@^n??|MO3KT(vjy(JsUTHp zh{USBz!%F(OK(>6n%uO$Ji-nCaTGTsgb~x$ubeLSJ*x57kO@vL&zZr6G#VKtI9g>; z3gKQR+yk~<)juQhVD=bZbm72E*Wlm|J3BifG*{Hrl;XEqqRU`<>((J`iU$uKgoxT5 z$LmhMUP*(b;0+SpM)SQke!~ZVEa0knoSJGKOeMnRs&(=_9yH7AU2N=Do%B0rQKV=j z=~h1KDzHVk|DN~IUPQcFwV1|htISqqWo7vzG^863a4c!3rlyFRk&dkPA7lyzAMy#! 
zs}Z`xsDU%e`bS25CMPYApoYMq2!CJ0`Jfzu8D0y!pr@t=-QuZ_&n|?GEhOCLPqa>c zLn@Mj@PQsmvn5Q3yuEj7-aD(W&q~_W{Mj4n^Iwc)!agE~Wf|fS;d*_G_^?tqc00<#K!*5r~SywUf;V2e|{=Z{^;>p`k8>=5M%piTYLJS-b&21!8N*Ur8wJ znF@IQ+8Zy2NQ;n6oJZ(+6CYnCe)zTE-JKFq{;sZ@@!qxp@YE`B633bPIOu=LYzGAe zpmkB?EERdWxay2>r#Vdi-GsAXU|~`J%#ahL08byEb6A$Q@88c~y<=RbCVq&AXAkI0 zMB1}?^JaW^1HMsVrVNGXhYufIJOiFTcSY`Z7=N~D(d`sI807?&D$ex{UJUl!k65&aTI*xEmJG5sG-$T( zW-3G}kxYCa+s2OzSZ9oIYru)!;U;Z*`!*XN9}RB)aN{F?B!b=nmKH|v0Jh-+W1}%7!HVtcvf62 zUB`>Wf`|%4)(Y`kd#2SEY!l*GWGLSssAFy~aAtk{_;Hk74d1{AmWE6#V|sde1FRM9 zaA{aBqaG(pbCK;AMBl1kbviLPIQSPJ5ti+SzU~2r)3=}2MVv7PPmV`GKn5EC$HKd! zLLLa-E1Q5SI`PKlW)JKJ?9#6O{%^A{ii(Oh5zh!LU=Jgs4;*PTb8>o+)RC&Z-;$A$ z8FdxAkq%}&x5b{`x^-)^<=Syi;2bQ-%!2yJD6n-1;U+11-y z`Ynm#N0ur2yqWiB*a2*8eRDEdR@=wCE?k61Q1BaQL0Q>O#Cb_bIDyb1S-`(>)#xw% z={n?TUY?mU*HI~Q;F7ntvwMe(#LDn}YB-Ws;?80-dfU(!IDBkP2(uN=`!QWWY>z=Y zb4pk9C61LEt~YAzt-#>`WXga9-=gSxjxuykk0zpsi)kT8Vwz zp!CyUF8*irEAGmwDsQ+?M;rJIOAORUaVRP(LU0;`Kw>@iL#bIaN%xSty80>gGiq10 zcd`lIdne{FDK)+T0E0;8p8-=y(#l^?#wY5pUY9MRqdQim?^RJ;Wf>dx|=*o$HVO9+ z#^Lz(=diLu#W(Y-g#mGK9K={UL2+?$P_=i%81^?%W1;i!60Cn*S*_BLkQ74c;34m$ zK%gjqX!@99QAi#V5ZH%#B+{2IJ#QD+U#44lBwt@>YG!sA6`4%{+u_61G&HUV5S0<9 zNgAw1c3mkiH{dEfq zW5haP5c<)q5#OoK>TJ;}ljVR`6HNGVXudUd4q#l?EXq*AWdiKHvuXejzGO`#bYG7)*BT&9{c9oR+T7l}C z!lBST>)6;BgxG}w5c!p{-*>r#35gya+X>LHaehm$=8KbDT)42^g)BOLWk%TH8BYC0 z451giL+|M5XgU0g69(5*E{;;WF1}yDPCXPT55BTB+MK&UwRu`lAc;)o(0(6aKwDv~ zj-EW@uKg#aF!~1PyH_J%d49|X-r(fK^eTi%Jf+oG>*=jzWYKc|ET=ad6I%|e_zp|Y zOHBf`mBNOLMZIC^u^!vJYu7H$g9lx9p~%#uLV>9)y9H@T-*~VNkMV}@-_L?=hR@5&$7(NP2nAkPXyinf z@wa3C<>xTuNUy-@8Op4!+qM<8w1g%WK606ET}LHLL;z$xdl(pY>g(&Dn!Vb}D&=<3 z`P&)K9q#V#!f;a3lrF2Sr6fEXl3kDF#D9GI_A%h_Xp|$w_hd?p-akl1T~t#;q5N9l ztcJ#ZCT3>z8YwU{9bn%s{PNyq1+%qVrlyuxXKABvJu-XIQ7HCm)+B%~@d*mDb8v8E1;@0KP`&J#c*ujN(x^x& z`rtWQT0Q&8Ddc^l>QbV0bZX+)=~hgf4ZO8-prXG1Rhv;YRQHmSlGgV2pKV7X)ToCv zVxyv1x%6^TVVTN#9zU4FN8p1n|guH!V93X^G=N`1r?_X|er z>Cy|-mtP(eLX!aCwKR$e4pg+>US5aTqTxjRMEBaYtk}}DP5n%b(u9pYFc{r=gR5d< 
zVin)MxwpMEp!!DD)7K|EK0a;>_;zPAoU36Ir=e7@`t;fNdO5V%l)-kuX<37vg>H`4 z$@c()hkwTTw`(x`oG&vGpQrr@*n z6S?LPaxmR6TUpo(XrhNI87Ot|4SFzaMgu+Z(W_0_h<`V3-n708G%E&|W@Ut1@LEQC zVR(!-!3_aEwG8HKAA1za7q;UM8BG*(CgkS3=5T#n!t1L1`BU9|+lA$A!7Mj}-JkPI zP%y?c`j3`QFHRZ1mUMR7w*A5p8X|468*dN2wZgji!gdGhi)gg7tRL`~f3WVbKczBX zXm=w`!i*7j>y3)tr&=mGU}|mVJU@DXaQYXg?|dQ=~}atYe}L z781!yjxSY33JOnm%LM65EE10LxaoqE4x;M*73($UO(lC7iP{UB#jBfo{d>+8(F5?9 zUFdW^QI)Iej#VJNy(o{U8*5S@c_lL)B>`3uH2_~$hD}R+1I2x$@SM2Jk@J)$8p(?Q zL4SwrX1K+-$`IT2kew5d!iqnCA_y^FR!V4KAOZCOL>w%9yhZm+*qaUJ4MFD3KXYzN z^wKw$jp;kp-n?S;!UP$ku+yqAa^tP2PK8B8ih#2d6;pdAF95J@_?IWIW$%-DinTn4 zJOOhEyan?#Qmy+GR?!|HRSmNOvMFkBXF{F`R`wV%sG9&iFeh*mwIODXB!lzi?tcDV zS^+&aj0zBo224dJsuv+4I_z?-FE=TP)Ev`8iW(bf0j3b`JCr?SfN)Vh0)q=jThj7P9dB8K?TByocZ+!9C{)aS_JG9wCyB`T2Rm-`~*OJjwK@ z85eOxfQiC2$LXn?n<45#RKi6#57f-e&rl=n+qbXq`*&&-UPMk{iR)8ibn$QqMkJ1o zjS(;dX}Ziw>xQnmyyVD{BkBHMjS2XU!$!E*j_--EV+B)@$_WQoH+<=iUeoaD zQx2@$(`U~%!{Xf8Zz#tr=I{%lAANrJSoFAR|MuI2;ujA_!)*RxW77zfiNA26C%~(P z-IPrPdysOrr zc1Q-3GYwkG#dI`V>-DsQ49UBBQ$$fBila15A;GWe%9?GY!a79nc!HSMgC5RD$;8{YfcU>TUK6+UA(-PL&Rc<$QTndv$enF%Ju{j69pT zsPwh2o%#F9%RwON&4n))h~4b%ZGXXYO)v2}C_5PZ)J1Sl@GOlCxzVpkm@^q|;?mN8 zH@|wdND{DncMlr3oVEJjfV>yHk07MP1EIc>qh5b)?&a@Y+RfS zygSC7?mSvSFvB2*5}f8EPb{jsdij{m*|TY?hZKeBf!3p%gF-c z@g?IqYPIJw$HTndoi*2|T^;sUh7{DfI>JISHMGGu-LGj7tZDJ}O`8;Z4{wKXW=TF< zUUfAb(qO!;tx|b;Ip?xw!zdXtJQJGk+&mL&66SI1-;bi^LxS2Dzqs$4;Mc2XPfMOO zf9U*B&3`QCZBRnJ^lm+yssVuWQyb$6lZ2R`$ojaBAuHA9mm=pz@Hoqv=N*tbpmTPU@ zQIxoABHeD!w*PEaBMzF)<#|FC2g0R`>?4t&{aYLz)_mT(b-}dnp%4Lr;9}C*^M1Os zUvEo%;%Z?c% zHMf8M$ak(=XRc}a)7Q77-iwij;-$6#DpSt<4* z@LH{RZ`arNPl^3j5ycVo>rTtk)Fk#z%i+RxO{(fP)) z7cET1HNZ&3o6#OLNa3eQ@j`OWu(i(mVlm7t>spY!BIJ-V1@LNX`?^9{8imn*>mn*; zHptx+b;Bh2Q$Zr73ITyS!{Nf@i(5B+-Bk+1$+<3F$!5>GUV1QTR`nUpM(m7QNHMsSW_-GJ(t)4iL1*RVb!3s} z{`mgnhoB)C*c}MP2Z-d@>Ie}>kSp2fa)=ee1V&H@4c_t zv|%eLk;%ZwC>smg)7skhtcCqCF@suqluc{98|O1*$Ph7H5b$zVRoT95@)yIe191R# z=3r>(-hUUSp7zOf+>xZrt6`M62Jl?XaDF>XZnPY<<49jQoK0EOTE)DOekNW4PCG~^ 
zLxd-snxL8xq0rXuV&^)AgD~VE*bqorNv81GQ{jp{vguo+Jep1?_+yFtiZ_@_LU~kJ zN;M|L{G}f?&7h*9Vi5;b?=x8eZ!I7pzop^PitAZaq3ai2@+>7YGz))F?6a*gQtd3h zQbqzoM=n7`3_*5YYdm?h)odrH?CfkgPNF`&^6L^K+>>}{+6OR?)uI;^;yuIpPB$zO zw~%b*o?cIpo!URXHEE!eX{!=?D9~@U(gnw)yrtH|IDMzyL~(8%9YW;wm4`O*m`8%8}Rg0pU+x3{x|Sejg@Y9 zEi<#ecwKPs4# zkk);4Q}lsnxk;#X#RjA`J|q@kSX5dXbmWM-APm*jyTYHdIDc7xbZ{As9zELmd+$2c z??Bp_4ZqtBW{(L7ePryW9h7s`Fuvl|jGB#^PkH0UjX28;K_n5pOidFLHHJ%yS(pM& zG0A)6$dLdcNA-5@)vZeEV=U=M!C!zZ9me2#uNdBM527e0(OhJebDVrlJEpDpHfmKX zsVKBsHdyqHuND6Vj43)Z*Gw4Nw6wGYFU+BVm#y{SxtCU4=!CCWXk@~QBx(f?K^hMs zhts3BZNTb?Dgiji>Nj$88MWSC_@YInFnG3|>u`^{Zp;ML|5Mw@sE2SaDk>_*o;fqm z{mF+u1&LNIb}eZZ%SIX!0^Qu*v#?iiRQSV7%leWYiW#MeyO^RiW5G zo^;}CkUWD}U&uI;`+uGNI&=T173u)#R7zh_y;@kPBTnUf`S`M!Q>6%3s7;xKD+-Am z@QmnAzEM@BPJGhTepO$8z4>nNs_;(?d|W1`hP-0Nwkj40B5e0;ood^$|O zRFRmRyba*(%2Alqn=%s8PZu7$a3M@RG`kbN+f3?Nrculh_d5Ta8XCqeAd~(d6}6>W zvu4jWymhiWE`*=>=AVDEi;B9Dlv$$a*0)IEuI}#b2{S_<2igl)-+Vsj{8d%u?W71> zZ4;CI;=Ir1caL>2TuG%A>u$@JPvh%X`76=x&Q%IeRE*KWuD*VLc7W1X@M~KQt~vhX zZGC(a*-Ku_Sc>;SMYvQBIX|J^gAVD5O@x@O(iSb%{_^H?d3(?rn&>oW%v%@D*EIxC zxYhxUSNZcxr;yKtsFPSXsiC2Iyl^p*S|(3BOVz!~%ZnZr&C6{#9K6A7egq*zt+64f zt0hdueO>{o%*)OWg1et63Lkt?m+OI1@F|NBAx3&GK3okB7UbU*V1Tl+a5ecgzq2tm zHs&+JUnb^kQUq3-6Ulyq9Ya8xx#wSn+Rm%wewc~uNGS+Qume&E4YoIcUx=7Y$y!Mt z92NY#B3fPK&M~r+992W|EGjCpL@*`r)xXucWGmbps6)N>5zZ9ZZ<&f=YNX*cm?D66S;j9q%l9DqPXMyo?l ziNoLMwQAKf$031fnF^lL-x`MIdk~XFeMicYxcFr#g-dVrw;!gb7ip%nmNEN+C7}%^ z^Q!O1r|;hFfDm~%n!;iuN6N|a+JwGIuGr6e$0dC4aByi^*-a8bBNqNu3m)9KA-jC} zayW^k;ggs8hb&9>mxZ_>Bno53s7CAGdlD)tudM0JuM%18Z|5qo1)9XcuX7z7_L@!{ z&ZayzBYvXEn%6y#2b)Pf?y%FMny&jkQZ6c(~Ac@@WbWRiJB2I6{#$q zXDGHfjOJz>@qK)KeL>qd*%@NaZm-d8oxmg2oW{JF@}v21^zu<7JWIA5l)Ckw`J$VD zzmg#bsV$y?WW)etAbaDdx|z6X7r=`N?w^M*FbY0tk?JM0MsJxCxnnA5oG8nPjE*ct zaa%mB&A8OEGBYKFco3=)*(U@;UzaET&HX8Uiy{px6M1?P)NMtMVn zr|#>2bG{+goQvXkL>76ZQ}mvB3$7mJ2Gi2ggsZC$hy%W^SkSrkL|GjY2qa{AgRNG>&<* z)~R){pz*pua4s2!IS>M^gnt*`dpJ4|i;bSX{?s#XNq!fIopZ3Bbm3$`vc6-rnDAo9 
z5p>FX&Eq*yV<6@OGR5LMpDX@wb&ach4|{PO{(5(OypE}v+3L4AQKa^lnmpXxq}PA6 znJHiE`+!A!oRZ*vWZ6aeORGL9lMh#(Xc{?Ugyib_+fi+guKKPP8K0sJryPE(+Hr-U zUHT@fWiQc!(wVq?`B1VH3|?nR>iP58e3_NhWJ8O={jxGwH@BR}3qXQkzZ&BF*5Us+ zG7F<84JFk)K?W&@V7H&3o>U|w0( z9FDDQ@%^y+vk3_~C0gG$ag6GAx@_LXHbrWUXWfTk-on$}7lbJZ5OpLjM`z8c@N4Xg zB3hCp?_U^uYG&>0V7B^`I%#w>$M;Dhht1L3=AC~CT9XDT{Z2yfLy%+`~0iE+$@P!xe$*njX{2#XH%gTPbBmtLLv9j}d|(kbN}Q6x;a)b(-j2$7dBn_`(R zXxcr|TF~X6Ufk^qsENlm_qsf#FZ!H~Rr!Ny!XHZ>+XHDYOc7)YaFtYd>@@Cc8U|=~ z;{>Uc^eY^Z0CJUM57gXgS&GEo(cj!}&k{P$;)EX#Rd(_Q>rY;>jYT{odB&hI>MG$=XckZ0N zalU5i?=OJ?5tVWgJ6T{wd@Z}`>z_WAP=B4CbxsyiD42=>^H255m!YKWL9cq;x2SylxDj$y zbU463fQw(1o>0=()^=I4WE=Os+W+m_x5A4i_!@47LR45jF-CSG#@+M}o1+M$f>et6 zRc230&PXhNLg9c*gm~X3=oAo6<>CpL1)?ZLu@U_eA%gvXBwJpRd?03=BMS5USLRP5 zMr~&qQqwPZS9uxA`WA?p`b8067KWc)UhjkVgA8Rk%P^?HTO-+e#mvRIt0t<&R%L*R^l@}yl zDE>P*v;9YY9HnEQ))6fvDSa!sJ*LsBL!C3pM~%!|TiNMj1}%s!EiIV3xtl5{xO6=|J1*{<02sfiRi&uCb>>_QCgV@b{cJ5ppAkmH*ibl*R{H^Bxt@c)qiev950!%mFMzsNfpWQ(cSmqOo*7npWwG1EpPm}s*QhIbd3j-2>-AJ^ z@;_}D3%hlKcNBHv#0lpME1)A8wkA|aQxg-hJyjODKMw4lccxvPaeHMe`aiJ#&AQ?} zP;|L)Bq;=jtloN^I~2{izZRLaz|Ns)qr>YZlGxYe0QthFAw zcR+vR5hHSlBMi&fLOnN5ucz=73jR~5gQB&EFiE|7&begn#M;zl7n_N}LbRmxx@X0r z&&E9?jMUz*rVFRximI!DSXfkF6lUb?)hTnQx#JaWNvZ?eORikGf=|!V&+p2yJIOul zM_x*Bx(b4y^yt#El`rn}j^J?IZ71h1NLws&BZayPr7i)Y(#MVMFFNm@zkDfdgdIC} z3?R|J^y;omVLaRBy3%xq(OYbX1l)Q3?OOcRM0V3nVHaNd(BNcpat*x=m{bMIyL0z0 zu}WUDuJzMEDp8fC{*MDpC)J)@svVVa04l8y)j|+X2Feg&JfzNaVHN-7Xq90bd-X_3 z_@r_}*@CV;uuEo@l%$n>ReOJOO5bp)te~KXXK1fk$OE|A3%oZd{Zc`y6;#)+TSuvJ z;Pw)arAzPFJV*A(!J+r``SW(XH9=xfHk>ht#(tDIUTlBSWPle&le%Ek@=8nSSwIo; zwQa(18`jA-D132lp4XZ+!9hWCf|EOXwD9oM4c}Ww0yo8O-}-T*qC&;J}O9Zq#x!RKuK4;JR`h50 zB*L7a!KjC_e!#bL8>~xRvqM%69N^~L(x8Q?xck6?*}X42TIx#36Kp9?E^sYsjtMVZE{B0P(Qt zRdJBuZ?!p;&{Hafvxz0*m6CeH!b~UbZ9L~^L8l^?V^0e9RXYa6YilH*`s6x$)+|x3 zC(qWYSJUXbcTm}$?RRZ;%*+l_YkKDI`c0~<@%B#&0ASpG=n<->1KM{_&)jf25qFEu z+MU+A$7pL`mHc7py;hTqbuX=bN*Ws)V&`4+n?bLEvU2+l9ZDeRYpXnPKg~{mx4y;R 
zeD|(huMifrv=bLPY)DG1b&bi;pq}&`T(7yTGCit(H%C9at9oo0qj9jS*2s|#Q6A-f zZb!HO7;v*)P3f0ohvN$d90v1hYY#nq%{w@((bZytt+DH-eoG_D`;~N>SZ>8^+aB%< z7nb5rUIg8#@?oN36=!TuaGIy=dZmmN0tm(3Z z8Y3Qo=N!2WEAP#4&u=WS9UT`Jcb_Q%J`X?WxcE7a{iC$d*5Te|Z`mpIz7SAFe^(by zwP7jrAG0oXfX(Tdz~oXuIfkg|l=hx%;>wGpgW^2{XPzY0sN7RYOh}l{K84AE8HfSrtJH6AQb_3=ErOt@cP>73xs(%nCI2iGqQGD;5qyE;X^NZr+*VZ zy6!(xrc`?2h0BanWw(Jno)8z_@T`}YfI5TQC&V}>yZ(F& zF;+1FJtb(DjjxJzKpphQ#^cbs?WH7vY&AjW!vkIY^l9c4S%-1B7eynFzrR1g2`MTt ztO%fm@dYT;j<7M*EoCK&+iLiqw0)*ob?M~tIUQ@QD`u8PBk4HL*k81SY{QNW|wF81`DafDA939e@= zf~$xu#&z~GrRQ5+W6Jxb?GUR(5YLFdR+`;3_4I^q=>}nsNvCp>0KgGbDvQLCF=`H0 zctl0!L!wC^*J-bC|D~tzPyYH_a2UO25^&~apTv|gC0hkWj3}N3du_lhc?>qfwgSO0 zgnI~r7=eZ#ZZlS{DeXnDcZrrZtjaj_99jSZ5ZZYY9pa{kVSHQpoe>G4s9yb{D5uAk zT~tk6*pxjY#dOf>2yu6GMPSSk${1O&wx5n0F<>X7Q5@bL{Pu z&6k%T`290}sZlGTtL=$tha0|qS$)XDzmKE=oGCQXzyk+* zBlran1IH7cmb~7(S|CKxJwgM`suhnx6t}{ZbyDeg@_t*PBCNBb&1OZ_ou=IxH;t;Y zep(xk?&O&t8oNn7(<~)nM={!cBcihDEynT5Lh%NlKYw2P?wRxUBYPcp@}wWgNhUlZ zB8NMMov>fl$i1w{`zMuQKk+`byRK*L^SvdS4KLLJIHJEdZVy zQZzVagQJyuO!S^T5(z2VVDWuDdb1V~yDFm(1_sK|{3l5@ay}a5*CQqI)TwZisH_B_ z+*N%{7|L5tZti?88g^Y&5)I^b!A;Vu|LW1-!`5mdrX~LOVpBGTP54%(r?I;$Vgt;x z1a6U*J}Hrp_PFe0ZAr0%Y>}~6k=-Cu6ZMsuCSWhn#5ZtX_ zKSeV$Ge`>=iCBFtE$eVUxsat*K4*m2ZaLZW*EUqQ9`Nie-vopa%}-S1o7ooeGzsn> z?yj)U=ry&P0Qz*Zt1R_>oRF6NmJD!ac>f!)8CVMB?q$h8)SY)k$+MN=+d^G{dQ!Ob zsk2nZ$@l1C(G40AkpPb01?%ykjY=CAk6o<$njXJi&kC-}e$hYZ+1Qh=tbW;*D6n1N z2|NwnFx+R9%-(-^-!g2%@!1}Y1+Ej0FB#QTVyf*o>I>?F2z=>2o}<@)cK<%&XLkAY z#1PmmG=Sc{2fl3SrYy3XNN7?isO0(c=TdQqbJvpV0V4Fu{v&4PJb4-XpO5VJd)rTWuZ;!hS{J`=%oyLaV|Pz|{FzbfZUI0<&$Q_AhL`rP zzR_Mt2wl|2?58`aj;ALeX=`<;M*4c%N%KLp48NaBzS(M&5uIJ4We+^KGWA-xW3mG1 zT8LWo@%GO--Qm~$)VL~dI;!L-xh!J++}Cq2%yF=emt~pQcmAXBb*N`+BU|kC_@sOQ zu)(Ib4T32Vf@X1XAP;=h0XZ@%K_HYsbfE0ZdL_QBP_?F?p;|<1@O>*t3*q*`U@jw4 z;IBp($JF498uRPACG}7A{7$){*dhL&_L@jI5z+M~BDLl0xS1A53mvysjfX7s_V#{W zK`KLH9^e9c(HlB6S0tZAo4%LJx+Z#-8Q9b-H6?E77?joW0d*pio{6V*b>U)`AEg>S zQb%M5*SR>P%H|1turce9PKTfwC$O9dl>l?V)9UKDMY|V|#HXK2u#}MyX2($k@PHd( 
zN}+sWH~nZ6CLi?KFi`=LqJ#yB?1m#qDZunFxUqcf!%KaFvQDasq6{QOZ`YQTloVd& zMLdGlh$zshAvid&ado2$hKWY#Lh2}v!p*~hZ6ZKRGI@~Uyw#e6D>AH*S+=$6%a>kg zoAG}9m)?rG`>asw8gX5m54*0E`RnDj$~&3PH&BZh)-AegoOp{G`&Z|<7^wZFCoFSh zLJfnLVOcH)87^@BywN$$uJ@FKvRpP)=S_Qo>1O<*MJJEhBz@#58V}xcRD9CwKcD>D zwRo{&2R_?Aa;zM0w(}#$1qN!09pv=`oWJvV^*iF5#U~;I5C8W+#Vh594Ah4I`er^c zzQYUF#LxeomnojV_-n=9zpj-3twr(h>#q&mTz+02e;uOue}4b}_e(>5$hV%hS+1bd SF+;*XlP64*o*Cz|`F{X#mqU*L literal 0 HcmV?d00001 diff --git a/tests/test_triton_basics.py b/tests/test_triton_basics.py index e41ab7afa..1376aff45 100644 --- a/tests/test_triton_basics.py +++ b/tests/test_triton_basics.py @@ -4,14 +4,32 @@ # LICENSE file in the root directory of this source tree. +import pytest import torch +SHAPES = [ + (384, 128), + (8 * 384, 128), + (34, 128), + (16, 128), + (16, 512), + (8, 384), + (8, 1024), + (8, 2048), + (8, 4096), + (8, 4096), + (4, 12288), +] + + _triton_available = torch.cuda.is_available() if _triton_available: try: import triton import triton.language as tl + from xformers.triton.sum_strided import sum_2d_dim_0 + except (ImportError, ModuleNotFoundError): _triton_available = False @@ -39,7 +57,7 @@ def k_mean(X, Mean, Var, stride, N, **META): # Compute variance x_mean = tl.sum(x, axis=0) / N x_zm = x - x_mean - x_zm = tl.where(cols < N, x_zm, 0.0) # THIS SHOULD NOT BE NEEDED + x_zm = tl.where(cols < N, x_zm, 0.0) x_var = tl.sum(x_zm * x_zm, axis=0) / N tl.store(Mean + row, x_mean) tl.store(Var + row, x_var) @@ -88,3 +106,33 @@ def test_mean(): assert torch.allclose(mean, t_mean, rtol=1e-1) assert torch.allclose(var, t_var, rtol=1e-1) + + @pytest.mark.parametrize("shape", SHAPES) + @pytest.mark.parametrize("dtype", [torch.float16, torch.float32]) + def test_sum_strided(shape, dtype): + torch.random.manual_seed(0) + a = torch.rand(shape, device=torch.device("cuda"), dtype=dtype) + + torch_sum = torch.sum(a, dim=0) + triton_sum = sum_2d_dim_0(a) + assert torch.allclose( + torch_sum, triton_sum, rtol=0.01 + ), 
f"{torch_sum}\n{triton_sum}" + + def test_sum_strided_asserts(): + torch.random.manual_seed(0) + a = torch.rand((128, 256), device=torch.device("cuda"), dtype=torch.float16) + + with pytest.raises(AssertionError): + # This kernel is not useful in that case, assert to prevent misuse + sum_2d_dim_0(a.transpose(1, 0)) + + a = torch.rand((3, 128, 256), device=torch.device("cuda"), dtype=torch.float16) + with pytest.raises(AssertionError): + # This kernel expects 2D tensors, assert to prevent misuse + sum_2d_dim_0(a) + + a = torch.rand((2, 128), device=torch.device("cuda"), dtype=torch.float16) + with pytest.raises(AssertionError): + # This kernel cannot sum over dimensions < 4 + sum_2d_dim_0(a) diff --git a/tests/test_triton_dropout.py b/tests/test_triton_dropout.py index 441f56de7..bf510ad61 100644 --- a/tests/test_triton_dropout.py +++ b/tests/test_triton_dropout.py @@ -25,7 +25,7 @@ ) _triton_available = False -# Testing odd shapes on purpose +# Testing odd (non-power-of-two for instance) shapes on purpose SHAPES = [ (384, 128), (8, 384, 128), @@ -90,6 +90,10 @@ def test_dropout(shape, amp, bias): == y.shape[1] ) + # Check that the drop probability is about right + drop_p = (y_a.numel() - y_a.count_nonzero()) / y_a.numel() + assert drop_p < 0.55 and drop_p > 0.45 + @pytest.mark.skipif(not _triton_available, reason="Triton is not available") @pytest.mark.skipif( @@ -151,4 +155,4 @@ def test_dropout_parity(shape, amp, bias, activation, p): if bias: assert torch.allclose( torch.norm(b.grad), torch.norm(b_.grad), rtol=0.01 - ), f"{b.grad}\n{b_.grad}" + ), f"{b.grad.norm()}\n{b_.grad.norm()}" diff --git a/xformers/benchmarks/benchmark_triton_dropout.py b/xformers/benchmarks/benchmark_triton_dropout.py index 376f2d70c..aa806e6f3 100644 --- a/xformers/benchmarks/benchmark_triton_dropout.py +++ b/xformers/benchmarks/benchmark_triton_dropout.py @@ -18,8 +18,8 @@ (8, 512, 1024), (4, 1024, 1024), (2, 2048, 2048), - (2, 4096, 4096), (1, 2048, 12288), + (2, 4096, 4096), ] P = 
0.1 @@ -105,7 +105,7 @@ def triton_step(x): ) -for activation in [Activation.GeLU, None]: +for activation in [Activation.SquaredReLU, Activation.GeLU, None]: for bw in [True, False]: for bias in [True, False]: bench_dropout(bias, bw, activation) diff --git a/xformers/benchmarks/benchmark_triton_stride_sum.py b/xformers/benchmarks/benchmark_triton_stride_sum.py new file mode 100644 index 000000000..6fb887e78 --- /dev/null +++ b/xformers/benchmarks/benchmark_triton_stride_sum.py @@ -0,0 +1,71 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved. +# +# This source code is licensed under the BSD license found in the +# LICENSE file in the root directory of this source tree. + +from typing import Any, Dict, List + +import torch +import triton + +from xformers.benchmarks.utils import TestCase, pretty_plot, pretty_print +from xformers.triton.sum_strided import sum_2d_dim_0 + +SHAPES = [ + (128, 128), + (384, 128), + (784, 512), + (1024, 768), + (2048, 1024), + (4096, 4096), +] + + +def to_gbs(a, ms): + # Read the full array, write the non-reduced dimension + return ((a.numel() + a.shape[1]) * a.element_size() * 1e-9) / (ms * 1e-3) + + +def bench_functions( + test_cases: List[TestCase], shapes, metric_transform, unit, title="" +): + device = torch.device("cuda") + + for dtype in [torch.float16, torch.float32]: + results: Dict[str, Any] = {} + + for M, N in shapes: + a = torch.rand(M, N, device=device, dtype=dtype, requires_grad=True) + + for testcase in test_cases: + time = triton.testing.do_bench(lambda: testcase.function(a))[0] + + metric = metric_transform(a, time) + + key = f"M={M}, N={N}" + if key not in results: + results[key] = {} + + results[key][testcase.name] = f"{metric:.1f}" + + _type = " fp16" if dtype == torch.float16 else " fp32" + + pretty_print( + results, + title=" ------------- Type: {} ------------- ".format(_type), + units=unit, + ) + + pretty_plot(results, title + _type, unit, dash_key="pytorch") + + +bench_functions( + [ + 
TestCase(lambda x: torch.sum(x, dim=0), "pytorch"), + TestCase(sum_2d_dim_0, "triton"), + ], + SHAPES, + to_gbs, + "GB/s", + "Strided_sum", +) diff --git a/xformers/benchmarks/utils.py b/xformers/benchmarks/utils.py index d8ab2e386..365d010fa 100644 --- a/xformers/benchmarks/utils.py +++ b/xformers/benchmarks/utils.py @@ -28,12 +28,12 @@ def pretty_print(results, title, units): """ Printout the contents of a dict as a human-readable and Markdown compatible array""" print(title) - header = " Units: {:<40}".format(units) - print("|" + header + "|" + "".join("{0:<20}|".format(k) for k in results.keys())) + header = " Units: {:<45}".format(units) + print("| " + header + "|" + "".join("{0:<20}|".format(k) for k in results.keys())) offset = len(header) print( - "|{}|".format("-" * offset) + "|-{}|".format("-" * offset) + "".join("{}|".format("-" * 20) for _ in results.keys()) ) @@ -44,7 +44,7 @@ def pretty_print(results, title, units): for k, w in workloads.items(): print( - "|{0:<{offset}}|".format(k, offset=offset) + "| {0:<{offset}}|".format(k, offset=offset) + "".join("{:<20}|".format(v) for v in w) ) @@ -85,7 +85,7 @@ def pretty_plot(results, title, units: str, filename=None, dash_key=""): plt.xticks(rotation=45) plt.savefig(filename, bbox_inches="tight") - plt.clf() + plt.close(f) if _triton_is_available: diff --git a/xformers/components/__init__.py b/xformers/components/__init__.py index 60536ef0d..2bf470714 100644 --- a/xformers/components/__init__.py +++ b/xformers/components/__init__.py @@ -13,7 +13,8 @@ from .activations import Activation, build_activation # noqa from .attention import Attention, build_attention # noqa from .in_proj_container import InProjContainer, InProjParams # noqa -from .multi_head_dispatch import MultiHeadDispatch, MultiHeadDispatchConfig # noqa +from .multi_head_dispatch import MultiHeadDispatch # noqa +from .multi_head_dispatch import MultiHeadDispatchConfig from .residual import LayerNormStyle, PostNorm, PreNorm, Residual # noqa # 
automatically import any Python files in the directory diff --git a/xformers/triton/dropout.py b/xformers/triton/dropout.py index 30e44ff2d..965629ea9 100644 --- a/xformers/triton/dropout.py +++ b/xformers/triton/dropout.py @@ -25,34 +25,31 @@ class _dropout(torch.autograd.Function): @staticmethod @custom_fwd(cast_inputs=torch.float16) - def forward(ctx, x, p, bias, activation, activation_grad): + def forward(ctx, x, p, bias, activation, activation_grad, trainable_bias): # Soft-flatten an hypothetical 3rd dimension x_ = x.reshape(-1, x.shape[-1]).contiguous() y = torch.empty_like(x_) - _, N = x_.shape + M, N = x_.shape - assert bias is None or bias.dtype == x.dtype, bias + assert bias is None or (bias.dtype == x.dtype and bias.shape[0] == N) # Generate one seed per sample # seed max is int32 max for positive numbers: 2**16 - seeds = torch.randint(65536, (x_.shape[0],), device=x.device).to(torch.int32) + seeds = torch.randint(65536, (N,), device=x.device).to(torch.int32) - # SPMD launch grid def grid(meta): - return ( - x_.shape[0], - triton.cdiv(x_.shape[1], meta["BLOCK_SIZE"]), - ) + return (triton.cdiv(N, meta["BLOCK_N"]),) # fmt: off k_dropout_fw[grid]( - y, x_, bias if bias is not None else x_, + y, x_, + bias if bias is not None else x_, seeds, y.stride(0), - N, + M, N, p, USE_BIAS=bias is not None, - ACTIVATION=activation + ACTIVATION=activation, ) # fmt: on @@ -60,7 +57,8 @@ def grid(meta): ctx.save_for_backward(seeds, bias, x) else: ctx.save_for_backward(seeds, bias, None) - ctx.trainable_bias = bias is not None + + ctx.trainable_bias = bias is not None and trainable_bias ctx.activation_grad = activation_grad ctx.p = p @@ -75,7 +73,7 @@ def backward(ctx, grad_out): grad_out_ = grad_out.reshape(-1, grad_out.shape[-1]).contiguous() grad_in = torch.empty_like(grad_out_) - _, N = grad_out_.shape + M, N = grad_out_.shape # Optional inputs to compute the activation contribution to the gradient assert inputs is not None or ctx.activation_grad is None @@ -83,32 
+81,38 @@ def backward(ctx, grad_out): if inputs is None: inputs = grad_out_ elif inputs.ndim > 2: - inputs = inputs.reshape(-1, grad_out.shape[-1]) + inputs = inputs.reshape(-1, N) + + if ctx.trainable_bias: + grad_bias = torch.empty((N,), device=grad_in.device, dtype=grad_in.dtype) + else: + grad_bias = grad_in # will not be used - # SPMD launch grid def grid(meta): - return ( - grad_out_.shape[0], - triton.cdiv(grad_out_.shape[1], meta["BLOCK_SIZE"]), - ) + return (triton.cdiv(N, meta["BLOCK_N"]),) # fmt: off k_dropout_bw[grid]( - grad_in, grad_out_, inputs, bias if bias is not None else inputs, + grad_in, grad_bias, grad_out_, + inputs, bias if bias is not None else inputs, seeds, grad_out_.stride(0), inputs.stride(0), - N, + M, N, ctx.p, USE_BIAS=bias is not None, - ACTIVATION_GRAD=ctx.activation_grad) + ACTIVATION_GRAD=ctx.activation_grad, + TRAINABLE_BIAS=ctx.trainable_bias + ) # fmt: on - if ctx.trainable_bias: - grad_bias: Optional[torch.Tensor] = torch.sum(grad_in, dim=0) - else: - grad_bias = None - - return grad_in.reshape_as(grad_out), None, grad_bias, None, None + return ( + grad_in.reshape_as(grad_out), + None, + grad_bias if ctx.trainable_bias else None, + None, + None, + None, + ) def dropout( @@ -128,7 +132,14 @@ def dropout( act_kernel = get_triton_activation_kernel(activation) act_grad_kernel = get_triton_activation_bwd_kernel(activation) - return _dropout.apply(x, p, bias, act_kernel, act_grad_kernel) + return _dropout.apply( + x, + p, + bias, + act_kernel, + act_grad_kernel, + bias is not None and bias.requires_grad, + ) class FusedDropoutBias(torch.nn.Module): @@ -141,8 +152,10 @@ def __init__( super().__init__() self.p = p self.activation = activation - self.register_buffer( - "bias", torch.zeros(bias_shape) if bias_shape is not None else None + self.bias = ( + torch.zeros(bias_shape, requires_grad=True) + if bias_shape is not None + else None ) self.activation = get_triton_activation_kernel(activation) self.activation_grad = 
get_triton_activation_bwd_kernel(activation) @@ -153,4 +166,6 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: self.bias = self.bias.to(dtype=x.dtype, device=x.device) # type: ignore p = self.p if self.training else 0.0 - return _dropout.apply(x, p, self.bias, self.activation, self.activation_grad) + return _dropout.apply( + x, p, self.bias, self.activation, self.activation_grad, True + ) diff --git a/xformers/triton/k_activations.py b/xformers/triton/k_activations.py index 0964096d6..31049101c 100644 --- a/xformers/triton/k_activations.py +++ b/xformers/triton/k_activations.py @@ -64,8 +64,7 @@ def relu(x): .. _ReLU: https://pytorch.org/docs/stable/generated/torch.nn.ReLU.html """ zero = 0.0 - zero = zero.to(x.dtype) - return tl.where(x >= 0, x, zero) + return tl.where(x >= 0, x, zero.to(x.dtype)) @triton.jit @@ -74,10 +73,8 @@ def relu_grad(x): # in that it does not require the input to retrospectively compute its gradient # here the input is the downstream gradient, and we return the upstream gradient directly zero = 0.0 - zero = zero.to(x.dtype) one = 1.0 - one = one.to(x.dtype) - return tl.where(x >= 0, one, zero) + return tl.where(x >= 0, one.to(x.dtype), zero.to(x.dtype)) @triton.jit @@ -88,7 +85,7 @@ def squared_relu(x): .. 
_Primer: https://arxiv.org/abs/2109.08668 """ x_ = relu(x) - return x_ * x_ + return (x_ * x_).to(x.dtype) @triton.jit diff --git a/xformers/triton/k_dropout.py b/xformers/triton/k_dropout.py index 61878840f..1fa4a18f7 100644 --- a/xformers/triton/k_dropout.py +++ b/xformers/triton/k_dropout.py @@ -10,138 +10,246 @@ import triton import triton.language as tl -_k_configs = [ - triton.Config({"BLOCK_SIZE": 128}, num_warps=1), - triton.Config({"BLOCK_SIZE": 512}, num_warps=2), - triton.Config({"BLOCK_SIZE": 1024}, num_warps=4), - triton.Config({"BLOCK_SIZE": 2048}, num_warps=8), - triton.Config({"BLOCK_SIZE": 4096}, num_warps=16), +k_configs = [ + triton.Config({"BLOCK_M": 128, "BLOCK_N": 32}), + triton.Config({"BLOCK_M": 128, "BLOCK_N": 64}), + triton.Config({"BLOCK_M": 128, "BLOCK_N": 128}), + triton.Config({"BLOCK_M": 256, "BLOCK_N": 32}), + triton.Config({"BLOCK_M": 256, "BLOCK_N": 64}), + triton.Config({"BLOCK_M": 256, "BLOCK_N": 128}), + triton.Config({"BLOCK_M": 512, "BLOCK_N": 32}), + triton.Config({"BLOCK_M": 512, "BLOCK_N": 64}), + triton.Config({"BLOCK_M": 512, "BLOCK_N": 128}), ] -@triton.jit -def _drop_and_scale(SEEDS, row, p, offsets, x): - # randomly prune the weights - seed = SEEDS + row - random = tl.rand(seed.to(tl.int32), offsets) - x_keep = random > p - - zero = 0.0 - zero = zero.to(x.dtype) - - # prune and normalize in one go - return tl.where(x_keep, (x / (1 - p)).to(x.dtype), zero) - - # fmt: off +@triton.heuristics({"SIZE_BLOCK": lambda *_, **meta: meta["BLOCK_M"]*meta["BLOCK_N"]}) @triton.autotune( - configs=_k_configs, - key=["N"], + configs=k_configs, + key=["M", "N"], ) @triton.jit def k_dropout_fw( Y, X, BIAS, SEEDS, stride, - N, + M, N, p, - **META, + **meta, ): """ Apply dropout on an input tensor - Y : Output (M, N) - X : Input (M, N) - S : Seeds (M,) + Y : Output (M, N) + X : Input (M, N) + BIAS (N,) + SEEDS (N,) p : dropout probability """ # fmt: on - BLOCK_SIZE = META["BLOCK_SIZE"] - row = tl.program_id(axis=0) - col =
tl.program_id(axis=1) + BLOCK_M = meta["BLOCK_M"] + BLOCK_N = meta["BLOCK_N"] + SIZE_BLOCK = meta["SIZE_BLOCK"] + + rows = tl.arange(0, BLOCK_M) + col_id = tl.program_id(axis=0) + cols = col_id * BLOCK_N + tl.arange(0, BLOCK_N) + seed = SEEDS + col_id + tiles = tl.cdiv(M, BLOCK_M) + + # pointers starting point + x_ptrs = X + rows[:, None] * stride + cols[None, :] + y_ptrs = Y + rows[:, None] * stride + cols[None, :] + + # go over all the tiles, one by one + rand_offsets = tl.arange(0, SIZE_BLOCK) + rand1, rand2, rand3, rand4 = tl.randint4x(seed.to(tl.int32), rand_offsets) + threshold = ((p - 0.5) * 2147483648.).to(tl.int32) + + # binarize masks, save registers + rand_mask1 = rand1 > threshold + rand_mask2 = rand2 > threshold + rand_mask3 = rand3 > threshold + rand_mask4 = rand4 > threshold + rand_mask = rand_mask1 + + col_mask = cols[None, :] < N + p_scale = 1/(1-p) if p < 1. else 1. + zero = 0.0 + + if meta["USE_BIAS"]: + b_ptrs = BIAS + cols[None, :] + bias = tl.load(b_ptrs, mask=cols[None, :] < N, other=0.) + + i = 0 - # compute memory offsets of elements handled by this instance - offsets = row * stride + col * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE) - mask = col * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE) < N + for _ in range(tiles): + block_mask = (rows[:, None] < M) & col_mask + x = tl.load(x_ptrs, mask=block_mask, other=0.) 
- # load data from x - x_ptrs = X + offsets - x = tl.load(x_ptrs, mask=mask) + # optionally apply a fused bias + if meta["USE_BIAS"]: + x += bias - # optionally apply a fused bias - if META["USE_BIAS"]: - b_ptrs = BIAS + col * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE) - b = tl.load(b_ptrs, mask=mask) - x += b + # optional: fused activation (while the data is in shared memory) + if meta["ACTIVATION"]: + x = meta["ACTIVATION"](x) - # optional: fused activation (while the data is in shared memory) - if META["ACTIVATION"]: - x = META["ACTIVATION"](x) + # randomly prune and scale + if p > 0.: + # generate all the random numbers for the block at once, then reshape + keep = tl.reshape(rand_mask, x.shape) - # randomly prune it - if p > 0.: - output = _drop_and_scale(SEEDS, row, p, offsets, x) - else: - output = x + # prune and normalize in one go + output = tl.where(keep, (x * p_scale).to(x.dtype), zero.to(x.dtype)) + else: + output = x - y_ptrs = Y + offsets - tl.store(y_ptrs, output, mask=mask) + tl.store(y_ptrs, output, mask=block_mask) + + # Update the pointers + rows += BLOCK_M # needs to be updated for the mask to be correct + x_ptrs += BLOCK_M * stride + y_ptrs += BLOCK_M * stride + + # update the seed offset + rand_offsets += SIZE_BLOCK + + # cycle through the binary masks + if i == 0: + rand_mask = rand_mask2 + elif i == 1: + rand_mask = rand_mask3 + elif i == 2: + rand_mask = rand_mask4 + else: + rand_mask = rand_mask1 + + i = (i+1) % 4 # fmt: off +@triton.heuristics({"SIZE_BLOCK": lambda *_, **meta: meta["BLOCK_M"]*meta["BLOCK_N"]}) @triton.autotune( - configs=_k_configs, - key=["N"], + configs=k_configs, + key=["M", "N"], ) @triton.jit def k_dropout_bw( - GRAD_IN, GRAD_OUT, INPUTS, BIAS, SEEDS, + GRAD_IN, GRAD_BIAS, GRAD_OUT, + INPUTS, BIAS, SEEDS, stride_grad, stride_inputs, - N, + M, N, p, - **META, + **meta, ): """ Apply dropout on an input tensor GRAD_OUT (M, N) + GRAD_BIAS (N,) GRAD_IN (M, N) BIAS (N,) - SEEDS (M,) + SEEDS (N,) p : dropout probability """ # 
fmt: on - BLOCK_SIZE = META["BLOCK_SIZE"] - row = tl.program_id(axis=0) - col = tl.program_id(axis=1) - - # compute memory offsets of elements handled by this instance - grad_offsets = row * stride_grad + col * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE) - mask = col * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE) < N - - # load data from x - grad_out_ptrs = GRAD_OUT + grad_offsets - grad_out = tl.load(grad_out_ptrs, mask=mask) - - # optional: fused activation (while the data is in shared memory) - if META["ACTIVATION_GRAD"]: - input_ptrs = INPUTS + row * stride_inputs + col * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE) - inputs = tl.load(input_ptrs, mask=mask) - - # optionally apply a fused bias - if META["USE_BIAS"]: - b_ptrs = BIAS + col * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE) - b = tl.load(b_ptrs, mask=mask) - inputs += b - - act_grad = META["ACTIVATION_GRAD"](inputs) - grad_out *= act_grad - - # randomly prune it - if p > 0.: - output = _drop_and_scale(SEEDS, row, p, grad_offsets, grad_out) - else: - output = grad_out - - # write-back - y_ptrs = GRAD_IN + grad_offsets - tl.store(y_ptrs, output, mask=mask) + BLOCK_M = meta["BLOCK_M"] + BLOCK_N = meta["BLOCK_N"] + SIZE_BLOCK = meta["SIZE_BLOCK"] + TRAINABLE_BIAS = meta["TRAINABLE_BIAS"] + + rows = tl.arange(0, BLOCK_M) + col_id = tl.program_id(axis=0) + cols = col_id * BLOCK_N + tl.arange(0, BLOCK_N) + seed = SEEDS + col_id + tiles = tl.cdiv(M, BLOCK_M) + + # pointers starting point + grad_out_ptrs = GRAD_OUT + rows[:, None] * stride_grad + cols[None, :] + grad_in_ptrs = GRAD_IN + rows[:, None] * stride_grad + cols[None, :] + input_ptrs = INPUTS + rows[:, None] * stride_inputs + cols[None, :] + + # random binary masks, save registers + rand_offsets = tl.arange(0, SIZE_BLOCK) + rand1, rand2, rand3, rand4 = tl.randint4x(seed.to(tl.int32), rand_offsets) + threshold = ((p - 0.5) * 2147483648.).to(tl.int32) + + rand_mask1 = rand1 > threshold + rand_mask2 = rand2 > threshold + rand_mask3 = rand3 > threshold + rand_mask4 = rand4 > 
threshold + rand_mask = rand_mask1 + + # now go over the tiles + grad_bias = tl.zeros((BLOCK_N,), dtype=tl.float32) + col_mask = cols[None, :] < N + zero = 0.0 + p_scale = 1/(1-p) if p < 1. else 1. + + if meta["USE_BIAS"]: + b_ptrs = BIAS + cols[None, :] + bias = tl.load(b_ptrs, mask=col_mask, other=0.) + + i = 0 + + for _ in range(tiles): + block_mask = (rows[:, None] < M) & col_mask + grad_out = tl.load(grad_out_ptrs, mask=block_mask, other=0.) + + # optional: fused activation (while the data is in shared memory) + if meta["ACTIVATION_GRAD"]: + inputs = tl.load(input_ptrs, mask=block_mask, other=0.) + + # optionally apply a fused bias + if meta["USE_BIAS"]: + inputs += bias + + act_grad = meta["ACTIVATION_GRAD"](inputs).to(grad_out.dtype) + grad_out *= act_grad + + # randomly prune and scale + if p > 0.: + # generate all the random numbers for the block at once, then reshape + keep = tl.reshape(rand_mask, grad_out.shape) + + # prune and normalize in one go + output = tl.where( + keep, + (grad_out * p_scale).to(grad_out.dtype), + zero.to(grad_out.dtype) + ) + else: + output = grad_out + + # write-back + tl.store(grad_in_ptrs, output, mask=block_mask) + + # optionally accumulate the bias gradient + if TRAINABLE_BIAS: + grad_bias += tl.sum(output, axis=0) + + # Update the pointers + rows += BLOCK_M # needs to be updated for the mask to be correct + grad_out_ptrs += BLOCK_M * stride_grad + input_ptrs += BLOCK_M * stride_inputs + grad_in_ptrs += BLOCK_M * stride_grad + + # update the seed offset + rand_offsets += SIZE_BLOCK + + # cycle through the binary masks + if i == 0: + rand_mask = rand_mask2 + elif i == 1: + rand_mask = rand_mask3 + elif i == 2: + rand_mask = rand_mask4 + else: + rand_mask = rand_mask1 + + i = (i+1) % 4 + + if TRAINABLE_BIAS: + grad_bias_ptr = GRAD_BIAS + cols + tl.store(grad_bias_ptr, grad_bias, mask=cols < N) diff --git a/xformers/triton/k_fused_matmul_bw.py b/xformers/triton/k_fused_matmul_bw.py index 49d25f3c3..bfe322924 100644 --- 
a/xformers/triton/k_fused_matmul_bw.py +++ b/xformers/triton/k_fused_matmul_bw.py @@ -10,6 +10,8 @@ import triton import triton.language as tl +from xformers.triton.sum_strided import sum_2d_dim_0 + # fmt: off @triton.heuristics({ @@ -144,6 +146,6 @@ def grid(META): # The following ops can also be handled by triton grad_in = grad_out_ @ weight grad_weight = grad_out_.transpose(1, 0) @ inputs_ if trainable_weight else None - grad_bias = torch.sum(grad_out_, 0) if trainable_bias else None + grad_bias = sum_2d_dim_0(grad_out_) if trainable_bias else None return grad_in.reshape_as(inputs), grad_weight, grad_bias diff --git a/xformers/triton/k_sum.py b/xformers/triton/k_sum.py new file mode 100644 index 000000000..998d6f04d --- /dev/null +++ b/xformers/triton/k_sum.py @@ -0,0 +1,66 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved. +# +# This source code is licensed under the BSD license found in the +# LICENSE file in the root directory of this source tree. + +import triton +import triton.language as tl + + +# fmt: off +@triton.autotune( + configs=[ + triton.Config({"BLOCK_M": 32, "BLOCK_N": 16}, num_stages=5, num_warps=1), + triton.Config({"BLOCK_M": 64, "BLOCK_N": 16}, num_stages=5, num_warps=2), + triton.Config({"BLOCK_M": 128, "BLOCK_N": 16}, num_stages=5, num_warps=2), + triton.Config({"BLOCK_M": 256, "BLOCK_N": 16}, num_stages=5, num_warps=2), + triton.Config({"BLOCK_M": 512, "BLOCK_N": 16}, num_stages=5, num_warps=2), + triton.Config({"BLOCK_M": 1024, "BLOCK_N": 8}, num_stages=5, num_warps=2), + triton.Config({"BLOCK_M": 2048, "BLOCK_N": 8}, num_stages=5, num_warps=2), + triton.Config({"BLOCK_M": 4096, "BLOCK_N": 8}, num_stages=4, num_warps=2), + ], + key=["M", "N", "is_fp16"], +) +@triton.jit +def k_sum_0( + Y, X, + stride_xm, + M, N, + is_fp16, + **meta, +): + # fmt: on + + """ + Sum a 2d tensor over the first (strided) dimension.
+ This extracts some speed through a parallel sum across the second dimension + """ + BLOCK_M = meta["BLOCK_M"] + BLOCK_N = meta["BLOCK_N"] + + # partial row indices. We'll reduce over this dimension + m = tl.arange(0, BLOCK_M) + + # To get some extra parallelization, we handle several columns in the same thread block + rn = tl.program_id(axis=0) * BLOCK_N + tl.arange(0, BLOCK_N) + + # the memory address of all the elements that we want to load can be computed as follows + x_ptrs = X + m[:, None] * stride_xm + rn[None, :] + x_sum = tl.zeros((BLOCK_N,), dtype=tl.float32) + + tiles = M // BLOCK_M + if M % BLOCK_M > 0: + tiles += 1 + + for _ in range(tiles): + # load input data; pad out-of-bounds elements with 0 + # NOTE: make sure to accumulate in fp32 to prevent a trivial overflow + mask = (m[:, None] < M) & (rn[None, :] < N) + x = tl.load(x_ptrs, mask=mask, other=0.0) + x_sum += tl.sum(x, 0) + + # move the load pointer + x_ptrs += BLOCK_M * stride_xm + m += BLOCK_M # update the mask check + + tl.store(Y + rn, x_sum, mask=rn < N) diff --git a/xformers/triton/sum_strided.py b/xformers/triton/sum_strided.py new file mode 100644 index 000000000..8d15bd5e7 --- /dev/null +++ b/xformers/triton/sum_strided.py @@ -0,0 +1,48 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved. +# +# This source code is licensed under the BSD license found in the +# LICENSE file in the root directory of this source tree. 
def sum_2d_dim_0(x: torch.Tensor):
    """
    Sum a 2D tensor across the first dimension, using the Triton `k_sum_0` kernel.

    Args:
        x: 2D tensor with at least 4 rows, contiguous along its last dimension
            (`x.stride(1) == 1`). Its stride along dim 0 is forwarded to the kernel.

    Returns:
        A 1D tensor holding `x.shape[1]` elements, on the same device and with
        the same dtype as `x`.

    Raises:
        AssertionError: if `x` is not 2D, has fewer than 4 rows, or is not
            contiguous along its last dimension.
    """

    # Validate the rank first: reading x.shape[1] on a 1D tensor would raise a
    # bare IndexError and hide the explanation below
    assert (
        x.ndim == 2
    ), "This is a very specific kernel, only for 2-dim tensors and summing along dim 0"

    assert (
        x.shape[0] >= 4
    ), "This is a very specific kernel, requires the reduction dimension to be at least 4"

    assert x.stride(1) == 1, (
        "We're expecting x to be contiguous along dim 1, and non contiguous along dim 0.\n"
        " You would probably be better served with torch.sum()"
    )

    M, N = x.shape
    out = torch.empty(N, device=x.device, dtype=x.dtype)

    if N == 0:
        # No column to reduce: the launch grid below would hold zero programs,
        # so hand back the (empty) result directly
        return out

    # Manually handle the scheduling: one program per group of BLOCK_N columns
    def grid(meta):
        return (triton.cdiv(N, meta["BLOCK_N"]),)

    # fmt: off
    k_sum_0[grid](
        out, x,
        x.stride(0),
        M, N,
        x.dtype == torch.float16,  # only used as an autotune key
    )
    # fmt: on

    return out