From 90ce8f159cd210ec87822710662ac6c3d0244946 Mon Sep 17 00:00:00 2001 From: Orhan Kislal Date: Tue, 21 Feb 2017 10:26:45 -0800 Subject: [PATCH 1/4] Graph: - Create generic graph validation and help message to standardize future graph algorithm development. - Expand the design document with more detail on the graph representation as well as the SSSP implementation. Closes #105 --- doc/design/figures/graph_example.pdf | Bin 0 -> 23083 bytes doc/design/modules/graph.tex | 97 ++++++++++++++++- .../postgres/modules/graph/graph_utils.py_in | 102 ++++++++++++++++++ .../postgres/modules/graph/graph_utils.sql_in | 0 src/ports/postgres/modules/graph/sssp.py_in | 60 ++--------- src/ports/postgres/modules/graph/sssp.sql_in | 1 - 6 files changed, 205 insertions(+), 55 deletions(-) create mode 100644 doc/design/figures/graph_example.pdf create mode 100644 src/ports/postgres/modules/graph/graph_utils.py_in create mode 100644 src/ports/postgres/modules/graph/graph_utils.sql_in diff --git a/doc/design/figures/graph_example.pdf b/doc/design/figures/graph_example.pdf new file mode 100644 index 0000000000000000000000000000000000000000..fd29e5f15db831c47ab567bdeb7e2523de565900 GIT binary patch literal 23083 zcma%>1yG#Jw(rs4uE8O=5AN=63GNK;?iM7tyE}y71b26LcXxLl+57B!&a3=w9#J=WD#L z-fxZ#>sc%JKEf9^D4x!9r+H@9spqThZ4UiwEfr*6A4?Myl=mt!h2{B1Qoxw4h|hWP z%bIKhT5q)$kEIu{Mfa{VxtEJQ#sDH%4=ql*^(Xh=ESUW~>={H%%THu5hM#_Y!%U<8 zGMe+XaxpbCeQ1s=D{183@SeAywV@(Iv}&Z$e-y$(k=$J;4Ulr!(f4i8>1$EB;q{r% zShH@1yhk{v%Y7Vo)Lp;s-sIa=Uhy~e?Jv*KaUBX5C3Yd+L6}sgOjy6VvY9w!_Hc>u5W6`c8@RR&%!m2Ue`x{^Pedx z_jndC*plmB^piQIglkr9r-ARI@@2Ey8^PPAtF6x?}Nzzeaj}^-lX@Lk-X)3hTobseL29a5*M zc@i~SQr;|oo_ef5Pu}a@?RrsT?=jvlDja8x0^gJwWmbYxM*$UJ0{g3b6#$l~?<^1x z3y}fCT!QSlp?-^6MA%@f*@&zPj1=zZpKcdw`o(_?KWt_&ck`ndmN9f>^MQbqFME9n zU%1@L&>2{$8JbaH_atb`s|DV6KCeP$B|^-$MuSrQqAf7UvI3RBZ_J2%>q%w~p$p5o zp@{>;56$SXc;ObFt@X8rjgD z^gzihX58oW$o+;5PCF}uiXlgf5 zdZ5xv$^jiqZa%gQz;Wrqy5`QYyDzi5SK5AVV%9pwms&JBLqxKuH|2`PXhXPBXIM-! 
z2FC`puR})R1x&(05#{98(h8lxuha(LoA5D8FfmzFsvU2!+3F?)u@F5MKCRpoMp%v3 zEixf;Y>J{IToywe;j0npt<=I`PZmuqcZv0=HFjdN)t1${O_S@HYFfEPuv=>HD_gGA z0bdPUaTp!>xiagV-kK|8G#8^D#89>gkp#L2{8p_PScZ&TSqc=dm0Hx|*XpqhnFa%^ zl4a7T^%SW!Xr&ld#3C(0u~30bVoIn5Nd{jOkJo)9P@JBg;q`8IIPi6<3UkW}nDe zeIG5o+OTmi&4l|r+MBsTwXm0XnYSW&KD+LiBY{LD$mzE@5A{WgTw_I2Ur$T8 z%x#i1PD>(mkbXuI7{*0~7>=?CMn5uWgteDn*!0;!$w6nBX%MJR-W*MlVD%Fl|D`~K zAfVF-DfD|Hh~(=V8_|nTJ1gCPeB4jc){(&zbnBfq~G1|~^gY*HCn|ct!5$<+h;^Ji_PynlRRBu|L(OgB;z*R7qLyC5PW2Pz1xuvGmL zOio0h(Z^xqdkg{FT572-c)TQ%W}71whW%mTphyOXAeQjy)1C_KkF-KskEA1ex#G52S|=jESm+-?_> zrw$TM)r3hFUvbGiZx(FjgTBtW14&}_PZf1)gCa9cFyE|dbA=gxKVHMn~U z(lC6#x`~h^;9V15@RS-3A9+F?zm_Xt;eBEDqV_`m zxu(WI1yu-YE12JbNPIuaMiExkv<0#PsnLTm)8F?;zH9hPk;)qJ+0>{rlFqJISXETl zbW}LOKuo>^U-_{Se$3?p&K;2|>^iloqoQK3|J?|ejsw%yJZ*ss054&XdzxK6lGgpA zc2q`O*~TZ}6fa`lx3&7*!7(Gf4rhGQO@4LPbMK|%P&B=w@$)da_Dc9*Td#XzS|h2s z96$52i~88AqS+HlA9>mM-Q8wm58jLG4D$Pc@&bSKqH99tUF(G)c~<2RlzC-Ns*M8J8>wwr8FNE^~unsHR!e2QCc$j zQ^?o?Y`&keG;!=Y&=MNQzmhD{{|G%4!JXKIpg{A4b^Gx1PZw$=R2++pZDY2dj7-a!RPeqHudd zOudPqG9^aX>Vz-;60L4jG#y5u==xeJU>8yL6MaMZSVTPT$mM=AECcL*(wK$vifB;@ zBHoLSevoHYVLaWx7o+AuWI(O*<&yWltkCaAS%D&q4FzN9hf zT7sF^(|C|QAloJc0#&2{#Dq?4SjqYQ#$nTF-+g|c6co%sQ)L)XZ+9weZveEU%y==~j9qJyALYMoa zCw%&)TQ3HX78na0)1^CGVK4NIwjS~)1Q&Fu@&yq`&?Om&CRlmwZF!|vSK^LaytC>g zI`p#^*k&=d;I&Gj-V=Zn#WY;b(PDM##dw|kMVPx(KcpEO8R7$)lZtleVR!2GsCshM zh?EACw%N>_9!*j4Lrq5`TC>zzCpH5OV6Vk*X|s4F zc6JRiQZ(*uqfpv&oNKBdz5igR-g;sdnK$Y&gEb96-0Js3cs<)VPvjnK66RY?QUIRyrGB+P*Z#yZqb`Kc$=HQEuY0 zR%9DGgVBx7*}eUPf-{j(936m?$RU~(CNh;tEesQ7IrbD~Ml_@gOv~crazpA@#)Lw- zRq2w-_CpAvO_RB5r2bCX5fMUyI%d2cb`rCy{?6(0u=PDd;5;Yq%T{Dpj9*pF8}F7u zV`Ov$FDS9oX{BHS%mDP*{$1^PJg=)O!~WaP>1Mt*ExPLwU*wL#WG&R;SgyEIXciT; zP}*f96;)*?iBF+P{HGowAqR9!MFP_3lK>RTa?zo(1jl%*}|ei zpP1-`&_YS8p~kImv!+*S>ahej?=m*#O!mGH!`Jf=_q{a3ol)bhs`q`u{z@)x!bIZf z_gL9H>oQ2d*);1j_6h|eZyIl8-NK90$qTPNhZHaS%82IFO@%pA? 
zT3sh+UDyL2(@kiyJs6r8pL^eJ8DwcArFCW_*F>tgqp#MHCI(Ko+ST&u>!rkAxASgo z)?ry;aIv?(lyH(kKkUYL`SPk$G99@JSC{uo2kCa(^v(VK)Cb}?>x5sxN zXxW#tEsvDhvu#1150u5Cl3zn!lSZ`8!!C+5Xcff(+J+bn;$ZOmUQpEMiQQ+)%bLfo z%7Wjn-$8lFuUuh))<*wg<&UR7i2aAF|NJv^urvRu{qxN9ANZ}}W(y=@P|!E|>(?G= z?MTG@H;O9(9c-NJ4S^0sY=0LB+gLk()H@LU!S@eVmjxP`=?mGo5@|Ai6tJhaXRpETJYi7-%S8S9RHO5 z+mAn;{_*4Q`TpB5gQ5OM?Z4apW%Yk*4EWE+fd8=kuY(N0pXh!>NA4qz41#}7Gpzqi zfI$T4Y-R{l5*Pa44>>XtT~w48F@wij*;^-2Nr=d9aNJ>trnn_AAQVF+eHFlh0?3_3 zAjQy8sJpIFL{R9FlI?Kl6O^&q-7Oz~>`~jlCW_Yv0S}NIQUN}UKRx;U!NuDPZuvJAe6=e6G za(en9T?T{^Jw?r^I6Ch7zlKH4`+)#V%Q_6X(otkp~+4tM2Mt8A_qg8fo}7Z3$mNYOOvA>~8*&KPLjYf&{5u zOTQ)vveb#~*@CBl3p+WnbO6n=pr<)tf6nr^8EOyWyiXZEd;Z2ch*uz|LW3mKfDJE*@E34WYkqeyuVYPQ0PH|pi)mR*g+mmR``i8# zA)HpI^`?v#hMmiK$$MU9B112A0eHYdy z?ga>Kpl(+jw7>u~_@tN`H14H{P_!B;CGKdsQiIgg()=x#!oJ5b@=3@MghA)b;P zU0w=LRA^C-Q>szGO~6gKKnbDPdPePMa#Mzee5;6;lox||9COlj()M6kTxpC&!f}!Z znSA_wl1q{-y*Gphtv4`0GN9%z zUNr8KIi8`*^1kyDd{o&K;@rv z%1?{vCi(JLOL@x-i)++O^5km9(6nReHQ5F0wd$2!gyf|e)EIOt6wh<`6tl{*^4rDR zHGPIUCWw~w!UWSgi*rJA+VwN_^EM5?{ER7`q|VdG>AvPc)#H2OxYaZHH&h5u-Pzn8NLlg;XzR~Em^dlp*}`6hA@@~ z2ak1uHA9DOKWVqwfM*{=Z$uxi>uQ@b^L064TjTg?M0P%6-?_)CXLW&h@wMTZ21yzz zMm(%Pz9?=VXBh=7^ z{?q)kJOl^CFPH<^d5x_Z01K+qY8iU{{KOGFJ!4Ty(DU`k`Uhz*MOiq4A5ih779iApD_ z(W=+%h=pr}6BY7e65?WbkUHC!+)u4a2XCKj!ET??GixQ*oGjV@-Ypp(FOSykRJWrh zz$L_AM}7}M>nhomDZkNeblHWNk82Koj@|F0*^ju?egMA2KqLiJ2TY12^=O9}N|sAv zNN$743;y14s@A>o+bM~Uj?bfFE}1MDH4CVHsGSxf(+{nr(PrkQXl3YT$zv358Zw(b z`-+w;Fc~95>7Z`hoE-Tkb1M-}nnw0r9j?Z_dd)-}m^Gw0g0gqBle%L)j6Zli`0UMU z%yKQOs7X zY|}HXIp6COY7|o3Y^591M6~R+#IQVFHLfW$Urr*?nY=X^-U{84CtHa0eOw*6 z3pi~pxGQ*e*>Hw-Sy?@7x>iduP}fUIQOWj-NZ-NN90uEGG<&nP`seOE6Qn>duE=!m_44YD6IEDBvKZR5Z5 zd(C+^yy=fQJduBpPsobkckzUIxpF19GTvLi54e8*@4Ejl8TyylX9F<(Bgp?K>px{5 zB3x2fSV-RiXha0~qhXbZwEt=INB#fb`u7hUIQ%!c|1khNC(~aF-swYq1Be);&5RsA z^!?xZSNlI@@_)A}tna9AX=Cy?c{u!o9N&QU4rVsiL<|6WmjCGgN7Db_F8w9=8I+w2 z9RI?XKcb%T0~f46?hDPVKded`eSnIYqZ^&XAK~p__yHvP){cLAeRv4?i(j;18U6&| 
zZ%fi@>XITdG{zqd82|MF09k>o?Cc+ARcyXmoBeeY3HbY#@;_+dAH4dHn@qsJaOr=$ z%G8ctmg-?d4m!U^cWBh1HFHr!k5tPNp0j-nNVm^#98{Hav_2<5jwJ!F9Tf2AAq2kEN)-GYdX@nd9JeOFa8qmNttzlQtE zk&kTnSh9~0{LSqD6`ucO^M4UE)8AnKuQvk#M9dtl9DgGHzq}f7+6v>UB09(K`SWZ_ zdP>qnSCT6rpkUD`iZS00B6>iMQC|-lHChxB;S5$WhAcNX5ta&viTZy-6IpMjo-WujVa=<~zLc8g6pKDpDF7rh}zJcrZjRzZj1ue{&pvr=OojXOhN>q|Mrj>xO%E4-uPmi40W-1?UM)?|pI@ygv4V4c^02!+D%6jHTSZ;;raLyO@O z>e1_cnfjoG7>l-}pk4SXNfq-!{5zn}pU_(5 z^gQRc=^`SRfX&u5pBE)O3Yo#hWeA=kb$Pc*PJ^r^GH$P7E@D9oIUl<251U?>t$b2X znG|9NBTY~H-Q6qdM-R7l9^Y|`D~rdx#N6MrFB&#hzTe76h07AnX)YHFy|Rmrp&a2} zSGv`JJf@Z`1nim2k!h2WY3VyJ$KyVh2QwEsKZJ&bN`-#sa2 zXx-K0#K~G8{CvgKsI*3QyrcgZw>Obnt41ezoR2VFXv$m*xx~6eO;`N-)kF1J;aTVT zGoiH0@uIsXukQVE_f5$Y#*@Y?^DDDq^=0T~$RQg8ASwBFk00L)4;T;=%oiu9b6VjF)e-!~ro*(||j9nmq>v`K>;9%C`( zG9q;WQk|KmWA9T8cWDn@j+@(?x-(3wtzu6VTyXk_IKBzWJiJqHcco^T6@g_VK+JQ# z1LT8i`~pNHqu>ql`yB1m4yCQ4?&+7Nl)WLJ3B{3Z;0=^wiLOY!vb-IymsphEH%AtF zR_+u@gNpRB`2h$;aU~!4G8*L50AtcA1AwTeAwitY6o#XK%e2Q5TJbCBV?LV_zjZiT z4I{X9Hko6J(-&>S_``~@FbOm?DtUgeL+92x;c-xHh!UpHOjg{+_i_V%f=dz)`Th=L zP`dF#LccmqSa$*t5mO^Ly+QLAN>xncqHi5zP`(g3{kTyT+BS{7g`@QN$Yf zUNCn%VE4sK$YnyzPWpf=RbD-r^E~7_BIn8%Fmps=v6E)sqBZO3Il1gclF-((FBv`} zV2Cw);8S#|Vp>*-=-13G(6uIwH3j{IUH0;c41WGjVSIwJu@~p(sJO>^QPo`Dw>sNt z+G7J8+3|^P#P5ZiFWB4JuK1^|k;Vgdb6{3trfg4}h&MtYtxk|T<5wNgQ!wAb0>OQl zv)>ybU!U&IW^4L6SDT$qrxw>s41@0;-A?V7_a3TimW+(^iwX)!Dagos`rQl-c_tOD zlcx1?n#duYK5l(9HsSVEA#Pt*HoKcExm|F03Ry3Up`SKbYu(bbqo!qr6>5?}d(sz4 zjEV4=C$N=Wy}(TclE&J$v~+EK6w51gWN(EkA3~K) zFdqGLyHxS;+;@MaZiVL3XY?jg-bb$xGp}fKO=G&C|5B`kRJPx@L7@TXfaXZGbDElyfs(J5p_o0xH?cjJi zE1cAvS@fc@JG-6K)PlB+rYfN9l<|%&;_mJMxEXM5A~ZFJ(j8Llr%?-1!Jy{Qu>Te& z`r&q>ZE4qOT8!ohABKacpz7JiiaM(UH6zO*t00bxtelm*yI!zUz*LwQE838~gMZ-X z#@*>qrg~DP(FTjULP$@uQQ;=XwXFy^my^Wgg&$-~(=$xq83c&RY#132Q< z0oo|uZhgi~w`I8J*8T<2h|kREcXqPz@^^2aN@WUTlz!xJsv{1CMh}-mPhjiFXvX=i zU-q*p{FAfBW8>yUNBtbsfe?rPtl@B%HKBI0qO_m&+g0|jjc8tH2_>pvm1W|H zK^ptcL8qQ0bES!t^#{UZSvkr~Cu;@Lcd3hTTi3ydmDsAdAG1>@V)h4h{47MShe>sQ75<|}y(e5E*Zu|I;(W-1&Qw!q 
zY+7z(Dl0l#>t6cQrIp2AZyf9?$EV8Njy**`!f`t{rpfFttQw!!Zye$P;3Y*^i=|17 zSVcu1?vtlp-t5)UUks?$pxTKJ?cCK|BK~C=97UP!N%mY#A%0o)T?zt6ohF0YyDt(% z@i**jLa@Dj1y=q`SIh>5se3E@v)idFs<-z4@ zde-Bzad5j6Hl@YWi|UllHbqOge*b3?qQ&jcTB&B$pK(VEyNCBMZ-xV^eYtt1*->kF zGSRV6=X?8xlKR zYR9$}6K%lE6csCbQ9{|J2XXO7$)8z`-KfX8rjzzYA@U3da^K*I)8Ko+tL9=lC|9+p zlSJvlR&fm*@Dxj`IW}MNQOtyy$Vp}7Qq$S!4a*I!XKn(*;`R!^8fks0V5$gjVskEO zT&fysX1}F#Tj0UFb+&Xj?4Fc#T8X2&ovU|RajSgcjvmN}SK5W$*||G9L*2P*B3*J2 z>Q&H=9|UO8)BxZm@Ugljx6@BPtBNM$gA&F*Wm`xn0R{#EmF^^;%0}ED=TzG7JskJY z-xZTBGwf5U#kju7G{S2(dB0HjUfK7+zQ?R*ZA6?;*NEHOd&a7!Hd-z3Q@F-?+#R6p zkB(a%GfMBbRhHbu(dlIEPNT4cIIoprJu9SC$h`s!GzXQ}s%U&7aSLvl zFX7LD!%7L7(F*#JXpp7p6qx1_6M*zMuPJ`iqQj`vdJ80yt5PjAsKD;k!M+y2VU8Ll z4%M8>Ly+;+`Q%~&zYl~ayvv=E2wS51^h1+!mVf9O@WilEup(;h(K&+^k#>;^Wmv|gC}7~GZ_8fpe6>6)vL2y(6Jpc#txiQr z8PL*m^lVXq{G?T7R~b0WiTWc{KDt0Xu>9(4A)h$g~>GpWCzqzlaJ z3HNnWyU*GoX&bn|Ce!aWxjTtwuqUS4r-kcd)phk?vc)g?PP*;tesfk_gk}`PKE2$R zw#)h+-u(%xumWu+5Jhg>-Xo*1k%5^Pz|6dwl2Q(HC^J68qrT?;I59VAM^dhnZ@rC) ztm;-8J!$Qc+l880{IMmo^HZ9scM%vF3VJ==oCT_za4p?$Ql-R0k9D=5Jv3Ix3_fqC8seU}o)td#Mh5u2pGKSq z^FOTw4XUTHBjGT5jdx-3rEuP#JzJn6wmk0cRm+a^)LP=O{q!f-$T}L_;B7BnE-#=a z9`6Qfc3EO#t}B|AfM&9lq>#E=Kjdv9<2$UX(HYdNoJC5ir?`LXm&wbcm`BSt2Q7Zt|9d|-?M|*kD zYlqdNosttF-L}9@QN{EKJI)#+&BGcp%uI0CfRGi$8RFV)A9|j3Pi=vLF%R0+GPT^I;BkDQJv#FBQff@-b zP5LvBS~uj>Q}A)kGbAz7HC1`H;v`ms;c2$8mDoAdn}dA??T2~%Bn|g`STo7WCjZZe zFCecY^ZydFpM^@_j(tbNuf>M+w@BDm44$FPUqClXLQZ6v;rI zLIXUqF8#nL8rrc0GfF#a>S+wj(;-I=&O+rB#NW1@35Nm@S6DTPehL+}i6m%z!I z+<6gx%vSa#4qfi`YY*+P{+mKmWe4mt%2B`DiS5Or!;sWM3xe25g4LYU*k)TU%HAg5 zG)H_z(=5%3B<3tOc$aKhX9k(Oi!M+&3yf;?M4Nc`&2lU+a02mcD{BUECpnw`A+^Cc z;-R>OuzW13tWWnS37-<9>X{h^tN^zLfjH+CZU}p~_Yh)8LYvAJ8f+~TpFH`A%6*|4 zv-QkW+Y#N6adE>ogRI-X^UQhtzJx?RhXPmRg{!U8hGl7$O#@9A(0}Gk-EA~G) z0zgjsz4#y0rDSWva`@{4LaSK;14y!!^#`BlnW@<2!qDu$b{jl)tTME!U>(sF^8K=U zckAiSxkU!vvy1>aJD>C;FQI`US>P}*<{aAzp9=zll$3H^vE^Z61Yn!d;t@Yr$jBZ> 
z;i=M@s_9qJUrr@v$M?Dd#q_5HXLlqlHy32{ClFB&tdNjG2Ey%3$jE?w*Ql5nrN`A6Jg(}@}(M+^PT=p@AQ(s4?Vs9*#W@^x{j9$zf1_t0UIRh!*;<~xi>hixH*hcX;gzHtXdIF zKc$7T8#r-XbXh|QQ<|z)Wp1}&{V+RjCPC}N7;jhOMje`u z8S?B}s;2HjWudd9e)Y>fS`H0GaNGOxu*)2Qeh3rlOG1ECF@jU<5@PXse?mvZ_s~LG zK1~Q=IN0H>J2u>1wrWK=SD)Mw>|Pmawi*;R00o$J9cRlVc7dExQrlq zy0-o5;`M7srEVH(Aa>n26P7YvYm{1k@fQ|j&_Skj6D($)@HebK=ZN%2(2%HIIA{`!*!6&g3KO0lweBTYX z;-m(F8;xnYYtzz2gzB-Q>vttRN+Dj;Tdm4WVrd@p$HH3*YPD+!(}Q79trSTtZK1B% zqOBleU@>I5c$jYCzA8U~ADm-|jxI2EN<JzeUGXJU*GiZL2UOrF1*I+Q1$`+vC;dRBV>sT?! zKv{5I{yN;LE8wuvZHN`!Dglnq$XUytdakX6e3xQ~|DM@cT)z2p9@7W;>A`O|%f`Dt z=lozEVz=(v0}KPoC^+RNzPZ@q=NmE@-Tm{lBWtK-f6a-jw{>-lk&DsAX^zTEqW z>gIsum(^gy{q@9)zz7^c&bBU>6BZ^CCd+EUg!W`6wQSr3kQHv<$l0kh~A^8&(MUchcO=ofiB{N2|~7dRb9sAGRfJE?;_cKWo~32ur((ox5q zyO5}JZrSBJ635zn`4hy`LLa($pQVbPP5@f9=QHXeHeO9o=Ds^V5;_>#tam=YR_#eu&}aY8CsD*h^HJr-XFd47F%29C2tw)23%XGd0T079mzZ zVc%3(rz&8?w@wo4I>4mMUFHJ!^DRwfe?rOlM62^1rU1{inn|j-rhhYa*XWK9 zqB<-Q=g;g~0(+|}x!6*MMldLuoNd1e!b)_O1u8|pLhPZ2T`Bv>?rTIIq(rAr71;>M z6KBF}zwAd{S1Z5jojcE_RIc%TZ+Mk>=;Uo&L?i5BFRuz{ay{+!J=*O#(gC zK7D>eEd@5_P@7X49-=~?I7QkRQf^{uMVZYnnDph1yaCGZY^U~zE^G@DJP0}1A;@zC zL?LKHRDn{@YNuIDzvr0vBz5Y}NTFMwNGa%U8Iw+7N{7Y*`vKvkSdA~e-!pOkav9p~ zF2{gHLSx9c?sxA1`QUYp38semZobclP2wl?{Z_7pdL)e?Xm9v2<7FWAjjeaIx09v6 zkR}rCcR0b+{F(ZE5h10JDL@?=D^*%#hI@wRJ_UTA8m081dIlFo;lg`d@xKttw}5I- z)0Y?Lm6=c@f;u3|$_7i3825D+erABK$F=fFG0us^73j7S(u{_hKTLw2uY%Bz5J>R> zvj_U!!W6^lJFhl)%i1xo$l7Jj#r@o5#~J|X8+vsN@#cfMnQ!f*$Om+|t;nB)wolTS zp|5~>@vRl-v7USqqgGKzczUgs;}(2Hb}nm#JuMdTG2EtUFS5R}I5OCqB)L#UxoSsI z3*+HeeNm7^Q}9;pIWO_abv1=qp;4FvG-tWCY__3nb0HeuGoJ+9Z2ReBCz)>;xqztzO@>Q{iwD7|Y+VZr)D}fYLbEc(ohI7QO*_x@CHewk#z?4Z3W&4HPt}xNOiWK55T} z-sgxw_UN)pVM-0MBY2PSnXY{=(I#Lc2p;iVd79!dQa2sf=AppT4LS22FTALL{OKnr z*RFP&PIH9M;m;Z(2XcJ_l3@W+%s@{LdrLatHUjyOghM!Ceafa$PFTH$KC429G>IyX zD%;KLLz_f~G(8GGM(F5`#%JE=z=rfYpRK(veCEX6o|WTUBXM{SQO}SN z#TPVQHJ^C5T!J%}_nESCMf)Vfxs8aPFL_iPkY7Wb$7g9K_AN{ypNM5OLm#1i^qB5L 
zaNu%Pbg=Z~=g<5`wpkC^n$$gTpTLSvCGE^xDf8hJ?puc{U>}J_0XB;FIUkH#hPP)KpxvvtkB9%NGFAl8B z2A?h_XR0N3v1O6RY|cGiwQKdP2B`7LhmwVK>H5xdwiR|~fyVkS8^E58%gE}k#JEGS z6NJ#>@aDJ~9Dsay!iGMRArgcvc~**$=qprw=dOkBgu4|49XKJ9{n~f$7RYfRO08*^ z1HXvc=;xtUWaqw6#(2Wg^dB_#EDWn1Z`Bm7kzUH^V@7TWt2yfuHMb!%!rV{}?2%0e zKHXZcF=x{TWoelbAdz3q6w*;-C z-Pl%CQbQ|PDyAo+s9VQg%5b!#Cf^)cRFY**aSypPcsg;uy-dZ!EA>q$IjB#H2NdR4 zoXsRM&%9H_g6XM9JEE1}rtGS_jUDgA*K=8UaQ3dX&o|>8%KGe%^=qzAW$df|Em@GO zL(gDAcrsqeT{d);IIg{j7E+BZg=f=?5%3l!%07m;b2tL&mChe2T&3NP;_ha%!9FMV z$<+>`TT3J)l7>pr0_mML-(r)Ei7~5AkvrlSp~l|hdc$Plnd}o_qBi2r)tCGoY6Gz2 zD&r*sz!JZBdRd>@_aUM0b3tJYyq;>$$!A1T4Ckqgs~wAaz<}UvG8gZ zh4ym(?TxMvsl`7S|HE>mg|y4C9dDYZbwP=&{`{^AtSOu6)hbNS~1o6#)@Px8= z*nW5kX!Xv%b;lQx9GGmMV8U+?3gR8$9(XVGU|zllJ1&*Fi0{JsYD(kTPAIKR2;R$I z%!p%Eytlg#<=+AiC|cq!r(K_6{ue2vj1XaJhB@LJW4jeWwf=X<4 zVR*prRWJIR2@!ym6%evur|_g8mlExvolif?*x%15_dL`OY)9|vOdNzwQX<`rzn?+0 zuB~yu%+C;v*C-rBj^6d$J!D1X;+r#EN#m^T8;;))-?U|GpC|674@O+Vvj*yzvkDZ2 z8}p_*sn2C4ykX}I?B`SxgxQCn+P&pc9_<>uCODZK{OrwM87nBv8H1NntblC@3;*Fg zK`BkF+6$ur=js;@odLIapJ!>)3d@Nec6-Zx5*YHe-mzH}bZS{y`<6}raWF^tHTE6O%X@sC zR_XYjb)$2=BYXL_iCSJ-`83`9>HC~j#WkA~LOub!c*oM9X-QHw;)DzmM_4oP>0Oik z7*F;#@_lJlh(RBJ*U@xF_~Zi9-7}9bE|M7pmu@eE_x(+I$OFy`cMm9XUw^DuWOHcK zJXzE$3(7eGdKEE0fe`&=;=POwWR9{0R5&MaEUP5gf%(?g1J~Dy*%7@qwp)M@z5eo% zZ4D%ULe|#9&b!;L>WoUIZs%P0!UnDl_^NdnJOSD$YfpBbP>ef4_yejABx#|g(?cTu zwXo;su!pu+>}|#y^@I=67L@@Y&#PZ#Xylap{;6eNj> z=eO@}Wue_J7{ZU9nD%c<=@lC%!|tF=uc*8tRgtDnQTEJp6P5v`4qU$qB-N0V@cH-{ zs*YkZ$M>m6Y;x|kKwA5IEue;aQ_vZ)?8L7Z(dAG>qmia*e7vX zuZ6VF_kj@=f!{Yql5pGxU)Iwo9P2uLR+;mAFQM>LM^l7st{=IHj8YMe;&kjw^po9ea+WrqsbXwjon_KDo?8i=3!;DQ?&YTg!%Uh zP;_zDb=q`Uv}^DKr~R~yAjR8?T&o)wGS(RGLy|Y{&x!J-g>5^nygpR?3_(6Cf!GvW zHq&P5Ib~Q*-G|~*1!_xR5(6mbZn`r)sNs$U)zp+zAC5>C3GyE=ZMe|xm113Q)u$hs z49+4qd-P)9B)q`7t%}d8lJ4<|z2eLH4x1}m-C?!dQ@HQ&uSL|B-9dY+$67023S#0U zz)K@VWai5;Y=lGJ&N0x>56`6 zqU$0fR(xsN)2ZO1Upm52S?%F5e;Or;n#k~Crp{ZwCnj8VB9kd~-6j@MR1;guKI(Rc 
zK5AN~YfAoHoqWT1RE_46B&zwwfbwPk_3ZK1ODV<))7CV%$_;#hKifPW+8WntyWw%IkYWa>vKR9*z6TRd^|C>qgvU^uh zO&>J1bBoL_4WhCgmRN3vD#wh#7flzGr!dN{b@}jR2WnU0g>a=-qPL%A*RiA?vX>;#wf z@$Q%}xYMq}9Is;6YSy+X7M)iJ#y8EQ42dRj+Z5TCEtnm;Legg&QaQb|Rz$&>Rgjk1 z2DD}RsbQOpz@|0Ic|vLNk;45&VEKKwz-#;9X#rQ@0>U*W{9Y%CCOYMY>>dQ)oW3kIPECz?Jr68j7R;DqUQ;C zsP}nQ!s(T!3MJAlU&Lp0t)3!)LUG-{>q|}SAaKLKY*fDN_4OvqW5y=ErH6heIIm;m z$sJ~Hh$M(nk@2H2U-y2CLZ;ka*oI*|0*?DPdv$VO6%|0T@IyarLLAZOIMoo(RS zWgj7&zHW9q>|A|3HcK-!cCwD>8MmlFZ}Jda`Oq1$G84cPFr zWHdT)vo$?YOLbvtsstvDt-I-l}5&FRjnZ%W*8_?nX0(ONYkJ2Cz+3UxVjs}#GiwA?eBwIQiZafrio z)1OLQ95Sn)t*@i|PFu_4E(qSd| zSiql~_F4M77+#SO67CCx%|APBjD7``o?!+VNKP+K?I>X_OLbsMv!quRPVO$D!E^dK z_n8Ggx3wC#%iK1eZ9oib&B_@mbLSeS6jKmWi;M!L--RJ%osU|Ks8cCacsQ@U=S(PO zsaNXtWk^U5X@;A5L%dHb4zdl8*-FW>e11m-X?;lm_1>A?e3IrI<^~n_t5TxM&Bco)kAt)ZA{K?o#|CJGFO>oYmih81W7Z2 z_HCjniaQkWc!#~JGb)PlP40NXZ-%aZ+OI#Plh*2O?A`B8>|FXDU8klHdTt!HGj0_y z_?W=&1a8Gh?^%h@JSjhZQ%DPjG<;H(yy`F9=UBx>Y=;-04Pf3k{o_y)u+WgkaE37f zKm-`}$$dUKzc(k`43q|M9y8pvaB-<$Z=*8^qT+qGJ2sa0$Dj8Ew;0>xU-!9ILtZ|H z2jf-b9HCp66?{0CnWNQiMGR|%P6jJ@g%j7Q@C>}>Ev2)Snk`aK`m~8i1?8s{;%(gD z`BLr$Jiz-3>F~A=0d`u4w*wv#K?T>q6uqW{CY8jF_HD{&FV z|7q~*xhK68v)F17F%R4&WrTAb4ENgyOl|if1Jbzs5GY7v()GC~6K`y}1*=faB&ss; zE|5~7j-Jw!fQR78qGp`uW)g^gaAFDso^&Z}l-tf1awa(ToRl3n5)HZfZHc?!ClB4~+Q5V-P0QYX_$elx7OPb9hPLuW{Os+%+^V5CFAQ zmyCV022-soF@K{cMaXnCLWWCND=u%|X200x<0F{fnT*$|dCz2se{F1pDRI5jf!V*+ zNSZEq&w{xT8y&l?ynTi;aIp9}|784z?jfvP!+0=O(7ice$4y7cQ-S^qTgGyx+R@@!>}hp>RTCqRcA!6xzMt?qR;Y&12ah^!MV|2XZeuBSSTmtYG%&oOOzG_K@gI2$U( zFuBGUiWuYE@MnoBe1Nz1SyNN4rRH^1J7=yYj`mJn2smWg&*fS9PU-?I!V!Cv>zu~I z{DirqTW9um31 z)yJKj@I#JzBLynx^Ri5r%|a3=7Em-hgZ;RdY)%l)JVw%XfTdMe z-peS<9F$`=+@-EjR=#``w2EC&;lXgqb_fEE;{L@f{* z{dyM}U!f8sgYNz?H_lkhj2DQrF9TA63Us`b$inF7#*vf0kO9=K%cIf*QUNF{boZa@-6WA_v){0RKd0K8Q4dZ4{2kEwR zZg^G)muz0vy1_<$9>oeepf1}hT-(AbRPq$LF=)&%N{fk(jb=SI(Yh0(>TKVEW{csR z`4o4p>D<=hU}7Z%#B%1G^6Q+S@gO1PW;}pISyz5cN0jSQn^LtvxKhZ{CT`Iwt

l zCKjWft=RL|9o!_Lf9gVo|O;I+LG#<$~7}FJ#$^LyrSF( zZ$0FYR}Qen40Xm->>rM-(0aBd=c=k?3ITPtRQ-y&RSylCRr4tp078y?GT@9eM)U=M zV2VnnH=G;O*iFB!qIyd|tu9Lj3N%HIUm-`!KqsJ_6jBNl>@=M3ObCWdN}gmcn$O>g zsN*-+^N_UTXe4&LxLaCcug%s-Z~rj^)$2@$m@73MYr&X?%xv6OriIudGFZ8oku={a zkX5D?g@s*Dt_juHrN^_DKCjc>IW;=I=e}WsrXn_p)0&I{4(ehI%jn`HFKX^M^RJHh4m}^3PDZB zKPwl)mmescf0X&Bt{&#`a!~F>Sb8GI-rZ&uGdM<-PhW5S^w;F8p3ic7IKTR2R;T9f zKCXMim~)gn*e$+^=?~a;o~~C+a)+`&q3LT0lWb5`YWpyc5UF# zQDGy@BGhlFB7@LDdHL>4`f<`AA;V2k{bFY*s9yQIfsP^R17>E) zpDtjvpY8!BZUcuNQ+?+2$vE1$AiDZpBT4g3t;p%g=GcqA1Gbu1N`^-ddq(81GUbe> z9ibQ6M$D}TZy71$zCAngGOv}mTMBBxZ?261;V&@jYX<9~I5>$wWu2pSbv$lF61%O$ zPEe;V$m>S6TVr>blbTBKw)sKa(?OH^4eXD)Aup6gv9eniK7WMDYlkU(z;N0gOJG%G z4O}!!6>H@4r$DCiuS&ph{a01SVoiN@1S^A#QuMQ|E~(KkZC{$cjKs~h-!HpoE&E66 zre+bmm3z%YK2@_nPku9gtmn${V-sc6lQ!axZ_>tv@x5q}4N>Sg)As~{N z@vQynX2{hWt~d{|s$R~+06*ti*O*i`X*Xb`g&+MhBp2^0UNX8Ix!XDMkS1`Q+J69EcOt~FjB#yiob(YHzTDB;y z9=25=@+Se1bUa|Yd#OQRaJDlzc{Bltecy6d7xl+S>_?Dt*Tp*aw#dl)j{;$$l4Gx! z*(Nz+(1A`CIzB!(Y7=v3;J-!->7A|`h%{DIyQpdBf_);vaNQ6`~&G2=JvP3-aay&2MX?c>ubok`f`F6(j z%5M#G=B`)2vL~1*)QB9KhbenstPBq4cibOYFAIEho0kcBA-|Cbcup-JElJPuBvr4X zWje^^s*c=YPx(=1X2eRNq9yL#hQbShyC&!>yFB#)5{BnzBz;uB(oZbozrUS-SI8eL z$f2!Y#nB~}#CMKC2&slZrs-9ju4a?`Lx0}R;@L)|m^{Kcykr*PLW=2K72{!i_h^{k zznD)=ze`M+^mbK3N6FC@BYK(emCE}12~ZbqIrlrN_>)Kbf#ylUW#tfmlWVZQwO>Ru zEYW;gx_m}tu1E&@s>8GrRj{ho_JkV(p4wCPkLmVPfLYhxm)sj>3z_5ot4V&c zvj0NEC2w3IW8DhYmtfY?);7{M2)L~^)D{M_lCiadNyDVzR%r49REgpL4k0T(k=xDP WgZzK|Y&P;GrDYfd1+QwUGyD&<$3Ue3 literal 0 HcmV?d00001 diff --git a/doc/design/modules/graph.tex b/doc/design/modules/graph.tex index 758f407c7..15d23d5ee 100644 --- a/doc/design/modules/graph.tex +++ b/doc/design/modules/graph.tex @@ -25,6 +25,7 @@ \item[History] \begin{modulehistory} \item[v0.1] Initial version, SSSP only. + \item[v0.2] Graph Framework, SSSP implementation details. 
\end{modulehistory} \end{moduleinfo} @@ -33,9 +34,53 @@ This module implements various graph algorithms that are used in a number of applications such as social networks, telecommunications and road networks. -% \section{Graph Representation} \label{sec:graph:rep} +\section{Graph Framework} \label{sec:graph:fw} + +MADlib graph representation depends on two structures, a \emph{vertex} table and an \emph{edge} table. The vertex table has to have a column of vertex ids. The edge table has to have 2 columns: source vertex id, destination vertex id. For most algorithms an edge weight column is required as well. The representation assumes a directed graph, an edge from $x$ to $y$ does \emph{not} guarantee the existence of an edge from $y$ to $x$. Both of the tables may have additional columns as required. Multi-edges (multiple edges from a vertex to the same destination) and loops (edge from a vertex to itself) are allowed. For ideal performance, vertex and edge tables should be distributed on vertex id and source id respectively. This representation does not impose any ordering of vertices or edges. An example graph is given in Figure~\ref{sssp:example} and its representative tables are given in Table~\ref{sssp:rep}. 
+ +\begin{figure}[h] + \centering + \includegraphics[width=0.9\textwidth]{figures/graph_example.pdf} +\caption{A sample graph} +\label{sssp:example} +\end{figure} + +\begin{table} + \begin{tabular}{| c | } + \hline + vid \\ \hline + 1 \\ \hline + 2 \\ \hline + 3 \\ \hline + 4 \\ \hline + 5 \\ \hline + 6 \\ \hline + 7 \\ \hline + 8 \\ + \hline + \end{tabular} + \quad + \begin{tabular}{| c | c | c |} + \hline + src & dest & weight \\ \hline + 0 & 1 & 1 \\ \hline + 0 & 2 & 1 \\ \hline + 0 & 4 & 10 \\ \hline + 1 & 2 & 2 \\ \hline + 1 & 3 & 10 \\ \hline + 1 & 5 & 1 \\ \hline + 2 & 3 & 1 \\ \hline + 2 & 5 & 1 \\ \hline + 2 & 6 & 3 \\ \hline + 3 & 0 & 1 \\ \hline + 5 & 6 & 1 \\ \hline + 6 & 7 & 1 \\ + \hline + \end{tabular} + \caption{Graph representation of vertices (left) and edges(right) in the database} + \label{sssp:rep} +\end{table} -% Our graph representation depends on two structures, a \emph{vertex} table and an \emph{edge} table. \section{Single Source Shortest Path} \label{sec:graph:sssp} @@ -49,7 +94,7 @@ \section{Single Source Shortest Path} \label{sec:graph:sssp} \begin{algorithm}[SSSP$(V,E,start)$] \label{alg:sssp} -\alginput{Vertex set $V$, edge set $E$, starting vertex $start$} +\alginput{Vertex set $v$, edge set $E$, starting vertex $start$} \algoutput{Distance and parent set for every vertex $cur$} \begin{algorithmic}[1] \State $toupdate(0) \set (start,0,start)$ @@ -86,3 +131,49 @@ \section{Single Source Shortest Path} \label{sec:graph:sssp} This is not a 1-to-1 pseudocode for the implementation since we don't compare the `toupdate` table records one by one but calculate the overall minimum. In addition, the comparison with `cur` values take place earlier to reduce the number of tuples in the `toupdate` table. +\subsection{Implementation Details} + +In this section, we discuss the MADlib implementation of the SSSP algorithm in depth. 
+ +\begin{algorithm}[SSSP$(V,E,start)$] \label{alg:sssp:high} +\begin{algorithmic}[1] + \Repeat + \State Find Updates + \State Apply updates to the output table + \Until {There are no updates} +\end{algorithmic} +\end{algorithm} + +The implementation consists of two SQL blocks that are called sequentially inside a loop. We will follow the example graph at Figure~\ref{sssp:example} with the starting point as $v_0$. The very first update on the output table is the source vertex. Its weight is $0$ and its parent is itself ($v_0$). After this initialization step, the loop starts with Find Updates (the individual updates will be represented with <dest,value,parent> format). Looking at the example, it is clear that the updates should be <1,1,0>, <2,1,0> and <4,10,0>. We will assume this iteration is already completed and look how the next iteration of the algorithm works to explain the implementation details. + +\begin{algorithm}[Find Updates$(E,old\_update,out\_table)$] +\label{alg:sssp:findu} +\begin{lstlisting} +INSERT INTO new_update + SELECT DISTINCT ON (y.id) y.id AS id, + y.val AS val, + y.parent AS parent + FROM out_table INNER JOIN ( + SELECT edge_table.dest AS id, x.val AS val, old_update.id AS parent + FROM old_update + INNER JOIN edge_table + ON (edge_table.src = old_update.id) + INNER JOIN ( + SELECT edge_table.dest AS id, + min(old_update.val + edge_table.weight) AS val + FROM old_update INNER JOIN + edge_table AS edge_table ON + (edge_table.src=old_update.id) + GROUP BY edge_table.dest + ) x + ON (edge_table.dest = x.id) + WHERE ABS(old_update.val + edge_table.weight - x.val) < EPSILON + ) AS y ON (y.id = out_table.vertex_id) + WHERE y.val<out_table.weight +\end{lstlisting} +\end{algorithm} + +We begin our analysis of Find Updates function from its innermost subquery. This subquery (lines 11-16) takes a set of vertices (in the table $old\_update$) and finds the reachable vertices. In case a vertex is reachable by multiple vertices, only the path that has the minimum cost is considered. This means the input vertices need the value of their path as well. In our example, both $v_1$ and $v_2$ can reach $v_3$. In this case, we would have to use $v_2$ -> $v_3$ edge since that gives the lowest possible path value. Please note that we are aggregating the rows using the $min$ operator for each destination vertex and we are unable to return the source vertex at the same time. This means we know the value of $v_3$ should be $2$ but we cannot know its parent ($v_2$) at the same time.
To solve this limitation, we combine the result with edge and $old\_update$ tables (lines 7-10) and get the rows that has the same minimum value. At this point, we would have to tackle the problem of tie-breaking. Vertex $v_5$ has two paths leading into: <5,2,1> and <5,2,2>. The inner subquery will return <5,2> and it will match both of these edges. However, it is redundant to keep both of them in the update list as that would require updating the same vertex multiple times in a given iteration. By using the DISTINCT clause at line 2, we allow the underlying system to accept only a single one of them. Finally, we want to make sure these updates are actually leading us to shortest paths. Line 21 ensures that the values stored in the $out\_table$ does not increase and the solution does not regress throughout the iterations. + +Applying updates is straightforward as the values and the associated parent values are replaced using the $new\_update$ table. After this operation is completed the $new\_update$ table becomes $old\_update$ for the next iteration of the algorithm. + diff --git a/src/ports/postgres/modules/graph/graph_utils.py_in b/src/ports/postgres/modules/graph/graph_utils.py_in new file mode 100644 index 000000000..2d8c4bc82 --- /dev/null +++ b/src/ports/postgres/modules/graph/graph_utils.py_in @@ -0,0 +1,102 @@ +# coding=utf-8 +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Graph Methods + +# Please refer to the graph.sql_in file for the documentation + +""" +@file graph.py_in + +@namespace graph +""" + +import plpy +from utilities.control import MinWarning +from utilities.utilities import _assert +from utilities.utilities import extract_keyvalue_params +from utilities.utilities import unique_string +from utilities.validate_args import get_cols +from utilities.validate_args import unquote_ident +from utilities.validate_args import table_exists +from utilities.validate_args import columns_exist_in_table +from utilities.validate_args import table_is_empty + + +def validate_graph_coding(vertex_table, vertex_id, edge_table, edge_params, + out_table, **kwargs): + """ + Validates graph tables (vertex and edge) as well as the output table. 
+ """ + _assert(out_table and out_table.strip().lower() not in ('null', ''), + "Graph SSSP: Invalid output table name!") + _assert(not table_exists(out_table), + "Graph SSSP: Output table already exists!") + + _assert(vertex_table and vertex_table.strip().lower() not in ('null', ''), + "Graph SSSP: Invalid vertex table name!") + _assert(table_exists(vertex_table), + "Graph SSSP: Vertex table ({0}) is missing!".format(vertex_table)) + _assert(not table_is_empty(vertex_table), + "Graph SSSP: Vertex table ({0}) is empty!".format(vertex_table)) + + _assert(edge_table and edge_table.strip().lower() not in ('null', ''), + "Graph SSSP: Invalid edge table name!") + _assert(table_exists(edge_table), + "Graph SSSP: Edge table ({0}) is missing!".format(edge_table)) + _assert(not table_is_empty(edge_table), + "Graph SSSP: Edge table ({0}) is empty!".format(edge_table)) + + existing_cols = set(unquote_ident(i) for i in get_cols(vertex_table)) + _assert(vertex_id in existing_cols, + """Graph SSSP: The vertex column {vertex_id} is not present in vertex + table ({vertex_table}) """.format(**locals())) + _assert(columns_exist_in_table(edge_table, edge_params.values()), + "Graph SSSP: Not all columns from {0} present in edge table ({1})". + format(edge_params.values(), edge_table)) + + return None + +def get_graph_usage(schema_madlib, func_name, other_text): + + usage = """ +---------------------------------------------------------------------------- + USAGE +---------------------------------------------------------------------------- + SELECT {schema_madlib}.{func_name}( + vertex_table TEXT, -- Name of the table that contains the vertex data. + vertex_id TEXT, -- Name of the column containing the vertex ids. + edge_table TEXT, -- Name of the table that contains the edge data. + edge_args TEXT, -- A comma-delimited string containing multiple + -- named arguments of the form "name=value". + {other_text} + out_table TEXT -- Name of the table to store the result of SSSP. 
+); + +The following parameters are supported for edge table arguments ('edge_args' + above): + +src (default = 'src') : Name of the column containing the source + vertex ids in the edge table. +dest (default = 'dest') : Name of the column containing the destination + vertex ids in the edge table. +weight (default = 'weight') : Name of the column containing the weight of + edges in the edge table. +""".format(**locals()) + return usage diff --git a/src/ports/postgres/modules/graph/graph_utils.sql_in b/src/ports/postgres/modules/graph/graph_utils.sql_in new file mode 100644 index 000000000..e69de29bb diff --git a/src/ports/postgres/modules/graph/sssp.py_in b/src/ports/postgres/modules/graph/sssp.py_in index 558ec3d03..88d752570 100644 --- a/src/ports/postgres/modules/graph/sssp.py_in +++ b/src/ports/postgres/modules/graph/sssp.py_in @@ -28,6 +28,7 @@ """ import plpy +from graph_utils import * from utilities.control import MinWarning from utilities.utilities import _assert from utilities.utilities import extract_keyvalue_params @@ -84,7 +85,7 @@ def graph_sssp(schema_madlib, vertex_table, vertex_id, edge_table, local_distribution = m4_ifdef(, , ) - validate_graph_coding(vertex_table, vertex_id, edge_table, + validate_sssp(vertex_table, vertex_id, edge_table, edge_params, source_vertex, out_table) plpy.execute(" DROP TABLE IF EXISTS {0},{1},{2}".format( @@ -284,35 +285,11 @@ def graph_sssp_get_path(schema_madlib, sssp_table, dest_vertex, **kwargs): return None -def validate_graph_coding(vertex_table, vertex_id, edge_table, edge_params, +def validate_sssp(vertex_table, vertex_id, edge_table, edge_params, source_vertex, out_table, **kwargs): - _assert(out_table and out_table.strip().lower() not in ('null', ''), - "Graph SSSP: Invalid output table name!") - _assert(not table_exists(out_table), - "Graph SSSP: Output table already exists!") - - _assert(vertex_table and vertex_table.strip().lower() not in ('null', ''), - "Graph SSSP: Invalid vertex table name!") - 
_assert(table_exists(vertex_table), - "Graph SSSP: Vertex table ({0}) is missing!".format(vertex_table)) - _assert(not table_is_empty(vertex_table), - "Graph SSSP: Vertex table ({0}) is empty!".format(vertex_table)) - - _assert(edge_table and edge_table.strip().lower() not in ('null', ''), - "Graph SSSP: Invalid edge table name!") - _assert(table_exists(edge_table), - "Graph SSSP: Edge table ({0}) is missing!".format(edge_table)) - _assert(not table_is_empty(edge_table), - "Graph SSSP: Edge table ({0}) is empty!".format(edge_table)) - - existing_cols = set(unquote_ident(i) for i in get_cols(vertex_table)) - _assert(vertex_id in existing_cols, - """Graph SSSP: The vertex column {vertex_id} is not present in vertex - table ({vertex_table}) """.format(**locals())) - _assert(columns_exist_in_table(edge_table, edge_params.values()), - "Graph SSSP: Not all columns from {0} present in edge table ({1})". - format(edge_params.values(), edge_table)) + validate_graph_coding(vertex_table, vertex_id, edge_table, edge_params, + out_table) _assert(isinstance(source_vertex,int), """Graph SSSP: Source vertex {source_vertex} has to be an integer """. @@ -377,28 +354,7 @@ For more details on function usage: """ elif message in ['usage', 'help', '?']: help_string = """ ----------------------------------------------------------------------------- - USAGE ----------------------------------------------------------------------------- - SELECT {schema_madlib}.graph_sssp( - vertex_table TEXT, -- Name of the table that contains the vertex data. - vertex_id TEXT, -- Name of the column containing the vertex ids. - edge_table TEXT, -- Name of the table that contains the edge data. - edge_args TEXT, -- A comma-delimited string containing multiple - -- named arguments of the form "name=value". - source_vertex INT, -- The source vertex id for the algorithm to start. - out_table TEXT -- Name of the table to store the result of SSSP. 
-); - -The following parameters are supported for edge table arguments ('edge_args' - above): - -src (default = 'src') : Name of the column containing the source - vertex ids in the edge table. -dest (default = 'dest') : Name of the column containing the destination - vertex ids in the edge table. -weight (default = 'weight') : Name of the column containing the weight of - edges in the edge table. +{graph_usage} To retrieve the path for a specific vertex: @@ -428,5 +384,7 @@ shortest path from the initial source vertex to the desired destination vertex. else: help_string = "No such option. Use {schema_madlib}.graph_sssp()" - return help_string.format(schema_madlib=schema_madlib) + return help_string.format(schema_madlib=schema_madlib, + graph_usage=get_graph_usage(schema_madlib, 'graph_sssp', 'source_vertex INT, -- The source vertex id for the algorithm to start.')) # --------------------------------------------------------------------- + diff --git a/src/ports/postgres/modules/graph/sssp.sql_in b/src/ports/postgres/modules/graph/sssp.sql_in index 7534a752d..7f8982317 100644 --- a/src/ports/postgres/modules/graph/sssp.sql_in +++ b/src/ports/postgres/modules/graph/sssp.sql_in @@ -286,4 +286,3 @@ RETURNS VARCHAR AS $$ $$ LANGUAGE sql IMMUTABLE m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `'); -------------------------------------------------------------------------------- - From 4e20b7aa356189ba0371e1f13fefba2627e673dc Mon Sep 17 00:00:00 2001 From: Orhan Kislal Date: Thu, 23 Feb 2017 13:09:59 -0800 Subject: [PATCH 2/4] Fix vertex table typo. 
--- doc/design/modules/graph.tex | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/design/modules/graph.tex b/doc/design/modules/graph.tex index 15d23d5ee..ab4be4146 100644 --- a/doc/design/modules/graph.tex +++ b/doc/design/modules/graph.tex @@ -49,14 +49,14 @@ \section{Graph Framework} \label{sec:graph:fw} \begin{tabular}{| c | } \hline vid \\ \hline + 0 \\ \hline 1 \\ \hline 2 \\ \hline 3 \\ \hline 4 \\ \hline 5 \\ \hline 6 \\ \hline - 7 \\ \hline - 8 \\ + 7 \\ \hline \end{tabular} \quad @@ -173,7 +173,7 @@ \subsection{Implementation Details} \end{lstlisting} \end{algorithm} -We begin our analysis of Find Updates function from its innermost subquery. This subquery (lines 11-16) takes a set of vertices (in the table $old\_update$) and finds the reachable vertices. In case a vertex is reachable by multiple vertices, only the path that has the minimum cost is considered. This means the input vertices need the value of their path as well. In our example, both $v_1$ and $v_2$ can reach $v_3$. In this case, we would have to use $v_2$ -> $v_3$ edge since that gives the lowest possible path value. Please note that we are aggregating the rows using the $min$ operator for each destination vertex and we are unable to return the source vertex at the same time. This means we know the value of $v_3$ should be $2$ but we cannot know its parent ($v_2$) at the same time. To solve this limitation, we combine the result with edge and $old\_update$ tables (lines 7-10) and get the rows that has the same minimum value. At this point, we would have to tackle the problem of tie-breaking. Vertex $v_5$ has two paths leading into: <5,2,1> and <5,2,2>. The inner subquery will return <5,2> and it will match both of these edges. However, it is redundant to keep both of them in the update list as that would require updating the same vertex multiple times in a given iteration. 
By using the DISTINCT clause at line 2, we allow the underlying system to accept only a single one of them. Finally, we want to make sure these updates are actually leading us to shortest paths. Line 21 ensures that the values stored in the $out\_table$ does not increase and the solution does not regress throughout the iterations. +We begin our analysis of Find Updates function from its innermost subquery. This subquery (lines 11-16) takes a set of vertices (in the table $old\_update$) and finds the reachable vertices. In case a vertex is reachable by multiple vertices, only the path that has the minimum cost is considered. This means the input vertices need the value of their path as well. In our example, both $v_1$ and $v_2$ can reach $v_3$. In this case, we would have to use $v_2$ -> $v_3$ edge since that gives the lowest possible path value. Please note that we are aggregating the rows using the $min$ operator for each destination vertex and we are unable to return the source vertex at the same time. This means we know the value of $v_3$ should be $2$ but we cannot know its parent ($v_2$) at the same time. To solve this limitation, we combine the result with $edge$ and $old\_update$ tables (lines 7-10) and get the rows that has the same minimum value. At this point, we would have to tackle the problem of tie-breaking. Vertex $v_5$ has two paths leading into: <5,2,1> and <5,2,2>. The inner subquery will return <5,2> and it will match both of these edges. However, it is redundant to keep both of them in the update list as that would require updating the same vertex multiple times in a given iteration. By using the $DISTINCT$ clause at line 2, we allow the underlying system to accept only a single one of them. Finally, we want to make sure these updates are actually leading us to shortest paths. Line 21 ensures that the values stored in the $out\_table$ does not increase and the solution does not regress throughout the iterations. 
Applying updates is straightforward as the values and the associated parent values are replaced using the $new\_update$ table. After this operation is completed the $new\_update$ table becomes $old\_update$ for the next iteration of the algorithm. From 9df97c96139f9621c93f9f89f482cfd44efde0be Mon Sep 17 00:00:00 2001 From: Orhan Kislal Date: Tue, 28 Feb 2017 14:34:34 -0800 Subject: [PATCH 3/4] Graph: Update the design doc for clarity. --- doc/design/modules/graph.tex | 125 ++++++++++++++++++++++++++++++----- 1 file changed, 110 insertions(+), 15 deletions(-) diff --git a/doc/design/modules/graph.tex b/doc/design/modules/graph.tex index ab4be4146..5c3910cf6 100644 --- a/doc/design/modules/graph.tex +++ b/doc/design/modules/graph.tex @@ -1,4 +1,5 @@ -% When using TeXShop on the Mac, let it know the root document. The following must be one of the first 20 lines. +% When using TeXShop on the Mac, let it know the root document. The following +% must be one of the first 20 lines. % !TEX root = ../design.tex % Licensed to the Apache Software Foundation (ASF) under one @@ -32,11 +33,22 @@ % Abstract. What is the problem we want to solve? -This module implements various graph algorithms that are used in a number of applications such as social networks, telecommunications and road networks. +This module implements various graph algorithms that are used in a number of +applications such as social networks, telecommunications and road networks. \section{Graph Framework} \label{sec:graph:fw} -MADlib graph representation depends on two structures, a \emph{vertex} table and an \emph{edge} table. The vertex table has to have a column of vertex ids. The edge table has to have 2 columns: source vertex id, destination vertex id. For most algorithms an edge weight column is required as well. The representation assumes a directed graph, an edge from $x$ to $y$ does \emph{not} guarantee the existence of an edge from $y$ to $x$. Both of the tables may have additional columns as required. 
Multi-edges (multiple edges from a vertex to the same destination) and loops (edge from a vertex to itself) are allowed. For ideal performance, vertex and edge tables should be distributed on vertex id and source id respectively. This representation does not impose any ordering of vertices or edges. An example graph is given in Figure~\ref{sssp:example} and its representative tables are given in Table~\ref{sssp:rep}. +MADlib graph representation depends on two structures, a \emph{vertex} table +and an \emph{edge} table. The vertex table has to have a column of vertex ids. +The edge table has to have 2 columns: source vertex id, destination vertex id. +For most algorithms an edge weight column is required as well. The +representation assumes a directed graph, an edge from $x$ to $y$ does +\emph{not} guarantee the existence of an edge from $y$ to $x$. Both of the +tables may have additional columns as required. Multi-edges (multiple edges +from a vertex to the same destination) and loops (edge from a vertex to +itself) are allowed. This representation does not impose any ordering of +vertices or edges. An example graph is given in Figure~\ref{sssp:example} and +its representative tables are given in Table~\ref{sssp:rep}. \begin{figure}[h] \centering @@ -77,20 +89,32 @@ \section{Graph Framework} \label{sec:graph:fw} 6 & 7 & 1 \\ \hline \end{tabular} - \caption{Graph representation of vertices (left) and edges(right) in the database} + \caption{Graph representation of vertices (left) and edges(right) in the + database} \label{sssp:rep} \end{table} \section{Single Source Shortest Path} \label{sec:graph:sssp} -Given a graph and a source vertex, single source shortest path (SSSP) algorithm finds a path for every vertex such that the sum of the weights of its constituent edges is minimized. +Given a graph and a source vertex, single source shortest path (SSSP) +algorithm finds a path for every vertex such that the sum of the weights of +its constituent edges is minimized. 
-Shortest path is defined as follows. Let $e_{i,j}$ be the edge from vertex $i$ to vertex $j$ and $w_{i,j}$ be its weight. Given a graph G, the shortest path from $s$ to $d$ is $P = (v_1, v_2 \dots, v_n)$ (where $v_1=s$ and $v_n=d$) that over all possible $n$ minimizes the sum $ \sum _{i=1}^{n-1}f(e_{i,i+1})$. +Shortest path is defined as follows. Let $e_{i,j}$ be the edge from vertex $i$ +to vertex $j$ and $w_{i,j}$ be its weight. Given a graph G, the shortest path +from $s$ to $d$ is $P = (v_1, v_2 \dots, v_n)$ (where $v_1=s$ and $v_n=d$) +that over all possible $n$ minimizes the sum $ \sum _{i=1}^{n-1}f(e_{i,i+1})$. % \subsection{Bellman Ford Algorithm} -Bellman-Ford Algorithm \cite{bellman1958routing,ford1956network} is based on the following idea: We start with a naive approximation for the cost of reaching every vertex. At each iteration, these values are refined based on the edge list and the existing approximations. If there are no refinements at any given step, the algorithm returns the calculated results. If the algorithm does not converge in $|V|-1$ iterations, this indicates the existence of a negative cycle in the graph. +Bellman-Ford Algorithm \cite{bellman1958routing,ford1956network} is based on +the following idea: We start with a naive approximation for the cost of +reaching every vertex. At each iteration, these values are refined based on +the edge list and the existing approximations. If there are no refinements at +any given step, the algorithm returns the calculated results. If the algorithm +does not converge in $|V|-1$ iterations, this indicates the existence of a +negative cycle in the graph. \begin{algorithm}[SSSP$(V,E,start)$] \label{alg:sssp} @@ -125,15 +149,22 @@ \section{Single Source Shortest Path} \label{sec:graph:sssp} Changes from the standard Bellman-Ford algorithm: \begin{description} -\item Line~\ref{alg:sssp:update}: We only check the vertices that have been updated in the last iteration. 
-\item Line~\ref{alg:sssp:single}: At each iteration, we update a given vertex only one time. This means the toupdate set cannot contain multiple records for the same vertex which requires the comparison with the existing value. +\item Line~\ref{alg:sssp:update}: We only check the vertices that have been +updated in the last iteration. +\item Line~\ref{alg:sssp:single}: At each iteration, we update a given vertex +only one time. This means the toupdate set cannot contain multiple records +for the same vertex which requires the comparison with the existing value. \end{description} -This is not a 1-to-1 pseudocode for the implementation since we don't compare the `toupdate` table records one by one but calculate the overall minimum. In addition, the comparison with `cur` values take place earlier to reduce the number of tuples in the `toupdate` table. +This is not a 1-to-1 pseudocode for the implementation since we don't compare +the `toupdate` table records one by one but calculate the overall minimum. In +addition, the comparison with `cur` values take place earlier to reduce the +number of tuples in the `toupdate` table. \subsection{Implementation Details} -In this section, we discuss the MADlib implementation of the SSSP algorithm in depth. +In this section, we discuss the MADlib implementation of the SSSP algorithm +in depth. \begin{algorithm}[SSSP$(V,E,start)$] \label{alg:sssp:high} \begin{algorithmic}[1] @@ -144,7 +175,15 @@ \subsection{Implementation Details} \end{algorithmic} \end{algorithm} -The implementation consists of two SQL blocks that are called sequentially inside a loop. We will follow the example graph at Figure~\ref{sssp:example} with the starting point as $v_0$. The very first update on the output table is the source vertex. Its weight is $0$ and its parent is itself ($v_0$). After this initialization step, the loop starts with Find Updates (the individual updates will be represented with format). 
Looking at the example, it is clear that the updates should be <1,1,0>, <2,1,0> and <4,10,0>. We will assume this iteration is already completed and look how the next iteration of the algorithm works to explain the implementation details. +The implementation consists of two SQL blocks that are called sequentially +inside a loop. We will follow the example graph at Figure~\ref{sssp:example} +with the starting point as $v_0$. The very first update on the output table is +the source vertex. Its weight is $0$ and its parent is itself ($v_0$). After +this initialization step, the loop starts with Find Updates (the individual +updates will be represented with <dest,value,parent> format). Looking at the +example, it is clear that the updates should be <1,1,0>, <2,1,0> and <4,10,0>. +We will assume this iteration is already completed and look at how the next +iteration of the algorithm works to explain the implementation details. \begin{algorithm}[Find Updates$(E,old\_update,out\_table)$] \label{alg:sssp:findu} @@ -173,7 +212,63 @@ \subsection{Implementation Details} \end{lstlisting} \end{algorithm} -We begin our analysis of Find Updates function from its innermost subquery. This subquery (lines 11-16) takes a set of vertices (in the table $old\_update$) and finds the reachable vertices. In case a vertex is reachable by multiple vertices, only the path that has the minimum cost is considered. This means the input vertices need the value of their path as well. In our example, both $v_1$ and $v_2$ can reach $v_3$. In this case, we would have to use $v_2$ -> $v_3$ edge since that gives the lowest possible path value. Please note that we are aggregating the rows using the $min$ operator for each destination vertex and we are unable to return the source vertex at the same time. This means we know the value of $v_3$ should be $2$ but we cannot know its parent ($v_2$) at the same time.
To solve this limitation, we combine the result with $edge$ and $old\_update$ tables (lines 7-10) and get the rows that has the same minimum value. At this point, we would have to tackle the problem of tie-breaking. Vertex $v_5$ has two paths leading into: <5,2,1> and <5,2,2>. The inner subquery will return <5,2> and it will match both of these edges. However, it is redundant to keep both of them in the update list as that would require updating the same vertex multiple times in a given iteration. By using the $DISTINCT$ clause at line 2, we allow the underlying system to accept only a single one of them. Finally, we want to make sure these updates are actually leading us to shortest paths. Line 21 ensures that the values stored in the $out\_table$ does not increase and the solution does not regress throughout the iterations. - -Applying updates is straightforward as the values and the associated parent values are replaced using the $new\_update$ table. After this operation is completed the $new\_update$ table becomes $old\_update$ for the next iteration of the algorithm. +The Find Updates query is constructed in 4 levels of subqueries: \emph{find +values, find parents, eliminate duplicates and ensure improvement}. + +\begin{itemize} + +\item We begin our analysis at the innermost subquery, \emph{find values} +(lines 11-16). This subquery takes a set of vertices (in the table +$old\_update$) and finds the reachable vertices. In case a vertex is reachable +by multiple vertices, only the path that has the minimum cost is considered +(hence the name find values). There are two important points to note: + \begin{itemize} + \item The input vertices need the value of their path as well. + \begin{itemize} + \item In our example, both $v_1$ and $v_2$ can reach $v_3$. We would + have to use $v_2$ -> $v_3$ edge since that gives the lowest possible + path value.
+ \end{itemize} + \item The subquery is aggregating the rows using the $min$ operator for + each destination vertex and is unable to return the source vertex at the + same time to use as the parent value. + \begin{itemize} + \item We know the value of $v_3$ should be $2$ but we cannot know + its parent ($v_2$) at the same time. + \end{itemize} + \end{itemize} + +\item The \emph{find parents} subquery is designed to solve the +aforementioned limitation. We combine the result of \emph{find values} with +$edge$ and $old\_update$ tables (lines 7-10) and get the rows that have the +same minimum value. + \begin{itemize} + \item Note that we would have to tackle the problem of tie-breaking. + \begin{itemize} + \item Vertex $v_5$ has two paths leading into it: <5,2,1> and <5,2,2>. + The inner subquery will return <5,2> and it will match both of these + edges. + \end{itemize} + \item It is redundant to keep both of them in the update list as that + would require updating the same vertex multiple times in a given + iteration. + \end{itemize} + +\item At this level, we employ the \emph{eliminate duplicates} subquery. By +using the $DISTINCT$ clause at line 2, we allow the underlying system to +accept only a single one of them. + +\item Finally, we introduce the \emph{ensure improvement} subquery to make +sure these updates are actually leading us to shortest paths. Line 21 ensures +that the values stored in the $out\_table$ do not increase and the solution +does not regress throughout the iterations. +\end{itemize} + +Applying updates is straightforward as the values and the associated parent +values are replaced using the $new\_update$ table. After this operation is +completed the $new\_update$ table becomes $old\_update$ for the next iteration +of the algorithm. + +Please note that, for ideal performance, \emph{vertex} and \emph{edge} tables +should be distributed on \emph{vertex id} and \emph{source id} respectively.
From f0a16bfe5407293653defa111bf0d0bf9a32138f Mon Sep 17 00:00:00 2001 From: Orhan Kislal Date: Tue, 7 Mar 2017 11:29:11 -0800 Subject: [PATCH 4/4] Graph: Update the generic help message. --- .../postgres/modules/graph/graph_utils.py_in | 39 +++++++++++-------- src/ports/postgres/modules/graph/sssp.py_in | 6 ++- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/src/ports/postgres/modules/graph/graph_utils.py_in b/src/ports/postgres/modules/graph/graph_utils.py_in index 2d8c4bc82..fb43491f5 100644 --- a/src/ports/postgres/modules/graph/graph_utils.py_in +++ b/src/ports/postgres/modules/graph/graph_utils.py_in @@ -40,36 +40,40 @@ from utilities.validate_args import table_is_empty def validate_graph_coding(vertex_table, vertex_id, edge_table, edge_params, - out_table, **kwargs): + out_table, func_name, **kwargs): """ Validates graph tables (vertex and edge) as well as the output table. """ _assert(out_table and out_table.strip().lower() not in ('null', ''), - "Graph SSSP: Invalid output table name!") + "Graph {func_name}: Invalid output table name!".format(**locals())) _assert(not table_exists(out_table), - "Graph SSSP: Output table already exists!") + "Graph {func_name}: Output table already exists!".format(**locals())) _assert(vertex_table and vertex_table.strip().lower() not in ('null', ''), - "Graph SSSP: Invalid vertex table name!") + "Graph {func_name}: Invalid vertex table name!".format(**locals())) _assert(table_exists(vertex_table), - "Graph SSSP: Vertex table ({0}) is missing!".format(vertex_table)) + "Graph {func_name}: Vertex table ({vertex_table}) is missing!".format( + **locals())) _assert(not table_is_empty(vertex_table), - "Graph SSSP: Vertex table ({0}) is empty!".format(vertex_table)) + "Graph {func_name}: Vertex table ({vertex_table}) is empty!".format( + **locals())) _assert(edge_table and edge_table.strip().lower() not in ('null', ''), - "Graph SSSP: Invalid edge table name!") + "Graph {func_name}: Invalid edge table 
name!".format(**locals())) _assert(table_exists(edge_table), - "Graph SSSP: Edge table ({0}) is missing!".format(edge_table)) + "Graph {func_name}: Edge table ({edge_table}) is missing!".format( + **locals())) _assert(not table_is_empty(edge_table), - "Graph SSSP: Edge table ({0}) is empty!".format(edge_table)) + "Graph {func_name}: Edge table ({edge_table}) is empty!".format( + **locals())) existing_cols = set(unquote_ident(i) for i in get_cols(vertex_table)) _assert(vertex_id in existing_cols, - """Graph SSSP: The vertex column {vertex_id} is not present in vertex - table ({vertex_table}) """.format(**locals())) + """Graph {func_name}: The vertex column {vertex_id} is not present in + vertex table ({vertex_table}) """.format(**locals())) _assert(columns_exist_in_table(edge_table, edge_params.values()), - "Graph SSSP: Not all columns from {0} present in edge table ({1})". - format(edge_params.values(), edge_table)) + """Graph {func_name}: Not all columns from {cols} present in edge + table ({edge_table})""".format(cols=edge_params.values(), **locals())) return None @@ -83,10 +87,9 @@ def get_graph_usage(schema_madlib, func_name, other_text): vertex_table TEXT, -- Name of the table that contains the vertex data. vertex_id TEXT, -- Name of the column containing the vertex ids. edge_table TEXT, -- Name of the table that contains the edge data. - edge_args TEXT, -- A comma-delimited string containing multiple - -- named arguments of the form "name=value". + edge_args TEXT{comma} -- A comma-delimited string containing multiple + -- named arguments of the form "name=value". {other_text} - out_table TEXT -- Name of the table to store the result of SSSP. ); The following parameters are supported for edge table arguments ('edge_args' @@ -98,5 +101,7 @@ dest (default = 'dest') : Name of the column containing the destination vertex ids in the edge table. weight (default = 'weight') : Name of the column containing the weight of edges in the edge table. 
-""".format(**locals()) +""".format(schema_madlib=schema_madlib, func_name=func_name, + other_text=other_text, comma = ',' if other_text is not None else ' ') + return usage diff --git a/src/ports/postgres/modules/graph/sssp.py_in b/src/ports/postgres/modules/graph/sssp.py_in index 88d752570..4d27761a8 100644 --- a/src/ports/postgres/modules/graph/sssp.py_in +++ b/src/ports/postgres/modules/graph/sssp.py_in @@ -289,7 +289,7 @@ def validate_sssp(vertex_table, vertex_id, edge_table, edge_params, source_vertex, out_table, **kwargs): validate_graph_coding(vertex_table, vertex_id, edge_table, edge_params, - out_table) + out_table,'SSSP') _assert(isinstance(source_vertex,int), """Graph SSSP: Source vertex {source_vertex} has to be an integer """. @@ -385,6 +385,8 @@ shortest path from the initial source vertex to the desired destination vertex. help_string = "No such option. Use {schema_madlib}.graph_sssp()" return help_string.format(schema_madlib=schema_madlib, - graph_usage=get_graph_usage(schema_madlib, 'graph_sssp', 'source_vertex INT, -- The source vertex id for the algorithm to start.')) + graph_usage=get_graph_usage(schema_madlib, 'graph_sssp', + """source_vertex INT, -- The source vertex id for the algorithm to start. + out_table TEXT -- Name of the table to store the result of SSSP.""")) # ---------------------------------------------------------------------