From d4e59c0ed34342691dc5edf8375372d6e2dc2536 Mon Sep 17 00:00:00 2001 From: Keegan Lensink Date: Wed, 7 Feb 2018 17:51:10 -0800 Subject: [PATCH 01/22] Add IP/OP function mapping to mean along dim, optimize batchnorm --- benchmarks/micro/bm_batchnorm.jl | 5 ++++- benchmarks/micro/bm_batchnorm.jld | Bin 45946 -> 119882 bytes src/layers/normLayer.jl | 11 +++++++---- src/utils/utilities.jl | 27 +++++++++++++++++++++++++++ 4 files changed, 38 insertions(+), 5 deletions(-) diff --git a/benchmarks/micro/bm_batchnorm.jl b/benchmarks/micro/bm_batchnorm.jl index e38f39d..a2fa7f7 100644 --- a/benchmarks/micro/bm_batchnorm.jl +++ b/benchmarks/micro/bm_batchnorm.jl @@ -43,7 +43,8 @@ end function benchmarkApply(L, theta, Y, history) funcName = "apply" - Yout2,Yout2,tmp2 = apply(L,theta,Y,true) + Q = copy(Y) + Yout,Yout,tmp = apply(L,theta,Q,true) @code_warntype apply(L,theta,Y,true) @@ -52,4 +53,6 @@ function benchmarkApply(L, theta, Y, history) Meganet.updatehistory!(history, trial, "hist") hist = JLD.load(history, "hist") judge(hist) + + return trial end diff --git a/benchmarks/micro/bm_batchnorm.jld b/benchmarks/micro/bm_batchnorm.jld index 1f54d194c4840c0fc9e347da3eaf352dcbe3a2ce..b449d91565379c9ff1e03590e2f598b2a7050a68 100644 GIT binary patch literal 119882 zcmeF4d0v#P+cb91=ai77*I9^^-Mg+r|zUHG0U%5w^g(_3GAB1wa2=XJAJD{(i7T zg%W!Eyu6gB@?ld8fuH54U8aXN+?uFTc6mTlsRYp}QtzQ2P_9@x@qMKIROu#0{D@Ai zTB-a@cl&NiSUHRE$jfaM@c+~dgtlrH?%Lo;*=qNCN|SZwo{Zl8Mh)l_J93m;yVs@R zHvUO^0B%Y=8e->pWCZeID?ViF41WdmanRdBP`uqM2($PeqY@ z<;p4KQAnqH$@5>~U-k$yR$8_&5Ju=$B`9_cOMe47`MFzk4&>&;KdK zaSV4r_&+hvMepn5uR71}dCPw}rGJe3w#oIdsO$$1ACCvxbqQF+6maKG*-cs=`9 zSrPw##`$NjXCr-nJs$m{C*N|ft6xX|dG*h}-uUHsP+so0hVGZZx^wsXca~u+#&3q? zq00Z^@e=AwqI<}{MqwGlC1wYN&aV#OZT`2KTHG0{hj7NcD+Axy(M*pU322C0`q^n{0H(Q z`R}QQ>JP46BUWYd&pECqIQQH&?=yNp-Bu&UhV~gf*joiiUD5oU{JH_dV`9hYH({jn z_RN)|wR?L*b(P9b%x^sZCq>4YD87<*f02LZfxDfwvLqV6$-i$ucm?uP-$(j%r509( zo{zHmqjbHZ8QUj%%z)m%Y2**&Z{Lr4=eOhf`LlU+4ewX;*s@RTfYEi^4~dQ*GI~J2 zkt6!+^86fLN0`W8=E>_3j4bihFP{+qV*HOhaL;dwys$BT)5{;sZ-Zdj7p~O8D(+YF z8#7?kkdggI_pbk&cK%HM*Y``fZu-ymOYQvVAMBSd5p73|j_ordc7k5?zuQ?8-TP^~ z4BS0WS7e@Yf7zg%z4VB8ChfppR46`NEVW_dKS`JZ8CzmHn4Te>MjN{(pT2 z-1DDOiMEX2H2DYX_(^B^za{0Lr#xKGi)HiQ^j68|fDvPR|092&bw7{B<}ZrgL4R8_ ze{1>IkKF71uW+378##RVkl5bJ{h7?Kx8!fCyoc+rIj-k)_x5|K*il3JMCU(`-Z#a6 zE4c5G>r~=8j=bfa%%2^PHS%-Z2U7b1gZhjZ5L>tTfD!!$5AQQ-*o0pmMT{I7J-Tj$ ztMiGv7uR{()xM7uyVuDTStsG{Mpcf^|G)OjJs)-(TWV81mAwz;FO=k0&)eSqs6;2w92k)sCGZJvLmX^-Q}OFp!AuO~ys75>YI z*;m}qyM!t@CWO!o=x7{x;FX`;=xyJf18f{Kc5!T&&T;E2G@H9-Xr}3Cl+a< zl&z_pQw$aKfC~CKzroRchQ~zfvkggq@0@f0aXldajiIaezjwa5zh-3qm%RoL=+obQ zHgq4;f7{AmKl-CF=b!KNGif79wf(x}cgp>GWKci0wzm{k`CaC(4R@V>9TPh;dcdeY zBl_jP5w7~{wBM;e8nc{h?`ws!TEDCQwed%7{k`|quGbgt6U%STr{S)rq5-k_U%1mb z{5>gHlK<%M%=t@16)JT9DpdLJZGQWAX7~Bs{SMaG@NvC3$^UVW1Le|7UU&VhX*$7o zW1EZPr#4mU{=bv#)PIjoai{+!__xoa%Dv2h*9uX;{k!zvp2_}r^W1-rGUU@S5BJ|& z^IucCe%#M1ZWhjuyCQ$IR$TeN-(R8nkj?*`$3^qnR(ms7UaPM)tpVR?Iy!u(X`H*Q zX%7A#zw2F1Tkjt=Rp~rf_$N)L|2<9PG3cyRsE@0CV+l0B5dBqQ?ORogYTAD)rs??H zOVgJ80J@^Lrd7Tq{`OLu_IG{pyFaXHPAH2%t{nc=@|uno6*P?(D{9(0`e~Y`u+yW& zhd?y~+80&AFI-tu&9iAb9ob?-Jt zmwj5(7zeGRqEA9c9&|Pgp>DnKB2@dI(Z1^2MAK2Jsiw05wC#qDfrj?YQP7wRom<2= zn^8Xvs!h<^2c1tf*KxyywlwGvIzJb`O$+iy3q4zEtx7{%Z_%Sg7jH#;0kqwNR;||f z#oB1v?a*-z+G>Sr-)aZV0njlAs@I_ND6|K))%i9Dv>y^(za8ycP+fx7=x4R>d?-xQ zsM!HVLFWt5F|DKa&2;EI4Q(Fb)USZ%IcV$qoc3*-q0ynU){Z^G5)oQk!=T~aMQat@ z6@M)>W_Q!txF&qFyVkbQ9{BB`TGCT%wH-P>>#eoDww?TiP@U?dwY9fD+%}NBrv{^U z4AWGvL~9y%pxI%R*0yr7`1hb~$r$`1FXFd{_K%@?E{=HUI8Enf=x8+_ojO6&F>tb` 
zO?=iLdNkI!#ipMgi#4|&IRBWVNz1hV$DvwJbJG*X>zF?F{<_d{}0&k B{e=Jk delta 695 zcmX@Lg8kPsrVScQ%vGBiHfu9wvNJz8ojtj5-O9-hTzg>bNxU4BH*haOaMvK19Fr6H z=76~$J2!6Lz;D9C$g=sV^m1k()hizlqTVW*v%*vWjpf?>NP|fbEUqv2!mt>Nanj zJr}|VTq_Hf0w$Wjj29;7uZP9LW_5N3P^3zL2q0i&kbuyP5CLW|kAZ=Cy7qZSO-=>vOD diff --git a/src/layers/normLayer.jl b/src/layers/normLayer.jl index 8d944f8..39c401e 100644 --- a/src/layers/normLayer.jl +++ b/src/layers/normLayer.jl @@ -33,7 +33,7 @@ end function apply(this::normLayer{T},theta::Array{T},Yin::Array{T,2},doDerivative=true) where {T <: Number} - # first organize Y with channels + # first organize Y with channels nf = this.nData[2]::Int nex = div(length(Yin),nFeatIn(this))::Int Y = reshape(Yin,:,nf,nex) @@ -41,11 +41,14 @@ function apply(this::normLayer{T},theta::Array{T},Yin::Array{T,2},doDerivative=t dA = (T)[] # subtract mean across pixels - Yout = Y.-mean(Y,this.doNorm) + m = mean(Y, this.doNorm) + Yout = Y .- m # normalize - S2 = sqrt.(mean(Yout.^2,this.doNorm) + this.eps) - Yout ./= S2 + ep = this.eps + mean!(x -> x^2, m, Yout) + m .= sqrt.(m .+ ep) + Yout .= Yout ./ m Yout2 = reshape(Yout,:,nex) diff --git a/src/utils/utilities.jl b/src/utils/utilities.jl index 4769d25..5a009a6 100644 --- a/src/utils/utilities.jl +++ b/src/utils/utilities.jl @@ -63,3 +63,30 @@ function meshgrid(vx::AbstractVector{T}, vy::AbstractVector{T}, oo = ones(Int, o) (vx[om, :, oo], vy[:, on, oo], vz[om, on, :]) end + +""" + mean(f, A, region) + +Apply the function `f` to each element of `A`, and compute the mean along dimension in `region`. +""" +function Base.mean(f::Function, a::AbstractArray, region::Int) + x = Base.mapreducedim(f, +, a, region) + n = max(1, Base._length(x)) // Base._length(a) + x .= x .* n + + return x +end + +""" + mean!(f, r, A) + +Apply `f` to each element of A, and compute the mean over the singleton dimensions of `r`, and write the results to `r`. 
+""" +function Base.mean!(f::Function, r::AbstractArray{T}, a::AbstractArray) where {T<:Number} + fill!(r, zero(T)) + x = Base.mapreducedim!(f, +, r, a) + n = max(1, Base._length(x)) // Base._length(a) + x .= x .* n + + return x +end From 17b32398e87180cffd4f52dbd6da8c92bcb8fbb8 Mon Sep 17 00:00:00 2001 From: davidbegert Date: Tue, 13 Feb 2018 09:49:59 -0800 Subject: [PATCH 02/22] testing --- examples/EResNN_CIFAR10.jl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/examples/EResNN_CIFAR10.jl b/examples/EResNN_CIFAR10.jl index 9afcbbc..cd49cb3 100644 --- a/examples/EResNN_CIFAR10.jl +++ b/examples/EResNN_CIFAR10.jl @@ -25,7 +25,7 @@ getConvKernel = (nImg,sK) -> getConvGEMMKernel(TYPE,nImg,sK); # opening layer K1 = getConvKernel(nImg,[3,3,cin,nc[1]]); -nL = getBatchNormLayer(TYPE,[prod(nImg);nc[1]],isTrainable=true); +nL = getBatchNormLayer(TYPE,[prod(nImg);nc[1]],isTrainable=true) blocks = [getSingleLayer(TYPE,K1,nL)] for k=1:length(nt) @@ -40,7 +40,7 @@ for k=1:length(nt) end blocks = [blocks;RN] # change channels - Kc = getConvKernel(nImg,[1,1,nc[k],nc[k+1]]); + Kc = getConvKernel(nImg,[1,1,nc[k],nc[k+1]]) nL = getBatchNormLayer(TYPE,[prod(nImg);nc[k+1]],isTrainable=true) blocks = [blocks; getSingleLayer(TYPE,Kc,nL)] @@ -54,8 +54,8 @@ for k=1:length(nt) end # Connector block -B = kron(speye(TYPE,nc[end]),ones(TYPE, prod(nImg)))/prod(nImg); -blocks = [blocks; getConnector(TYPE,B')]; +B = kron(speye(TYPE,nc[end]),ones(TYPE, prod(nImg)))/prod(nImg) +blocks = [blocks; getConnector(TYPE,B')] blocks[end].outTimes=1 net = getNN(blocks) @@ -74,10 +74,10 @@ pLoss = getSoftMaxLoss(TYPE); objFun = dnnObjFctn(net,pLoss,pRegTh,pRegW) opt = getSGDsolver(TYPE,learningRate=1e-2,maxEpochs=1,miniBatch=miniBatchSize,out=true) -W = 0.1*vec(randn(TYPE,10,nFeatOut(net)+1)); -W = min.(W,.2); -W = max.(W,-.2); -W = convert(Array{TYPE},W); +W = 0.1*vec(randn(TYPE,10,nFeatOut(net)+1)) +W = min.(W,.2) +W = max.(W,-.2) +W = convert(Array{TYPE},W) solve(opt,objFun::dnnObjFctn,[vec(theta);vec(W)],Y_train,C_train,Y_test,C_test) @time solve(opt,objFun::dnnObjFctn,[vec(theta);vec(W)],Y_train,C_train,Y_test,C_test) From 902014fe45eb4e4a14fa199103ede6a52a150c43 Mon Sep 17 00:00:00 2001 From: moumitaTora Date: Tue, 13 Feb 2018 17:32:25 -0800 Subject: [PATCH 03/22] Eran's corrected code added --- regemiii.zip | Bin 0 -> 11386 bytes regemiii/EResNN_CIFAR10.jl | 102 ++++++++++ regemiii/Meganet.jl | 49 +++++ regemiii/abstractConvKernel.jl | 50 +++++ regemiii/convFFTKernel.jl | 126 +++++++++++++ regemiii/convFFTKernelTest.jl | 37 ++++ regemiii/convGEMMKernel.jl | 227 +++++++++++++++++++++++ regemiii/convGEMMKernelTest.jl | 40 ++++ regemiii/doubleSymLayer.jl | 215 +++++++++++++++++++++ regemiii/doubleSymLayerTest.jl | 61 ++++++ regemiii/reluActivation.jl | 44 +++++ regemiii/singleLayer.jl | 158 ++++++++++++++++ src/optimization/.dnnBatchObjFctn.jl.swp | Bin 0 -> 1024 bytes src/optimization/dnnBatchObjFctn.jl.save | 55 ++++++ 14 files changed, 1164 insertions(+) create mode 100644 regemiii.zip create mode 100644 regemiii/EResNN_CIFAR10.jl create mode 100644 regemiii/Meganet.jl create mode 100644 regemiii/abstractConvKernel.jl create mode 100644 regemiii/convFFTKernel.jl create mode 100644 regemiii/convFFTKernelTest.jl create mode 100644 regemiii/convGEMMKernel.jl create mode 100644 regemiii/convGEMMKernelTest.jl create mode 100644 regemiii/doubleSymLayer.jl create mode 100644 regemiii/doubleSymLayerTest.jl create mode 100644 regemiii/reluActivation.jl create mode 100644 regemiii/singleLayer.jl 
create mode 100644 src/optimization/.dnnBatchObjFctn.jl.swp create mode 100644 src/optimization/dnnBatchObjFctn.jl.save diff --git a/regemiii.zip b/regemiii.zip new file mode 100644 index 0000000000000000000000000000000000000000..bfcf3cb939bff8197d5b958700eed75d5f63033c GIT binary patch literal 11386 zcmaL7WmKG7@;!{ZySuvucXxMpcN(|g5?q73JHZLscyManc zSOTV|E|(rwNT-Gi&8@Yj=YS!tWR#S+lFlX zDE{Nf@fcqoS~L5$gF@xUuY$5~O#4Mmp4U)|LaF4(NuV`CNggUpE!xo6M!TwsUHQ0v zg!;N-HKdbQC~Ko(Xlmpn!!&>m?!dmvNMpfT^@B^>gjOzir7SV{jlqEZx^qN1s(J_) zhlWS-a#dWRv%;Y^Y$-BSc@&z?VFMq#XC-7mIxRhVb>l|QzqfSiytcD^tAqJ z0{qnz=uUuKIN;}|Uo(DvdjqKcDQ=c)=* z@~c?59|mjAW49u@m49jgP#$aaFA3O)=>YEyCk=E|2(qx0 z>V{Y0HI^M2d=FvCaIL7MUup(Z`d3BJXg4NryP-{gZPJ4plv{UdYhXY0En9jLIm<^n zcg}vc7C^-e9y+Y4cU9LjeuyzPmN7y&_L*>8mAa`sItF*TVjnyJB zvVF34g3O&qKD^B^pKNOw8?n!rpdJcyn_0 z#p`eiT2nhp+ebzh2(<=zU?L~@ImP-`M$_w_EhUyO<(}K4FBr#4{^TM9&mw`$HT6>v zviEp>JrV!bbDq+@6IhGAR+`$%6***fhFrL%|K&Gf(wZN`MO7%Y0bbwv8Ac;S?cN}f z164RZ?nw4y{Wu+?r3}Jtk{kOcRcz5_z8htGEcw+P4zIk0S1NBQ)&4CoG)nKgAU((= zoMAnDg&Bg0;GMhTPo#LwU7k@J2VpAHj5wKq$qJvLLn6xeMBhomH3WjSfoX~Iym)x7 z`^~3(RO}`moDh$v8J4IvimaPgp*gi;@VxpOEVr^T`gTa71bGr;rU@Y4uhvnji;XIc zyB8^Zu33t*74fo0C}@>A2Qe0e6gOje<7NB5a3cN#v1)ZO*lW z%md$YwXM`k*v>2Dq3L8NE-F^A=O2GbiRMLT_8XZGK>(f5@u}6akILo`AcLz)IA6!X zZ(ow|ckZ+#A79&ZHVXip1%k%YlXqjoT#nHX%*t}mFnI6iOdyaTAeA2ksr+Br6ZK#0 zDJ7wx@NfF;(z$e6<3xWu1N%+v2w%TUYbWk;6_Bup{cF|n=(Kg?Gpx2f5C`K+oQ+-* z{Y*s{)BDSXv}~a{kD?yHk#3aY68iGOTT%Ye5uF;%(gpDTBg^wLoImO@6S3cy^XiHW zT}YP8WlSJf)?@gfYPSDyDpv@;b+*UAP1>R{N8^+K4bk>$SRdo$GdpFD5`Q4nW7KB2 zC&1H1BkA!)$%%ZFfIR~V)Rr3mSakegVg|R)acJw}OircN!(@#Dpf;weE@lbQYLO_qDt9 z#)BPg0yG5o?$XE-0=86dFjB)YrvzUIlesl^l!JuR@4DCrzsh&0QKm~g&EL>>5d@+! zwaNgLRcRikvj?;MlZ$ob1x(K}kRrRi$J?wfw2IZ+9ZN7yztduqxo?CtTDbO=Ty~U_ z5G|K`Ira@hDslVW)NVYiU8PZ%8ct1aBO?W5I*K8=?#O+2hVeTF!idAccOwal+j1y^ z<@Qb-Exd4QoTKcC);7(No*VY{$4r02+v97Ri4b1rB5$g9jMKGG9tesCu1PzTEZE{2 zu{iEn(zoY`&a$K1NAVA5HUr8Bc z(%otwkrNS7ilxdn#*M4P(>Cw2fB`BK>)zN9hiCUnQ@G}Z4@rv(DgwSV+AGf%; z)d$rmk};sAl1*0)11tN$o95_B!vzL8cWQ>td;53BS<%byJCD$mt6x)L;-L(F z8`iYNPxB3#f#@=!Zw%_UUA(tud1l{~rOysQWom^3=Bw9L^?iN7CBgr41ToIRRyjQ# z+b@}L!U)BJWjg&iEW@ak4LlYVS998>!(-&NjVw&_B;%Y_TYu)U7r)9yEYY2X##p9G zOA2Ma2Ai^h+U&?ELL3wIhL@LeQEUB0 zb=~Ye2l!aXGY#Wp5Tjh3(`11^FI+;o1lWHL*Dp+Gb{v%^PPm3Pe}t@JR&yt5fsjsiU9M;#cIZ?-V0H_))Rqaku`Q$0zb>LjbYC1Eh1`>}7V>e2 zs+sS%v!%$nMEr=oNHe}N*3QRb?E^!ku|io&(Sk97hdo(*58h`_^Nc))T&BH`!vb#{S9G&Dhg_RdcNzX9 zcnOMUGChDTjJR90Ky+2!0R+4*9PbCA|I(@xRhG>Z&2TI7YzCGj7^?Uon(Cw&97}FG zmt+`&x>vC%@)w@%@xZ%FjVm|Jm!Ksw)Ng~Ca^*(OmILr~DUZB|Z4)Ja$4$%jmR$n( zL|k|HWE@BXXQ7k~Wva@8^1g%P1dZbIE>7u|g)Wwzhb+U7%~>Rvtj|JOHih*7o0i2X zItz?p%fw@w9}ASqcH+`MqL&x&XMDX2s@|q6oAK$YPRU>U{&Zk$RvhUBlIvmKGCtj6!cDGsLQSlEMeKUtA`oO>C~$FtTCj1 zamm;ATIm*t%X_I9Ky)IX*q4s(qb1JM-dRv>^qa@*T-Sk>3}olYW``S>PISwmEnq7d z<4mr=sIh>pYB8rtMoFU6Jge?tJUbFM#Mk8?DNS&++0C;07A@=WW;8)bpj4_B=M0qH z7coz#JCj4dheP?4l~wQA4OznMe#E;KIUA44m6D<7rWXWph~Y-x&$G#rD|x028g&j? 
z?*UauMw`JUEmlZtjT1;~Ed-(Y@sTZ<-_7-nsG7o&C`DfUKiVLxhUfT`wv083aW^_l z>0*fXE(v1j1g*cU9pemvdx45L18|0dw<+YqAf&)A8==0XA3FDaSJpkrL->yODuR|M zI#qE@-#h4IMC2&EEKi~4LyAR52w<}drORc=V`QP@U=PhRk0%JO36q@ycqUGs#8wcBh1M6ei!^`z*bjWF-~BSVvRUoYP0B zZXE^&R1XDbhKXsm#<$4H;>%E2gE39REzTgY)$6phx6Xd3KU#L{Kc&lZfoP9*K~`X+ zW$4}yizs(gqxEuxz^6G`vA)Y5aZWNjVNrG#+R>A1<)>SnCjsU(x~~d@Ft<$plD0Hg zeW--RHpA2r6{#>0AV_AKy1YOjZBJ}|HL3@@Py};BhKun;VfT*032BVT8m(*RX-i^f zSia;>4nsm!n4GVv*X>p?sDaSxggMcjG~O9LXh~-5$*=F0GbTJ{-*mr>Y*+%|e&hT~ zYfdi8oMmhnUy0%A!(v~u?t?47LJ^1NMDEqw~8Um zw{c&^iC(Vk^x5C3Qt?6Ky02S3r(1P|c}T{P;l#PDydfOfu%IQufK@z=`D&Mj;0s2Nnj_U?=1X6Sn3p14wWN)aUx`oXqs?8y&Xvr*PH7mygokhrjPn*{Dif_&6P4J_1bXPe+cf!9#NnJ z!p!b<049X}BjDk9=GPKAYw-y`6E#pAVnj=d5NC%NPiO>c*nwQ;jC)&K$RmX5Y`Il9 zraiEip!?X~)82re@@VGq*7l^;L&{FJh?CwD<pP_q;x@Rc5{uc)bM56S46`qhEx6#sBdpbIV}?YoAR0dcU4CG+}BY z;dNouqr$dduK>3h`%I-f)uXb&6Rvt#DqWY}aII5gPkb2$f z$~R$JP`&1BO%TnTlB%Z4$mv}xBrJsXL#S-O-@O35pxM#>{e$tKVIa6=7Jwh}o*dy1 z1A>BKfc)pO=3g%&{kg39_xR`4&3~@`>#Xt*gZ?K3Kg{Cab?gV9{&V?Xr&NCm>VHB9 z@xP2W|GC9q`-Xo%K`!z?EA;>k%m1o5|DH5f;(zA+Uz+5fTl`hx{1K=B2}sg^+}HgtJN&h% u{{0XuWdBV2f7#-%Pvzg!P(ks3)M8~h$dC3g2nhVgr~Si1Rnh+W_5T2#Luu^* literal 0 HcmV?d00001 diff --git a/regemiii/EResNN_CIFAR10.jl b/regemiii/EResNN_CIFAR10.jl new file mode 100644 index 0000000..e9b60ba --- /dev/null +++ b/regemiii/EResNN_CIFAR10.jl @@ -0,0 +1,102 @@ +using MAT, Meganet + +n = 256; +# Y_train,C_train,Y_test,C_test = getCIFAR10(n,Pkg.dir("Meganet")*"/data/CIFAR10/"); +Y_train,C_train,Y_test,C_test = getCIFAR10(n,"../data/CIFAR10/"); +# using PyPlot +# y = Y_train[:,50]; y = y - minimum(y); y = y./maximum(y); +# y = reshape(y,32,32,3); +# y[:,:,1] = y[:,:,1]';y[:,:,2] = y[:,:,2]';y[:,:,3] = y[:,:,3]'; +# figure(); imshow(y) + +miniBatchSize = 32; +nImg = [32; 32] +cin = 3 +nc = [16;32;64;64] +nt = 2*[1;1;1] +h = [1.;1.;1.] 
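+# nc sets the channel counts, nt the number of ResNN steps and h the step size for
+# each block (they are consumed by the getResNN calls below).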
+ +TYPE = Float32; +act = reluActivation; +getConvKernel = (nImg,sK) -> getConvGEMMKernel(TYPE,nImg,sK); +# getConvKernel = (nImg,sK) -> getConvFFTKernel(TYPE,nImg,sK); +#getConvKernel = (nImg,sK) -> getSparseConvKernel2D(TYPE,nImg,sK); + +# opening layer +K1 = getConvKernel(nImg,[3,3,cin,nc[1]]); + +nL = getBatchNormLayer(TYPE,[prod(nImg);nc[1]],isTrainable=true); +Bin = kron(eye(TYPE,nc[1]),ones(TYPE,prod(nImg),1)); +blocks = [getSingleLayer(TYPE,K1,nL,Bin = Bin,activation = act)] + +for k=1:length(nt) + # ResNN layers + K2 = getConvKernel(nImg,[3,3,nc[k],nc[k]]) + nL = getBatchNormLayer(TYPE,[prod(nImg);nc[k]],isTrainable=true) + Bin = kron(eye(TYPE,nc[k]),ones(TYPE,prod(nImg),1)); + Bout = zeros(TYPE, nFeatIn(K2),0); + + L2 = getDoubleSymLayer(TYPE,K2,nL,Bin=Bin,activation=act) + RN = getResNN(TYPE,L2,nt[k],h[k]) + + if k (24, 500))) +# end diff --git a/regemiii/Meganet.jl b/regemiii/Meganet.jl new file mode 100644 index 0000000..a38a01d --- /dev/null +++ b/regemiii/Meganet.jl @@ -0,0 +1,49 @@ +module Meganet + +using LinearOperators, MAT, Base.Test + +import JLD, BenchmarkTools + +include("AbstractMeganetElement.jl") + +include("activations/tanhActivation.jl") +include("activations/reluActivation.jl") +include("activations/identityActivation.jl") + + +include("integrators/NN.jl") +include("integrators/connector.jl") +include("integrators/ResNN.jl") +include("kernelTypes/abstractConvKernel.jl"); +include("kernelTypes/denseKernel.jl") +include("kernelTypes/sparseKernel.jl") +include("kernelTypes/convFFTKernel.jl"); +include("kernelTypes/convGEMMKernel.jl"); +include("kernelTypes/convCircKernel.jl"); +include("kernelTypes/convDiagKernel.jl"); + + + +include("layers/affineScalingLayer.jl") +include("layers/normLayer.jl") +include("layers/doubleSymLayer.jl") +include("layers/singleLayer.jl") + +include("loss/softMaxLoss.jl") +include("regularization/TikhonovReg.jl") + +include("optimization/dnnBatchObjFctn.jl") +include("optimization/sgd.jl") + +include("utils/getConvMatPeriodic.jl") +include("utils/testAbstractMeganetElement.jl") +include("utils/testLossFunction.jl") +include("utils/utilities.jl"); +include("utils/checkDerivative.jl"); +include("utils/normalizeData.jl"); + +include("utils/getCIFAR10.jl"); +include("utils/Benchmark.jl"); + + +end diff --git a/regemiii/abstractConvKernel.jl b/regemiii/abstractConvKernel.jl new file mode 100644 index 0000000..6e49529 --- /dev/null +++ b/regemiii/abstractConvKernel.jl @@ -0,0 +1,50 @@ +export nImgIn, nImgOut, nFeatIn, nFeatOut, nTheta, getOp, initTheta, AbstractConvKernel + +abstract type AbstractConvKernel{T} <: AbstractMeganetElement{T} end + +## All convKernel types are assumed to have fields nImage (size of the image) and sK (size of the Convolution Kernel) + +function nImgIn(this::AbstractConvKernel) + return [this.nImg[1]; this.nImg[2]; this.sK[3]] +end + +function nImgOut(this::AbstractConvKernel) + return [this.nImg[1]; this.nImg[2]; this.sK[4]] +end + +function nFeatIn(this::AbstractConvKernel) + return prod(nImgIn(this)); +end +function nFeatOut(this::AbstractConvKernel) + return prod(nImgOut(this)); +end + +function nTheta(this::AbstractConvKernel) + return prod(this.sK); +end + + +function getOp(this::AbstractConvKernel{T},theta::Array{T}) where {T <: Number} + + m = prod(nImgOut(this)) + n = prod(nImgIn(this)) + + A = LinearOperator{T}(m,n,false,false, + v -> Amv(this,theta,v), + Nullable{Function}(), + w -> ATmv(this,theta,w)) + return A +end + +function initTheta(this::AbstractConvKernel{T}) where {T <: Number} + + sd = 
T(0.01); + theta = sd*randn(T,prod(this.sK)); + #id1 = find(theta>2*sd); + #theta(id1[:]) = randn(numel(id1),1); + + #id2 = find(theta< -2*sd); + #theta(id2(:)) = randn(numel(id2),1); + #theta = max(min(2*sd, theta),-2*sd); + return theta +end diff --git a/regemiii/convFFTKernel.jl b/regemiii/convFFTKernel.jl new file mode 100644 index 0000000..97f02d0 --- /dev/null +++ b/regemiii/convFFTKernel.jl @@ -0,0 +1,126 @@ +export convFFTKernel, getEigs,getConvFFTKernel +## For the functions nImgIn, nImgOut, nFeatIn, nFeatOut, nTheta, getOp, initTheta : see AbstractConvKernel.jl +## All convKernel types are assumed to have fields nImage and sK +mutable struct convFFTKernel{T} <: AbstractConvKernel{T} + nImg :: Array{Int,1} + sK :: Array{Int,1} + S :: Array{Complex{T},2} +end + +function getConvFFTKernel(TYPE::Type,nImg,sK) + S = getEigs(Complex{TYPE},nImg,sK) + return convFFTKernel{TYPE}(nImg,sK,S) +end + +function getEigs(TYPE,nImg,sK) + S = zeros(TYPE,prod(nImg),prod(sK[1:2])); + for k=1:prod(sK[1:2]) + Kk = zeros(sK[1],sK[2]); + Kk[k] = 1; + Ak = getConvMatPeriodic(TYPE,Kk,[nImg[1],nImg[2], 1]); + Akk = full(convert(Array{TYPE},Ak[:,1])); + S[:,k] = vec(fft2(reshape(Akk,nImg[1],nImg[2]) )); + end + return S +end + +export Amv +function Amv(this::convFFTKernel{T},theta::Array{T},Y::Array{T}) where {T<:Number} + + nex = div(numel(Y),prod(nImgIn(this))) + + # compute convolution + AY = zeros(Complex{T},tuple([nImgOut(this); nex]...)); + theta = reshape(theta, tuple([prod(this.sK[1:2]); this.sK[3:4]]...)); + Yh = ifft2(reshape(Y,tuple([nImgIn(this); nex]...))); + #### allocate stuff for the loop + Sk = zeros(Complex{T},tuple(nImgOut(this)...)) + #T = zeros(Complex{eltype(Y)},tuple(nImgOut(this)...)) + nn = nImgOut(this); nn[3] = 1; + sumT = zeros(Complex{T},tuple([nn;nex]...)) + #### + + for k=1:this.sK[4] + Sk = reshape(this.S*theta[:,:,k],tuple(nImgIn(this)...)); + #T = Sk .* Yh; + #sumT = sum(T,3) + sumT = hadamardSum(sumT,Yh,Sk) + AY[:,:,k,:] = sumT[:,:,1,:]; + end + AY = real(fft2(AY)); + Y = reshape(AY,:,nex); + return Y +end + +function ATmv(this::convFFTKernel{T},theta::Array{T},Z::Array{T}) where {T<:Number} + + nex = div(numel(Z),prod(nImgOut(this))); + ATY = zeros(Complex{T},tuple([nImgIn(this); nex]...)); + theta = reshape(theta, prod(this.sK[1:2]),this.sK[3],this.sK[4]); + #### allocate stuff for the loop + Sk = zeros(Complex{T},tuple(nImgOut(this)...)) + #T = zeros(Complex{eltype(Z)},tuple(nImgOut(this)...)) + nn = nImgOut(this); nn[3] = 1; + sumT = zeros(Complex{T},tuple([nn;nex]...)) + #### + + Yh = fft2(reshape(Z,tuple([nImgOut(this); nex]...))); + for k=1:this.sK[3] + tk = theta[:,k,:] + #if size(this.S,2) == 1 + # tk = reshape(tk,1,:); + #end + Sk = reshape(this.S*tk,tuple(nImgOut(this)...)); + #T = Sk.*Yh; + #sumT = sum(T,3) + sumT = hadamardSum(sumT,Yh,Sk) + ATY[:,:,k,:] = sumT[:,:,1,:]; + end + ATY = real(ifft2(ATY)); + ATY = reshape(ATY,:,nex); + return ATY +end + +function Jthetamv(this::convFFTKernel{T},dtheta::Array{T},dummy::Array{T},Y::Array{T},temp=nothing) where {T<:Number} + + nex = div(numel(Y),nFeatIn(this)); + Y = reshape(Y,:,nex); + Z = Amv(this,dtheta,Y); + return Z +end + +function JthetaTmv(this::convFFTKernel{T},Z::Array{T},dummy::Array{T},Y::Array{T}) where {T<:Number} + # derivative of Z*(A(theta)*Y) w.r.t. 
theta + + nex = div(numel(Y),nFeatIn(this)); + + dth1 = zeros(this.sK[1]*this.sK[2],this.sK[3],this.sK[4]); + Y = permutedims(reshape(Y,tuple([nImgIn(this); nex ]...)),[1 2 4 3]); + Yh = reshape(fft2(Y),prod(this.nImg[1:2]),nex*this.sK[3]); + Zh = permutedims(ifft2(reshape(Z,tuple([nImgOut(this); nex]...))),[1 2 4 3]); + Zh = reshape(Zh,:, this.sK[4]); + + for k=1:prod(this.sK[1:2]) + temp = conj(this.S[:,k]) .* Yh + temp = reshape(temp,:,this.sK[3]) + dth1[k,:,:] = real(conj(temp)'*Zh); + end + + dtheta = reshape(dth1,tuple(this.sK...)); + return dtheta +end + +function hadamardSum(sumT::Array{T},Yh::Array{T},Sk::Array{T}) where {T<:Number} + sumT .= 0.0; + for i4 = 1:size(Yh,4) + for i3 = 1:size(Yh,3) + for i2 = 1:size(Yh,2) + for i1 = 1:size(Yh,1) + @inbounds tt = Sk[i1,i2,i3] + @inbounds sumT[i1,i2,1,i4] += tt * Yh[i1,i2,i3,i4] + end + end + end + end + return sumT +end diff --git a/regemiii/convFFTKernelTest.jl b/regemiii/convFFTKernelTest.jl new file mode 100644 index 0000000..f6a8eb0 --- /dev/null +++ b/regemiii/convFFTKernelTest.jl @@ -0,0 +1,37 @@ +using Base.Test +using Meganet +using LinearOperators + + +nImg = [8,10] +sK = [3,3,4,4] +for TYPE=[Float64,Float32] + K = getConvFFTKernel(TYPE,nImg,sK) + + @testset "adjoint test $TYPE" begin + nex = 2; + theta = initTheta(K) + A = getOp(K,theta); + v = randn(TYPE,nFeatIn(K),nex) + w = randn(TYPE,nFeatOut(K),nex) + + t1 = vecdot(w,A*v) + t2 = vecdot(v,A'*w) + + # println("adjointTest t1=$t1\t t2=$t2") + @test norm(t1-t2)/norm(t1) < 1e3*eps(TYPE) + end + + @testset "derivative Test" begin + th = initTheta(K); + dth = initTheta(K); + nex = 2; + Y = randn(TYPE,nFeatIn(K),nex)+nex; + Z = randn(TYPE,nFeatOut(K),nex)-nex; + + t1 = vec(Z)'*vec(Jthetamv(K,dth,th,Y)); + t2 = vec(dth)'*vec(JthetaTmv(K,Z,th,Y)); + # println("derivativeTest t1=$t1\t t2=$t2") + @test norm(t1-t2)/norm(t2) < 1e3*eps(TYPE) + end +end diff --git a/regemiii/convGEMMKernel.jl b/regemiii/convGEMMKernel.jl new file mode 100644 index 0000000..24bcda7 --- /dev/null +++ b/regemiii/convGEMMKernel.jl @@ -0,0 +1,227 @@ +export convGEMMKernel,Amv,ATmv,transposeTest,getConvGEMMKernel + +mutable struct convGEMMKernel{T} <: AbstractConvKernel{T} + nImg :: Array{Int,1} + sK :: Array{Int,1} + shiftX :: Array{Int,1} + shiftT :: Array{Int,1} + aux_sk3 :: Array{T, 3} + aux_sk4 :: Array{T, 3} +end +function getConvGEMMKernel(TYPE::Type,nImg,sK) + + if sK[1] == 1 && sK[2] == 1 + shiftX = [0;0]; + shiftT = [0;0]; + elseif sK[1] == 3 && sK[2] == 3 + shiftX = [0;-1;0;0;1;0]; + shiftT = [1;0;0;0;0;-1]; + else + error("Code only supports 1X1 and 3X3 convolutions"); + end + + aux_sk3 = zeros(TYPE,nImg[1],nImg[2],sK[3]); + aux_sk4 = zeros(TYPE,nImg[1],nImg[2],sK[4]); + + return convGEMMKernel{TYPE}(copy(nImg),copy(sK),shiftX,shiftT,aux_sk3,aux_sk4); +end + +function Amv(this::convGEMMKernel{T},theta::Array{T},Y::Array{T}) where {T<:Number} + ## We assume that the data Y is held in the order XYCN. 
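+  ## (X and Y index the two spatial dimensions, C the input channels and N the
+  ## examples; accordingly, Y is reshaped below to nImg[1] x nImg[2] x sK[3] x nex.)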
+ sK = this.sK; + nImg = this.nImg; + nex = div(numel(Y),prod(nImgIn(this))) + # compute convolution + Y = reshape(Y,nImg[1],nImg[2],this.sK[3],nex); + AY = Array{T, 3}(nImg[1]*nImg[2],this.sK[4],nex); + aux = this.aux_sk3; + AYk = reshape(this.aux_sk4,nImg[1]*nImg[2],sK[4]); + ### reshape the kernels for gemm!: + K = reshape(theta, sK[1], sK[2], sK[3], sK[4]) + KK = Array{Array{T,2}}(sK[1],sK[2]); + for k1 = 1:sK[1] + for k2 = 1:sK[2] + @inbounds KK[k1,k2] = K[k1,k2,:,:]'; + end + end + for k = 1:nex + AYk[:] = zero(T) + AYk = multConv2Dblock(Y,KK, AYk,aux,this.shiftX,this.shiftT,k); + @inbounds AY[:,:,k] = AYk; + + end + AY_out = reshape(AY,:,nex); + return AY_out +end + +function ATmv(this::convGEMMKernel{T},theta::Array{T},Zin::Array{T}) where {T<:Number} + nImg = this.nImg; + sK = this.sK; + nex = div(numel(Zin),prod(nImgOut(this))); + K = reshape(theta, sK[1], sK[2], sK[3], sK[4]); + Z = reshape(Zin,nImg[1],nImg[2],sK[4],nex); + aux = this.aux_sk4; + ATZ = zeros(T,nImg[1]*nImg[2],sK[3],nex); + ATZk = reshape(this.aux_sk3,nImg[1]*nImg[2],sK[3]); + + ### reshape the kernels for gemm!: + KK = Array{Array{T,2}}(sK[1],sK[2]); + for k1 = 1:sK[1] + for k2 = 1:sK[2] + @inbounds KK[k1,k2] = K[k1,k2,:,:]; + end + end + ## flipping: + KK = flipdim(flipdim(KK,2),1); + for k = 1:nex + ATZk[:] = zero(T) + ATZk = multConv2Dblock(Z,KK, ATZk,aux,this.shiftX,this.shiftT,k); + @inbounds ATZ[:,:,k] = ATZk; + end + ATZ_out = reshape(ATZ,:,nex); + return ATZ_out +end + +function Jthetamv(this::convGEMMKernel{T},dtheta::Array{T},dummy::Array{T},Y::Array{T},temp=nothing) where {T<:Number} + nex = div(numel(Y),nFeatIn(this)); + Z = Amv(this,dtheta,Y); + return Z +end + +function JthetaTmv(this::convGEMMKernel{T}, Zin::Array{T}, dummy::Array{T}, Yin::Array{T}) where {T<:Number} + # derivative of Z*(A(theta)*Y) w.r.t. theta + sK = this.sK + nImg = this.nImg + nex = div(numel(Yin),prod(nImgIn(this))) + # compute convolution + Y = reshape(Yin, nImg[1], nImg[2], this.sK[3], nex) + Z = reshape(Zin, nImg[1]*nImg[2], this.sK[4], nex) + Zk = reshape(this.aux_sk4, nImg[1]*nImg[2], this.sK[4]); + aux = this.aux_sk3; + ### reshape the kernels for gemm!: + dtheta = zeros(T, sK[1], sK[2], sK[3], sK[4]) + KK = Array{Array{T, 2}}(sK[1], sK[2]) + for k1 = 1:sK[1] + for k2 = 1:sK[2] + @inbounds KK[k1, k2] = zeros(T, sK[3], sK[4]) + end + end + for k = 1:nex + getColumn!(Z, Zk, k) + multConv2Dblock(Y, KK, Zk, aux, this.shiftX, this.shiftT, k, doDerivative = 1) + end + ### Assemble the kernels from gemm!: + for k1 = 1:sK[1] + for k2 = 1:sK[2] + @inbounds dtheta[k1, k2, :, :] = KK[k1, k2] + end + end + dtheta_out = reshape(dtheta, sK[1], sK[2], sK[3], sK[4]) + return dtheta_out +end + + + +function getColumn!(Z::Array{T},Zk::Array{T},k::Int64) where {T<:Number} +for c=1:size(Z,2) + for j=1:size(Z,1) + @inbounds Zk[j,c] = Z[j,c,k]; + end +end +end + +function multConv2Dblock(x::Array{T},K::Array{Array{T,2},2}, y::Array{T}, tin::Array{T},shiftX,shiftT,imIdx;doDerivative = 0) where {T<:Number} + ## y = K*x + ## K - 3X3 array of Arrays + ## x - a vector of length |nImgag+2|*cin (zero padded) + ## y - a vector of length |nImgag|*cout + + nImg1 = size(x,1); + nImg2 = size(x,2); + cin = size(x,3); + cout = size(y,2); + OneType = one(T); + t = reshape(tin,nImg1,nImg2,cin); + kernelWidth = size(K,1); + # y = reshape(y,nImg1*nImg2,cout); # it is supposed to be of this shape... 
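+  # The convolution is evaluated as a sum of shifted GEMMs: for each kernel offset
+  # (p,q) a shifted copy of the image block is written into t (with zero/Dirichlet
+  # padding at the boundary; the periodic variant is left commented out below) and
+  # accumulated into y via BLAS.gemm!. When doDerivative == 1 the transposed product
+  # is accumulated into the kernel blocks K[k] instead (used by JthetaTmv).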
+ k=1; + jt=0;it=0;jt=0;jx=0; + + for p = 1:2:2*kernelWidth + for q = 1:2:2*kernelWidth + lower = nImg2+shiftT[p+1] # Move outside of the forloop for increased speed + upper = nImg1+shiftT[q+1] # Move outside of the forloop for increased speed + for cc = 1:cin + jx = 1+shiftX[p]; # Moving these outside didn't seem to help + jt = 1+shiftT[p]; + if jt > 1 + ###################### Dirichlet ####################### + @inbounds t[:,1:(jt-1),cc] = zero(T); + ###################### Periodic ####################### + # ix = 1+shiftX[q]; + # if shiftT[q] > 0 + #@inbounds t[1,1,cc] = x[end,end,cc,imIdx]; + # end + # for it = (1+shiftT[q]):upper + #@inbounds t[it,1,cc] = x[ix,end,cc,imIdx]; + # ix +=1; + # end + # if shiftT[q+1] < 0 + #@inbounds t[end,1,cc] = x[1,end,cc,imIdx]; + # end + ###################### End Periodic ####################### + end + while jt <= lower + it = 1+shiftT[q]; + ix = 1+shiftX[q]; + if it > 1 + for ii = 1:(it-1) + ###################### Dirichlet ####################### + @inbounds t[ii,jt,cc] = zero(T) #@inbounds t[1:(it-1),jt,cc] = 0.0 - faster unvectorized + ###################### Periodic ####################### + #@inbounds t[ii,jt,cc] = x[end,jx,cc,imIdx]; + end + end + while it <= upper + @inbounds t[it,jt,cc] = x[ix,jx,cc,imIdx]; + it+=1;ix+=1; + end + if it <= nImg1 + for ii = it:nImg1 + ###################### Dirichlet ####################### + @inbounds t[ii,jt,cc] = zero(T) #@inbounds t[it:nImg1,jt,cc] = 0.0 - faster unvectorized + ###################### Periodic ####################### + # @inbounds t[ii,jt,cc] = x[1,jx,cc,imIdx]; + end + end + jt+=1;jx+=1; + + end + if jt <= nImg2 + ###################### Dirichlet ####################### + @inbounds t[:,jt:nImg2,cc] = zero(T); + ###################### Periodic ####################### + # if shiftT[q] > 0 + # @inbounds t[1,end,cc] = x[end,1,cc,imIdx]; + # end + # ix = ix = 1+shiftX[q]; + # for it = (1+shiftT[q]):upper + # @inbounds t[it,end,cc] = x[ix,1,cc,imIdx]; + # ix +=1; + # end + # if shiftT[q+1] < 0 + # @inbounds t[end,end,cc] = x[1,1,cc,imIdx]; + # end + ###################### End Periodic ####################### + end + end + if doDerivative == 0 + BLAS.gemm!('N','T',OneType,reshape(t,nImg1*nImg2,cin),K[k],OneType,y); + else + BLAS.gemm!('T','N',OneType,reshape(t,nImg1*nImg2,cin),y,OneType,K[k]); + end + k+=1; + end + end + return y; +end diff --git a/regemiii/convGEMMKernelTest.jl b/regemiii/convGEMMKernelTest.jl new file mode 100644 index 0000000..f70bfd6 --- /dev/null +++ b/regemiii/convGEMMKernelTest.jl @@ -0,0 +1,40 @@ +using Base.Test +using Meganet +using LinearOperators + + +nImg = [8,10] +sK = [3,3,2,4] +for TYPE=[Float64,Float32] + K = getConvGEMMKernel(TYPE,nImg,sK) + + @testset "adjoint test $TYPE" begin + nex = 2; + theta = initTheta(K) + A = getOp(K,theta); + v = randn(TYPE,nFeatIn(K),nex) + w = randn(TYPE,nFeatOut(K),nex) + + t1 = dot(w,A*v) + t2 = dot(v,A'*w) + # println("adjointTest t1=$t1\t t2=$t2") + @test norm(t1-t2)/norm(t1) < 1e3*eps(TYPE) + end + + @testset "derivative Test" begin + th = initTheta(K); + dth = initTheta(K); + nex = 2; + Y = randn(TYPE,nFeatIn(K),nex); + Z = randn(TYPE,nFeatOut(K),nex); + + t1 = vec(Z)'*vec(Jthetamv(K,dth,th,Y)); + t2 = vec(dth)'*vec(JthetaTmv(K,Z,th,Y)); + # println("derivativeTest t1=$t1\t t2=$t2") + @test norm(t1-t2)/norm(t2) < 1e3*eps(TYPE) + end +end + + + + diff --git a/regemiii/doubleSymLayer.jl b/regemiii/doubleSymLayer.jl new file mode 100644 index 0000000..7fc41fb --- /dev/null +++ b/regemiii/doubleSymLayer.jl @@ -0,0 +1,215 @@ 
+export DoubleSymLayer,getDoubleSymLayer + +""" + Implementation of symmetric double layer model + + Y(theta,Y0) = K(th1)'(activation( K(th1)\*Y0 + trafo.Bin\*th2))) + trafo.Bout\*th3 +""" +mutable struct DoubleSymLayer{T, TK <: AbstractConvKernel{T}, TN <: Union{NN{T}, normLayer{T}}} <: AbstractMeganetElement{T} + activation :: Function # activation function + K :: TK # Kernel model, e.g., convMod + nLayer :: TN # normalization layer + Bin :: Array{T} # Bias inside the nonlinearity + Bout :: Array{T} # bias outside the nonlinearity +end + + +function getDoubleSymLayer(TYPE::Type,K,nLayer::AbstractMeganetElement{T}; + Bin=zeros(TYPE,nFeatOut(K),0),Bout=zeros(TYPE, nFeatIn(K),0), + activation=tanhActivation) where {T <: Number} + BinT = convert.(T, Bin) + BoutT = convert.(T, Bout) + return DoubleSymLayer(activation,K,nLayer,BinT,BoutT); + +end + +function splitWeights(this::DoubleSymLayer{T},theta::Array{T}) where {T<:Number} + + th1 = theta[1:nTheta(this.K)::Int] + cnt = length(th1) + th2 = theta[cnt+(1:size(this.Bin,2))] + cnt = cnt + length(th2) + th3 = theta[cnt+(1:size(this.Bout,2))] + cnt = cnt + length(th3) + + th4 = theta[cnt+1:end]; + + return th1, th2, th3, th4 +end + +function apply(this::DoubleSymLayer{T},theta::Array{T},Yin::Array{T,2},doDerivative=true) where {T<:Number} + + #QZ = [] + tmp = Array{Any}(2) + nex = div(length(Yin),nFeatIn(this))::Int + Y = reshape(Yin,:,nex) + + theta1,theta2,theta3,theta4 = splitWeights(this,theta) + Kop = getOp(this.K,theta1) + KY = Kop*Y + KY,dummy,tmp[1] = apply(this.nLayer,theta4,KY) + Yt = KY + if !isempty(theta2) + Yt .+= this.Bin*theta2 + end + tmp[2] = copy(Yt) + Z::Array{T,2}, = this.activation(Yt,doDerivative) + Z = -(Kop'*Z) + if !isempty(theta3) + Z .+= this.Bout*theta3 + end + return Z, Z, tmp +end + +function nTheta(this::DoubleSymLayer) + return nTheta(this.K) + size(this.Bin,2)+ size(this.Bout,2) + nTheta(this.nLayer) +end + +function nFeatIn(this::DoubleSymLayer) + return nFeatIn(this.K) +end + +function nFeatOut(this::DoubleSymLayer) + return nFeatIn(this.K) +end + +function nDataOut(this::DoubleSymLayer) + return nFeatIn(this) +end + +function initTheta(this::DoubleSymLayer{T}) where {T<:Number} + theta = [vec(initTheta(this.K)); + T(0.01)*ones(T,size(this.Bin,2),1); + T(0.01)*ones(T,size(this.Bout,2),1); + initTheta(this.nLayer)]; + return theta +end + +function Jthetamv(this::DoubleSymLayer{T},dtheta::Array{T},theta::Array{T},Y::Array{T},tmp) where {T<:Number} + + A,dA = this.activation(tmp[2],true) + th1, th2,th3,th4 = splitWeights(this,theta) + dth1,dth2,dth3,dth4 = splitWeights(this,dtheta) + + Kop = getOp(this.K,th1) + dKop = getOp(this.K,dth1) + dY = dKop*Y + + dY = Jmv(this.nLayer,dth4,dY,th4,Kop*Y,copy(tmp[1]))[2] + dY = dY .+ this.Bin*dth2 + + dY = -(Kop'*(dA.*dY) + dKop'*A) .+ this.Bout*dth3 + return dY, dY +end + +function JYmv(this::DoubleSymLayer{T},dY::Array{T},theta::Array{T},Y::Array{T},tmp) where {T<:Number} + + dA = this.activation(tmp[2],true)[2] + + nex = div(length(dY),nFeatIn(this)) + dY = reshape(dY,:,nex) + Y = reshape(Y,:,nex) + th1, th2,th3,th4 = splitWeights(this,theta) + + Kop = getOp(this.K,th1) + dY = Kop*dY + dY = JYmv(this.nLayer,dY,th4,Kop*Y,copy(tmp[1]))[2] + dZ = -(Kop'*(dA.*dY)) + return dZ, dZ +end + +function Jmv(this::DoubleSymLayer{T},dtheta::Array{T},dY::Array{T},theta::Array{T},Y::Array{T},tmp) where {T<:Number} + A,dA = this.activation(copy(tmp[2]),true) + nex = div(length(Y),nFeatIn(this)) + + th1, th2,th3,th4 = splitWeights(this,theta) + dth1,dth2,dth3,dth4 = 
splitWeights(this,dtheta) + + Kop = getOp(this.K,th1) + dKop = getOp(this.K,dth1) + if length(dY)>1 + dY = reshape(dY,:,nex) + KdY = Kop*dY + else + KdY = 0 + end + dY = dKop*Y+KdY + dY = Jmv(this.nLayer,dth4,dY,th4,Kop*Y,tmp[1])[2] + + dY = reshape(dY,:,nex) + if !isempty(dth2) + dY .+= this.Bin*dth2 + end + + dY = -(Kop'*(dA.*dY) + dKop'*A) + if !isempty(dth3) + dth3 .+= this.Bout*dth3 + end + + return dY, dY +end + + +function JthetaTmv(this::DoubleSymLayer{T},Z::Array{T},dummy::Array{T},theta::Array{T},Y::Array{T},tmp) where {T<:Number} + + nex = div(length(Y),nFeatIn(this)) + Z = reshape(Z,:,nex) + th1,th2,th3,th4 = splitWeights(this,theta) + Kop = getOp(this.K,th1) + A,dA = this.activation(tmp[2],true) + + dth3 = vec(sum(this.Bout'*Z,2)) + dAZ = dA.*(Kop*Z) + dth2 = vec(sum(this.Bin'*dAZ,2)) + + dth4,dAZ = JTmv(this.nLayer,dAZ,zeros(T,0),th4,Kop*Y,tmp[1]) + + dth1 = JthetaTmv(this.K,A,zeros(T,0),Z) + dth1 += JthetaTmv(this.K,dAZ,zeros(T,0),Y) + dtheta = [-vec(dth1); -vec(dth2); vec(dth3); -vec(dth4)] + return dtheta +end + +function JYTmv(this::DoubleSymLayer{T},Zin::Array{T},dummy::Array{T},theta::Array{T},Y::Array{T},tmp) where {T<:Number} + + nex = div(length(Y),nFeatIn(this)) + Z = reshape(Zin,:,nex) + th1,th2,th3,th4 = splitWeights(this,theta) + Kop = getOp(this.K,th1) + A,dA = this.activation(tmp[2],true) + + dAZ = dA.*(Kop*Z) + dAZ = JYTmv(this.nLayer,dAZ,(T)[],th4,Kop*Y,tmp[1]) + dAZ_out = reshape(dAZ,:,nex) + dY = -(Kop'*dAZ_out) + return dY +end + +function JTmv(this::DoubleSymLayer{T}, Zin::Array{T}, dummy::Array{T}, + theta::Array{T}, Yin::Array{T}, tmp) where {T<:Number} + + nex = div(length(Yin),nFeatIn(this)) + Z = reshape(Zin, :, nex) + Yt = reshape(tmp[2]::Array{T,2},:,nex) + Y = reshape(Yin,:,nex) + th1, th2, th3, th4 = splitWeights(this,theta) + #Kop = getOp(this.K,th1) + A::Array{T,2}, dA::Array{T,2} = this.activation(Yt,true) + + dth3 = vec(sum(this.Bout'*Z,2)) + + KopZ = Amv(this.K, th1, Z) + dAZ1 = dA.*KopZ + + dth2 = vec(sum(this.Bin'*dAZ1,2)) + KopY = Amv(this.K, th1, Y) + dth4, dAZ2 = JTmv(this.nLayer,dAZ1,zeros(T,0),th4,KopY,tmp[1]) + dth1 = JthetaTmv(this.K,dAZ2,zeros(T,0),Y) + dth1 = dth1 + JthetaTmv(this.K,A,(T)[],Z) + dtheta = [-vec(dth1); -vec(dth2); vec(dth3);-vec(dth4)] + + dAZ_out = reshape(dAZ2,:,nex) + KopTdAZ = ATmv(this.K, th1, dAZ_out) + dY = -KopTdAZ + return dtheta, dY +end diff --git a/regemiii/doubleSymLayerTest.jl b/regemiii/doubleSymLayerTest.jl new file mode 100644 index 0000000..ac5c964 --- /dev/null +++ b/regemiii/doubleSymLayerTest.jl @@ -0,0 +1,61 @@ +using Base.Test +using Meganet + +for TYPE=[Float64,Float32] + K = getDenseKernel(TYPE,[32,18]) + nex = 8 + Bin = randn(TYPE,nFeatOut(K),4) + Bout = randn(TYPE,nFeatIn(K),3) + nLayer = getTVNormLayer(TYPE,[8,4]) + L = getDoubleSymLayer(TYPE,K,nLayer,Bin=Bin,Bout=Bout) + @testset "doubleSymLayer (dense/TV) $TYPE" begin + testAbstractMeganetElement(L) + end + + K = getDenseKernel(TYPE,[32,18]) + nex = 8 + Bin = randn(TYPE,nFeatOut(K),4) + Bout = randn(TYPE,nFeatIn(K),3) + nLayer = getBatchNormLayer(TYPE,[8,4]) + L = getDoubleSymLayer(TYPE,K,nLayer,Bin=Bin,Bout=Bout) + @testset "doubleSymLayer (dense/Batch) $TYPE" begin + testAbstractMeganetElement(L) + end + + nImg = [32 32] + nc = 16 + nex = 50 + K = getSparseConvKernel2D(TYPE,nImg,[3,3,1,nc]) + Bin = randn(TYPE,nFeatOut(K),4) + Bout = randn(TYPE,nFeatIn(K),3) + nLayer = getBatchNormLayer(TYPE,[prod(nImg),nc],isTrainable=false) + L = getDoubleSymLayer(TYPE,K,nLayer,Bin=Bin,Bout=Bout) + @testset "doubleSymLayer (conv/Batch/not trainable) 
$TYPE" begin + testAbstractMeganetElement(L,nex=nex) + end + + + nImg = [8 4] + nc = 3 + nex = 4 + K = getSparseConvKernel2D(TYPE,nImg,[3,3,1,nc]) + Bin = randn(TYPE,nFeatOut(K),4) + Bout = randn(TYPE,nFeatIn(K),3) + nLayer = getBatchNormLayer(TYPE,[prod(nImg),nc]) + L = getDoubleSymLayer(TYPE,K,nLayer,Bin=Bin,Bout=Bout) + @testset "doubleSymLayer (conv/Batch) $TYPE" begin + testAbstractMeganetElement(L,nex=nex) + end + + nImg = [16 8] + nc = 6 + nex = 8 + K = getSparseConvKernel2D(TYPE,nImg,[3,3,1,nc]) + Bin = randn(TYPE,nFeatOut(K),4) + Bout = randn(TYPE,nFeatIn(K),3) + nLayer = getTVNormLayer(TYPE,[prod(nImg),nc]) + L = getDoubleSymLayer(TYPE,K,nLayer,Bin=Bin,Bout=Bout) + @testset "doubleSymLayer (conv/TV) $TYPE" begin + testAbstractMeganetElement(L) + end +end diff --git a/regemiii/reluActivation.jl b/regemiii/reluActivation.jl new file mode 100644 index 0000000..fe676e6 --- /dev/null +++ b/regemiii/reluActivation.jl @@ -0,0 +1,44 @@ +export reluActivation + +""" + relu activation A = relu(Y) + + Input: + + Y - array of features + + Optional Input: + + doDerivative - flag for computing derivative, set via varargin + Ex: reluActivation(Y,true); + + Output: + + A - activation + dA - derivatives +""" +function reluActivation(Y::Array{T},doDerivative::Bool=false) where {T} + +A = max.(Y,zero(T)); + +if doDerivative + dA = sign.(A); +else + dA = zeros(T,0) +end + +return A,dA +end + + + +function reluActivation!(A::Array{T},dA::Array{T} = zeros(T,size(A)),doDerivative::Bool=false) where {T} +A .= max.(A,zero(T)); +if doDerivative + dA .= sign.(A); +else + dA = zeros(T,0) +end + +return A,dA +end diff --git a/regemiii/singleLayer.jl b/regemiii/singleLayer.jl new file mode 100644 index 0000000..d7af01f --- /dev/null +++ b/regemiii/singleLayer.jl @@ -0,0 +1,158 @@ +export singleLayer,getSingleLayer + +mutable struct singleLayer{T, TK <: AbstractConvKernel{T}, TN <: Union{NN{T}, normLayer{T}}} <: AbstractMeganetElement{T} + activation :: Function # activation function + K :: TK # transformation type + nLayer :: TN # normalization layer + Bin :: Array{T} # bias inside nonlinearity + Bout :: Array{T} # bias outside nonlinearity + +end + +function getSingleLayer(TYPE::Type, K,nLayer;Bin=zeros(TYPE,nFeatOut(K),0),Bout=zeros(TYPE,nFeatOut(K),0),activation=tanhActivation) + singleLayer(activation,K,nLayer,Bin,Bout); +end + + +function splitWeights(this::singleLayer{T},theta::Array{T}) where {T <: Number} + th1 = theta[1:nTheta(this.K)] + cnt = length(th1) + th2 = theta[cnt+(1:size(this.Bin,2))] + cnt += length(th2) + th3 = theta[cnt+(1:size(this.Bout,2))] + cnt += length(th3) + th4 = theta[cnt+(1:nTheta(this.nLayer))] + cnt += length(th4) + if cnt!=length(theta); error("splitWeights: length does not match"); end + return th1, th2, th3, th4 +end + +function apply(this::singleLayer{T},theta::Array{T},Yin::Array{T},doDerivative=false) where {T <: Number} + tmp = Array{Any}(2) + nex = div(length(Yin),nFeatIn(this)) + Y = reshape(Yin,:,nex) + th1,th2,th3,th4 = splitWeights(this,theta) + + Yout::Array{T,2} = getOp(this.K,th1)*Y + Yout .+= this.Bin * th2 + Yout,dummy,tmp[1] = apply(this.nLayer,th4,Yout,doDerivative) + Yout,tmp[2] = this.activation(Yout,doDerivative) + Yout .+= this.Bout*th3 + Ydata = Yout + return Ydata, Yout, tmp +end + +function nTheta(this::singleLayer) + return nTheta(this.K)+size(this.Bin,2) + size(this.Bout,2) + nTheta(this.nLayer) +end + +function nFeatIn(this::singleLayer) + return nFeatIn(this.K) +end + +function nFeatOut(this::singleLayer) + return nFeatOut(this.K) +end + 
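+# nDataOut: number of features this layer passes along as data; for singleLayer it
+# coincides with nFeatOut.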
+function nDataOut(this::singleLayer) + return nFeatOut(this.K) +end + +function initTheta(this::singleLayer{T}) where {T <: Number} + return [vec(initTheta(this.K)); convert(T,0.01)*ones(T,size(this.Bin,2),1) ; convert(T,0.01)*ones(T,size(this.Bout,2),1); initTheta(this.nLayer) ] +end + + +function Jthetamv(this::singleLayer{T},dtheta::Array{T},theta::Array{T},Yin::Array{T},tmp) where {T <: Number} + dA::Array{T,2} = tmp[2] + nex = div(length(Yin),nFeatIn(this)) + Y = reshape(Yin,:,nex) + + th1,th2,th3,th4 = splitWeights(this,theta) + dth1,dth2,dth3,dth4 = splitWeights(this,dtheta) + dZ::Array{T,2} = Jthetamv(this.K,dth1,th1,Y) .+ this.Bin*dth2 + Kop = getOp(this.K,th1) + dZ = Jmv(this.nLayer,dth4,dZ,th4,Kop*Y.+this.Bin*th2,tmp[1])[2] + dZ .*= dA + dZ .+= this.Bout*dth3 + return dZ, dZ +end + +function JYmv(this::singleLayer{T},dYin::Array{T},theta::Array{T},Y::Array{T},tmp) where {T <: Number} + dA = tmp[2] + nex = div(length(dYin),nFeatIn(this)) + th1,th2,th3,th4 = splitWeights(this,theta) + Kop = getOp(this.K,th1) + dY = reshape(dYin,:,nex) + dZ = Kop*dY + dZ = JYmv(this.nLayer,dZ,th4,Kop*Y.+this.Bin*th2,tmp[1])[2] + # dZ = dA.*dZ + dZ .*= dA + return dZ,dZ +end + +function Jmv(this::singleLayer{T},dtheta::Array{T},dYin::Array{T},theta::Array{T},Yin::Array{T},tmp) where {T <: Number} + dA::Array{T,2} = tmp[2] + nex = div(length(Yin),nFeatIn(this)) + th1,th2,th3,th4 = splitWeights(this,theta) + dth1,dth2,dth3,dth4 = splitWeights(this,dtheta) + + dY = reshape(dYin,:,nex); + Kop = getOp(this.K,th1) + dZ::Array{T, 2} = Kop*dY; + + Y = reshape(Yin,:,nex); + dZ += Jthetamv(this.K,dth1,th1,Y) .+ this.Bin*dth2 + dZ = Jmv(this.nLayer,dth4,dZ,th4,Kop*Y.+this.Bin*th2,tmp[1])[2] + + dZ .*= dA + dZ .+= this.Bout*dth3 + return dZ,dZ +end + +function JTmv(this::singleLayer{T},Zin::Array{T},dummy::Array{T},theta::Array{T},Y::Array{T},tmp) where {T <: Number} + dA::Array{T,2} = tmp[2] + nex = div(length(Y),nFeatIn(this)) + Z = reshape(Zin,:,nex) + th1,th2,th3,th4 = splitWeights(this,theta) + Kop = getOp(this.K,th1) + + dth3 = vec(sum(this.Bout'*Z,2)) + dAZ = dA.*Z + dth4,dAZ = JTmv(this.nLayer,dAZ,zeros(T,0),th4,Kop*Y.+this.Bin*th2,tmp[1]) # this not type stable + dth2 = vec(sum(this.Bin'*reshape(dAZ,:,nex),2)) + dth1 = JthetaTmv(this.K, dAZ,theta,Y) # this not type stable + + dY = Kop'*reshape(dAZ,:,nex) + dtheta = [vec(dth1); vec(dth2); vec(dth3); vec(dth4)] + + return dtheta, dY + +end + +function JthetaTmv(this::singleLayer{T},Zin::Array{T},dummy::Array{T},theta::Array{T},Y::Array{T},tmp) where {T <: Number} + dA = tmp[2] + nex = div(length(Zin),nFeatOut(this)) + th1,th2,th3,th4 = splitWeights(this,theta) + + Z = reshape(Zin,:,nex); + dAZ = dA.*Z; + dth3 = vec(sum(this.Bout'*Z,2)); + Kop = getOp(this.K,th1) + dth4,dAZ = JTmv(this.nLayer,dAZ,(T)[],th4,Kop*Y.+this.Bin*th2,tmp[1]) + dth1 = JthetaTmv(this.K,dAZ,theta,Y); + dth2 = vec(sum(this.Bin'*reshape(dAZ,:,nex),2)); + return [vec(dth1); vec(dth2); vec(dth3); vec(dth4)]; +end + +function JYTmv(this::singleLayer{T},Zin::Array{T},dummy::Array{T},theta::Array{T},Y::Array{T},tmp) where {T <: Number} + dA::Array{T,2} = tmp[2] + nex = div(length(Y),nFeatIn(this)) + th1,th2,th3,th4 = splitWeights(this,theta) + Kop = getOp(this.K,th1) + Z = reshape(Zin,:,nex) + dAZ::Array{T,2} = dA.*Z + dAZ = JYTmv(this.nLayer,dAZ,(T)[],th4,Kop*Y.+this.Bin*th2,tmp[1]) + ret::Array{T,2} = Kop'*reshape(dAZ,:,nex) + return ret #TODO: @lars or eldad rename this variable as I'm not sure what to call it +end diff --git a/src/optimization/.dnnBatchObjFctn.jl.swp 
b/src/optimization/.dnnBatchObjFctn.jl.swp new file mode 100644 index 0000000000000000000000000000000000000000..5b8b453a39133311739b11ea89142be426a106c5 GIT binary patch literal 1024 zcmYc?$V<%2S1{5u)iY*50?Mik47vHGxtS%2NTS%86(vQ9$tAkQ$wi3;so0f(MfEfC zb5r%9rt0fumF8q7>X#Ylnd$qcrYGj5mgpB3CF|!Glw{^+Rwb5X=I80B Date: Tue, 13 Feb 2018 17:33:49 -0800 Subject: [PATCH 04/22] Eran's corrected code added --- benchmarks/CIFAR10/cifar10_512_64.jl | 2 +- examples/EResNN_CIFAR10.jl | 184 +++++------ src/Meganet.jl | 94 +++--- src/activations/reluActivation.jl | 75 +++-- src/integrators/NN.jl | 13 +- src/integrators/connector.jl | 2 + src/kernelTypes/abstractConvKernel.jl | 100 +++--- src/kernelTypes/convFFTKernel.jl | 252 +++++++-------- src/kernelTypes/convGEMMKernel.jl | 432 ++++++++++++++------------ src/layers/doubleSymLayer.jl | 430 ++++++++++++------------- src/layers/normLayer.jl | 12 +- src/layers/singleLayer.jl | 316 +++++++++---------- src/optimization/sgd.jl | 10 +- test/kernel/convFFTKernelTest.jl | 72 ++--- test/kernel/convGEMMKernelTest.jl | 95 +++--- test/layer/doubleSymLayerTest.jl | 122 ++++---- 16 files changed, 1132 insertions(+), 1079 deletions(-) diff --git a/benchmarks/CIFAR10/cifar10_512_64.jl b/benchmarks/CIFAR10/cifar10_512_64.jl index 2c444f9..418dbc8 100644 --- a/benchmarks/CIFAR10/cifar10_512_64.jl +++ b/benchmarks/CIFAR10/cifar10_512_64.jl @@ -67,7 +67,7 @@ pLoss = getSoftMaxLoss(TYPE); objFun = dnnObjFctn(net,pLoss,pRegTh,pRegW) opt = getSGDsolver(TYPE,learningRate=1e-2,maxEpochs=1,miniBatch=miniBatchSize,out=true) -W = 0.1*vec(randn(TYPE,10,nFeatOut(net)+1)); +W = 0.01*vec(randn(TYPE,10,nFeatOut(net)+1)); W = min.(W,.2); W = max.(W,-.2); W = convert(Array{TYPE},W); diff --git a/examples/EResNN_CIFAR10.jl b/examples/EResNN_CIFAR10.jl index 9afcbbc..1330c28 100644 --- a/examples/EResNN_CIFAR10.jl +++ b/examples/EResNN_CIFAR10.jl @@ -1,92 +1,92 @@ -using MAT, Meganet - -n = 256; -Y_train,C_train,Y_test,C_test = getCIFAR10(n,Pkg.dir("Meganet")*"/data/CIFAR10/"); - -# using PyPlot -# y = Y_train[:,50]; y = y - minimum(y); y = y./maximum(y); -# y = reshape(y,32,32,3); -# y[:,:,1] = y[:,:,1]';y[:,:,2] = y[:,:,2]';y[:,:,3] = y[:,:,3]'; -# figure(); imshow(y) - -miniBatchSize = 64; -nImg = [32; 32] -cin = 3 -nc = [16;32;64;64] -nt = 2*[1;1;1] -h = [1.;1.;1.] - -TYPE = Float32; - -getConvKernel = (nImg,sK) -> getConvGEMMKernel(TYPE,nImg,sK); -# getConvKernel = (nImg,sK) -> getConvFFTKernel(TYPE,nImg,sK); -#getConvKernel = (nImg,sK) -> getSparseConvKernel2D(TYPE,nImg,sK); - -# opening layer -K1 = getConvKernel(nImg,[3,3,cin,nc[1]]); - -nL = getBatchNormLayer(TYPE,[prod(nImg);nc[1]],isTrainable=true); -blocks = [getSingleLayer(TYPE,K1,nL)] - -for k=1:length(nt) - # ResNN layers - K2 = getConvKernel(nImg,[3,3,nc[k],nc[k]]) - nL = getBatchNormLayer(TYPE,[prod(nImg);nc[k]],isTrainable=true) - L2 = getDoubleSymLayer(TYPE,K2,nL) - RN = getResNN(TYPE,L2,nt[k],h[k]) - - if k (24, 500))) -# end +using MAT, Meganet +# BLAS.set_num_threads(1) +n = 512; +Y_train,C_train,Y_test,C_test = getCIFAR10(n,Pkg.dir("Meganet")*"/data/CIFAR10/"); + +# using PyPlot +# y = Y_train[:,50]; y = y - minimum(y); y = y./maximum(y); +# y = reshape(y,32,32,3); +# y[:,:,1] = y[:,:,1]';y[:,:,2] = y[:,:,2]';y[:,:,3] = y[:,:,3]'; +# figure(); imshow(y) + +miniBatchSize = 64; +nImg = [32; 32] +cin = 3 +nc = [16;32;64;64] +nt = 2*[1;1;1] +h = [1.;1.;1.] 
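+# Block hyperparameters used below: nc holds the convolution channel counts, nt[k] the
+# number of ResNN steps in block k, and h[k] the corresponding ResNN step size.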
+ +TYPE = Float32; + +getConvKernel = (nImg,sK) -> getConvGEMMKernel(TYPE,nImg,sK); +# getConvKernel = (nImg,sK) -> getConvFFTKernel(TYPE,nImg,sK); +#getConvKernel = (nImg,sK) -> getSparseConvKernel2D(TYPE,nImg,sK); + +# opening layer +K1 = getConvKernel(nImg,[3,3,cin,nc[1]]); + +nL = getBatchNormLayer(TYPE,[prod(nImg);nc[1]],isTrainable=true); +blocks = [getSingleLayer(TYPE,K1,nL)] + +for k=1:length(nt) + # ResNN layers + K2 = getConvKernel(nImg,[3,3,nc[k],nc[k]]) + nL = getBatchNormLayer(TYPE,[prod(nImg);nc[k]],isTrainable=true) + L2 = getDoubleSymLayer(TYPE,K2,nL) + RN = getResNN(TYPE,L2,nt[k],h[k]) + + if k (24, 500))) +# end \ No newline at end of file diff --git a/src/Meganet.jl b/src/Meganet.jl index 30d1aaf..13cb3e9 100644 --- a/src/Meganet.jl +++ b/src/Meganet.jl @@ -1,45 +1,49 @@ -module Meganet - -using LinearOperators, MAT, Base.Test - -import JLD, BenchmarkTools - -include("AbstractMeganetElement.jl") - -include("activations/tanhActivation.jl") - -include("integrators/NN.jl") -include("integrators/connector.jl") -include("integrators/ResNN.jl") -include("kernelTypes/abstractConvKernel.jl"); -include("kernelTypes/denseKernel.jl") -include("kernelTypes/sparseKernel.jl") -include("kernelTypes/convFFTKernel.jl"); -include("kernelTypes/convGEMMKernel.jl"); -include("kernelTypes/convCircKernel.jl"); - - - -include("layers/affineScalingLayer.jl") -include("layers/normLayer.jl") -include("layers/doubleSymLayer.jl") -include("layers/singleLayer.jl") - -include("loss/softMaxLoss.jl") -include("regularization/TikhonovReg.jl") - -include("optimization/dnnBatchObjFctn.jl") -include("optimization/sgd.jl") - -include("utils/getConvMatPeriodic.jl") -include("utils/testAbstractMeganetElement.jl") -include("utils/testLossFunction.jl") -include("utils/utilities.jl"); -include("utils/checkDerivative.jl"); -include("utils/normalizeData.jl"); - -include("utils/getCIFAR10.jl"); -include("utils/Benchmark.jl"); - - -end +module Meganet + +using LinearOperators, MAT, Base.Test + +import JLD, BenchmarkTools + +include("AbstractMeganetElement.jl") + +include("activations/tanhActivation.jl") +include("activations/reluActivation.jl") +include("activations/identityActivation.jl") + + +include("integrators/NN.jl") +include("integrators/connector.jl") +include("integrators/ResNN.jl") +include("kernelTypes/abstractConvKernel.jl"); +include("kernelTypes/denseKernel.jl") +include("kernelTypes/sparseKernel.jl") +include("kernelTypes/convFFTKernel.jl"); +include("kernelTypes/convGEMMKernel.jl"); +include("kernelTypes/convCircKernel.jl"); +# include("kernelTypes/convDiagKernel.jl"); + + + +include("layers/affineScalingLayer.jl") +include("layers/normLayer.jl") +include("layers/doubleSymLayer.jl") +include("layers/singleLayer.jl") + +include("loss/softMaxLoss.jl") +include("regularization/TikhonovReg.jl") + +include("optimization/dnnBatchObjFctn.jl") +include("optimization/sgd.jl") + +include("utils/getConvMatPeriodic.jl") +include("utils/testAbstractMeganetElement.jl") +include("utils/testLossFunction.jl") +include("utils/utilities.jl"); +include("utils/checkDerivative.jl"); +include("utils/normalizeData.jl"); + +include("utils/getCIFAR10.jl"); +include("utils/Benchmark.jl"); + + +end diff --git a/src/activations/reluActivation.jl b/src/activations/reluActivation.jl index cba57df..fe676e6 100644 --- a/src/activations/reluActivation.jl +++ b/src/activations/reluActivation.jl @@ -1,31 +1,44 @@ -export reluActivation - -""" - relu activation A = relu(Y) - - Input: - - Y - array of features - - Optional Input: - - 
doDerivative - flag for computing derivative, set via varargin - Ex: reluActivation(Y,true); - - Output: - - A - activation - dA - derivatives -""" -function reluActivation(Y::Array{T},doDerivative::Bool=false) where {T} - -Y = max.(Y,0); - -if doDerivative - dA = sign(Y); -else - dA = zeros(T,0) -end - -return A,dA -end +export reluActivation + +""" + relu activation A = relu(Y) + + Input: + + Y - array of features + + Optional Input: + + doDerivative - flag for computing derivative, set via varargin + Ex: reluActivation(Y,true); + + Output: + + A - activation + dA - derivatives +""" +function reluActivation(Y::Array{T},doDerivative::Bool=false) where {T} + +A = max.(Y,zero(T)); + +if doDerivative + dA = sign.(A); +else + dA = zeros(T,0) +end + +return A,dA +end + + + +function reluActivation!(A::Array{T},dA::Array{T} = zeros(T,size(A)),doDerivative::Bool=false) where {T} +A .= max.(A,zero(T)); +if doDerivative + dA .= sign.(A); +else + dA = zeros(T,0) +end + +return A,dA +end diff --git a/src/integrators/NN.jl b/src/integrators/NN.jl index 6f28a2a..2da950d 100644 --- a/src/integrators/NN.jl +++ b/src/integrators/NN.jl @@ -1,5 +1,6 @@ export NN,getNN,initTheta - +# using TimerOutputs +# to = TimerOutput() """ NN Neural Network block @@ -63,10 +64,11 @@ end # --------- forward problem ---------- function apply(this::NN{T},theta::Array{T},Y0::Array{T,2},doDerivative=true) where {T<:Number} + # tic() Y::Array{T,2} = copy(Y0) nex = div(length(Y),nFeatIn(this))::Int nt = length(this.layers) - + tmp = Array{Any}(nt+1,2) if doDerivative tmp[1,1] = Y0 @@ -76,7 +78,8 @@ function apply(this::NN{T},theta::Array{T},Y0::Array{T,2},doDerivative=true) whe cnt = 0 for i=1:nt ni = nTheta(this.layers[i])::Int - + # @timeit to "Apply" apply(this.layers[i],theta[cnt+(1:ni)],Y,doDerivative) + Yd::Array{T,2}, Y, tmp[i,2] = apply(this.layers[i],theta[cnt+(1:ni)],Y,doDerivative) if this.outTimes[i]==1 Ydata = [Ydata; this.Q*Yd] @@ -86,7 +89,9 @@ function apply(this::NN{T},theta::Array{T},Y0::Array{T,2},doDerivative=true) whe end cnt = cnt + ni end - + # println("Measuring time for all the layers in NN ",toc()) + # println(to) + # error("After 1 apply call from NN") return Ydata,Y,tmp end diff --git a/src/integrators/connector.jl b/src/integrators/connector.jl index 5824642..4f6be03 100644 --- a/src/integrators/connector.jl +++ b/src/integrators/connector.jl @@ -19,6 +19,7 @@ end function apply(this::Connector{T},theta::Array{T},Y0::Array{T},doDerivative=true) where {T <: Number} + # tic() nex = div(length(Y0),nFeatIn(this)) Y0 = reshape(Y0,:,nex) Y = this.K*Y0 .+ this.b @@ -27,6 +28,7 @@ function apply(this::Connector{T},theta::Array{T},Y0::Array{T},doDerivative=true Ydata = this.Q*Y end tmp = Y0; + # println("Measuring time for connector ",toc()) return Ydata, Y, tmp end diff --git a/src/kernelTypes/abstractConvKernel.jl b/src/kernelTypes/abstractConvKernel.jl index 182a2ba..6e49529 100644 --- a/src/kernelTypes/abstractConvKernel.jl +++ b/src/kernelTypes/abstractConvKernel.jl @@ -1,50 +1,50 @@ -export nImgIn, nImgOut, nFeatIn, nFeatOut, nTheta, getOp, initTheta, AbstractConvKernel - -abstract type AbstractConvKernel{T} <: AbstractMeganetElement{T} end - -## All convKernel types are assumed to have fields nImage (size of the image) and sK (size of the Convolution Kernel) - -function nImgIn(this::AbstractConvKernel) - return [this.nImg[1]; this.nImg[2]; this.sK[3]] -end - -function nImgOut(this::AbstractConvKernel) - return [this.nImg[1]; this.nImg[2]; this.sK[4]] -end - -function 
nFeatIn(this::AbstractConvKernel) - return prod(nImgIn(this)); -end -function nFeatOut(this::AbstractConvKernel) - return prod(nImgOut(this)); -end - -function nTheta(this::AbstractConvKernel) - return prod(this.sK); -end - - -function getOp(this::AbstractConvKernel{T},theta::Array{T}) where {T <: Number} - - m = prod(nImgOut(this)) - n = prod(nImgIn(this)) - - A = LinearOperator{T}(m,n,false,false, - v -> Amv(this,theta,v), - Nullable{Function}(), - w -> ATmv(this,theta,w)) - return A -end - -function initTheta(this::AbstractConvKernel{T}) where {T <: Number} - - sd = T(0.1); - theta = sd*randn(T,prod(this.sK)); - #id1 = find(theta>2*sd); - #theta(id1[:]) = randn(numel(id1),1); - - #id2 = find(theta< -2*sd); - #theta(id2(:)) = randn(numel(id2),1); - #theta = max(min(2*sd, theta),-2*sd); - return theta -end +export nImgIn, nImgOut, nFeatIn, nFeatOut, nTheta, getOp, initTheta, AbstractConvKernel + +abstract type AbstractConvKernel{T} <: AbstractMeganetElement{T} end + +## All convKernel types are assumed to have fields nImage (size of the image) and sK (size of the Convolution Kernel) + +function nImgIn(this::AbstractConvKernel) + return [this.nImg[1]; this.nImg[2]; this.sK[3]] +end + +function nImgOut(this::AbstractConvKernel) + return [this.nImg[1]; this.nImg[2]; this.sK[4]] +end + +function nFeatIn(this::AbstractConvKernel) + return prod(nImgIn(this)); +end +function nFeatOut(this::AbstractConvKernel) + return prod(nImgOut(this)); +end + +function nTheta(this::AbstractConvKernel) + return prod(this.sK); +end + + +function getOp(this::AbstractConvKernel{T},theta::Array{T}) where {T <: Number} + + m = prod(nImgOut(this)) + n = prod(nImgIn(this)) + + A = LinearOperator{T}(m,n,false,false, + v -> Amv(this,theta,v), + Nullable{Function}(), + w -> ATmv(this,theta,w)) + return A +end + +function initTheta(this::AbstractConvKernel{T}) where {T <: Number} + + sd = T(0.01); + theta = sd*randn(T,prod(this.sK)); + #id1 = find(theta>2*sd); + #theta(id1[:]) = randn(numel(id1),1); + + #id2 = find(theta< -2*sd); + #theta(id2(:)) = randn(numel(id2),1); + #theta = max(min(2*sd, theta),-2*sd); + return theta +end diff --git a/src/kernelTypes/convFFTKernel.jl b/src/kernelTypes/convFFTKernel.jl index cfa480b..97f02d0 100644 --- a/src/kernelTypes/convFFTKernel.jl +++ b/src/kernelTypes/convFFTKernel.jl @@ -1,126 +1,126 @@ -export convFFTKernel, getEigs,getConvFFTKernel -## For the functions nImgIn, nImgOut, nFeatIn, nFeatOut, nTheta, getOp, initTheta : see AbstractConvKernel.jl -## All convKernel types are assumed to have fields nImage and sK -mutable struct convFFTKernel{T} <: AbstractConvKernel{T} - nImg :: Array{Int,1} - sK :: Array{Int,1} - S :: Array{Complex{T},2} -end - -function getConvFFTKernel(TYPE::Type,nImg,sK) - S = getEigs(Complex{TYPE},nImg,sK) - return convFFTKernel{TYPE}(nImg,sK,S) -end - -function getEigs(TYPE,nImg,sK) - S = zeros(TYPE,prod(nImg),prod(sK[1:2])); - for k=1:prod(sK[1:2]) - Kk = zeros(sK[1],sK[2]); - Kk[k] = 1; - Ak = getConvMatPeriodic(TYPE,Kk,[nImg[1],nImg[2], 1]); - Akk = full(convert(Array{TYPE},Ak[:,1])); - S[:,k] = vec(fft2(reshape(Akk,nImg[1],nImg[2]) )); - end - return S -end - -export Amv -function Amv(this::convFFTKernel{T},theta::Array{T},Y::Array{T}) where {T<:Number} - - nex = div(numel(Y),prod(nImgIn(this))) - - # compute convolution - AY = zeros(Complex{T},tuple([nImgOut(this); nex]...)); - theta = reshape(theta, tuple([prod(this.sK[1:2]); this.sK[3:4]]...)); - Yh = ifft2(reshape(Y,tuple([nImgIn(this); nex]...))); - #### allocate stuff for the loop - Sk = 
zeros(Complex{T},tuple(nImgOut(this)...)) - #T = zeros(Complex{eltype(Y)},tuple(nImgOut(this)...)) - nn = nImgOut(this); nn[3] = 1; - sumT = zeros(Complex{T},tuple([nn;nex]...)) - #### - - for k=1:this.sK[4] - Sk = reshape(this.S*theta[:,:,k],tuple(nImgIn(this)...)); - #T = Sk .* Yh; - #sumT = sum(T,3) - sumT = hadamardSum(sumT,Yh,Sk) - AY[:,:,k,:] = sumT[:,:,1,:]; - end - AY = real(fft2(AY)); - Y = reshape(AY,:,nex); - return Y -end - -function ATmv(this::convFFTKernel{T},theta::Array{T},Z::Array{T}) where {T<:Number} - - nex = div(numel(Z),prod(nImgOut(this))); - ATY = zeros(Complex{T},tuple([nImgIn(this); nex]...)); - theta = reshape(theta, prod(this.sK[1:2]),this.sK[3],this.sK[4]); - #### allocate stuff for the loop - Sk = zeros(Complex{T},tuple(nImgOut(this)...)) - #T = zeros(Complex{eltype(Z)},tuple(nImgOut(this)...)) - nn = nImgOut(this); nn[3] = 1; - sumT = zeros(Complex{T},tuple([nn;nex]...)) - #### - - Yh = fft2(reshape(Z,tuple([nImgOut(this); nex]...))); - for k=1:this.sK[3] - tk = theta[:,k,:] - #if size(this.S,2) == 1 - # tk = reshape(tk,1,:); - #end - Sk = reshape(this.S*tk,tuple(nImgOut(this)...)); - #T = Sk.*Yh; - #sumT = sum(T,3) - sumT = hadamardSum(sumT,Yh,Sk) - ATY[:,:,k] = sumT[:,:,1]; - end - ATY = real(ifft2(ATY)); - ATY = reshape(ATY,:,nex); - return ATY -end - -function Jthetamv(this::convFFTKernel{T},dtheta::Array{T},dummy::Array{T},Y::Array{T},temp=nothing) where {T<:Number} - - nex = div(numel(Y),nFeatIn(this)); - Y = reshape(Y,:,nex); - Z = Amv(this,dtheta,Y); - return Z -end - -function JthetaTmv(this::convFFTKernel{T},Z::Array{T},dummy::Array{T},Y::Array{T}) where {T<:Number} - # derivative of Z*(A(theta)*Y) w.r.t. theta - - nex = div(numel(Y),nFeatIn(this)); - - dth1 = zeros(this.sK[1]*this.sK[2],this.sK[3],this.sK[4]); - Y = permutedims(reshape(Y,tuple([nImgIn(this); nex ]...)),[1 2 4 3]); - Yh = reshape(fft2(Y),prod(this.nImg[1:2]),nex*this.sK[3]); - Zh = permutedims(ifft2(reshape(Z,tuple([nImgOut(this); nex]...))),[1 2 4 3]); - Zh = reshape(Zh,:, this.sK[4]); - - for k=1:prod(this.sK[1:2]) - temp = conj(this.S[:,k]) .* Yh - temp = reshape(temp,:,this.sK[3]) - dth1[k,:,:] = real(conj(temp)'*Zh); - end - - dtheta = reshape(dth1,tuple(this.sK...)); - return dtheta -end - -function hadamardSum(sumT::Array{T},Yh::Array{T},Sk::Array{T}) where {T<:Number} - sumT .= 0.0; - for i4 = 1:size(Yh,4) - for i3 = 1:size(Yh,3) - for i2 = 1:size(Yh,2) - for i1 = 1:size(Yh,1) - @inbounds tt = Sk[i1,i2,i3] - @inbounds sumT[i1,i2,1,i4] += tt * Yh[i1,i2,i3,i4] - end - end - end - end - return sumT -end +export convFFTKernel, getEigs,getConvFFTKernel +## For the functions nImgIn, nImgOut, nFeatIn, nFeatOut, nTheta, getOp, initTheta : see AbstractConvKernel.jl +## All convKernel types are assumed to have fields nImage and sK +mutable struct convFFTKernel{T} <: AbstractConvKernel{T} + nImg :: Array{Int,1} + sK :: Array{Int,1} + S :: Array{Complex{T},2} +end + +function getConvFFTKernel(TYPE::Type,nImg,sK) + S = getEigs(Complex{TYPE},nImg,sK) + return convFFTKernel{TYPE}(nImg,sK,S) +end + +function getEigs(TYPE,nImg,sK) + S = zeros(TYPE,prod(nImg),prod(sK[1:2])); + for k=1:prod(sK[1:2]) + Kk = zeros(sK[1],sK[2]); + Kk[k] = 1; + Ak = getConvMatPeriodic(TYPE,Kk,[nImg[1],nImg[2], 1]); + Akk = full(convert(Array{TYPE},Ak[:,1])); + S[:,k] = vec(fft2(reshape(Akk,nImg[1],nImg[2]) )); + end + return S +end + +export Amv +function Amv(this::convFFTKernel{T},theta::Array{T},Y::Array{T}) where {T<:Number} + + nex = div(numel(Y),prod(nImgIn(this))) + + # compute convolution + AY = 
zeros(Complex{T},tuple([nImgOut(this); nex]...)); + theta = reshape(theta, tuple([prod(this.sK[1:2]); this.sK[3:4]]...)); + Yh = ifft2(reshape(Y,tuple([nImgIn(this); nex]...))); + #### allocate stuff for the loop + Sk = zeros(Complex{T},tuple(nImgOut(this)...)) + #T = zeros(Complex{eltype(Y)},tuple(nImgOut(this)...)) + nn = nImgOut(this); nn[3] = 1; + sumT = zeros(Complex{T},tuple([nn;nex]...)) + #### + + for k=1:this.sK[4] + Sk = reshape(this.S*theta[:,:,k],tuple(nImgIn(this)...)); + #T = Sk .* Yh; + #sumT = sum(T,3) + sumT = hadamardSum(sumT,Yh,Sk) + AY[:,:,k,:] = sumT[:,:,1,:]; + end + AY = real(fft2(AY)); + Y = reshape(AY,:,nex); + return Y +end + +function ATmv(this::convFFTKernel{T},theta::Array{T},Z::Array{T}) where {T<:Number} + + nex = div(numel(Z),prod(nImgOut(this))); + ATY = zeros(Complex{T},tuple([nImgIn(this); nex]...)); + theta = reshape(theta, prod(this.sK[1:2]),this.sK[3],this.sK[4]); + #### allocate stuff for the loop + Sk = zeros(Complex{T},tuple(nImgOut(this)...)) + #T = zeros(Complex{eltype(Z)},tuple(nImgOut(this)...)) + nn = nImgOut(this); nn[3] = 1; + sumT = zeros(Complex{T},tuple([nn;nex]...)) + #### + + Yh = fft2(reshape(Z,tuple([nImgOut(this); nex]...))); + for k=1:this.sK[3] + tk = theta[:,k,:] + #if size(this.S,2) == 1 + # tk = reshape(tk,1,:); + #end + Sk = reshape(this.S*tk,tuple(nImgOut(this)...)); + #T = Sk.*Yh; + #sumT = sum(T,3) + sumT = hadamardSum(sumT,Yh,Sk) + ATY[:,:,k,:] = sumT[:,:,1,:]; + end + ATY = real(ifft2(ATY)); + ATY = reshape(ATY,:,nex); + return ATY +end + +function Jthetamv(this::convFFTKernel{T},dtheta::Array{T},dummy::Array{T},Y::Array{T},temp=nothing) where {T<:Number} + + nex = div(numel(Y),nFeatIn(this)); + Y = reshape(Y,:,nex); + Z = Amv(this,dtheta,Y); + return Z +end + +function JthetaTmv(this::convFFTKernel{T},Z::Array{T},dummy::Array{T},Y::Array{T}) where {T<:Number} + # derivative of Z*(A(theta)*Y) w.r.t. theta + + nex = div(numel(Y),nFeatIn(this)); + + dth1 = zeros(this.sK[1]*this.sK[2],this.sK[3],this.sK[4]); + Y = permutedims(reshape(Y,tuple([nImgIn(this); nex ]...)),[1 2 4 3]); + Yh = reshape(fft2(Y),prod(this.nImg[1:2]),nex*this.sK[3]); + Zh = permutedims(ifft2(reshape(Z,tuple([nImgOut(this); nex]...))),[1 2 4 3]); + Zh = reshape(Zh,:, this.sK[4]); + + for k=1:prod(this.sK[1:2]) + temp = conj(this.S[:,k]) .* Yh + temp = reshape(temp,:,this.sK[3]) + dth1[k,:,:] = real(conj(temp)'*Zh); + end + + dtheta = reshape(dth1,tuple(this.sK...)); + return dtheta +end + +function hadamardSum(sumT::Array{T},Yh::Array{T},Sk::Array{T}) where {T<:Number} + sumT .= 0.0; + for i4 = 1:size(Yh,4) + for i3 = 1:size(Yh,3) + for i2 = 1:size(Yh,2) + for i1 = 1:size(Yh,1) + @inbounds tt = Sk[i1,i2,i3] + @inbounds sumT[i1,i2,1,i4] += tt * Yh[i1,i2,i3,i4] + end + end + end + end + return sumT +end diff --git a/src/kernelTypes/convGEMMKernel.jl b/src/kernelTypes/convGEMMKernel.jl index a818b2a..5bbdaf8 100644 --- a/src/kernelTypes/convGEMMKernel.jl +++ b/src/kernelTypes/convGEMMKernel.jl @@ -1,202 +1,230 @@ -export convGEMMKernel,Amv,ATmv,transposeTest,getConvGEMMKernel - -mutable struct convGEMMKernel{T} <: AbstractConvKernel{T} - nImg :: Array{Int,1} - sK :: Array{Int,1} -end -function getConvGEMMKernel(TYPE::Type,nImg,sK) - return convGEMMKernel{TYPE}(copy(nImg),copy(sK)); -end - -function Amv(this::convGEMMKernel{T},theta::Array{T},Y::Array{T}) where {T<:Number} - ## We assume that the data Y is held in the order XYCN. 
- sK = this.sK; - nImg = this.nImg; - nex = div(numel(Y),prod(nImgIn(this))) - # compute convolution - Y = reshape(Y,nImg[1],nImg[2],this.sK[3],nex); - AY = Array{T, 3}(nImg[1]*nImg[2],this.sK[4],nex); - aux = zeros(T,nImg[1],nImg[2],this.sK[3]); - AYk = zeros(T,nImg[1]*nImg[2],this.sK[4]); - ### reshape the kernels for gemm!: - K = reshape(theta, sK[1], sK[2], sK[3], sK[4]) - KK = Array{Array{T,2}}(sK[1],sK[2]); - for k1 = 1:sK[1] - for k2 = 1:sK[2] - @inbounds KK[k1,k2] = K[k1,k2,:,:]'; - end - end - shiftX = [0;-1;0;0;1;0]; - shiftT = [1;0;0;0;0;-1]; - - for k = 1:nex - AYk = multConv2Dblock(Y,KK, AYk,aux,shiftX,shiftT,k); - @inbounds AY[:,:,k] = AYk; - AYk[:] = zero(T) - end - AY_out = reshape(AY,:,nex); - return AY_out -end - -function ATmv(this::convGEMMKernel{T},theta::Array{T},Zin::Array{T}) where {T<:Number} - nImg = this.nImg; - sK = this.sK; - nex = div(numel(Zin),prod(nImgOut(this))); - K = reshape(theta, sK[1], sK[2], sK[3], sK[4]); - Z = reshape(Zin,nImg[1],nImg[2],sK[4],nex); - aux = zeros(T,nImg[1],nImg[2],sK[4]); - ATZ = zeros(T,nImg[1]*nImg[2],sK[3],nex); - ATZk = zeros(T,nImg[1]*nImg[2],sK[3]); - - ### reshape the kernels for gemm!: - KK = Array{Array{T,2}}(sK[1],sK[2]); - for k1 = 1:sK[1] - for k2 = 1:sK[2] - @inbounds KK[k1,k2] = K[k1,k2,:,:]; - end - end - ## flipping: - KK = flipdim(flipdim(KK,2),1); - shiftX = [0;-1;0;0;1;0]; - shiftT = [1;0;0;0;0;-1]; - for k = 1:nex - ATZk = multConv2Dblock(Z,KK, ATZk,aux,shiftX,shiftT,k); - @inbounds ATZ[:,:,k] = ATZk; - ATZk[:] = zero(T) - end - ATZ_out = reshape(ATZ,:,nex); - return ATZ_out -end - -function Jthetamv(this::convGEMMKernel{T},dtheta::Array{T},dummy::Array{T},Y::Array{T},temp=nothing) where {T<:Number} - nex = div(numel(Y),nFeatIn(this)); - Z = Amv(this,dtheta,Y); - return Z -end - -function JthetaTmv(this::convGEMMKernel{T}, Zin::Array{T}, dummy::Array{T}, Yin::Array{T}) where {T<:Number} - # derivative of Z*(A(theta)*Y) w.r.t. theta - sK = this.sK - nImg = this.nImg - nex = div(numel(Yin),prod(nImgIn(this))) - # compute convolution - Y = reshape(Yin, nImg[1], nImg[2], this.sK[3], nex) - Z = reshape(Zin, nImg[1]*nImg[2], this.sK[4], nex) - Zk = zeros(T, nImg[1]*nImg[2], this.sK[4]) - aux = zeros(T, nImg[1], nImg[2], this.sK[3]) - - ### reshape the kernels for gemm!: - dtheta = zeros(T, sK[1], sK[2], sK[3], sK[4]) - KK = Array{Array{T, 2}}(sK[1], sK[2]) - for k1 = 1:sK[1] - for k2 = 1:sK[2] - @inbounds KK[k1, k2] = zeros(T, sK[3], sK[4]) - end - end - shiftX = [0;-1;0;0;1;0] - shiftT = [1;0;0;0;0;-1] - for k = 1:nex - getColumn!(Z, Zk, k) - multConv2Dblock(Y, KK, Zk, aux, shiftX, shiftT, k, doDerivative = 1) - end - ### Assemble the kernels from gemm!: - for k1 = 1:sK[1] - for k2 = 1:sK[2] - @inbounds dtheta[k1, k2, :, :] = KK[k1, k2] - end - end - dtheta_out = reshape(dtheta, sK[1], sK[2], sK[3], sK[4]) - return dtheta_out -end - - - -function getColumn!(Z::Array{T},Zk::Array{T},k::Int64) where {T<:Number} -for c=1:size(Z,2) - for j=1:size(Z,1) - @inbounds Zk[j,c] = Z[j,c,k]; - end -end -end - -function multConv2Dblock(x::Array{T},K::Array{Array{T,2},2}, y::Array{T}, tin::Array{T},shiftX,shiftT,imIdx;doDerivative = 0) where {T<:Number} - ## y = K*x - ## K - 3X3 array of Arrays - ## x - a vector of length |nImgag+2|*cin (zero padded) - ## y - a vector of length |nImgag|*cout - - nImg1 = size(x,1); - nImg2 = size(x,2); - cin = size(x,3); - cout = size(y,2); - OneType = one(T); - - kernelWidth = size(K,1); - # y = reshape(y,nImg1*nImg2,cout); # it is supposed to be of this shape... 
- k=1; - jt=0;it=0;jt=0;jx=0; - for p = 1:2:2*kernelWidth - for q = 1:2:2*kernelWidth - t = reshape(tin,nImg1,nImg2,cin); - lower = nImg2+shiftT[p+1] # Move outside of the forloop for increased speed - upper = nImg1+shiftT[q+1] # Move outside of the forloop for increased speed - for cc = 1:cin - jx = 1+shiftX[p]; # Moving these outside didn't seem to help - jt = 1+shiftT[p]; - if jt > 1 - @inbounds t[:,1:(jt-1),cc] = 0.0; - end - while jt <= lower - it = 1+shiftT[q]; - ix = 1+shiftX[q]; - if it > 1 - for ii = 1:(it-1) - @inbounds t[ii,jt,cc] = zero(T) #@inbounds t[1:(it-1),jt,cc] = 0.0 - faster unvectorized - end - end - while it <= upper - @inbounds t[it,jt,cc] = x[ix,jx,cc,imIdx]; - it+=1;ix+=1; - end - if it <= nImg1 - for ii = it:nImg1 - @inbounds t[ii,jt,cc] = zero(T) #@inbounds t[it:nImg1,jt,cc] = 0.0 - faster unvectorized - end - end - jt+=1;jx+=1; - - end - if jt <= nImg2 - @inbounds t[:,jt:nImg2,cc] = 0.0; - end - end - tin = reshape(t,nImg1*nImg2,cin); - if doDerivative == 0 - BLAS.gemm!('N','T',OneType,tin,K[k],OneType,y); - else - BLAS.gemm!('T','N',OneType,tin,y,OneType,K[k]); - end - k+=1; - end - end - return y; -end - - -# function transposeTest() -# nImage = [16,16]; -# sK = [3,3,2,4]; -# TYPE = Float64; -# K = randn(TYPE,tuple(sK...)); -# Y = randn(TYPE,nImage[1],nImage[2],sK[3],2); -# Z = randn(TYPE,nImage[1],nImage[2],sK[4],2); -# Kernel2 = convGEMMKernel(nImage,sK); -# AY = Amv(Kernel2,K,Y); -# ATZ = ATmv(Kernel2,K,Z); -# println(vecdot(Z,AY)); -# println(vecdot(ATZ,Y)); -# -# println(vecdot(Z,Jthetamv(Kernel2,K,[],Y))); -# println(vecdot(K,JthetaTmv(Kernel2,Z,[],Y))); -# -# end +export convGEMMKernel,Amv,ATmv,transposeTest,getConvGEMMKernel +using DistributedArrays +mutable struct convGEMMKernel{T} <: AbstractConvKernel{T} + nImg :: Array{Int,1} + sK :: Array{Int,1} + shiftX :: Array{Int,1} + shiftT :: Array{Int,1} + aux_sk3 :: Array{T, 3} + aux_sk4 :: Array{T, 3} +end +function getConvGEMMKernel(TYPE::Type,nImg,sK) + + if sK[1] == 1 && sK[2] == 1 + shiftX = [0;0]; + shiftT = [0;0]; + elseif sK[1] == 3 && sK[2] == 3 + shiftX = [0;-1;0;0;1;0]; + shiftT = [1;0;0;0;0;-1]; + else + error("Code only supports 1X1 and 3X3 convolutions"); + end + + aux_sk3 = zeros(TYPE,nImg[1],nImg[2],sK[3]); + aux_sk4 = zeros(TYPE,nImg[1],nImg[2],sK[4]); + # error("new GEMM") + return convGEMMKernel{TYPE}(copy(nImg),copy(sK),shiftX,shiftT,aux_sk3,aux_sk4); +end + +function Amv(this::convGEMMKernel{T},theta::Array{T},Y::Array{T}) where {T<:Number} + ## We assume that the data Y is held in the order XYCN. 
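+  # Y is reshaped to nImg[1] x nImg[2] x cin x nex and theta to sK[1] x sK[2] x cin x cout.
+  # KK holds, for every kernel tap, the transposed (cout x cin) slice expected by
+  # multConv2Dblock, which is called once per example and writes into the preallocated
+  # buffer AYk (a reshape of this.aux_sk4) before the result is copied into AY.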
+ sK = this.sK; + nImg = this.nImg; + nex = div(numel(Y),prod(nImgIn(this))) + # compute convolution + Y = reshape(Y,nImg[1],nImg[2],this.sK[3],nex); + AY = Array{T, 3}(nImg[1]*nImg[2],this.sK[4],nex); + aux = this.aux_sk3; + AYk = reshape(this.aux_sk4,nImg[1]*nImg[2],sK[4]); + ### reshape the kernels for gemm!: + K = reshape(theta, sK[1], sK[2], sK[3], sK[4]) + KK = Array{Array{T,2}}(sK[1],sK[2]); + for k1 = 1:sK[1] + for k2 = 1:sK[2] + @inbounds KK[k1,k2] = K[k1,k2,:,:]'; + end + end + # AYk = @parallel vcat for k = 1:nex + for k = 1:nex + AYk[:] = zero(T) + AYk = multConv2Dblock(Y,KK, AYk,aux,this.shiftX,this.shiftT,k); + @inbounds AY[:,:,k] = AYk; + + end + + AY_out = reshape(AY,:,nex); + return AY_out +end + +function ATmv(this::convGEMMKernel{T},theta::Array{T},Zin::Array{T}) where {T<:Number} + nImg = this.nImg; + sK = this.sK; + nex = div(numel(Zin),prod(nImgOut(this))); + K = reshape(theta, sK[1], sK[2], sK[3], sK[4]); + Z = reshape(Zin,nImg[1],nImg[2],sK[4],nex); + aux = this.aux_sk4; + ATZ = zeros(T,nImg[1]*nImg[2],sK[3],nex); + ATZk = reshape(this.aux_sk3,nImg[1]*nImg[2],sK[3]); + + ### reshape the kernels for gemm!: + KK = Array{Array{T,2}}(sK[1],sK[2]); + for k1 = 1:sK[1] + for k2 = 1:sK[2] + @inbounds KK[k1,k2] = K[k1,k2,:,:]; + end + end + ## flipping: + KK = flipdim(flipdim(KK,2),1); + for k = 1:nex + ATZk[:] = zero(T) + ATZk = multConv2Dblock(Z,KK, ATZk,aux,this.shiftX,this.shiftT,k); + @inbounds ATZ[:,:,k] = ATZk; + end + ATZ_out = reshape(ATZ,:,nex); + return ATZ_out +end + +function Jthetamv(this::convGEMMKernel{T},dtheta::Array{T},dummy::Array{T},Y::Array{T},temp=nothing) where {T<:Number} + nex = div(numel(Y),nFeatIn(this)); + Z = Amv(this,dtheta,Y); + return Z +end + +function JthetaTmv(this::convGEMMKernel{T}, Zin::Array{T}, dummy::Array{T}, Yin::Array{T}) where {T<:Number} + # derivative of Z*(A(theta)*Y) w.r.t. theta + sK = this.sK + nImg = this.nImg + nex = div(numel(Yin),prod(nImgIn(this))) + # compute convolution + Y = reshape(Yin, nImg[1], nImg[2], this.sK[3], nex) + Z = reshape(Zin, nImg[1]*nImg[2], this.sK[4], nex) + Zk = reshape(this.aux_sk4, nImg[1]*nImg[2], this.sK[4]); + aux = this.aux_sk3; + ### reshape the kernels for gemm!: + dtheta = zeros(T, sK[1], sK[2], sK[3], sK[4]) + KK = Array{Array{T, 2}}(sK[1], sK[2]) + for k1 = 1:sK[1] + for k2 = 1:sK[2] + @inbounds KK[k1, k2] = zeros(T, sK[3], sK[4]) + end + end + for k = 1:nex + getColumn!(Z, Zk, k) + multConv2Dblock(Y, KK, Zk, aux, this.shiftX, this.shiftT, k, doDerivative = 1) + end + ### Assemble the kernels from gemm!: + for k1 = 1:sK[1] + for k2 = 1:sK[2] + @inbounds dtheta[k1, k2, :, :] = KK[k1, k2] + end + end + dtheta_out = reshape(dtheta, sK[1], sK[2], sK[3], sK[4]) + return dtheta_out +end + + + +function getColumn!(Z::Array{T},Zk::Array{T},k::Int64) where {T<:Number} +for c=1:size(Z,2) + for j=1:size(Z,1) + @inbounds Zk[j,c] = Z[j,c,k]; + end +end +end + +function multConv2Dblock(x::Array{T},K::Array{Array{T,2},2}, y::Array{T}, tin::Array{T},shiftX,shiftT,imIdx;doDerivative = 0) where {T<:Number} + ## y = K*x + ## K - 3X3 array of Arrays + ## x - a vector of length |nImgag+2|*cin (zero padded) + ## y - a vector of length |nImgag|*cout + + nImg1 = size(x,1); + nImg2 = size(x,2); + cin = size(x,3); + cout = size(y,2); + OneType = one(T); + t = reshape(tin,nImg1,nImg2,cin); + kernelWidth = size(K,1); + # y = reshape(y,nImg1*nImg2,cout); # it is supposed to be of this shape... 
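+  # `t` is a reshaped view of the caller-supplied work buffer `tin` (aux_sk3 / aux_sk4),
+  # so the shifted image is built in place without allocating per call; `k` is a linear
+  # index into K, advanced once per (p,q) tap, selecting the slice used by gemm! below.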
+ k=1; + jt=0;it=0;jt=0;jx=0; + + for p = 1:2:2*kernelWidth + for q = 1:2:2*kernelWidth + lower = nImg2+shiftT[p+1] # Move outside of the forloop for increased speed + upper = nImg1+shiftT[q+1] # Move outside of the forloop for increased speed + for cc = 1:cin + jx = 1+shiftX[p]; # Moving these outside didn't seem to help + jt = 1+shiftT[p]; + if jt > 1 + ###################### Dirichlet ####################### + @inbounds t[:,1:(jt-1),cc] = zero(T); + ###################### Periodic ####################### + # ix = 1+shiftX[q]; + # if shiftT[q] > 0 + #@inbounds t[1,1,cc] = x[end,end,cc,imIdx]; + # end + # for it = (1+shiftT[q]):upper + #@inbounds t[it,1,cc] = x[ix,end,cc,imIdx]; + # ix +=1; + # end + # if shiftT[q+1] < 0 + #@inbounds t[end,1,cc] = x[1,end,cc,imIdx]; + # end + ###################### End Periodic ####################### + end + while jt <= lower + it = 1+shiftT[q]; + ix = 1+shiftX[q]; + if it > 1 + for ii = 1:(it-1) + ###################### Dirichlet ####################### + @inbounds t[ii,jt,cc] = zero(T) #@inbounds t[1:(it-1),jt,cc] = 0.0 - faster unvectorized + ###################### Periodic ####################### + #@inbounds t[ii,jt,cc] = x[end,jx,cc,imIdx]; + end + end + while it <= upper + @inbounds t[it,jt,cc] = x[ix,jx,cc,imIdx]; + it+=1;ix+=1; + end + if it <= nImg1 + for ii = it:nImg1 + ###################### Dirichlet ####################### + @inbounds t[ii,jt,cc] = zero(T) #@inbounds t[it:nImg1,jt,cc] = 0.0 - faster unvectorized + ###################### Periodic ####################### + # @inbounds t[ii,jt,cc] = x[1,jx,cc,imIdx]; + end + end + jt+=1;jx+=1; + + end + if jt <= nImg2 + ###################### Dirichlet ####################### + @inbounds t[:,jt:nImg2,cc] = zero(T); + ###################### Periodic ####################### + # if shiftT[q] > 0 + # @inbounds t[1,end,cc] = x[end,1,cc,imIdx]; + # end + # ix = ix = 1+shiftX[q]; + # for it = (1+shiftT[q]):upper + # @inbounds t[it,end,cc] = x[ix,1,cc,imIdx]; + # ix +=1; + # end + # if shiftT[q+1] < 0 + # @inbounds t[end,end,cc] = x[1,1,cc,imIdx]; + # end + ###################### End Periodic ####################### + end + end + if doDerivative == 0 + + BLAS.gemm!('N','T',OneType,reshape(t,nImg1*nImg2,cin),K[k],OneType,y); + else + BLAS.gemm!('T','N',OneType,reshape(t,nImg1*nImg2,cin),y,OneType,K[k]); + end + k+=1; + end + end + return y; +end diff --git a/src/layers/doubleSymLayer.jl b/src/layers/doubleSymLayer.jl index c245ce7..7fc41fb 100644 --- a/src/layers/doubleSymLayer.jl +++ b/src/layers/doubleSymLayer.jl @@ -1,215 +1,215 @@ -export DoubleSymLayer,getDoubleSymLayer - -""" - Implementation of symmetric double layer model - - Y(theta,Y0) = K(th1)'(activation( K(th1)\*Y0 + trafo.Bin\*th2))) + trafo.Bout\*th3 -""" -mutable struct DoubleSymLayer{T, TK <: AbstractConvKernel{T}, TN <: Union{NN{T}, normLayer{T}}} <: AbstractMeganetElement{T} - activation :: Function # activation function - K :: TK # Kernel model, e.g., convMod - nLayer :: TN # normalization layer - Bin :: Array{T} # Bias inside the nonlinearity - Bout :: Array{T} # bias outside the nonlinearity -end - - -function getDoubleSymLayer(TYPE::Type,K,nLayer::AbstractMeganetElement{T}, - Bin=zeros(nFeatOut(K),0),Bout=zeros(nFeatIn(K),0), - activation=tanhActivation) where {T <: Number} - BinT = convert.(T, Bin) - BoutT = convert.(T, Bout) - return DoubleSymLayer(activation,K,nLayer,BinT,BoutT); - -end - -function splitWeights(this::DoubleSymLayer{T},theta::Array{T}) where {T<:Number} - - th1 = theta[1:nTheta(this.K)::Int] - cnt = length(th1) 
- th2 = theta[cnt+(1:size(this.Bin,2))] - cnt = cnt + length(th2) - th3 = theta[cnt+(1:size(this.Bout,2))] - cnt = cnt + length(th3) - - th4 = theta[cnt+1:end]; - - return th1, th2, th3, th4 -end - -function apply(this::DoubleSymLayer{T},theta::Array{T},Yin::Array{T,2},doDerivative=true) where {T<:Number} - - #QZ = [] - tmp = Array{Any}(2) - nex = div(length(Yin),nFeatIn(this))::Int - Y = reshape(Yin,:,nex) - - theta1,theta2,theta3,theta4 = splitWeights(this,theta) - Kop = getOp(this.K,theta1) - KY = Kop*Y - KY,dummy,tmp[1] = apply(this.nLayer,theta4,KY) - Yt = KY - if !isempty(theta2) - Yt .+= this.Bin*theta2 - end - tmp[2] = copy(Yt) - Z::Array{T,2}, = this.activation(Yt,doDerivative) - Z = -(Kop'*Z) - if !isempty(theta3) - Z .+= this.Bout*theta3 - end - return Z, Z, tmp -end - -function nTheta(this::DoubleSymLayer) - return nTheta(this.K) + size(this.Bin,2)+ size(this.Bout,2) + nTheta(this.nLayer) -end - -function nFeatIn(this::DoubleSymLayer) - return nFeatIn(this.K) -end - -function nFeatOut(this::DoubleSymLayer) - return nFeatIn(this.K) -end - -function nDataOut(this::DoubleSymLayer) - return nFeatIn(this) -end - -function initTheta(this::DoubleSymLayer{T}) where {T<:Number} - theta = [vec(initTheta(this.K)); - T(0.1)*ones(T,size(this.Bin,2),1); - T(0.1)*ones(T,size(this.Bout,2),1); - initTheta(this.nLayer)]; - return theta -end - -function Jthetamv(this::DoubleSymLayer{T},dtheta::Array{T},theta::Array{T},Y::Array{T},tmp) where {T<:Number} - - A,dA = this.activation(tmp[2],true) - th1, th2,th3,th4 = splitWeights(this,theta) - dth1,dth2,dth3,dth4 = splitWeights(this,dtheta) - - Kop = getOp(this.K,th1) - dKop = getOp(this.K,dth1) - dY = dKop*Y - - dY = Jmv(this.nLayer,dth4,dY,th4,Kop*Y,copy(tmp[1]))[2] - dY = dY .+ this.Bin*dth2 - - dY = -(Kop'*(dA.*dY) + dKop'*A) .+ this.Bout*dth3 - return dY, dY -end - -function JYmv(this::DoubleSymLayer{T},dY::Array{T},theta::Array{T},Y::Array{T},tmp) where {T<:Number} - - dA = this.activation(tmp[2],true)[2] - - nex = div(length(dY),nFeatIn(this)) - dY = reshape(dY,:,nex) - Y = reshape(Y,:,nex) - th1, th2,th3,th4 = splitWeights(this,theta) - - Kop = getOp(this.K,th1) - dY = Kop*dY - dY = JYmv(this.nLayer,dY,th4,Kop*Y,copy(tmp[1]))[2] - dZ = -(Kop'*(dA.*dY)) - return dZ, dZ -end - -function Jmv(this::DoubleSymLayer{T},dtheta::Array{T},dY::Array{T},theta::Array{T},Y::Array{T},tmp) where {T<:Number} - A,dA = this.activation(copy(tmp[2]),true) - nex = div(length(Y),nFeatIn(this)) - - th1, th2,th3,th4 = splitWeights(this,theta) - dth1,dth2,dth3,dth4 = splitWeights(this,dtheta) - - Kop = getOp(this.K,th1) - dKop = getOp(this.K,dth1) - if length(dY)>1 - dY = reshape(dY,:,nex) - KdY = Kop*dY - else - KdY = 0 - end - dY = dKop*Y+KdY - dY = Jmv(this.nLayer,dth4,dY,th4,Kop*Y,tmp[1])[2] - - dY = reshape(dY,:,nex) - if !isempty(dth2) - dY .+= this.Bin*dth2 - end - - dY = -(Kop'*(dA.*dY) + dKop'*A) - if !isempty(dth3) - dth3 .+= this.Bout*dth3 - end - - return dY, dY -end - - -function JthetaTmv(this::DoubleSymLayer{T},Z::Array{T},dummy::Array{T},theta::Array{T},Y::Array{T},tmp) where {T<:Number} - - nex = div(length(Y),nFeatIn(this)) - Z = reshape(Z,:,nex) - th1,th2,th3,th4 = splitWeights(this,theta) - Kop = getOp(this.K,th1) - A,dA = this.activation(tmp[2],true) - - dth3 = vec(sum(this.Bout'*Z,2)) - dAZ = dA.*(Kop*Z) - dth2 = vec(sum(this.Bin'*dAZ,2)) - - dth4,dAZ = JTmv(this.nLayer,dAZ,zeros(T,0),th4,Kop*Y,tmp[1]) - - dth1 = JthetaTmv(this.K,A,zeros(T,0),Z) - dth1 += JthetaTmv(this.K,dAZ,zeros(T,0),Y) - dtheta = [-vec(dth1); -vec(dth2); vec(dth3); -vec(dth4)] 
- return dtheta -end - -function JYTmv(this::DoubleSymLayer{T},Zin::Array{T},dummy::Array{T},theta::Array{T},Y::Array{T},tmp) where {T<:Number} - - nex = div(length(Y),nFeatIn(this)) - Z = reshape(Zin,:,nex) - th1,th2,th3,th4 = splitWeights(this,theta) - Kop = getOp(this.K,th1) - A,dA = this.activation(tmp[2],true) - - dAZ = dA.*(Kop*Z) - dAZ = JYTmv(this.nLayer,dAZ,(T)[],th4,Kop*Y,tmp[1]) - dAZ_out = reshape(dAZ,:,nex) - dY = -(Kop'*dAZ_out) - return dY -end - -function JTmv(this::DoubleSymLayer{T}, Zin::Array{T}, dummy::Array{T}, - theta::Array{T}, Yin::Array{T}, tmp) where {T<:Number} - - nex = div(length(Yin),nFeatIn(this)) - Z = reshape(Zin, :, nex) - Yt = reshape(tmp[2]::Array{T,2},:,nex) - Y = reshape(Yin,:,nex) - th1, th2, th3, th4 = splitWeights(this,theta) - #Kop = getOp(this.K,th1) - A::Array{T,2}, dA::Array{T,2} = this.activation(Yt,true) - - dth3 = vec(sum(this.Bout'*Z,2)) - - KopZ = Amv(this.K, th1, Z) - dAZ1 = dA.*KopZ - - dth2 = vec(sum(this.Bin'*dAZ1,2)) - KopY = Amv(this.K, th1, Y) - dth4, dAZ2 = JTmv(this.nLayer,dAZ1,zeros(T,0),th4,KopY,tmp[1]) - dth1 = JthetaTmv(this.K,dAZ2,zeros(T,0),Y) - dth1 = dth1 + JthetaTmv(this.K,A,(T)[],Z) - dtheta = [-vec(dth1); -vec(dth2); vec(dth3);-vec(dth4)] - - dAZ_out = reshape(dAZ2,:,nex) - KopTdAZ = ATmv(this.K, th1, dAZ_out) - dY = -KopTdAZ - return dtheta, dY -end +export DoubleSymLayer,getDoubleSymLayer + +""" + Implementation of symmetric double layer model + + Y(theta,Y0) = K(th1)'(activation( K(th1)\*Y0 + trafo.Bin\*th2))) + trafo.Bout\*th3 +""" +mutable struct DoubleSymLayer{T, TK <: AbstractConvKernel{T}, TN <: Union{NN{T}, normLayer{T}}} <: AbstractMeganetElement{T} + activation :: Function # activation function + K :: TK # Kernel model, e.g., convMod + nLayer :: TN # normalization layer + Bin :: Array{T} # Bias inside the nonlinearity + Bout :: Array{T} # bias outside the nonlinearity +end + + +function getDoubleSymLayer(TYPE::Type,K,nLayer::AbstractMeganetElement{T}; + Bin=zeros(TYPE,nFeatOut(K),0),Bout=zeros(TYPE, nFeatIn(K),0), + activation=tanhActivation) where {T <: Number} + BinT = convert.(T, Bin) + BoutT = convert.(T, Bout) + return DoubleSymLayer(activation,K,nLayer,BinT,BoutT); + +end + +function splitWeights(this::DoubleSymLayer{T},theta::Array{T}) where {T<:Number} + + th1 = theta[1:nTheta(this.K)::Int] + cnt = length(th1) + th2 = theta[cnt+(1:size(this.Bin,2))] + cnt = cnt + length(th2) + th3 = theta[cnt+(1:size(this.Bout,2))] + cnt = cnt + length(th3) + + th4 = theta[cnt+1:end]; + + return th1, th2, th3, th4 +end + +function apply(this::DoubleSymLayer{T},theta::Array{T},Yin::Array{T,2},doDerivative=true) where {T<:Number} + + #QZ = [] + tmp = Array{Any}(2) + nex = div(length(Yin),nFeatIn(this))::Int + Y = reshape(Yin,:,nex) + + theta1,theta2,theta3,theta4 = splitWeights(this,theta) + Kop = getOp(this.K,theta1) + KY = Kop*Y + KY,dummy,tmp[1] = apply(this.nLayer,theta4,KY) + Yt = KY + if !isempty(theta2) + Yt .+= this.Bin*theta2 + end + tmp[2] = copy(Yt) + Z::Array{T,2}, = this.activation(Yt,doDerivative) + Z = -(Kop'*Z) + if !isempty(theta3) + Z .+= this.Bout*theta3 + end + return Z, Z, tmp +end + +function nTheta(this::DoubleSymLayer) + return nTheta(this.K) + size(this.Bin,2)+ size(this.Bout,2) + nTheta(this.nLayer) +end + +function nFeatIn(this::DoubleSymLayer) + return nFeatIn(this.K) +end + +function nFeatOut(this::DoubleSymLayer) + return nFeatIn(this.K) +end + +function nDataOut(this::DoubleSymLayer) + return nFeatIn(this) +end + +function initTheta(this::DoubleSymLayer{T}) where {T<:Number} + theta = 
[vec(initTheta(this.K)); + T(0.01)*ones(T,size(this.Bin,2),1); + T(0.01)*ones(T,size(this.Bout,2),1); + initTheta(this.nLayer)]; + return theta +end + +function Jthetamv(this::DoubleSymLayer{T},dtheta::Array{T},theta::Array{T},Y::Array{T},tmp) where {T<:Number} + + A,dA = this.activation(tmp[2],true) + th1, th2,th3,th4 = splitWeights(this,theta) + dth1,dth2,dth3,dth4 = splitWeights(this,dtheta) + + Kop = getOp(this.K,th1) + dKop = getOp(this.K,dth1) + dY = dKop*Y + + dY = Jmv(this.nLayer,dth4,dY,th4,Kop*Y,copy(tmp[1]))[2] + dY = dY .+ this.Bin*dth2 + + dY = -(Kop'*(dA.*dY) + dKop'*A) .+ this.Bout*dth3 + return dY, dY +end + +function JYmv(this::DoubleSymLayer{T},dY::Array{T},theta::Array{T},Y::Array{T},tmp) where {T<:Number} + + dA = this.activation(tmp[2],true)[2] + + nex = div(length(dY),nFeatIn(this)) + dY = reshape(dY,:,nex) + Y = reshape(Y,:,nex) + th1, th2,th3,th4 = splitWeights(this,theta) + + Kop = getOp(this.K,th1) + dY = Kop*dY + dY = JYmv(this.nLayer,dY,th4,Kop*Y,copy(tmp[1]))[2] + dZ = -(Kop'*(dA.*dY)) + return dZ, dZ +end + +function Jmv(this::DoubleSymLayer{T},dtheta::Array{T},dY::Array{T},theta::Array{T},Y::Array{T},tmp) where {T<:Number} + A,dA = this.activation(copy(tmp[2]),true) + nex = div(length(Y),nFeatIn(this)) + + th1, th2,th3,th4 = splitWeights(this,theta) + dth1,dth2,dth3,dth4 = splitWeights(this,dtheta) + + Kop = getOp(this.K,th1) + dKop = getOp(this.K,dth1) + if length(dY)>1 + dY = reshape(dY,:,nex) + KdY = Kop*dY + else + KdY = 0 + end + dY = dKop*Y+KdY + dY = Jmv(this.nLayer,dth4,dY,th4,Kop*Y,tmp[1])[2] + + dY = reshape(dY,:,nex) + if !isempty(dth2) + dY .+= this.Bin*dth2 + end + + dY = -(Kop'*(dA.*dY) + dKop'*A) + if !isempty(dth3) + dth3 .+= this.Bout*dth3 + end + + return dY, dY +end + + +function JthetaTmv(this::DoubleSymLayer{T},Z::Array{T},dummy::Array{T},theta::Array{T},Y::Array{T},tmp) where {T<:Number} + + nex = div(length(Y),nFeatIn(this)) + Z = reshape(Z,:,nex) + th1,th2,th3,th4 = splitWeights(this,theta) + Kop = getOp(this.K,th1) + A,dA = this.activation(tmp[2],true) + + dth3 = vec(sum(this.Bout'*Z,2)) + dAZ = dA.*(Kop*Z) + dth2 = vec(sum(this.Bin'*dAZ,2)) + + dth4,dAZ = JTmv(this.nLayer,dAZ,zeros(T,0),th4,Kop*Y,tmp[1]) + + dth1 = JthetaTmv(this.K,A,zeros(T,0),Z) + dth1 += JthetaTmv(this.K,dAZ,zeros(T,0),Y) + dtheta = [-vec(dth1); -vec(dth2); vec(dth3); -vec(dth4)] + return dtheta +end + +function JYTmv(this::DoubleSymLayer{T},Zin::Array{T},dummy::Array{T},theta::Array{T},Y::Array{T},tmp) where {T<:Number} + + nex = div(length(Y),nFeatIn(this)) + Z = reshape(Zin,:,nex) + th1,th2,th3,th4 = splitWeights(this,theta) + Kop = getOp(this.K,th1) + A,dA = this.activation(tmp[2],true) + + dAZ = dA.*(Kop*Z) + dAZ = JYTmv(this.nLayer,dAZ,(T)[],th4,Kop*Y,tmp[1]) + dAZ_out = reshape(dAZ,:,nex) + dY = -(Kop'*dAZ_out) + return dY +end + +function JTmv(this::DoubleSymLayer{T}, Zin::Array{T}, dummy::Array{T}, + theta::Array{T}, Yin::Array{T}, tmp) where {T<:Number} + + nex = div(length(Yin),nFeatIn(this)) + Z = reshape(Zin, :, nex) + Yt = reshape(tmp[2]::Array{T,2},:,nex) + Y = reshape(Yin,:,nex) + th1, th2, th3, th4 = splitWeights(this,theta) + #Kop = getOp(this.K,th1) + A::Array{T,2}, dA::Array{T,2} = this.activation(Yt,true) + + dth3 = vec(sum(this.Bout'*Z,2)) + + KopZ = Amv(this.K, th1, Z) + dAZ1 = dA.*KopZ + + dth2 = vec(sum(this.Bin'*dAZ1,2)) + KopY = Amv(this.K, th1, Y) + dth4, dAZ2 = JTmv(this.nLayer,dAZ1,zeros(T,0),th4,KopY,tmp[1]) + dth1 = JthetaTmv(this.K,dAZ2,zeros(T,0),Y) + dth1 = dth1 + JthetaTmv(this.K,A,(T)[],Z) + dtheta = [-vec(dth1); -vec(dth2); 
vec(dth3);-vec(dth4)] + + dAZ_out = reshape(dAZ2,:,nex) + KopTdAZ = ATmv(this.K, th1, dAZ_out) + dY = -KopTdAZ + return dtheta, dY +end diff --git a/src/layers/normLayer.jl b/src/layers/normLayer.jl index 8d944f8..f742845 100644 --- a/src/layers/normLayer.jl +++ b/src/layers/normLayer.jl @@ -12,13 +12,18 @@ function getNormLayer(TYPE::Type, nData,doNorm,eps = convert(TYPE,1e-3)) end function getBatchNormLayer(TYPE::Type, nData; eps = convert(TYPE,1e-3),isTrainable::Bool=true) + L = normLayer{TYPE}(nData,3,eps) if isTrainable SL = AffineScalingLayer{TYPE}(nData) - return getNN([L;SL]); + + temp_var = getNN([L;SL]) + return temp_var; else - return L; + temp_var = L + return temp_var; end + end function getTVNormLayer(TYPE::Type,nData;eps = convert(TYPE,1e-3),isTrainable::Bool=true) @@ -33,6 +38,7 @@ end function apply(this::normLayer{T},theta::Array{T},Yin::Array{T,2},doDerivative=true) where {T <: Number} + # tic() # first organize Y with channels nf = this.nData[2]::Int nex = div(length(Yin),nFeatIn(this))::Int @@ -48,7 +54,7 @@ function apply(this::normLayer{T},theta::Array{T},Yin::Array{T,2},doDerivative=t Yout ./= S2 Yout2 = reshape(Yout,:,nex) - + # println("Measuring time for batch norm ",toc()) return Yout2, Yout2, dA end diff --git a/src/layers/singleLayer.jl b/src/layers/singleLayer.jl index 3b6b24f..d7af01f 100644 --- a/src/layers/singleLayer.jl +++ b/src/layers/singleLayer.jl @@ -1,158 +1,158 @@ -export singleLayer,getSingleLayer - -mutable struct singleLayer{T, TK <: AbstractConvKernel{T}, TN <: Union{NN{T}, normLayer{T}}} <: AbstractMeganetElement{T} - activation :: Function # activation function - K :: TK # transformation type - nLayer :: TN # normalization layer - Bin :: Array{T} # bias inside nonlinearity - Bout :: Array{T} # bias outside nonlinearity - -end - -function getSingleLayer(TYPE::Type, K,nLayer;Bin=zeros(TYPE,nFeatOut(K),0),Bout=zeros(TYPE,nFeatOut(K),0),activation=tanhActivation) - singleLayer(activation,K,nLayer,Bin,Bout); -end - - -function splitWeights(this::singleLayer{T},theta::Array{T}) where {T <: Number} - th1 = theta[1:nTheta(this.K)] - cnt = length(th1) - th2 = theta[cnt+(1:size(this.Bin,2))] - cnt += length(th2) - th3 = theta[cnt+(1:size(this.Bout,2))] - cnt += length(th3) - th4 = theta[cnt+(1:nTheta(this.nLayer))] - cnt += length(th4) - if cnt!=length(theta); error("splitWeights: length does not match"); end - return th1, th2, th3, th4 -end - -function apply(this::singleLayer{T},theta::Array{T},Yin::Array{T},doDerivative=false) where {T <: Number} - tmp = Array{Any}(2) - nex = div(length(Yin),nFeatIn(this)) - Y = reshape(Yin,:,nex) - th1,th2,th3,th4 = splitWeights(this,theta) - - Yout::Array{T,2} = getOp(this.K,th1)*Y - Yout .+= this.Bin * th2 - Yout,dummy,tmp[1] = apply(this.nLayer,th4,Yout,doDerivative) - Yout,tmp[2] = this.activation(Yout,doDerivative) - Yout .+= this.Bout*th3 - Ydata = Yout - return Ydata, Yout, tmp -end - -function nTheta(this::singleLayer) - return nTheta(this.K)+size(this.Bin,2) + size(this.Bout,2) + nTheta(this.nLayer) -end - -function nFeatIn(this::singleLayer) - return nFeatIn(this.K) -end - -function nFeatOut(this::singleLayer) - return nFeatOut(this.K) -end - -function nDataOut(this::singleLayer) - return nFeatOut(this.K) -end - -function initTheta(this::singleLayer{T}) where {T <: Number} - return [vec(initTheta(this.K)); convert(T,0.1)*ones(T,size(this.Bin,2),1) ; convert(T,0.1)*ones(T,size(this.Bout,2),1); initTheta(this.nLayer) ] -end - - -function 
Jthetamv(this::singleLayer{T},dtheta::Array{T},theta::Array{T},Yin::Array{T},tmp) where {T <: Number} - dA::Array{T,2} = tmp[2] - nex = div(length(Yin),nFeatIn(this)) - Y = reshape(Yin,:,nex) - - th1,th2,th3,th4 = splitWeights(this,theta) - dth1,dth2,dth3,dth4 = splitWeights(this,dtheta) - dZ::Array{T,2} = Jthetamv(this.K,dth1,th1,Y) .+ this.Bin*dth2 - Kop = getOp(this.K,th1) - dZ = Jmv(this.nLayer,dth4,dZ,th4,Kop*Y.+this.Bin*th2,tmp[1])[2] - dZ .*= dA - dZ .+= this.Bout*dth3 - return dZ, dZ -end - -function JYmv(this::singleLayer{T},dYin::Array{T},theta::Array{T},Y::Array{T},tmp) where {T <: Number} - dA = tmp[2] - nex = div(length(dYin),nFeatIn(this)) - th1,th2,th3,th4 = splitWeights(this,theta) - Kop = getOp(this.K,th1) - dY = reshape(dYin,:,nex) - dZ = Kop*dY - dZ = JYmv(this.nLayer,dZ,th4,Kop*Y.+this.Bin*th2,tmp[1])[2] - # dZ = dA.*dZ - dZ .*= dA - return dZ,dZ -end - -function Jmv(this::singleLayer{T},dtheta::Array{T},dYin::Array{T},theta::Array{T},Yin::Array{T},tmp) where {T <: Number} - dA::Array{T,2} = tmp[2] - nex = div(length(Yin),nFeatIn(this)) - th1,th2,th3,th4 = splitWeights(this,theta) - dth1,dth2,dth3,dth4 = splitWeights(this,dtheta) - - dY = reshape(dYin,:,nex); - Kop = getOp(this.K,th1) - dZ::Array{T, 2} = Kop*dY; - - Y = reshape(Yin,:,nex); - dZ += Jthetamv(this.K,dth1,th1,Y) .+ this.Bin*dth2 - dZ = Jmv(this.nLayer,dth4,dZ,th4,Kop*Y.+this.Bin*th2,tmp[1])[2] - - dZ .*= dA - dZ .+= this.Bout*dth3 - return dZ,dZ -end - -function JTmv(this::singleLayer{T},Zin::Array{T},dummy::Array{T},theta::Array{T},Y::Array{T},tmp) where {T <: Number} - dA::Array{T,2} = tmp[2] - nex = div(length(Y),nFeatIn(this)) - Z = reshape(Zin,:,nex) - th1,th2,th3,th4 = splitWeights(this,theta) - Kop = getOp(this.K,th1) - - dth3 = vec(sum(this.Bout'*Z,2)) - dAZ = dA.*Z - dth4,dAZ = JTmv(this.nLayer,dAZ,zeros(T,0),th4,Kop*Y.+this.Bin*th2,tmp[1]) # this not type stable - dth2 = vec(sum(this.Bin'*reshape(dAZ,:,nex),2)) - dth1 = JthetaTmv(this.K, dAZ,theta,Y) # this not type stable - - dY = Kop'*reshape(dAZ,:,nex) - dtheta = [vec(dth1); vec(dth2); vec(dth3); vec(dth4)] - - return dtheta, dY - -end - -function JthetaTmv(this::singleLayer{T},Zin::Array{T},dummy::Array{T},theta::Array{T},Y::Array{T},tmp) where {T <: Number} - dA = tmp[2] - nex = div(length(Zin),nFeatOut(this)) - th1,th2,th3,th4 = splitWeights(this,theta) - - Z = reshape(Zin,:,nex); - dAZ = dA.*Z; - dth3 = vec(sum(this.Bout'*Z,2)); - Kop = getOp(this.K,th1) - dth4,dAZ = JTmv(this.nLayer,dAZ,(T)[],th4,Kop*Y.+this.Bin*th2,tmp[1]) - dth1 = JthetaTmv(this.K,dAZ,theta,Y); - dth2 = vec(sum(this.Bin'*reshape(dAZ,:,nex),2)); - return [vec(dth1); vec(dth2); vec(dth3); vec(dth4)]; -end - -function JYTmv(this::singleLayer{T},Zin::Array{T},dummy::Array{T},theta::Array{T},Y::Array{T},tmp) where {T <: Number} - dA::Array{T,2} = tmp[2] - nex = div(length(Y),nFeatIn(this)) - th1,th2,th3,th4 = splitWeights(this,theta) - Kop = getOp(this.K,th1) - Z = reshape(Zin,:,nex) - dAZ::Array{T,2} = dA.*Z - dAZ = JYTmv(this.nLayer,dAZ,(T)[],th4,Kop*Y.+this.Bin*th2,tmp[1]) - ret::Array{T,2} = Kop'*reshape(dAZ,:,nex) - return ret #TODO: @lars or eldad rename this variable as I'm not sure what to call it -end +export singleLayer,getSingleLayer + +mutable struct singleLayer{T, TK <: AbstractConvKernel{T}, TN <: Union{NN{T}, normLayer{T}}} <: AbstractMeganetElement{T} + activation :: Function # activation function + K :: TK # transformation type + nLayer :: TN # normalization layer + Bin :: Array{T} # bias inside nonlinearity + Bout :: Array{T} # bias outside nonlinearity + 
+end + +function getSingleLayer(TYPE::Type, K,nLayer;Bin=zeros(TYPE,nFeatOut(K),0),Bout=zeros(TYPE,nFeatOut(K),0),activation=tanhActivation) + singleLayer(activation,K,nLayer,Bin,Bout); +end + + +function splitWeights(this::singleLayer{T},theta::Array{T}) where {T <: Number} + th1 = theta[1:nTheta(this.K)] + cnt = length(th1) + th2 = theta[cnt+(1:size(this.Bin,2))] + cnt += length(th2) + th3 = theta[cnt+(1:size(this.Bout,2))] + cnt += length(th3) + th4 = theta[cnt+(1:nTheta(this.nLayer))] + cnt += length(th4) + if cnt!=length(theta); error("splitWeights: length does not match"); end + return th1, th2, th3, th4 +end + +function apply(this::singleLayer{T},theta::Array{T},Yin::Array{T},doDerivative=false) where {T <: Number} + tmp = Array{Any}(2) + nex = div(length(Yin),nFeatIn(this)) + Y = reshape(Yin,:,nex) + th1,th2,th3,th4 = splitWeights(this,theta) + + Yout::Array{T,2} = getOp(this.K,th1)*Y + Yout .+= this.Bin * th2 + Yout,dummy,tmp[1] = apply(this.nLayer,th4,Yout,doDerivative) + Yout,tmp[2] = this.activation(Yout,doDerivative) + Yout .+= this.Bout*th3 + Ydata = Yout + return Ydata, Yout, tmp +end + +function nTheta(this::singleLayer) + return nTheta(this.K)+size(this.Bin,2) + size(this.Bout,2) + nTheta(this.nLayer) +end + +function nFeatIn(this::singleLayer) + return nFeatIn(this.K) +end + +function nFeatOut(this::singleLayer) + return nFeatOut(this.K) +end + +function nDataOut(this::singleLayer) + return nFeatOut(this.K) +end + +function initTheta(this::singleLayer{T}) where {T <: Number} + return [vec(initTheta(this.K)); convert(T,0.01)*ones(T,size(this.Bin,2),1) ; convert(T,0.01)*ones(T,size(this.Bout,2),1); initTheta(this.nLayer) ] +end + + +function Jthetamv(this::singleLayer{T},dtheta::Array{T},theta::Array{T},Yin::Array{T},tmp) where {T <: Number} + dA::Array{T,2} = tmp[2] + nex = div(length(Yin),nFeatIn(this)) + Y = reshape(Yin,:,nex) + + th1,th2,th3,th4 = splitWeights(this,theta) + dth1,dth2,dth3,dth4 = splitWeights(this,dtheta) + dZ::Array{T,2} = Jthetamv(this.K,dth1,th1,Y) .+ this.Bin*dth2 + Kop = getOp(this.K,th1) + dZ = Jmv(this.nLayer,dth4,dZ,th4,Kop*Y.+this.Bin*th2,tmp[1])[2] + dZ .*= dA + dZ .+= this.Bout*dth3 + return dZ, dZ +end + +function JYmv(this::singleLayer{T},dYin::Array{T},theta::Array{T},Y::Array{T},tmp) where {T <: Number} + dA = tmp[2] + nex = div(length(dYin),nFeatIn(this)) + th1,th2,th3,th4 = splitWeights(this,theta) + Kop = getOp(this.K,th1) + dY = reshape(dYin,:,nex) + dZ = Kop*dY + dZ = JYmv(this.nLayer,dZ,th4,Kop*Y.+this.Bin*th2,tmp[1])[2] + # dZ = dA.*dZ + dZ .*= dA + return dZ,dZ +end + +function Jmv(this::singleLayer{T},dtheta::Array{T},dYin::Array{T},theta::Array{T},Yin::Array{T},tmp) where {T <: Number} + dA::Array{T,2} = tmp[2] + nex = div(length(Yin),nFeatIn(this)) + th1,th2,th3,th4 = splitWeights(this,theta) + dth1,dth2,dth3,dth4 = splitWeights(this,dtheta) + + dY = reshape(dYin,:,nex); + Kop = getOp(this.K,th1) + dZ::Array{T, 2} = Kop*dY; + + Y = reshape(Yin,:,nex); + dZ += Jthetamv(this.K,dth1,th1,Y) .+ this.Bin*dth2 + dZ = Jmv(this.nLayer,dth4,dZ,th4,Kop*Y.+this.Bin*th2,tmp[1])[2] + + dZ .*= dA + dZ .+= this.Bout*dth3 + return dZ,dZ +end + +function JTmv(this::singleLayer{T},Zin::Array{T},dummy::Array{T},theta::Array{T},Y::Array{T},tmp) where {T <: Number} + dA::Array{T,2} = tmp[2] + nex = div(length(Y),nFeatIn(this)) + Z = reshape(Zin,:,nex) + th1,th2,th3,th4 = splitWeights(this,theta) + Kop = getOp(this.K,th1) + + dth3 = vec(sum(this.Bout'*Z,2)) + dAZ = dA.*Z + dth4,dAZ = JTmv(this.nLayer,dAZ,zeros(T,0),th4,Kop*Y.+this.Bin*th2,tmp[1]) # 
this not type stable + dth2 = vec(sum(this.Bin'*reshape(dAZ,:,nex),2)) + dth1 = JthetaTmv(this.K, dAZ,theta,Y) # this not type stable + + dY = Kop'*reshape(dAZ,:,nex) + dtheta = [vec(dth1); vec(dth2); vec(dth3); vec(dth4)] + + return dtheta, dY + +end + +function JthetaTmv(this::singleLayer{T},Zin::Array{T},dummy::Array{T},theta::Array{T},Y::Array{T},tmp) where {T <: Number} + dA = tmp[2] + nex = div(length(Zin),nFeatOut(this)) + th1,th2,th3,th4 = splitWeights(this,theta) + + Z = reshape(Zin,:,nex); + dAZ = dA.*Z; + dth3 = vec(sum(this.Bout'*Z,2)); + Kop = getOp(this.K,th1) + dth4,dAZ = JTmv(this.nLayer,dAZ,(T)[],th4,Kop*Y.+this.Bin*th2,tmp[1]) + dth1 = JthetaTmv(this.K,dAZ,theta,Y); + dth2 = vec(sum(this.Bin'*reshape(dAZ,:,nex),2)); + return [vec(dth1); vec(dth2); vec(dth3); vec(dth4)]; +end + +function JYTmv(this::singleLayer{T},Zin::Array{T},dummy::Array{T},theta::Array{T},Y::Array{T},tmp) where {T <: Number} + dA::Array{T,2} = tmp[2] + nex = div(length(Y),nFeatIn(this)) + th1,th2,th3,th4 = splitWeights(this,theta) + Kop = getOp(this.K,th1) + Z = reshape(Zin,:,nex) + dAZ::Array{T,2} = dA.*Z + dAZ = JYTmv(this.nLayer,dAZ,(T)[],th4,Kop*Y.+this.Bin*th2,tmp[1]) + ret::Array{T,2} = Kop'*reshape(dAZ,:,nex) + return ret #TODO: @lars or eldad rename this variable as I'm not sure what to call it +end diff --git a/src/optimization/sgd.jl b/src/optimization/sgd.jl index a2feab5..a74de3a 100644 --- a/src/optimization/sgd.jl +++ b/src/optimization/sgd.jl @@ -26,7 +26,7 @@ end Base.display(this::SGD)=println("SGD(maxEpochs=$(this.maxEpochs),miniBatch=$(this.miniBatch),learningRate=$(this.learningRate),momentum=$(this.momentum),nesterov=$(this.nesterov),ADAM=$(this.ADAM))") function solve(this::SGD{T},objFun::dnnObjFctn,xc::Array{T},Y::Array{T},C::Array{T},Yv::Array{T},Cv::Array{T}) where {T} - + # evaluate training and validation epoch = 1; xOld = copy(xc); @@ -65,11 +65,19 @@ function solve(this::SGD{T},objFun::dnnObjFctn,xc::Array{T},Y::Array{T},C::Array dJ = lr*dJk + this.momentum*dJ end xc = xc - dJ + # xc = xc - zero(T)*dJ + # ss = randn(T,length(dJ))*T(1e-3) + # J1, = evalObjFctn(objFun,xc,Y,C) + # J2, = evalObjFctn(objFun,xc+ss,Y,C) + # println(abs(J1-J2)," ", abs(J1-J2-dot(dJk[:],ss[:]))) end # we sample 2^12 images from the training set for displaying the objective. 
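# A minimal illustration, assuming `ids` holds the shuffled training indices and `nex`
# the number of training examples: whenever nex exceeds 2^12 = 4096 only the first 4096
# shuffled examples are used below, so Jc and the training accuracy printed each epoch
# are cheap subsample estimates, while Jval/pVal come from the full validation set, e.g.
#   length(ids[1:min(nex,2^12)]) == min(nex, 4096)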
idt = ids[1:min(nex,2^12)] Jc,para = evalObjFctn(objFun,xc,Y[:,idt],C[:,idt]); Jval,pVal = getMisfit(objFun,xc,Yv,Cv,false); + + + if this.out; @printf "%d\t%1.2e\t%1.2f\t%1.2e\t%1.2e\t%1.2f\n" epoch Jc 100*(1-para[3]/para[2]) norm(xOld-xc) Jval 100*(1-pVal[3]/pVal[2]) diff --git a/test/kernel/convFFTKernelTest.jl b/test/kernel/convFFTKernelTest.jl index c9afa3e..f6a8eb0 100644 --- a/test/kernel/convFFTKernelTest.jl +++ b/test/kernel/convFFTKernelTest.jl @@ -1,35 +1,37 @@ -using Base.Test -using Meganet -using LinearOperators - - -nImg = [8,10] -sK = [3,3,4,4] -for TYPE=[Float64,Float32] - K = getConvFFTKernel(TYPE,nImg,sK) - - @testset "adjoint test $TYPE" begin - theta = initTheta(K) - A = getOp(K,theta); - v = randn(TYPE,nFeatIn(K)) - w = randn(TYPE,nFeatOut(K)) - - t1 = dot(w,A*v) - t2 = dot(v,A'*w) - # println("adjointTest t1=$t1\t t2=$t2") - @test norm(t1-t2)/norm(t1) < 1e3*eps(TYPE) - end - - @testset "derivative Test" begin - th = initTheta(K); - dth = initTheta(K); - nex = 1; - Y = randn(TYPE,nFeatIn(K),nex)+nex; - Z = randn(TYPE,nFeatOut(K),nex)-nex; - - t1 = vec(Z)'*vec(Jthetamv(K,dth,th,Y)); - t2 = vec(dth)'*vec(JthetaTmv(K,Z,th,Y)); - # println("derivativeTest t1=$t1\t t2=$t2") - @test norm(t1-t2)/norm(t2) < 1e3*eps(TYPE) - end -end +using Base.Test +using Meganet +using LinearOperators + + +nImg = [8,10] +sK = [3,3,4,4] +for TYPE=[Float64,Float32] + K = getConvFFTKernel(TYPE,nImg,sK) + + @testset "adjoint test $TYPE" begin + nex = 2; + theta = initTheta(K) + A = getOp(K,theta); + v = randn(TYPE,nFeatIn(K),nex) + w = randn(TYPE,nFeatOut(K),nex) + + t1 = vecdot(w,A*v) + t2 = vecdot(v,A'*w) + + # println("adjointTest t1=$t1\t t2=$t2") + @test norm(t1-t2)/norm(t1) < 1e3*eps(TYPE) + end + + @testset "derivative Test" begin + th = initTheta(K); + dth = initTheta(K); + nex = 2; + Y = randn(TYPE,nFeatIn(K),nex)+nex; + Z = randn(TYPE,nFeatOut(K),nex)-nex; + + t1 = vec(Z)'*vec(Jthetamv(K,dth,th,Y)); + t2 = vec(dth)'*vec(JthetaTmv(K,Z,th,Y)); + # println("derivativeTest t1=$t1\t t2=$t2") + @test norm(t1-t2)/norm(t2) < 1e3*eps(TYPE) + end +end diff --git a/test/kernel/convGEMMKernelTest.jl b/test/kernel/convGEMMKernelTest.jl index 8b16941..f70bfd6 100644 --- a/test/kernel/convGEMMKernelTest.jl +++ b/test/kernel/convGEMMKernelTest.jl @@ -1,55 +1,40 @@ -using Base.Test -using Meganet -using LinearOperators - - -nImg = [8,10] -sK = [3,3,4,4] -for TYPE=[Float64,Float32] - K = getConvGEMMKernel(TYPE,nImg,sK) - - @testset "adjoint test $TYPE" begin - theta = initTheta(K) - A = getOp(K,theta); - v = randn(TYPE,nFeatIn(K)) - w = randn(TYPE,nFeatOut(K)) - - t1 = dot(w,A*v) - t2 = dot(v,A'*w) - # println("adjointTest t1=$t1\t t2=$t2") - @test norm(t1-t2)/norm(t1) < 1e3*eps(TYPE) - end - - @testset "derivative Test" begin - th = initTheta(K); - dth = initTheta(K); - nex = 1; - Y = randn(TYPE,nFeatIn(K),nex)+nex; - Z = randn(TYPE,nFeatOut(K),nex)-nex; - - t1 = vec(Z)'*vec(Jthetamv(K,dth,th,Y)); - t2 = vec(dth)'*vec(JthetaTmv(K,Z,th,Y)); - # println("derivativeTest t1=$t1\t t2=$t2") - @test norm(t1-t2)/norm(t2) < 1e3*eps(TYPE) - end - - @testset "new derivitive test" begin - nImage = [16,16]; - sK = [3,3,2,4]; - K = randn(TYPE,tuple(sK...)); - Y = randn(TYPE,nImage[1],nImage[2],sK[3],2); - Z = randn(TYPE,nImage[1],nImage[2],sK[4],2); - Kernel2 = getConvGEMMKernel(TYPE,nImage,sK); - AY = Amv(Kernel2,K,Y); - ATZ = ATmv(Kernel2,K,Z); - - v1 = vecdot(Z,AY); - v2 = vecdot(ATZ,Y); - - v3 = vecdot(Z,Jthetamv(Kernel2,K,(TYPE)[],Y)); - v4 = vecdot(K,JthetaTmv(Kernel2,Z,(TYPE)[],Y)); - @test 
norm(v1-v2)/norm(v2) < 1e3*eps(TYPE) && - norm(v2-v3)/norm(v3) < 1e3*eps(TYPE) && - norm(v3-v4)/norm(v4) < 1e3*eps(TYPE) - end -end +using Base.Test +using Meganet +using LinearOperators + + +nImg = [8,10] +sK = [3,3,2,4] +for TYPE=[Float64,Float32] + K = getConvGEMMKernel(TYPE,nImg,sK) + + @testset "adjoint test $TYPE" begin + nex = 2; + theta = initTheta(K) + A = getOp(K,theta); + v = randn(TYPE,nFeatIn(K),nex) + w = randn(TYPE,nFeatOut(K),nex) + + t1 = dot(w,A*v) + t2 = dot(v,A'*w) + # println("adjointTest t1=$t1\t t2=$t2") + @test norm(t1-t2)/norm(t1) < 1e3*eps(TYPE) + end + + @testset "derivative Test" begin + th = initTheta(K); + dth = initTheta(K); + nex = 2; + Y = randn(TYPE,nFeatIn(K),nex); + Z = randn(TYPE,nFeatOut(K),nex); + + t1 = vec(Z)'*vec(Jthetamv(K,dth,th,Y)); + t2 = vec(dth)'*vec(JthetaTmv(K,Z,th,Y)); + # println("derivativeTest t1=$t1\t t2=$t2") + @test norm(t1-t2)/norm(t2) < 1e3*eps(TYPE) + end +end + + + + diff --git a/test/layer/doubleSymLayerTest.jl b/test/layer/doubleSymLayerTest.jl index 13a8f1c..ac5c964 100644 --- a/test/layer/doubleSymLayerTest.jl +++ b/test/layer/doubleSymLayerTest.jl @@ -1,61 +1,61 @@ -using Base.Test -using Meganet - -for TYPE=[Float64,Float32] - K = getDenseKernel(TYPE,[32,18]) - nex = 8 - Bin = randn(TYPE,nFeatOut(K),4) - Bout = randn(TYPE,nFeatIn(K),3) - nLayer = getTVNormLayer(TYPE,[8,4]) - L = getDoubleSymLayer(TYPE,K,nLayer,Bin,Bout) - @testset "doubleSymLayer (dense/TV) $TYPE" begin - testAbstractMeganetElement(L) - end - - K = getDenseKernel(TYPE,[32,18]) - nex = 8 - Bin = randn(TYPE,nFeatOut(K),4) - Bout = randn(TYPE,nFeatIn(K),3) - nLayer = getBatchNormLayer(TYPE,[8,4]) - L = getDoubleSymLayer(TYPE,K,nLayer,Bin,Bout) - @testset "doubleSymLayer (dense/Batch) $TYPE" begin - testAbstractMeganetElement(L) - end - - nImg = [32 32] - nc = 16 - nex = 50 - K = getSparseConvKernel2D(TYPE,nImg,[3,3,1,nc]) - Bin = randn(TYPE,nFeatOut(K),4) - Bout = randn(TYPE,nFeatIn(K),3) - nLayer = getBatchNormLayer(TYPE,[prod(nImg),nc],isTrainable=false) - L = getDoubleSymLayer(TYPE,K,nLayer,Bin,Bout) - @testset "doubleSymLayer (conv/Batch/not trainable) $TYPE" begin - testAbstractMeganetElement(L,nex=nex) - end - - - nImg = [8 4] - nc = 3 - nex = 4 - K = getSparseConvKernel2D(TYPE,nImg,[3,3,1,nc]) - Bin = randn(TYPE,nFeatOut(K),4) - Bout = randn(TYPE,nFeatIn(K),3) - nLayer = getBatchNormLayer(TYPE,[prod(nImg),nc]) - L = getDoubleSymLayer(TYPE,K,nLayer,Bin,Bout) - @testset "doubleSymLayer (conv/Batch) $TYPE" begin - testAbstractMeganetElement(L,nex=nex) - end - - nImg = [16 8] - nc = 6 - nex = 8 - K = getSparseConvKernel2D(TYPE,nImg,[3,3,1,nc]) - Bin = randn(TYPE,nFeatOut(K),4) - Bout = randn(TYPE,nFeatIn(K),3) - nLayer = getTVNormLayer(TYPE,[prod(nImg),nc]) - L = getDoubleSymLayer(TYPE,K,nLayer,Bin,Bout) - @testset "doubleSymLayer (conv/TV) $TYPE" begin - testAbstractMeganetElement(L) - end -end +using Base.Test +using Meganet + +for TYPE=[Float64,Float32] + K = getDenseKernel(TYPE,[32,18]) + nex = 8 + Bin = randn(TYPE,nFeatOut(K),4) + Bout = randn(TYPE,nFeatIn(K),3) + nLayer = getTVNormLayer(TYPE,[8,4]) + L = getDoubleSymLayer(TYPE,K,nLayer,Bin=Bin,Bout=Bout) + @testset "doubleSymLayer (dense/TV) $TYPE" begin + testAbstractMeganetElement(L) + end + + K = getDenseKernel(TYPE,[32,18]) + nex = 8 + Bin = randn(TYPE,nFeatOut(K),4) + Bout = randn(TYPE,nFeatIn(K),3) + nLayer = getBatchNormLayer(TYPE,[8,4]) + L = getDoubleSymLayer(TYPE,K,nLayer,Bin=Bin,Bout=Bout) + @testset "doubleSymLayer (dense/Batch) $TYPE" begin + testAbstractMeganetElement(L) + end + + nImg = 
[32 32] + nc = 16 + nex = 50 + K = getSparseConvKernel2D(TYPE,nImg,[3,3,1,nc]) + Bin = randn(TYPE,nFeatOut(K),4) + Bout = randn(TYPE,nFeatIn(K),3) + nLayer = getBatchNormLayer(TYPE,[prod(nImg),nc],isTrainable=false) + L = getDoubleSymLayer(TYPE,K,nLayer,Bin=Bin,Bout=Bout) + @testset "doubleSymLayer (conv/Batch/not trainable) $TYPE" begin + testAbstractMeganetElement(L,nex=nex) + end + + + nImg = [8 4] + nc = 3 + nex = 4 + K = getSparseConvKernel2D(TYPE,nImg,[3,3,1,nc]) + Bin = randn(TYPE,nFeatOut(K),4) + Bout = randn(TYPE,nFeatIn(K),3) + nLayer = getBatchNormLayer(TYPE,[prod(nImg),nc]) + L = getDoubleSymLayer(TYPE,K,nLayer,Bin=Bin,Bout=Bout) + @testset "doubleSymLayer (conv/Batch) $TYPE" begin + testAbstractMeganetElement(L,nex=nex) + end + + nImg = [16 8] + nc = 6 + nex = 8 + K = getSparseConvKernel2D(TYPE,nImg,[3,3,1,nc]) + Bin = randn(TYPE,nFeatOut(K),4) + Bout = randn(TYPE,nFeatIn(K),3) + nLayer = getTVNormLayer(TYPE,[prod(nImg),nc]) + L = getDoubleSymLayer(TYPE,K,nLayer,Bin=Bin,Bout=Bout) + @testset "doubleSymLayer (conv/TV) $TYPE" begin + testAbstractMeganetElement(L) + end +end From abec9cacbbbc48bf204e5bb898b33b8c72219d04 Mon Sep 17 00:00:00 2001 From: moumitaTora Date: Tue, 13 Feb 2018 18:01:02 -0800 Subject: [PATCH 05/22] "Eran's updated codes" --- regemiii/EResNN_CIFAR10.jl | 102 --------------- regemiii/Meganet.jl | 49 ------- regemiii/abstractConvKernel.jl | 50 -------- regemiii/convFFTKernel.jl | 126 ------------------ regemiii/convFFTKernelTest.jl | 37 ------ regemiii/convGEMMKernel.jl | 227 --------------------------------- regemiii/convGEMMKernelTest.jl | 40 ------ regemiii/doubleSymLayer.jl | 215 ------------------------------- regemiii/doubleSymLayerTest.jl | 61 --------- regemiii/reluActivation.jl | 44 ------- regemiii/singleLayer.jl | 158 ----------------------- 11 files changed, 1109 deletions(-) delete mode 100644 regemiii/EResNN_CIFAR10.jl delete mode 100644 regemiii/Meganet.jl delete mode 100644 regemiii/abstractConvKernel.jl delete mode 100644 regemiii/convFFTKernel.jl delete mode 100644 regemiii/convFFTKernelTest.jl delete mode 100644 regemiii/convGEMMKernel.jl delete mode 100644 regemiii/convGEMMKernelTest.jl delete mode 100644 regemiii/doubleSymLayer.jl delete mode 100644 regemiii/doubleSymLayerTest.jl delete mode 100644 regemiii/reluActivation.jl delete mode 100644 regemiii/singleLayer.jl diff --git a/regemiii/EResNN_CIFAR10.jl b/regemiii/EResNN_CIFAR10.jl deleted file mode 100644 index e9b60ba..0000000 --- a/regemiii/EResNN_CIFAR10.jl +++ /dev/null @@ -1,102 +0,0 @@ -using MAT, Meganet - -n = 256; -# Y_train,C_train,Y_test,C_test = getCIFAR10(n,Pkg.dir("Meganet")*"/data/CIFAR10/"); -Y_train,C_train,Y_test,C_test = getCIFAR10(n,"../data/CIFAR10/"); -# using PyPlot -# y = Y_train[:,50]; y = y - minimum(y); y = y./maximum(y); -# y = reshape(y,32,32,3); -# y[:,:,1] = y[:,:,1]';y[:,:,2] = y[:,:,2]';y[:,:,3] = y[:,:,3]'; -# figure(); imshow(y) - -miniBatchSize = 32; -nImg = [32; 32] -cin = 3 -nc = [16;32;64;64] -nt = 2*[1;1;1] -h = [1.;1.;1.] 
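# A short sketch of what these settings imply, assuming the loop below builds one ResNN
# block per entry of nt: nc = [16;32;64;64] gives the channel widths (the last entry being
# the width of the features handed to the classifier), nt = 2*[1;1;1] == [2;2;2] gives two
# residual steps per block, and h = [1.;1.;1.] the step size used inside each block, so
#   sum(2*[1;1;1])   # == 6 residual layers in total, plus the opening and connector layers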
- -TYPE = Float32; -act = reluActivation; -getConvKernel = (nImg,sK) -> getConvGEMMKernel(TYPE,nImg,sK); -# getConvKernel = (nImg,sK) -> getConvFFTKernel(TYPE,nImg,sK); -#getConvKernel = (nImg,sK) -> getSparseConvKernel2D(TYPE,nImg,sK); - -# opening layer -K1 = getConvKernel(nImg,[3,3,cin,nc[1]]); - -nL = getBatchNormLayer(TYPE,[prod(nImg);nc[1]],isTrainable=true); -Bin = kron(eye(TYPE,nc[1]),ones(TYPE,prod(nImg),1)); -blocks = [getSingleLayer(TYPE,K1,nL,Bin = Bin,activation = act)] - -for k=1:length(nt) - # ResNN layers - K2 = getConvKernel(nImg,[3,3,nc[k],nc[k]]) - nL = getBatchNormLayer(TYPE,[prod(nImg);nc[k]],isTrainable=true) - Bin = kron(eye(TYPE,nc[k]),ones(TYPE,prod(nImg),1)); - Bout = zeros(TYPE, nFeatIn(K2),0); - - L2 = getDoubleSymLayer(TYPE,K2,nL,Bin=Bin,activation=act) - RN = getResNN(TYPE,L2,nt[k],h[k]) - - if k (24, 500))) -# end diff --git a/regemiii/Meganet.jl b/regemiii/Meganet.jl deleted file mode 100644 index a38a01d..0000000 --- a/regemiii/Meganet.jl +++ /dev/null @@ -1,49 +0,0 @@ -module Meganet - -using LinearOperators, MAT, Base.Test - -import JLD, BenchmarkTools - -include("AbstractMeganetElement.jl") - -include("activations/tanhActivation.jl") -include("activations/reluActivation.jl") -include("activations/identityActivation.jl") - - -include("integrators/NN.jl") -include("integrators/connector.jl") -include("integrators/ResNN.jl") -include("kernelTypes/abstractConvKernel.jl"); -include("kernelTypes/denseKernel.jl") -include("kernelTypes/sparseKernel.jl") -include("kernelTypes/convFFTKernel.jl"); -include("kernelTypes/convGEMMKernel.jl"); -include("kernelTypes/convCircKernel.jl"); -include("kernelTypes/convDiagKernel.jl"); - - - -include("layers/affineScalingLayer.jl") -include("layers/normLayer.jl") -include("layers/doubleSymLayer.jl") -include("layers/singleLayer.jl") - -include("loss/softMaxLoss.jl") -include("regularization/TikhonovReg.jl") - -include("optimization/dnnBatchObjFctn.jl") -include("optimization/sgd.jl") - -include("utils/getConvMatPeriodic.jl") -include("utils/testAbstractMeganetElement.jl") -include("utils/testLossFunction.jl") -include("utils/utilities.jl"); -include("utils/checkDerivative.jl"); -include("utils/normalizeData.jl"); - -include("utils/getCIFAR10.jl"); -include("utils/Benchmark.jl"); - - -end diff --git a/regemiii/abstractConvKernel.jl b/regemiii/abstractConvKernel.jl deleted file mode 100644 index 6e49529..0000000 --- a/regemiii/abstractConvKernel.jl +++ /dev/null @@ -1,50 +0,0 @@ -export nImgIn, nImgOut, nFeatIn, nFeatOut, nTheta, getOp, initTheta, AbstractConvKernel - -abstract type AbstractConvKernel{T} <: AbstractMeganetElement{T} end - -## All convKernel types are assumed to have fields nImage (size of the image) and sK (size of the Convolution Kernel) - -function nImgIn(this::AbstractConvKernel) - return [this.nImg[1]; this.nImg[2]; this.sK[3]] -end - -function nImgOut(this::AbstractConvKernel) - return [this.nImg[1]; this.nImg[2]; this.sK[4]] -end - -function nFeatIn(this::AbstractConvKernel) - return prod(nImgIn(this)); -end -function nFeatOut(this::AbstractConvKernel) - return prod(nImgOut(this)); -end - -function nTheta(this::AbstractConvKernel) - return prod(this.sK); -end - - -function getOp(this::AbstractConvKernel{T},theta::Array{T}) where {T <: Number} - - m = prod(nImgOut(this)) - n = prod(nImgIn(this)) - - A = LinearOperator{T}(m,n,false,false, - v -> Amv(this,theta,v), - Nullable{Function}(), - w -> ATmv(this,theta,w)) - return A -end - -function initTheta(this::AbstractConvKernel{T}) where {T <: Number} - - sd = 
T(0.01); - theta = sd*randn(T,prod(this.sK)); - #id1 = find(theta>2*sd); - #theta(id1[:]) = randn(numel(id1),1); - - #id2 = find(theta< -2*sd); - #theta(id2(:)) = randn(numel(id2),1); - #theta = max(min(2*sd, theta),-2*sd); - return theta -end diff --git a/regemiii/convFFTKernel.jl b/regemiii/convFFTKernel.jl deleted file mode 100644 index 97f02d0..0000000 --- a/regemiii/convFFTKernel.jl +++ /dev/null @@ -1,126 +0,0 @@ -export convFFTKernel, getEigs,getConvFFTKernel -## For the functions nImgIn, nImgOut, nFeatIn, nFeatOut, nTheta, getOp, initTheta : see AbstractConvKernel.jl -## All convKernel types are assumed to have fields nImage and sK -mutable struct convFFTKernel{T} <: AbstractConvKernel{T} - nImg :: Array{Int,1} - sK :: Array{Int,1} - S :: Array{Complex{T},2} -end - -function getConvFFTKernel(TYPE::Type,nImg,sK) - S = getEigs(Complex{TYPE},nImg,sK) - return convFFTKernel{TYPE}(nImg,sK,S) -end - -function getEigs(TYPE,nImg,sK) - S = zeros(TYPE,prod(nImg),prod(sK[1:2])); - for k=1:prod(sK[1:2]) - Kk = zeros(sK[1],sK[2]); - Kk[k] = 1; - Ak = getConvMatPeriodic(TYPE,Kk,[nImg[1],nImg[2], 1]); - Akk = full(convert(Array{TYPE},Ak[:,1])); - S[:,k] = vec(fft2(reshape(Akk,nImg[1],nImg[2]) )); - end - return S -end - -export Amv -function Amv(this::convFFTKernel{T},theta::Array{T},Y::Array{T}) where {T<:Number} - - nex = div(numel(Y),prod(nImgIn(this))) - - # compute convolution - AY = zeros(Complex{T},tuple([nImgOut(this); nex]...)); - theta = reshape(theta, tuple([prod(this.sK[1:2]); this.sK[3:4]]...)); - Yh = ifft2(reshape(Y,tuple([nImgIn(this); nex]...))); - #### allocate stuff for the loop - Sk = zeros(Complex{T},tuple(nImgOut(this)...)) - #T = zeros(Complex{eltype(Y)},tuple(nImgOut(this)...)) - nn = nImgOut(this); nn[3] = 1; - sumT = zeros(Complex{T},tuple([nn;nex]...)) - #### - - for k=1:this.sK[4] - Sk = reshape(this.S*theta[:,:,k],tuple(nImgIn(this)...)); - #T = Sk .* Yh; - #sumT = sum(T,3) - sumT = hadamardSum(sumT,Yh,Sk) - AY[:,:,k,:] = sumT[:,:,1,:]; - end - AY = real(fft2(AY)); - Y = reshape(AY,:,nex); - return Y -end - -function ATmv(this::convFFTKernel{T},theta::Array{T},Z::Array{T}) where {T<:Number} - - nex = div(numel(Z),prod(nImgOut(this))); - ATY = zeros(Complex{T},tuple([nImgIn(this); nex]...)); - theta = reshape(theta, prod(this.sK[1:2]),this.sK[3],this.sK[4]); - #### allocate stuff for the loop - Sk = zeros(Complex{T},tuple(nImgOut(this)...)) - #T = zeros(Complex{eltype(Z)},tuple(nImgOut(this)...)) - nn = nImgOut(this); nn[3] = 1; - sumT = zeros(Complex{T},tuple([nn;nex]...)) - #### - - Yh = fft2(reshape(Z,tuple([nImgOut(this); nex]...))); - for k=1:this.sK[3] - tk = theta[:,k,:] - #if size(this.S,2) == 1 - # tk = reshape(tk,1,:); - #end - Sk = reshape(this.S*tk,tuple(nImgOut(this)...)); - #T = Sk.*Yh; - #sumT = sum(T,3) - sumT = hadamardSum(sumT,Yh,Sk) - ATY[:,:,k,:] = sumT[:,:,1,:]; - end - ATY = real(ifft2(ATY)); - ATY = reshape(ATY,:,nex); - return ATY -end - -function Jthetamv(this::convFFTKernel{T},dtheta::Array{T},dummy::Array{T},Y::Array{T},temp=nothing) where {T<:Number} - - nex = div(numel(Y),nFeatIn(this)); - Y = reshape(Y,:,nex); - Z = Amv(this,dtheta,Y); - return Z -end - -function JthetaTmv(this::convFFTKernel{T},Z::Array{T},dummy::Array{T},Y::Array{T}) where {T<:Number} - # derivative of Z*(A(theta)*Y) w.r.t. 
theta - - nex = div(numel(Y),nFeatIn(this)); - - dth1 = zeros(this.sK[1]*this.sK[2],this.sK[3],this.sK[4]); - Y = permutedims(reshape(Y,tuple([nImgIn(this); nex ]...)),[1 2 4 3]); - Yh = reshape(fft2(Y),prod(this.nImg[1:2]),nex*this.sK[3]); - Zh = permutedims(ifft2(reshape(Z,tuple([nImgOut(this); nex]...))),[1 2 4 3]); - Zh = reshape(Zh,:, this.sK[4]); - - for k=1:prod(this.sK[1:2]) - temp = conj(this.S[:,k]) .* Yh - temp = reshape(temp,:,this.sK[3]) - dth1[k,:,:] = real(conj(temp)'*Zh); - end - - dtheta = reshape(dth1,tuple(this.sK...)); - return dtheta -end - -function hadamardSum(sumT::Array{T},Yh::Array{T},Sk::Array{T}) where {T<:Number} - sumT .= 0.0; - for i4 = 1:size(Yh,4) - for i3 = 1:size(Yh,3) - for i2 = 1:size(Yh,2) - for i1 = 1:size(Yh,1) - @inbounds tt = Sk[i1,i2,i3] - @inbounds sumT[i1,i2,1,i4] += tt * Yh[i1,i2,i3,i4] - end - end - end - end - return sumT -end diff --git a/regemiii/convFFTKernelTest.jl b/regemiii/convFFTKernelTest.jl deleted file mode 100644 index f6a8eb0..0000000 --- a/regemiii/convFFTKernelTest.jl +++ /dev/null @@ -1,37 +0,0 @@ -using Base.Test -using Meganet -using LinearOperators - - -nImg = [8,10] -sK = [3,3,4,4] -for TYPE=[Float64,Float32] - K = getConvFFTKernel(TYPE,nImg,sK) - - @testset "adjoint test $TYPE" begin - nex = 2; - theta = initTheta(K) - A = getOp(K,theta); - v = randn(TYPE,nFeatIn(K),nex) - w = randn(TYPE,nFeatOut(K),nex) - - t1 = vecdot(w,A*v) - t2 = vecdot(v,A'*w) - - # println("adjointTest t1=$t1\t t2=$t2") - @test norm(t1-t2)/norm(t1) < 1e3*eps(TYPE) - end - - @testset "derivative Test" begin - th = initTheta(K); - dth = initTheta(K); - nex = 2; - Y = randn(TYPE,nFeatIn(K),nex)+nex; - Z = randn(TYPE,nFeatOut(K),nex)-nex; - - t1 = vec(Z)'*vec(Jthetamv(K,dth,th,Y)); - t2 = vec(dth)'*vec(JthetaTmv(K,Z,th,Y)); - # println("derivativeTest t1=$t1\t t2=$t2") - @test norm(t1-t2)/norm(t2) < 1e3*eps(TYPE) - end -end diff --git a/regemiii/convGEMMKernel.jl b/regemiii/convGEMMKernel.jl deleted file mode 100644 index 24bcda7..0000000 --- a/regemiii/convGEMMKernel.jl +++ /dev/null @@ -1,227 +0,0 @@ -export convGEMMKernel,Amv,ATmv,transposeTest,getConvGEMMKernel - -mutable struct convGEMMKernel{T} <: AbstractConvKernel{T} - nImg :: Array{Int,1} - sK :: Array{Int,1} - shiftX :: Array{Int,1} - shiftT :: Array{Int,1} - aux_sk3 :: Array{T, 3} - aux_sk4 :: Array{T, 3} -end -function getConvGEMMKernel(TYPE::Type,nImg,sK) - - if sK[1] == 1 && sK[2] == 1 - shiftX = [0;0]; - shiftT = [0;0]; - elseif sK[1] == 3 && sK[2] == 3 - shiftX = [0;-1;0;0;1;0]; - shiftT = [1;0;0;0;0;-1]; - else - error("Code only supports 1X1 and 3X3 convolutions"); - end - - aux_sk3 = zeros(TYPE,nImg[1],nImg[2],sK[3]); - aux_sk4 = zeros(TYPE,nImg[1],nImg[2],sK[4]); - - return convGEMMKernel{TYPE}(copy(nImg),copy(sK),shiftX,shiftT,aux_sk3,aux_sk4); -end - -function Amv(this::convGEMMKernel{T},theta::Array{T},Y::Array{T}) where {T<:Number} - ## We assume that the data Y is held in the order XYCN. 
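 ## A concrete reading of that layout, assuming the test sizes used elsewhere in this
 ## patch (nImg = [8,10], sK = [3,3,2,4], nex = 2): the flattened input of length
 ## 8*10*2*2 = 320 is viewed by the reshape below as an 8x10x2x2 array ordered as
 ## image-x, image-y, channel, example.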
- sK = this.sK; - nImg = this.nImg; - nex = div(numel(Y),prod(nImgIn(this))) - # compute convolution - Y = reshape(Y,nImg[1],nImg[2],this.sK[3],nex); - AY = Array{T, 3}(nImg[1]*nImg[2],this.sK[4],nex); - aux = this.aux_sk3; - AYk = reshape(this.aux_sk4,nImg[1]*nImg[2],sK[4]); - ### reshape the kernels for gemm!: - K = reshape(theta, sK[1], sK[2], sK[3], sK[4]) - KK = Array{Array{T,2}}(sK[1],sK[2]); - for k1 = 1:sK[1] - for k2 = 1:sK[2] - @inbounds KK[k1,k2] = K[k1,k2,:,:]'; - end - end - for k = 1:nex - AYk[:] = zero(T) - AYk = multConv2Dblock(Y,KK, AYk,aux,this.shiftX,this.shiftT,k); - @inbounds AY[:,:,k] = AYk; - - end - AY_out = reshape(AY,:,nex); - return AY_out -end - -function ATmv(this::convGEMMKernel{T},theta::Array{T},Zin::Array{T}) where {T<:Number} - nImg = this.nImg; - sK = this.sK; - nex = div(numel(Zin),prod(nImgOut(this))); - K = reshape(theta, sK[1], sK[2], sK[3], sK[4]); - Z = reshape(Zin,nImg[1],nImg[2],sK[4],nex); - aux = this.aux_sk4; - ATZ = zeros(T,nImg[1]*nImg[2],sK[3],nex); - ATZk = reshape(this.aux_sk3,nImg[1]*nImg[2],sK[3]); - - ### reshape the kernels for gemm!: - KK = Array{Array{T,2}}(sK[1],sK[2]); - for k1 = 1:sK[1] - for k2 = 1:sK[2] - @inbounds KK[k1,k2] = K[k1,k2,:,:]; - end - end - ## flipping: - KK = flipdim(flipdim(KK,2),1); - for k = 1:nex - ATZk[:] = zero(T) - ATZk = multConv2Dblock(Z,KK, ATZk,aux,this.shiftX,this.shiftT,k); - @inbounds ATZ[:,:,k] = ATZk; - end - ATZ_out = reshape(ATZ,:,nex); - return ATZ_out -end - -function Jthetamv(this::convGEMMKernel{T},dtheta::Array{T},dummy::Array{T},Y::Array{T},temp=nothing) where {T<:Number} - nex = div(numel(Y),nFeatIn(this)); - Z = Amv(this,dtheta,Y); - return Z -end - -function JthetaTmv(this::convGEMMKernel{T}, Zin::Array{T}, dummy::Array{T}, Yin::Array{T}) where {T<:Number} - # derivative of Z*(A(theta)*Y) w.r.t. theta - sK = this.sK - nImg = this.nImg - nex = div(numel(Yin),prod(nImgIn(this))) - # compute convolution - Y = reshape(Yin, nImg[1], nImg[2], this.sK[3], nex) - Z = reshape(Zin, nImg[1]*nImg[2], this.sK[4], nex) - Zk = reshape(this.aux_sk4, nImg[1]*nImg[2], this.sK[4]); - aux = this.aux_sk3; - ### reshape the kernels for gemm!: - dtheta = zeros(T, sK[1], sK[2], sK[3], sK[4]) - KK = Array{Array{T, 2}}(sK[1], sK[2]) - for k1 = 1:sK[1] - for k2 = 1:sK[2] - @inbounds KK[k1, k2] = zeros(T, sK[3], sK[4]) - end - end - for k = 1:nex - getColumn!(Z, Zk, k) - multConv2Dblock(Y, KK, Zk, aux, this.shiftX, this.shiftT, k, doDerivative = 1) - end - ### Assemble the kernels from gemm!: - for k1 = 1:sK[1] - for k2 = 1:sK[2] - @inbounds dtheta[k1, k2, :, :] = KK[k1, k2] - end - end - dtheta_out = reshape(dtheta, sK[1], sK[2], sK[3], sK[4]) - return dtheta_out -end - - - -function getColumn!(Z::Array{T},Zk::Array{T},k::Int64) where {T<:Number} -for c=1:size(Z,2) - for j=1:size(Z,1) - @inbounds Zk[j,c] = Z[j,c,k]; - end -end -end - -function multConv2Dblock(x::Array{T},K::Array{Array{T,2},2}, y::Array{T}, tin::Array{T},shiftX,shiftT,imIdx;doDerivative = 0) where {T<:Number} - ## y = K*x - ## K - 3X3 array of Arrays - ## x - a vector of length |nImgag+2|*cin (zero padded) - ## y - a vector of length |nImgag|*cout - - nImg1 = size(x,1); - nImg2 = size(x,2); - cin = size(x,3); - cout = size(y,2); - OneType = one(T); - t = reshape(tin,nImg1,nImg2,cin); - kernelWidth = size(K,1); - # y = reshape(y,nImg1*nImg2,cout); # it is supposed to be of this shape... 
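 ## A rough sketch of the idea, assuming 3x3 kernels: for each of the nine kernel taps the
 ## (shifted, zero-padded) image block is copied into the buffer t and accumulated into the
 ## output with a single GEMM, i.e. roughly
 ##   y    .+= reshape(t, nImg1*nImg2, cin) * K[k]'   # forward/adjoint apply (doDerivative == 0)
 ##   K[k] .+= reshape(t, nImg1*nImg2, cin)' * y      # kernel derivative  (doDerivative != 0)
 ## which is what the two BLAS.gemm! calls at the bottom of this loop compute.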
- k=1; - jt=0;it=0;jt=0;jx=0; - - for p = 1:2:2*kernelWidth - for q = 1:2:2*kernelWidth - lower = nImg2+shiftT[p+1] # Move outside of the forloop for increased speed - upper = nImg1+shiftT[q+1] # Move outside of the forloop for increased speed - for cc = 1:cin - jx = 1+shiftX[p]; # Moving these outside didn't seem to help - jt = 1+shiftT[p]; - if jt > 1 - ###################### Dirichlet ####################### - @inbounds t[:,1:(jt-1),cc] = zero(T); - ###################### Periodic ####################### - # ix = 1+shiftX[q]; - # if shiftT[q] > 0 - #@inbounds t[1,1,cc] = x[end,end,cc,imIdx]; - # end - # for it = (1+shiftT[q]):upper - #@inbounds t[it,1,cc] = x[ix,end,cc,imIdx]; - # ix +=1; - # end - # if shiftT[q+1] < 0 - #@inbounds t[end,1,cc] = x[1,end,cc,imIdx]; - # end - ###################### End Periodic ####################### - end - while jt <= lower - it = 1+shiftT[q]; - ix = 1+shiftX[q]; - if it > 1 - for ii = 1:(it-1) - ###################### Dirichlet ####################### - @inbounds t[ii,jt,cc] = zero(T) #@inbounds t[1:(it-1),jt,cc] = 0.0 - faster unvectorized - ###################### Periodic ####################### - #@inbounds t[ii,jt,cc] = x[end,jx,cc,imIdx]; - end - end - while it <= upper - @inbounds t[it,jt,cc] = x[ix,jx,cc,imIdx]; - it+=1;ix+=1; - end - if it <= nImg1 - for ii = it:nImg1 - ###################### Dirichlet ####################### - @inbounds t[ii,jt,cc] = zero(T) #@inbounds t[it:nImg1,jt,cc] = 0.0 - faster unvectorized - ###################### Periodic ####################### - # @inbounds t[ii,jt,cc] = x[1,jx,cc,imIdx]; - end - end - jt+=1;jx+=1; - - end - if jt <= nImg2 - ###################### Dirichlet ####################### - @inbounds t[:,jt:nImg2,cc] = zero(T); - ###################### Periodic ####################### - # if shiftT[q] > 0 - # @inbounds t[1,end,cc] = x[end,1,cc,imIdx]; - # end - # ix = ix = 1+shiftX[q]; - # for it = (1+shiftT[q]):upper - # @inbounds t[it,end,cc] = x[ix,1,cc,imIdx]; - # ix +=1; - # end - # if shiftT[q+1] < 0 - # @inbounds t[end,end,cc] = x[1,1,cc,imIdx]; - # end - ###################### End Periodic ####################### - end - end - if doDerivative == 0 - BLAS.gemm!('N','T',OneType,reshape(t,nImg1*nImg2,cin),K[k],OneType,y); - else - BLAS.gemm!('T','N',OneType,reshape(t,nImg1*nImg2,cin),y,OneType,K[k]); - end - k+=1; - end - end - return y; -end diff --git a/regemiii/convGEMMKernelTest.jl b/regemiii/convGEMMKernelTest.jl deleted file mode 100644 index f70bfd6..0000000 --- a/regemiii/convGEMMKernelTest.jl +++ /dev/null @@ -1,40 +0,0 @@ -using Base.Test -using Meganet -using LinearOperators - - -nImg = [8,10] -sK = [3,3,2,4] -for TYPE=[Float64,Float32] - K = getConvGEMMKernel(TYPE,nImg,sK) - - @testset "adjoint test $TYPE" begin - nex = 2; - theta = initTheta(K) - A = getOp(K,theta); - v = randn(TYPE,nFeatIn(K),nex) - w = randn(TYPE,nFeatOut(K),nex) - - t1 = dot(w,A*v) - t2 = dot(v,A'*w) - # println("adjointTest t1=$t1\t t2=$t2") - @test norm(t1-t2)/norm(t1) < 1e3*eps(TYPE) - end - - @testset "derivative Test" begin - th = initTheta(K); - dth = initTheta(K); - nex = 2; - Y = randn(TYPE,nFeatIn(K),nex); - Z = randn(TYPE,nFeatOut(K),nex); - - t1 = vec(Z)'*vec(Jthetamv(K,dth,th,Y)); - t2 = vec(dth)'*vec(JthetaTmv(K,Z,th,Y)); - # println("derivativeTest t1=$t1\t t2=$t2") - @test norm(t1-t2)/norm(t2) < 1e3*eps(TYPE) - end -end - - - - diff --git a/regemiii/doubleSymLayer.jl b/regemiii/doubleSymLayer.jl deleted file mode 100644 index 7fc41fb..0000000 --- a/regemiii/doubleSymLayer.jl +++ /dev/null @@ -1,215 +0,0 
@@ -export DoubleSymLayer,getDoubleSymLayer - -""" - Implementation of symmetric double layer model - - Y(theta,Y0) = K(th1)'(activation( K(th1)\*Y0 + trafo.Bin\*th2))) + trafo.Bout\*th3 -""" -mutable struct DoubleSymLayer{T, TK <: AbstractConvKernel{T}, TN <: Union{NN{T}, normLayer{T}}} <: AbstractMeganetElement{T} - activation :: Function # activation function - K :: TK # Kernel model, e.g., convMod - nLayer :: TN # normalization layer - Bin :: Array{T} # Bias inside the nonlinearity - Bout :: Array{T} # bias outside the nonlinearity -end - - -function getDoubleSymLayer(TYPE::Type,K,nLayer::AbstractMeganetElement{T}; - Bin=zeros(TYPE,nFeatOut(K),0),Bout=zeros(TYPE, nFeatIn(K),0), - activation=tanhActivation) where {T <: Number} - BinT = convert.(T, Bin) - BoutT = convert.(T, Bout) - return DoubleSymLayer(activation,K,nLayer,BinT,BoutT); - -end - -function splitWeights(this::DoubleSymLayer{T},theta::Array{T}) where {T<:Number} - - th1 = theta[1:nTheta(this.K)::Int] - cnt = length(th1) - th2 = theta[cnt+(1:size(this.Bin,2))] - cnt = cnt + length(th2) - th3 = theta[cnt+(1:size(this.Bout,2))] - cnt = cnt + length(th3) - - th4 = theta[cnt+1:end]; - - return th1, th2, th3, th4 -end - -function apply(this::DoubleSymLayer{T},theta::Array{T},Yin::Array{T,2},doDerivative=true) where {T<:Number} - - #QZ = [] - tmp = Array{Any}(2) - nex = div(length(Yin),nFeatIn(this))::Int - Y = reshape(Yin,:,nex) - - theta1,theta2,theta3,theta4 = splitWeights(this,theta) - Kop = getOp(this.K,theta1) - KY = Kop*Y - KY,dummy,tmp[1] = apply(this.nLayer,theta4,KY) - Yt = KY - if !isempty(theta2) - Yt .+= this.Bin*theta2 - end - tmp[2] = copy(Yt) - Z::Array{T,2}, = this.activation(Yt,doDerivative) - Z = -(Kop'*Z) - if !isempty(theta3) - Z .+= this.Bout*theta3 - end - return Z, Z, tmp -end - -function nTheta(this::DoubleSymLayer) - return nTheta(this.K) + size(this.Bin,2)+ size(this.Bout,2) + nTheta(this.nLayer) -end - -function nFeatIn(this::DoubleSymLayer) - return nFeatIn(this.K) -end - -function nFeatOut(this::DoubleSymLayer) - return nFeatIn(this.K) -end - -function nDataOut(this::DoubleSymLayer) - return nFeatIn(this) -end - -function initTheta(this::DoubleSymLayer{T}) where {T<:Number} - theta = [vec(initTheta(this.K)); - T(0.01)*ones(T,size(this.Bin,2),1); - T(0.01)*ones(T,size(this.Bout,2),1); - initTheta(this.nLayer)]; - return theta -end - -function Jthetamv(this::DoubleSymLayer{T},dtheta::Array{T},theta::Array{T},Y::Array{T},tmp) where {T<:Number} - - A,dA = this.activation(tmp[2],true) - th1, th2,th3,th4 = splitWeights(this,theta) - dth1,dth2,dth3,dth4 = splitWeights(this,dtheta) - - Kop = getOp(this.K,th1) - dKop = getOp(this.K,dth1) - dY = dKop*Y - - dY = Jmv(this.nLayer,dth4,dY,th4,Kop*Y,copy(tmp[1]))[2] - dY = dY .+ this.Bin*dth2 - - dY = -(Kop'*(dA.*dY) + dKop'*A) .+ this.Bout*dth3 - return dY, dY -end - -function JYmv(this::DoubleSymLayer{T},dY::Array{T},theta::Array{T},Y::Array{T},tmp) where {T<:Number} - - dA = this.activation(tmp[2],true)[2] - - nex = div(length(dY),nFeatIn(this)) - dY = reshape(dY,:,nex) - Y = reshape(Y,:,nex) - th1, th2,th3,th4 = splitWeights(this,theta) - - Kop = getOp(this.K,th1) - dY = Kop*dY - dY = JYmv(this.nLayer,dY,th4,Kop*Y,copy(tmp[1]))[2] - dZ = -(Kop'*(dA.*dY)) - return dZ, dZ -end - -function Jmv(this::DoubleSymLayer{T},dtheta::Array{T},dY::Array{T},theta::Array{T},Y::Array{T},tmp) where {T<:Number} - A,dA = this.activation(copy(tmp[2]),true) - nex = div(length(Y),nFeatIn(this)) - - th1, th2,th3,th4 = splitWeights(this,theta) - dth1,dth2,dth3,dth4 = 
splitWeights(this,dtheta) - - Kop = getOp(this.K,th1) - dKop = getOp(this.K,dth1) - if length(dY)>1 - dY = reshape(dY,:,nex) - KdY = Kop*dY - else - KdY = 0 - end - dY = dKop*Y+KdY - dY = Jmv(this.nLayer,dth4,dY,th4,Kop*Y,tmp[1])[2] - - dY = reshape(dY,:,nex) - if !isempty(dth2) - dY .+= this.Bin*dth2 - end - - dY = -(Kop'*(dA.*dY) + dKop'*A) - if !isempty(dth3) - dth3 .+= this.Bout*dth3 - end - - return dY, dY -end - - -function JthetaTmv(this::DoubleSymLayer{T},Z::Array{T},dummy::Array{T},theta::Array{T},Y::Array{T},tmp) where {T<:Number} - - nex = div(length(Y),nFeatIn(this)) - Z = reshape(Z,:,nex) - th1,th2,th3,th4 = splitWeights(this,theta) - Kop = getOp(this.K,th1) - A,dA = this.activation(tmp[2],true) - - dth3 = vec(sum(this.Bout'*Z,2)) - dAZ = dA.*(Kop*Z) - dth2 = vec(sum(this.Bin'*dAZ,2)) - - dth4,dAZ = JTmv(this.nLayer,dAZ,zeros(T,0),th4,Kop*Y,tmp[1]) - - dth1 = JthetaTmv(this.K,A,zeros(T,0),Z) - dth1 += JthetaTmv(this.K,dAZ,zeros(T,0),Y) - dtheta = [-vec(dth1); -vec(dth2); vec(dth3); -vec(dth4)] - return dtheta -end - -function JYTmv(this::DoubleSymLayer{T},Zin::Array{T},dummy::Array{T},theta::Array{T},Y::Array{T},tmp) where {T<:Number} - - nex = div(length(Y),nFeatIn(this)) - Z = reshape(Zin,:,nex) - th1,th2,th3,th4 = splitWeights(this,theta) - Kop = getOp(this.K,th1) - A,dA = this.activation(tmp[2],true) - - dAZ = dA.*(Kop*Z) - dAZ = JYTmv(this.nLayer,dAZ,(T)[],th4,Kop*Y,tmp[1]) - dAZ_out = reshape(dAZ,:,nex) - dY = -(Kop'*dAZ_out) - return dY -end - -function JTmv(this::DoubleSymLayer{T}, Zin::Array{T}, dummy::Array{T}, - theta::Array{T}, Yin::Array{T}, tmp) where {T<:Number} - - nex = div(length(Yin),nFeatIn(this)) - Z = reshape(Zin, :, nex) - Yt = reshape(tmp[2]::Array{T,2},:,nex) - Y = reshape(Yin,:,nex) - th1, th2, th3, th4 = splitWeights(this,theta) - #Kop = getOp(this.K,th1) - A::Array{T,2}, dA::Array{T,2} = this.activation(Yt,true) - - dth3 = vec(sum(this.Bout'*Z,2)) - - KopZ = Amv(this.K, th1, Z) - dAZ1 = dA.*KopZ - - dth2 = vec(sum(this.Bin'*dAZ1,2)) - KopY = Amv(this.K, th1, Y) - dth4, dAZ2 = JTmv(this.nLayer,dAZ1,zeros(T,0),th4,KopY,tmp[1]) - dth1 = JthetaTmv(this.K,dAZ2,zeros(T,0),Y) - dth1 = dth1 + JthetaTmv(this.K,A,(T)[],Z) - dtheta = [-vec(dth1); -vec(dth2); vec(dth3);-vec(dth4)] - - dAZ_out = reshape(dAZ2,:,nex) - KopTdAZ = ATmv(this.K, th1, dAZ_out) - dY = -KopTdAZ - return dtheta, dY -end diff --git a/regemiii/doubleSymLayerTest.jl b/regemiii/doubleSymLayerTest.jl deleted file mode 100644 index ac5c964..0000000 --- a/regemiii/doubleSymLayerTest.jl +++ /dev/null @@ -1,61 +0,0 @@ -using Base.Test -using Meganet - -for TYPE=[Float64,Float32] - K = getDenseKernel(TYPE,[32,18]) - nex = 8 - Bin = randn(TYPE,nFeatOut(K),4) - Bout = randn(TYPE,nFeatIn(K),3) - nLayer = getTVNormLayer(TYPE,[8,4]) - L = getDoubleSymLayer(TYPE,K,nLayer,Bin=Bin,Bout=Bout) - @testset "doubleSymLayer (dense/TV) $TYPE" begin - testAbstractMeganetElement(L) - end - - K = getDenseKernel(TYPE,[32,18]) - nex = 8 - Bin = randn(TYPE,nFeatOut(K),4) - Bout = randn(TYPE,nFeatIn(K),3) - nLayer = getBatchNormLayer(TYPE,[8,4]) - L = getDoubleSymLayer(TYPE,K,nLayer,Bin=Bin,Bout=Bout) - @testset "doubleSymLayer (dense/Batch) $TYPE" begin - testAbstractMeganetElement(L) - end - - nImg = [32 32] - nc = 16 - nex = 50 - K = getSparseConvKernel2D(TYPE,nImg,[3,3,1,nc]) - Bin = randn(TYPE,nFeatOut(K),4) - Bout = randn(TYPE,nFeatIn(K),3) - nLayer = getBatchNormLayer(TYPE,[prod(nImg),nc],isTrainable=false) - L = getDoubleSymLayer(TYPE,K,nLayer,Bin=Bin,Bout=Bout) - @testset "doubleSymLayer (conv/Batch/not 
trainable) $TYPE" begin - testAbstractMeganetElement(L,nex=nex) - end - - - nImg = [8 4] - nc = 3 - nex = 4 - K = getSparseConvKernel2D(TYPE,nImg,[3,3,1,nc]) - Bin = randn(TYPE,nFeatOut(K),4) - Bout = randn(TYPE,nFeatIn(K),3) - nLayer = getBatchNormLayer(TYPE,[prod(nImg),nc]) - L = getDoubleSymLayer(TYPE,K,nLayer,Bin=Bin,Bout=Bout) - @testset "doubleSymLayer (conv/Batch) $TYPE" begin - testAbstractMeganetElement(L,nex=nex) - end - - nImg = [16 8] - nc = 6 - nex = 8 - K = getSparseConvKernel2D(TYPE,nImg,[3,3,1,nc]) - Bin = randn(TYPE,nFeatOut(K),4) - Bout = randn(TYPE,nFeatIn(K),3) - nLayer = getTVNormLayer(TYPE,[prod(nImg),nc]) - L = getDoubleSymLayer(TYPE,K,nLayer,Bin=Bin,Bout=Bout) - @testset "doubleSymLayer (conv/TV) $TYPE" begin - testAbstractMeganetElement(L) - end -end diff --git a/regemiii/reluActivation.jl b/regemiii/reluActivation.jl deleted file mode 100644 index fe676e6..0000000 --- a/regemiii/reluActivation.jl +++ /dev/null @@ -1,44 +0,0 @@ -export reluActivation - -""" - relu activation A = relu(Y) - - Input: - - Y - array of features - - Optional Input: - - doDerivative - flag for computing derivative, set via varargin - Ex: reluActivation(Y,true); - - Output: - - A - activation - dA - derivatives -""" -function reluActivation(Y::Array{T},doDerivative::Bool=false) where {T} - -A = max.(Y,zero(T)); - -if doDerivative - dA = sign.(A); -else - dA = zeros(T,0) -end - -return A,dA -end - - - -function reluActivation!(A::Array{T},dA::Array{T} = zeros(T,size(A)),doDerivative::Bool=false) where {T} -A .= max.(A,zero(T)); -if doDerivative - dA .= sign.(A); -else - dA = zeros(T,0) -end - -return A,dA -end diff --git a/regemiii/singleLayer.jl b/regemiii/singleLayer.jl deleted file mode 100644 index d7af01f..0000000 --- a/regemiii/singleLayer.jl +++ /dev/null @@ -1,158 +0,0 @@ -export singleLayer,getSingleLayer - -mutable struct singleLayer{T, TK <: AbstractConvKernel{T}, TN <: Union{NN{T}, normLayer{T}}} <: AbstractMeganetElement{T} - activation :: Function # activation function - K :: TK # transformation type - nLayer :: TN # normalization layer - Bin :: Array{T} # bias inside nonlinearity - Bout :: Array{T} # bias outside nonlinearity - -end - -function getSingleLayer(TYPE::Type, K,nLayer;Bin=zeros(TYPE,nFeatOut(K),0),Bout=zeros(TYPE,nFeatOut(K),0),activation=tanhActivation) - singleLayer(activation,K,nLayer,Bin,Bout); -end - - -function splitWeights(this::singleLayer{T},theta::Array{T}) where {T <: Number} - th1 = theta[1:nTheta(this.K)] - cnt = length(th1) - th2 = theta[cnt+(1:size(this.Bin,2))] - cnt += length(th2) - th3 = theta[cnt+(1:size(this.Bout,2))] - cnt += length(th3) - th4 = theta[cnt+(1:nTheta(this.nLayer))] - cnt += length(th4) - if cnt!=length(theta); error("splitWeights: length does not match"); end - return th1, th2, th3, th4 -end - -function apply(this::singleLayer{T},theta::Array{T},Yin::Array{T},doDerivative=false) where {T <: Number} - tmp = Array{Any}(2) - nex = div(length(Yin),nFeatIn(this)) - Y = reshape(Yin,:,nex) - th1,th2,th3,th4 = splitWeights(this,theta) - - Yout::Array{T,2} = getOp(this.K,th1)*Y - Yout .+= this.Bin * th2 - Yout,dummy,tmp[1] = apply(this.nLayer,th4,Yout,doDerivative) - Yout,tmp[2] = this.activation(Yout,doDerivative) - Yout .+= this.Bout*th3 - Ydata = Yout - return Ydata, Yout, tmp -end - -function nTheta(this::singleLayer) - return nTheta(this.K)+size(this.Bin,2) + size(this.Bout,2) + nTheta(this.nLayer) -end - -function nFeatIn(this::singleLayer) - return nFeatIn(this.K) -end - -function nFeatOut(this::singleLayer) - return 
nFeatOut(this.K) -end - -function nDataOut(this::singleLayer) - return nFeatOut(this.K) -end - -function initTheta(this::singleLayer{T}) where {T <: Number} - return [vec(initTheta(this.K)); convert(T,0.01)*ones(T,size(this.Bin,2),1) ; convert(T,0.01)*ones(T,size(this.Bout,2),1); initTheta(this.nLayer) ] -end - - -function Jthetamv(this::singleLayer{T},dtheta::Array{T},theta::Array{T},Yin::Array{T},tmp) where {T <: Number} - dA::Array{T,2} = tmp[2] - nex = div(length(Yin),nFeatIn(this)) - Y = reshape(Yin,:,nex) - - th1,th2,th3,th4 = splitWeights(this,theta) - dth1,dth2,dth3,dth4 = splitWeights(this,dtheta) - dZ::Array{T,2} = Jthetamv(this.K,dth1,th1,Y) .+ this.Bin*dth2 - Kop = getOp(this.K,th1) - dZ = Jmv(this.nLayer,dth4,dZ,th4,Kop*Y.+this.Bin*th2,tmp[1])[2] - dZ .*= dA - dZ .+= this.Bout*dth3 - return dZ, dZ -end - -function JYmv(this::singleLayer{T},dYin::Array{T},theta::Array{T},Y::Array{T},tmp) where {T <: Number} - dA = tmp[2] - nex = div(length(dYin),nFeatIn(this)) - th1,th2,th3,th4 = splitWeights(this,theta) - Kop = getOp(this.K,th1) - dY = reshape(dYin,:,nex) - dZ = Kop*dY - dZ = JYmv(this.nLayer,dZ,th4,Kop*Y.+this.Bin*th2,tmp[1])[2] - # dZ = dA.*dZ - dZ .*= dA - return dZ,dZ -end - -function Jmv(this::singleLayer{T},dtheta::Array{T},dYin::Array{T},theta::Array{T},Yin::Array{T},tmp) where {T <: Number} - dA::Array{T,2} = tmp[2] - nex = div(length(Yin),nFeatIn(this)) - th1,th2,th3,th4 = splitWeights(this,theta) - dth1,dth2,dth3,dth4 = splitWeights(this,dtheta) - - dY = reshape(dYin,:,nex); - Kop = getOp(this.K,th1) - dZ::Array{T, 2} = Kop*dY; - - Y = reshape(Yin,:,nex); - dZ += Jthetamv(this.K,dth1,th1,Y) .+ this.Bin*dth2 - dZ = Jmv(this.nLayer,dth4,dZ,th4,Kop*Y.+this.Bin*th2,tmp[1])[2] - - dZ .*= dA - dZ .+= this.Bout*dth3 - return dZ,dZ -end - -function JTmv(this::singleLayer{T},Zin::Array{T},dummy::Array{T},theta::Array{T},Y::Array{T},tmp) where {T <: Number} - dA::Array{T,2} = tmp[2] - nex = div(length(Y),nFeatIn(this)) - Z = reshape(Zin,:,nex) - th1,th2,th3,th4 = splitWeights(this,theta) - Kop = getOp(this.K,th1) - - dth3 = vec(sum(this.Bout'*Z,2)) - dAZ = dA.*Z - dth4,dAZ = JTmv(this.nLayer,dAZ,zeros(T,0),th4,Kop*Y.+this.Bin*th2,tmp[1]) # this not type stable - dth2 = vec(sum(this.Bin'*reshape(dAZ,:,nex),2)) - dth1 = JthetaTmv(this.K, dAZ,theta,Y) # this not type stable - - dY = Kop'*reshape(dAZ,:,nex) - dtheta = [vec(dth1); vec(dth2); vec(dth3); vec(dth4)] - - return dtheta, dY - -end - -function JthetaTmv(this::singleLayer{T},Zin::Array{T},dummy::Array{T},theta::Array{T},Y::Array{T},tmp) where {T <: Number} - dA = tmp[2] - nex = div(length(Zin),nFeatOut(this)) - th1,th2,th3,th4 = splitWeights(this,theta) - - Z = reshape(Zin,:,nex); - dAZ = dA.*Z; - dth3 = vec(sum(this.Bout'*Z,2)); - Kop = getOp(this.K,th1) - dth4,dAZ = JTmv(this.nLayer,dAZ,(T)[],th4,Kop*Y.+this.Bin*th2,tmp[1]) - dth1 = JthetaTmv(this.K,dAZ,theta,Y); - dth2 = vec(sum(this.Bin'*reshape(dAZ,:,nex),2)); - return [vec(dth1); vec(dth2); vec(dth3); vec(dth4)]; -end - -function JYTmv(this::singleLayer{T},Zin::Array{T},dummy::Array{T},theta::Array{T},Y::Array{T},tmp) where {T <: Number} - dA::Array{T,2} = tmp[2] - nex = div(length(Y),nFeatIn(this)) - th1,th2,th3,th4 = splitWeights(this,theta) - Kop = getOp(this.K,th1) - Z = reshape(Zin,:,nex) - dAZ::Array{T,2} = dA.*Z - dAZ = JYTmv(this.nLayer,dAZ,(T)[],th4,Kop*Y.+this.Bin*th2,tmp[1]) - ret::Array{T,2} = Kop'*reshape(dAZ,:,nex) - return ret #TODO: @lars or eldad rename this variable as I'm not sure what to call it -end From e4a602c1bc30e8a43350b48f9befea74b44da1bd 
Mon Sep 17 00:00:00 2001 From: moumitaTora Date: Wed, 14 Feb 2018 16:42:32 -0800 Subject: [PATCH 06/22] mem alloc doesnt happen every time for gemmkernel --- src/integrators/NN.jl | 6 --- src/kernelTypes/convGEMMKernel.jl | 87 ++++++++++++++++++++----------- src/layers/normLayer.jl | 2 - 3 files changed, 56 insertions(+), 39 deletions(-) diff --git a/src/integrators/NN.jl b/src/integrators/NN.jl index 2da950d..339647f 100644 --- a/src/integrators/NN.jl +++ b/src/integrators/NN.jl @@ -64,7 +64,6 @@ end # --------- forward problem ---------- function apply(this::NN{T},theta::Array{T},Y0::Array{T,2},doDerivative=true) where {T<:Number} - # tic() Y::Array{T,2} = copy(Y0) nex = div(length(Y),nFeatIn(this))::Int nt = length(this.layers) @@ -78,8 +77,6 @@ function apply(this::NN{T},theta::Array{T},Y0::Array{T,2},doDerivative=true) whe cnt = 0 for i=1:nt ni = nTheta(this.layers[i])::Int - # @timeit to "Apply" apply(this.layers[i],theta[cnt+(1:ni)],Y,doDerivative) - Yd::Array{T,2}, Y, tmp[i,2] = apply(this.layers[i],theta[cnt+(1:ni)],Y,doDerivative) if this.outTimes[i]==1 Ydata = [Ydata; this.Q*Yd] @@ -89,9 +86,6 @@ function apply(this::NN{T},theta::Array{T},Y0::Array{T,2},doDerivative=true) whe end cnt = cnt + ni end - # println("Measuring time for all the layers in NN ",toc()) - # println(to) - # error("After 1 apply call from NN") return Ydata,Y,tmp end diff --git a/src/kernelTypes/convGEMMKernel.jl b/src/kernelTypes/convGEMMKernel.jl index 5bbdaf8..b1114e2 100644 --- a/src/kernelTypes/convGEMMKernel.jl +++ b/src/kernelTypes/convGEMMKernel.jl @@ -1,15 +1,24 @@ export convGEMMKernel,Amv,ATmv,transposeTest,getConvGEMMKernel using DistributedArrays mutable struct convGEMMKernel{T} <: AbstractConvKernel{T} - nImg :: Array{Int,1} - sK :: Array{Int,1} - shiftX :: Array{Int,1} - shiftT :: Array{Int,1} - aux_sk3 :: Array{T, 3} - aux_sk4 :: Array{T, 3} + nImg :: Array{Int,1} + sK :: Array{Int,1} + shiftX :: Array{Int,1} + shiftT :: Array{Int,1} + aux_sk3 :: Array{T, 3} + aux_sk4 :: Array{T, 3} + KK :: Array{Array{T,2}} + + prev_nex_AMV :: Int + AY :: Array{T,3} + prev_nex_ATmv :: Int + ATZ :: Array{T,3} + dtheta :: Array{T,4} + + end function getConvGEMMKernel(TYPE::Type,nImg,sK) - + if sK[1] == 1 && sK[2] == 1 shiftX = [0;0]; shiftT = [0;0]; @@ -22,8 +31,15 @@ function getConvGEMMKernel(TYPE::Type,nImg,sK) aux_sk3 = zeros(TYPE,nImg[1],nImg[2],sK[3]); aux_sk4 = zeros(TYPE,nImg[1],nImg[2],sK[4]); - # error("new GEMM") - return convGEMMKernel{TYPE}(copy(nImg),copy(sK),shiftX,shiftT,aux_sk3,aux_sk4); + + KK = Array{Array{TYPE,2}}(sK[1],sK[2]); + prev_nex_AMV = 0 + AY = zeros(TYPE,0,0,0) + prev_nex_ATmv = 0 + ATZ = zeros(TYPE,0,0,0) + dtheta = zeros(TYPE, sK[1], sK[2], sK[3], sK[4]) + + return convGEMMKernel{TYPE}(copy(nImg),copy(sK),shiftX,shiftT,aux_sk3,aux_sk4,KK,prev_nex_AMV, AY, prev_nex_ATmv, ATZ, dtheta); end function Amv(this::convGEMMKernel{T},theta::Array{T},Y::Array{T}) where {T<:Number} @@ -31,28 +47,34 @@ function Amv(this::convGEMMKernel{T},theta::Array{T},Y::Array{T}) where {T<:Numb sK = this.sK; nImg = this.nImg; nex = div(numel(Y),prod(nImgIn(this))) - # compute convolution + KK = this.KK + + # compute convolution Y = reshape(Y,nImg[1],nImg[2],this.sK[3],nex); - AY = Array{T, 3}(nImg[1]*nImg[2],this.sK[4],nex); + + if nex != this.prev_nex_AMV + this.AY = Array{T, 3}(nImg[1]*nImg[2],this.sK[4],nex); + end + aux = this.aux_sk3; AYk = reshape(this.aux_sk4,nImg[1]*nImg[2],sK[4]); ### reshape the kernels for gemm!: K = reshape(theta, sK[1], sK[2], sK[3], sK[4]) - KK = 
Array{Array{T,2}}(sK[1],sK[2]); + for k1 = 1:sK[1] for k2 = 1:sK[2] @inbounds KK[k1,k2] = K[k1,k2,:,:]'; end end - # AYk = @parallel vcat for k = 1:nex + for k = 1:nex AYk[:] = zero(T) AYk = multConv2Dblock(Y,KK, AYk,aux,this.shiftX,this.shiftT,k); - @inbounds AY[:,:,k] = AYk; - + @inbounds this.AY[:,:,k] = AYk; end - - AY_out = reshape(AY,:,nex); + AY_out = reshape(this.AY,:,nex) + this.prev_nex_AMV = nex + return AY_out end @@ -63,11 +85,16 @@ function ATmv(this::convGEMMKernel{T},theta::Array{T},Zin::Array{T}) where {T<:N K = reshape(theta, sK[1], sK[2], sK[3], sK[4]); Z = reshape(Zin,nImg[1],nImg[2],sK[4],nex); aux = this.aux_sk4; - ATZ = zeros(T,nImg[1]*nImg[2],sK[3],nex); - ATZk = reshape(this.aux_sk3,nImg[1]*nImg[2],sK[3]); - + + ATZk = reshape(this.aux_sk3,nImg[1]*nImg[2],sK[3]); + + if nex != this.prev_nex_ATmv + this.ATZ = zeros(T,nImg[1]*nImg[2],sK[3],nex); + end + ### reshape the kernels for gemm!: - KK = Array{Array{T,2}}(sK[1],sK[2]); + KK = this.KK + for k1 = 1:sK[1] for k2 = 1:sK[2] @inbounds KK[k1,k2] = K[k1,k2,:,:]; @@ -75,12 +102,14 @@ function ATmv(this::convGEMMKernel{T},theta::Array{T},Zin::Array{T}) where {T<:N end ## flipping: KK = flipdim(flipdim(KK,2),1); - for k = 1:nex + for k = 1:nex ATZk[:] = zero(T) ATZk = multConv2Dblock(Z,KK, ATZk,aux,this.shiftX,this.shiftT,k); - @inbounds ATZ[:,:,k] = ATZk; + @inbounds this.ATZ[:,:,k] = ATZk; end - ATZ_out = reshape(ATZ,:,nex); + ATZ_out = reshape(this.ATZ,:,nex); + this.prev_nex_ATmv = nex + return ATZ_out end @@ -101,8 +130,7 @@ function JthetaTmv(this::convGEMMKernel{T}, Zin::Array{T}, dummy::Array{T}, Yin: Zk = reshape(this.aux_sk4, nImg[1]*nImg[2], this.sK[4]); aux = this.aux_sk3; ### reshape the kernels for gemm!: - dtheta = zeros(T, sK[1], sK[2], sK[3], sK[4]) - KK = Array{Array{T, 2}}(sK[1], sK[2]) + KK = this.KK for k1 = 1:sK[1] for k2 = 1:sK[2] @inbounds KK[k1, k2] = zeros(T, sK[3], sK[4]) @@ -115,15 +143,13 @@ function JthetaTmv(this::convGEMMKernel{T}, Zin::Array{T}, dummy::Array{T}, Yin: ### Assemble the kernels from gemm!: for k1 = 1:sK[1] for k2 = 1:sK[2] - @inbounds dtheta[k1, k2, :, :] = KK[k1, k2] + @inbounds this.dtheta[k1, k2, :, :] = KK[k1, k2] end end - dtheta_out = reshape(dtheta, sK[1], sK[2], sK[3], sK[4]) + dtheta_out = reshape(this.dtheta, sK[1], sK[2], sK[3], sK[4]) return dtheta_out end - - function getColumn!(Z::Array{T},Zk::Array{T},k::Int64) where {T<:Number} for c=1:size(Z,2) for j=1:size(Z,1) @@ -137,7 +163,6 @@ function multConv2Dblock(x::Array{T},K::Array{Array{T,2},2}, y::Array{T}, tin::A ## K - 3X3 array of Arrays ## x - a vector of length |nImgag+2|*cin (zero padded) ## y - a vector of length |nImgag|*cout - nImg1 = size(x,1); nImg2 = size(x,2); cin = size(x,3); diff --git a/src/layers/normLayer.jl b/src/layers/normLayer.jl index f742845..acd393c 100644 --- a/src/layers/normLayer.jl +++ b/src/layers/normLayer.jl @@ -38,7 +38,6 @@ end function apply(this::normLayer{T},theta::Array{T},Yin::Array{T,2},doDerivative=true) where {T <: Number} - # tic() # first organize Y with channels nf = this.nData[2]::Int nex = div(length(Yin),nFeatIn(this))::Int @@ -54,7 +53,6 @@ function apply(this::normLayer{T},theta::Array{T},Yin::Array{T,2},doDerivative=t Yout ./= S2 Yout2 = reshape(Yout,:,nex) - # println("Measuring time for batch norm ",toc()) return Yout2, Yout2, dA end From 9a7d271ab6d2e66972307bc787a8ff5211d9b8fd Mon Sep 17 00:00:00 2001 From: davidbegert Date: Thu, 15 Feb 2018 12:37:58 -0800 Subject: [PATCH 07/22] first attempt in reusing tmp --- examples/EResNN_CIFAR10.jl | 4 ++-- 
src/integrators/NN.jl | 16 ++++++++++++---- src/integrators/ResNN.jl | 13 +++++++++---- src/integrators/batchNormNN.jl | 4 ++++ src/integrators/connector.jl | 5 ++++- src/layers/affineScalingLayer.jl | 2 +- src/layers/doubleSymLayer.jl | 13 +++++++++---- src/layers/normLayer.jl | 2 +- src/layers/singleLayer.jl | 17 +++++++++++++++-- src/optimization/dnnBatchObjFctn.jl | 19 +++++++++++-------- src/optimization/sgd.jl | 15 +++++++++------ 11 files changed, 77 insertions(+), 33 deletions(-) diff --git a/examples/EResNN_CIFAR10.jl b/examples/EResNN_CIFAR10.jl index cd49cb3..d5e267d 100644 --- a/examples/EResNN_CIFAR10.jl +++ b/examples/EResNN_CIFAR10.jl @@ -58,7 +58,7 @@ B = kron(speye(TYPE,nc[end]),ones(TYPE, prod(nImg)))/prod(nImg) blocks = [blocks; getConnector(TYPE,B')] blocks[end].outTimes=1 -net = getNN(blocks) +net = getNN(blocks) theta = initTheta(net); display(net) @@ -80,7 +80,7 @@ W = max.(W,-.2) W = convert(Array{TYPE},W) solve(opt,objFun::dnnObjFctn,[vec(theta);vec(W)],Y_train,C_train,Y_test,C_test) -@time solve(opt,objFun::dnnObjFctn,[vec(theta);vec(W)],Y_train,C_train,Y_test,C_test) +@time solve(opt,objFun::dnnObjFctn,[vec(theta);vec(W)],Y_train,C_train,Y_test,C_test); # Profile.clear() # Profile.clear_malloc_data() diff --git a/src/integrators/NN.jl b/src/integrators/NN.jl index 9484a7e..65e2461 100644 --- a/src/integrators/NN.jl +++ b/src/integrators/NN.jl @@ -62,12 +62,15 @@ end # --------- forward problem ---------- -function apply(this::NN{T},theta::Array{T},Y0::Array{T,2},doDerivative=true) where {T<:Number} +function apply(this::NN{T},theta::Array{T},Y0::Array{T,2},tmp,doDerivative=true) where {T<:Number} Y::Array{T,2} = copy(Y0) nex = div(length(Y),nFeatIn(this))::Int nt = length(this.layers) - tmp = Array{Any}(nt+1,2) + if isempty(tmp) + tmp = Array{Any}(nt+1,2) + end + if doDerivative tmp[1,1] = Y0 end @@ -76,8 +79,13 @@ function apply(this::NN{T},theta::Array{T},Y0::Array{T,2},doDerivative=true) whe cnt = 0 for i=1:nt ni = nTheta(this.layers[i])::Int - - Yd::Array{T,2}, Y, tmp[i,2] = apply(this.layers[i],theta[cnt+(1:ni)],Y,doDerivative) + if !isassigned(tmp,i,2) + println("assigning") + tmp[i,2] = Array{Any}(0) + else + println("not assigning") + end + Yd::Array{T,2}, Y, tmp[i,2] = apply(this.layers[i],theta[cnt+(1:ni)],Y,tmp[i,2],doDerivative) if this.outTimes[i]==1 Ydata = [Ydata; this.Q*Yd] end diff --git a/src/integrators/ResNN.jl b/src/integrators/ResNN.jl index 86f43e9..c98ed06 100644 --- a/src/integrators/ResNN.jl +++ b/src/integrators/ResNN.jl @@ -48,11 +48,13 @@ function initTheta(this::ResNN{T}) where {T<:Number} end # ------- apply forward problems ----------- -function apply(this::ResNN{T},theta_in::Array{T},Y0::Array{T},doDerivative=true) where {T<:Number} - +function apply(this::ResNN{T},theta_in::Array{T},Y0::Array{T},tmp,doDerivative=true) where {T<:Number} + if isempty(tmp) + tmp = Array{Any}(this.nt+1,2) + end nex = div(length(Y0),nFeatIn(this)) Y = reshape(Y0,:,nex) - tmp = Array{Any}(this.nt+1,2) + if doDerivative tmp[1,1] = Y0 end @@ -61,7 +63,10 @@ function apply(this::ResNN{T},theta_in::Array{T},Y0::Array{T},doDerivative=true Ydata::Array{T,2} = zeros(T,0,nex) for i=1:this.nt - Z,dummy,tmp[i,2] = apply(this.layer,theta[:,i],Y,doDerivative) + if !isassigned(tmp,i,2) + tmp[i,2] = Array{Any}(0) + end + Z,dummy,tmp[i,2] = apply(this.layer,theta[:,i],Y,tmp[i,2],doDerivative) Y += this.h * Z if doDerivative tmp[i+1,1] = Y diff --git a/src/integrators/batchNormNN.jl b/src/integrators/batchNormNN.jl index 7e1e789..cdf237a 100644 --- 
a/src/integrators/batchNormNN.jl +++ b/src/integrators/batchNormNN.jl @@ -58,6 +58,7 @@ function apply(this::batchNormNN{T},theta::Array{T},Y0::Array{T,2},doDerivative= nex = div(length(Y),nFeatIn(this))::Int nt = length(this.layers) + # tmp = Array{Array{T,2},2}(nt+1,2) tmp = Array{Any}(nt+1,2) if doDerivative tmp[1,1] = Y0 @@ -78,6 +79,9 @@ function apply(this::batchNormNN{T},theta::Array{T},Y0::Array{T,2},doDerivative= cnt = cnt + ni end + #To Keep typing consistent + # tmp[3,2] = Array{T,2}(0,0) + return Ydata,Y,tmp end diff --git a/src/integrators/connector.jl b/src/integrators/connector.jl index 5824642..68c389d 100644 --- a/src/integrators/connector.jl +++ b/src/integrators/connector.jl @@ -18,7 +18,10 @@ function getConnector(TYPE::Type, K; b = zero(TYPE),outTimes=0,Q=I) end -function apply(this::Connector{T},theta::Array{T},Y0::Array{T},doDerivative=true) where {T <: Number} +function apply(this::Connector{T},theta::Array{T},Y0::Array{T},tmp,doDerivative=true) where {T <: Number} + # if isempty(tmp) + #Potentially no need for this here + # end nex = div(length(Y0),nFeatIn(this)) Y0 = reshape(Y0,:,nex) Y = this.K*Y0 .+ this.b diff --git a/src/layers/affineScalingLayer.jl b/src/layers/affineScalingLayer.jl index 64a2da0..4df7e34 100644 --- a/src/layers/affineScalingLayer.jl +++ b/src/layers/affineScalingLayer.jl @@ -30,7 +30,7 @@ end function apply(this::AffineScalingLayer{T},theta::Array{T},Y::Array{T},doDerivative=false) where {T <: Number} Y = reshape(copy(Y),this.nData[1], this.nData[2],:) - dA = (T)[] + dA = Array{T,2}(0,0) nex = size(Y,3) s2,b2 = splitWeights(this,theta); diff --git a/src/layers/doubleSymLayer.jl b/src/layers/doubleSymLayer.jl index 5f48cf3..32c2249 100644 --- a/src/layers/doubleSymLayer.jl +++ b/src/layers/doubleSymLayer.jl @@ -37,22 +37,27 @@ function splitWeights(this::DoubleSymLayer{T},theta::Array{T}) where {T<:Number} return th1, th2, th3, th4 end -function apply(this::DoubleSymLayer{T},theta::Array{T},Yin::Array{T,2},doDerivative=true) where {T<:Number} - +function apply(this::DoubleSymLayer{T},theta::Array{T},Yin::Array{T,2},tmp,doDerivative=true) where {T<:Number} + if isempty(tmp) + tmp = Array{Any}(2) + end #QZ = [] - tmp = Array{Any}(2) nex = div(length(Yin),nFeatIn(this))::Int Y = reshape(Yin,:,nex) theta1,theta2,theta3,theta4 = splitWeights(this,theta) Kop = getOp(this.K,theta1) KY = Kop*Y - KY,dummy,tmp[1] = apply(this.nLayer,theta4,KY) + + #TODO: check is assigned and pass in tmp[1] + KY,dummy,tmp[1] = apply(this.nLayer,theta4,KY,doDerivative) Yt = KY if !isempty(theta2) Yt .+= this.Bin*theta2 end tmp[2] = copy(Yt) + + #TODO: check is assigned and pass in tmp[1] wait why do we not update tmp[2]? 
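# A small sketch of the reuse convention this patch introduces (the helper name
# `cache!` is made up, and it assumes the mini-batch size, and hence the shape
# of Y, stays fixed between calls): an empty slot in `tmp` means "first call,
# allocate the cache"; afterwards the existing buffer is overwritten in place
# so nothing is reallocated per mini-batch.
function cache!(tmp, i, Y)
    if !isassigned(tmp, i) || isempty(tmp[i])
        tmp[i] = copy(Y)      # first call: pay the allocation once
    else
        # pull the element into a local first; broadcasting straight into
        # tmp[i] did not work here (see the note in NN.jl earlier in this patch)
        buf = tmp[i]
        buf .= Y
    end
    return tmp
end
# usage: tmp = Array{Any}(2); cache!(tmp, 2, Yt)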
Z::Array{T,2}, = this.activation(Yt,doDerivative) Z = -(Kop'*Z) if !isempty(theta3) diff --git a/src/layers/normLayer.jl b/src/layers/normLayer.jl index be6d4f7..599059e 100644 --- a/src/layers/normLayer.jl +++ b/src/layers/normLayer.jl @@ -38,7 +38,7 @@ function apply(this::normLayer{T},theta::Array{T},Yin::Array{T,2},doDerivative=t nex = div(length(Yin),nFeatIn(this))::Int Y = reshape(Yin,:,nf,nex) - dA = (T)[] + dA = Array{T,2}(0,0) # subtract mean across pixels Yout = Y.-mean(Y,this.doNorm) diff --git a/src/layers/singleLayer.jl b/src/layers/singleLayer.jl index 875955e..1a78db0 100644 --- a/src/layers/singleLayer.jl +++ b/src/layers/singleLayer.jl @@ -27,16 +27,29 @@ function splitWeights(this::singleLayer{T},theta::Array{T}) where {T <: Number} return th1, th2, th3, th4 end -function apply(this::singleLayer{T},theta::Array{T},Yin::Array{T},doDerivative=false) where {T <: Number} - tmp = Array{Any}(2) +function apply(this::singleLayer{T},theta::Array{T},Yin::Array{T},tmp,doDerivative=false) where {T <: Number} + + if isempty(tmp) + tmp = Array{Any}(2) + end nex = div(length(Yin),nFeatIn(this)) Y = reshape(Yin,:,nex) th1,th2,th3,th4 = splitWeights(this,theta) Yout::Array{T,2} = getOp(this.K,th1)*Y Yout .+= this.Bin * th2 + # if !isassigned(tmp,1) + # tmp[1] = Array{Any}(0) #TODO get batchNormNN to return a 1D arg maybe? Just think about it + # end Yout,dummy,tmp[1] = apply(this.nLayer,th4,Yout,doDerivative) + + # if !isassigned(tmp,2) + # tmp[2] = Array{Any}(0) #TODO We know this one is an array of T + # end Yout,tmp[2] = this.activation(Yout,doDerivative) + # println(typeof(tmp[2])) + # println(typeof(tmp[1])) + # error("check activation tmp") ## These two are different meaning it is difficult to type tmp Yout .+= this.Bout*th3 Ydata = Yout return Ydata, Yout, tmp diff --git a/src/optimization/dnnBatchObjFctn.jl b/src/optimization/dnnBatchObjFctn.jl index 42fd760..1e1a05e 100644 --- a/src/optimization/dnnBatchObjFctn.jl +++ b/src/optimization/dnnBatchObjFctn.jl @@ -17,28 +17,31 @@ mutable struct dnnObjFctn splitWeights(this::dnnObjFctn,x) = (return x[1:nTheta(this.net)], x[nTheta(this.net)+1:end]) -function getMisfit(this::dnnObjFctn,thetaW::Vector{T},Y::Array{T},C::Array{T},doDerivative=true) where {T} +function getMisfit(this::dnnObjFctn,thetaW::Vector{T},Y::Array{T},C::Array{T},tmp::Array{Any},doDerivative=true) where {T<:Number} theta,W = splitWeights(this,thetaW) - return getMisfit(this,theta,W,Y,C,doDerivative) + return getMisfit(this,theta,W,Y,C,tmp,doDerivative) end -function getMisfit(this::dnnObjFctn,theta::Array{T},W::Array{T},Y::Array{T},C::Array{T},doDerivative=true) where {T} +function getMisfit(this::dnnObjFctn,theta::Array{T},W::Array{T},Y::Array{T},C::Array{T},tmp::Array{Any},doDerivative=true) where {T<:Number} - YN,dummy,tmp = apply(this.net,theta,Y,doDerivative) + YN,dummy,tmp = apply(this.net,theta,Y,tmp,doDerivative) + # println(eltype(tmp[1,2][1][1,1])) + # println(size(tmp)) + # error("check tmp out") Fc,hisF,dWF,d2WF,dYF,d2YF = getMisfit(this.pLoss,W,YN,C,doDerivative,doDerivative) if doDerivative dYF = JthetaTmv(this.net,dYF,zeros(T,0),theta,Y,tmp) end - return Fc,hisF,vec(dYF),vec(dWF) + return Fc,hisF,vec(dYF),vec(dWF),tmp end -function evalObjFctn(this::dnnObjFctn,thetaW::Array{T},Y::Array{T},C::Array{T},doDerivative=true) where {T} +function evalObjFctn(this::dnnObjFctn,thetaW::Array{T},Y::Array{T},C::Array{T},tmp::Array{Any},doDerivative=true) where {T<:Number} theta,W = splitWeights(this,thetaW) # compute misfit - Fc,hisF,dFth,dFW = 
getMisfit(this,theta,W,Y,C,doDerivative) + Fc,hisF,dFth,dFW,tmp = getMisfit(this,theta,W,Y,C,tmp,doDerivative) # regularizer for weights Rth,dRth, = regularizer(this.pRegTheta,theta) @@ -49,5 +52,5 @@ function evalObjFctn(this::dnnObjFctn,thetaW::Array{T},Y::Array{T},C::Array{T},d Jc = Fc + Rth + RW dJ = [dFth+dRth; dFW+dRW] - return convert(T,Jc),hisF,convert(Array{T},dJ) + return convert(T,Jc),hisF,convert(Array{T},dJ),tmp end diff --git a/src/optimization/sgd.jl b/src/optimization/sgd.jl index a2feab5..0147e9c 100644 --- a/src/optimization/sgd.jl +++ b/src/optimization/sgd.jl @@ -25,7 +25,7 @@ end Base.display(this::SGD)=println("SGD(maxEpochs=$(this.maxEpochs),miniBatch=$(this.miniBatch),learningRate=$(this.learningRate),momentum=$(this.momentum),nesterov=$(this.nesterov),ADAM=$(this.ADAM))") -function solve(this::SGD{T},objFun::dnnObjFctn,xc::Array{T},Y::Array{T},C::Array{T},Yv::Array{T},Cv::Array{T}) where {T} +function solve(this::SGD{T},objFun::dnnObjFctn,xc::Array{T},Y::Array{T},C::Array{T},Yv::Array{T},Cv::Array{T}) where {T<:Number} # evaluate training and validation epoch = 1; @@ -44,6 +44,8 @@ function solve(this::SGD{T},objFun::dnnObjFctn,xc::Array{T},Y::Array{T},C::Array if this.out; display(this); end + # Declare tmp - We know nothing about its shape of datatypes + tmp = Array{Any}(0,0) while epoch <= this.maxEpochs nex = size(Y,2) @@ -52,9 +54,9 @@ function solve(this::SGD{T},objFun::dnnObjFctn,xc::Array{T},Y::Array{T},C::Array for k=1:ceil(Int64,nex/this.miniBatch) idk = ids[(k-1)*this.miniBatch+1: min(k*this.miniBatch,nex)] if this.nesterov && !this.ADAM - Jk,dummy,dJk = evalObjFctn(objFun,xc-this.momentum*dJ,Y[:,idk],C[:,idk]); + Jk,dummy,dJk,tmp = evalObjFctn(objFun,xc-this.momentum*dJ,Y[:,idk],C[:,idk],tmp); else - Jk,dummy,dJk = evalObjFctn(objFun,xc,Y[:,idk],C[:,idk]); + Jk,dummy,dJk,tmp = evalObjFctn(objFun,xc,Y[:,idk],C[:,idk],tmp); end if this.ADAM @@ -68,13 +70,14 @@ function solve(this::SGD{T},objFun::dnnObjFctn,xc::Array{T},Y::Array{T},C::Array end # we sample 2^12 images from the training set for displaying the objective. idt = ids[1:min(nex,2^12)] - Jc,para = evalObjFctn(objFun,xc,Y[:,idt],C[:,idt]); - Jval,pVal = getMisfit(objFun,xc,Yv,Cv,false); + Jc,para = evalObjFctn(objFun,xc,Y[:,idt],C[:,idt],tmp); #TODO: Do we really wanna train here? 
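# Note: `doDerivative` defaults to `true` in evalObjFctn, so this reporting call
# also computes a gradient that is immediately discarded; [PATCH 17/22] below
# resolves the TODO by switching to getMisfit(objFun,xc,Y[:,idt],C[:,idt],tmp,false).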
+ + Jval,pVal = getMisfit(objFun,xc,Yv,Cv,tmp,false); if this.out; @printf "%d\t%1.2e\t%1.2f\t%1.2e\t%1.2e\t%1.2f\n" epoch Jc 100*(1-para[3]/para[2]) norm(xOld-xc) Jval 100*(1-pVal[3]/pVal[2]) end - + # println("Done one") xOld = copy(xc); epoch = epoch + 1; end From 1c8874e02a31080178b0a262dc718b1b1979cf01 Mon Sep 17 00:00:00 2001 From: moumitaTora Date: Thu, 15 Feb 2018 15:30:28 -0800 Subject: [PATCH 08/22] improve memory convgemm --- src/integrators/connector.jl | 3 +-- src/kernelTypes/convGEMMKernel.jl | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/integrators/connector.jl b/src/integrators/connector.jl index 4f6be03..314070e 100644 --- a/src/integrators/connector.jl +++ b/src/integrators/connector.jl @@ -19,7 +19,7 @@ end function apply(this::Connector{T},theta::Array{T},Y0::Array{T},doDerivative=true) where {T <: Number} - # tic() + nex = div(length(Y0),nFeatIn(this)) Y0 = reshape(Y0,:,nex) Y = this.K*Y0 .+ this.b @@ -28,7 +28,6 @@ function apply(this::Connector{T},theta::Array{T},Y0::Array{T},doDerivative=true Ydata = this.Q*Y end tmp = Y0; - # println("Measuring time for connector ",toc()) return Ydata, Y, tmp end diff --git a/src/kernelTypes/convGEMMKernel.jl b/src/kernelTypes/convGEMMKernel.jl index b1114e2..e22bc70 100644 --- a/src/kernelTypes/convGEMMKernel.jl +++ b/src/kernelTypes/convGEMMKernel.jl @@ -33,17 +33,18 @@ function getConvGEMMKernel(TYPE::Type,nImg,sK) aux_sk4 = zeros(TYPE,nImg[1],nImg[2],sK[4]); KK = Array{Array{TYPE,2}}(sK[1],sK[2]); - prev_nex_AMV = 0 + prev_nex_AMV = -1 AY = zeros(TYPE,0,0,0) - prev_nex_ATmv = 0 + prev_nex_ATmv = -1 ATZ = zeros(TYPE,0,0,0) dtheta = zeros(TYPE, sK[1], sK[2], sK[3], sK[4]) - return convGEMMKernel{TYPE}(copy(nImg),copy(sK),shiftX,shiftT,aux_sk3,aux_sk4,KK,prev_nex_AMV, AY, prev_nex_ATmv, ATZ, dtheta); + return convGEMMKernel{TYPE}(copy(nImg),copy(sK),shiftX,shiftT,aux_sk3,aux_sk4,KK,prev_nex_AMV,AY,prev_nex_ATmv,ATZ,dtheta); end function Amv(this::convGEMMKernel{T},theta::Array{T},Y::Array{T}) where {T<:Number} ## We assume that the data Y is held in the order XYCN. 
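# A stripped-down stand-in showing the allocation saving behind these changes:
# the output buffer lives on the kernel object and is re-allocated only when the
# number of examples changes.  Field names mirror the patch (AY, prev_nex_AMV),
# but the struct and helper below are illustrative, not the real convGEMMKernel.
mutable struct CachedKernel{T}
    AY           :: Array{T,3}   # cached result buffer for Amv
    prev_nex_AMV :: Int          # batch size the cache was sized for (-1: none yet)
end

function ensure_buffer!(this::CachedKernel{T}, npix::Int, cout::Int, nex::Int) where {T}
    if nex != this.prev_nex_AMV                 # first call or batch size changed
        this.AY = Array{T,3}(npix, cout, nex)   # pay the allocation once
        this.prev_nex_AMV = nex
    end
    return this.AY                              # same nex: reuse the existing buffer
end
# usage: k = CachedKernel(zeros(Float32,0,0,0), -1); ensure_buffer!(k, 32*32, 16, 64)
# ATmv keeps an analogous pair (ATZ, prev_nex_ATmv).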
+ sK = this.sK; nImg = this.nImg; nex = div(numel(Y),prod(nImgIn(this))) @@ -74,18 +75,19 @@ function Amv(this::convGEMMKernel{T},theta::Array{T},Y::Array{T}) where {T<:Numb end AY_out = reshape(this.AY,:,nex) this.prev_nex_AMV = nex - + return AY_out end function ATmv(this::convGEMMKernel{T},theta::Array{T},Zin::Array{T}) where {T<:Number} + nImg = this.nImg; sK = this.sK; nex = div(numel(Zin),prod(nImgOut(this))); K = reshape(theta, sK[1], sK[2], sK[3], sK[4]); Z = reshape(Zin,nImg[1],nImg[2],sK[4],nex); aux = this.aux_sk4; - + # ATZ = this.ATZ ATZk = reshape(this.aux_sk3,nImg[1]*nImg[2],sK[3]); if nex != this.prev_nex_ATmv @@ -109,7 +111,7 @@ function ATmv(this::convGEMMKernel{T},theta::Array{T},Zin::Array{T}) where {T<:N end ATZ_out = reshape(this.ATZ,:,nex); this.prev_nex_ATmv = nex - + return ATZ_out end @@ -129,6 +131,7 @@ function JthetaTmv(this::convGEMMKernel{T}, Zin::Array{T}, dummy::Array{T}, Yin: Z = reshape(Zin, nImg[1]*nImg[2], this.sK[4], nex) Zk = reshape(this.aux_sk4, nImg[1]*nImg[2], this.sK[4]); aux = this.aux_sk3; + dtheta = zeros(T, sK[1], sK[2], sK[3], sK[4]) ### reshape the kernels for gemm!: KK = this.KK for k1 = 1:sK[1] @@ -143,10 +146,10 @@ function JthetaTmv(this::convGEMMKernel{T}, Zin::Array{T}, dummy::Array{T}, Yin: ### Assemble the kernels from gemm!: for k1 = 1:sK[1] for k2 = 1:sK[2] - @inbounds this.dtheta[k1, k2, :, :] = KK[k1, k2] + @inbounds dtheta[k1, k2, :, :] = KK[k1, k2] end end - dtheta_out = reshape(this.dtheta, sK[1], sK[2], sK[3], sK[4]) + dtheta_out = reshape(dtheta, sK[1], sK[2], sK[3], sK[4]) return dtheta_out end From fea6cc1a41ee9822a0ccdb272cbfd611a5a76b7a Mon Sep 17 00:00:00 2001 From: moumitaTora Date: Thu, 15 Feb 2018 16:04:49 -0800 Subject: [PATCH 09/22] Delete regemiii.zip --- regemiii.zip | Bin 11386 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 regemiii.zip diff --git a/regemiii.zip b/regemiii.zip deleted file mode 100644 index bfcf3cb939bff8197d5b958700eed75d5f63033c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 11386 zcmaL7WmKG7@;!{ZySuvucXxMpcN(|g5?q73JHZLscyManc zSOTV|E|(rwNT-Gi&8@Yj=YS!tWR#S+lFlX zDE{Nf@fcqoS~L5$gF@xUuY$5~O#4Mmp4U)|LaF4(NuV`CNggUpE!xo6M!TwsUHQ0v zg!;N-HKdbQC~Ko(Xlmpn!!&>m?!dmvNMpfT^@B^>gjOzir7SV{jlqEZx^qN1s(J_) zhlWS-a#dWRv%;Y^Y$-BSc@&z?VFMq#XC-7mIxRhVb>l|QzqfSiytcD^tAqJ z0{qnz=uUuKIN;}|Uo(DvdjqKcDQ=c)=* z@~c?59|mjAW49u@m49jgP#$aaFA3O)=>YEyCk=E|2(qx0 z>V{Y0HI^M2d=FvCaIL7MUup(Z`d3BJXg4NryP-{gZPJ4plv{UdYhXY0En9jLIm<^n zcg}vc7C^-e9y+Y4cU9LjeuyzPmN7y&_L*>8mAa`sItF*TVjnyJB zvVF34g3O&qKD^B^pKNOw8?n!rpdJcyn_0 z#p`eiT2nhp+ebzh2(<=zU?L~@ImP-`M$_w_EhUyO<(}K4FBr#4{^TM9&mw`$HT6>v zviEp>JrV!bbDq+@6IhGAR+`$%6***fhFrL%|K&Gf(wZN`MO7%Y0bbwv8Ac;S?cN}f z164RZ?nw4y{Wu+?r3}Jtk{kOcRcz5_z8htGEcw+P4zIk0S1NBQ)&4CoG)nKgAU((= zoMAnDg&Bg0;GMhTPo#LwU7k@J2VpAHj5wKq$qJvLLn6xeMBhomH3WjSfoX~Iym)x7 z`^~3(RO}`moDh$v8J4IvimaPgp*gi;@VxpOEVr^T`gTa71bGr;rU@Y4uhvnji;XIc zyB8^Zu33t*74fo0C}@>A2Qe0e6gOje<7NB5a3cN#v1)ZO*lW z%md$YwXM`k*v>2Dq3L8NE-F^A=O2GbiRMLT_8XZGK>(f5@u}6akILo`AcLz)IA6!X zZ(ow|ckZ+#A79&ZHVXip1%k%YlXqjoT#nHX%*t}mFnI6iOdyaTAeA2ksr+Br6ZK#0 zDJ7wx@NfF;(z$e6<3xWu1N%+v2w%TUYbWk;6_Bup{cF|n=(Kg?Gpx2f5C`K+oQ+-* z{Y*s{)BDSXv}~a{kD?yHk#3aY68iGOTT%Ye5uF;%(gpDTBg^wLoImO@6S3cy^XiHW zT}YP8WlSJf)?@gfYPSDyDpv@;b+*UAP1>R{N8^+K4bk>$SRdo$GdpFD5`Q4nW7KB2 zC&1H1BkA!)$%%ZFfIR~V)Rr3mSakegVg|R)acJw}OircN!(@#Dpf;weE@lbQYLO_qDt9 z#)BPg0yG5o?$XE-0=86dFjB)YrvzUIlesl^l!JuR@4DCrzsh&0QKm~g&EL>>5d@+! 
zwaNgLRcRikvj?;MlZ$ob1x(K}kRrRi$J?wfw2IZ+9ZN7yztduqxo?CtTDbO=Ty~U_ z5G|K`Ira@hDslVW)NVYiU8PZ%8ct1aBO?W5I*K8=?#O+2hVeTF!idAccOwal+j1y^ z<@Qb-Exd4QoTKcC);7(No*VY{$4r02+v97Ri4b1rB5$g9jMKGG9tesCu1PzTEZE{2 zu{iEn(zoY`&a$K1NAVA5HUr8Bc z(%otwkrNS7ilxdn#*M4P(>Cw2fB`BK>)zN9hiCUnQ@G}Z4@rv(DgwSV+AGf%; z)d$rmk};sAl1*0)11tN$o95_B!vzL8cWQ>td;53BS<%byJCD$mt6x)L;-L(F z8`iYNPxB3#f#@=!Zw%_UUA(tud1l{~rOysQWom^3=Bw9L^?iN7CBgr41ToIRRyjQ# z+b@}L!U)BJWjg&iEW@ak4LlYVS998>!(-&NjVw&_B;%Y_TYu)U7r)9yEYY2X##p9G zOA2Ma2Ai^h+U&?ELL3wIhL@LeQEUB0 zb=~Ye2l!aXGY#Wp5Tjh3(`11^FI+;o1lWHL*Dp+Gb{v%^PPm3Pe}t@JR&yt5fsjsiU9M;#cIZ?-V0H_))Rqaku`Q$0zb>LjbYC1Eh1`>}7V>e2 zs+sS%v!%$nMEr=oNHe}N*3QRb?E^!ku|io&(Sk97hdo(*58h`_^Nc))T&BH`!vb#{S9G&Dhg_RdcNzX9 zcnOMUGChDTjJR90Ky+2!0R+4*9PbCA|I(@xRhG>Z&2TI7YzCGj7^?Uon(Cw&97}FG zmt+`&x>vC%@)w@%@xZ%FjVm|Jm!Ksw)Ng~Ca^*(OmILr~DUZB|Z4)Ja$4$%jmR$n( zL|k|HWE@BXXQ7k~Wva@8^1g%P1dZbIE>7u|g)Wwzhb+U7%~>Rvtj|JOHih*7o0i2X zItz?p%fw@w9}ASqcH+`MqL&x&XMDX2s@|q6oAK$YPRU>U{&Zk$RvhUBlIvmKGCtj6!cDGsLQSlEMeKUtA`oO>C~$FtTCj1 zamm;ATIm*t%X_I9Ky)IX*q4s(qb1JM-dRv>^qa@*T-Sk>3}olYW``S>PISwmEnq7d z<4mr=sIh>pYB8rtMoFU6Jge?tJUbFM#Mk8?DNS&++0C;07A@=WW;8)bpj4_B=M0qH z7coz#JCj4dheP?4l~wQA4OznMe#E;KIUA44m6D<7rWXWph~Y-x&$G#rD|x028g&j? zSUeQPY~@j|+UYRP)4ub}?E(=QdrSxk=V%u`6Ug}xyEHZ3O zi|U|b`+hh#4%=tSVUZT}Rf*S!<{T8XM2j_oB(I-2R{VQ1SN`dC@F$>S9=!93RrOjN2S!;f`LziEWxaQB?DsEBk38ERYQ)=Ke9bS1>synlNyC= zkij?=_G-XcuDU-795mLGX(^ZF3to39R~c7zh@EMEd`|7$5!u z&0SYGHzt3$h4p18zqT%U$o*N!yF2NpuT$oUwUExF;vucgr_Zwycq@ zlciQ=(^0?P(BRs0&z0S>?9yY*MswtdYDkUVmbiOnQN%kP&J8LFyT0b1nCDl*b zD435n({mufo?lb|R~uP+bJ zDN#(i_Dq76;E&LdP+2TktU;{p=n4e=^AL>>2giVcfq-OvXw3huFU_6YP3SY-iN-#2r&Ho3SH}8ZvIWu{!qpSs?83*$#4#Fj%B2Rm4*r(zTR4_bgm=l z{RPR1tRic!7RQ#NMLgHdi)rggIx1kZ8a~dj#*>Tw`B3nkabpf_OXVUyJ2yL*-}`3& z7NTo{i#}6rNG-PpS9h!MTyTn(xe<~~6}l!AYHTUAHk3#fth<CC zSLF0Q6ohfR(b8FfEWD%^@--+`-N3#P0>4*VN0OMm0nPz%XZxhP%#*h0wKjX-`{m}8QO*ND31+>JAZZOq~&H-npZv&(V((5%qAq6ivSErf2f=XXe zYX!S`S{73-QW$JQgGwUtqR*+~mQAje#%wJ{8KN1K+&+~x6AO3C&OzEe>ppfm{0Yl_ zERyEsI4TNnVH;y$^s=sqX48o#*A!D5qxt8v=!Xmq5@zn1HtkB)Wc3wOdovjhv5A}# zWdL(2OUw@ZaSJH3(Cd@SZ9FLcjyO|B$w=c92`O3Y@r1~TWU*={++I$=_(%eR(XVX% zO*boyRlq>Gn_e4oSg|Ojz26iQSC_yrQvEbL9e2x76ZbvK(o?QGY;&wdMg2SuJk0>s zW~sSZq$_ePA;oMuXS+G2he(nE3w!p=7*BFWZ|G3gv5$MChNg2*Ze>=|U%fI!S_nsS z?F?Q6iv$C~;6evw%iqpQeHGVpImn)Xodd~1{4lq|1cEl(=&OwZEDR|8<=~RGU(S+R zjw5H)Z!@=;#P?`W*X?+>?V)-fLHJIBQAS5LaoBfk*WY1PnMgdnKoVgkZp zlQy7v9ktm9pzix;lra^esILNHL7mc%!{KxYOjvfz;mB|*g(Xj{ssqHK?1a_SR#T6C zLW;eOKO^JE;!v8@vnnK@fvy;u4za8pJQ0b-ECG4sS1FJI&@{+b95Wthj%U{7D~!)P zYqNg92itenC)C1_ZkldUBc+K-86AV1&C?nE`x8rIic$$H!@C4SwMSsU_J@)v$3Nr{OkL_d#jC^COGaMkciT+H zyIKT~78k6jmzm1s9(a^k>(#dy`>eQcs7r*Rp~{L|qc)4oyGN?<3OrTBZX&dzQYvRq z$9*OhDqN`}=oY^OXd>>w#Tb_;1(#4_mIc&`=xpwA2;_D zcaw3)$a80zTTW>-Y-kxwnE2k_YD2TUVVE;?^F`(1y5oa)!=<LBVeTTX)e0b(R@58x!1Bh4ABc&{h zjt06CGBSE|lE!TZ?yNUmr#B#-R|a&8eM6XD?CtjlgRDA#oL@u44eiQICcIVa>!%oz z6!7!H4{&6SE`K8`r{gPOA<_8?o-(ZQHKaN~fn*bYe)<9jHxtueTv(8*7Is*wWX5T_ zoFPk_&;>80N?!;&QKq&{_@@uV(zELsLSELeQOM60H@(>?vqRE5DY3?1e$PPTcJzYD z;)#wg9VAF~hYFwPPTrTbT#!Z;Uvo9rLQ7?8)K{*v6dep`gar|lF`;wnCT;WRY?XKQ z(npS=CL7Z6Hp)d%B`5phmP*tex}aZAbm}XUIC^ISO#lRmO2ptY@6F)X7g>p!S57AD zjWPqUuBP6y5Yqgw4sVgxuAzeXJYB;|}zCh>0kqP}`2i{Bzs z(ospWYZ6S|mxN31Yn(3r`jE?R7SFuAt*>4oEn`1vF?qnZH6Hq|Ezv+BLy{}{7VF?G zpV#(EA1ENeBcM}D=?lXnWBxriJ;jJ$_B-p-AZZbFLSy@@09qDnJS4KwcS0B^mB&^X zvTIl0s%|uu@A9#podz@a$ZrD>>3L$^`=Y^*x1q(v2~hY{eTXguHx`UPPoAo2yySZ( zYK8{K?k}qnu=Y%wgJwJxU7JM6JqdYLw+1#I+irW)WOo^$O910_e znBtE^qb%Kpx4Lm;tZ7hSwvTopKsh`hl%>KV|7Dw(`>GBSoJv1yZvjV7|K4Ip#ypT) 
z-7owG3@HnfXK05(8>S_-lR5~N`^l~2Vt?b`XQpV#4>57>^VGYCLFPI`EmnAY`4V?^ zCGvC@d!C`00S`+@iml`dTG*YVgA_wq4yX|nzaDR9rDCc7Lev3?4ZgQ~^BI2R{d2tB z-d^QN(bq60>!S3kdAX~|!n_Dc5G4EbEABYcQ?-U|MO*pTNNNyv8Do;4UzEyYb3~n3 z(@(XH9ca^F0}XcTs{9EW7wFboY{_yU3a@@jE%9siu3YVq!=ne3|M-E4A+r`Y;9+G! zi1G>z^J;8Xk^v-Z3BS=Q=x)?I_}7VBaE%b8PJHMtZmO;~-76@!>b_e&gB)X^50(L(p^F~z^*jT+N=4$vo6&XUmsGqntCpX91E5Hony&wf}&;CR{ zy{VE~yX|V(aIET^P@{~-pKig2f5i`EWhRhf{=QWr#jqdH~Kqv?v%U zM3<#Jv8WcVBQR2W;F3qgv)59XsoRDVV`!-s3mt9VHU9AX*CYnBkWMfzULOb;ubzmy1%D`~dkU5JHkMO_6L7=<@8KADhVma8Fa&k!xUiM8 zUOY{#j0VgQo@yYFqMGO$t@%1JReLSGxjVDYjxrcx=9j+V8{AM_^wxlVmxt`p86638PMbk%{?MV3$l&O z^XsDalHkT}WH(ftb*Es;F9D`yKl7cF0cpM`Mn|+eml>0o%FcTF0TSFYW+Xd|6xKXC z;s5g!r%@I~W*=T69P5A4Gu#I~D_B^WI9dQcj6`fopJUM1`M4&$Yva81>E-e_7lY#WrDbV!nNP?@ZQsKm<)ZaNDMB^7=SHDJ&qe5YXHVW zqY#pHaRcg_p&gN^cgG;O7P5jSHg(&b^#Rm872DV|igmSFMYBWpdU(QqZy-3yESfqN zvT?oZG%JrfTdEw26v%|J?RWn>(0nQh%l%Va@q8KO3i4618VbB+L?-Z{v2H1r4j41W zTqj}&5KAT0mCQMl1CSCp9nLuaF57TA-okb^3IQT{g1S*Ivu4*7#1)#0?}qyYMDHaY zk?VXy%Rx6Uv#z7WyFaQ!Ytg$L<$_#&aho~2qvs2@w|8O8&9p7Lb_m3@4Hq^`KVlp5 z?zg85j}J3?_{3nD3MH3UGtnRF(Qcwv$BJ*8p<4bZRXUDLLTBW#NlppQmsigu7h#*4 z;9(M8lV_UHV?A12{45Kt_)1|!IK}R#=A_=h%jF1N`ILmxA##fBl3KJn@w+dRw)z@u z0Wj10jDARRwS{X%=O1NxYV?3@-c4Lu+nz9Yybu4U@Tzbc{U`(l0eOb_U&#KMM7Ubm zyNj3sY&=W=HcpNo0GA}PDgjwg!mQ{Hga~rbXvN1R31f_nO-E;EY$y9vZKbTJL&@Iw zTGUD$QP9vvlhy@#e7A7RHWR@69k-#&BzIC~R;bqOTi5K-Pf60CoREubo{a7z(LEow{J&fBKYg7)1M~lkeAF%6{=7{&Dr>SpVHAJP=|CLQzT)b13JC~C z?ZN0FRpRGaO=3;Nrzr!=L5BRE5=FPg7S}E|rE)5t-6}wibK6 zKbj$M8xgCC%5g;{I5TU1pHZS!Ll+Tl=}8sfsVfI>57hrfUpA&Euv{JC&Z_`-!41O$ zWAGj#knbw+i)OiGdaJbonHt4-zTaC#!TPB@iFvXE_M|!`B^9Q0sk5p%^=eewPP2cc zwf85TWDmZr&YsX2zaip;he!T^l+)Q|)A)PG>UQc>5W zd+5ET@Yms&E3V6Pu#;a3U30sTn#!<>ir$g6^ujQe zTp0WA)i$h6r*CkH7N&}9Sx#BPTctU!`r8J95GN1za=$`%6b!`8Au7%K3un*wQoQ~h zpn}zUR&kQzS|PWdpxJoX=VR7c_sSbXW&~yWY;G!%ud`(E`g21p%u!Z95+P;$TnZ~w1;U8+XN8!Pj}T>U1XN(ds66CFLI4w*oW$eV!Wi;Pr3YB^suE*y zb7C_7gHOSF%Vb}Syh3X zt6QNb3ugq;TqqIq3S+Oww$6A)>XgRwEVgPXV{oiTa}=}g1I%s>E1JvhIv8CxVg;n5 zH|$boBr^JFUrDW3rb7?j!~Vha#(r@!=>yX*{}a9m1+;7Xkv= zMsjSZX5v2bz0Js?i*`BzS1_nz%4tCl+>;~m~!||vWU+Gi5cJUBrmqfL(_4rC9qv=9&Gu830ZdGB{Wl~c?Wbk$j z`5HuXgn#6H8cj{JO`sHK&15RU#cp#~fvVUqflwZGfp;_3~#WTRR!NRmnI_L>3onvNGUxh<$w#hP8mUOeF!_N&* zYnh#=vS#p`G&QGFa~ebg4OC)K9>pz% zii$>JGLj;ytSld#+pOu}w9Ae1wl4b{8S8hw%xO49lK^bo!iuirR?mQ;jPMDTo6poz>>niU}QE7T=73ACC_fH6vZ z<*hIs*KBN(xKa7ew|YP6;{FFp9IW0Dx-RFCOf1Gx+>9Zz+{x0#StfHu?=dX;nKAA? 
z?*UauMw`JUEmlZtjT1;~Ed-(Y@sTZ<-_7-nsG7o&C`DfUKiVLxhUfT`wv083aW^_l z>0*fXE(v1j1g*cU9pemvdx45L18|0dw<+YqAf&)A8==0XA3FDaSJpkrL->yODuR|M zI#qE@-#h4IMC2&EEKi~4LyAR52w<}drORc=V`QP@U=PhRk0%JO36q@ycqUGs#8wcBh1M6ei!^`z*bjWF-~BSVvRUoYP0B zZXE^&R1XDbhKXsm#<$4H;>%E2gE39REzTgY)$6phx6Xd3KU#L{Kc&lZfoP9*K~`X+ zW$4}yizs(gqxEuxz^6G`vA)Y5aZWNjVNrG#+R>A1<)>SnCjsU(x~~d@Ft<$plD0Hg zeW--RHpA2r6{#>0AV_AKy1YOjZBJ}|HL3@@Py};BhKun;VfT*032BVT8m(*RX-i^f zSia;>4nsm!n4GVv*X>p?sDaSxggMcjG~O9LXh~-5$*=F0GbTJ{-*mr>Y*+%|e&hT~ zYfdi8oMmhnUy0%A!(v~u?t?47LJ^1NMDEqw~8Um zw{c&^iC(Vk^x5C3Qt?6Ky02S3r(1P|c}T{P;l#PDydfOfu%IQufK@z=`D&Mj;0s2Nnj_U?=1X6Sn3p14wWN)aUx`oXqs?8y&Xvr*PH7mygokhrjPn*{Dif_&6P4J_1bXPe+cf!9#NnJ z!p!b<049X}BjDk9=GPKAYw-y`6E#pAVnj=d5NC%NPiO>c*nwQ;jC)&K$RmX5Y`Il9 zraiEip!?X~)82re@@VGq*7l^;L&{FJh?CwD<pP_q;x@Rc5{uc)bM56S46`qhEx6#sBdpbIV}?YoAR0dcU4CG+}BY z;dNouqr$dduK>3h`%I-f)uXb&6Rvt#DqWY}aII5gPkb2$f z$~R$JP`&1BO%TnTlB%Z4$mv}xBrJsXL#S-O-@O35pxM#>{e$tKVIa6=7Jwh}o*dy1 z1A>BKfc)pO=3g%&{kg39_xR`4&3~@`>#Xt*gZ?K3Kg{Cab?gV9{&V?Xr&NCm>VHB9 z@xP2W|GC9q`-Xo%K`!z?EA;>k%m1o5|DH5f;(zA+Uz+5fTl`hx{1K=B2}sg^+}HgtJN&h% u{{0XuWdBV2f7#-%Pvzg!P(ks3)M8~h$dC3g2nhVgr~Si1Rnh+W_5T2#Luu^* From 2e71f6d10a6bfe2e8edc2d4f9fdcec688148aaca Mon Sep 17 00:00:00 2001 From: moumitaTora Date: Thu, 15 Feb 2018 16:05:19 -0800 Subject: [PATCH 10/22] Delete .dnnBatchObjFctn.jl.swp --- src/optimization/.dnnBatchObjFctn.jl.swp | Bin 1024 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 src/optimization/.dnnBatchObjFctn.jl.swp diff --git a/src/optimization/.dnnBatchObjFctn.jl.swp b/src/optimization/.dnnBatchObjFctn.jl.swp deleted file mode 100644 index 5b8b453a39133311739b11ea89142be426a106c5..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1024 zcmYc?$V<%2S1{5u)iY*50?Mik47vHGxtS%2NTS%86(vQ9$tAkQ$wi3;so0f(MfEfC zb5r%9rt0fumF8q7>X#Ylnd$qcrYGj5mgpB3CF|!Glw{^+Rwb5X=I80B Date: Thu, 15 Feb 2018 16:05:27 -0800 Subject: [PATCH 11/22] Delete dnnBatchObjFctn.jl.save --- src/optimization/dnnBatchObjFctn.jl.save | 55 ------------------------ 1 file changed, 55 deletions(-) delete mode 100644 src/optimization/dnnBatchObjFctn.jl.save diff --git a/src/optimization/dnnBatchObjFctn.jl.save b/src/optimization/dnnBatchObjFctn.jl.save deleted file mode 100644 index 5d14a8c..0000000 --- a/src/optimization/dnnBatchObjFctn.jl.save +++ /dev/null @@ -1,55 +0,0 @@ - - -export dnnObjFctn, evalObjFctn - -""" -objective function for deep neural networks - -J(theta,C) = loss(h(W*Y(theta)), C) + Rtheta(theta) + R(W) - -""" -mutable struct dnnObjFctn - net :: AbstractMeganetElement # network param (including data) - pLoss # loss function - pRegTheta # regularizer for network parameters - pRegW # regularizer for classifier - dnnObjFctn(net,pLoss,pRegTheta,pRegW) = - new(net,pLoss,pRegTheta,pRegW) - end - -splitWeights(this::dnnObjFctn,x) = (return x[1:nTheta(this.net)], x[nTheta(this.net)+1:end]) - -function getMisfit(this::dnnObjFctn,thetaW::Vector{T},Y::Array{T},C::Array{T},doDerivative=true) where {T} - theta,W = splitWeights(this,thetaW) - return getMisfit(this,theta,W,Y,C,doDerivative) -end - -function getMisfit(this::dnnObjFctn,theta::Array{T},W::Array{T},Y::Array{T},C::Array{T},doDerivative=true) where {T} - - YN,dummy,tmp = apply(this.net,theta,Y,doDerivative) - - Fc,hisF,dWF,d2WF,dYF,d2YF = getMisfit(this.pLoss,W,YN,C,doDerivative,doDerivative) - - if doDerivative - dYF = JthetaTmv(this.net,dYF,zeros(T,0),theta,Y,tmp) - end - return Fc,hisF,vec(dYF),vec(dWF) -end - -function 
evalObjFctn(this::dnnObjFctn,thetaW::Array{T},Y::Array{T},C::Array{T},doDerivative=true) where {T} - theta,W = splitWeights(this,thetaW) - - # compute misfit - Fc,hisF,dFth,dFW = getMisfit(this,theta,W,Y,C,doDerivative) - - # regularizer for weights - Rth,dRth, = regularizer(this.pRegTheta,theta) - - # regularizer for classifier - RW,dRW, = regularizer(this.pRegW,W) - - Jc = Fc + Rth + RW - dJ = [dFth+dRth; dFW+dRW] - - return convert(T,Jc),hisF,convert(Array{T},dJ) -end From 98a66469c93b9ef90565d3deab06408d4bc25d0a Mon Sep 17 00:00:00 2001 From: moumitaTora Date: Thu, 15 Feb 2018 17:12:01 -0800 Subject: [PATCH 12/22] Update REQUIRE --- REQUIRE | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/REQUIRE b/REQUIRE index 13fcb27..8628aa9 100644 --- a/REQUIRE +++ b/REQUIRE @@ -3,4 +3,5 @@ LinearOperators MAT PkgDev JLD -BenchmarkTools \ No newline at end of file +BenchmarkTools +DistriburedArrays From 5560a8af4bb7938ecd7d338e005b2cc57cb2aa4e Mon Sep 17 00:00:00 2001 From: moumitaTora Date: Thu, 15 Feb 2018 17:23:23 -0800 Subject: [PATCH 13/22] Update REQUIRE --- REQUIRE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/REQUIRE b/REQUIRE index 8628aa9..ed5812b 100644 --- a/REQUIRE +++ b/REQUIRE @@ -4,4 +4,4 @@ MAT PkgDev JLD BenchmarkTools -DistriburedArrays +DistributedArrays From a6f0a736962b378d674aaa7961e024da30c25e49 Mon Sep 17 00:00:00 2001 From: davidbegert Date: Fri, 16 Feb 2018 14:06:07 -0800 Subject: [PATCH 14/22] first pass of reusing tmp --- src/activations/identityActivation.jl | 12 +++++---- src/activations/reluActivation.jl | 35 ++++++++++++++------------- src/activations/tanhActivation.jl | 16 ++++++++++-- src/integrators/NN.jl | 21 ++++++++++++---- src/integrators/ResNN.jl | 26 +++++++++++++++----- src/integrators/batchNormNN.jl | 26 +++++++++++++------- src/integrators/connector.jl | 18 ++++++++------ src/layers/affineScalingLayer.jl | 2 +- src/layers/doubleSymLayer.jl | 34 ++++++++++++++++---------- src/layers/normLayer.jl | 4 +-- src/layers/singleLayer.jl | 20 ++++++--------- src/optimization/sgd.jl | 10 ++++---- 12 files changed, 139 insertions(+), 85 deletions(-) diff --git a/src/activations/identityActivation.jl b/src/activations/identityActivation.jl index 0c11e88..c84edcc 100644 --- a/src/activations/identityActivation.jl +++ b/src/activations/identityActivation.jl @@ -17,13 +17,15 @@ export identityActivation A - activation dA - derivatives """ -function identityActivation(Y::Array{T},doDerivative::Bool=false) where {T} +function identityActivation(Y::Array{T},dA,doDerivative::Bool=false) where {T} if doDerivative - dA = ones(T,Y); -else - dA = zeros(T,0) + if isempty(dA) + dA = ones(T,Y); + else + dA .= ones(T,Y); + end end -return A,dA +return A,dA #Depricated? A Isnt even declared lol end diff --git a/src/activations/reluActivation.jl b/src/activations/reluActivation.jl index fe676e6..680c19f 100644 --- a/src/activations/reluActivation.jl +++ b/src/activations/reluActivation.jl @@ -1,4 +1,4 @@ -export reluActivation +export reluActivation, reluActivation! 
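# reluActivation! below (and tanhActivation! in tanhActivation.jl) follow one
# calling convention: they overwrite the pre-activation array A and, when
# doDerivative is true, fill the caller-supplied dA buffer, allocating it only
# if an empty array was passed in.  A quick usage example (sizes are made up):
#
#   A  = randn(Float32, 8, 4)              # overwritten in place
#   dA = Array{Float32}(0, 0)              # empty: the first call allocates it
#   A, dA = reluActivation!(A, dA, true)
#   A, dA = reluActivation!(A, dA, true)   # later calls reuse the same dA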
""" relu activation A = relu(Y) @@ -19,26 +19,27 @@ export reluActivation """ function reluActivation(Y::Array{T},doDerivative::Bool=false) where {T} -A = max.(Y,zero(T)); + A = max.(Y,zero(T)); -if doDerivative - dA = sign.(A); -else - dA = zeros(T,0) -end + if doDerivative + dA = sign.(A); + else + dA = zeros(T,0) + end -return A,dA + return A,dA end -function reluActivation!(A::Array{T},dA::Array{T} = zeros(T,size(A)),doDerivative::Bool=false) where {T} -A .= max.(A,zero(T)); -if doDerivative - dA .= sign.(A); -else - dA = zeros(T,0) -end - -return A,dA +function reluActivation!(A::Array{T},dA,doDerivative::Bool=false) where {T} + A .= max.(A,zero(T)); + if doDerivative + if isempty(dA) + dA = sign.(A); + else + dA .= sign.(A); + end + end + return A,dA end diff --git a/src/activations/tanhActivation.jl b/src/activations/tanhActivation.jl index 06af3de..8778eea 100644 --- a/src/activations/tanhActivation.jl +++ b/src/activations/tanhActivation.jl @@ -1,4 +1,4 @@ -export tanhActivation +export tanhActivation, tanhActivation! """ hyperbolic tan activation A = tanh(Y) @@ -19,7 +19,6 @@ export tanhActivation """ function tanhActivation(Y::Array{T,2},doDerivative::Bool=false) where {T <: Number} - A = tanh.(Y) dA = zeros(A) if doDerivative @@ -27,3 +26,16 @@ function tanhActivation(Y::Array{T,2},doDerivative::Bool=false) where {T <: Numb end return A, dA end + +function tanhActivation!(A::Array{T,2},dA,doDerivative::Bool=false) where {T <: Number} + + A .= tanh.(A) + if doDerivative + if isempty(dA) + dA = one(T) .- A.^2 + else + dA .= one(T) .- A.^2 + end + end + return A, dA +end diff --git a/src/integrators/NN.jl b/src/integrators/NN.jl index 0d55786..65efde4 100644 --- a/src/integrators/NN.jl +++ b/src/integrators/NN.jl @@ -63,17 +63,23 @@ end # --------- forward problem ---------- -function apply(this::NN{T},theta::Array{T},Y0::Array{T,2},tmp,doDerivative=true) where {T<:Number} - Y::Array{T,2} = copy(Y0) +function apply(this::NN{T},theta::Array{T},Y::Array{T,2},tmp,doDerivative=true) where {T<:Number} + nex = div(length(Y),nFeatIn(this))::Int nt = length(this.layers) - if isempty(tmp) + if isempty(tmp) #TODO Will have to make sure size of Y doesnt change tmp = Array{Any}(nt+1,2) end if doDerivative - tmp[1,1] = copy(Y0) + if isassigned(tmp,1,1) + #tmp[1,1] .= Y This does not work, need to hack like below :) + tmp11 = tmp[1,1] + tmp11 .= Y + else + tmp[1,1] = copy(Y) + end end Ydata::Array{T,2} = zeros(T,0,nex) @@ -89,7 +95,12 @@ function apply(this::NN{T},theta::Array{T},Y0::Array{T,2},tmp,doDerivative=true) Ydata = [Ydata; this.Q*Yd] end if doDerivative - tmp[i+1,1] = copy(Y) + if isassigned(tmp,i+1,1) + tmp1 = tmp[i+1,1] + tmp1 .= Y + else + tmp[i+1,1] = copy(Y) + end end cnt = cnt + ni end diff --git a/src/integrators/ResNN.jl b/src/integrators/ResNN.jl index c98ed06..63a199f 100644 --- a/src/integrators/ResNN.jl +++ b/src/integrators/ResNN.jl @@ -52,13 +52,20 @@ function apply(this::ResNN{T},theta_in::Array{T},Y0::Array{T},tmp,doDerivative= if isempty(tmp) tmp = Array{Any}(this.nt+1,2) end - nex = div(length(Y0),nFeatIn(this)) - Y = reshape(Y0,:,nex) if doDerivative - tmp[1,1] = Y0 + if isassigned(tmp,1,1) + tmp11 = tmp[1,1] + tmp11 .= Y0 + else + tmp[1,1] = copy(Y0) + end end + nex = div(length(Y0),nFeatIn(this)) + Y = reshape(Y0,:,nex) + + theta = reshape(theta_in,:,this.nt) Ydata::Array{T,2} = zeros(T,0,nex) @@ -68,12 +75,19 @@ function apply(this::ResNN{T},theta_in::Array{T},Y0::Array{T},tmp,doDerivative= end Z,dummy,tmp[i,2] = 
apply(this.layer,theta[:,i],Y,tmp[i,2],doDerivative) Y += this.h * Z - if doDerivative - tmp[i+1,1] = Y - end if this.outTimes[i]==1 Ydata = [Ydata;this.Q*Y] end + + if doDerivative + if isassigned(tmp,i+1,1) + tmp1 = tmp[i+1,1] + tmp1 .= Y + else + tmp[i+1,1] = copy(Y) + end + end + end return Ydata,Y,tmp end diff --git a/src/integrators/batchNormNN.jl b/src/integrators/batchNormNN.jl index cdf237a..251fc08 100644 --- a/src/integrators/batchNormNN.jl +++ b/src/integrators/batchNormNN.jl @@ -53,15 +53,21 @@ end # --------- forward problem ---------- -function apply(this::batchNormNN{T},theta::Array{T},Y0::Array{T,2},doDerivative=true) where {T<:Number} - Y::Array{T,2} = copy(Y0) +function apply(this::batchNormNN{T},theta::Array{T},Y::Array{T,2},tmp::Array{Any},doDerivative=true) where {T<:Number} nex = div(length(Y),nFeatIn(this))::Int nt = length(this.layers) - # tmp = Array{Array{T,2},2}(nt+1,2) - tmp = Array{Any}(nt+1,2) + if isempty(tmp) #TODO Will have to make sure size of Y doesnt change + tmp = Array{Any}(nt+1,2) + end + if doDerivative - tmp[1,1] = Y0 + if isassigned(tmp,1,1) + tmp11 = tmp[1,1] + tmp11 .= Y + else + tmp[1,1] = copy(Y) + end end Ydata::Array{T,2} = zeros(T,0,nex) @@ -74,14 +80,16 @@ function apply(this::batchNormNN{T},theta::Array{T},Y0::Array{T,2},doDerivative= Ydata = [Ydata; this.Q*Yd] end if doDerivative - tmp[i+1,1] = copy(Y) + if isassigned(tmp,i+1,1) + tmp1 = tmp[i+1,1] + tmp1 .= Y + else + tmp[i+1,1] = copy(Y) + end end cnt = cnt + ni end - #To Keep typing consistent - # tmp[3,2] = Array{T,2}(0,0) - return Ydata,Y,tmp end diff --git a/src/integrators/connector.jl b/src/integrators/connector.jl index a22c4da..fda7867 100644 --- a/src/integrators/connector.jl +++ b/src/integrators/connector.jl @@ -17,20 +17,24 @@ function getConnector(TYPE::Type, K; b = zero(TYPE),outTimes=0,Q=I) return Connector(K,b,outTimes,Q); end - function apply(this::Connector{T},theta::Array{T},Y0::Array{T},tmp,doDerivative=true) where {T <: Number} - # if isempty(tmp) - #Potentially no need for this here - # end - nex = div(length(Y0),nFeatIn(this)) Y0 = reshape(Y0,:,nex) - Y = this.K*Y0 .+ this.b + + if doDerivative + if isempty(tmp) + tmp = copy(Y0) + else + tmp .= Y0 + end + end + + Y = this.K*Y0 .+ this.b # TODO: Should be able to do this in place Ydata::Array{T,2} = Array{T, 2}(0, 0) # Temporary fix until we know what type Q is if this.outTimes==1 Ydata = this.Q*Y end - tmp = Y0; + return Ydata, Y, tmp end diff --git a/src/layers/affineScalingLayer.jl b/src/layers/affineScalingLayer.jl index 4df7e34..36a6fca 100644 --- a/src/layers/affineScalingLayer.jl +++ b/src/layers/affineScalingLayer.jl @@ -27,7 +27,7 @@ function scaleChannels!(Y::Array{T},s::Array{T},b::Array{T}) where {T <: Number} end end -function apply(this::AffineScalingLayer{T},theta::Array{T},Y::Array{T},doDerivative=false) where {T <: Number} +function apply(this::AffineScalingLayer{T},theta::Array{T},Y::Array{T},dA,doDerivative=false) where {T <: Number} Y = reshape(copy(Y),this.nData[1], this.nData[2],:) dA = Array{T,2}(0,0) diff --git a/src/layers/doubleSymLayer.jl b/src/layers/doubleSymLayer.jl index 3b7490d..20b9317 100644 --- a/src/layers/doubleSymLayer.jl +++ b/src/layers/doubleSymLayer.jl @@ -16,7 +16,7 @@ end function getDoubleSymLayer(TYPE::Type,K,nLayer::AbstractMeganetElement{T}; Bin=zeros(nFeatOut(K),0),Bout=zeros(nFeatIn(K),0), - activation=tanhActivation) where {T <: Number} + activation=tanhActivation!) 
where {T <: Number} BinT = convert.(T, Bin) BoutT = convert.(T, Bout) return DoubleSymLayer(activation,K,nLayer,BinT,BoutT); @@ -40,6 +40,8 @@ end function apply(this::DoubleSymLayer{T},theta::Array{T},Yin::Array{T,2},tmp,doDerivative=true) where {T<:Number} if isempty(tmp) tmp = Array{Any}(2) + tmp[1] = Array{Any}(0,0) + tmp[2] = Array{Any}(0,0) end #QZ = [] nex = div(length(Yin),nFeatIn(this))::Int @@ -47,18 +49,24 @@ function apply(this::DoubleSymLayer{T},theta::Array{T},Yin::Array{T,2},tmp,doDer theta1,theta2,theta3,theta4 = splitWeights(this,theta) Kop = getOp(this.K,theta1) - KY = Kop*Y + KY = Kop*Y # TODO: Look into making convolution in place - #TODO: check is assigned and pass in tmp[1] - KY,dummy,tmp[1] = apply(this.nLayer,theta4,KY,doDerivative) + KY,dummy,tmp[1] = apply(this.nLayer,theta4,KY,tmp[1],doDerivative) Yt = KY if !isempty(theta2) Yt .+= this.Bin*theta2 end - tmp[2] = copy(Yt) - #TODO: check is assigned and pass in tmp[1] wait why do we not update tmp[2]? - Z::Array{T,2}, = this.activation(Yt,doDerivative) + if doDerivative + if isempty(tmp[2]) + tmp[2] = copy(Yt) + else + tmp2 = tmp[2] + tmp2 .= Yt + end + end + + Z::Array{T,2}, = this.activation(Yt,[],false) #We don't want to do derivatives here? Z = -(Kop'*Z) if !isempty(theta3) Z .+= this.Bout*theta3 @@ -92,7 +100,7 @@ end function Jthetamv(this::DoubleSymLayer{T},dtheta::Array{T},theta::Array{T},Y::Array{T},tmp) where {T<:Number} - A,dA = this.activation(tmp[2],true) + A,dA = this.activation(tmp[2],[],true) th1, th2,th3,th4 = splitWeights(this,theta) dth1,dth2,dth3,dth4 = splitWeights(this,dtheta) @@ -109,7 +117,7 @@ end function JYmv(this::DoubleSymLayer{T},dY::Array{T},theta::Array{T},Y::Array{T},tmp) where {T<:Number} - dA = this.activation(tmp[2],true)[2] + dA = this.activation(tmp[2],[],true)[2] nex = div(length(dY),nFeatIn(this)) dY = reshape(dY,:,nex) @@ -124,7 +132,7 @@ function JYmv(this::DoubleSymLayer{T},dY::Array{T},theta::Array{T},Y::Array{T},t end function Jmv(this::DoubleSymLayer{T},dtheta::Array{T},dY::Array{T},theta::Array{T},Y::Array{T},tmp) where {T<:Number} - A,dA = this.activation(copy(tmp[2]),true) + A,dA = this.activation(copy(tmp[2]),[],true) nex = div(length(Y),nFeatIn(this)) th1, th2,th3,th4 = splitWeights(this,theta) @@ -161,7 +169,7 @@ function JthetaTmv(this::DoubleSymLayer{T},Z::Array{T},dummy::Array{T},theta::Ar Z = reshape(Z,:,nex) th1,th2,th3,th4 = splitWeights(this,theta) Kop = getOp(this.K,th1) - A,dA = this.activation(tmp[2],true) + A,dA = this.activation(tmp[2],[],true) dth3 = vec(sum(this.Bout'*Z,2)) dAZ = dA.*(Kop*Z) @@ -181,7 +189,7 @@ function JYTmv(this::DoubleSymLayer{T},Zin::Array{T},dummy::Array{T},theta::Arra Z = reshape(Zin,:,nex) th1,th2,th3,th4 = splitWeights(this,theta) Kop = getOp(this.K,th1) - A,dA = this.activation(tmp[2],true) + A,dA = this.activation(tmp[2],[],true) dAZ = dA.*(Kop*Z) dAZ = JYTmv(this.nLayer,dAZ,(T)[],th4,Kop*Y,tmp[1]) @@ -199,7 +207,7 @@ function JTmv(this::DoubleSymLayer{T}, Zin::Array{T}, dummy::Array{T}, Y = reshape(Yin,:,nex) th1, th2, th3, th4 = splitWeights(this,theta) #Kop = getOp(this.K,th1) - A::Array{T,2}, dA::Array{T,2} = this.activation(Yt,true) + A::Array{T,2}, dA::Array{T,2} = this.activation(Yt,[],true) dth3 = vec(sum(this.Bout'*Z,2)) diff --git a/src/layers/normLayer.jl b/src/layers/normLayer.jl index 87a5f78..ae620ef 100644 --- a/src/layers/normLayer.jl +++ b/src/layers/normLayer.jl @@ -35,7 +35,7 @@ function getTVNormLayer(TYPE::Type,nData;eps = convert(TYPE,1e-3),isTrainable::B end end -function 
apply(this::normLayer{T},theta::Array{T},Yin::Array{T,2},doDerivative=true) where {T <: Number} +function apply(this::normLayer{T},theta::Array{T},Yin::Array{T,2},dA,doDerivative=true) where {T <: Number} # first organize Y with channels nf = this.nData[2]::Int @@ -45,7 +45,7 @@ function apply(this::normLayer{T},theta::Array{T},Yin::Array{T,2},doDerivative=t dA = Array{T,2}(0,0) # subtract mean across pixels - Yout = Y.-mean(Y,this.doNorm) + Yout = Y.-mean(Y,this.doNorm) #TODO: Cant see a reason why this shouldnt be done in place? # normalize S2 = sqrt.(mean(Yout.^2,this.doNorm) + this.eps) diff --git a/src/layers/singleLayer.jl b/src/layers/singleLayer.jl index 8771e26..a86af78 100644 --- a/src/layers/singleLayer.jl +++ b/src/layers/singleLayer.jl @@ -9,7 +9,7 @@ mutable struct singleLayer{T, TK <: AbstractConvKernel{T}, TN <: Union{batchNorm end -function getSingleLayer(TYPE::Type, K,nLayer;Bin=zeros(TYPE,nFeatOut(K),0),Bout=zeros(TYPE,nFeatOut(K),0),activation=tanhActivation) +function getSingleLayer(TYPE::Type, K,nLayer;Bin=zeros(TYPE,nFeatOut(K),0),Bout=zeros(TYPE,nFeatOut(K),0),activation=tanhActivation!) singleLayer(activation,K,nLayer,Bin,Bout); end @@ -31,6 +31,8 @@ function apply(this::singleLayer{T},theta::Array{T},Yin::Array{T},tmp,doDerivati if isempty(tmp) tmp = Array{Any}(2) + tmp[1] = Array{Any}(0,0) + tmp[2] = Array{Any}(0) end nex = div(length(Yin),nFeatIn(this)) Y = reshape(Yin,:,nex) @@ -38,18 +40,10 @@ function apply(this::singleLayer{T},theta::Array{T},Yin::Array{T},tmp,doDerivati Yout::Array{T,2} = getOp(this.K,th1)*Y Yout .+= this.Bin * th2 - # if !isassigned(tmp,1) - # tmp[1] = Array{Any}(0) #TODO get batchNormNN to return a 1D arg maybe? Just think about it - # end - Yout,dummy,tmp[1] = apply(this.nLayer,th4,Yout,doDerivative) - - # if !isassigned(tmp,2) - # tmp[2] = Array{Any}(0) #TODO We know this one is an array of T - # end - Yout,tmp[2] = this.activation(Yout,doDerivative) - # println(typeof(tmp[2])) - # println(typeof(tmp[1])) - # error("check activation tmp") ## These two are different meaning it is difficult to type tmp + Yout,dummy,tmp[1] = apply(this.nLayer,th4,Yout,tmp[1],doDerivative) + + Yout,tmp[2] = this.activation(Yout,tmp[2],doDerivative) + Yout .+= this.Bout*th3 Ydata = Yout return Ydata, Yout, tmp diff --git a/src/optimization/sgd.jl b/src/optimization/sgd.jl index 70646fe..625035e 100644 --- a/src/optimization/sgd.jl +++ b/src/optimization/sgd.jl @@ -75,13 +75,13 @@ function solve(this::SGD{T},objFun::dnnObjFctn,xc::Array{T},Y::Array{T},C::Array end # we sample 2^12 images from the training set for displaying the objective. idt = ids[1:min(nex,2^12)] - Jc,para = evalObjFctn(objFun,xc,Y[:,idt],C[:,idt],tmp); #TODO: Do we really wanna do this?? + # Jc,para = evalObjFctn(objFun,xc,Y[:,idt],C[:,idt],tmp); #TODO: Do we really wanna do this?? 
Jval,pVal = getMisfit(objFun,xc,Yv,Cv,tmp,false); - if this.out; - @printf "%d\t%1.2e\t%1.2f\t%1.2e\t%1.2e\t%1.2f\n" epoch Jc 100*(1-para[3]/para[2]) norm(xOld-xc) Jval 100*(1-pVal[3]/pVal[2]) - end - # println("Done one") + # if this.out; + # @printf "%d\t%1.2e\t%1.2f\t%1.2e\t%1.2e\t%1.2f\n" epoch Jc 100*(1-para[3]/para[2]) norm(xOld-xc) Jval 100*(1-pVal[3]/pVal[2]) + # end + println("Val accuracy: ", 100*(1-pVal[3]/pVal[2])) xOld = copy(xc); epoch = epoch + 1; end From 0ea2585a2c9a073e5c6069a2875c4c5bb35a3ef6 Mon Sep 17 00:00:00 2001 From: davidbegert Date: Mon, 19 Feb 2018 11:45:00 -0800 Subject: [PATCH 15/22] seperated in place and not in place activations --- src/layers/doubleSymLayer.jl | 25 +++++++++++++------------ src/layers/singleLayer.jl | 18 ++++++++++-------- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/src/layers/doubleSymLayer.jl b/src/layers/doubleSymLayer.jl index 20b9317..326312f 100644 --- a/src/layers/doubleSymLayer.jl +++ b/src/layers/doubleSymLayer.jl @@ -7,6 +7,7 @@ export DoubleSymLayer,getDoubleSymLayer """ mutable struct DoubleSymLayer{T, TK <: AbstractConvKernel{T}, TN <: Union{batchNormNN{T}, normLayer{T}}} <: AbstractMeganetElement{T} activation :: Function # activation function + activation! :: Function # in place activation function K :: TK # Kernel model, e.g., convMod nLayer :: TN # normalization layer Bin :: Array{T} # Bias inside the nonlinearity @@ -16,10 +17,10 @@ end function getDoubleSymLayer(TYPE::Type,K,nLayer::AbstractMeganetElement{T}; Bin=zeros(nFeatOut(K),0),Bout=zeros(nFeatIn(K),0), - activation=tanhActivation!) where {T <: Number} + activation=tanhActivation,activation_inplace=tanhActivation!) where {T <: Number} BinT = convert.(T, Bin) BoutT = convert.(T, Bout) - return DoubleSymLayer(activation,K,nLayer,BinT,BoutT); + return DoubleSymLayer(activation,activation_inplace,K,nLayer,BinT,BoutT); end @@ -40,8 +41,8 @@ end function apply(this::DoubleSymLayer{T},theta::Array{T},Yin::Array{T,2},tmp,doDerivative=true) where {T<:Number} if isempty(tmp) tmp = Array{Any}(2) - tmp[1] = Array{Any}(0,0) - tmp[2] = Array{Any}(0,0) + tmp[1] = Array{Any}(0) + tmp[2] = Array{Any}(0) end #QZ = [] nex = div(length(Yin),nFeatIn(this))::Int @@ -66,7 +67,7 @@ function apply(this::DoubleSymLayer{T},theta::Array{T},Yin::Array{T,2},tmp,doDer end end - Z::Array{T,2}, = this.activation(Yt,[],false) #We don't want to do derivatives here? + Z::Array{T,2}, = this.activation!(Yt,[],false) #We don't want to do derivatives here? 
Z = -(Kop'*Z) if !isempty(theta3) Z .+= this.Bout*theta3 @@ -100,7 +101,7 @@ end function Jthetamv(this::DoubleSymLayer{T},dtheta::Array{T},theta::Array{T},Y::Array{T},tmp) where {T<:Number} - A,dA = this.activation(tmp[2],[],true) + A,dA = this.activation(tmp[2],true) th1, th2,th3,th4 = splitWeights(this,theta) dth1,dth2,dth3,dth4 = splitWeights(this,dtheta) @@ -116,8 +117,8 @@ function Jthetamv(this::DoubleSymLayer{T},dtheta::Array{T},theta::Array{T},Y::Ar end function JYmv(this::DoubleSymLayer{T},dY::Array{T},theta::Array{T},Y::Array{T},tmp) where {T<:Number} - - dA = this.activation(tmp[2],[],true)[2] + #TODO: Look into why this activation cannot be done in place (tests fail) + dA = this.activation(tmp[2],true)[2] nex = div(length(dY),nFeatIn(this)) dY = reshape(dY,:,nex) @@ -132,7 +133,7 @@ function JYmv(this::DoubleSymLayer{T},dY::Array{T},theta::Array{T},Y::Array{T},t end function Jmv(this::DoubleSymLayer{T},dtheta::Array{T},dY::Array{T},theta::Array{T},Y::Array{T},tmp) where {T<:Number} - A,dA = this.activation(copy(tmp[2]),[],true) + A,dA = this.activation(copy(tmp[2]),true) nex = div(length(Y),nFeatIn(this)) th1, th2,th3,th4 = splitWeights(this,theta) @@ -169,7 +170,7 @@ function JthetaTmv(this::DoubleSymLayer{T},Z::Array{T},dummy::Array{T},theta::Ar Z = reshape(Z,:,nex) th1,th2,th3,th4 = splitWeights(this,theta) Kop = getOp(this.K,th1) - A,dA = this.activation(tmp[2],[],true) + A,dA = this.activation(tmp[2],true) dth3 = vec(sum(this.Bout'*Z,2)) dAZ = dA.*(Kop*Z) @@ -189,7 +190,7 @@ function JYTmv(this::DoubleSymLayer{T},Zin::Array{T},dummy::Array{T},theta::Arra Z = reshape(Zin,:,nex) th1,th2,th3,th4 = splitWeights(this,theta) Kop = getOp(this.K,th1) - A,dA = this.activation(tmp[2],[],true) + A,dA = this.activation(tmp[2],true) dAZ = dA.*(Kop*Z) dAZ = JYTmv(this.nLayer,dAZ,(T)[],th4,Kop*Y,tmp[1]) @@ -207,7 +208,7 @@ function JTmv(this::DoubleSymLayer{T}, Zin::Array{T}, dummy::Array{T}, Y = reshape(Yin,:,nex) th1, th2, th3, th4 = splitWeights(this,theta) #Kop = getOp(this.K,th1) - A::Array{T,2}, dA::Array{T,2} = this.activation(Yt,[],true) + A::Array{T,2}, dA::Array{T,2} = this.activation(Yt,true) dth3 = vec(sum(this.Bout'*Z,2)) diff --git a/src/layers/singleLayer.jl b/src/layers/singleLayer.jl index a86af78..04d1d22 100644 --- a/src/layers/singleLayer.jl +++ b/src/layers/singleLayer.jl @@ -1,16 +1,18 @@ export singleLayer,getSingleLayer mutable struct singleLayer{T, TK <: AbstractConvKernel{T}, TN <: Union{batchNormNN{T}, normLayer{T}}} <: AbstractMeganetElement{T} - activation :: Function # activation function - K :: TK # transformation type - nLayer :: TN # normalization layer - Bin :: Array{T} # bias inside nonlinearity - Bout :: Array{T} # bias outside nonlinearity + activation :: Function # activation function + activation! :: Function # in place activation function + K :: TK # transformation type + nLayer :: TN # normalization layer + Bin :: Array{T} # bias inside nonlinearity + Bout :: Array{T} # bias outside nonlinearity end -function getSingleLayer(TYPE::Type, K,nLayer;Bin=zeros(TYPE,nFeatOut(K),0),Bout=zeros(TYPE,nFeatOut(K),0),activation=tanhActivation!) - singleLayer(activation,K,nLayer,Bin,Bout); +function getSingleLayer(TYPE::Type, K,nLayer;Bin=zeros(TYPE,nFeatOut(K),0),Bout=zeros(TYPE,nFeatOut(K),0), + activation=tanhActivation,activation_inplace=tanhActivation!) 
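# Each layer now keeps both variants: `activation!` is what apply() calls so the
# forward pass can overwrite its buffers, while the allocating `activation` stays
# around for the derivative code paths, which still expect a freshly computed
# (A, dA) pair (see the TODO in doubleSymLayer.jl above about why the derivative
# path is not in-place yet).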
+ singleLayer(activation,activation_inplace,K,nLayer,Bin,Bout); end @@ -42,7 +44,7 @@ function apply(this::singleLayer{T},theta::Array{T},Yin::Array{T},tmp,doDerivati Yout .+= this.Bin * th2 Yout,dummy,tmp[1] = apply(this.nLayer,th4,Yout,tmp[1],doDerivative) - Yout,tmp[2] = this.activation(Yout,tmp[2],doDerivative) + Yout,tmp[2] = this.activation!(Yout,tmp[2],doDerivative) Yout .+= this.Bout*th3 Ydata = Yout From 2a0a3da80597f73fc5b628f98b0dce68eb3dd301 Mon Sep 17 00:00:00 2001 From: davidbegert Date: Mon, 19 Feb 2018 12:00:58 -0800 Subject: [PATCH 16/22] updated tests for in place tmp function definitions --- src/activations/tanhActivation.jl | 2 +- src/optimization/dnnBatchObjFctn.jl | 10 ++++++---- src/utils/testAbstractMeganetElement.jl | 16 ++++++++-------- test/optimization/dnnObjFctnTest.jl | 6 +++--- 4 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/activations/tanhActivation.jl b/src/activations/tanhActivation.jl index 8778eea..aa491dc 100644 --- a/src/activations/tanhActivation.jl +++ b/src/activations/tanhActivation.jl @@ -27,7 +27,7 @@ function tanhActivation(Y::Array{T,2},doDerivative::Bool=false) where {T <: Numb return A, dA end -function tanhActivation!(A::Array{T,2},dA,doDerivative::Bool=false) where {T <: Number} +function tanhActivation!(A::Array{T,2},dA=[],doDerivative::Bool=false) where {T <: Number} A .= tanh.(A) if doDerivative diff --git a/src/optimization/dnnBatchObjFctn.jl b/src/optimization/dnnBatchObjFctn.jl index 1e1a05e..8653d2d 100644 --- a/src/optimization/dnnBatchObjFctn.jl +++ b/src/optimization/dnnBatchObjFctn.jl @@ -17,12 +17,12 @@ mutable struct dnnObjFctn splitWeights(this::dnnObjFctn,x) = (return x[1:nTheta(this.net)], x[nTheta(this.net)+1:end]) -function getMisfit(this::dnnObjFctn,thetaW::Vector{T},Y::Array{T},C::Array{T},tmp::Array{Any},doDerivative=true) where {T<:Number} +function getMisfit(this::dnnObjFctn,thetaW::Vector{T},Y::Array{T},C::Array{T},tmp::Array,doDerivative=true) where {T<:Number} theta,W = splitWeights(this,thetaW) return getMisfit(this,theta,W,Y,C,tmp,doDerivative) end -function getMisfit(this::dnnObjFctn,theta::Array{T},W::Array{T},Y::Array{T},C::Array{T},tmp::Array{Any},doDerivative=true) where {T<:Number} +function getMisfit(this::dnnObjFctn,theta::Array{T},W::Array{T},Y::Array{T},C::Array{T},tmp::Array,doDerivative=true) where {T<:Number} YN,dummy,tmp = apply(this.net,theta,Y,tmp,doDerivative) @@ -32,12 +32,14 @@ function getMisfit(this::dnnObjFctn,theta::Array{T},W::Array{T},Y::Array{T},C::A Fc,hisF,dWF,d2WF,dYF,d2YF = getMisfit(this.pLoss,W,YN,C,doDerivative,doDerivative) if doDerivative - dYF = JthetaTmv(this.net,dYF,zeros(T,0),theta,Y,tmp) + # tic() + dYF = JthetaTmv(this.net,dYF,zeros(T,0),theta,Y,tmp) + # toc() end return Fc,hisF,vec(dYF),vec(dWF),tmp end -function evalObjFctn(this::dnnObjFctn,thetaW::Array{T},Y::Array{T},C::Array{T},tmp::Array{Any},doDerivative=true) where {T<:Number} +function evalObjFctn(this::dnnObjFctn,thetaW::Array{T},Y::Array{T},C::Array{T},tmp::Array,doDerivative=true) where {T<:Number} theta,W = splitWeights(this,thetaW) # compute misfit diff --git a/src/utils/testAbstractMeganetElement.jl b/src/utils/testAbstractMeganetElement.jl index 8b38a2b..7df1ee4 100644 --- a/src/utils/testAbstractMeganetElement.jl +++ b/src/utils/testAbstractMeganetElement.jl @@ -10,7 +10,7 @@ function testAbstractMeganetElement(L::AbstractMeganetElement{T};out::Bool=false theta .+= .1 # To test if Y changes for affineScalingLayer Y = randn(T,nFeatIn(L),nex) Yo = copy(Y) - Zd,Z,tmp = 
apply(L,theta,Y,true) + Zd,Z,tmp = apply(L,theta,Y,[],true) @test norm(Y-Yo)/norm(Yo) < 1e4*eps(T) dY = randn(T,nFeatIn(L),nex) @@ -42,7 +42,7 @@ function testAbstractMeganetElement(L::AbstractMeganetElement{T};out::Bool=false @testset "apply without derivatives" begin theta = initTheta(L) Y = randn(T,nFeatIn(L),nex) - Z = apply(L,theta,Y,false) + Z = apply(L,theta,Y,[],false) end @@ -53,10 +53,10 @@ function testAbstractMeganetElement(L::AbstractMeganetElement{T};out::Bool=false function testFun(x,v=[]) if !(isempty(v)) - Z = apply(L,theta,x,true) + Z = apply(L,theta,x,[],true) return Z[2], reshape(JYmv(L,v,theta,x,Z[3])[2],size(Z[2])) else - return apply(L,theta,x)[2] + return apply(L,theta,x,[])[2] end end chkDer, = checkDerivative(testFun,copy(Y),out=out) @@ -69,7 +69,7 @@ function testAbstractMeganetElement(L::AbstractMeganetElement{T};out::Bool=false dY = randn(T,nFeatIn(L),nex) Z = randn(T,nFeatOut(L),nex) - tmp = apply(L,theta,Y,true) + tmp = apply(L,theta,copy(Y),[],true) Z1 = JYmv(L,copy(dY),theta,copy(Y),tmp[3])[2] Z2 = JYTmv(L,copy(Z),(T)[],theta,copy(Y),tmp[3]) @@ -86,10 +86,10 @@ function testAbstractMeganetElement(L::AbstractMeganetElement{T};out::Bool=false function testFunTh(x,v=[]) if !(isempty(v)) - Z = apply(L,x,copy(Y),true) + Z = apply(L,x,copy(Y),[],true) return Z[2], reshape(Jthetamv(L,v,x,copy(Y),Z[3])[2],size(Z[2])) else - return apply(L,x,copy(Y))[2] + return apply(L,x,copy(Y),[])[2] end end chkDer, = checkDerivative(testFunTh,copy(theta),out=out) @@ -102,7 +102,7 @@ function testAbstractMeganetElement(L::AbstractMeganetElement{T};out::Bool=false dtheta = randn(T,nTheta(L)) Z = randn(T,nFeatOut(L),nex) - tmp = apply(L,theta,copy(Y),true) + tmp = apply(L,theta,copy(Y),[],true) Z1 = Jthetamv(L,copy(dtheta),copy(theta),copy(Y),copy(tmp[3]))[2] Z2 = JthetaTmv(L,copy(Z),(T)[],theta,copy(Y),tmp[3]) diff --git a/test/optimization/dnnObjFctnTest.jl b/test/optimization/dnnObjFctnTest.jl index d41853f..0c96e12 100644 --- a/test/optimization/dnnObjFctnTest.jl +++ b/test/optimization/dnnObjFctnTest.jl @@ -31,7 +31,7 @@ objFun = dnnObjFctn(net,pLoss,pRegTh,pRegW) @testset "dThLoss $TYPE" begin function testdThLoss(x,v=nothing) - F,his,dF, = getMisfit(objFun,x,W,Y,C,true) + F,his,dF, = getMisfit(objFun,x,W,Y,C,[],true) if v!==nothing return F,dot(dF,v) else @@ -44,7 +44,7 @@ end @testset "dWLoss $TYPE" begin function testdWLoss(x,v=nothing) - F,his,dFth,dF = getMisfit(objFun,theta,x,Y,C,true) + F,his,dFth,dF = getMisfit(objFun,theta,x,Y,C,[],true) if v!==nothing return F,dot(dF,v) else @@ -57,7 +57,7 @@ end @testset "dJ $TYPE" begin function testdJ(x,v=nothing) - F,his,dF = evalObjFctn(objFun,x,Y,C,true) + F,his,dF = evalObjFctn(objFun,x,Y,C,[],true) if v!==nothing return F,dot(dF,v) else From e33aaf099baf98c6255e117f30ffd101185208cd Mon Sep 17 00:00:00 2001 From: davidbegert Date: Mon, 19 Feb 2018 12:11:15 -0800 Subject: [PATCH 17/22] sgd no longer learns from the 2^12 examples --- src/optimization/sgd.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/optimization/sgd.jl b/src/optimization/sgd.jl index 625035e..28570ac 100644 --- a/src/optimization/sgd.jl +++ b/src/optimization/sgd.jl @@ -75,13 +75,13 @@ function solve(this::SGD{T},objFun::dnnObjFctn,xc::Array{T},Y::Array{T},C::Array end # we sample 2^12 images from the training set for displaying the objective. idt = ids[1:min(nex,2^12)] - # Jc,para = evalObjFctn(objFun,xc,Y[:,idt],C[:,idt],tmp); #TODO: Do we really wanna do this?? 
+ Jtrain,ptrain = getMisfit(objFun,xc,Y[:,idt],C[:,idt],tmp,false); Jval,pVal = getMisfit(objFun,xc,Yv,Cv,tmp,false); - # if this.out; - # @printf "%d\t%1.2e\t%1.2f\t%1.2e\t%1.2e\t%1.2f\n" epoch Jc 100*(1-para[3]/para[2]) norm(xOld-xc) Jval 100*(1-pVal[3]/pVal[2]) - # end - println("Val accuracy: ", 100*(1-pVal[3]/pVal[2])) + if this.out; + @printf "%d\t%1.2e\t%1.2f\t%1.2e\t%1.2e\t%1.2f\n" epoch Jtrain 100*(1-ptrain[3]/ptrain[2]) norm(xOld-xc) Jval 100*(1-pVal[3]/pVal[2]) + end + xOld = copy(xc); epoch = epoch + 1; end From f473dda73423ec076e26174f4d6b0c1ebf7b461a Mon Sep 17 00:00:00 2001 From: davidbegert Date: Mon, 19 Feb 2018 12:26:38 -0800 Subject: [PATCH 18/22] changed norm layer to be in place and changed tests to allow for this --- src/layers/normLayer.jl | 10 +++++----- src/utils/testAbstractMeganetElement.jl | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/layers/normLayer.jl b/src/layers/normLayer.jl index ae620ef..fe3c8ce 100644 --- a/src/layers/normLayer.jl +++ b/src/layers/normLayer.jl @@ -45,14 +45,14 @@ function apply(this::normLayer{T},theta::Array{T},Yin::Array{T,2},dA,doDerivativ dA = Array{T,2}(0,0) # subtract mean across pixels - Yout = Y.-mean(Y,this.doNorm) #TODO: Cant see a reason why this shouldnt be done in place? + Y .-= mean(Y,this.doNorm) #TODO: Cant see a reason why this shouldnt be done in place? # normalize - S2 = sqrt.(mean(Yout.^2,this.doNorm) + this.eps) - Yout ./= S2 + S2 = sqrt.(mean(Y.^2,this.doNorm) + this.eps) + Y ./= S2 - Yout2 = reshape(Yout,:,nex) - return Yout2, Yout2, dA + Yout = reshape(Y,:,nex) + return Yout, Yout, dA end function nTheta(this::normLayer) diff --git a/src/utils/testAbstractMeganetElement.jl b/src/utils/testAbstractMeganetElement.jl index 7df1ee4..0d5473e 100644 --- a/src/utils/testAbstractMeganetElement.jl +++ b/src/utils/testAbstractMeganetElement.jl @@ -10,7 +10,7 @@ function testAbstractMeganetElement(L::AbstractMeganetElement{T};out::Bool=false theta .+= .1 # To test if Y changes for affineScalingLayer Y = randn(T,nFeatIn(L),nex) Yo = copy(Y) - Zd,Z,tmp = apply(L,theta,Y,[],true) + Zd,Z,tmp = apply(L,theta,copy(Y),[],true) @test norm(Y-Yo)/norm(Yo) < 1e4*eps(T) dY = randn(T,nFeatIn(L),nex) @@ -53,10 +53,10 @@ function testAbstractMeganetElement(L::AbstractMeganetElement{T};out::Bool=false function testFun(x,v=[]) if !(isempty(v)) - Z = apply(L,theta,x,[],true) + Z = apply(L,theta,copy(x),[],true) return Z[2], reshape(JYmv(L,v,theta,x,Z[3])[2],size(Z[2])) else - return apply(L,theta,x,[])[2] + return apply(L,theta,copy(x),[])[2] end end chkDer, = checkDerivative(testFun,copy(Y),out=out) From c788e69a802ecbcd89f190dd3e2d5ebdb22397b2 Mon Sep 17 00:00:00 2001 From: davidbegert Date: Mon, 19 Feb 2018 13:53:12 -0800 Subject: [PATCH 19/22] remove unneeeded comments/clean --- src/integrators/connector.jl | 2 +- src/layers/normLayer.jl | 2 +- src/layers/singleLayer.jl | 2 +- src/optimization/sgd.jl | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/integrators/connector.jl b/src/integrators/connector.jl index fda7867..cb81177 100644 --- a/src/integrators/connector.jl +++ b/src/integrators/connector.jl @@ -29,7 +29,7 @@ function apply(this::Connector{T},theta::Array{T},Y0::Array{T},tmp,doDerivative= end end - Y = this.K*Y0 .+ this.b # TODO: Should be able to do this in place + Y = this.K*Y0 .+ this.b Ydata::Array{T,2} = Array{T, 2}(0, 0) # Temporary fix until we know what type Q is if this.outTimes==1 Ydata = this.Q*Y diff --git a/src/layers/normLayer.jl b/src/layers/normLayer.jl index 
fe3c8ce..1d9c019 100644 --- a/src/layers/normLayer.jl +++ b/src/layers/normLayer.jl @@ -45,7 +45,7 @@ function apply(this::normLayer{T},theta::Array{T},Yin::Array{T,2},dA,doDerivativ dA = Array{T,2}(0,0) # subtract mean across pixels - Y .-= mean(Y,this.doNorm) #TODO: Cant see a reason why this shouldnt be done in place? + Y .-= mean(Y,this.doNorm) # normalize S2 = sqrt.(mean(Y.^2,this.doNorm) + this.eps) diff --git a/src/layers/singleLayer.jl b/src/layers/singleLayer.jl index 04d1d22..2593578 100644 --- a/src/layers/singleLayer.jl +++ b/src/layers/singleLayer.jl @@ -33,7 +33,7 @@ function apply(this::singleLayer{T},theta::Array{T},Yin::Array{T},tmp,doDerivati if isempty(tmp) tmp = Array{Any}(2) - tmp[1] = Array{Any}(0,0) + tmp[1] = Array{Any}(0) tmp[2] = Array{Any}(0) end nex = div(length(Yin),nFeatIn(this)) diff --git a/src/optimization/sgd.jl b/src/optimization/sgd.jl index 28570ac..e5bc560 100644 --- a/src/optimization/sgd.jl +++ b/src/optimization/sgd.jl @@ -44,7 +44,7 @@ function solve(this::SGD{T},objFun::dnnObjFctn,xc::Array{T},Y::Array{T},C::Array if this.out; display(this); end - # Declare tmp - We know nothing about its shape of datatypes + # Declare tmp - We know nothing about its shape or datatypes tmp = Array{Any}(0,0) while epoch <= this.maxEpochs From a5484182396849158aca58629fd907497514ac3d Mon Sep 17 00:00:00 2001 From: Justin Date: Mon, 19 Feb 2018 14:05:43 -0800 Subject: [PATCH 20/22] Update batchNormNN.jl Quick fix for some tmps which aren't Any (could be Array{Float32} - such as for the connector). Array{Float32} NOT <: Array{Any} --- src/integrators/batchNormNN.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/integrators/batchNormNN.jl b/src/integrators/batchNormNN.jl index 251fc08..83f1500 100644 --- a/src/integrators/batchNormNN.jl +++ b/src/integrators/batchNormNN.jl @@ -53,7 +53,7 @@ end # --------- forward problem ---------- -function apply(this::batchNormNN{T},theta::Array{T},Y::Array{T,2},tmp::Array{Any},doDerivative=true) where {T<:Number} +function apply(this::batchNormNN{T},theta::Array{T},Y::Array{T,2},tmp::Array,doDerivative=true) where {T<:Number} nex = div(length(Y),nFeatIn(this))::Int nt = length(this.layers) From 01f31325705a9077b63bf7d7c12a49896125a81a Mon Sep 17 00:00:00 2001 From: Justin Date: Mon, 19 Feb 2018 14:16:05 -0800 Subject: [PATCH 21/22] Update dnnBatchObjFctn.jl cleaned up some forgotten debugging --- src/optimization/dnnBatchObjFctn.jl | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/optimization/dnnBatchObjFctn.jl b/src/optimization/dnnBatchObjFctn.jl index 8653d2d..b484da2 100644 --- a/src/optimization/dnnBatchObjFctn.jl +++ b/src/optimization/dnnBatchObjFctn.jl @@ -26,15 +26,10 @@ function getMisfit(this::dnnObjFctn,theta::Array{T},W::Array{T},Y::Array{T},C::A YN,dummy,tmp = apply(this.net,theta,Y,tmp,doDerivative) - # println(eltype(tmp[1,2][1][1,1])) - # println(size(tmp)) - # error("check tmp out") Fc,hisF,dWF,d2WF,dYF,d2YF = getMisfit(this.pLoss,W,YN,C,doDerivative,doDerivative) if doDerivative - # tic() dYF = JthetaTmv(this.net,dYF,zeros(T,0),theta,Y,tmp) - # toc() end return Fc,hisF,vec(dYF),vec(dWF),tmp end From 0f5de924de127d1c3fc4d0f3ffa39c5fbe3eab7d Mon Sep 17 00:00:00 2001 From: Justin Date: Mon, 19 Feb 2018 14:22:31 -0800 Subject: [PATCH 22/22] Update testAbstractMeganetElement.jl Removed a useless test... 
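The check was already vacuous: once a layer's apply may overwrite its input (the
in-place normLayer from PATCH 18), the harness has to call apply on copy(Y), so
norm(Y-Yo) compares an untouched Y against its own copy and can never detect a
mutation. A minimal sketch of the effect (illustrative only, not part of this
patch; the in-place mean subtraction stands in for normLayer, and the names and
sizes are made up):

    Y  = randn(Float32, 8, 4)
    Yo = copy(Y)
    Yc = copy(Y)              # the copy(Y) hack: the layer only ever sees Yc
    Yc .-= mean(Yc, 1)        # in-place update mutates the copy, not Y
    norm(Y - Yo)              # exactly 0 by construction, so the @test checks nothing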
--- src/utils/testAbstractMeganetElement.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/utils/testAbstractMeganetElement.jl b/src/utils/testAbstractMeganetElement.jl index 0d5473e..266ff74 100644 --- a/src/utils/testAbstractMeganetElement.jl +++ b/src/utils/testAbstractMeganetElement.jl @@ -10,8 +10,9 @@ function testAbstractMeganetElement(L::AbstractMeganetElement{T};out::Bool=false theta .+= .1 # To test if Y changes for affineScalingLayer Y = randn(T,nFeatIn(L),nex) Yo = copy(Y) + # THE FOLLOWING HACK (copy(Y)) EFFECTIVELY CANCELS THIS TEST...COMMENTING IT OUT Zd,Z,tmp = apply(L,theta,copy(Y),[],true) - @test norm(Y-Yo)/norm(Yo) < 1e4*eps(T) +# @test norm(Y-Yo)/norm(Yo) < 1e4*eps(T) dY = randn(T,nFeatIn(L),nex) Z1 = JYmv(L,dY,theta,Y,tmp)[2]
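A note on the tmp signatures relaxed in this series (PATCH 16's dnnBatchObjFctn
and the batchNormNN fix in PATCH 20): Julia's parametric types are invariant, so
Array{Float32} is not a subtype of Array{Any}, and a method constrained to
tmp::Array{Any} raises a MethodError when, as the PATCH 20 message notes, the
connector hands it an Array{Float32}. The unparameterized tmp::Array (shorthand
for Array{T,N} where {T,N}) accepts both. A minimal, self-contained sketch
(the function names here are illustrative, not from the repo):

    f_strict(tmp::Array{Any}) = :only_array_any
    f_loose(tmp::Array)       = :any_array

    tmp = Array{Float32}(0)   # an empty Float32 buffer, like the connector's tmp
    isa(tmp, Array{Any})      # false -- Array{Float32} NOT <: Array{Any}
    f_loose(tmp)              # dispatches fine
    # f_strict(tmp)           # MethodError: no method matching f_strict(::Array{Float32,1})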