From 2843e5daea71033712585761331f1d5dd93e8e54 Mon Sep 17 00:00:00 2001 From: Abel Gonzalez Date: Thu, 27 Apr 2017 12:17:21 +0200 Subject: [PATCH] Training of part and object baseline --- demo_prts.m | 32 ++ .../frcn/boxes/BoxBestOverlapFastRcnn.m | 2 +- .../parts/DetectionPartsToPascalVOCFiles.m | 61 +++ .../VOC_modified/VOCevaldetParts_modified.m | 141 +++++++ .../examples/parts/calvinNNPartDetection.m | 83 ++++ .../examples/parts/computeIoATableSingle.cpp | 95 +++++ .../parts/computeIoATableSingle.mexa64 | Bin 0 -> 12954 bytes matconvnet-calvin/examples/parts/createIMDB.m | 214 ++++++++++ .../parts/evalPartAndObjectDetection.m | 168 ++++++++ .../examples/parts/getPartNames.m | 14 + .../parts/imdb/ImdbPartDetectionJointObjPrt.m | 352 +++++++++++++++++ .../parts/misc/getSegmentBoundingBox.m | 20 + .../examples/parts/part2ind_4imdb.m | 369 ++++++++++++++++++ .../examples/parts/part2names_4imdb.m | 163 ++++++++ .../examples/parts/saveGStructs.m | 102 +++++ .../parts/selective_search_boxes_min.m | 87 +++++ .../examples/parts/setupDataOptsPrts.m | 45 +++ .../examples/parts/setupImdbPartDetection.m | 40 ++ .../examples/parts/testPartDetection.m | 153 ++++++++ matconvnet-calvin/matlab/@CalvinNN/CalvinNN.m | 1 + .../convertNetworkToPrtObjFastRcnn.m | 221 +++++++++++ .../matlab/@CalvinNN/testPrtObj.m | 58 +++ .../matlab/setup/downloadPASCALParts.m | 39 ++ matconvnet-calvin/matlab/setup/setupParts.m | 38 ++ 24 files changed, 2497 insertions(+), 1 deletion(-) create mode 100644 demo_prts.m create mode 100644 matconvnet-calvin/examples/parts/DetectionPartsToPascalVOCFiles.m create mode 100644 matconvnet-calvin/examples/parts/VOC_modified/VOCevaldetParts_modified.m create mode 100644 matconvnet-calvin/examples/parts/calvinNNPartDetection.m create mode 100644 matconvnet-calvin/examples/parts/computeIoATableSingle.cpp create mode 100755 matconvnet-calvin/examples/parts/computeIoATableSingle.mexa64 create mode 100644 matconvnet-calvin/examples/parts/createIMDB.m create mode 
100644 matconvnet-calvin/examples/parts/evalPartAndObjectDetection.m create mode 100644 matconvnet-calvin/examples/parts/getPartNames.m create mode 100644 matconvnet-calvin/examples/parts/imdb/ImdbPartDetectionJointObjPrt.m create mode 100644 matconvnet-calvin/examples/parts/misc/getSegmentBoundingBox.m create mode 100644 matconvnet-calvin/examples/parts/part2ind_4imdb.m create mode 100644 matconvnet-calvin/examples/parts/part2names_4imdb.m create mode 100644 matconvnet-calvin/examples/parts/saveGStructs.m create mode 100644 matconvnet-calvin/examples/parts/selective_search_boxes_min.m create mode 100644 matconvnet-calvin/examples/parts/setupDataOptsPrts.m create mode 100644 matconvnet-calvin/examples/parts/setupImdbPartDetection.m create mode 100644 matconvnet-calvin/examples/parts/testPartDetection.m create mode 100644 matconvnet-calvin/matlab/@CalvinNN/convertNetworkToPrtObjFastRcnn.m create mode 100644 matconvnet-calvin/matlab/@CalvinNN/testPrtObj.m create mode 100644 matconvnet-calvin/matlab/setup/downloadPASCALParts.m create mode 100644 matconvnet-calvin/matlab/setup/setupParts.m diff --git a/demo_prts.m b/demo_prts.m new file mode 100644 index 0000000..a6c96fa --- /dev/null +++ b/demo_prts.m @@ -0,0 +1,32 @@ +% Demo Joint object and part detection +% +% It prepares all the necessary structures and then trains and test several +% networks +% + + +% Add folders to path +setup(); + +% Download datasets +downloadVOC2010(); + +downloadPASCALParts(); + +% Download base network +downloadNetwork('modelName','imagenet-caffe-alex'); + +% Download Selective Search +downloadSelectiveSearch(); + +% Create structures with part and object info +setupParts(); + +% Train and test baseline network + +% Add 'if' here to decide whether train baseline net or download +calvinNNPartDetection(); + +% Train and test our model, using as input baseline network + + diff --git a/matconvnet-calvin/examples/frcn/boxes/BoxBestOverlapFastRcnn.m 
b/matconvnet-calvin/examples/frcn/boxes/BoxBestOverlapFastRcnn.m index 7838497..855468d 100644 --- a/matconvnet-calvin/examples/frcn/boxes/BoxBestOverlapFastRcnn.m +++ b/matconvnet-calvin/examples/frcn/boxes/BoxBestOverlapFastRcnn.m @@ -1,4 +1,4 @@ -function [scores, index] = BoxBestOverlap(targetBoxes, testBoxes) +function [scores, index] = BoxBestOverlapFastRcnn(targetBoxes, testBoxes) % [scores, index] = BoxBestOverlap(targetBoxes, testBoxes) % % Get overlap scores (Pascal-wise) for testBoxes bounding boxes diff --git a/matconvnet-calvin/examples/parts/DetectionPartsToPascalVOCFiles.m b/matconvnet-calvin/examples/parts/DetectionPartsToPascalVOCFiles.m new file mode 100644 index 0000000..be53613 --- /dev/null +++ b/matconvnet-calvin/examples/parts/DetectionPartsToPascalVOCFiles.m @@ -0,0 +1,61 @@ +function [recall, prec, ap, apUpperBound] = DetectionPartsToPascalVOCFiles(set, idxPart, idxClass, boxes, boxIms, boxClfs, compName, doEval, overlapNms) + +% Filters overlapping boxes (near duplicates), creates official VOC +% detection files. Evaluates results. 
+ +global DATAopts; + +DATAopts.testset = set; + +if ~exist('doEval', 'var') + doEval = 0; +end + + +partName = DATAopts.prt_classes{idxPart}; +objName = DATAopts.classes{idxClass}; +% Sort scores/boxes/images +[boxClfs, sI] = sort(boxClfs, 'descend'); +boxIms = boxIms(sI); +boxes = boxes(sI,:); + +% Filter boxes if wanted +if exist('overlapNms', 'var') && overlapNms > 0 + [uIms, ~, uN] = unique(boxIms); + keepIds = true(size(boxes,1), 1); + fprintf('Filtering %d: ', length(uIms)); + for i=1:length(uIms) + if mod(i,500) == 0 + fprintf('%d ', i); + end + currIds = find(uN == i); + [~, goodBoxesI] = BoxNMS(boxes(currIds,:), overlapNms); + keepIds(currIds) = goodBoxesI; + end + boxClfs = boxClfs(keepIds); + boxIms = boxIms(keepIds); + boxes = boxes(keepIds,:); + fprintf('\n'); +end + + + +% Save detection results using detection results +savePath = fullfile(DATAopts.resdir, 'Main', ['%s_det_', set, '_%s.txt']); +resultsName = sprintf(savePath, compName, [objName '-' partName]); +fid = fopen(resultsName,'w'); +for j=1:length(boxIms) + fprintf(fid,'%s %f %f %f %f %f\n', boxIms{j}, boxClfs(j),boxes(j,:)); +end +fclose(fid); +fprintf('\n'); + +if doEval + [recall, prec, ap] = VOCevaldetParts_modified(DATAopts, partName, objName, resultsName, false); + apUpperBound = max(recall); +else + recall = 0; + prec = 0; + ap = 0; + apUpperBound = 0; +end diff --git a/matconvnet-calvin/examples/parts/VOC_modified/VOCevaldetParts_modified.m b/matconvnet-calvin/examples/parts/VOC_modified/VOCevaldetParts_modified.m new file mode 100644 index 0000000..f4373b9 --- /dev/null +++ b/matconvnet-calvin/examples/parts/VOC_modified/VOCevaldetParts_modified.m @@ -0,0 +1,141 @@ +function [rec,prec,ap] = VOCevaldetParts_modified(DATAopts, cls, obj, loadName, draw, flipBoxes) + +% load test set +tic; + +gtids = GetImagesPlusLabels(DATAopts.testset); + +for i=1:length(gtids) + % display progress + if toc>1 + fprintf('%s: pr: load: %d/%d\n',cls,i,length(gtids)); + drawnow; + tic; + end + +% % Create 
annotation struct as if it was being read from file + recs(i).objects.class = []; + recs(i).objects.bbox = []; + recs(i).objects.difficult = []; +% + % add parts belonging to object class + objIm = DATAopts.imdbTest.parts{i}; + idxObj = 1; + for kk = 1:size(objIm) + if strcmp(DATAopts.imdbTest.objects{i}.class(kk), obj) && ~isempty(objIm{kk}) + for ll = 1:size(objIm{kk}.class_id,1) + recs(i).objects(idxObj).class = DATAopts.imdbTest.prt_classes{DATAopts.imdbTest.obj_class2id(obj)}{objIm{kk}.class_id(ll)}; + recs(i).objects(idxObj).bbox = objIm{kk}.bbox(ll,:); + recs(i).objects(idxObj).difficult = objIm{kk}.difficult(ll); + idxObj = idxObj + 1; + end + end + end +end + + +fprintf('%s: pr: evaluating detections\n',cls); + +% hash image ids +hash=VOChash_init_modified(gtids); + +% extract ground truth objects + +npos=0; +gt(length(gtids))=struct('BB',[],'diff',[],'det',[]); +for i=1:length(gtids) + % extract parts of class + clsinds=strcmp(cls,{recs(i).objects.class}); + gt(i).BB=cat(1,recs(i).objects(clsinds).bbox)'; + gt(i).diff=[recs(i).objects(clsinds).difficult]; + gt(i).det=false(length(clsinds),1); + npos=npos+sum(~gt(i).diff); +end + +% load results +% [ids,confidence,b1,b2,b3,b4]=textread(sprintf(DATAopts.detrespath,id,cls),'%s %f %f %f %f %f'); +[ids,confidence,b1,b2,b3,b4]=textread(loadName,'%s %f %f %f %f %f'); + +if exist('flipBoxes', 'var') && flipBoxes == true + BB=[b2 b1 b4 b3]'; +else + BB=[b1 b2 b3 b4]'; +end + +% sort detections by decreasing confidence +[sc,si]=sort(-confidence); +ids=ids(si); +BB=BB(:,si); + +% assign detections to ground truth objects +nd=length(confidence); +tp=zeros(nd,1); +fp=zeros(nd,1); +tic; +for d=1:nd + % display progress + if toc>1 + fprintf('%s: pr: compute: %d/%d\n',cls,d,nd); + drawnow; + tic; + end + + % find ground truth image + i=VOChash_lookup_modified(hash,ids{d}); + if isempty(i) + error('unrecognized image "%s"',ids{d}); + elseif length(i)>1 + error('multiple image "%s"',ids{d}); + end + + % assign detection to 
ground truth object if any + bb=BB(:,d); + ovmax=-inf; + for j=1:size(gt(i).BB,2) + bbgt=gt(i).BB(:,j); + bi=[max(bb(1),bbgt(1)) ; max(bb(2),bbgt(2)) ; min(bb(3),bbgt(3)) ; min(bb(4),bbgt(4))]; + iw=bi(3)-bi(1)+1; + ih=bi(4)-bi(2)+1; + if iw>0 & ih>0 + % compute overlap as area of intersection / area of union + ua=(bb(3)-bb(1)+1)*(bb(4)-bb(2)+1)+... + (bbgt(3)-bbgt(1)+1)*(bbgt(4)-bbgt(2)+1)-... + iw*ih; + ov=iw*ih/ua; + if ov>ovmax + ovmax=ov; + jmax=j; + end + end + end + % assign detection as true positive/don't care/false positive + if ovmax>=DATAopts.minoverlap + if ~gt(i).diff(jmax) + if ~gt(i).det(jmax) + tp(d)=1; % true positive + gt(i).det(jmax)=true; + else + fp(d)=1; % false positive (multiple detection) + end + end + else + fp(d)=1; % false positive + end +end + +% compute precision/recall +fp=cumsum(fp); +tp=cumsum(tp); +rec=tp/npos; +prec=tp./(fp+tp); + +ap=VOCap(rec,prec); + +if draw + % plot precision/recall + plot(rec,prec,'-'); + grid; + xlabel 'recall' + ylabel 'precision' + title(sprintf('class: %s, subset: %s, AP = %.3f',cls,DATAopts.testset,ap)); +end diff --git a/matconvnet-calvin/examples/parts/calvinNNPartDetection.m b/matconvnet-calvin/examples/parts/calvinNNPartDetection.m new file mode 100644 index 0000000..cb999ea --- /dev/null +++ b/matconvnet-calvin/examples/parts/calvinNNPartDetection.m @@ -0,0 +1,83 @@ +% function calvinNNDetection() +% +% Copyright by Holger Caesar, 2016 + +% Global variables +global glDatasetFolder glFeaturesFolder; +assert(~isempty(glDatasetFolder) && ~isempty(glFeaturesFolder)); + +%%% Settings +% Dataset +vocYear = 2010; +trainName = 'train'; +testName = 'val'; +vocName = sprintf('VOC%d', vocYear); +datasetDir = [fullfile(glDatasetFolder, vocName), '/']; + + +% Specify paths +outputFolder = fullfile(glFeaturesFolder, 'CNN-Models', 'Parts', vocName, sprintf('%s-testRelease', vocName)); +netPath = fullfile(glFeaturesFolder, 'CNN-Models', 'matconvnet', 'imagenet-caffe-alex.mat'); +logFilePath = 
fullfile(outputFolder, 'log.txt'); + +% Fix randomness +randSeed = 42; +rng(randSeed); + +% Setup dataset specific options and check validity +setupDataOptsPrts(vocYear, testName, datasetDir); +global DATAopts; % Database specific paths +assert(~isempty(DATAopts), 'Error: Dataset not initialized properly!'); + + +% Task-specific +nnOpts.testFn = @testPartDetection; +nnOpts.misc.overlapNms = 0.3; +% Objectives for both parts and objects +nnOpts.derOutputs = {'objectivePrt', 1, 'objectiveObj', 1, 'regressObjectivePrt', 1, 'regressObjectiveObj', 1}; + +% General +nnOpts.batchSize = 2; +nnOpts.numSubBatches = nnOpts.batchSize; % 1 image per sub-batch +nnOpts.weightDecay = 5e-4; +nnOpts.momentum = 0.9; +nnOpts.numEpochs = 16; +nnOpts.learningRate = [repmat(1e-3, 12, 1); repmat(1e-4, 4, 1)]; +nnOpts.misc.netPath = netPath; +nnOpts.expDir = outputFolder; +nnOpts.convertToTrain = 0; % perform explicit conversion to our architecure +nnOpts.fastRcnn = 0; +nnOpts.bboxRegress = 1; +nnOpts.gpus = []; % for automatic selection use: SelectIdleGpu(); + +% Create outputFolder +if ~exist(outputFolder, 'dir') + mkdir(outputFolder); +end + +% Start logging +diary(logFilePath); + +%%% Setup +% Start from pretrained network +net = load(nnOpts.misc.netPath); + +% Setup imdb +imdb = setupImdbPartDetection(trainName, testName, net); + +% Create calvinNN CNN class +% Do not transform into fast-rcnn with bbox regression +calvinn = CalvinNN(net, imdb, nnOpts); + +% Perform here the conversion to part/obj architecture +calvinn.convertNetworkToPrtObjFastRcnn; + +%%% Train +calvinn.train(); + +%%% Test +stats = calvinn.testPrtObj(); + +% TEST EVAL CODE WITH LOADED STATS +%%% Eval +evalPartAndObjectDetection(testName, stats, nnOpts); diff --git a/matconvnet-calvin/examples/parts/computeIoATableSingle.cpp b/matconvnet-calvin/examples/parts/computeIoATableSingle.cpp new file mode 100644 index 0000000..ea44673 --- /dev/null +++ b/matconvnet-calvin/examples/parts/computeIoATableSingle.cpp @@ -0,0 
+1,95 @@ +#include "mex.h" +#include +#include +#include + +using namespace std; + +float computeOverlapBBoxes(float bb1x1, float bb1y1, float bb1x2, float bb1y2, float area1, + float bb2x1, float bb2y1, float bb2x2, float bb2y2, float area2) { + float xmin = std::max(bb1x1, bb2x1); + float xmax = std::min(bb1x2, bb2x2); + + if ((xmin > xmax)) // || (ymin > ymax)) + // { + //mexPrintf("zero: %.4f %.4f ; %.4f %.4f\n", xmin, xmax, ymin, ymax); + return 0; + //} + float ymin = std::max(bb1y1, bb2y1); + float ymax = std::min(bb1y2, bb2y2); + if (ymin > ymax) + return 0; + else { + float intersectionArea = (xmax - xmin + 1) * (ymax - ymin + 1); + //mexPrintf("Intersection: %.4f %.4f ; %.4f %.4f\n", xmin, xmax, ymin, ymax); + //mexPrintf("Intersection area: %.4f\n",intersectionArea); + //mexPrintf("Area1: %.4f\n",area1); + return intersectionArea/ area1; + } + +} + +double diffclock(clock_t clock1,clock_t clock2) +{ + double diffticks=clock1-clock2; + double diffms=(diffticks)/CLOCKS_PER_SEC; + return diffms; +} + +void mexFunction(int nlhs, mxArray *plhs[], + int nrhs, const mxArray *prhs[]) +{ +// input: two arrays of size nbWindows x 4 (x1,y1,x2,y2) and nbWindows2 x 4 +// output: overlap table of size nbWindows x nbWindows2 + float * bboxes, *overlapTable, *bboxes2; + int nbWindows, nbWindows2; + int dim = 4; + + //check number of inputs + if (nrhs != 2) { + mexErrMsgTxt("Two input args required"); + } + + // check dimension of input + if (mxGetN(prhs[0]) != 4 || mxGetN(prhs[1]) != 4) { + mexErrMsgTxt("Dimension should be 4"); + } + + // load input data + nbWindows = mxGetM(prhs[0]); + bboxes = (float*) mxGetData(prhs[0]); + bboxes2 = (float*) mxGetData(prhs[1]); + nbWindows2 = mxGetM(prhs[1]); + + // create output matrix + int dims[2]; + dims[0] = nbWindows; + dims[1] = nbWindows2; + plhs[0] = mxCreateNumericArray(2, dims, mxSINGLE_CLASS, mxREAL); + overlapTable = (float*) mxGetData(plhs[0]); + + //mxSINGLE_CLASS + // precompute area of each bbox (?) 
+ vector area(nbWindows); + vector area2(nbWindows2); + for (int i = 1; i <= nbWindows; ++i) { + area[i-1] = (bboxes[i-1 +2*nbWindows]- bboxes[i-1] + 1)*(bboxes[i-1 + 3*nbWindows] - bboxes[i-1 + nbWindows] + 1); + } + for(int i = 0; i1fDMn$?lxK3?8e=< zZGu)(($wxOivb2KDl<3((s3Lq3K(m_Ev-$*&svxv>Wp@D7&m}pX|&c_jK6c=J?Y!c zuH*QJe>l0BymQa*oO92;=e~R2zIV?&L`msatYEG^2LZVM0uWa7uuDl;VH?m68Q@$_mrtMJ7J3DBBTBcE^<6F=fZJLg_PA z!w|)=q5os#I1jBdVZoGiskFXh_^G~co@8gx>rGfNWxKmzNB#UyONCP0sQeX*ORE|m zQ#Bkpwo0^N$A9C7cF{R9&`@>KxWlpF?OVH-P44~SOD}CC7i2yk{~Z5{S}=o7Tul7i z_C1%}yz}W-B~ajE7Z;0@bFzgGgI$mfNsjzD^Wg0T@DsrEwX?o}KE=(6|J?Ew0QuUX zd2;3|gHPZ;pZ<3X;3Ea_cmX_90RL$L{8t6=+Y!+D;)5xOvq})$0#Pn1E;ivtz7ci3 zyEhWnV}>_s=(^CG+FEpfAR6cn#*9F;t))H`2?yG|U7>&}%P-M={a$@*FzgKlAB5uV z+w_g9h5C>;77N5gZ=ipDJnS=qkucRg(P&GoyRF|4z5R^=V}nUrOw!;ryr|Vj170Jr zA>JE^27T^m)H|TxQe7?ddp8*?Zqaq4CmPwVcX|D~Hx!Eawm}fy-ixb0)C#6OygeB9 zqaQIeATZ4Se!VXcjYX)F#(=)9T7-gKz5T9OL{N`5zbxryT*G;0axv35;;8oTM zp>@c`tQTW#mSFvv3)qGgNPks7e_}4J+bi&Y1Ll?rks@68-6aIcsL;GqA)6(r^L^jC zbl?(!b~jARi$sNzlm448K7j4HSlq2JWBKFQ679mP^m)JKeX{Q&vJzrc?W>GeE47!1e4X2Hg%ykkxQg&FHiK&~57CFV#)y23vmeis~d=CjH zPEMg*eGaBQHsX&<{xtCv(uuU>-zABNZSPY_R`oEVh+ zkBO&gn&^}K8^lvcC+?U00pe-OCt4-{9pWjp6Sd&AR7o0umRd)6ySDtSmUu@SIy-NB zQRiU{yqts>`0386lQcf)Crtf^E&7Mf+O@f~dx!BOFdNsZnkD-%e|eYsXGI?kjm5+9vAW?7rLG?rz(p?RStxdZqXmNSYF3TEep@;Ta{19@7*n}Tt2w-_B(sy`skP5g6Pf3v@*C*S$S47b-=t%H@x$6s+6MLb z>G#UCF%LcEMU(Hh)LwmVdn>H%wQ+mHH2VdbyhIz5VrsQH*}lg!eBukS##Hfw##DKQ zv^hWv4Rcp=9>Z%$b|O-pBbx7+JGEA5zA4Rj8uM6Dc@WnvZ7-k4>>h{622ovVRfXm| zL*jBNE`J$Pn(-fFVCBi7H*7REcg>qSUi1u~aXk7rxT;T8uC7a!uav&Y?J(SG$yaD1 z8j>lBesl6k8c}L(5u8A!@^#!{mAlDvL#o);;!C3gr6j9^XW(1Pp`jeR;aP^%U32K6 zgNY|;MJG~i)Oz9-TFzQ>$DYJlTiQ03c^e{CHzH7>sxqp&P`$oNOM3R9*)^cgf>wj>1znbT;QZ7S)aK*H8#-)5J2jqBM`{aP zzgLRhp^w^-9D*$=2?Adu?`6j$gK{7*Cp-tWC$waWrldKw>anB@qKT#|AJY=;`;t#mC>698(2i#jom9il^SHhD{s0aU zx`XhN@a&)Xo?MWxkrQor3J-*9&&g>RA#Qc%3UKe;s3j26X`PERrG#RO1iC{imSiUX z4ztAXG7pgX&Lex_-La>L4ss3pAzb%?Zv4m8 
z6s^=A(Bq&~_;-`-!HvS!UtwEZKBsI1cYGCO^joV8ZKOfPyo&Yn7TxZ+WP915xb5;= zuf3`2DuPL;0sq7BLGLQUJ+ESSQT^P~qE@tI%Kd;3K)%o_ze|dxJ;AmITbpGM=6rST ze5Y8<{h+Z>Uq)b0ztxLB0fv3Fe#!o5ajN8z(xEx_mBnU*5Mz!PERD}3b${)Y9o>U*<`P=#;`mBe--_CoLK7TV`WQs4GF*uzPQD&k{>GS(P zpCNoy*^MhljLXAh$he>5N{`PN*dIRl$o}`Mj^lHmI2sz`pnlH>DAmi(0BHz+wi zKS;S6zViCNCwy+6>v^FH$WldDE4p6MyA|E4=r%?1MM9RR6#a&xuPVytMeGkJYM*p2 zYwwDOjkt4_YmIB=4J&Hml3lUf1cY$K26_!|7pM_6X%E-J5hLL04#!(5QB$%FM}%}nDb*CCD=TK98V!WJWUp#{Aw#%=;UKO?0Ee0`oP`D< zi}>j%(-r8^w?@6a0lmi$Tjt;cXN!S;U!c#>(Qh2F$~JT_opqXRaQ!nr7{Z^oH|T?1 z1TJuKNx!C#u2@XCa6*aiWd=v@zo)c*X$|oGo0}Iw-=?eupQy5m3)~sp!)(v%n&~p7 z&G)yO{^>T>phjz&?RniZ<^GUg(qVgEkF~(4ZMNrqgemVEP?1G#3(N`)HCm%=&+C%u zQl-iLXFaC$O_25g=6PRWO5>xF+kX=k!jB5=8Ent{2GdrhSjawRX(;)xu4e$FxF{h#%7`~S4E=lSbV&q=1WY^W&5kKwPQLj7m^-25W? zK5Q+y{$GU9YCo?0GkwcyjC!trI!LqHf6HMSFwLDm(zTYKfFYNBe{miG=Pl*>%jMsO zExBZS&Nmpn5+F*helGt9=un^7K7FMVoDk{%&^`lQx_8;0pM#uN#`$9$clOWs-v_X@ z`se(I@g)$TEHlaUmF=0Hfy8ProK90$kv@}~zOo%tIvlgw^K-RN*>gX+0k&hl1jdB% z_y?8$L1mw-Prj_>VgQ6yTS3&>zA6W2d#03w#=fnp&pA!*Bdn-|Ijr=}YsYH8X1U3Y zsEvtkhnw-w~c&kX~ z#pep%XD#YzXP)5wGaGkI=Uryw6@vG>Y<#}peJmSaF#Z0LjelhNy(Js3#D3zmLb7fb zmm>c(AHGoVK43Mn3%q?e^XV@Vyl%7gFT*;@r=Pnkn`^K*Pd*^M&)IMan%jSRk1NVE zFZ6qk@KZVS`q(P*nen9OHMP(0uV7^{pQAJ9WfanU<03!#aLOCC z;a9>w^&FFp<8o5sGvh$=eC@n1^%qU&m2o)e_fV0TdEd?hZp$;jRZ_oPJg&N}ex+5})b!i@ko-e<3yg)mDEP&gwf9LDx6~OcLb9Djzbp>#r z#AoK)(hvH4eg_KRy9?ki6u=J@z=bbr#EdvH&3wp(y}f>;-rTgQ4GF&&a(#89SNBo2 zZ%lBKuLaU|e?;#NMY_Bp-ETyqG2I*Qml?64fD!P!R@dCJ5-sOZqP*as`5vp|-84EN z%y(gbytj7%CKgVoSLiI&{3*A-zCIYP-{@}f=$;J?l(#I$WY!xtLUGtn9R_V7q~%2^t4NVETZ># z!+y$*zPH97+`1L<^F0s>K@YFvW>U4lp#8YWw{*SXo(=Anrg|Yq%DK 0.4 + imdb.objects{i}.viewpoint{oo, 1} = ob.view(objIndx(iIoU)); + else + imdb.objects{i}.viewpoint{oo, 1} = []; + end + + obj_im = img(imdb.objects{i}.bbox(oo, 2):imdb.objects{i}.bbox(oo, 4), ... 
+ imdb.objects{i}.bbox(oo, 1):imdb.objects{i}.bbox(oo, 3), :); + imdb.objects{i}.sizes(oo, 1) = size(obj_im, 1); + imdb.objects{i}.sizes(oo, 2) = size(obj_im, 2); + + % get annotations for all parts of object 'oo' + obj = anno.objects(oo); + + % no part annotations for this object instances + if numel(obj.parts) == 0 + imdb.parts{i, 1}{oo, 1} = []; + end + + % add field to the structure + [obj.parts(:).skip]=deal(0); + pc = 0; + + % ------------- LOOP OVER THE PARTS IN THE OBJECT + for pp = 1:numel(obj.parts) + + % if we flagged a part as 'already visited' (in case we merged + % it to a previously selected part), then skip it. + if obj.parts(pp).skip + continue; + end + + % Check if we are ignoring this part, skip if so + if sum(strcmp(obj.parts(pp).part_name, imdb.prt_original_class2id{imdb.objects{i}.class_id(oo, 1)}.keys)) == 0 + obj.parts(pp).skip = 1; + continue; + end + + + % part counter. using pp directly would cause 'wholes' in the + % structure when skipping a part + pc = pc + 1; + + % some parts in the dataset need to be merged (for example: + % left upper arm and left lower arm -> left arm - which for use is just arm) + classes_with_possible_merge = [10, 13, 15, 17]; + parts2merge = {'lfuleg', 'lflleg', 'rfuleg', 'rflleg', ... + 'lbuleg', 'lblleg', 'rbuleg', 'rblleg', ... + 'llarm', 'luarm', 'rlarm', 'ruarm', ... + 'llleg', 'luleg', 'rlleg', 'ruleg'}; + if ismember(imdb.objects{i}.class_id(oo), classes_with_possible_merge) && ... 
+ ~isempty(strmatch(obj.parts(pp).part_name, parts2merge)) %#ok + + % find the other part (name) that should be merged with current + position = find(strncmp(obj.parts(pp).part_name, parts2merge,length(obj.parts(pp).part_name))); + + if mod(position, 2) + merge_with_name = parts2merge(position + 1); + else + merge_with_name = parts2merge(position - 1); + end + + % we know the name, let's look for it + position_merge_with = find(strcmp({obj.parts.part_name}, merge_with_name) == 1); + + if isempty(position_merge_with) + + % the part we want to merge with is occluded + imdb.parts{i, 1}{oo, 1}.class{pc, 1} = obj.parts(pp).part_name; + imdb.parts{i}{oo}.class_id(pc, 1) = imdb.prt_original_class2id{obj.class_ind}(obj.parts(pp).part_name); + imdb.parts{i}{oo}.bbox(pc, :) = getSegmentBoundingBox(obj.parts(pp).mask); + + else + imdb.parts{i, 1}{oo, 1}.class{pc, 1} = [obj.parts(pp).part_name, '+', obj.parts(position_merge_with).part_name]; + imdb.parts{i}{oo}.class_id(pc, 1) = imdb.prt_original_class2id{obj.class_ind}(obj.parts(pp).part_name); + + mask = obj.parts(pp).mask + obj.parts(position_merge_with).mask; + mask(mask > 1) = 1; + imdb.parts{i}{oo}.bbox(pc, :) = getSegmentBoundingBox(mask); + + % TEST for correctness: + % display the two individual boxes + the new (merged) one + % X = [getSegmentBoundingBox(obj.parts(pp).mask); getSegmentBoundingBox(obj.parts(position_merge_with).mask); imdb.parts{i}{oo}.bbox(pp, :)]; + % showboxes(img, X); + + obj.parts(position_merge_with).skip = 1; + end + + else + % get bounding box of the part + imdb.parts{i, 1}{oo, 1}.class{pc, 1} = obj.parts(pp).part_name; + imdb.parts{i}{oo}.class_id(pc, 1) = imdb.prt_original_class2id{obj.class_ind}(obj.parts(pp).part_name); + imdb.parts{i}{oo}.bbox(pc, :) = getSegmentBoundingBox(obj.parts(pp).mask); + + end + + % size of the part + prt_im = img(imdb.parts{i}{oo}.bbox(pc, 2):imdb.parts{i}{oo}.bbox(pc, 4), ... 
+ imdb.parts{i}{oo}.bbox(pc, 1):imdb.parts{i}{oo}.bbox(pc, 3), :); + imdb.parts{i}{oo}.sizes(pc, 1) = size(prt_im, 1); + imdb.parts{i}{oo}.sizes(pc, 2) = size(prt_im, 2); + + % flag the part difficult if it is smaller than 20x20 + if imdb.parts{i}{oo}.sizes(pc, 1) < 20 || imdb.parts{i}{oo}.sizes(pc, 2) < 20 + imdb.parts{i}{oo}.difficult(pc, :) = 1; + else + imdb.parts{i}{oo}.difficult(pc, :) = 0; + end + + % 4 columns matrix: + % col 1: part ID (within the object class) + % col 2: object ID + % col 3: object index within the image (some images have multiple objects) + % col 4: image number + imdb.mapping = [imdb.mapping; [imdb.parts{i}{oo}.class_id(pc), imdb.objects{i}.class_id(oo), oo, i]]; + end + + end +end + diff --git a/matconvnet-calvin/examples/parts/evalPartAndObjectDetection.m b/matconvnet-calvin/examples/parts/evalPartAndObjectDetection.m new file mode 100644 index 0000000..6d07b64 --- /dev/null +++ b/matconvnet-calvin/examples/parts/evalPartAndObjectDetection.m @@ -0,0 +1,168 @@ +function evalPartAndObjectDetection(testName, stats, nnOpts) + +global DATAopts; + +% Get test images +trash = load(sprintf(DATAopts.imdb, testName)); +imdbTest = trash.imdb; +clear trash; +% Use images with at least one part +testIms = imdbTest.image_ids(unique(imdbTest.mapping(:,4))); + +% get image sizes +testCount = length(testIms); +imSizes = imdbTest.sizes(unique(imdbTest.mapping(:,4)),:); + +% Add imdb to DATAopts for part detection evaluation +DATAopts.imdbTest = imdbTest; + +[DATAopts.prt_classes, idxPartGlobal2idxClass] = getPartNames(imdbTest); + +%% Parts + +for cI = 1:105 + %% + currBoxes = cell(length(testIms), 1); + currScores = cell(length(testIms), 1); + for i=1:length(testIms) + currBoxes{i} = stats.results(i).boxesPrt{cI+1}; + currScores{i} = stats.results(i).scoresPrt{cI+1}; + end + + [currBoxes, fileIdx] = Cell2Matrix(gather(currBoxes)); + [currScores, fileIdx2] = Cell2Matrix(gather(currScores)); + + assert(isequal(fileIdx, fileIdx2)); % Should be equal + + 
currFilenames = testIms(fileIdx); + + [~, sI] = sort(currScores, 'descend'); + currScores = currScores(sI); + currBoxes = currBoxes(sI,:); + currFilenames = currFilenames(sI); + + [recallPrt{cI}, precPrt{cI}, apPrt(cI,1), upperBoundPrt{cI}] = ... + DetectionPartsToPascalVOCFiles(testName, cI, idxPartGlobal2idxClass(cI+1) , currBoxes, currFilenames, currScores, ... + 'Matconvnet-Calvin-Prt', 1, 0); + apPrt(cI) +end + +if isfield(stats.results(1), 'boxesRegressedPrt') + for cI = 1:105 + %% + currBoxes = cell(length(testIms), 1); + currScores = cell(length(testIms), 1); + for i=1:length(testIms) + % Get regressed boxes and refit them to the image + currBoxes{i} = stats.results(i).boxesRegressedPrt{cI+1}; + currBoxes{i}(:,1) = max(currBoxes{i}(:,1), 1); + currBoxes{i}(:,2) = max(currBoxes{i}(:,2), 1); + currBoxes{i}(:,3) = min(currBoxes{i}(:,3), imSizes(i,2)); + currBoxes{i}(:,4) = min(currBoxes{i}(:,4), imSizes(i,1)); + + currScores{i} = stats.results(i).scoresRegressedPrt{cI+1}; + end + + [currBoxes, fileIdx] = Cell2Matrix(gather(currBoxes)); + [currScores, fileIdx2] = Cell2Matrix(gather(currScores)); + + isequal(fileIdx, fileIdx2) % Should be equal + + currFilenames = testIms(fileIdx); + + [~, sI] = sort(currScores, 'descend'); + currScores = currScores(sI); + currBoxes = currBoxes(sI,:); + currFilenames = currFilenames(sI); + + %% + [recallPrt{cI}, precPrt{cI}, apRegressedPrt(cI,1), upperBoundPrt{cI}] = ... + DetectionPartsToPascalVOCFiles(testName, cI, idxPartGlobal2idxClass(cI+1) , currBoxes, currFilenames, currScores, ... 
+ 'Matconvnet-Calvin-Prt', 1, 0); + apRegressedPrt(cI) + end + + apRegressedPrt + mean(apRegressedPrt) + +else + apRegressedPrt = 0; +end + + +for cI = 1 : 20 + % + currBoxes = cell(testCount, 1); + currScores = cell(testCount, 1); + for i = 1 : testCount + currBoxes{i} = stats.results(i).boxesObj{cI + 1}; + currScores{i} = stats.results(i).scoresObj{cI + 1}; + end + + [currBoxes, fileIdx] = Cell2Matrix(gather(currBoxes)); + [currScores, fileIdx2] = Cell2Matrix(gather(currScores)); + + assert(isequal(fileIdx, fileIdx2)); % Should be equal + + currFilenames = testIms(fileIdx); + + [~, sI] = sort(currScores, 'descend'); + currScores = currScores(sI); + currBoxes = currBoxes(sI,:); + currFilenames = currFilenames(sI); + + [recallObj{cI}, precObj{cI}, apObj(cI,1), upperBoundObj{cI}] = ... + DetectionToPascalVOCFiles(testName, cI, currBoxes, currFilenames, currScores, ... + 'Matconvnet-Calvin-Obj', 1, nnOpts.misc.overlapNms); + apObj(cI) +end + +apObj +mean(apObj) + +if isfield(stats.results(1), 'boxesRegressedObj') + for cI = 1 : 20 + % + currBoxes = cell(testCount, 1); + currScores = cell(testCount, 1); + + for i=1:testCount + % Get regressed boxes and refit them to the image + currBoxes{i} = stats.results(i).boxesRegressedObj{cI+1}; + currBoxes{i}(:,1) = max(currBoxes{i}(:, 1), 1); + currBoxes{i}(:,2) = max(currBoxes{i}(:, 2), 1); + currBoxes{i}(:,3) = min(currBoxes{i}(:, 3), imSizes(i,2)); + currBoxes{i}(:,4) = min(currBoxes{i}(:, 4), imSizes(i,1)); + + currScores{i} = stats.results(i).scoresRegressedObj{cI+1}; + end + + [currBoxes, fileIdx] = Cell2Matrix(gather(currBoxes)); + [currScores, fileIdx2] = Cell2Matrix(gather(currScores)); + + assert(isequal(fileIdx, fileIdx2)); % Should be equal + + currFilenames = testIms(fileIdx); + + [~, sI] = sort(currScores, 'descend'); + currScores = currScores(sI); + currBoxes = currBoxes(sI, :); + currFilenames = currFilenames(sI); + + % ShowImageRects(currBoxes(1:32, [2 1 4 3]), 4, 4, currFilenames(1:32), currScores(1:32)); + + 
% + [recallObj{cI}, precObj{cI}, apRegressedObj(cI,1), upperBoundObj{cI}] = ... + DetectionToPascalVOCFiles(testName, cI, currBoxes, currFilenames, currScores, ... + 'Matconvnet-Calvin-Obj', 1, nnOpts.misc.overlapNms); + apRegressedObj(cI) + end + + apRegressedObj + mean(apRegressedObj) +else + apRegressedObj = 0; +end + +% Save results to disk +save([nnOpts.expDir, '/', 'resultsEpochFinalTest.mat'], 'nnOpts', 'stats', 'apPrt', 'apRegressedPrt', 'apObj', 'apRegressedObj'); \ No newline at end of file diff --git a/matconvnet-calvin/examples/parts/getPartNames.m b/matconvnet-calvin/examples/parts/getPartNames.m new file mode 100644 index 0000000..cdaa4a9 --- /dev/null +++ b/matconvnet-calvin/examples/parts/getPartNames.m @@ -0,0 +1,14 @@ +function [allPartNames, idxPartGlobal2idxClass] = getPartNames(imdb) + idxPartGlobal2idxClass = zeros(imdb.prt_num_classes+1,1); + allPartNames = cell(imdb.prt_num_classes+1,1); + numPartsCls = cellfun(@(x) size(x,1), imdb.prt_classes); + + k = 1; + % Create map from idxPartGlobal to the class + for idxClass = 1:imdb.obj_num_classes + idxPartGlobal2idxClass(k+1:k+numPartsCls(idxClass)) = ones(numPartsCls(idxClass),1)*idxClass; + allPartNames(k+1:k+numPartsCls(idxClass)) = imdb.prt_classes{idxClass}; + k = k + numPartsCls(idxClass); + end + allPartNames = allPartNames(2:end); +end \ No newline at end of file diff --git a/matconvnet-calvin/examples/parts/imdb/ImdbPartDetectionJointObjPrt.m b/matconvnet-calvin/examples/parts/imdb/ImdbPartDetectionJointObjPrt.m new file mode 100644 index 0000000..d5a9d5a --- /dev/null +++ b/matconvnet-calvin/examples/parts/imdb/ImdbPartDetectionJointObjPrt.m @@ -0,0 +1,352 @@ +classdef ImdbPartDetectionJointObjPrt < ImdbMatbox + properties(SetAccess = protected, GetAccess = public) + negOverlapRange = [0.1 0.3]; + posOverlap = 0.6; + boxesPerIm = 128; + boxRegress = true; + instanceWeighting = false; + numClassesPrt = 1; + numClassesObj = 1; + end + methods + function obj = 
ImdbPartDetectionJointObjPrt(imageDir, imExt, matboxDir, filenames, datasetIdx, meanIm) + obj@ImdbMatbox(imageDir, imExt, matboxDir, filenames, datasetIdx, meanIm); + obj.minBoxSize = 10; + gStruct = obj.LoadGStruct(1); + obj.numClassesPrt = size(gStruct.overlapPrt, 2) + 1; + obj.numClassesObj = size(gStruct.overlapObj, 2) + 1; + end + + function [batchData, numElements] = getBatch(obj, batchInds, net, ~) + if length(batchInds) > 1 + error('Only supports batches of 1'); + end + + if nargin == 2 + gpuMode = false; + else + gpuMode = strcmp(net.device, 'gpu'); + end + + % Load image. Make correct size. Subtract average im. + [image, oriImSize] = obj.LoadImage(batchInds, gpuMode); + + % Sample boxes + gStruct = obj.LoadGStruct(batchInds); + + % Flip the image and boxes at training time + % Note: flipLR alternates between true and false in ImdbMatbox.initEpoch() + if obj.flipLR && strcmp(obj.datasetMode, 'train') + currImT = fliplr(image); + currBoxesT = gStruct.boxes; + currBoxesT(:,3) = oriImSize(2) - gStruct.boxes(:,1) + 1; + currBoxesT(:,1) = oriImSize(2) - gStruct.boxes(:,3) + 1; + gStruct.boxes = currBoxesT; + image = currImT; + end + + if ismember(obj.datasetMode, {'train', 'val'}) + [boxesPrt, labelsPrt, overlapScoresPrt, regressionFactorsPrt] = obj.SamplePosAndNegFromGstructPrt(gStruct, obj.boxesPerIm); + [boxesObj, labelsObj, overlapScoresObj, regressionFactorsObj] = obj.SamplePosAndNegFromGstructObj(gStruct, obj.boxesPerIm); + + % Assign elements to cell array for use in training the network + % WHAT IF one batch is smaller than another? 
--> deal here + numElements = min(obj.boxesPerIm, size(boxesPrt,1)); + + % Input image and size are shared for Obj and Prt + numBatchFields = 4 + 4 * (2 + obj.boxRegress + obj.instanceWeighting); + batchData = cell(numBatchFields, 1); + idx = 1; + batchData{idx} = 'input'; idx = idx + 1; + batchData{idx} = image; idx = idx + 1; + batchData{idx} = 'labelPrt'; idx = idx + 1; + batchData{idx} = labelsPrt'; idx = idx + 1; + batchData{idx} = 'boxesPrt'; idx = idx + 1; + batchData{idx} = boxesPrt'; idx = idx + 1; + batchData{idx} = 'labelObj'; idx = idx + 1; + batchData{idx} = labelsObj'; idx = idx + 1; + batchData{idx} = 'boxesObj'; idx = idx + 1; + batchData{idx} = boxesObj'; idx = idx + 1; + batchData{idx} = 'oriImSize'; idx = idx + 1; + batchData{idx} = oriImSize; idx = idx + 1; + + if obj.boxRegress + batchData{idx} = 'regressionTargetsPrt'; idx = idx + 1; + batchData{idx} = regressionFactorsPrt'; idx = idx + 1; + batchData{idx} = 'regressionTargetsObj'; idx = idx + 1; + batchData{idx} = regressionFactorsObj'; idx = idx + 1; + end + if obj.instanceWeighting + instanceWeightsPrt = overlapScoresPrt; + instanceWeightsPrt(labelsPrt == 1) = 1; + instanceWeightsPrt = reshape(instanceWeightsPrt, [1 1 1 length(instanceWeightsPrt)]); % VL-Feat way + batchData{idx} = 'instanceWeightsPrt'; idx = idx + 1; + batchData{idx} = instanceWeightsPrt; %idx = idx + 1; + instanceWeightsObj = overlapScoresObj; + instanceWeightsObj(labelsObj == 1) = 1; + instanceWeightsObj = reshape(instanceWeightsObj, [1 1 1 length(instanceWeightsObj)]); % VL-Feat way + batchData{idx} = 'instanceWeightsObj'; idx = idx + 1; + batchData{idx} = instanceWeightsObj; %idx = idx + 1; + end + + else + % Test set. 
Get all boxes + boxesPrt = obj.SampleAllBoxesFromGstructPrt(gStruct); + boxesObj = obj.SampleAllBoxesFromGstructObj(gStruct); + + numElements = size(gStruct.boxes,1); + batchData{8} = oriImSize; + batchData{7} = 'oriImSize'; + batchData{6} = boxesObj'; + batchData{5} = 'boxesObj'; + batchData{4} = boxesPrt'; + batchData{3} = 'boxesPrt'; + batchData{2} = image; + batchData{1} = 'input'; + end + + end + + function [image, oriImSize] = LoadImage(obj, batchIdx, gpuMode) + % Loads an image from disk, resizes it, and subtracts the mean image + imageT = single(imread([obj.imageDir obj.data.(obj.datasetMode){batchIdx} obj.imExt])); + oriImSize = double(size(imageT)); + + % Black and white image + if numel(oriImSize) == 2 + imageT = cat(3, imageT,imageT,imageT); + oriImSize = double(size(imageT)); + end + + if numel(obj.meanIm) == 3 + for colourI = 1:3 + imageT(:,:,colourI) = imageT(:,:,colourI) - obj.meanIm(colourI); + end + else + imageT = imageT - imresize(obj.meanIm, [oriImSize(1) oriImSize(2)]); % Subtract mean im + end + + resizeFactor = 1000 / max(oriImSize(1:2)); + + if gpuMode + image = gpuArray(imageT); + image = imresize(image, resizeFactor); + else + image = imresize(imageT, resizeFactor, 'bilinear', 'antialiasing', false); + end + + end + + + function [boxes, labels, overlapScores, regressionTargets] = SamplePosAndNegFromGstructPrt(obj, gStruct, numSamples) + + % Consider only boxes for parts + allBoxes = gStruct.boxes(gStruct.boxesPrt,:); + overlap = gStruct.overlapPrt(gStruct.boxesPrt,:); + class = gStruct.classPrt(gStruct.boxesPrt,:); + numClasses = obj.numClassesPrt; + + % Get positive, negative, and true GT keys + [maxOverlap, classOverlap] = max(overlap, [], 2); + + posKeys = find(maxOverlap >= obj.posOverlap & class == 0); + negKeys = find(maxOverlap < obj.negOverlapRange(2) & maxOverlap >= obj.negOverlapRange(1) & class == 0); + gtKeys = find(class > 0); + + % If there are more gtKeys than the fraction of positives, just + % take a subset + if 
length(gtKeys) > numSamples * obj.posFraction + gtKeys = gtKeys(randperm(length(gtKeys), numSamples * obj.posFraction)); + end + + % Get correct number of positive and negative samples + numExtraPos = numSamples * obj.posFraction - length(gtKeys); + numExtraPos = min(numExtraPos, length(posKeys)); + if numExtraPos > 0 + posKeys = posKeys(randperm(length(posKeys), numExtraPos)); + else + numExtraPos = 0; + posKeys = []; + end + numNeg = numSamples - numExtraPos - length(gtKeys); + numNeg = min(numNeg, length(negKeys)); + negKeys = negKeys(randperm(length(negKeys), numNeg)); + + % Concatenate for final keys and labs + keys = cat(1, gtKeys, posKeys, negKeys); + labels = cat(1, class(gtKeys), classOverlap(posKeys), zeros(numNeg, 1)); + labels = single(labels + 1); % Add 1 for background class + boxes = allBoxes(keys,:); + + overlapScores = cat(1, ones(length(gtKeys),1), maxOverlap(posKeys), maxOverlap(negKeys)); + + % Calculate regression targets. + % Jasper: I simplify Girshick by implementing regression through four + % scalars which scale the box with respect to its center. 
+ if nargout == 4 + % Create NaN array: nans represent numbers which will not be active + % in regression + regressionTargets = nan([size(boxes,1) 4 * numClasses], 'like', boxes); + + % Get scaling factors for all positive boxes + gtBoxes = allBoxes(gtKeys,:); + for bI = 1:length(gtKeys)+length(posKeys) + % Get current box and corresponding GT box + currPosBox = boxes(bI,:); + [~, gtI] = BoxBestOverlapFastRcnn(gtBoxes, currPosBox); + currGtBox = gtBoxes(gtI,:); + + % Get range of regression target based on the label of the gt box + targetRangeBegin = 4 * (labels(bI)-1)+1; + targetRange = targetRangeBegin:(targetRangeBegin+3); + + % Set regression targets + regressionTargets(bI,targetRange) = BoxRegressionTargetGirshick(currGtBox, currPosBox); + + end + end + end + + + function [boxes, labels, overlapScores, regressionTargets] = SamplePosAndNegFromGstructObj(obj, gStruct, numSamples) + + % Consider only boxes for parts + allBoxes = gStruct.boxes(gStruct.boxesObj,:); + overlap = gStruct.overlapObj(gStruct.boxesObj,:); + class = gStruct.classObj(gStruct.boxesObj,:); + numClasses = obj.numClassesObj; + + % Get positive, negative, and true GT keys + [maxOverlap, classOverlap] = max(overlap, [], 2); + + posKeys = find(maxOverlap >= obj.posOverlap & class == 0); + negKeys = find(maxOverlap < obj.negOverlapRange(2) & maxOverlap >= obj.negOverlapRange(1) & class == 0); + gtKeys = find(class > 0); + + % If there are more gtKeys than the fraction of positives, just + % take a subset + if length(gtKeys) > numSamples * obj.posFraction + gtKeys = gtKeys(randperm(length(gtKeys), numSamples * obj.posFraction)); + end + + % Get correct number of positive and negative samples + numExtraPos = numSamples * obj.posFraction - length(gtKeys); + numExtraPos = min(numExtraPos, length(posKeys)); + if numExtraPos > 0 + posKeys = posKeys(randperm(length(posKeys), numExtraPos)); + else + numExtraPos = 0; + posKeys = []; + end + numNeg = numSamples - numExtraPos - length(gtKeys); + numNeg = 
min(numNeg, length(negKeys)); + negKeys = negKeys(randperm(length(negKeys), numNeg)); + + % Concatenate for final keys and labs + keys = cat(1, gtKeys, posKeys, negKeys); + labels = cat(1, class(gtKeys), classOverlap(posKeys), zeros(numNeg, 1)); + labels = single(labels + 1); % Add 1 for background class + boxes = allBoxes(keys,:); + + overlapScores = cat(1, ones(length(gtKeys),1), maxOverlap(posKeys), maxOverlap(negKeys)); + + % Calculate regression targets. + % Jasper: I simplify Girshick by implementing regression through four + % scalars which scale the box with respect to its center. + if nargout == 4 + % Create NaN array: nans represent numbers which will not be active + % in regression + regressionTargets = nan([size(boxes,1) 4 * numClasses], 'like', boxes); + + % Get scaling factors for all positive boxes + gtBoxes = allBoxes(gtKeys,:); + for bI = 1:length(gtKeys)+length(posKeys) + % Get current box and corresponding GT box + currPosBox = boxes(bI,:); + [~, gtI] = BoxBestOverlapFastRcnn(gtBoxes, currPosBox); + currGtBox = gtBoxes(gtI,:); + + % Get range of regression target based on the label of the gt box + targetRangeBegin = 4 * (labels(bI)-1)+1; + targetRange = targetRangeBegin:(targetRangeBegin+3); + + % Set regression targets + regressionTargets(bI,targetRange) = BoxRegressionTargetGirshick(currGtBox, currPosBox); + + end + end + end + + + function boxes = SampleAllBoxesFromGstructPrt(~,gStruct) + boxes = gStruct.boxes(gStruct.boxesPrt,:); + end + + function boxes = SampleAllBoxesFromGstructObj(~,gStruct) + boxes = gStruct.boxes(gStruct.boxesObj,:); + end + + % Load gStruct + function gStruct = LoadGStruct(obj,imI) + gStruct = load([obj.matBoxDir obj.data.(obj.datasetMode){imI} '.mat']); + + % Make sure that no GT boxes/labels/etc are given when using test phase + if strcmp(obj.datasetMode, 'test') + goodIds = ~(gStruct.gtPrt | gStruct.gtObj); + gStruct.gtPrt = gStruct.gtPrt(goodIds,:); + gStruct.gtObj = gStruct.gtObj(goodIds,:); + + gStruct.overlapPrt 
= gStruct.overlapPrt(goodIds,:); + gStruct.overlapObj = gStruct.overlapObj(goodIds,:); + + gStruct.boxes = gStruct.boxes(goodIds,:); + gStruct.boxesPrt = gStruct.boxesPrt(goodIds,:); + gStruct.boxesObj = gStruct.boxesObj(goodIds,:); + + gStruct.classPrt = gStruct.classPrt(goodIds,:); + gStruct.classObj = gStruct.classObj(goodIds,:); + + gStruct.insideness = gStruct.insideness(goodIds,goodIds); + end + + % Remove small boxes + [nR, nC] = BoxSize(gStruct.boxes); + badI = ((nR < obj.minBoxSize) | (nC < obj.minBoxSize)) & ~(gStruct.gtPrt | gStruct.gtObj) ; + gStruct.gtPrt = gStruct.gtPrt(~badI,:); + gStruct.gtObj = gStruct.gtObj(~badI,:); + + gStruct.overlapPrt = gStruct.overlapPrt(~badI,:); + gStruct.overlapObj = gStruct.overlapObj(~badI,:); + + gStruct.boxes = gStruct.boxes(~badI,:); + gStruct.boxesPrt = gStruct.boxesPrt(~badI,:); + gStruct.boxesObj = gStruct.boxesObj(~badI,:); + + gStruct.classPrt = gStruct.classPrt(~badI,:); + gStruct.classObj = gStruct.classObj(~badI,:); + + gStruct.insideness = gStruct.insideness(~badI,~badI); + + % Copy one of the overlap vars into 'overlap' for ImdbMatbox + % initialization of number of classes + gStruct.overlap = gStruct.overlapObj; + end + + function SetBoxRegress(obj, doRegress) + obj.boxRegress = doRegress; + end + + function SetInstanceWeighting(obj, doInstanceWeighting) + obj.instanceWeighting = doInstanceWeighting; + end + + function SetPosOverlap(obj, posOverlap) + obj.posOverlap = posOverlap; + end + + function SetNegOverlapRange(obj, negOverlapRange) + obj.negOverlapRange = negOverlapRange; + end + + end % End methods +end % End classdef diff --git a/matconvnet-calvin/examples/parts/misc/getSegmentBoundingBox.m b/matconvnet-calvin/examples/parts/misc/getSegmentBoundingBox.m new file mode 100644 index 0000000..7f35c3a --- /dev/null +++ b/matconvnet-calvin/examples/parts/misc/getSegmentBoundingBox.m @@ -0,0 +1,20 @@ +% Returns the smallest possible bounding box containing all true pixels +function bounding_boxes = 
getSegmentBoundingBox(input) + if ~iscell(input) + bounding_boxes = segmentation_bounding_box(input); + else + frames_num = length(input); + bounding_boxes = cell(frames_num, 1); + for frame = 1: frames_num + bounding_boxes{frame} = ... + segmentation_bounding_box(input{frame}); + end + end +end + +function bounding_box = segmentation_bounding_box(segmentation) + [xCoord, yCoord] = find(segmentation); + upperCorner = [min(yCoord), min(xCoord)]; + lowerCorner = [max(yCoord), max(xCoord)]; + bounding_box = [upperCorner, lowerCorner]; +end \ No newline at end of file diff --git a/matconvnet-calvin/examples/parts/part2ind_4imdb.m b/matconvnet-calvin/examples/parts/part2ind_4imdb.m new file mode 100644 index 0000000..55c975f --- /dev/null +++ b/matconvnet-calvin/examples/parts/part2ind_4imdb.m @@ -0,0 +1,369 @@ +function pimap = part2ind_4imdb() +% Define the part index of each objects. +% One can merge different parts by using the same index for the +% parts that are desired to be merged. +% For example, one can merge +% the left lower leg (llleg) and the left upper leg (luleg) of person by setting: +% pimap{15}('llleg') = 19; % left lower l eg +% pimap{15}('luleg') = 19; % left upper leg + +pimap = cell(20, 1); +% Will define part index map for the 20 PASCAL VOC object classes in ascending +% alphabetical order (the standard PASCAL VOC order). 
+for ii = 1:20 + pimap{ii} = containers.Map('KeyType','char','ValueType','int32'); +end + +% [1aeroplane] +pimap{1}('body') = 1; +pimap{1}('stern') = 2; + +pimap{1}('lwing') = 3; % left wing +pimap{1}('rwing') = 3; % right wing + +% pimap{1}('tail') = 4; + +for ii = 1:10 + pimap{1}(sprintf('engine_%d', ii)) = 4; % multiple engines +end +% for ii = 1:10 +% pimap{1}(sprintf('wheel_%d', ii)) = 6; % multiple wheels +% end + +% [2bicycle] +pimap{2}('fwheel') = 1; % front wheel +pimap{2}('bwheel') = 1; % back wheel + +pimap{2}('saddle') = 2; +pimap{2}('handlebar') = 3; % handle bar +pimap{2}('chainwheel') = 4; % chain wheel +% +% for ii = 1:10 +% pimap{2}(sprintf('headlight_%d', ii)) = 5; +% end + +% [3bird] +pimap{3}('head') = 1; + +% pimap{3}('leye') = 2; % left eye +% pimap{3}('reye') = 2; % right eye + +pimap{3}('beak') = 2; +pimap{3}('torso') = 3; +pimap{3}('neck') = 4; + +pimap{3}('lwing') = 5; % left wing +pimap{3}('rwing') = 5; % right wing + +pimap{3}('lleg') = 6; % left leg +pimap{3}('rleg') = 6; % right leg + +pimap{3}('rfoot') = 7; % right foot +pimap{3}('lfoot') = 7; % left foot + +pimap{3}('tail') = 8; + +% [4boat] +% only has silhouette mask + +% [5bottle] +pimap{5}('cap') = 1; +pimap{5}('body') = 2; + +% [6bus] +pimap{6}('frontside') = 1; +pimap{6}('leftside') = 2; +pimap{6}('rightside') = 3; +pimap{6}('backside') = 4; +pimap{6}('roofside') = 5; + +pimap{6}('leftmirror') = 6; +pimap{6}('rightmirror') = 6; + +pimap{6}('fliplate') = 7; % front license plate +pimap{6}('bliplate') = 7; % back license plate + +for ii = 1:10 + pimap{6}(sprintf('door_%d',ii)) = 8; +end +for ii = 1:10 + pimap{6}(sprintf('wheel_%d',ii)) = 9; +end +% for ii = 1:10 +% pimap{6}(sprintf('headlight_%d',ii)) = 10; +% end +for ii = 1:20 + pimap{6}(sprintf('window_%d',ii)) = 10; +end + +% [7car] +pimap{7}('frontside') = 1; +pimap{7}('leftside') = 2; +pimap{7}('rightside') = 3; +pimap{7}('backside') = 4; +pimap{7}('roofside') = 5; + +% pimap{7}('leftmirror') = 6; +% pimap{7}('rightmirror') 
= 6; + +pimap{7}('fliplate') = 6; % front license plate +pimap{7}('bliplate') = 6; % back license plate + +for ii = 1:10 + pimap{7}(sprintf('door_%d',ii)) = 7; +end +for ii = 1:10 + pimap{7}(sprintf('wheel_%d',ii)) = 8; +end +for ii = 1:10 + pimap{7}(sprintf('headlight_%d',ii)) = 9; +end +for ii = 1:20 + pimap{7}(sprintf('window_%d',ii)) = 10; +end + +% [8cat] +pimap{8}('head') = 1; + +pimap{8}('leye') = 2; % left eye +pimap{8}('reye') = 2; % right eye + +pimap{8}('lear') = 3; % left ear +pimap{8}('rear') = 3; % right ear + +pimap{8}('nose') = 4; +pimap{8}('torso') = 5; +pimap{8}('neck') = 6; + +pimap{8}('lfleg') = 7; % left front leg +pimap{8}('rfleg') = 7; % right front leg +pimap{8}('lbleg') = 7; % left back leg +pimap{8}('rbleg') = 7; % right back leg + +pimap{8}('lfpa') = 8; % left front paw +pimap{8}('rfpa') = 8; % right front paw +pimap{8}('lbpa') = 8; % left back paw +pimap{8}('rbpa') = 8; % right back paw + +pimap{8}('tail') = 9; + +% [9chair] +% only has sihouette mask + +% [10cow] +pimap{10}('head') = 1; + +% pimap{10}('leye') = 2; % left eye +% pimap{10}('reye') = 2; % right eye + +pimap{10}('lear') = 2; % left ear +pimap{10}('rear') = 2; % right ear + +pimap{10}('muzzle') = 3; + +pimap{10}('lhorn') = 4; % left horn +pimap{10}('rhorn') = 4; % right horn + +pimap{10}('torso') = 5; +pimap{10}('neck') = 6; + +pimap{10}('lfuleg') = 7; % left front upper leg +pimap{10}('lflleg') = 7; % left front lower leg + +pimap{10}('rfuleg') = 7; % right front upper leg +pimap{10}('rflleg') = 7; % right front lower leg + +pimap{10}('lbuleg') = 7; % left back upper leg +pimap{10}('lblleg') = 7; % left back lower leg + +pimap{10}('rbuleg') = 7; % right back upper leg +pimap{10}('rblleg') = 7; % right back lower leg + +pimap{10}('tail') = 8; + +% [11diningtable] +% only has silhouette mask + +% [12dog] +pimap{12}('head') = 1; + +% pimap{12}('leye') = 2; % left eye +% pimap{12}('reye') = 2; % right eye + +pimap{12}('lear') = 2; % left ear +pimap{12}('rear') = 2; % right ear 
+ +pimap{12}('nose') = 3; +pimap{12}('torso') = 4; +pimap{12}('neck') = 5; + +pimap{12}('lfleg') = 6; % left front leg +pimap{12}('rfleg') = 6; % right front leg +pimap{12}('lbleg') = 6; % left back leg +pimap{12}('rbleg') = 6; % right back leg + +pimap{12}('lfpa') = 7; % left front paw +pimap{12}('rfpa') = 7; % right front paw +pimap{12}('lbpa') = 7; % left back paw +pimap{12}('rbpa') = 7; % right back paw + +pimap{12}('tail') = 8; +pimap{12}('muzzle') = 9; % muzzle + + +% [13horse] +pimap{13}('head') = 1; +% +% pimap{13}('leye') = 2; % left eye +% pimap{13}('reye') = 2; % right eye + +pimap{13}('lear') = 2; % left ear +pimap{13}('rear') = 2; % right ear + +pimap{13}('muzzle') = 3; +pimap{13}('torso') = 4; +pimap{13}('neck') = 5; + +pimap{13}('lfuleg') = 6; % left front upper leg +pimap{13}('lflleg') = 6; % left front lower leg + +pimap{13}('rfuleg') = 6; % right front upper leg +pimap{13}('rflleg') = 6; % right front lower leg + +pimap{13}('lbuleg') = 6; % left back upper leg +pimap{13}('lblleg') = 6; % left back lower leg + +pimap{13}('rbuleg') = 6; % right back upper leg +pimap{13}('rblleg') = 6; % right back lower leg + +pimap{13}('tail') = 7; +% +% pimap{13}('lfho') = 9; % hoof +% pimap{13}('rfho') = 9; +% pimap{13}('lbho') = 9; +% pimap{13}('rbho') = 9; + +% [14motorbike] +pimap{14}('fwheel') = 1; +pimap{14}('bwheel') = 1; + +pimap{14}('handlebar') = 2; +% pimap{14}('saddle') = 3; +for ii = 1:10 + pimap{14}(sprintf('headlight_%d', ii)) = 3; +end + +% [15person] +pimap{15}('head') = 1; +% +% pimap{15}('leye') = 2; % left eye +% pimap{15}('reye') = 2; % right eye +% +% pimap{15}('lear') = 3; % left ear +% pimap{15}('rear') = 3; % right ear +% +% pimap{15}('lebrow') = 4; % left eyebrow +% pimap{15}('rebrow') = 4; % right eyebrow +% +% pimap{15}('nose') = 5; +% pimap{15}('mouth') = 6; +pimap{15}('hair') = 2; +pimap{15}('torso') = 3; +pimap{15}('neck') = 4; + +pimap{15}('llarm') = 5; % left lower arm +pimap{15}('luarm') = 5; % left upper arm + +pimap{15}('rlarm') 
= 5; % right lower arm +pimap{15}('ruarm') = 5; % right upper arm + +pimap{15}('lhand') = 6; % left hand +pimap{15}('rhand') = 6; % right hand + +pimap{15}('llleg') = 7; % left lower leg +pimap{15}('luleg') = 7; % left upper leg + +pimap{15}('rlleg') = 7; % right lower leg +pimap{15}('ruleg') = 7; % right upper leg + +pimap{15}('lfoot') = 8; % left foot +pimap{15}('rfoot') = 8; % right foot + +% [16pottedplant] +pimap{16}('pot') = 1; +pimap{16}('plant') = 2; + +% [17sheep] +pimap{17}('head') = 1; + +% pimap{17}('leye') = 2; % left eye +% pimap{17}('reye') = 2; % right eye + +pimap{17}('lear') = 2; % left ear +pimap{17}('rear') = 2; % right ear + +pimap{17}('muzzle') = 3; + +pimap{17}('lhorn') = 4; % left horn +pimap{17}('rhorn') = 4; % right horn + +pimap{17}('torso') = 5; +pimap{17}('neck') = 6; + +pimap{17}('lfuleg') = 7; % left front upper leg +pimap{17}('lflleg') = 7; % left front lower leg + +pimap{17}('rfuleg') = 7; % right front upper leg +pimap{17}('rflleg') = 7; % right front lower leg + +pimap{17}('lbuleg') = 7; % left back upper leg +pimap{17}('lblleg') = 7; % left back lower leg + +pimap{17}('rbuleg') = 7; % right back upper leg +pimap{17}('rblleg') = 7; % right back lower leg + +pimap{17}('tail') = 8; + +% [18sofa] +% only has sihouette mask + +% [19train] +pimap{19}('head') = 1; +pimap{19}('hfrontside') = 2; % head front side +pimap{19}('hleftside') = 3; % head left side +pimap{19}('hrightside') = 4; % head right side +% pimap{19}('hbackside') = 5; % head back side +pimap{19}('hroofside') = 5; % head roof side +% +for ii = 1:10 + pimap{19}(sprintf('headlight_%d',ii)) = 6; +end + +for ii = 1:10 + pimap{19}(sprintf('coach_%d',ii)) = 7; +end + +for ii = 1:10 + pimap{19}(sprintf('cfrontside_%d', ii)) = 8; % coach front side +end + +for ii = 1:10 + pimap{19}(sprintf('cleftside_%d', ii)) = 9; % coach left side +end + +for ii = 1:10 + pimap{19}(sprintf('crightside_%d', ii)) = 10; % coach right side +end + +for ii = 1:10 + pimap{19}(sprintf('cbackside_%d', 
ii)) = 11; % coach back side +end + +for ii = 1:10 + pimap{19}(sprintf('croofside_%d', ii)) = 12; % coach roof side +end + + +% [20tvmonitor] +pimap{20}('screen') = 1; + diff --git a/matconvnet-calvin/examples/parts/part2names_4imdb.m b/matconvnet-calvin/examples/parts/part2names_4imdb.m new file mode 100644 index 0000000..c65f5fe --- /dev/null +++ b/matconvnet-calvin/examples/parts/part2names_4imdb.m @@ -0,0 +1,163 @@ +function pimap = part2names_4imdb() + +% Commented out parts are too tiny or rare to be considered + +pimap = cell(20, 1); +for ii = 1:20 + pimap{ii} = containers.Map('KeyType','char','ValueType','int32'); +end + +% [1aeroplane] +pimap{1}('body') = 1; +pimap{1}('stern') = 2; +pimap{1}('wing') = 3; +pimap{1}('engine') = 4; + +% [2bicycle] +pimap{2}('wheel') = 1; +pimap{2}('saddle') = 2; +pimap{2}('handlebar') = 3; +pimap{2}('chainwheel') = 4; + +% [3bird] +pimap{3}('head') = 1; +pimap{3}('beak') = 2; +pimap{3}('torso') = 3; +pimap{3}('neck') = 4; +pimap{3}('wing') = 5; +pimap{3}('leg') = 6; +pimap{3}('foot') = 7; +pimap{3}('tail') = 8; + +% [4boat] +% only has silhouette mask + +% [5bottle] +pimap{5}('cap') = 1; +pimap{5}('body') = 2; + +% [6bus] +pimap{6}('frontside') = 1; +pimap{6}('leftside') = 2; +pimap{6}('rightside') = 3; +pimap{6}('backside') = 4; +pimap{6}('roofside') = 5; +pimap{6}('mirror') = 6; +pimap{6}('liplate') = 7; +pimap{6}('door') = 8; +pimap{6}('wheel') = 9; +pimap{6}('window') = 10; + +% [7car] +pimap{7}('frontside') = 1; +pimap{7}('leftside') = 2; +pimap{7}('rightside') = 3; +pimap{7}('backside') = 4; +pimap{7}('roofside') = 5; +% pimap{7}('mirror') = 6; +pimap{7}('liplate') = 6; +pimap{7}('door') = 7; +pimap{7}('wheel') = 8; +pimap{7}('headlight') = 9; +pimap{7}('window') = 10; + +% [8cat] +pimap{8}('head') = 1; +pimap{8}('eye') = 2; +pimap{8}('ear') = 3; +pimap{8}('nose') = 4; +pimap{8}('torso') = 5; +pimap{8}('neck') = 6; +pimap{8}('leg') = 7; +pimap{8}('paw') = 8; +pimap{8}('tail') = 9; + +% [9chair] +% only has sihouette 
mask + +% [10cow] +pimap{10}('head') = 1; +% pimap{10}('eye') = 2; +pimap{10}('ear') = 2; +pimap{10}('muzzle') = 3; +pimap{10}('horn') = 4; +pimap{10}('torso') = 5; +pimap{10}('neck') = 6; +pimap{10}('leg') = 7; +pimap{10}('tail') = 8; + +% [11diningtable] +% only has silhouette mask + +% [12dog] +pimap{12}('head') = 1; +pimap{12}('ear') = 2; +pimap{12}('nose') = 3; +pimap{12}('torso') = 4; +pimap{12}('neck') = 5; +pimap{12}('leg') = 6; +pimap{12}('paw') = 7; +pimap{12}('tail') = 8; +pimap{12}('muzzle') = 9; + + +% [13horse] +pimap{13}('head') = 1; +% pimap{13}('eye') = 2; +pimap{13}('ear') = 2; +pimap{13}('muzzle') = 3; +pimap{13}('torso') = 4; +pimap{13}('neck') = 5; +pimap{13}('leg') = 6; +% pimap{13}('hoof') = 7; +pimap{13}('tail') = 7; + +% [14motorbike] +pimap{14}('wheel') = 1; +pimap{14}('handlebar') = 2; +pimap{14}('headlight') = 3; + +% [15person] +pimap{15}('head') = 1; +pimap{15}('hair') = 2; +pimap{15}('torso') = 3; +pimap{15}('neck') = 4; +pimap{15}('arm') = 5; +pimap{15}('hand') = 6; +pimap{15}('leg') = 7; +pimap{15}('foot') = 8; + +% [16pottedplant] +pimap{16}('pot') = 1; +pimap{16}('plant') = 2; + +% [17sheep] +pimap{17}('head') = 1; +pimap{17}('ear') = 2; +pimap{17}('muzzle') = 3; +pimap{17}('horn') = 4; + +pimap{17}('torso') = 5; +pimap{17}('neck') = 6; +pimap{17}('leg') = 7; +pimap{17}('tail') = 8; + +% [18sofa] +% only has sihouette mask + +% [19train] +pimap{19}('head') = 1; +pimap{19}('hfrontside') = 2; +pimap{19}('hleftside') = 3; +pimap{19}('hrightside') = 4; +pimap{19}('hroofside') = 5; +pimap{19}('headlight') = 6; +pimap{19}('coach') = 7; +pimap{19}('cfrontside') = 8; % coach front side +pimap{19}('cleftside') = 9; +pimap{19}('crightside') = 10; +pimap{19}('cbackside') = 11; +pimap{19}('croofside') = 12; + +% [20tvmonitor] +pimap{20}('screen') = 1; \ No newline at end of file diff --git a/matconvnet-calvin/examples/parts/saveGStructs.m b/matconvnet-calvin/examples/parts/saveGStructs.m new file mode 100644 index 0000000..1aa193a --- 
/dev/null +++ b/matconvnet-calvin/examples/parts/saveGStructs.m @@ -0,0 +1,102 @@ +function saveGStructs(imSet) + +global DATAopts; + + +trash = load(sprintf(DATAopts.imdb, imSet)); +imdb = trash.imdb; +clear trash; +allIms = imdb.image_ids; + + +% Path where to store the Gstructs +mkdir(DATAopts.gStructPath); + +offsetsPartIdx = [0; cumsum(cellfun(@(x) size(x,1), imdb.prt_classes))]; +offsetsPartIdx = offsetsPartIdx(1:end-1); + + +saveDir = '/home/abel/LocalData/Graphics/GStructsObjPrt-FromScratch'; +mkdir(saveDir); + + +for idxImg = 1:size(allIms,1) + fprintf('Processing img: %d/%d\n', idxImg, size(allIms,1)); + + % Get Selective Search proposals, change to x,y order + im = imread(sprintf(DATAopts.imgpath, allIms{idxImg})); + im = im2double(im); + selectiveSearchBoxes = selective_search_boxes_min(im, true, 500, 10); + + ssBoxes = selectiveSearchBoxes(:, [2 1 4 3]); + + % Logical variables that indicate which ssBoxes should be used for + % parts or objs + prtBoxes = true(size(ssBoxes,1),1); + + % MAKE THIS SMALLER, REMOVE SMALL PROPOSALS + objBoxes = true(size(ssBoxes,1),1); + + % Object GTs + objGTBoxes = imdb.objects{idxImg}.bbox; + objClass = imdb.objects{idxImg}.class_id; + numObjGTs = size(objGTBoxes,1); + + + % Part GTs + prt_boxes = cell(size(imdb.parts{idxImg},1),1); + prt_cls_gt = cell(size(imdb.parts{idxImg},1),1); + for kk = 1:size(imdb.parts{idxImg},1) + if size(imdb.parts{idxImg}{kk},1) > 0 + prt_boxes{kk} = imdb.parts{idxImg}{kk}.bbox; + % Need double conversion so cell2mat doesn't crash if empty + prt_cls_gt{kk} = double(offsetsPartIdx(imdb.objects{idxImg}.class_id(kk)) + imdb.parts{idxImg}{kk}.class_id); + end + end + + prtGTBoxes = cell2mat(prt_boxes); + prtClass = cell2mat(prt_cls_gt); + numPrtGTs = size(prtGTBoxes,1); + + + % Boxes variable contains ALL boxes + boxStruct.boxes = single([prtGTBoxes; objGTBoxes; ssBoxes]); + + % Total number of boxes + numBoxes = size(boxStruct.boxes,1); + + % Indicate whether boxes are for objects or parts + 
boxStruct.boxesPrt = [true(numPrtGTs,1); false(numObjGTs,1); prtBoxes]; + boxStruct.boxesObj = [false(numPrtGTs,1); true(numObjGTs,1); objBoxes]; + + % Indicate which boxes are gts for objects or parts + boxStruct.gtPrt = [true(numPrtGTs,1); false(numObjGTs,1); false(size(ssBoxes,1),1)]; + boxStruct.gtObj = [false(numPrtGTs,1); true(numObjGTs,1); false(size(ssBoxes,1),1)]; + + % Save class indices + boxStruct.classPrt = uint16(zeros(numBoxes,1)); + boxStruct.classPrt(boxStruct.gtPrt) = prtClass; + + boxStruct.classObj = uint16(zeros(numBoxes,1)); + boxStruct.classObj(boxStruct.gtObj) = objClass; + + % Compute overlaps + boxStruct.overlapPrt = zeros(numBoxes, imdb.prt_num_classes, 'single'); + % Get overlap wrt ground truth boxes + for ii = 1:numPrtGTs + boxStruct.overlapPrt(:, prtClass(ii)) = ... + max(boxStruct.overlapPrt(:, prtClass(ii)), BoxOverlap(boxStruct.boxes, prtGTBoxes(ii, :))); + end + + boxStruct.overlapObj = zeros(numBoxes, imdb.obj_num_classes, 'single'); + % Get overlap wrt ground truth boxes + for ii = 1:numObjGTs + boxStruct.overlapObj(:, objClass(ii)) = ... + max(boxStruct.overlapObj(:, objClass(ii)), BoxOverlap(boxStruct.boxes, objGTBoxes(ii, :))); + end + + % Compute insideness of boxes + boxStruct.insideness = single(computeIoATableSingle(boxStruct.boxes, boxStruct.boxes)); + + save([DATAopts.gStructPath allIms{idxImg} '.mat'], '-struct', 'boxStruct'); +end diff --git a/matconvnet-calvin/examples/parts/selective_search_boxes_min.m b/matconvnet-calvin/examples/parts/selective_search_boxes_min.m new file mode 100644 index 0000000..fff7d07 --- /dev/null +++ b/matconvnet-calvin/examples/parts/selective_search_boxes_min.m @@ -0,0 +1,87 @@ +function [boxes, blobIndIm, blobIndBoxes, hierarchy] = selective_search_boxes_min(im, fast_mode, im_width, minBoxWidth) +% Girshick Wrapper +% +% Based on the demo.m file included in the Selective Search +% IJCV code. +% +% Requires selective search code. 
+ +if ~exist('fast_mode', 'var') || isempty(fast_mode) + fast_mode = true; +end + +originalImSize = size(im); + +if ~exist('im_width', 'var') || isempty(im_width) + im_width = []; + scale = 1; +else + scale = size(im, 2) / im_width; +end + +if scale ~= 1 + im = imresize(im, [NaN im_width]); +end + +% After segmentation, filter out boxes which have a width/height smaller +% than minBoxWidth (default = 20 pixels). +if ~exist('minBoxWidth', 'var') || isempty(minBoxWidth) + minBoxWidth = 20; +end + +% Parameters. Note that this controls the number of hierarchical +% segmentations which are combined. +colorTypes = {'Hsv', 'Lab', 'RGI', 'H', 'Intensity'}; + +% Here you specify which similarity functions to use in merging +simFunctionHandles = {@SSSimColourTextureSizeFillOrig, ... + @SSSimTextureSizeFill, ... + @SSSimBoxFillOrig, ... + @SSSimSize}; + +% Thresholds for the Felzenszwalb and Huttenlocher segmentation algorithm. +% Note that by default, we set minSize = k, and sigma = 0.8. +% controls size of segments of initial segmentation. +ks = [50 100 150 300]; +sigma = 0.8; + +% Comment the following three lines for the 'quality' version +if fast_mode + colorTypes = colorTypes(1:2); % 'Fast' uses HSV and Lab + simFunctionHandles = simFunctionHandles(1:2); % Two different merging strategies + ks = ks(1:2); +end + +idx = 1; +for j = 1:length(ks) + k = ks(j); % Segmentation threshold k + minSize = k; % We set minSize = k + for n = 1:length(colorTypes) + colorType = colorTypes{n}; + [boxesT{idx} blobIndIm{idx} blobIndBoxes{idx} hierarchy{idx} priorityT{idx}] = ... 
+ Image2HierarchicalGrouping(im, sigma, k, minSize, colorType, simFunctionHandles); + idx = idx + 1; + end +end +boxes = cat(1, boxesT{:}); % Concatenate boxes from all hierarchies +priority = cat(1, priorityT{:}); % Concatenate priorities + +% Do pseudo random sorting as in paper +priority = priority .* rand(size(priority)); +[priority sortIds] = sort(priority, 'ascend'); +boxes = boxes(sortIds,:); + +boxes = BoxRemoveDuplicates(boxes); + +if scale ~= 1 + boxes = floor((boxes - 1) * scale + 1); + for iii = 1:length(blobIndBoxes) + blobIndBoxes{iii} = floor((blobIndBoxes{iii} - 1) * scale + 1); + blobIndIm{iii} = imresize(blobIndIm{iii}, [originalImSize(1) originalImSize(2)], 'nearest'); + end +end + +% Filter width of boxes +[nr, nc] = BoxSize(boxes); +idsGood = (nr >= minBoxWidth) & (nc >= minBoxWidth); +boxes = boxes(idsGood,:); diff --git a/matconvnet-calvin/examples/parts/setupDataOptsPrts.m b/matconvnet-calvin/examples/parts/setupDataOptsPrts.m new file mode 100644 index 0000000..e4f5517 --- /dev/null +++ b/matconvnet-calvin/examples/parts/setupDataOptsPrts.m @@ -0,0 +1,45 @@ +function setupDataOptsPrts(vocYear, testName, datasetDir) + +global DATAopts; + +% Setup VOC data +devkitroot = [datasetDir, 'VOCdevkit', '/']; +DATAopts.year = vocYear; +DATAopts.dataset = sprintf('VOC%d', DATAopts.year); +DATAopts.datadir = [devkitroot, DATAopts.dataset, '/']; +DATAopts.resdir = [devkitroot, 'results', '/', DATAopts.dataset '/']; +DATAopts.localdir = [devkitroot, 'local', '/', DATAopts.dataset, '/']; +DATAopts.imdb = [DATAopts.datadir, '/imdb-%s.mat']; +DATAopts.gStructPath = [DATAopts.resdir, 'GStructs', '/']; +DATAopts.imgsetpath = [DATAopts.datadir, 'ImageSets', '/', 'Main', '/', '%s.txt']; +DATAopts.imgpath = [DATAopts.datadir, 'JPEGImages', '/', '%s.jpg']; +DATAopts.clsimgsetpath = [DATAopts.datadir, 'ImageSets', '/', 'Main', '/', '%s_%s.txt']; +DATAopts.annopath_obj = [DATAopts.datadir, 'Annotations', '/', '%s.xml']; +% Keep object annopath as standard for ap 
evaluation +DATAopts.annopath = DATAopts.annopath_obj; +DATAopts.annopath_prt = [DATAopts.datadir, 'Annotations_Part', '/', '%s.mat']; +DATAopts.annocachepath = [DATAopts.localdir, '%s_anno.mat']; +DATAopts.classes={... + 'aeroplane' + 'bicycle' + 'bird' + 'boat' + 'bottle' + 'bus' + 'car' + 'cat' + 'chair' + 'cow' + 'diningtable' + 'dog' + 'horse' + 'motorbike' + 'person' + 'pottedplant' + 'sheep' + 'sofa' + 'train' + 'tvmonitor'}; +DATAopts.nclasses = length(DATAopts.classes); +DATAopts.testset = testName; +DATAopts.minoverlap = 0.5; \ No newline at end of file diff --git a/matconvnet-calvin/examples/parts/setupImdbPartDetection.m b/matconvnet-calvin/examples/parts/setupImdbPartDetection.m new file mode 100644 index 0000000..8fde268 --- /dev/null +++ b/matconvnet-calvin/examples/parts/setupImdbPartDetection.m @@ -0,0 +1,40 @@ +function[imdb] = setupImdbPartDetection(trainName, testName, net) +% [imdb] = setupImdbDetection(trainName, testName, net) + +global DATAopts; + +%%% Setup the Imdb +% Get and test images from imbd +trash = load(sprintf(DATAopts.imdb, trainName)); +imdbTrain = trash.imdb; +trash = load(sprintf(DATAopts.imdb, testName)); +imdbTest = trash.imdb; +clear trash; +% Consider only images with at least one part +trainIms = imdbTrain.image_ids(unique(imdbTrain.mapping(:,4))); +testIms = imdbTest.image_ids(unique(imdbTest.mapping(:,4))); + +% Make train, val, and test set. For Pascal, I illegally use part of the test images +% as validation set. This is to match Girshick performance while still having +% meaningful graphs for the validation set. +% Note: allIms are just all images. datasetIdx determines how these are divided over +% train, val, and test. +allIms = cat(1, trainIms, testIms); +datasetIdx = cell(3, 1); +datasetIdx{1} = (1:length(trainIms))'; % Jasper: Use all training images. Only for comparison Pascal Girshick +datasetIdx{2} = (length(trainIms)+1:length(trainIms)+501)'; % Use part of the test images for validation. 
Not entirely legal, but otherwise it will take much longer to get where we want.
+datasetIdx{3} = (length(trainIms)+1:length(allIms))';
+
+imdb = ImdbPartDetectionJointObjPrt(DATAopts.imgpath(1:end-6), ... % path
+    DATAopts.imgpath(end-3:end), ... % image extension
+    DATAopts.gStructPath, ... % gStruct path
+    allIms, ... % all images
+    datasetIdx, ... % division into train/val/test
+    net.meta.normalization.averageImage); % average image used to pretrain network
+
+% Usually instance weighting gives better performance. But not Girshick style
+% imdbPascal.SetInstanceWeighting(true);
+
+% Store lists for use in eval
+imdb.misc.trainIms = trainIms;
+imdb.misc.testIms = testIms;
\ No newline at end of file
diff --git a/matconvnet-calvin/examples/parts/testPartDetection.m b/matconvnet-calvin/examples/parts/testPartDetection.m
new file mode 100644
index 0000000..96b78ae
--- /dev/null
+++ b/matconvnet-calvin/examples/parts/testPartDetection.m
@@ -0,0 +1,153 @@
+function [results] = testPartDetection(imdb, nnOpts, net, inputs, ~)
+% [results] = testDetection(imdb, nnOpts, net, inputs, ~)
+%
+% Get predicted boxes and scores per class (parts and objects)
+% Only gets top nnOpts.maxNumBoxesPerImTest boxes (default: 5000)
+% Only gets boxes with score higher than nnOpts.minDetectionScore (default: 0.01)
+% NMS threshold: nnOpts.nmsTTest (default: 0.3)
+
+% Variables which should probably be in imdb.nnOpts or something
+% Jasper: Probably need to do something more robust here
+%
+% Copyright by Jasper Uijlings, 2015
+
+if isfield(nnOpts, 'maxNumBoxesPerImTest')
+    maxNumBoxesPerImTest = nnOpts.maxNumBoxesPerImTest;
+else
+    maxNumBoxesPerImTest = 5000;
+end
+
+if isfield(nnOpts, 'nmsTTest')
+    nmsTTest = nnOpts.nmsTTest; % FIX: was imdb.nmsTTest, but the guard checks nnOpts, so the user-supplied threshold was never read
+else
+    nmsTTest = 0.3; % non-maximum threshold
+end
+
+if isfield(nnOpts, 'minDetectionScore')
+    minDetectionScore = nnOpts.minDetectionScore;
+else
+    minDetectionScore = 0.01;
+end
+
+%% Parts
+
+% Get scores
+vI = net.getVarIndex('scoresPrt');
+scoresStruct = net.vars(vI);
+scores = permute(scoresStruct.value, [4 3 2 1]);
+
+% Get boxes
+inputNames = inputs(1:2:end);
+[~, boxI] = ismember('boxesPrt', inputNames);
+boxI = boxI * 2; % Index of actual argument
+boxes = inputs{boxI}';
+
+
+% Get regression targets for boxes
+if imdb.boxRegress
+    vI = net.getVarIndex('regressionScorePrt');
+    regressStruct = net.vars(vI);
+    regressFactors = permute(regressStruct.value, [4 3 2 1]);
+else
+    regressFactors = zeros(size(boxes,1), size(boxes,2) * imdb.numClasses); % NOTE(review): this imdb defines numClassesPrt/numClassesObj; confirm numClasses is inherited from ImdbMatbox
+end
+
+% Get top boxes for each category. Perform NMS. Thresholds defined at top of function
+currMaxBoxes = min(maxNumBoxesPerImTest, size(boxes, 1));
+for cI = size(scores,2) : -1 : 1
+    % Get top scores and boxes
+    [currScoresT, sI] = sort(scores(:,cI), 'descend');
+    currScoresT = currScoresT(1:currMaxBoxes);
+    sI = sI(1:currMaxBoxes);
+    currBoxes = boxes(sI,:);
+
+    % Do regression
+    regressFRange = (cI*4)-3:cI*4;
+    currRegressF = gather(regressFactors(sI,regressFRange));
+    currBoxesReg = BoxRegresssGirshick(currBoxes, currRegressF);
+
+    % Get scores (w boxes) above certain threshold
+    goodI = currScoresT > minDetectionScore;
+    currScoresT = currScoresT(goodI, :);
+    currBoxes = currBoxes(goodI, :);
+    currBoxesReg = currBoxesReg(goodI, :);
+
+    % Perform NMS
+    [~, goodBoxesI] = BoxNMS(currBoxes, nmsTTest);
+    currBoxes = currBoxes(goodBoxesI, :);
+    currScores = currScoresT(goodBoxesI ,:);
+
+    results.boxesPrt{cI} = gather(currBoxes);
+    results.scoresPrt{cI} = gather(currScores);
+
+    if imdb.boxRegress
+        [~, goodBoxesI] = BoxNMS(currBoxesReg, nmsTTest);
+        currBoxesReg = currBoxesReg(goodBoxesI, :);
+        currScoresRegressed = currScoresT(goodBoxesI, :);
+        results.boxesRegressedPrt{cI} = gather(currBoxesReg);
+        results.scoresRegressedPrt{cI} = gather(currScoresRegressed);
+    end
+end
+
+
+
+%% Objects
+
+% Get scores
+vI = net.getVarIndex('scoresObj');
+scoresStruct = net.vars(vI);
+scores = permute(scoresStruct.value, [4 3 2 1]);
+
+% Get boxes
+inputNames = inputs(1:2:end);
+[~, boxI] = ismember('boxesObj', inputNames);
+boxI = boxI * 2; % Index of actual argument
+boxes = inputs{boxI}';
+
+
+% Get regression targets for boxes
+if imdb.boxRegress
+    vI = net.getVarIndex('regressionScoreObj');
+    regressStruct = net.vars(vI);
+    regressFactors = permute(regressStruct.value, [4 3 2 1]);
+else
+    regressFactors = zeros(size(boxes,1), size(boxes,2) * imdb.numClasses); % NOTE(review): see part branch above — confirm numClasses exists on this imdb
+end
+
+% Get top boxes for each category. Perform NMS. Thresholds defined at top of function
+currMaxBoxes = min(maxNumBoxesPerImTest, size(boxes, 1));
+
+for cI = size(scores,2) : -1 : 1
+    % Get top scores and boxes
+    [currScoresT, sI] = sort(scores(:,cI), 'descend');
+    currScoresT = currScoresT(1:currMaxBoxes);
+    sI = sI(1:currMaxBoxes);
+    currBoxes = boxes(sI,:);
+
+    % Do regression
+    regressFRange = (cI*4)-3:cI*4;
+    currRegressF = gather(regressFactors(sI,regressFRange));
+    currBoxesReg = BoxRegresssGirshick(currBoxes, currRegressF);
+
+    % Get scores (w boxes) above certain threshold
+    goodI = currScoresT > minDetectionScore;
+    currScoresT = currScoresT(goodI, :);
+    currBoxes = currBoxes(goodI, :);
+    currBoxesReg = currBoxesReg(goodI, :);
+
+    % Perform NMS
+    [~, goodBoxesI] = BoxNMS(currBoxes, nmsTTest);
+    currBoxes = currBoxes(goodBoxesI, :);
+    currScores = currScoresT(goodBoxesI ,:);
+
+    results.boxesObj{cI} = gather(currBoxes);
+    results.scoresObj{cI} = gather(currScores);
+
+    if imdb.boxRegress
+        [~, goodBoxesI] = BoxNMS(currBoxesReg, nmsTTest);
+        currBoxesReg = currBoxesReg(goodBoxesI, :);
+        currScoresRegressed = currScoresT(goodBoxesI, :);
+        results.boxesRegressedObj{cI} = gather(currBoxesReg);
+        results.scoresRegressedObj{cI} = gather(currScoresRegressed);
+    end
+end
\ No newline at end of file
diff --git a/matconvnet-calvin/matlab/@CalvinNN/CalvinNN.m b/matconvnet-calvin/matlab/@CalvinNN/CalvinNN.m
index cb258f8..2930b90 100644
--- a/matconvnet-calvin/matlab/@CalvinNN/CalvinNN.m
+++ b/matconvnet-calvin/matlab/@CalvinNN/CalvinNN.m
@@ -42,6 +42,7 @@ % Declarations for methods that
are in separate files convertNetwork(obj, net); convertNetworkToFastRcnn(obj, varargin); + convertNetworkToPrtObjFastRcnn(obj, varargin); init(obj, varargin); plotStats(obj, epochs, stats, plotAccuracy); saveState(obj, fileName); diff --git a/matconvnet-calvin/matlab/@CalvinNN/convertNetworkToPrtObjFastRcnn.m b/matconvnet-calvin/matlab/@CalvinNN/convertNetworkToPrtObjFastRcnn.m new file mode 100644 index 0000000..907786b --- /dev/null +++ b/matconvnet-calvin/matlab/@CalvinNN/convertNetworkToPrtObjFastRcnn.m @@ -0,0 +1,221 @@ +function convertNetworkToPrtObjFastRcnn(obj, varargin) +% +% Modify network for Fast R-CNN's ROI pooling. +% +% Copyright by Holger Caesar, 2015 +% Updated by Jasper Uijlings: +% - Extra flexibility and possible bounding box regression +% - Added instanceWeights to loss layer + +% Initial settings +p = inputParser; +addParameter(p, 'lastConvPoolName', 'pool5'); +addParameter(p, 'firstFCName', 'fc6'); +addParameter(p, 'secondFCName', 'fc7'); +addParameter(p, 'finalFCName', 'fc8'); +parse(p, varargin{:}); + +lastConvPoolName = p.Results.lastConvPoolName; +firstFCName = p.Results.firstFCName; +secondFCName = p.Results.secondFCName; +finalFCName = p.Results.finalFCName; + +% Make these parameters +numObjClasses = 21; +numPrtClasses = 106; + +% Rename input +if ~isnan(obj.net.getVarIndex('x0')) + % Input variable x0 is renamed to input + obj.net.renameVar('x0', 'input'); +else + % Input variable already has the correct name + assert(~isnan(obj.net.getVarIndex('input'))); +end + + +% Remove unused layers from pre-trained network +removeLayer(obj.net,'prob'); +removeLayer(obj.net,finalFCName); + +% Get number of last variable +numLastVar = str2double(obj.net.vars(end).name(2:end)); + +%%% Replace pooling layer of last convolution layer with roiPooling for +%%% objects +lastConvPoolIdx = obj.net.getLayerIndex(lastConvPoolName); +assert(~isnan(lastConvPoolIdx)); +roiPoolName = ['roi', lastConvPoolName 'Obj']; +firstFCIdx = 
obj.net.layers(lastConvPoolIdx).outputIndexes; +assert(length(firstFCIdx) == 1); +roiPoolSize = obj.net.layers(firstFCIdx).block.size(1:2); +roiPoolBlock = dagnn.RoiPooling('poolSize', roiPoolSize); +replaceLayer(obj.net, lastConvPoolName, roiPoolName, roiPoolBlock, {'oriImSize', 'boxesObj'}, {'roiPoolMaskObj'}); + +renameLayer(obj.net, firstFCName, [firstFCName 'Obj']); +% Leave original names of params, matconvnet-calvin uses matconvnet beta2, +% which does not have renameParam function +% renameParam(obj.net, [firstFCName 'f'], [firstFCName 'Objf']); +% renameParam(obj.net, [firstFCName 'b'], [firstFCName 'Objb']); + +renameLayer(obj.net, ['relu' firstFCName(end)], ['relu' firstFCName(end) 'Obj']); +% renameLayer(obj.net, ['dropout' firstFCName(end)], ['dropout' firstFCName(end) 'Obj']); + +insertLayer(obj.net, ['relu' firstFCName(end) 'Obj'], secondFCName, 'dropout6Obj', dagnn.DropOut()); +% Increment last var counter as insertLayer increments it +numLastVar = numLastVar + 1; + +renameLayer(obj.net, secondFCName, [secondFCName 'Obj']); +% renameParam(obj.net, [secondFCName 'f'], [secondFCName 'Objf']); +% renameParam(obj.net, [secondFCName 'b'], [secondFCName 'Objb']); + +renameLayer(obj.net, ['relu' secondFCName(end)], ['relu' secondFCName(end) 'Obj']); +addLayer(obj.net, ['dropout' secondFCName(end) 'Obj'],dagnn.DropOut(),... + obj.net.layers(obj.net.getLayerIndex(['relu' secondFCName(end) 'Obj'])).outputs{1},... + ['x' num2str(numLastVar + 1)]); +numLastVar = numLastVar + 1; + + +addLayer(obj.net,[finalFCName 'Obj'], dagnn.Conv('size',[1 1 4096 numObjClasses]),... + obj.net.layers(obj.net.getLayerIndex(['dropout' secondFCName(end) 'Obj'])).outputs,... 
+ ['x' num2str(numLastVar +1)], {[finalFCName 'Objf'],[finalFCName 'Objb']}); +numLastVar = numLastVar + 1; + +% Initialize parameters +finalFCIdx = obj.net.getLayerIndex([finalFCName 'Obj']); +newParams = obj.net.layers(finalFCIdx).block.initParams(); +% Initialize parameters +obj.net.params(obj.net.layers(finalFCIdx).paramIndexes(1)).value = newParams{1} / std(newParams{1}(:)) * 0.001; % Girshick initialization with std of 0.001 +obj.net.params(obj.net.layers(finalFCIdx).paramIndexes(2)).value = newParams{2}; + + +%%% Now add part branch + +% First input of RoiPooling is the same +lastConvPoolIdx = obj.net.getLayerIndex(['roi', lastConvPoolName 'Obj']); + +roiPoolName = ['roi', lastConvPoolName 'Prt']; +roiPoolBlock = dagnn.RoiPooling('poolSize', roiPoolSize); +addLayer(obj.net, roiPoolName, roiPoolBlock,... + {obj.net.layers(lastConvPoolIdx).inputs{1},'oriImSize', 'boxesPrt'},... + {['x' num2str(numLastVar +1)], 'roiPoolMaskPrt'}); +numLastVar = numLastVar + 1; + +addLayer(obj.net, [firstFCName 'Prt'],... + dagnn.Conv('size',obj.net.layers(obj.net.getLayerIndex([firstFCName 'Obj'])).block.size),... + ['x' num2str(numLastVar)], ['x' num2str(numLastVar+1)], {[firstFCName 'Prtf'],[firstFCName 'Prtb']}); +numLastVar = numLastVar + 1; + +% Init params with the pre-trained network params (now in obj branch) +idxParamsObj = obj.net.layers(obj.net.getLayerIndex([firstFCName 'Obj'])).paramIndexes; +idxParamsPrt = obj.net.layers(obj.net.getLayerIndex([firstFCName 'Prt'])).paramIndexes; +obj.net.params(idxParamsPrt(1)).value = obj.net.params(idxParamsObj(1)).value; +obj.net.params(idxParamsPrt(2)).value = obj.net.params(idxParamsObj(2)).value; + +addLayer(obj.net, ['relu' firstFCName(end) 'Prt'], dagnn.ReLU,... + ['x' num2str(numLastVar)], ['x' num2str(numLastVar+1)],{}); +numLastVar = numLastVar + 1; + +addLayer(obj.net, ['dropout' firstFCName(end) 'Prt'],dagnn.DropOut(),... 
+ ['x' num2str(numLastVar)],['x' num2str(numLastVar + 1)]); +numLastVar = numLastVar + 1; + + +addLayer(obj.net, [secondFCName 'Prt'],... + dagnn.Conv('size',obj.net.layers(obj.net.getLayerIndex([secondFCName 'Obj'])).block.size),... + ['x' num2str(numLastVar)], ['x' num2str(numLastVar+1)], {[secondFCName 'Prtf'],[secondFCName 'Prtb']}); +numLastVar = numLastVar + 1; + +% Init params with the pre-trained network params (now in obj branch) +idxParamsObj = obj.net.layers(obj.net.getLayerIndex([secondFCName 'Obj'])).paramIndexes; +idxParamsPrt = obj.net.layers(obj.net.getLayerIndex([secondFCName 'Prt'])).paramIndexes; +obj.net.params(idxParamsPrt(1)).value = obj.net.params(idxParamsObj(1)).value; +obj.net.params(idxParamsPrt(2)).value = obj.net.params(idxParamsObj(2)).value; + + +addLayer(obj.net, ['relu' secondFCName(end) 'Prt'], dagnn.ReLU,... + ['x' num2str(numLastVar)], ['x' num2str(numLastVar+1)],{}); +numLastVar = numLastVar + 1; + +addLayer(obj.net, ['dropout' secondFCName(end) 'Prt'],dagnn.DropOut(),... + ['x' num2str(numLastVar)],['x' num2str(numLastVar + 1)]); +numLastVar = numLastVar + 1; + + +addLayer(obj.net,[finalFCName 'Prt'], dagnn.Conv('size',[1 1 4096 numPrtClasses]),... + ['x' num2str(numLastVar)],['x' num2str(numLastVar +1)],... + {[finalFCName 'Prtf'],[finalFCName 'Prtb']}); + +% Initialize parameters +finalFCIdx = obj.net.getLayerIndex([finalFCName 'Prt']); +newParams = obj.net.layers(finalFCIdx).block.initParams(); + +% Initialize parameters +obj.net.params(obj.net.layers(finalFCIdx).paramIndexes(1)).value = newParams{1} / std(newParams{1}(:)) * 0.001; % Girshick initialization with std of 0.001 +obj.net.params(obj.net.layers(finalFCIdx).paramIndexes(2)).value = newParams{2}; + + +%%% Add losses +softmaxlossBlock = dagnn.LossWeighted('loss', 'softmaxlog'); +addLayer(obj.net, 'softmaxlossObj', softmaxlossBlock,... 
+ {obj.net.layers(obj.net.getLayerIndex([finalFCName 'Obj'])).outputs{1}, 'labelObj','instanceWeightsObj'},{'objectiveObj'}); + + +softmaxlossBlock = dagnn.LossWeighted('loss', 'softmaxlog'); +addLayer(obj.net, 'softmaxlossPrt', softmaxlossBlock,... + {obj.net.layers(obj.net.getLayerIndex([finalFCName 'Prt'])).outputs{1}, 'labelPrt','instanceWeightsPrt'},{'objectivePrt'}); + + + +%%% Add bounding box regression layer +if obj.nnOpts.bboxRegress + finalFCLayerIdx = obj.net.getLayerIndex([finalFCName 'Obj']); + inputVars = obj.net.layers(finalFCLayerIdx).inputs; + finalFCLayerSize = size(obj.net.params(obj.net.layers(finalFCLayerIdx).paramIndexes(1)).value); + regressLayerSize = finalFCLayerSize .* [1 1 1 4]; % Four times bigger than classification layer + regressName = [finalFCName 'regressObj']; + obj.net.addLayer(regressName, dagnn.Conv('size', regressLayerSize), inputVars, {'regressionScoreObj'}, {'regressObjf', 'regressObjb'}); + regressIdx = obj.net.getLayerIndex(regressName); + newParams = obj.net.layers(regressIdx).block.initParams(); + obj.net.params(obj.net.layers(regressIdx).paramIndexes(1)).value = newParams{1} / std(newParams{1}(:)) * 0.001; % Girshick initialization with std of 0.001 + obj.net.params(obj.net.layers(regressIdx).paramIndexes(2)).value = newParams{2}; + + obj.net.addLayer('regressLossObj', dagnn.LossRegress('loss', 'Smooth', 'smoothMaxDiff', 1), ... 
+ {'regressionScoreObj', 'regressionTargetsObj', 'instanceWeightsObj'}, 'regressObjectiveObj'); + + + finalFCLayerIdx = obj.net.getLayerIndex([finalFCName 'Prt']); + inputVars = obj.net.layers(finalFCLayerIdx).inputs; + finalFCLayerSize = size(obj.net.params(obj.net.layers(finalFCLayerIdx).paramIndexes(1)).value); + regressLayerSize = finalFCLayerSize .* [1 1 1 4]; % Four times bigger than classification layer + regressName = [finalFCName 'regressPrt']; + obj.net.addLayer(regressName, dagnn.Conv('size', regressLayerSize), inputVars, {'regressionScorePrt'}, {'regressPrtf', 'regressPrtb'}); + regressIdx = obj.net.getLayerIndex(regressName); + newParams = obj.net.layers(regressIdx).block.initParams(); + obj.net.params(obj.net.layers(regressIdx).paramIndexes(1)).value = newParams{1} / std(newParams{1}(:)) * 0.001; % Girshick initialization with std of 0.001 + obj.net.params(obj.net.layers(regressIdx).paramIndexes(2)).value = newParams{2}; + + obj.net.addLayer('regressLossPrt', dagnn.LossRegress('loss', 'Smooth', 'smoothMaxDiff', 1), ... 
+ {'regressionScorePrt', 'regressionTargetsPrt', 'instanceWeightsPrt'}, 'regressObjectivePrt'); + +end + +%%% Set correct learning rates and biases (Girshick style) +if obj.nnOpts.fastRcnnParams + % Biases have learning rate of 2 and no weight decay + for lI = 1 : length(obj.net.layers) + if isa(obj.net.layers(lI).block, 'dagnn.Conv') % ADD SOMETHING HERE FOR DIFFERENT CONVs + biasI = obj.net.layers(lI).paramIndexes(2); + obj.net.params(biasI).learningRate = 2; + obj.net.params(biasI).weightDecay = 0; + end + end + + conv1I = obj.net.getLayerIndex('conv1'); % AlexNet-style networks + if isnan(conv1I) + conv1I = obj.net.getLayerIndex('conv1_1'); % VGG-16 style networks + end + obj.net.params(obj.net.layers(conv1I).paramIndexes(1)).learningRate = 0; + obj.net.params(obj.net.layers(conv1I).paramIndexes(2)).learningRate = 0; + +end diff --git a/matconvnet-calvin/matlab/@CalvinNN/testPrtObj.m b/matconvnet-calvin/matlab/@CalvinNN/testPrtObj.m new file mode 100644 index 0000000..b7889be --- /dev/null +++ b/matconvnet-calvin/matlab/@CalvinNN/testPrtObj.m @@ -0,0 +1,58 @@ +function[stats] = testPrtObj(obj) +% [stats] = test(obj) +% +% Test function +% - Does a single processing of an epoch for testing +% - Uses the nnOpts.testFn function for the testing (inside process_epoch) +% - Automatically changes softmaxloss to softmax, removes hinge loss. 
Other losses are not yet supported +% +% Copyright by Jasper Uijlings, 2015 +% Modified by Holger Caesar, 2016 +% Abel + +% Check that we only use one GPU +numGpus = numel(obj.nnOpts.gpus); +assert(numGpus <= 1); +% Replace softmaxloss layer with softmax layer +softMaxLossIdx = obj.net.getLayerIndex('softmaxlossObj'); +if ~isnan(softMaxLossIdx) + softmaxlossInput = obj.net.layers(softMaxLossIdx).inputs{1}; + obj.net.removeLayer('softmaxlossObj'); + obj.net.addLayer('softmaxObj', dagnn.SoftMax(), softmaxlossInput, 'scoresObj', {}); + softmaxIdx = obj.net.layers(obj.net.getLayerIndex('softmaxObj')).outputIndexes; + assert(numel(softmaxIdx) == 1); +end + +softMaxLossIdx = obj.net.getLayerIndex('softmaxlossPrt'); +if ~isnan(softMaxLossIdx) + softmaxlossInput = obj.net.layers(softMaxLossIdx).inputs{1}; + obj.net.removeLayer('softmaxlossPrt'); + obj.net.addLayer('softmaxPrt', dagnn.SoftMax(), softmaxlossInput, 'scoresPrt', {}); + softmaxIdx = obj.net.layers(obj.net.getLayerIndex('softmaxPrt')).outputIndexes; + assert(numel(softmaxIdx) == 1); +end + +% Remove regression loss if it's there +regressLossIdx = obj.net.getLayerIndex('regressLossPrt'); +if ~isnan(regressLossIdx) + obj.net.removeLayer('regressLossPrt'); +end + +% Remove regression loss if it's there +regressLossIdx = obj.net.getLayerIndex('regressLossObj'); +if ~isnan(regressLossIdx) + obj.net.removeLayer('regressLossObj'); +end + +% Set datasetMode in imdb +datasetMode = 'test'; +obj.net.mode = datasetMode; % Disable dropout +obj.imdb.setDatasetMode(datasetMode); +state.epoch = 1; +state.allBatchInds = obj.imdb.getAllBatchInds(); + +% Process the epoch +obj.stats.(datasetMode) = obj.processEpoch(obj.net, state); + +% The stats are the desired results +stats = obj.stats.(datasetMode); \ No newline at end of file diff --git a/matconvnet-calvin/matlab/setup/downloadPASCALParts.m b/matconvnet-calvin/matlab/setup/downloadPASCALParts.m new file mode 100644 index 0000000..250a9a0 --- /dev/null +++ 
b/matconvnet-calvin/matlab/setup/downloadPASCALParts.m @@ -0,0 +1,39 @@ +function downloadPASCALParts() +% downloadPASCALParts() +% +% Downloads and unpacks the PASCAL-Part dataset. +% + +% Settings +zipNameData = 'trainval.tar.gz'; +urlData = 'http://www.stat.ucla.edu/~xianjie.chen/pascal_part_dataset/trainval.tar.gz'; +rootFolder = calvin_root(); +datasetFolder = fullfile(rootFolder, 'data', 'Datasets', 'VOC2010'); +downloadFolder = fullfile(rootFolder, 'data', 'Downloads'); +zipFileData = fullfile(downloadFolder, zipNameData); +partsFolder = fullfile(datasetFolder, 'VOCdevkit','VOC2010'); + +% Download dataset +if ~exist(partsFolder, 'dir') + % Create folder + if ~exist(datasetFolder, 'dir') + mkdir(datasetFolder); + end + if ~exist(downloadFolder, 'dir') + mkdir(downloadFolder); + end + + % Download tar file + if ~exist(zipFileData, 'file') + fprintf('Downloading PASCAL-Part dataset...\n'); + urlwrite(urlData, zipFileData); + end + + % Untar it + fprintf('Unpacking PASCAL-Part annotations...\n'); + untar(zipFileData, partsFolder); + +end + +% Add to path +addpath(partsFolder); % fix: devkitFolder was never defined in this function \ No newline at end of file diff --git a/matconvnet-calvin/matlab/setup/setupParts.m b/matconvnet-calvin/matlab/setup/setupParts.m new file mode 100644 index 0000000..30c44eb --- /dev/null +++ b/matconvnet-calvin/matlab/setup/setupParts.m @@ -0,0 +1,38 @@ +function setupParts(varargin) +% setupParts(varargin) +% +% This includes extracting Selective Search proposals and ground-truth for +% each image in the PASCAL VOC 20xx dataset. Note that this takes about +% 4s/im or about 11h for VOC 2010.
+% +% Copyright by Holger Caesar, 2016 + +%%% Settings +% Dataset +vocYear = 2010; +trainName = 'train'; +testName = 'val'; +vocName = sprintf('VOC%d', vocYear); +global glDatasetFolder; +datasetDir = [fullfile(glDatasetFolder, vocName), '/']; +setupDataOptsPrts(vocYear, testName, datasetDir); +global DATAopts; % Database specific paths +assert(~isempty(DATAopts), 'Error: Dataset not initialized properly!'); + +%% Create IMDBs +imSet = 'train'; +imdb = createIMDB(imSet); +save(sprintf(DATAopts.imdb, imSet), 'imdb'); + + +imSet = 'val'; +imdb = createIMDB(imSet); +save(sprintf(DATAopts.imdb, imSet), 'imdb'); + + +%% GStructs + +saveGStructs('train'); + +saveGStructs('val'); +