Skip to content
This repository
Browse code

[bugfix] rbrennan sidefragment - subsequence fixes to date.

  • Loading branch information...
commit d182024a2c73d1e5029534670656dbde3e52f3f5 1 parent ed47f0c
authored September 07, 2012
54  droid-core/src/main/java/uk/gov/nationalarchives/droid/core/signature/droid6/SideFragment.java
@@ -116,6 +116,7 @@
116 116
     private int myPosition;
117 117
     private int myMinOffset;
118 118
     private int myMaxOffset;
  119
+    private int originalMinOffset;
119 120
     private SequenceMatcher matcher;
120 121
     private boolean isInvalidFragment;
121 122
   
@@ -137,13 +138,14 @@ public final void setPosition(final int thePosition) {
137 138
      * A minimum offset is the amount of bytes to skip before
138 139
      * looking for this fragment.
139 140
      *   
140  
-     * @param theMinOffset The minimum offset to begin looking for this fragment.
  141
+     * @param offset The minimum offset to begin looking for this fragment.
141 142
      */
142  
-    public final void setMinOffset(final int theMinOffset) {
143  
-        this.myMinOffset = theMinOffset;
144  
-        // ensure the maximum is never less than then minimum.
  143
+    public final void setMinOffset(final int offset) {
  144
+        this.myMinOffset = offset;
  145
+        this.originalMinOffset = offset;
  146
+        // ensure the maximum is never less than the minimum.
145 147
         if (this.myMaxOffset < this.myMinOffset) {
146  
-            this.myMaxOffset = theMinOffset;
  148
+            this.myMaxOffset = offset;
147 149
         }
148 150
     }
149 151
 
@@ -153,17 +155,41 @@ public final void setMinOffset(final int theMinOffset) {
153 155
      * than the minimum offset, then a range of bytes will be
154 156
      * searched for this fragment.
155 157
      * 
156  
-     * @param theMaxOffset The maximum offset to begin lookiing for this fragment.
  158
+     * @param offset The maximum offset to begin looking for this fragment.
157 159
      */
158  
-    public final void setMaxOffset(final int theMaxOffset) {
159  
-        this.myMaxOffset = theMaxOffset;
  160
+    public final void setMaxOffset(final int offset) {
  161
+        this.myMaxOffset = offset;
  162
+        // ensure the minimum is never greater than the maximum.
  163
+        if (this.originalMinOffset > this.myMaxOffset) {
  164
+            this.originalMinOffset = offset;
  165
+        }
160 166
         // ensure the minimum is never greater than the maximum.
161 167
         if (this.myMinOffset > this.myMaxOffset) {
162  
-            this.myMinOffset = theMaxOffset;
  168
+            this.myMinOffset = offset;
163 169
         }
164 170
     }
165 171
 
166 172
     /**
  173
+     * The next offset is the new amount of bytes to skip before looking for
  174
+     * this fragment after a previous match which failed on a subsequent fragment.
  175
+     *   
  176
+     * @param offset The next minimum offset to begin looking for this fragment.
  177
+     */
  178
+    public final void changeMinOffset(final int offset) {
  179
+        if (offset >= this.originalMinOffset && offset <= this.myMaxOffset) {
  180
+            this.myMinOffset = offset;
  181
+        }
  182
+    }
  183
+
  184
+    /**
  185
+     * Reset the minimum offset to the original amount of bytes to skip 
  186
+     * before looking for this fragment.
  187
+     */
  188
+    public final void resetMinOffset() {
  189
+        this.myMinOffset = this.originalMinOffset;
  190
+    }
  191
+
  192
+    /**
167 193
      * 
168 194
      * @param expression The regular expression defining the fragment.
169 195
      */
@@ -221,6 +247,16 @@ public final int getMinOffset() {
221 247
     }
222 248
 
223 249
     /**
  250
+     * A next offset is the new amount of bytes to skip before looking for 
  251
+     * this fragment after a previous match failed on a subsequent fragment.
  252
+     * 
  253
+     * @return The next minimum offset to begin looking for this fragment.
  254
+     */
  255
+    public final int getOriginalMinOffset() {
  256
+        return originalMinOffset;
  257
+    }
  258
+
  259
+    /**
224 260
      * A maximum offset is the largest amount of bytes to look
225 261
      * in for this fragment.  If the maximum offset is greater
226 262
      * than the minimum offset, then a range of bytes will be
437  droid-core/src/main/java/uk/gov/nationalarchives/droid/core/signature/droid6/SubSequence.java
@@ -158,6 +158,8 @@
158 158
     private int numLeftFragmentPositions;
159 159
     private int numRightFragmentPositions;
160 160
     private boolean fullFileScan;
  161
+    private boolean hasLeftRange;
  162
+    private boolean hasRightRange;
161 163
     private List<LeftFragment> leftFragments = new ArrayList<LeftFragment>();
162 164
     private List<RightFragment> rightFragments = new ArrayList<RightFragment>();
163 165
     private SequenceMatcher matcher;
@@ -167,6 +169,7 @@
167 169
     private final List<List<SideFragment>> orderedRightFragments = new ArrayList<List<SideFragment>>();
168 170
     private boolean backwardsSearch;
169 171
     private boolean isInvalidSubSequence;
  172
+    private boolean nudged;
170 173
     
171 174
     private LeftFragment getRawLeftFragment(final int theIndex) {
172 175
         return leftFragments.get(theIndex);
@@ -430,6 +433,7 @@ private void processSequenceFragments() {
430 433
                     newFrag.setMaxOffset(frag.getMaxOffset());
431 434
                     expression.append(']');
432 435
                     newFrag.setFragment(expression.toString());
  436
+//                    newFrag.setText(expression.toString());
433 437
                     List<SideFragment> newList = new ArrayList<SideFragment>();
434 438
                     newList.add(newFrag);
435 439
                     orderedLeftFragments.set(fragPos, newList);
@@ -440,6 +444,7 @@ private void processSequenceFragments() {
440 444
         // Calculate minimum and maximum size of left fragments:
441 445
         minLeftFragmentLength = 0;
442 446
         maxLeftFragmentLength = 0;
  447
+        hasLeftRange = false;
443 448
         for (int position = 0; position < orderedLeftFragments.size(); position++) {
444 449
             final List<SideFragment> fragmentList = orderedLeftFragments.get(position);
445 450
             int minFragSize = Integer.MAX_VALUE;
@@ -455,11 +460,30 @@ private void processSequenceFragments() {
455 460
                     maxFragSize = fragMaxSpace;
456 461
                 }
457 462
             }
  463
+            if (minFragSize < maxFragSize) {
  464
+                hasLeftRange = true;
  465
+            }
458 466
             minLeftFragmentLength += minFragSize;
459 467
             maxLeftFragmentLength += maxFragSize;
460 468
         }
461 469
 
462 470
         this.numLeftFragmentPositions = orderedLeftFragments.size();
  471
+        
  472
+/* testing...        
  473
+        for (int i = 0; i < numLeftFragmentPositions; i++) {
  474
+            List<SideFragment> sideFrags = orderedLeftFragments.get(i);
  475
+            for (int j = 0; j < sideFrags.size(); j++) {
  476
+                LeftFragment leftFrag = (LeftFragment) sideFrags.get(j);
  477
+                System.out.println("Fragment " + i + "," + j);
  478
+                System.out.println("...elementName: " + leftFrag.getElementName());
  479
+                System.out.println("...text:        " + leftFrag.getText());
  480
+                System.out.println("...minOffset:   " + leftFrag.getMinOffset());
  481
+                System.out.println("...maxOffset:   " + leftFrag.getMaxOffset());
  482
+                System.out.println("...numBytes:    " + leftFrag.getNumBytes());
  483
+                System.out.println("...position:    " + leftFrag.getPosition());
  484
+            }
  485
+        }
  486
+*/        
463 487
 
464 488
         //clear out unnecessary info
465 489
         this.leftFragments = null;
@@ -522,6 +546,7 @@ private void processSequenceFragments() {
522 546
         // Calculate minimum size of right fragments:
523 547
         minRightFragmentLength = 0;
524 548
         maxRightFragmentLength = 0;
  549
+        hasRightRange = false;
525 550
         for (int position = 0; position < orderedRightFragments.size(); position++) {
526 551
             final List<SideFragment> fragmentList = orderedRightFragments.get(position);
527 552
             int minFragSize = Integer.MAX_VALUE;
@@ -537,6 +562,9 @@ private void processSequenceFragments() {
537 562
                     maxFragSize = fragMaxSpace;
538 563
                 }
539 564
             }
  565
+            if (minFragSize < maxFragSize) {
  566
+                hasRightRange = true;
  567
+            }
540 568
             minRightFragmentLength += minFragSize;
541 569
             maxRightFragmentLength += maxFragSize;
542 570
         }
@@ -575,7 +603,20 @@ private boolean checkFragmentList(List<List<SideFragment>> orderedFragmentList)
575 603
         return false;
576 604
     }
577 605
     
578  
-
  606
+    /**
  607
+     * @return Whether left fragments include any with minOffset < maxOffset.
  608
+     */
  609
+    public boolean hasLeftRange() {
  610
+        return hasLeftRange;
  611
+    }
  612
+    
  613
+    /**
  614
+     * @return Whether right fragments include any with minOffset < maxOffset.
  615
+     */
  616
+    public boolean hasRightRange() {
  617
+        return hasRightRange;
  618
+    }
  619
+    
579 620
     /** Uses the Boyer-Moore-Horspool search algorithm to find a sequence within a window
580 621
      * on a file.
581 622
      *
@@ -599,6 +640,7 @@ private boolean checkFragmentList(List<List<SideFragment>> orderedFragmentList)
599 640
     public final boolean findSequenceFromPosition(final long position, 
600 641
             final ByteReader targetFile, final long maxBytesToScan,
601 642
             final boolean bofSubsequence, final boolean eofSubsequence) {
  643
+//        System.out.println("findSequenceFromPosition: " + position);
602 644
         boolean entireSequenceFound = false;
603 645
         try {
604 646
             // Local variables to speed up commonly used arrays and decisions:
@@ -664,14 +706,14 @@ public final boolean findSequenceFromPosition(final long position,
664 706
                         if (hasRightFragments) { 
665 707
                             final long[] rightFragmentPositions = 
666 708
                                 bytePosForRightFragments(reader, matchPosition + matchLength, 
667  
-                                    targetFile.getFileMarker(), 1, 0);
  709
+                                    targetFile.getFileMarker());
668 710
                             matchFound = rightFragmentPositions.length > 0;
669 711
                         }
670 712
                         if (matchFound) {
671 713
                             // Check that any left fragments, before our sequence, match.
672 714
                             if (hasLeftFragments) { 
673 715
                                 final long[] leftFragmentPositions =
674  
-                                    bytePosForLeftFragments(reader, 0, matchPosition - 1, -1, 0);
  716
+                                    bytePosForLeftFragments(reader, 0, matchPosition - 1);
675 717
                                 matchFound = leftFragmentPositions.length > 0;
676 718
                                 matchPosition = matchFound ? leftFragmentPositions[0] : matchPosition;
677 719
                             }
@@ -722,7 +764,7 @@ public final boolean findSequenceFromPosition(final long position,
722 764
                         if (hasLeftFragments) { // Check that any left fragments, behind our sequence match:
723 765
                             final long[] leftFragmentPositions = 
724 766
                                 bytePosForLeftFragments(reader, targetFile.getFileMarker(),
725  
-                                    matchPosition - matchLength, -1, 0);
  767
+                                    matchPosition - matchLength);
726 768
                             matchFound = leftFragmentPositions.length > 0;
727 769
                             
728 770
 //                            // check BOF max seq offset (bugfix)
@@ -735,7 +777,7 @@ public final boolean findSequenceFromPosition(final long position,
735 777
                         if (matchFound) {
736 778
                             if (hasRightFragments) { // Check that any right fragments after our sequence match:
737 779
                                 final long[] rightFragmentPositions = 
738  
-                                    bytePosForRightFragments(reader, matchPosition + 1, lastBytePositionInFile, 1, 0);
  780
+                                    bytePosForRightFragments(reader, matchPosition + 1, lastBytePositionInFile);
739 781
                                 matchFound = rightFragmentPositions.length > 0;
740 782
                             
741 783
                                 // check EOF max seq offset (bugfix)
@@ -775,44 +817,71 @@ public final boolean findSequenceFromPosition(final long position,
775 817
      * @param targetFile      the binary file to be identified
776 818
      * @param leftBytePos     left-most byte position of allowed search window on file
777 819
      * @param rightBytePos    right-most byte position of allowed search window on file
778  
-     * @param searchDirection 1 for a left to right search, -1 for right to left
779  
-     * @param offsetRange     range of possible start positions in the direction of searchDirection
780 820
      * @return A long array containing all possible matching positions for the left fragments.
781 821
      */
782 822
     //CHECKSTYLE:OFF - way, way, way too complex.
783  
-    private long[] bytePosForLeftFragments(final net.domesdaybook.reader.ByteReader bytes, final long leftBytePos, final long rightBytePos,
784  
-            final int searchDirection, final int offsetRange) {
  823
+    private long[] bytePosForLeftFragments(final net.domesdaybook.reader.ByteReader bytes,
  824
+            final long leftBytePos, final long rightBytePos) {
785 825
     //CHECKSTYLE:ON
  826
+//        System.out.println("bytePosForLeftFragments from " + leftBytePos + " to " + rightBytePos);
786 827
         final boolean leftFrag = true;
  828
+        final int searchBackwards = -1;       //right to left search
787 829
         
788  
-        // set up loop start and end depending on search order:
789  
-        final int numFragPos = this.numLeftFragmentPositions; // getNumFragmentPositions(leftFrag);
790  
-        long startPos;
791  
-        int posLoopStart;
792  
-        if (searchDirection == 1) {
793  
-            startPos = leftBytePos;
794  
-            posLoopStart = numFragPos;
795  
-        } else {
796  
-            startPos = rightBytePos;
797  
-            posLoopStart = 1;
798  
-        }
  830
+        // set up loop start and end
  831
+        int fromFrag = 1;
  832
+        int toFrag = this.numLeftFragmentPositions;
  833
+        long startBytePos = rightBytePos;
799 834
 
800 835
         // Calculate the total possible number of options in all the fragments:
801 836
         //TODO: can most of this calculation be done up front?
802  
-        int totalNumOptions = offsetRange + 1;
803  
-        for (int iFragPos = 1; iFragPos <= numFragPos; iFragPos++) {
804  
-            totalNumOptions = totalNumOptions * this.getNumAlternativeFragments(leftFrag, iFragPos);
  837
+        int numFragOptions = 1;
  838
+        for (int iFrag = 1; iFrag <= fromFrag; iFrag++) {
  839
+            numFragOptions = numFragOptions * this.getNumAlternativeFragments(leftFrag, iFrag);
805 840
         }
806  
-        
  841
+//        System.out.println("...numFragOptions: " + numFragOptions);
807 842
         //now set up the array so that it can potentially hold all possibilities
808  
-        long[] markerPos = new long[totalNumOptions];
809  
-        for (int iOffset = 0; iOffset <= offsetRange; iOffset++) {
810  
-            markerPos[iOffset] = startPos + iOffset * searchDirection;
  843
+        long[] markerPos = new long[numFragOptions];
  844
+        markerPos[0] = startBytePos;
  845
+        int numOptions = 1;
  846
+        
  847
+        long[] fragBytePos = new long[toFrag];
  848
+//        System.out.println("fragBytePos.length: " + fragBytePos.length);
  849
+        boolean seqNotFound = true;
  850
+        nudged = false;
  851
+        System.out.println("nudgeda: " + nudged);
  852
+        while (fromFrag > 0) {
  853
+            seqNotFound = search(bytes, fromFrag, toFrag, searchBackwards,
  854
+                    leftBytePos, startBytePos, markerPos, fragBytePos);
  855
+            
  856
+            if (seqNotFound && this.hasLeftRange) {
  857
+                fromFrag = nudgeFragments(fromFrag, toFrag, searchBackwards, startBytePos, fragBytePos);
  858
+                System.out.println("nudgeFragments returned: " + fromFrag);
  859
+                if (fromFrag > 0) {
  860
+                    nudged = true;
  861
+                    System.out.println("nudgedb: " + nudged);
  862
+                    if (fromFrag > 1) {
  863
+                        startBytePos = fragBytePos[fromFrag - 1];
  864
+                        markerPos[0] = startBytePos;
  865
+                    }
  866
+                }
  867
+            } else {
  868
+                fromFrag = 0;
  869
+            }
  870
+        }
  871
+       System.out.println("finished searchinga");
  872
+       System.out.println("nudgedc: " + nudged);
  873
+       if (nudged) {           // reset all minimum offsets
  874
+            System.out.println("resetting left minOffsets");
  875
+            for (int i = 0; i < orderedLeftFragments.size(); i++) {
  876
+                final List<SideFragment> frags = orderedLeftFragments.get(i);
  877
+                for (int j = 0; j < frags.size(); j++) {
  878
+                    final SideFragment frag = frags.get(j);
  879
+                    frag.resetMinOffset();
  880
+                }
  881
+            }
811 882
         }
812  
-        int numOptions = 1 + offsetRange;
813  
-
814 883
         // Search for the fragments:
815  
-        boolean seqNotFound = false;
  884
+/*        boolean seqNotFound = false;
816 885
         for (int iFragPos = posLoopStart; (!seqNotFound) && (iFragPos <= numFragPos) && (iFragPos >= 1);
817 886
             iFragPos -= searchDirection) {
818 887
             final List<SideFragment> fragmentsAtPosition = orderedLeftFragments.get(iFragPos - 1);
@@ -839,6 +908,7 @@ public final boolean findSequenceFromPosition(final long position,
839 908
                     }
840 909
                     if (tempFragEnd > -1L) { // a match has been found
841 910
                         tempEndPos[numEndPos] = tempFragEnd + searchDirection;
  911
+                        System.out.println("tempEndPos(" + numEndPos + "): " + tempEndPos[numEndPos]);
842 912
                         numEndPos += 1;
843 913
                     }
844 914
                 }
@@ -863,7 +933,7 @@ public final boolean findSequenceFromPosition(final long position,
863 933
                 }
864 934
             }
865 935
         }
866  
-
  936
+*/
867 937
         //prepare array to be returned
868 938
         if (seqNotFound) {
869 939
             // no possible positions found, return 0 length array
@@ -874,20 +944,16 @@ public final boolean findSequenceFromPosition(final long position,
874 944
 
875 945
         // convert values to negative temporarily so that reverse sort order 
876 946
         // can be obtained for a right to left search direction
877  
-        if (searchDirection < 0) {
878  
-            for (int iOption = 0; iOption < numOptions; iOption++) {
879  
-                markerPos[iOption] = -markerPos[iOption];
880  
-            }
  947
+        for (int iOption = 0; iOption < numOptions; iOption++) {
  948
+            markerPos[iOption] = -markerPos[iOption];
881 949
         }
882 950
 
883 951
         //sort the values in the array
884 952
         Arrays.sort(markerPos, 0, numOptions);
885 953
 
886 954
         //convert values back to positive now that a reverse sort order has been obtained
887  
-        if (searchDirection < 0) {
888  
-            for (int iOption = 0; iOption < numOptions; iOption++) {
889  
-                markerPos[iOption] = -markerPos[iOption];
890  
-            }
  955
+        for (int iOption = 0; iOption < numOptions; iOption++) {
  956
+            markerPos[iOption] = -markerPos[iOption];
891 957
         }
892 958
 
893 959
         //copy to a new array which has precisely the correct length
@@ -895,12 +961,14 @@ public final boolean findSequenceFromPosition(final long position,
895 961
 
896 962
         //correct the value
897 963
         for (int iOption = 0; iOption < numOptions; iOption++) {
898  
-            outArray[iOption] -= searchDirection;
  964
+            outArray[iOption]++;
899 965
         }
900  
-
  966
+/*        System.out.println("returning outArray...");
  967
+        for (int i = 0; i < outArray.length; i++)
  968
+            System.out.println("..." + outArray[i]); */
901 969
         return outArray;
902 970
     }
903  
-
  971
+    
904 972
     /**
905 973
      * Searches for the right fragments of this subsequence between the given byte
906 974
      * positions in the file.  Either returns the last byte taken up by the
@@ -909,38 +977,70 @@ public final boolean findSequenceFromPosition(final long position,
909 977
      * @param bytes           the binary file to be identified
910 978
      * @param leftBytePos     left-most byte position of allowed search window on file
911 979
      * @param rightBytePos    right-most byte position of allowed search window on file
912  
-     * @param searchDirection 1 for a left to right search, -1 for right to left
913  
-     * @param offsetRange     range of possible start positions in the direction of searchDirection
914 980
      * @return
915 981
      */
916 982
     //CHECKSTYLE:OFF - way, way, way too complex.
917  
-    private long[] bytePosForRightFragments(final net.domesdaybook.reader.ByteReader bytes, final long leftBytePos, final long rightBytePos,
918  
-            final int searchDirection, final int offsetRange) {
  983
+    private long[] bytePosForRightFragments(final net.domesdaybook.reader.ByteReader bytes,
  984
+            final long leftBytePos, final long rightBytePos) {
919 985
     //CHECKSTYLE:ON
920  
-        final boolean leftFrag = false;
921  
-        long startPos = leftBytePos;
922  
-        int posLoopStart = 1;
923  
-        final int numFragPos = numRightFragmentPositions; 
924  
-        if (searchDirection == -1) {
925  
-            startPos = rightBytePos;
926  
-            posLoopStart = numFragPos;
927  
-        }
  986
+        System.out.println("bytePosForRightFragments from " + leftBytePos + " to " + rightBytePos);
  987
+        final boolean rightFrag = false;
  988
+        final int searchForwards = 1;       //left to right search
  989
+        
  990
+        long startBytePos = leftBytePos;
  991
+        int fromFrag = 1;
  992
+        final int toFrag = numRightFragmentPositions; 
928 993
 
929 994
         //now set up the array so that it can potentially hold all possibilities
930  
-        int totalNumOptions = offsetRange + 1;
931  
-        for (int iFragPos = 1; iFragPos <= numFragPos; iFragPos++) {
932  
-            totalNumOptions = totalNumOptions * this.getNumAlternativeFragments(leftFrag, iFragPos);
  995
+        int totalNumOptions = 1;
  996
+        for (int iFrag = 1; iFrag <= toFrag; iFrag++) {
  997
+            totalNumOptions = totalNumOptions * this.getNumAlternativeFragments(rightFrag, iFrag);
933 998
         }
934 999
         long[] markerPos = new long[totalNumOptions];
935  
-        for (int iOffset = 0; iOffset <= offsetRange; iOffset++) {
936  
-            markerPos[iOffset] = startPos + iOffset * searchDirection;
  1000
+        markerPos[0] = startBytePos;
  1001
+        int numOptions = 1;
  1002
+
  1003
+        long[] fragBytePos = new long[toFrag];
  1004
+        boolean seqNotFound = true;
  1005
+        nudged = false;
  1006
+        System.out.println("nudged1: " + nudged);
  1007
+        while (fromFrag > 0) {
  1008
+            seqNotFound = search(bytes, fromFrag, toFrag, searchForwards,
  1009
+                    startBytePos, rightBytePos, markerPos, fragBytePos);
  1010
+            
  1011
+            if (seqNotFound && this.hasRightRange) {
  1012
+                fromFrag = nudgeFragments(fromFrag, toFrag, searchForwards, startBytePos, fragBytePos);
  1013
+                System.out.println("nudgeFragments returned: " + fromFrag);
  1014
+                if (fromFrag > 0) {
  1015
+                    nudged = true;
  1016
+                    System.out.println("nudged2: " + nudged);
  1017
+                    if (fromFrag > 1) {
  1018
+                        startBytePos = fragBytePos[fromFrag - 1];
  1019
+                        markerPos[0] = startBytePos;
  1020
+                    }
  1021
+                    System.out.println("nudged3: " + nudged);
  1022
+                }
  1023
+                    System.out.println("nudged4: " + nudged);
  1024
+            } else {
  1025
+                fromFrag = 0;
  1026
+                System.out.println("nudged5: " + nudged);
  1027
+            }
  1028
+            System.out.println("nudged6: " + nudged);
937 1029
         }
938  
-        int numOptions = 1 + offsetRange;
939  
-
940  
-        boolean seqNotFound = false;
941  
-        for (int iFragPos = posLoopStart; 
942  
-            (!seqNotFound) && (iFragPos <= numFragPos) && (iFragPos >= 1);
943  
-            iFragPos += searchDirection) {
  1030
+        System.out.println("finished searching1");
  1031
+        System.out.println("nudged7: " + nudged);
  1032
+        if (nudged) {           // reset all minimum offsets
  1033
+            System.out.println("resetting right minOffsets");
  1034
+            for (int i = 0; i < orderedRightFragments.size(); i++) {
  1035
+                final List<SideFragment> frags = orderedRightFragments.get(i);
  1036
+                for (int j = 0; j < frags.size(); j++) {
  1037
+                    final SideFragment frag = frags.get(j);
  1038
+                    frag.resetMinOffset();
  1039
+                }
  1040
+            }
  1041
+        }
  1042
+/*        for (int iFragPos = posLoopStart; 
  1043
+            (!seqNotFound) && (iFragPos <= numFragPos) && (iFragPos >= 1); iFragPos++) {
944 1044
             final List<SideFragment> fragmentsAtPosition = orderedRightFragments.get(iFragPos - 1);
945 1045
             final int numAltFrags = fragmentsAtPosition.size();
946 1046
             //array to store possible end positions after this fragment position has been examined
@@ -989,7 +1089,7 @@ public final boolean findSequenceFromPosition(final long position,
989 1089
                 }
990 1090
             }
991 1091
         }
992  
-
  1092
+*/
993 1093
         //prepare array to be returned
994 1094
         if (seqNotFound) {
995 1095
             // no possible positions found, return 0 length array
@@ -998,53 +1098,205 @@ public final boolean findSequenceFromPosition(final long position,
998 1098
         // return ordered array of possibilities
999 1099
         long[] outArray = new long[numOptions];
1000 1100
 
1001  
-        // convert values to negative temporarily so that reverse
1002  
-        // sort order can be obtained for a right to left search direction
1003  
-        if (searchDirection < 0) {
1004  
-            for (int iOption = 0; iOption < numOptions; iOption++) {
1005  
-                markerPos[iOption] = -markerPos[iOption];
1006  
-            }
1007  
-        }
1008  
-
1009 1101
         //sort the values in the array
1010 1102
         Arrays.sort(markerPos, 0, numOptions);
1011 1103
 
1012  
-        //convert values back to positive now that a reverse sort order has been obtained
1013  
-        if (searchDirection < 0) {
1014  
-            for (int iOption = 0; iOption < numOptions; iOption++) {
1015  
-                markerPos[iOption] = -markerPos[iOption];
1016  
-            }
1017  
-        }
1018  
-
1019 1104
         //copy to a new array which has precisely the correct length
1020 1105
         System.arraycopy(markerPos, 0, outArray, 0, numOptions);
1021 1106
 
1022 1107
         //correct the value
1023 1108
         for (int iOption = 0; iOption < numOptions; iOption++) {
1024  
-            outArray[iOption] -= searchDirection;
  1109
+            outArray[iOption]--;
1025 1110
         }
1026 1111
 
1027 1112
         return outArray;
1028 1113
     }
  1114
+    
  1115
+    /**
  1116
+     * Search for fragment group
  1117
+     * 
  1118
+     * @param bytes         byte reader
  1119
+     * @param fromFrag      first fragment to find
  1120
+     * @param toFrag        last fragment to find
  1121
+     * @param searchDir     search direction
  1122
+     * @param leftByte      leftmost file position to search
  1123
+     * @param rightByte     rightmost file position to search
  1124
+     * @param markerByte
  1125
+     * @param fragPos       fragment positions in file
  1126
+     * @return seqNotFound  search result
  1127
+     */
  1128
+    private boolean search(final net.domesdaybook.reader.ByteReader bytes,
  1129
+            final int fromFrag, final int toFrag, final int searchDir,
  1130
+            final long leftByte, final long rightByte, final long[] markerByte,
  1131
+            final long[] fragPos) {
  1132
+        
  1133
+        System.out.println("search from " + fromFrag + " to " + toFrag + " from " +
  1134
+                (searchDir > 0 ? "left to right" : "right to left"));
  1135
+        System.out.println("... file leftByte: " + leftByte + ", file rightByte: " + rightByte);
  1136
+        // Search for the fragments:
  1137
+        int iFragPos = 0;
  1138
+        boolean seqNotFound = false;
  1139
+        for (int iFrag = fromFrag; (iFrag <= toFrag) && (iFrag >= 1) && !seqNotFound; iFrag++) {
  1140
+            final List<SideFragment> altFragments = searchDir > 0 ?
  1141
+                    orderedRightFragments.get(iFrag - 1) : orderedLeftFragments.get(iFrag - 1);
  1142
+            final int altFrags = altFragments.size();
  1143
+            //array to store possible end positions after this fragment position has been examined
  1144
+            long[] tempEndPos = new long[altFrags]; 
  1145
+
  1146
+            int numEndPos = 0;
  1147
+            //will now look for all matching alternative sequence at the current end positions
  1148
+            for (int iAlt = 0; iAlt < altFrags; iAlt++) {
  1149
+                final SideFragment altFragment = altFragments.get(iAlt);
  1150
+                long tempFragEnd;
  1151
+                if (searchDir == 1) {    //searching left to right for right fragments
  1152
+                    tempFragEnd = 
  1153
+                        this.endBytePosForSeqFrag(bytes, markerByte[0], 
  1154
+                                rightByte, false, searchDir, 
  1155
+                                iFrag, altFragment);
  1156
+                } else {                //searching right to left for left fragments
  1157
+                    tempFragEnd = 
  1158
+                        this.endBytePosForSeqFrag(bytes, leftByte, 
  1159
+                                markerByte[0], true, searchDir, 
  1160
+                                iFrag, altFragment);
  1161
+                }
  1162
+                if (tempFragEnd > -1L) { // a match has been found
  1163
+                    tempEndPos[numEndPos] = tempFragEnd + searchDir;
  1164
+//                    System.out.println("tempEndPos(" + numEndPos + "): " + tempEndPos[numEndPos]);
  1165
+                    numEndPos += 1;
  1166
+                    fragPos[iFragPos] = tempFragEnd;
  1167
+                    iFragPos++;
  1168
+                }
  1169
+            }
  1170
+            if (numEndPos == 0) {
  1171
+                seqNotFound = true;
  1172
+            } else {
  1173
+                int opts = 0;
  1174
+                for (int iOption = 0; iOption < numEndPos; iOption++) {
  1175
+                    //eliminate any repeated end positions
  1176
+                    boolean addEndPos = true;
  1177
+                    for (int iMarker = 0; iMarker < opts; iMarker++) {
  1178
+                        if (markerByte[iMarker] == tempEndPos[iOption]) {
  1179
+                            addEndPos = false;
  1180
+                            break;
  1181
+                        }
  1182
+                    }
  1183
+                    if (addEndPos) {
  1184
+                        markerByte[opts] = tempEndPos[iOption];
  1185
+                        opts++;
  1186
+                    }
  1187
+                }
  1188
+            }
  1189
+        }
  1190
+        return seqNotFound;
  1191
+    }
  1192
+
  1193
+    /**
  1194
+     * Increase next minimum offset beyond previously found offset for a variable-length range
  1195
+     * 
  1196
+     * @param fromFrag  first fragment to find
  1197
+     * @param toFrag    last fragment to find
  1198
+     * @param searchDir search towards right/left (1/-1)
  1199
+     * @param startPos  starting file position to search from
  1200
+     * @param fragPos   fragment positions in file
  1201
+     * @return fragment number to continue searching for, or 0 if all variable-length ranges searched
  1202
+     */
  1203
+    private int nudgeFragments(final int fromFrag, final int toFrag, final int searchDir,
  1204
+            final long startPos, final long[] fragPos) {
  1205
+
  1206
+        
  1207
+        // problem area here
  1208
+        // if 0 is simply returned, identification runs ok but without handling the variable-length range bug.
  1209
+        
  1210
+        
  1211
+        System.out.println("nudging fragments from: " + fromFrag + " to " + toFrag +
  1212
+                " for " + (searchDir > 0 ? "left to right" : "right to left") +
  1213
+                " search from file startPos " + startPos);
  1214
+        boolean diag = false;
  1215
+        if (startPos == 26) {
  1216
+            diag = true;
  1217
+        }
  1218
+        int reply = 0;
  1219
+        final int[] actOff = new int[toFrag - fromFrag + 1];
  1220
+        long prevPos = startPos;
  1221
+        // get actual offsets of fragments found so far
  1222
+        for (int i = fromFrag - 1; i < toFrag && (fragPos[i] > 0); i++) {
  1223
+            actOff[i] = ((int) (fragPos[i] - prevPos) * searchDir) - 1;
  1224
+            if (diag) {
  1225
+                System.out.println("fragPos[" + i + "]: " + fragPos[i]);
  1226
+                System.out.println("prevPos: " + prevPos);
  1227
+                System.out.println("actOff[" + i + "]: " + actOff[i]);
  1228
+            }
  1229
+            prevPos = fragPos[i];
  1230
+        }
  1231
+        // increase next range minimum offset, if found
  1232
+        for (int i = toFrag - 1; i >= fromFrag - 1; i--) {
  1233
+//            System.out.println("fragPos: " + fragPos[i]);
  1234
+//            System.out.println("actOff: " + actOff[i]);
  1235
+            if (fragPos[i] > 0) {                                   //fragment found?
  1236
+                List<SideFragment> altFrags = searchDir > 0 ?
  1237
+                        orderedRightFragments.get(i) : orderedLeftFragments.get(i);
  1238
+                boolean done = false;
  1239
+                for (int j = 0; j < altFrags.size(); j++) {
  1240
+                    SideFragment altFrag = altFrags.get(j);
  1241
+                    final int origMinOff = altFrag.getOriginalMinOffset();
  1242
+                    final int minOff = altFrag.getMinOffset();
  1243
+                    final int maxOff = altFrag.getMaxOffset();
  1244
+/*                    System.out.println("originalMinOff: " + origMinOff);
  1245
+                    System.out.println("minOff: " + minOff);
  1246
+                    System.out.println("maxOff: " + maxOff); */  
  1247
+                    if (origMinOff < maxOff) {                      //varying-length range?
  1248
+                        if (minOff < maxOff) {                      //not searched all range?
  1249
+                            altFrag.changeMinOffset(actOff[i] + 1); //force to search more range
  1250
+                            if (diag)
  1251
+                                System.out.println("changed minOffset from " + minOff + " to " + actOff[i] + 1);
  1252
+                            done = true;
  1253
+                        } else {
  1254
+                            altFrag.resetMinOffset();               //reset to start of range
  1255
+                            if (diag)
  1256
+                                System.out.println("reset minOffset from " + minOff);
  1257
+                        }
  1258
+                    }
  1259
+//                    altFrag = null;
  1260
+                }
  1261
+                if (done) {
  1262
+                    for (int k = i + 1; k <= toFrag - 1; k++) {
  1263
+                        fragPos[k] = 0;
  1264
+                    }
  1265
+                    reply = i + 1;
  1266
+                }
  1267
+//                altFrags = null;
  1268
+            }
  1269
+        }
  1270
+        return reply;
  1271
+    }
1029 1272
 
1030 1273
     /**
1031 1274
      * searches for the specified fragment sequence
1032 1275
      * between the leftmost and rightmost byte positions that are given.
1033 1276
      * returns the end position of the found sequence or -1 if it is not found
1034 1277
      *
1035  
-     * @param targetFile      The file that is being reviewed for identification
1036  
-     * @param leftEndBytePos  leftmost position in file at which to search
1037  
-     * @param rightEndBytePos rightmost postion in file at which to search-
1038  
-     * @param leftFrag        flag to indicate whether looking at left or right fragments
1039  
-     * @param searchDirection direction in which search is carried out (1 for left to right, -1 for right to left)
1040  
-     * @param fragPos         position of left/right sequence fragment to use
1041  
-     * @param fragIndex       index of fragment within the position (where alternatives exist)
1042  
-     * @return
  1278
+     * @param bytes             byte reader for file to be identified
  1279
+     * @param leftEndBytePos    leftmost position in file at which to search
  1280
+     * @param rightEndBytePos   rightmost position in file at which to search
  1281
+     * @param leftFrag          flag to indicate whether looking at left or right fragments
  1282
+     * @param searchDirection   direction in which search is carried out (1 for left to right, -1 for right to left)
  1283
+     * @param fragPos           position of left/right sequence fragment to use
  1284
+     * @param fragment          fragment to search for
  1285
+     * @return                  
1043 1286
      */
1044 1287
     //CHECKSTYLE:OFF too long and complex.
1045 1288
     private long endBytePosForSeqFrag(final net.domesdaybook.reader.ByteReader bytes, 
1046  
-            final long leftEndBytePos, final long rightEndBytePos,
1047  
-            final boolean leftFrag, final int searchDirection, final int fragPos, final SideFragment fragment) {
  1289
+            final long leftEndBytePos, final long rightEndBytePos, final boolean leftFrag,
  1290
+            final int searchDirection, final int fragPos, final SideFragment fragment) {
  1291
+/*        System.out.println("endBytePosForSeqFrag...");
  1292
+        System.out.println("...leftEndBytePos: " + leftEndBytePos);
  1293
+        System.out.println("...rightEndBytePos: " + rightEndBytePos);
  1294
+        System.out.println("...fragPos: " + fragPos);
  1295
+        System.out.println("...fragment...");
  1296
+        System.out.println("......originalMinOffset: " + fragment.getOriginalMinOffset());
  1297
+        System.out.println("......minOffset: " + fragment.getMinOffset());
  1298
+        System.out.println("......maxOffset: " + fragment.getMaxOffset());
  1299
+        System.out.println("......text: " + fragment.getText()); */
1048 1300
     //CHECKSTYLE:ON
1049 1301
         long startPosInFile;
1050 1302
         long lastStartPosInFile;
@@ -1069,7 +1321,6 @@ private long endBytePosForSeqFrag(final net.domesdaybook.reader.ByteReader bytes
1069 1321
             minOffset = 0;
1070 1322
             maxOffset = 0;
1071 1323
         }
1072  
-
1073 1324
         // set up start and end positions for searches taking into account min and max offsets
1074 1325
         if (searchDirection == -1) {
1075 1326
             startPosInFile = rightEndBytePos - minOffset;

0 notes on commit d182024

Please sign in to comment.
Something went wrong with that request. Please try again.