Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Fix the bug where PForDeltaDocIdSet does not call PForDelta.compressOn…

…eBlockOpt(), and add new tests to cover all the test cases that the old P4D covers
  • Loading branch information...
commit dd4aeba47b382e47892aafee5ff2c189238380dd 1 parent 73dede5
Hao Yan authored
8 src/main/java/com/kamikaze/docidset/compression/PForDeltaWithBase.java
@@ -42,20 +42,20 @@ public int estimateCompSize(int[] inputBlock, int bits, int blockSize) throws Il
42 42
43 43 @Override
44 44 public CompResult compressOneBlock(int[] inputBlock, int bits, int blockSize, boolean flag) throws IllegalArgumentException {
45   - return compressOneBlock(inputBlock, bits, blockSize);
  45 + return compressOneBlock(inputBlock, blockSize);
46 46 }
47 47
48 48 /**
49 49 * Compress an integer array
50 50 *
51 51 * @param inputBlock the integer input array
52   - * @param bits the value of b in the PForDelta algorithm
53 52 * @param blockSize the block size which is 256 by default
54 53 * @return CompResult which contains the compressed size in number of bits and the reference to the compressed data
55 54 * @throws IllegalArgumentException
56 55 */
57   - public CompResult compressOneBlock(int[] inputBlock, int bits, int blockSize) throws IllegalArgumentException {
58   - int[] compBlock = PForDelta.compressOneBlock(inputBlock, bits, blockSize);
  56 + public CompResult compressOneBlock(int[] inputBlock, int blockSize) throws IllegalArgumentException {
  57 +
  58 + int[] compBlock = PForDelta.compressOneBlockOpt(inputBlock, blockSize);
59 59 CompResult res = new CompResult();
60 60 res.setCompressedSize(compBlock.length<<5);
61 61 res.setCompressedBlock(compBlock);
20 src/main/java/com/kamikaze/docidset/impl/PForDeltaDocIdSet.java
@@ -14,6 +14,7 @@
14 14 import com.kamikaze.docidset.utils.CompResult;
15 15 import com.kamikaze.docidset.utils.IntArray;
16 16 import com.kamikaze.docidset.utils.PForDeltaIntSegmentArray;
  17 +import com.kamikaze.pfordelta.PForDelta;
17 18
18 19 /**
19 20 * This class implements the DocId set which is built on top of the optimized PForDelta algorithm (PForDeltaWithBase)
@@ -415,9 +416,9 @@ public void flush(int docId)
415 416 * Compress one block of integers using PForDelta
416 417 *
417 418 */
418   - private CompResult PForDeltaCompressOneBlock(int[] srcData, int b)
  419 + private CompResult PForDeltaCompressOneBlock(int[] srcData)
419 420 {
420   - CompResult compRes = compBlockWithBase.compressOneBlock(srcData, b, _blockSize);
  421 + CompResult compRes = compBlockWithBase.compressOneBlock(srcData, _blockSize);
421 422 return compRes;
422 423 }
423 424
@@ -590,20 +591,9 @@ private CompResult PForDeltaCompressCurrentBlock()
590 591 int tmpB = currentB;
591 592
592 593 preProcessBlock(currentNoCompBlock, sizeOfCurrentNoCompBlock);
593   - int optSize = PForDeltaEstimateCompSize(currentNoCompBlock, tmpB);
594   -
595   - for (int i = 1; i < POSSIBLE_B.length; ++i)
596   - {
597   - tmpB = POSSIBLE_B[i];
598   - int curSize = PForDeltaEstimateCompSize(currentNoCompBlock, tmpB);
599   - if(curSize < optSize)
600   - {
601   - currentB = tmpB;
602   - optSize = curSize;
603   - }
604   - }
  594 +
605 595 // return the compressed data achieved from the best b
606   - CompResult finalRes = PForDeltaCompressOneBlock(currentNoCompBlock, currentB);
  596 + CompResult finalRes = PForDeltaCompressOneBlock(currentNoCompBlock);
607 597 return finalRes;
608 598 }
609 599
8 src/main/java/com/kamikaze/docidset/utils/DocSetFactory.java
@@ -84,8 +84,8 @@ public static DocSet getDocSetInstance(int min, int max, int count, FOCUS hint)
84 84 return new OBSDocIdSet(max-min+1);
85 85
86 86 else
87   - //return new PForDeltaDocIdSet();
88   - return new P4DDocIdSet();
  87 + return new PForDeltaDocIdSet();
  88 + // return new P4DDocIdSet();
89 89
90 90 // All cases in consideration
91 91 case OPTIMAL:
@@ -94,8 +94,8 @@ public static DocSet getDocSetInstance(int min, int max, int count, FOCUS hint)
94 94 if(count < AbstractDocSet.DEFAULT_BATCH_SIZE)
95 95 return new IntArrayDocIdSet(count);
96 96 else
97   - //return new PForDeltaDocIdSet();
98   - return new P4DDocIdSet();
  97 + return new PForDeltaDocIdSet();
  98 + //return new P4DDocIdSet();
99 99 }
100 100 else if((((max-min)>>>LONG_SHIFT)+1)*2*INT_SIZE > count * INT_SIZE)
101 101 return new IntArrayDocIdSet(count);
8 src/main/java/com/kamikaze/pfordelta/Simple16.java
@@ -51,7 +51,8 @@
51 51 */
52 52 public static final int s16Compress(int[] out, int outOffset, int[] in, int inOffset, int n, int blockSize)
53 53 {
54   - int numIdx, j, num, bits;
  54 + int numIdx=0, j=0, num=0, bits=0;
  55 + try{
55 56 for (numIdx = 0; numIdx < S16_NUMSIZE; numIdx++)
56 57 {
57 58 out[outOffset] = numIdx<<S16_BITSSIZE;
@@ -69,6 +70,11 @@ public static final int s16Compress(int[] out, int outOffset, int[] in, int inOf
69 70 return num;
70 71 }
71 72 }
  73 + }catch(Exception e)
  74 + {
  75 + System.out.println("s16Compress: " + "numIdx:" + numIdx + ",j:" + j + ",num:" + num + ",bits: " + bits);
  76 + e.printStackTrace();
  77 + }
72 78
73 79 return -1;
74 80 }
19 src/test/java/com/kamikaze/test/PForDeltaMultiThreadedAccessTest.java
@@ -11,25 +11,6 @@
11 11 import com.kamikaze.docidset.api.StatefulDSIterator;
12 12 import com.kamikaze.docidset.impl.PForDeltaDocIdSet;
13 13
14   -//public class PForDeltaMultiThreadedAccessTest {
15   -// public static void main(String[] args)
16   -// {
17   -// PForDeltaMultiThreadedAccessTest1 t1= new PForDeltaMultiThreadedAccessTest1();
18   -// try{
19   -// t1.testSkipPerformance();
20   -// t1.testMultiThreadedFind();
21   -// }
22   -// catch(IOException eio)
23   -// {
24   -// eio.printStackTrace();
25   -// }
26   -// catch(InterruptedException ei)
27   -// {
28   -// ei.printStackTrace();
29   -// }
30   -// }
31   -//}
32   -
33 14 // testing multiple threads: all threads share the same PForDeltaDocId set, and each thread has its own iterator iterating on it (only read operations).
34 15 public class PForDeltaMultiThreadedAccessTest {
35 16 int _length = 10;
6 ...t/java/com/kamikaze/test/TestBooleanDocIdSet.java → ...m/kamikaze/test/PForDeltaTestBooleanDocIdSet.java
@@ -16,9 +16,9 @@
16 16 import com.kamikaze.docidset.impl.NotDocIdSet;
17 17 import com.kamikaze.docidset.impl.OrDocIdSet;
18 18
19   -public class TestBooleanDocIdSet extends TestCase {
  19 +public class PForDeltaTestBooleanDocIdSet extends TestCase {
20 20 @Test
21   - public void _testOrDocIdSet() throws Exception
  21 + public void testOrDocIdSet() throws Exception
22 22 {
23 23 System.out.println("Running testOrDocIdSet() Test case...");
24 24 DocIdSet[] DocList;
@@ -68,7 +68,7 @@ public void testNotDocIdSet() throws Exception
68 68 }
69 69
70 70 @Test
71   - public void _testAndDocIdSet() throws Exception
  71 + public void testAndDocIdSet() throws Exception
72 72 {
73 73 System.out.println("Running testOrDocIdSet() Test case...");
74 74 DocIdSet[] DocList;
10 ...est/java/com/kamikaze/test/TestDocSetFactory.java → ...com/kamikaze/test/PForDeltaTestDocSetFactory.java
@@ -7,13 +7,13 @@
7 7
8 8 import com.kamikaze.docidset.utils.DocSetFactory;
9 9 import com.kamikaze.docidset.utils.DocSetFactory.FOCUS;
10   -public class TestDocSetFactory {
  10 +public class PForDeltaTestDocSetFactory {
11 11
12 12 private static int batch = 128;
13 13
14 14 private static String serial = "SerialDocSet";
15 15
16   - public TestDocSetFactory() {
  16 + public PForDeltaTestDocSetFactory() {
17 17
18 18 }
19 19
@@ -32,7 +32,7 @@ public void testDocSetFactory() {
32 32
33 33 assertEquals(set.getClass().getName(), "com.kamikaze.docidset.impl.IntArrayDocIdSet");
34 34 set = DocSetFactory.getDocSetInstance(min, max, count, FOCUS.SPACE);
35   - assertEquals(set.getClass().getName(), "com.kamikaze.docidset.impl.P4DDocIdSet");
  35 + assertEquals(set.getClass().getName(), "com.kamikaze.docidset.impl.PForDeltaDocIdSet");
36 36 //assertEquals(set.getClass().getName(), "com.kamikaze.docidset.impl.PForDeltaDocIdSet");
37 37 set = DocSetFactory.getDocSetInstance(min, max, count, FOCUS.OPTIMAL);
38 38 assertEquals(set.getClass().getName(), "com.kamikaze.docidset.impl.IntArrayDocIdSet");
@@ -46,7 +46,7 @@ public void testDocSetFactory() {
46 46
47 47 count *=10000;
48 48 set = DocSetFactory.getDocSetInstance(min, max, count, FOCUS.OPTIMAL);
49   - assertEquals(set.getClass().getName(), "com.kamikaze.docidset.impl.P4DDocIdSet");
  49 + assertEquals(set.getClass().getName(), "com.kamikaze.docidset.impl.PForDeltaDocIdSet");
50 50
51 51 // count *=10000;
52 52 // set = DocSetFactory.getDocSetInstance(min, max, count, FOCUS.OPTIMAL);
@@ -67,7 +67,7 @@ public void testDocSetFactory() {
67 67 count /= 10000000;
68 68
69 69 set = DocSetFactory.getDocSetInstance(min, max, count, FOCUS.SPACE);
70   - assertEquals(set.getClass().getName(), "com.kamikaze.docidset.impl.P4DDocIdSet");
  70 + assertEquals(set.getClass().getName(), "com.kamikaze.docidset.impl.PForDeltaDocIdSet");
71 71
72 72 // set = DocSetFactory.getDocSetInstance(min, max, count, FOCUS.SPACE);
73 73 // assertEquals(set.getClass().getName(), "com.kamikaze.docidset.impl.PForDeltaDocIdSet");
124 src/test/java/com/kamikaze/test/PForDeltaTestDocSetSerialization.java
... ... @@ -0,0 +1,124 @@
  1 +package com.kamikaze.test;
  2 +
  3 +import static org.junit.Assert.assertEquals;
  4 +import static org.junit.Assert.assertFalse;
  5 +import static org.junit.Assert.fail;
  6 +
  7 +import java.io.BufferedInputStream;
  8 +import java.io.File;
  9 +import java.io.FileInputStream;
  10 +import java.io.FileOutputStream;
  11 +import java.io.IOException;
  12 +import java.io.InputStream;
  13 +import java.io.ObjectInputStream;
  14 +import java.io.ObjectOutputStream;
  15 +import java.util.ArrayList;
  16 +import java.util.Collections;
  17 +import java.util.List;
  18 +import java.util.Random;
  19 +import java.util.Set;
  20 +import java.util.TreeSet;
  21 +
  22 +import org.apache.lucene.search.DocIdSet;
  23 +import org.apache.lucene.search.DocIdSetIterator;
  24 +import org.apache.lucene.util.OpenBitSet;
  25 +import org.junit.Test;
  26 +
  27 +import com.kamikaze.docidset.api.DocSet;
  28 +import com.kamikaze.docidset.api.StatefulDSIterator;
  29 +import com.kamikaze.docidset.bitset.MyOpenBitSet;
  30 +import com.kamikaze.docidset.impl.AndDocIdSet;
  31 +import com.kamikaze.docidset.impl.IntArrayDocIdSet;
  32 +import com.kamikaze.docidset.impl.NotDocIdSet;
  33 +import com.kamikaze.docidset.impl.OBSDocIdSet;
  34 +import com.kamikaze.docidset.impl.OrDocIdSet;
  35 +import com.kamikaze.docidset.impl.PForDeltaDocIdSet;
  36 +
  37 +public class PForDeltaTestDocSetSerialization {
  38 +
  39 + private static int batch = 128;
  40 +
  41 + private static String serial = "src/test/test-data/SerialDocSet";
  42 +
  43 + public PForDeltaTestDocSetSerialization() {
  44 +
  45 + }
  46 +
  47 +
  48 + @Test
  49 + public void testNotDocSetSerialization() throws Exception {
  50 +
  51 + System.out.println("");
  52 + System.out.println("Running NotDocIdSet Serialization Test case...");
  53 + System.out.println("----------------------------");
  54 +
  55 + Random random = new Random();
  56 +
  57 + int randomizer = 0;
  58 + int b = 0;
  59 + int length = 1000;
  60 + int max = 5400;
  61 +
  62 + Set<Integer> intSet = new TreeSet<Integer>();
  63 + PForDeltaDocIdSet docSet = new PForDeltaDocIdSet(batch);
  64 + randomizer = 0;
  65 +
  66 + for (int i = 1; i < 1000 + 1; i++) {
  67 +
  68 + int bVal[] = new int[33];
  69 + for (int k = 0; k < batch; k++) {
  70 + b = randomizer + (int) (random.nextDouble() * 1000);
  71 + intSet.add(b);
  72 +
  73 + }
  74 +
  75 + randomizer += 1000;
  76 + }
  77 + for (Integer c : intSet) {
  78 + docSet.addDoc(c);
  79 + }
  80 +
  81 + DocIdSet not = new NotDocIdSet(docSet, max);
  82 +
  83 + try {
  84 + File f = new File(serial);
  85 + ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(f));
  86 + oos.writeObject(not);
  87 + oos.flush();
  88 + oos.close();
  89 +
  90 + } catch (Exception e) {
  91 + e.printStackTrace();
  92 + fail(e.getMessage());
  93 + }
  94 +
  95 + NotDocIdSet not2 = null;
  96 +
  97 + try {
  98 + InputStream f = new FileInputStream(new File(serial));
  99 + ObjectInputStream ois = new ObjectInputStream(new BufferedInputStream(f));
  100 + not2 = (NotDocIdSet) (ois.readObject());
  101 + } catch (Exception e) {
  102 + e.printStackTrace();
  103 + fail(e.getMessage());
  104 + }
  105 +
  106 + org.apache.lucene.search.DocIdSetIterator noit = not.iterator();
  107 + org.apache.lucene.search.DocIdSetIterator noit2 = not2.iterator();
  108 +
  109 + try {
  110 + int docid;
  111 + while ((docid=noit.nextDoc())!=DocIdSetIterator.NO_MORE_DOCS) {
  112 + int docid2 = noit2.nextDoc();
  113 + assertFalse(intSet.contains(docid));
  114 + assertEquals(docid, docid2);
  115 + }
  116 + } catch (Exception e) {
  117 + e.printStackTrace();
  118 + fail(e.getMessage());
  119 + }
  120 +
  121 + }
  122 +
  123 +
  124 +}
75 src/test/java/com/kamikaze/test/TestDocSets.java → .../java/com/kamikaze/test/PForDeltaTestDocSets.java
@@ -24,16 +24,18 @@
24 24 import com.kamikaze.docidset.impl.NotDocIdSet;
25 25 import com.kamikaze.docidset.impl.OBSDocIdSet;
26 26 import com.kamikaze.docidset.impl.OrDocIdSet;
27   -import com.kamikaze.docidset.impl.P4DDocIdSet;
  27 +import com.kamikaze.docidset.impl.PForDeltaDocIdSet;
  28 +import com.kamikaze.docidset.impl.PForDeltaDocIdSet;
  29 +import com.kamikaze.docidset.impl.PForDeltaDocIdSet;
28 30 import com.kamikaze.docidset.utils.DocSetFactory;
29 31 import com.kamikaze.docidset.utils.DocSetFactory.FOCUS;
30 32
31   -public class TestDocSets {
  33 +public class PForDeltaTestDocSets {
32 34
33 35 private static final FOCUS SPACE = null;
34 36 private static int batch = 128;
35 37
36   - public TestDocSets() {
  38 + public PForDeltaTestDocSets() {
37 39
38 40 }
39 41
@@ -349,13 +351,13 @@ public void testWideDocSkips() throws Exception {
349 351 }
350 352 _testWideDocSkips("Testing skips on IntArrayDocIdSet", pset6);
351 353
352   - P4DDocIdSet pset7 = new P4DDocIdSet();
  354 + PForDeltaDocIdSet pset7 = new PForDeltaDocIdSet();
353 355 orit = orSet.iterator();
354 356 while((docid = orit.nextDoc())!=DocIdSetIterator.NO_MORE_DOCS)
355 357 {
356 358 pset7.addDoc(docid);
357 359 }
358   - _testWideDocSkips("Testing skips on P4DDocIdSet", pset7);
  360 + _testWideDocSkips("Testing skips on PForDeltaDocIdSet", pset7);
359 361
360 362 OBSDocIdSet pset8 = new OBSDocIdSet(2000);
361 363 orit = orSet.iterator();
@@ -443,9 +445,9 @@ public void testAndDocIdSetSkipSanity() throws Exception{
443 445 2978, 2981, 2984, 2994, 2997 };
444 446 int set3[] = { 2994, 2997 };
445 447
446   - P4DDocIdSet pset1 = new P4DDocIdSet(batch);
  448 + PForDeltaDocIdSet pset1 = new PForDeltaDocIdSet(batch);
447 449 OpenBitSet pset2 = new OpenBitSet();
448   - P4DDocIdSet pset3 = new P4DDocIdSet(batch);
  450 + PForDeltaDocIdSet pset3 = new PForDeltaDocIdSet(batch);
449 451
450 452 for (int i = 0; i < set1.length; i++) {
451 453 pset1.addDoc(set1[i]);
@@ -501,14 +503,29 @@ public void testAndDocIdSetSkipSanity() throws Exception{
501 503
502 504
503 505
504   -
  506 + private int[] convertSetToUniqueSet(int[] input)
  507 + {
  508 + Set<Integer> uniqueSet = new TreeSet<Integer>();
  509 + for(int i=0; i<input.length; i++)
  510 + {
  511 + uniqueSet.add(input[i]);
  512 + }
  513 + Iterator<Integer> iter = uniqueSet.iterator();
  514 + int[] output = new int[uniqueSet.size()];
  515 + int k=0;
  516 + while(iter.hasNext())
  517 + {
  518 + output[k++] = iter.next();
  519 + }
  520 + return output;
  521 + }
505 522
506 523 @Test
507 524 public void testCombinationSanity()throws Exception {
508 525
509 526 System.out.println("");
510 527 int[] set1 = { 4, 19, 21, 35, 36, 43, 43, 73, 85, 104, 105, 106, 112, 118,
511   - 119, 138, 141, 145, 146, 146, 196, 200, 202, 217, 219, 220, 221, 239,
  528 + 119, 138, 141, 145, 146, 147, 196, 200, 202, 217, 219, 220, 221, 239,
512 529 242, 243, 261, 276, 280, 281, 295, 297, 306, 309, 319, 324, 359, 375,
513 530 376, 387, 398, 401, 406, 438, 442, 450, 450, 462, 469, 475, 495, 499,
514 531 505, 505, 513, 513, 526, 529, 569, 584, 589, 590, 609, 614, 633, 635,
@@ -551,6 +568,12 @@ public void testCombinationSanity()throws Exception {
551 568 int[] set5 = { 4, 1999 };
552 569 int[] set6 = { 2000 };
553 570
  571 + set1 = convertSetToUniqueSet(set1);
  572 + set2 = convertSetToUniqueSet(set2);
  573 + set3 = convertSetToUniqueSet(set3);
  574 + set4 = convertSetToUniqueSet(set4);
  575 +
  576 +
554 577 OpenBitSet ps1 = new OpenBitSet();
555 578
556 579 // Build open bit set
@@ -569,7 +592,7 @@ public void testCombinationSanity()throws Exception {
569 592 for (int i = 0; i < set3.length; i++)
570 593 ps3.set(set3[i]);
571 594
572   - P4DDocIdSet ps4 = new P4DDocIdSet(128);
  595 + PForDeltaDocIdSet ps4 = new PForDeltaDocIdSet(128);
573 596
574 597 // Build open bit set
575 598 for (int i = 0; i < set4.length; i++)
@@ -581,7 +604,7 @@ public void testCombinationSanity()throws Exception {
581 604 for (int i = 0; i < set5.length; i++)
582 605 ps5.set(set5[i]);
583 606
584   - P4DDocIdSet ps6 = new P4DDocIdSet(128);
  607 + PForDeltaDocIdSet ps6 = new PForDeltaDocIdSet(128);
585 608 ps6.addDoc(2000);
586 609
587 610 ArrayList<DocIdSet> sets = new ArrayList<DocIdSet>();
@@ -1242,7 +1265,7 @@ public void testDenseConstructionTime()
1242 1265 }
1243 1266 System.out.println("Time for OpenBitSet construction:"+(System.nanoTime()-time)+" ns");
1244 1267 time = System.nanoTime();
1245   - P4DDocIdSet docSet3 = new P4DDocIdSet();
  1268 + PForDeltaDocIdSet docSet3 = new PForDeltaDocIdSet();
1246 1269 for(int i=0;i<20000000;i++)
1247 1270 {
1248 1271 docSet3.addDoc(i+test);
@@ -1284,7 +1307,7 @@ public void testDenseConstructionTime()
1284 1307 // System.out.println("Time for"+(20000/5)+ " OBSDocSet Find:"+(System.nanoTime()-time)+" ns");
1285 1308 //
1286 1309 // time = System.nanoTime();
1287   -// P4DDocIdSet docSet3 = new P4DDocIdSet();
  1310 +// PForDeltaDocIdSet docSet3 = new PForDeltaDocIdSet();
1288 1311 // for(int i=0;i<20000;i++)
1289 1312 // {
1290 1313 // docSet3.addDoc(i+5);
@@ -1300,14 +1323,14 @@ public void testDenseConstructionTime()
1300 1323 // }
1301 1324
1302 1325 @Test
1303   - public void testFindOnP4D()
  1326 + public void testFindOnP4D() throws IOException
1304 1327 {
1305 1328 System.out.println("");
1306 1329 System.out.println("Running testFindOnP4D...");
1307 1330 System.out.println("----------------------------");
1308 1331
1309 1332
1310   - P4DDocIdSet docSet3 = new P4DDocIdSet();
  1333 + PForDeltaDocIdSet docSet3 = new PForDeltaDocIdSet();
1311 1334 ArrayList<Integer> list = new ArrayList<Integer>();
1312 1335 for(int i=0;i<20000;i+=5)
1313 1336 {
@@ -1323,7 +1346,7 @@ public void testFindOnP4D()
1323 1346 }
1324 1347
1325 1348 list.clear();
1326   - docSet3 = new P4DDocIdSet();
  1349 + docSet3 = new PForDeltaDocIdSet();
1327 1350 for(int i=0;i<20000;i+=6)
1328 1351 {
1329 1352 list.add(i);
@@ -1338,7 +1361,7 @@ public void testFindOnP4D()
1338 1361
1339 1362 list.clear();
1340 1363
1341   - docSet3 = new P4DDocIdSet();
  1364 + docSet3 = new PForDeltaDocIdSet();
1342 1365 assertFalse(docSet3.find(34));
1343 1366 for(int i=1;i<257;i++)
1344 1367 {
@@ -1361,7 +1384,7 @@ public void testFindOnP4D()
1361 1384
1362 1385 list.clear();
1363 1386
1364   - docSet3 = new P4DDocIdSet();
  1387 + docSet3 = new PForDeltaDocIdSet();
1365 1388 assertFalse(docSet3.find(34));
1366 1389 for(int i=1;i<33;i++)
1367 1390 {
@@ -1554,7 +1577,7 @@ public void testWideCombinationCase2() throws IOException {
1554 1577 for (int i = 0; i < set8.length; i++)
1555 1578 ps8.set(set8[i]);
1556 1579
1557   - P4DDocIdSet ps9 = new P4DDocIdSet(128);
  1580 + PForDeltaDocIdSet ps9 = new PForDeltaDocIdSet(128);
1558 1581 for (int i = 0; i < set9.length; i++)
1559 1582 ps9.addDoc(set9[i]);
1560 1583
@@ -1607,36 +1630,36 @@ public void testWideCombinationCase2() throws IOException {
1607 1630
1608 1631 }
1609 1632
  1633 +
1610 1634 @Test
1611 1635 public void testP4DDocIdSetNoExceptionCompressionRatio()
1612 1636 {
1613 1637 boolean failed = false;
1614 1638 System.out.println("");
1615   - System.out.println("Running P4DeltaDocSet No Exception Compression Ratio test");
  1639 + System.out.println("Running PForDeltaDocSet No Exception Compression Ratio test");
1616 1640 System.out.println("----------------------------");
1617 1641
1618 1642 final int max = 10000;
1619   -
  1643 + int c=0;
  1644 + int counter =0;
1620 1645 for(int j = 0; j < 31; j++)
1621 1646 {
1622 1647 try
1623 1648 {
1624   - P4DDocIdSet set = new P4DDocIdSet(batch);
  1649 + PForDeltaDocIdSet set = new PForDeltaDocIdSet(batch);
1625 1650 long time = System.nanoTime();
1626 1651
1627   - int counter=0;
1628   - for(int c = 0; c >= 0 && counter < max; c += (1 << j))
  1652 + for(c = 0; c >= 0 && counter < max; c += (1 << j))
1629 1653 {
1630 1654 set.addDoc(c);
1631 1655 counter++;
1632 1656 }
1633 1657 set.optimize();
1634 1658 //System.out.println("Time to construct:"+(System.nanoTime() - time)+" ns");
1635   - System.out.println("Delta:" + (1 << j) + " numOfItems:" + counter + " Blob Size:"+set.totalBlobSize());
1636 1659 }
1637 1660 catch(Exception ex)
1638 1661 {
1639   - System.out.println("Delta:" + (1 << j) + " Failed");
  1662 + System.out.println("c: " + c + ", counter: " + counter + "Delta:" + (1 << j) + " Failed");
1640 1663 failed = true;
1641 1664 }
1642 1665 }
15 src/test/java/com/kamikaze/test/TestKamikaze.java → ...java/com/kamikaze/test/PForDeltaTestKamikaze.java
... ... @@ -1,5 +1,6 @@
1 1 package com.kamikaze.test;
2 2
  3 +
3 4 import java.io.IOException;
4 5 import java.util.ArrayList;
5 6 import java.util.Collections;
@@ -15,11 +16,11 @@
15 16 import com.kamikaze.docidset.api.DocSet;
16 17 import com.kamikaze.docidset.impl.AndDocIdSet;
17 18 import com.kamikaze.docidset.impl.OrDocIdSet;
18   -import com.kamikaze.docidset.impl.P4DDocIdSet;
  19 +import com.kamikaze.docidset.impl.PForDeltaDocIdSet;
19 20 import com.kamikaze.docidset.utils.DocSetFactory;
20 21
21 22
22   -public class TestKamikaze extends TestCase
  23 +public class PForDeltaTestKamikaze extends TestCase
23 24 {
24 25
25 26 public void testMultipleIntersections() throws Exception
@@ -108,7 +109,7 @@ public void testForOutOfBounds() throws Exception
108 109 seen.add(nextDoc);
109 110 }
110 111 Collections.sort(nums);
111   - DocSet docs = new P4DDocIdSet();
  112 + DocSet docs = new PForDeltaDocIdSet();
112 113 boolean saw403 = false;
113 114 for (Integer integer : nums)
114 115 {
@@ -126,8 +127,8 @@ public void testPartialEmptyAnd() throws IOException
126 127 System.out.println("Running Partial Empty And Test case...");
127 128 System.out.println("-------------------------------------------");
128 129
129   - DocSet ds1 = new P4DDocIdSet();
130   - DocSet ds2 = new P4DDocIdSet();
  130 + DocSet ds1 = new PForDeltaDocIdSet();
  131 + DocSet ds2 = new PForDeltaDocIdSet();
131 132 ds2.addDoc(42);
132 133 ds2.addDoc(43);
133 134 ds2.addDoc(44);
@@ -136,8 +137,8 @@ public void testPartialEmptyAnd() throws IOException
136 137 docs.add(ds1);
137 138 docs.add(ds2);
138 139 OrDocIdSet orlist1 = new OrDocIdSet(docs);
139   - DocSet ds3 = new P4DDocIdSet();
140   - DocSet ds4 = new P4DDocIdSet();
  140 + DocSet ds3 = new PForDeltaDocIdSet();
  141 + DocSet ds4 = new PForDeltaDocIdSet();
141 142 ds4.addDoc(42);
142 143 ds4.addDoc(43);
143 144 ds4.addDoc(44);
142 ...a/com/kamikaze/test/TestParameterizedDocSets.java → ...ikaze/test/PForDeltaTestParameterizedDocSets.java
@@ -8,10 +8,12 @@
8 8 import java.util.ArrayList;
9 9 import java.util.Arrays;
10 10 import java.util.Collections;
  11 +import java.util.HashSet;
11 12 import java.util.Iterator;
12 13 import java.util.LinkedList;
13 14 import java.util.List;
14 15 import java.util.Random;
  16 +import java.util.Set;
15 17 import java.util.TreeSet;
16 18
17 19 import org.apache.lucene.search.DocIdSet;
@@ -28,9 +30,11 @@
28 30 import com.kamikaze.docidset.impl.NotDocIdSet;
29 31 import com.kamikaze.docidset.impl.OBSDocIdSet;
30 32 import com.kamikaze.docidset.impl.P4DDocIdSet;
  33 +import com.kamikaze.docidset.impl.PForDeltaDocIdSet;
  34 +import com.kamikaze.docidset.impl.PForDeltaDocIdSet;
31 35
32 36 @RunWith(Parameterized.class)
33   -public class TestParameterizedDocSets {
  37 +public class PForDeltaTestParameterizedDocSets {
34 38
35 39 private static final int batch = 256;
36 40
@@ -40,7 +44,7 @@
40 44
41 45 private int _max = -1;
42 46
43   - public TestParameterizedDocSets(int length, int max) {
  47 + public PForDeltaTestParameterizedDocSets(int length, int max) {
44 48 super();
45 49
46 50
@@ -61,7 +65,6 @@ public static List data() {
61 65
62 66
63 67
64   -
65 68 @Test
66 69 public void testAnnounce()
67 70 {
@@ -103,13 +106,19 @@ public void testOBSDocIdSetIterateSanity() throws IOException {
103 106
104 107 for (int k = 0; k < batch; k++) {
105 108 list1.add(list.get(k));
106   - set.addDoc(list.get(k));
  109 +
107 110 }
108 111
109 112 // System.out.println("At :" + i +" "+(randomizer-1000) +" " +
110 113 // randomizer);
111 114 }
112 115
  116 + Iterator<Integer> iter = list1.iterator();
  117 + while(iter.hasNext())
  118 + {
  119 + set.addDoc(iter.next());
  120 + }
  121 +
113 122 totalCompressionTime = System.nanoTime() - now;
114 123 // System.out.println("Total compression time :"+totalCompressionTime+":
115 124 // for"+((double)batch*length)/1000000+" M numbers");
@@ -127,10 +136,8 @@ public void testOBSDocIdSetIterateSanity() throws IOException {
127 136
128 137 totalDecompressionTime = System.nanoTime() - now;
129 138
130   -
131   -
132 139 }
133   -
  140 +
134 141 @Test
135 142 public void testOBSDocIdSetSkipSanity() {
136 143 double booster = ((_max*1.0)/(1000f*_length));
@@ -265,7 +272,6 @@ public void testOBSDocIdSetPerformance() throws IOException {
265 272
266 273 }
267 274
268   -
269 275 @Test
270 276 public void testIntArrayDocIdSetSkipSanity() {
271 277
@@ -408,12 +414,12 @@ public void testIntArrayDocIdSetIteratePerformance() {
408 414 }
409 415
410 416 @Test
411   - public void testP4DDocIdSetIteratePerformance() {
  417 + public void testPForDeltaDocIdSetIteratePerformance() throws IOException {
412 418 double booster = ((_max*1.0)/(1000f*_length));
413 419
414   - P4DDocIdSet set = new P4DDocIdSet(batch);
  420 + PForDeltaDocIdSet set = new PForDeltaDocIdSet(batch);
415 421 System.out.println("");
416   - System.out.println("Running P4DeltaDocSet Iterate Performance test");
  422 + System.out.println("Running PForDeltaDocIdSet Iterate Performance test");
417 423 System.out.println("----------------------------");
418 424 Random random = new Random();
419 425 // Minimum 5 bits
@@ -422,18 +428,26 @@ public void testP4DDocIdSetIteratePerformance() {
422 428 double totalDecompressionTime = 0;
423 429 List<Integer> list = new LinkedList<Integer>();
424 430 LinkedList<Integer> list2 = new LinkedList<Integer>();
  431 + Set<Integer> uniqueSet = new TreeSet<Integer>();
425 432 int val = 0 ;
426 433 for (int i = 1; i < _length + 1; i++) {
427 434
428 435 int bVal[] = new int[33];
429 436 for (int k = 0; k < batch; k++) {
430 437 val = randomizer + (int) (random.nextDouble() * 1000);
431   - list.add(val);
  438 + uniqueSet.add(val);
432 439
433 440 }
434 441
435 442 randomizer += 1000*booster;
436 443 }
  444 +
  445 + Iterator<Integer> iter = uniqueSet.iterator();
  446 + while(iter.hasNext())
  447 + {
  448 + list.add(iter.next());
  449 + }
  450 +
437 451
438 452 Collections.sort(list);
439 453 System.out.println("Largest Element in the List:"+list.get( list.size() -1 ));
@@ -460,13 +474,13 @@ public void testP4DDocIdSetIteratePerformance() {
460 474 System.out.println("Compression Ratio : "+ ((double)set.sizeInBytes())/(batch * _length * 4));
461 475 }
462 476
463   -
464 477 @Test
465   - public void testP4DDocIdSetNonBoundarySkipSanity() {
  478 + public void testPForDeltaDocIdSetNonBoundarySkipSanity() throws IOException {
466 479 double booster = ((_max*1.0)/(1000f*_length));
467   - P4DDocIdSet set = new P4DDocIdSet(batch);
  480 +
  481 + PForDeltaDocIdSet set = new PForDeltaDocIdSet(batch);
468 482 System.out.println("");
469   - System.out.println("Running P4DeltaDocSet Non-Boundary skip test");
  483 + System.out.println("Running PForDeltaDocIdSet Non-Boundary skip test");
470 484 System.out.println("----------------------------");
471 485 Random random = new Random();
472 486 int extra = 35;
@@ -479,10 +493,10 @@ public void testP4DDocIdSetNonBoundarySkipSanity() {
479 493 double totalDecompressionTime = 0;
480 494
481 495 List<Integer> list = new LinkedList<Integer>();
482   -
  496 + Set<Integer> uniqueSet = new TreeSet<Integer>();
483 497 for (int i = 1; i < _length + 1; i++) {
484 498 for (int k = 0; k < batch; k++) {
485   - list.add(randomizer + (int) (random.nextDouble() * 1000));
  499 + uniqueSet.add(randomizer + (int) (random.nextDouble() * 1000));
486 500 }
487 501
488 502 randomizer += 1000*booster;
@@ -490,9 +504,15 @@ public void testP4DDocIdSetNonBoundarySkipSanity() {
490 504
491 505 randomizer += 1000*booster;
492 506 for (int i = 0; i < extra; i++)
493   - list.add(randomizer + (int) (random.nextDouble() * 1000));
  507 + uniqueSet.add(randomizer + (int) (random.nextDouble() * 1000));
494 508 int counter = 0;
495 509
  510 + Iterator<Integer> iter = uniqueSet.iterator();
  511 + while(iter.hasNext())
  512 + {
  513 + list.add(iter.next());
  514 + }
  515 +
496 516 Collections.sort(list);
497 517 System.out.println("Largest Element in the List:"+list.get( list.size() -1 ));
498 518 // System.out.println(list);
@@ -533,13 +553,13 @@ public void testP4DDocIdSetNonBoundarySkipSanity() {
533 553 }
534 554
535 555 @Test
536   - public void testP4DDocIdSetNonBoundaryCompressionSanity() throws IOException {
  556 + public void testPForDeltaDocIdSetNonBoundaryCompressionSanity() throws IOException {
537 557 int extra = 34;
538 558 double booster = ((_max*1.0)/(1000f*_length));
539 559 int counter = 0;
540   - P4DDocIdSet set = new P4DDocIdSet(batch);
  560 + PForDeltaDocIdSet set = new PForDeltaDocIdSet(batch);
541 561 System.out.println("");
542   - System.out.println("Running P4DeltaDocSet Non-Boundary Compression Sanity test");
  562 + System.out.println("Running PForDeltaDocSet Non-Boundary Compression Sanity test");
543 563 System.out.println("----------------------------");
544 564 Random random = new Random();
545 565 // Minimum 5 bits
@@ -550,11 +570,12 @@ public void testP4DDocIdSetNonBoundaryCompressionSanity() throws IOException {
550 570 int randomizer = 0;
551 571 double totalDecompressionTime = 0;
552 572 List<Integer> list = new LinkedList<Integer>();
553   -
  573 + Set<Integer> uniqueSet = new TreeSet<Integer>();
  574 +
554 575 for (int i = 1; i < size + 1; i++) {
555 576 for (int k = 0; k < batch; k++) {
556 577 counter++;
557   - list.add(randomizer + (int) (random.nextDouble() * 1000));
  578 + uniqueSet.add(randomizer + (int) (random.nextDouble() * 1000));
558 579 }
559 580
560 581 randomizer += 1000*booster;
@@ -565,7 +586,13 @@ public void testP4DDocIdSetNonBoundaryCompressionSanity() throws IOException {
565 586 for (int i = 0; i < extra; i++)
566 587 {
567 588 counter++;
568   - list.add(randomizer + (int) (random.nextDouble() * 1000));
  589 + uniqueSet.add(randomizer + (int) (random.nextDouble() * 1000));
  590 + }
  591 +
  592 + Iterator<Integer> iter = uniqueSet.iterator();
  593 + while(iter.hasNext())
  594 + {
  595 + list.add(iter.next());
569 596 }
570 597
571 598 Collections.sort(list);
@@ -596,11 +623,11 @@ public void testP4DDocIdSetNonBoundaryCompressionSanity() throws IOException {
596 623 }
597 624
598 625 @Test
599   - public void testP4DDocIdSetSkipSanity() {
  626 + public void testPForDeltaDocIdSetSkipSanity() throws IOException {
600 627 double booster = ((_max*1.0)/(1000f*_length));
601   - P4DDocIdSet set = new P4DDocIdSet(batch);
  628 + PForDeltaDocIdSet set = new PForDeltaDocIdSet(batch);
602 629 System.out.println("");
603   - System.out.println("Running P4DeltaDocSet Skip Sanity test");
  630 + System.out.println("Running PForDeltaDocIdSet Skip Sanity test");
604 631 System.out.println("----------------------------");
605 632 Random random = new Random();
606 633
@@ -610,16 +637,22 @@ public void testP4DDocIdSetSkipSanity() {
610 637 double totalDecompressionTime = 0;
611 638 List<Integer> list = new LinkedList<Integer>();
612 639 LinkedList<Integer> list2 = new LinkedList<Integer>();
613   -
  640 + Set<Integer> uniqueSet = new TreeSet<Integer>();
614 641 for (int i = 1; i < _length + 1; i++) {
615 642
616 643 for (int k = 0; k < batch; k++) {
617   - list.add(randomizer + (int) (random.nextDouble() * 1000));
  644 + uniqueSet.add(randomizer + (int) (random.nextDouble() * 1000));
618 645 }
619 646
620 647 randomizer += 1000*booster;
621 648 }
622 649
  650 + Iterator<Integer> iter = uniqueSet.iterator();
  651 + while(iter.hasNext())
  652 + {
  653 + list.add(iter.next());
  654 + }
  655 +
623 656 Collections.sort(list);
624 657 System.out.println("Largest Element in the List:"+list.get( list.size() -1 ));
625 658 long time = System.nanoTime();
@@ -660,7 +693,6 @@ public void testP4DDocIdSetSkipSanity() {
660 693
661 694 }
662 695
663   -
664 696 @Test
665 697 public void testSkipPerformance() throws IOException
666 698 {
@@ -669,9 +701,9 @@ public void testSkipPerformance() throws IOException
669 701 System.out.println("----------------------------");
670 702
671 703 double booster = ((_max*1.0)/(1000f*_length));
672   - P4DDocIdSet set = new P4DDocIdSet(batch);
  704 + PForDeltaDocIdSet set = new PForDeltaDocIdSet(batch);
673 705 System.out.println("");
674   - System.out.println("Running P4DeltaDocSet Skip Sanity test");
  706 + System.out.println("Running PForDeltaDocIdSet Skip Sanity test");
675 707 System.out.println("----------------------------");
676 708 Random random = new Random();
677 709
@@ -681,30 +713,36 @@ public void testSkipPerformance() throws IOException
681 713 double totalDecompressionTime = 0;
682 714 List<Integer> list = new LinkedList<Integer>();
683 715 LinkedList<Integer> list2 = new LinkedList<Integer>();
684   -
  716 + Set<Integer> uniqueSet = new TreeSet<Integer>();
685 717 for (int i = 1; i < _length + 1; i++) {
686 718
687 719 for (int k = 0; k < batch; k++) {
688   - list.add(randomizer + (int) (random.nextDouble() * 1000));
  720 + uniqueSet.add(randomizer + (int) (random.nextDouble() * 1000));
689 721 }
690 722
691 723 randomizer += 1000*booster;
692 724 }
693 725
  726 + Iterator<Integer> iter = uniqueSet.iterator();
  727 + while(iter.hasNext())
  728 + {
  729 + list.add(iter.next());
  730 + }
  731 +
694 732 Collections.sort(list);
695 733 //System.out.println("Largest Element in the List:"+list.get( list.size() -1 ));
696 734
697 735
698 736
699 737 //P4D
700   - P4DDocIdSet p4d = new P4DDocIdSet();
  738 + PForDeltaDocIdSet pfd = new PForDeltaDocIdSet();
701 739 int counter=0;
702 740
703 741 for (Integer c : list) {
704 742 counter++;
705   - p4d.addDoc(c);
  743 + pfd.addDoc(c);
706 744 }
707   - StatefulDSIterator dcit = p4d.iterator();
  745 + StatefulDSIterator dcit = pfd.iterator();
708 746 _testSkipPerformance(list.get(list.size()-1),dcit);
709 747
710 748 // Int Array
@@ -713,7 +751,7 @@ public void testSkipPerformance() throws IOException
713 751
714 752 for (Integer c : list) {
715 753 counter++;
716   - p4d.addDoc(c);
  754 + pfd.addDoc(c);
717 755 }
718 756 dcit = iSet.iterator();
719 757 _testSkipPerformance(list.get(list.size()-1),dcit);
@@ -724,7 +762,7 @@ public void testSkipPerformance() throws IOException
724 762
725 763 for (Integer c : list) {
726 764 counter++;
727   - p4d.addDoc(c);
  765 + pfd.addDoc(c);
728 766 }
729 767 dcit = oSet.iterator();
730 768 _testSkipPerformance(list.get(list.size()-1),dcit);
@@ -748,6 +786,7 @@ private void _testSkipPerformance(int max, StatefulDSIterator dcit) throws IOExc
748 786
749 787 }
750 788
  789 +
751 790 @Test
752 791 @Ignore
753 792 public void testAndDocIdSetPerformance() throws Exception{
@@ -768,24 +807,25 @@ public void testAndDocIdSetPerformance() throws Exception{
768 807
769 808 for (int j = 0; j < all; j++) {
770 809 ArrayList<Integer> intSet = new ArrayList<Integer>();
771   - P4DDocIdSet docSet = new P4DDocIdSet(batch);
  810 + PForDeltaDocIdSet docSet = new PForDeltaDocIdSet(batch);
  811 + Set<Integer> uniqueSet = new TreeSet<Integer>();
  812 +
772 813 randomizer = 0;
773 814 for (int i = 1; i < size + 1; i++) {
774 815
775 816 for (int k = 0; k < batch; k++) {
776   - intSet.add(randomizer + (int) (random.nextDouble() * 1000));
  817 + uniqueSet.add(randomizer + (int) (random.nextDouble() * 1000));
777 818 }
778 819
779 820 randomizer += 1000*booster;
780   - Collections.sort(intSet);
781   -
  821 +
782 822 }
783   - for (Integer c : intSet) {
  823 + for (Integer c : uniqueSet) {
784 824 docSet.addDoc(c);
785 825 }
786 826 docSets.add(docSet);
787   -
788 827 }
  828 +
789 829 System.out.println("Constructed component DocSets");
790 830 org.apache.lucene.search.DocIdSetIterator oit = new AndDocIdSet(docSets).iterator();
791 831 long now = System.nanoTime();
@@ -825,7 +865,9 @@ public void testNotDocIdSet() throws IOException {
825 865 int randomizer = 0;
826 866 int b = 0;
827 867 ArrayList<Integer> intSet = new ArrayList<Integer>();
828   - P4DDocIdSet docSet = new P4DDocIdSet(batch);
  868 + PForDeltaDocIdSet docSet = new PForDeltaDocIdSet(batch);
  869 + Set<Integer> uniqueSet = new TreeSet<Integer>();
  870 +
829 871 randomizer = 0;
830 872
831 873 for (int i = 1; i < length + 1; i++) {
@@ -833,16 +875,16 @@ public void testNotDocIdSet() throws IOException {
833 875 int bVal[] = new int[33];
834 876 for (int k = 0; k < batch; k++) {
835 877 b = randomizer + (int) (random.nextDouble() * 1000);
836   - intSet.add(b);
  878 + uniqueSet.add(b);
837 879
838 880 }
839 881
840 882 randomizer += 1000;
841   - Collections.sort(intSet);
  883 + // Collections.sort(intSet);
842 884
843 885