Skip to content

Commit

Permalink
Implement Li's Algorithm L as LiLSampling (#22)
Browse files Browse the repository at this point in the history
  • Loading branch information
gstamatelat committed Jul 2, 2018
1 parent 47e8cca commit e10db8a
Show file tree
Hide file tree
Showing 8 changed files with 92 additions and 15 deletions.
12 changes: 10 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ System.out.println(sample);
| `WatermanSampling` | Algorithm R by Waterman | `O(k)` | |
| `VitterXSampling` | Algorithm X by Vitter | `O(k)` | |
| `VitterZSampling` | Algorithm Z by Vitter | `O(k)` | |
| `LiLSampling` | Algorithm L by Li | `O(k)` | |
| `EfraimidisSampling` | Algorithm A-Res by Efraimidis | `O(k)` | ✔ |
| `ChaoSampling` | Algorithm by Chao | `O(k)` | ✔ |

Expand All @@ -135,14 +136,21 @@ Signature: `VitterZSampling` implements `RandomSampling`
#### References
- [Vitter, Jeffrey S. "Random sampling with a reservoir." ACM Transactions on Mathematical Software (TOMS) 11.1 (1985): 37-57.](https://doi.org/10.1145/3147.3165)

### 4 Algorithm A-Res by Efraimidis
### 4 Algorithm L by Li

Signature: `LiLSampling` implements `RandomSampling`

#### References
- [Li, Kim-Hung. "Reservoir-sampling algorithms of time complexity O (n (1+ log (N/n)))." ACM Transactions on Mathematical Software (TOMS) 20.4 (1994): 481-493.](https://doi.org/10.1145/198429.198435)

### 5 Algorithm A-Res by Efraimidis

Signature: `EfraimidisSampling` implements `WeightedRandomSampling`

#### References
- [Efraimidis, Pavlos S., and Paul G. Spirakis. "Weighted random sampling with a reservoir." Information Processing Letters 97.5 (2006): 181-185.](https://doi.org/10.1016/j.ipl.2005.11.003)

### 5 Algorithm by Chao
### 6 Algorithm by Chao

Signature: `ChaoSampling` implements `WeightedRandomSampling`

Expand Down
63 changes: 63 additions & 0 deletions src/main/java/gr/james/sampling/LiLSampling.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package gr.james.sampling;

import java.util.Random;

/**
 * Implementation of <i>Algorithm L</i> by Li in <b>Reservoir-sampling algorithms of time complexity
 * O(n(1 + log(N/n)))</b>.
 * <p>
 * Unlike {@link WatermanSampling}, the {@link VitterXSampling}, {@link VitterZSampling} and {@code LiLSampling}
 * algorithms decide how many items to skip, rather than deciding whether or not to skip an item each time it is fed.
 * This property allows these algorithms to perform better by efficiently calculating the number of items that need to
 * be skipped, while making fewer calls to the RNG.
 * <p>
 * This implementation throws {@link StreamOverflowException} if more than {@link Long#MAX_VALUE} items are fed.
 * <p>
 * The space complexity of this class is {@code O(k)}, where {@code k} is the sample size.
 *
 * @param <T> the item type
 * @author Giorgos Stamatelatos
 * @see <a href="https://doi.org/10.1145/198429.198435">Reservoir-sampling algorithms of time complexity
 * O(n(1 + log(N/n)))</a>
 */
public class LiLSampling<T> extends AbstractRandomSampling<T> {
    /**
     * The running quantity {@code W} of Algorithm L. It is set by {@link #init(int, Random)} and multiplied by
     * {@code u^(1/sampleSize)}, for a fresh uniform {@code u}, on every call to
     * {@link #skipLength(long, int, Random)}; it therefore never increases.
     * <p>
     * NOTE(review): this field deliberately has no initializer. If {@code AbstractRandomSampling} invokes
     * {@code init} from its constructor, an initializer here would run afterwards and overwrite the value; confirm
     * against the superclass.
     */
    private double W;

    /**
     * Construct a new instance of {@link LiLSampling} using the specified sample size and RNG. The implementation
     * assumes that {@code random} conforms to the contract of {@link Random} and will perform no checks to ensure that.
     * If this contract is violated, the behavior is undefined.
     *
     * @param sampleSize the sample size
     * @param random     the RNG to use
     * @throws NullPointerException     if {@code random} is {@code null}
     * @throws IllegalArgumentException if {@code sampleSize} is less than 1
     */
    public LiLSampling(int sampleSize, Random random) {
        super(sampleSize, random);
    }

    /**
     * Get a {@link RandomSamplingCollector} from this class.
     *
     * @param sampleSize the sample size
     * @param random     the RNG to use
     * @param <E>        the type of elements
     * @return a {@link RandomSamplingCollector} from this class
     */
    public static <E> RandomSamplingCollector<E> collector(int sampleSize, Random random) {
        return new RandomSamplingCollector<>(() -> new LiLSampling<>(sampleSize, random));
    }

    /**
     * Draw a uniform value from the open interval {@code (0, 1)}.
     * <p>
     * {@link Random#nextDouble()} may return exactly {@code 0.0}, whose logarithm is negative infinity. Such a value
     * would collapse {@code W} to zero in a single step, so zeros are rejected and redrawn. For every other draw the
     * RNG is consumed exactly once, as before.
     *
     * @param random the RNG to use
     * @return a value {@code u} with {@code 0 < u < 1}
     */
    private static double nextOpenUnit(Random random) {
        double u;
        do {
            u = random.nextDouble();
        } while (u == 0.0);
        return u;
    }

    /**
     * Initialize {@code W} to {@code u^(1/sampleSize)} for a uniform {@code u} in {@code (0, 1)}.
     *
     * @param sampleSize the sample size
     * @param random     the RNG to use
     */
    @Override
    void init(int sampleSize, Random random) {
        W = Math.exp(Math.log(nextOpenUnit(random)) / sampleSize);
    }

    /**
     * Compute how many items to skip before the next one is accepted, then advance {@code W}.
     * <p>
     * The skip is {@code floor(log(u) / log(1 - W))}. The denominator is evaluated with {@link Math#log1p(double)}
     * because {@code 1 - W} rounds to {@code 1.0} once {@code W} falls below the precision of a {@code double};
     * {@code Math.log(1 - W)} would then be {@code +0.0} and the quotient negative infinity, i.e. a negative skip of
     * {@link Long#MIN_VALUE} after the cast.
     *
     * @param streamSize the number of items fed so far (not used by this algorithm)
     * @param sampleSize the sample size
     * @param random     the RNG to use
     * @return the non-negative number of items to skip, saturated at {@link Long#MAX_VALUE}
     */
    @Override
    long skipLength(long streamSize, int sampleSize, Random random) {
        // log(u) is strictly negative and log1p(-W) is negative (or -0.0 when W has underflowed to zero), so the
        // quotient is non-negative or +Infinity and never NaN.
        final double quotient = Math.log(nextOpenUnit(random)) / Math.log1p(-W);
        // The narrowing cast truncates towards zero and saturates +Infinity to Long.MAX_VALUE.
        final long skip = (long) quotient;
        W = W * Math.exp(Math.log(nextOpenUnit(random)) / sampleSize);
        return skip;
    }
}
10 changes: 5 additions & 5 deletions src/main/java/gr/james/sampling/VitterXSampling.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
/**
* Implementation of <i>Algorithm X</i> by Vitter in <b>Random Sampling with a Reservoir</b>.
* <p>
* Unlike {@link WatermanSampling}, the {@code VitterXSampling} and {@link VitterZSampling} algorithms decide how many
* items to skip, rather than deciding whether or not to skip an item each time it is feeded. This property allows these
* algorithms to perform better by efficiently calculating the number of items that need to be skipped, while making
* fewer calls to the RNG.
* Unlike {@link WatermanSampling}, the {@code VitterXSampling}, {@link VitterZSampling} and {@link LiLSampling}
* algorithms decide how many items to skip, rather than deciding whether or not to skip an item each time it is feeded.
* This property allows these algorithms to perform better by efficiently calculating the number of items that need to
* be skipped, while making fewer calls to the RNG.
* <p>
* This implementations throws {@link StreamOverflowException} if more than {@link Long#MAX_VALUE} items are feeded.
* This implementation throws {@link StreamOverflowException} if more than {@link Long#MAX_VALUE} items are feeded.
* <p>
* The space complexity of this class is {@code O(k)}, where {@code k} is the sample size.
*
Expand Down
10 changes: 5 additions & 5 deletions src/main/java/gr/james/sampling/VitterZSampling.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
/**
* Implementation of <i>Algorithm Z</i> by Vitter in <b>Random Sampling with a Reservoir</b>.
* <p>
* Unlike {@link WatermanSampling}, the {@link VitterXSampling} and {@code VitterZSampling} algorithms decide how many
* items to skip, rather than deciding whether or not to skip an item each time it is feeded. This property allows these
* algorithms to perform better by efficiently calculating the number of items that need to be skipped, while making
* fewer calls to the RNG.
* Unlike {@link WatermanSampling}, the {@link VitterXSampling}, {@code VitterZSampling} and {@link LiLSampling}
* algorithms decide how many items to skip, rather than deciding whether or not to skip an item each time it is feeded.
* This property allows these algorithms to perform better by efficiently calculating the number of items that need to
* be skipped, while making fewer calls to the RNG.
* <p>
* This implementations throws {@link StreamOverflowException} if more than {@link Long#MAX_VALUE} items are feeded.
* This implementation throws {@link StreamOverflowException} if more than {@link Long#MAX_VALUE} items are feeded.
* <p>
* The space complexity of this class is {@code O(k)}, where {@code k} is the sample size.
*
Expand Down
6 changes: 3 additions & 3 deletions src/main/java/gr/james/sampling/WatermanSampling.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
* <p>
* This implementation is the simplest unweighted sampling algorithm: each time a new element is fed, it
* determines whether it should be accepted in the sample by producing a random number. The more efficient
* {@link VitterXSampling} and {@link VitterZSampling} decide how many items to skip, rather than deciding whether or
* not to skip an item each time it is feeded.
* {@link VitterXSampling}, {@link VitterZSampling} and {@link LiLSampling} decide how many items to skip, rather than
* deciding whether or not to skip an item each time it is feeded.
* <p>
* This implementations throws {@link StreamOverflowException} if more than {@link Long#MAX_VALUE} items are feeded.
* This implementation throws {@link StreamOverflowException} if more than {@link Long#MAX_VALUE} items are feeded.
* <p>
* The space complexity of this class is {@code O(k)}, where {@code k} is the sample size.
*
Expand Down
1 change: 1 addition & 0 deletions src/main/java/gr/james/sampling/package-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
* <li>{@link gr.james.sampling.WatermanSampling}</li>
* <li>{@link gr.james.sampling.VitterXSampling}</li>
* <li>{@link gr.james.sampling.VitterZSampling}</li>
* <li>{@link gr.james.sampling.LiLSampling}</li>
* </ul>
* <h3><code>WeightedRandomSampling</code> implementations</h3>
* <ul>
Expand Down
2 changes: 2 additions & 0 deletions src/test/java/gr/james/sampling/Benchmark.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,15 @@ public class Benchmark {
private static final WatermanSampling<Object> waterman = new WatermanSampling<>(10, random);
private static final VitterXSampling<Object> vitterx = new VitterXSampling<>(10, random);
private static final VitterZSampling<Object> vitterz = new VitterZSampling<>(10, random);
private static final LiLSampling<Object> lil = new LiLSampling<>(10, random);
private static final EfraimidisSampling<Object> efraimidis = new EfraimidisSampling<>(10, random);
private static final ChaoSampling<Object> chao = new ChaoSampling<>(10, random);

/**
 * Run the benchmark once for each sampler, in declaration order, and print one right-aligned row per algorithm.
 * <p>
 * Each measurement is divided by 1,000,000 and labelled "ms", so {@code performance} presumably reports
 * nanoseconds; verify against its definition.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    // Shared row layout: algorithm name padded to 10 columns, then the scaled measurement.
    final String row = "%10s %5d ms%n";
    final int scale = 1000000;

    System.out.printf(row, "Waterman", performance(waterman) / scale);
    System.out.printf(row, "VitterX", performance(vitterx) / scale);
    System.out.printf(row, "VitterZ", performance(vitterz) / scale);
    System.out.printf(row, "LiL", performance(lil) / scale);
    System.out.printf(row, "Efraimidis", performance(efraimidis) / scale);
    System.out.printf(row, "Chao", performance(chao) / scale);
}
Expand Down
3 changes: 3 additions & 0 deletions src/test/java/gr/james/sampling/RandomSamplingTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ public static Collection<Supplier<RandomSampling<Integer>>> implementations() {
implementations.add(() -> new WatermanSampling<>(SAMPLE, RANDOM));
implementations.add(() -> new VitterXSampling<>(SAMPLE, RANDOM));
implementations.add(() -> new VitterZSampling<>(SAMPLE, RANDOM));
implementations.add(() -> new LiLSampling<>(SAMPLE, RANDOM));
implementations.add(() -> new EfraimidisSampling<>(SAMPLE, RANDOM));
implementations.add(() -> new ChaoSampling<>(SAMPLE, RANDOM));
return implementations;
Expand Down Expand Up @@ -85,6 +86,8 @@ public void stream() {
collector = EfraimidisSampling.collector(SAMPLE, RANDOM);
} else if (alg instanceof ChaoSampling) {
collector = ChaoSampling.collector(SAMPLE, RANDOM);
} else if (alg instanceof LiLSampling) {
collector = LiLSampling.collector(SAMPLE, RANDOM);
} else {
throw new AssertionError("RandomSamplingTest.stream");
}
Expand Down

0 comments on commit e10db8a

Please sign in to comment.