forked from socialsensor/graphdb-benchmarks
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Dataset.java
58 lines (49 loc) · 1.69 KB
/
Dataset.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
package eu.socialsensor.dataset;
import java.io.File;
import java.util.*;
import org.apache.commons.math3.util.MathArrays;
import eu.socialsensor.utils.Utils;
/**
 * In-memory representation of a dataset file together with a randomly chosen
 * subset of the node ids that occur in it.
 *
 * <p>The lines are obtained from {@code Utils.readTabulatedLines}; every field
 * of every line is interpreted as an integer node id.
 *
 * @author Alexander Patrikalakis
 *
 */
public class Dataset implements Iterable<List<String>>
{
    /** Lines of the dataset file, each one split into its fields. */
    private final List<List<String>> data;

    /** Randomly selected distinct node ids; fixed at construction time. */
    private final List<Integer> generatedNodes;

    /**
     * Reads the dataset file and picks {@code randomNodeSetSize} distinct node
     * ids from it, using {@code random} to drive the selection.
     *
     * @param datasetFile file to read; passed to {@code Utils.readTabulatedLines}
     *        with 4 as the number of lines to skip
     * @param random source of randomness used to shuffle the unique node ids
     * @param randomNodeSetSize how many distinct node ids to select
     * @throws IllegalArgumentException if {@code randomNodeSetSize} is negative
     *         or larger than the number of unique node ids in the dataset
     * @throws NumberFormatException if a field of the dataset is not a valid
     *         integer
     */
    public Dataset(File datasetFile, Random random, int randomNodeSetSize)
    {
        // Fail early with a meaningful message instead of the "Illegal Capacity"
        // error the ArrayList constructor would otherwise raise later on.
        if (randomNodeSetSize < 0) {
            throw new IllegalArgumentException(
                "randomNodeSetSize must not be negative: " + randomNodeSetSize);
        }

        data = Utils.readTabulatedLines(datasetFile, 4 /* numberOfLinesToSkip */);

        // Collect the unique node ids. Plain loops are used on purpose: the set
        // is not thread-safe, so filling it from a (parallel) stream's forEach
        // would be a data race. A HashSet is kept so that the order handed to
        // the shuffle below -- and therefore the selection for a given seed --
        // stays the same as before.
        final Set<Integer> nodes = new HashSet<>();
        for (List<String> line : data) {
            for (String nodeId : line) {
                nodes.add(Integer.valueOf(nodeId.trim()));
            }
        }

        if (randomNodeSetSize > nodes.size()) {
            throw new IllegalArgumentException("cant select more random nodes than there are unique nodes in dataset");
        }

        // Shuffle all unique ids, then keep the first randomNodeSetSize of them.
        final List<Integer> nodeList = new ArrayList<>(nodes);
        Collections.shuffle(nodeList, random);
        generatedNodes = new ArrayList<>(nodeList.subList(0, randomNodeSetSize));
    }

    /**
     * Returns an iterator over the lines of the dataset, in file order.
     *
     * @return iterator over the underlying line list
     */
    @Override
    public Iterator<List<String>> iterator()
    {
        return data.iterator();
    }

    /**
     * Returns the lines of the dataset.
     *
     * @return a new list holding the dataset lines; the copy is shallow, so the
     *         per-line lists are shared with this dataset
     */
    public List<List<String>> getList() {
        return new ArrayList<>(data);
    }

    /**
     * Returns the node ids that were randomly selected at construction time.
     *
     * @return a new list with the selected node ids, so that callers cannot
     *         modify the selection held by this dataset
     */
    public List<Integer> getRandomNodes() {
        return new ArrayList<>(generatedNodes);
    }
}