Skip to content

Commit

Permalink
weighted mid walk generation added and minor fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
Jan Portisch authored and Jan Portisch committed Apr 14, 2020
1 parent 48d611c commit 0e21117
Show file tree
Hide file tree
Showing 12 changed files with 386 additions and 20 deletions.
1 change: 1 addition & 0 deletions src/main/java/Main.java
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ public static void main(String[] args) {

String numberOfWalksText = getValue("-numberOfWalks", args);
numberOfWalksText = (numberOfWalksText == null) ? getValue("-numOfWalks", args) : numberOfWalksText;
numberOfWalksText = (numberOfWalksText == null) ? getValue("-numOfWalks", args) : numberOfWalksText;
if (numberOfWalksText != null) {
try {
numberOfWalks = Integer.parseInt(numberOfWalksText);
Expand Down
6 changes: 0 additions & 6 deletions src/main/java/training/Gensim.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,6 @@ public class Gensim {
*/
private static final String DEFAULT_RESOURCES_DIRECTORY = "./python-server/";

/**
* Objectmapper from jackson to generate JSON.
*/
private static final ObjectMapper JSON_MAPPER = new ObjectMapper();


/**
* Constructor
*/
Expand Down
11 changes: 11 additions & 0 deletions src/main/java/walkGenerators/base/DummyWalkGenerator.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
* A non-functional walk generator.
*/
public class DummyWalkGenerator extends WalkGenerator {

@Override
public void generateWalks(WalkGenerationMode generationMode, int numberOfThreads, int numberOfWalks, int depth, String walkFile) {

Expand Down Expand Up @@ -44,6 +45,16 @@ public void generateRandomMidWalks(int numberOfThreads, int numberOfWalksPerEnti

}

@Override
public void generateWeightedMidWalks(int numberOfThreads, int numberOfWalksPerEntity, int depth) {
// Intentionally empty: this generator is a non-functional dummy, so no walks are generated and all arguments are ignored.
}

@Override
public void generateWeightedMidWalks(int numberOfThreads, int numberOfWalksPerEntity, int depth, String filePathOfFileToBeWritten) {
// Intentionally empty: this generator is a non-functional dummy, so nothing is written to filePathOfFileToBeWritten.
}

@Override
public void generateRandomMidWalksDuplicateFree(int numberOfThreads, int numberOfWalksPerEntity, int depth) {

Expand Down
17 changes: 17 additions & 0 deletions src/main/java/walkGenerators/base/IWalkGenerator.java
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,23 @@ public interface IWalkGenerator {
*/
void generateRandomMidWalks(int numberOfThreads, int numberOfWalksPerEntity, int depth, String filePathOfFileToBeWritten);

/**
 * Generates weighted mid walks, duplicate walks are possible.
 * This overload takes no target file; where the walks are written is up to the implementation.
 * Use the overload with an explicit file path to control the output location.
 *
 * @param numberOfThreads The number of threads to be used.
 * @param numberOfWalksPerEntity The maximal number of walks that shall be performed per entity.
 * @param depth The depth of each walk where the depth is the number of node-hops, i.e. depth 1 leads to a sentence with one hop and three elements: S → P → O.
 */
void generateWeightedMidWalks(int numberOfThreads, int numberOfWalksPerEntity, int depth);

/**
 * Generates weighted mid walks, duplicate walks are possible.
 * Same as the three-argument overload, but with an explicit target file for the generated walks.
 *
 * @param numberOfThreads The number of threads to be used.
 * @param numberOfWalksPerEntity The maximal number of walks that shall be performed per entity.
 * @param depth The depth of each walk where the depth is the number of node-hops, i.e. depth 1 leads to a sentence with one hop and three elements: S → P → O.
 * @param filePathOfFileToBeWritten The path to the file that shall be written.
 */
void generateWeightedMidWalks(int numberOfThreads, int numberOfWalksPerEntity, int depth, String filePathOfFileToBeWritten);

/**
* Generates mid walks without duplicates.
* A mid walk is a random walk that involves a given entity but may not start or end with the entity in question.
Expand Down
100 changes: 100 additions & 0 deletions src/main/java/walkGenerators/base/MemoryParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,106 @@ public abstract class MemoryParser implements IParser {
boolean isUnifiyAnonymousNodes = false;


/**
 * Generates weighted mid walks for a single entity and returns them in string form.
 * "Weighted" means that a direction offering more options (forward or backward) is more likely to be taken.
 * The walks are first built as lists of walk components and then passed to {@code convertToStringWalks}.
 *
 * @param entity The entity for which walks shall be generated.
 * @param depth The depth of the walk. Depth is defined as hop to the next node. A walk of depth 1 will have three walk components.
 * @param numberOfWalks Number of walks to be performed per entity.
 * @return List of walks.
 */
public List<String> generateWeightedMidWalksForEntity(String entity, int depth, int numberOfWalks) {
    List<List<String>> walksAsComponentLists = generateWeightedMidWalkForEntityAsArray(entity, depth, numberOfWalks);
    return convertToStringWalks(walksAsComponentLists);
}

/**
 * Performs {@code numberOfWalks} weighted mid walk attempts for the given entity and collects the results.
 * Walks that contain only a single element (i.e., walks that could not be extended beyond the start node)
 * are dropped, so the returned list may hold fewer than {@code numberOfWalks} walks.
 *
 * @param entity The entity for which walks shall be generated.
 * @param depth The depth of each walk (where the depth is the number of hops).
 * @param numberOfWalks The number of walks to be performed.
 * @return A list of walks, each walk being a list of its walk components.
 */
public List<List<String>> generateWeightedMidWalkForEntityAsArray(String entity, int depth, int numberOfWalks) {
    List<List<String>> collectedWalks = new ArrayList<>();
    for (int remaining = numberOfWalks; remaining > 0; remaining--) {
        List<String> candidate = generateWeightedMidWalkForEntity(entity, depth);
        if (candidate.size() <= 1) {
            // only the start entity itself: not a usable walk
            continue;
        }
        collectedWalks.add(candidate);
    }
    return collectedWalks;
}

/**
 * Generates a single weighted mid walk for the given entity with the given depth.
 * <p>
 * The walk starts as the one-element list {@code [entity]} and is extended in at most {@code depth} iterations.
 * In every iteration the walk grows either at the front (using a randomly drawn triple whose object is the
 * current first node) or at the back (using a randomly drawn triple whose subject is the current last node).
 * The front is chosen with probability {@code #predecessorTriples / (#predecessorTriples + #successorTriples)},
 * i.e., the more options exist in one direction, the more likely that direction is taken.
 * If the walk cannot be continued in either direction, it is returned as is.
 *
 * @param entity The entity for which a walk shall be generated.
 * @param depth The depth of the walk. Depth is defined as hop to the next node. A walk of depth 1 will have three walk components.
 * @return One walk as list where each element is a walk component. The list always contains at least the entity itself.
 */
public List<String> generateWeightedMidWalkForEntity(String entity, int depth) {

    LinkedList<String> result = new LinkedList<>();

    // initialize result
    result.add(entity);

    // the current outermost nodes of the walk (front and back)
    String nextElementPredecessor = entity;
    String nextElementSuccessor = entity;

    for (int currentDepth = 0; currentDepth < depth; currentDepth++) {

        // predecessor candidates
        ArrayList<Triple> candidatesPredecessor = data.getTriplesInvolvingObject(nextElementPredecessor);

        // successor candidates
        ArrayList<Triple> candidatesSuccessor = data.getTriplesInvolvingSubject(nextElementSuccessor);

        int numberOfPredecessors = (candidatesPredecessor == null) ? 0 : candidatesPredecessor.size();
        int numberOfSuccessors = (candidatesSuccessor == null) ? 0 : candidatesSuccessor.size();

        // if there are no successors and predecessors: return current walk
        if (numberOfPredecessors == 0 && numberOfSuccessors == 0) {
            return result;
        }

        // determine cut-off point: share of predecessor options among all options
        double cutOffPoint = (double) numberOfPredecessors / ((double) numberOfPredecessors + numberOfSuccessors);

        // The draw is taken from [0, 1) and compared strictly. This way a cut-off of 0 (no predecessors) can never
        // select the predecessor branch and a cut-off of 1 (no successors) can never select the successor branch,
        // so no iteration is spent on a direction that has no candidates.
        double randomPickZeroOne = ThreadLocalRandom.current().nextDouble();

        if (randomPickZeroOne < cutOffPoint) {
            // predecessor (cutOffPoint > 0 implies that there is at least one candidate)
            Triple drawnTriple = randomDrawFromList(candidatesPredecessor);

            // add walks from the front (walk started before entity)
            result.addFirst(drawnTriple.predicate);
            result.addFirst(drawnTriple.subject);
            nextElementPredecessor = drawnTriple.subject;
        } else {
            // successor (cutOffPoint < 1 implies that there is at least one candidate)
            Triple tripleToAdd = randomDrawFromList(candidatesSuccessor);

            // add next walk iteration
            result.addLast(tripleToAdd.predicate);
            result.addLast(tripleToAdd.object);
            nextElementSuccessor = tripleToAdd.object;
        }
    }
    return result;
}


/**
* Generates walks that are ready to be processed further (already concatenated, space-separated).
*
Expand Down
33 changes: 28 additions & 5 deletions src/main/java/walkGenerators/base/WalkGenerationMode.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,18 @@
package walkGenerators.base;

/**
* There must be a runnable for each walk generation option.
* The available walk generation modes.
* <p>
* Developer note:
* <ul>
* <li>
* There must be a runnable for each walk generation option.
* </li>
* <li>
* There must be a resolution in every implementation of {@link IWalkGenerator#generateWalks(WalkGenerationMode, int, int, int, String)} that shall support this walk mode.
* This affects, for example, {@link WalkGeneratorDefault#generateWalks(WalkGenerationMode, int, int, int, String)} or {@link walkGenerators.light.WalkGeneratorLight#generateWalks(WalkGenerationMode, int, int, int, String)}.
* </li>
* </ul>
*/
public enum WalkGenerationMode {

Expand All @@ -18,6 +29,14 @@ public enum WalkGenerationMode {
*/
MID_WALKS_DUPLICATE_FREE,

/**
* Weighted mid-walk walk generation: Given an entity, it is randomly decided whether to go backwards or forwards
* where the chances are determined by the number of options to go backwards and forwards:
* If there are more options to go backwards than forwards, the likelihood of going backwards is larger. The generated
* walks are free of duplicates. Due to the implementation this can lead to fewer generated walks than originally specified.
*/
MID_WALKS_WEIGHTED,

/**
* Plain random walks generated in a forward-fashion (going backwards is not allowed).
*/
Expand All @@ -32,16 +51,19 @@ public enum WalkGenerationMode {

/**
* String representation of mode.
*
* @param modeString The mode as String.
* @return If possible, walk generation mode. Else null.
*/
public static WalkGenerationMode getModeFromString(String modeString){
public static WalkGenerationMode getModeFromString(String modeString) {
modeString = modeString.toLowerCase().trim();
switch (modeString){
switch (modeString) {
case "mid_walks":
return MID_WALKS;
case "mid_walks_duplicate_free":
return MID_WALKS_DUPLICATE_FREE;
case "mid_walks_weighted":
return MID_WALKS_WEIGHTED;
case "random_walks":
return RANDOM_WALKS;
case "random_walks_duplicate_free":
Expand All @@ -53,11 +75,12 @@ public static WalkGenerationMode getModeFromString(String modeString){

/**
* Get a string representation of all available modes.
*
* @return String representation of all modes.
*/
public static String getOptions(){
public static String getOptions() {
String result = "";
for(WalkGenerationMode mode : WalkGenerationMode.values()){
for (WalkGenerationMode mode : WalkGenerationMode.values()) {
result += mode.toString() + " | ";
}
result = result.substring(0, result.length() - 3);
Expand Down
45 changes: 41 additions & 4 deletions src/main/java/walkGenerators/base/WalkGenerator.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,7 @@
import org.apache.jena.riot.RiotException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import walkGenerators.runnables.DuplicateFreeMidWalkEntityProcessingRunnable;
import walkGenerators.runnables.DuplicateFreeWalkEntityProcessingRunnable;
import walkGenerators.runnables.MidWalkEntityProcessingRunnable;
import walkGenerators.runnables.RandomWalkEntityProcessingRunnable;
import walkGenerators.runnables.*;

import java.io.*;
import java.net.MalformedURLException;
Expand Down Expand Up @@ -142,6 +139,46 @@ public void generateRandomMidWalksForEntitiesDuplicateFree(Set<String> entities,
}


/**
 * Generates weighted mid walks for the given entities.
 * <p>
 * A gzip-compressed, UTF-8 encoded writer on {@code filePath} is assigned to {@code this.writer} (an existing
 * file is overwritten). Then one {@link WeightedMidWalkEntityProcessingRunnable} per entity is submitted to a
 * fixed-size thread pool. The method blocks until all tasks have finished (waiting at most 10 days) and finally
 * calls {@link #close()}.
 * If the writer cannot be initialized, the error is logged and no walks are generated.
 *
 * @param entities The entities for which walks shall be generated.
 * @param numberOfThreads The number of threads to be used.
 * @param numberOfWalks The number of walks to be generated per entity.
 * @param walkLength The maximal length of each walk (a walk may be shorter if it cannot be continued anymore). Aka depth.
 */
public void generateWeightedMidWalksForEntities(Set<String> entities, int numberOfThreads, int numberOfWalks, int walkLength) {
    File outputFile = new File(filePath);

    // A bare file name (e.g. "walks.gz") has no parent directory; there is nothing to create in that case.
    File parentDirectory = outputFile.getParentFile();
    if (parentDirectory != null) {
        parentDirectory.mkdirs();
    }

    // initialize the writer
    try {
        this.writer = new OutputStreamWriter(new GZIPOutputStream(
                new FileOutputStream(outputFile, false)), StandardCharsets.UTF_8);
    } catch (Exception e1) {
        LOGGER.error("Could not initialize writer. Aborting process.", e1);
        return;
    }

    // ArrayBlockingQueue rejects a capacity below 1, so an empty entity set must still get a capacity of 1.
    ThreadPoolExecutor pool = new ThreadPoolExecutor(numberOfThreads, numberOfThreads,
            0, TimeUnit.SECONDS,
            new java.util.concurrent.ArrayBlockingQueue<>(Math.max(1, entities.size())));

    for (String entity : entities) {
        WeightedMidWalkEntityProcessingRunnable th = new WeightedMidWalkEntityProcessingRunnable(this, entity, numberOfWalks, walkLength);
        pool.execute(th);
    }
    pool.shutdown();
    try {
        pool.awaitTermination(10, TimeUnit.DAYS);
    } catch (InterruptedException e) {
        LOGGER.error("Interrupted Exception", e);
        // restore the interrupt flag so that callers can react to the interruption
        Thread.currentThread().interrupt();
    }
    this.close();
}


/**
* Generate walks for the entities.
*
Expand Down
26 changes: 24 additions & 2 deletions src/main/java/walkGenerators/base/WalkGeneratorDefault.java
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,9 @@ public void generateWalks(WalkGenerationMode generationMode, int numberOfThreads
} else if (generationMode == WalkGenerationMode.RANDOM_WALKS_DUPLICATE_FREE) {
System.out.println("generate random walks duplicate free...");
this.generateRandomWalksDuplicateFree(numberOfThreads, numberOfWalks, depth, walkFile);
} else if (generationMode == WalkGenerationMode.MID_WALKS_WEIGHTED) {
System.out.println("generate weighted mid walks...");
this.generateWeightedMidWalks(numberOfThreads, numberOfWalks, depth, walkFile);
} else {
System.out.println("ERROR. Cannot identify the walkGenenerationMode chosen. Aborting program.");
}
Expand All @@ -153,7 +156,7 @@ public void generateRandomWalks(int numberOfThreads, int numberOfWalksPerEntity,
@Override
public void generateRandomWalks(int numberOfThreads, int numberOfWalksPerEntity, int depth, String filePathOfFileToBeWritten) {
this.filePath = filePathOfFileToBeWritten;
generateWalksForEntities(entitySelector.getEntities(), numberOfThreads, numberOfWalksPerEntity, depth);
generateRandomWalksForEntities(entitySelector.getEntities(), numberOfThreads, numberOfWalksPerEntity, depth);
}

@Override
Expand Down Expand Up @@ -186,6 +189,25 @@ public void generateRandomMidWalks(int numberOfThreads, int numberOfWalksPerEnti
generateRandomMidWalksForEntities(entitySelector.getEntities(), numberOfThreads, numberOfWalksPerEntity, depth);
}

/**
 * Generates weighted mid walks and writes them to the default walk file
 * ({@code DEFAULT_WALK_FILE_TO_BE_WRITTEN}) by delegating to the overload that takes a file path.
 */
@Override
public void generateWeightedMidWalks(int numberOfThreads, int numberOfWalksPerEntity, int depth) {
    String targetFile = DEFAULT_WALK_FILE_TO_BE_WRITTEN;
    this.generateWeightedMidWalks(numberOfThreads, numberOfWalksPerEntity, depth, targetFile);
}

/**
 * Generates weighted mid walks for all entities provided by the entity selector and writes them to the given file.
 * Nothing is generated (an error is logged instead) if no parser is set or if the parser is flagged as not ok.
 */
@Override
public void generateWeightedMidWalks(int numberOfThreads, int numberOfWalksPerEntity, int depth, String filePathOfFileToBeWritten) {
    if (this.parser == null) {
        LOGGER.error("Parser not initialized. Aborting program");
    } else if (!parserIsOk) {
        LOGGER.error("Will not execute walk generation due to parser initialization error.");
    } else {
        // preconditions hold: remember the target file, then generate the walks
        this.filePath = filePathOfFileToBeWritten;
        generateWeightedMidWalksForEntities(entitySelector.getEntities(), numberOfThreads, numberOfWalksPerEntity, depth);
    }
}

@Override
public void generateRandomMidWalksDuplicateFree(int numberOfThreads, int numberOfWalksPerEntity, int depth) {
generateRandomMidWalksDuplicateFree(numberOfThreads, numberOfWalksPerEntity, depth, DEFAULT_WALK_FILE_TO_BE_WRITTEN);
Expand Down Expand Up @@ -214,7 +236,7 @@ public void generateRandomMidWalksDuplicateFree(int numberOfThreads, int numberO
* @param numberOfWalks The number of walks to be generated per entity.
* @param walkLength The length of each walk.
*/
public void generateWalksForEntities(Set<String> entities, int numberOfThreads, int numberOfWalks, int walkLength) {
public void generateRandomWalksForEntities(Set<String> entities, int numberOfThreads, int numberOfWalks, int walkLength) {
File outputFile = new File(filePath);
outputFile.getParentFile().mkdirs();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,9 @@ public void run() {
walkGenerator.writeToFile(((HdtParser) walkGenerator.parser).generateMidWalksForEntityDuplicateFree(walkGenerator.shortenUri(entity), this.numberOfWalks, this.depth));
} else if (walkGenerator.parser.getClass() == NtMemoryParser.class) {
// yes, the depth and # of walks parameters are this way
walkGenerator.writeToFile(((NtMemoryParser) walkGenerator.parser).generateMidWalksForEntityDuplicateFree(walkGenerator.shortenUri(entity),this.depth, this.numberOfWalks));
walkGenerator.writeToFile(((NtMemoryParser) walkGenerator.parser).generateMidWalksForEntityDuplicateFree(walkGenerator.shortenUri(entity), this.numberOfWalks, depth));
} else if (walkGenerator.parser.getClass() == NxMemoryParser.class) {
walkGenerator.writeToFile(((NxMemoryParser) walkGenerator.parser).generateMidWalksForEntityDuplicateFree(walkGenerator.shortenUri(entity), this.depth, this.numberOfWalks));
walkGenerator.writeToFile(((NxMemoryParser) walkGenerator.parser).generateMidWalksForEntityDuplicateFree(walkGenerator.shortenUri(entity), this.numberOfWalks, depth));
} else LOGGER.error("NOT YET IMPLEMENTED FOR THE CURRENT PARSER!");
}
}
Loading

0 comments on commit 0e21117

Please sign in to comment.