Fix typos and some warnings in CrawlersManager class
aecio committed Feb 24, 2023
1 parent 8ed162d commit 8c9077b
Showing 1 changed file with 15 additions and 24 deletions.
ache/src/main/java/achecrawler/crawler/CrawlersManager.java (39 changes: 15 additions & 24 deletions)
@@ -33,16 +33,12 @@

 public class CrawlersManager {

-    private static Logger logger = LoggerFactory.getLogger(CrawlersManager.class);
+    private static final Logger logger = LoggerFactory.getLogger(CrawlersManager.class);

-    private Configuration baseConfig;
-    private String baseDataPath;
+    private final Configuration baseConfig;
+    private final String baseDataPath;

-    private Map<String, CrawlContext> crawlers = new HashMap<>();
-
-    public CrawlersManager(String dataPath) {
-        this(dataPath, new Configuration());
-    }
+    private final Map<String, CrawlContext> crawlers = new HashMap<>();

     public CrawlersManager(String baseDataPath, Configuration baseConfig) {
         this.baseConfig = baseConfig;
@@ -112,22 +108,19 @@ private Configuration createConfigForCrawlType(Configuration baseConfig, Path co
             CrawlType crawlType, String esIndexName, String esTypeName) throws IOException {

         URL configLocation = getConfigForCrawlType(crawlType);
-        InputStream configStream = configLocation.openStream();
-        try {
+        try (InputStream configStream = configLocation.openStream()) {
             Configuration crawlConfig = baseConfig.copyUpdating(configStream);
             if (esIndexName != null && !esIndexName.isEmpty()) {
                 crawlConfig.getTargetStorageConfig().getElasticSearchConfig()
-                    .setIndexName(esIndexName);
+                        .setIndexName(esIndexName);
             }
             if (esTypeName != null && !esTypeName.isEmpty()) {
                 crawlConfig.getTargetStorageConfig().getElasticSearchConfig()
-                    .setTypeName(esTypeName);
+                        .setTypeName(esTypeName);
             }
             Files.createDirectories(configPath);
             crawlConfig.writeToFile(configPath.resolve("ache.yml"));
             return crawlConfig;
-        } finally {
-            configStream.close();
         }
     }

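Note: the hunk above replaces a manual try/finally that closed the InputStream with a try-with-resources statement, which closes the stream automatically whether the block exits normally or with an exception. A minimal standalone sketch of the same pattern (the class, method, and file argument below are invented for illustration and are not part of this commit):

import java.io.BufferedReader;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

class TryWithResourcesSketch {
    // Hypothetical example: read the first line of a text file.
    static String readFirstLine(Path file) throws IOException {
        // The reader declared in the try header is closed automatically when
        // the block exits, normally or via an exception, so no finally block
        // is needed.
        try (BufferedReader reader = Files.newBufferedReader(file)) {
            return reader.readLine();
        }
    }
}
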
@@ -141,13 +134,13 @@ private URL getConfigForCrawlType(CrawlType crawlType) {
                 fileName = "config/config_focused_crawl/ache.yml";
                 break;
             default:
-                throw new UnsupportedOperationException("Unsuported crawl type: " + crawlType);
+                throw new UnsupportedOperationException("Unsupported crawl type: " + crawlType);
         }
         return getClass().getClassLoader().getResource(fileName);
     }

     private String getSeedForCrawlType(CrawlType crawlType, List<String> seeds, Path configPath,
-            String storedModelPath) throws FileNotFoundException, IOException {
+            String storedModelPath) throws IOException {
         String seedPath;
         switch (crawlType) {
             case DeepCrawl:
@@ -189,17 +182,17 @@ private String storeModelFile(byte[] model, Path modelPath) throws IOException {
     }

     private void unzipFile(Path file, Path outputDir) throws IOException {
-        ZipFile zipFile = new ZipFile(file.toFile());
-        try {
+        try (ZipFile zipFile = new ZipFile(file.toFile())) {
             Enumeration<? extends ZipEntry> entries = zipFile.entries();
             while (entries.hasMoreElements()) {
                 ZipEntry entry = entries.nextElement();
                 if (entry.getName().startsWith("training_data")) {
-                    logger.info("Skiping training_data folder/file.");
+                    logger.info("Skipping training_data folder/file.");
                     continue;
                 }
                 File entryDestination = new File(outputDir.toFile(), entry.getName());
-                if (!entryDestination.toPath().normalize().startsWith(outputDir.toFile().toPath().normalize())) {
+                if (!entryDestination.toPath().normalize()
+                        .startsWith(outputDir.toFile().toPath().normalize())) {
                     // Prevent from zip slip vulnerability.
                     // See:https://github.com/VIDA-NYU/ache/pull/307
                     throw new IOException("Bad zip entry");
@@ -215,8 +208,6 @@ private void unzipFile(Path file, Path outputDir) throws IOException {
                     out.close();
                 }
             }
-        } finally {
-            zipFile.close();
         }
     }

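Note: the condition re-wrapped in the previous hunk is the existing zip-slip guard (see the code comment pointing to VIDA-NYU/ache#307): each entry's destination path is normalized and must remain inside the output directory, otherwise extraction is aborted. A small self-contained sketch of the same check, assuming hypothetical class and method names:

import java.io.IOException;
import java.nio.file.Path;

class ZipSlipCheckSketch {
    // Resolves an archive entry name against the output directory and rejects
    // entries (for example "../../etc/passwd") that would escape it after
    // normalization.
    static Path safeDestination(Path outputDir, String entryName) throws IOException {
        Path destination = outputDir.resolve(entryName).normalize();
        if (!destination.startsWith(outputDir.normalize())) {
            throw new IOException("Bad zip entry: " + entryName);
        }
        return destination;
    }
}
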
@@ -232,7 +223,7 @@ private String findSeedFileInModelPackage(String model) throws IOException {
         return null;
     }

-    public class CrawlContext {
+    public static class CrawlContext {

         public String crawlerId;

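Note: declaring CrawlContext as static removes the implicit reference that a non-static inner class instance keeps to its enclosing CrawlersManager, which is the kind of warning the commit message mentions and avoids keeping the outer object reachable longer than needed. A brief illustrative sketch with hypothetical names (Outer, InnerHolder, and StaticHolder are not from this codebase):

class Outer {
    // Non-static inner class: each instance implicitly holds a reference
    // to the Outer instance that created it.
    class InnerHolder {
        int value;
    }

    // Static nested class: no hidden reference to Outer, so instances can
    // outlive any particular Outer object without keeping it reachable.
    static class StaticHolder {
        int value;
    }

    void demo() {
        InnerHolder inner = new InnerHolder();     // requires the enclosing Outer (this)
        StaticHolder nested = new StaticHolder();  // does not
        inner.value = 1;
        nested.value = 2;
    }
}
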
@@ -259,7 +250,7 @@ public String getCrawlerState() {
         }

         public boolean isCrawlerRunning() {
-            return crawler == null ? false : crawler.isRunning();
+            return crawler != null && crawler.isRunning();
         }

         public boolean isSearchEnabled() {