Skip to content

Commit

Permalink
Improve git indexer to limit package prefixes to domain suffixes
Browse files Browse the repository at this point in the history
Also reduce logging.
  • Loading branch information
merks committed Jul 18, 2023
1 parent 8508b55 commit 0d5e7eb
Showing 1 changed file with 32 additions and 20 deletions.
Expand Up @@ -43,6 +43,7 @@
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
Expand All @@ -67,7 +68,21 @@ public class GitIndexApplication implements IApplication

private static final Pattern GITLAB_ECLIPSE_REPO_PATTERN = Pattern.compile("https://gitlab.eclipse.org/(([^/]+)/(.*))");

private static final Pattern PACKAGE_PATTERN = Pattern.compile("package\\s+([^;]+)\\s*;");
private static final Pattern PACKAGE_PATTERN = Pattern.compile("package\\s+(([^.]+)[^;]+)\\s*;");

// Selects which progress-log lines get echoed: a line qualifies when it contains a comma
// or one of the markers " 0%", " 25%", " 50%", " 75%", " 100%" (each with a leading space).
private static final Pattern LOG_PATTERN = Pattern.compile(",| 0%| 25%| 50%| 75%| 100%");

// Leading package-name segments that the indexer accepts; a Java file is only indexed when the
// first segment of its declared package is in this set. Populated by the static initializer below.
private static final Set<String> PACKAGE_PREFIXES = new LinkedHashSet<String>();

// Populates PACKAGE_PREFIXES with every ISO country code in lower case, plus a fixed list of
// generic top-level-domain style prefixes and the Java platform prefixes.
static
{
  for (String code : Locale.getISOCountries())
  {
    // Lower-case with Locale.ROOT so the result does not depend on the JVM's default locale:
    // under a Turkish default locale, "IT".toLowerCase() yields "\u0131t" (dotless i), which
    // would never match an ASCII package prefix such as "it".
    PACKAGE_PREFIXES.add(code.toLowerCase(Locale.ROOT));
  }

  PACKAGE_PREFIXES.addAll(Set.of("com", "org", "net", "edu", "gov", "mil", "io", "java", "javax", "jakarta"));
}

private static Set<String> TEST_REPOSITORIES = new TreeSet<>(Set.of( //
"https://git.eclipse.org/r/jgit/jgit", //
Expand Down Expand Up @@ -758,14 +773,7 @@ public boolean isCanceled()
@Override
public void log(String line)
{
if (line.contains("%"))
{
if (line.contains("0%"))
{
System.out.println(line);
}
}
else
if (LOG_PATTERN.matcher(line).find())
{
System.out.println(line);
}
Expand Down Expand Up @@ -837,19 +845,23 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attributes) thro
var matcher = PACKAGE_PATTERN.matcher(line);
if (matcher.find())
{
var relativePath = cloneFolder.relativize(file.getParent()).toString().replace('\\', '/');
var packageName = matcher.group(1);
var className = fileName.substring(0, fileName.length() - ".java".length());
var packagePath = "/" + packageName.replace('.', '/');
if (relativePath.endsWith(packagePath))
var packagePrefix = matcher.group(2);
if (PACKAGE_PREFIXES.contains(packagePrefix))
{
var relativeBasePath = relativePath.substring(0, relativePath.length() - packagePath.length());
javaCount.incrementAndGet();
repositoryIndex.computeIfAbsent(base, key -> new TreeMap<>()).computeIfAbsent(relativeBasePath, key -> new TreeMap<>())
.computeIfAbsent(packageName, key -> new TreeSet<>()).add(className);
var relativePath = cloneFolder.relativize(file.getParent()).toString().replace('\\', '/');
var packageName = matcher.group(1);
var className = fileName.substring(0, fileName.length() - ".java".length());
var packagePath = "/" + packageName.replace('.', '/');
if (relativePath.endsWith(packagePath))
{
var relativeBasePath = relativePath.substring(0, relativePath.length() - packagePath.length());
javaCount.incrementAndGet();
repositoryIndex.computeIfAbsent(base, key -> new TreeMap<>()).computeIfAbsent(relativeBasePath, key -> new TreeMap<>())
.computeIfAbsent(packageName, key -> new TreeSet<>()).add(className);
}

return FileVisitResult.CONTINUE;
}

return FileVisitResult.CONTINUE;
}
}
}
Expand Down

0 comments on commit 0d5e7eb

Please sign in to comment.