Skip to content

Commit

Permalink
towards support eml to index table; related to #942 (comment)
Browse files Browse the repository at this point in the history
  • Loading branch information
Jorrit Poelen committed Nov 28, 2023
1 parent e9fb995 commit d2fd609
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 17 deletions.
Expand Up @@ -5,6 +5,7 @@
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.StringEscapeUtils;
import org.eol.globi.service.ResourceService;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
Expand Down Expand Up @@ -57,11 +58,17 @@ public static JsonNode datasetWithEML(ResourceService origDataset, URI emlURI) t

Node table = getFirstNodeIfPresent(doc, xpath, "//dataTable");
if (table != null) {
JsonNode contextNode = new ObjectMapper().readTree("[ \"http://www.w3.org/ns/csvw\", {\n" +
" \"@language\" : \"en\"\n" +
" }]");
objectNode.set("@context", contextNode);

ObjectNode tableNode = new ObjectMapper().createObjectNode();
ArrayNode arrayNode = new ObjectMapper().createArrayNode();
arrayNode.add(tableNode);
objectNode.set("tables", arrayNode);
tableNode.put("delimiter", StringUtils.trim(getFirstIfPresent(table, xpath, "//fieldDelimiter")));
Node delimiter = getFirstNodeIfPresent(table, xpath, "//fieldDelimiter");
tableNode.put("delimiter", StringEscapeUtils.unescapeJava(delimiter.getTextContent()));
tableNode.put("headerRowCount", StringUtils.trim(getFirstIfPresent(table, xpath, "//numHeaderLines")));
tableNode.put("url", StringUtils.trim(getFirstIfPresent(table, xpath, "//distribution/online/url")));
ObjectNode schema = new ObjectMapper().createObjectNode();
Expand Down
Expand Up @@ -41,10 +41,15 @@ public void emlToMetaTables() throws URISyntaxException, IOException {
assertThat(proxy.getCitation(), is("WorldFAIR pilot data from: VisitationData_Luisa_Carvalheiro."));
assertThat(proxy.getFormat(), not(is(MIME_TYPE_DWCA)));
assertThat(proxy.getFormat(), is("globi"));

JsonNode context = proxy.getConfig().get("@context");
assertThat(context, is(notNullValue()));
assertThat(context.toString(), is("[\"http://www.w3.org/ns/csvw\",{\"@language\":\"en\"}]"));

JsonNode tablesNode = proxy.getConfig().get("tables");
assertThat(tablesNode.size(), is(1));
JsonNode tableNode = tablesNode.get(0);
assertThat(tableNode.get("delimiter").textValue(), is("\\t"));
assertThat(tableNode.get("delimiter").textValue(), is("\t"));
assertThat(tableNode.get("headerRowCount").textValue(), is("6"));
assertThat(tableNode.get("url").textValue(), is("https://docs.google.com/spreadsheets/u/1/d/1cJ0qX9ppqHoSyqFykwYJef-DFOzoutthBXjwKRY81T8/export?format=tsv&id=1cJ0qX9ppqHoSyqFykwYJef-DFOzoutthBXjwKRY81T8&gid=776329546"));
JsonNode jsonNode = tableNode.get("tableSchema").get("columns");
Expand Down
Expand Up @@ -15,18 +15,18 @@
</abstract>
<creator id="https://orcid.org/0000-0001-7655-979X">
<individualName>
<givenName>Carvalheiro</givenName>
<surName>Luisa</surName>
<givenName>Luisa</givenName>
<surName>Carvalheiro</surName>
</individualName>
<organizationName xml:lang="en">University of Goias</organizationName>
<electronicMailAddress>lgcarvalheiro@gmail.com</electronicMailAddress>
<userId directory="https://orcid.org">https://orcid.org/0000-0001-7655-979X</userId>
</creator>
<creator id="https://orcid.org/0000-0002-8675-7068">
<individualName>
<givenName>Salim</givenName>
<givenName>José</givenName>
<givenName>A.</givenName>
<surName>José</surName>
<surName>Salim</surName>
</individualName>
<organizationName xml:lang="en">University of Sao Paulo</organizationName>
<electronicMailAddress>joseasalim@usp.br</electronicMailAddress>
Expand Down Expand Up @@ -1140,4 +1140,4 @@
<numberOfRecords>685</numberOfRecords>
</dataTable>
</dataset>
</eml:eml>
</eml:eml>
Expand Up @@ -5,7 +5,6 @@
import org.apache.commons.compress.utils.IOUtils;
import org.apache.commons.io.output.NullOutputStream;
import org.apache.commons.lang3.tuple.Pair;
import org.eol.globi.util.ResourceUtil;
import org.globalbioticinteractions.dataset.Dataset;
import org.globalbioticinteractions.dataset.DatasetImpl;
import org.globalbioticinteractions.util.GitClient;
Expand Down Expand Up @@ -38,7 +37,7 @@ public static URI getGitHubAPIEndpoint(String path, String query) throws URISynt
path,
query,
null
);
);
}

private static boolean hasInteractionData(URI gloBIConfigURI, ResourceService resourceService) throws IOException {
Expand Down Expand Up @@ -101,14 +100,9 @@ private static List<Pair<String, String>> searchGitHubForCandidateRepositories(R
}

/**
 * Decides whether a repository, at a specific commit, qualifies as a GloBI repository.
 *
 * <p>Three candidate configuration resources are probed in order:
 * {@code globi.json}, {@code globi-dataset.jsonld} and {@code eml.xml}. Each candidate URI is
 * built by {@code getGloBIConfigURI} and checked with {@code hasInteractionData}; evaluation
 * short-circuits on the first candidate for which that check returns {@code true}.
 *
 * @param globiRepo       repository name passed through to {@code getGloBIConfigURI}
 * @param commitSHA       commit identifier passed through to {@code getGloBIConfigURI}
 * @param resourceService service handed to {@code hasInteractionData} for each candidate
 * @return {@code true} if at least one candidate passes {@code hasInteractionData}
 * @throws IOException if probing a candidate fails
 */
static boolean isGloBIRepository(String globiRepo, String commitSHA, ResourceService resourceService) throws IOException {
    // Only the post-commit return statement is kept: the earlier two-candidate version
    // (without eml.xml) was superseded and would make this statement unreachable.
    return hasInteractionData(getGloBIConfigURI(globiRepo, "globi.json", commitSHA), resourceService)
            || hasInteractionData(getGloBIConfigURI(globiRepo, "globi-dataset.jsonld", commitSHA), resourceService)
            || hasInteractionData(getGloBIConfigURI(globiRepo, "eml.xml", commitSHA), resourceService);
}

public static String lastCommitSHA(String repository, ResourceService resourceService) throws IOException {
Expand Down
Expand Up @@ -11,6 +11,7 @@
import java.net.URISyntaxException;
import java.util.List;

import static junit.framework.TestCase.assertTrue;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.core.Is.is;
import static org.hamcrest.core.IsNull.notNullValue;
Expand All @@ -27,6 +28,18 @@ public void discoverRepos() throws IOException, URISyntaxException {
assertThat(reposWithData, CoreMatchers.hasItem(TEMPLATE_DATA_REPOSITORY_JSONLD));
}

/**
 * Looks up the most recent commit of {@code globalbioticinteractions/carvalheiro2023} via
 * {@link GitHubUtil#lastCommitSHA} and asserts that {@link GitHubUtil#isGloBIRepository}
 * accepts the repository at that commit.
 *
 * NOTE(review): uses ResourceServiceHTTP, so this presumably performs live HTTP requests —
 * confirm it is meant to run as an integration test.
 */
@Test
public void isGloBIRepo() throws IOException {
    final String repository = "globalbioticinteractions/carvalheiro2023";
    // The constructor argument is an identity function: whatever it receives is returned unchanged.
    final ResourceServiceHTTP httpService = new ResourceServiceHTTP(stream -> stream);

    final String mostRecentCommit = GitHubUtil.lastCommitSHA(repository, httpService);

    assertTrue(GitHubUtil.isGloBIRepository(repository, mostRecentCommit, httpService));
}

@Test
public void findMostRecentCommit() throws IOException {
String sha = GitHubUtil.lastCommitSHA(
Expand Down

0 comments on commit d2fd609

Please sign in to comment.