Skip to content

Commit

Permalink
gh-2725: Refactor Import Csv and Formats (#2778)
Browse files Browse the repository at this point in the history
* gh-2723 Split ImportCsv into two parts, refactor CsvElementGenerator

* gh-2725 fixed failing StoreTest

* gh-2725 move the koryphe ops to CsvFormat

* gh-2725 rework opretaion to follow GetSetExport

* gh-2725 refactor

* gh-2725 acted on comments

* gh-2725 ExportTo and ImportFrom LocalFile only default on MapStore

* gh-2725 checkstyle

* gh-2725 fixed StoreTest

* gh-2725 fixed typo

* gh-2725 indenting

* gh-2725 checkstyle
  • Loading branch information
p3430233 committed Nov 23, 2022
1 parent 6ff8f69 commit 2a33be8
Show file tree
Hide file tree
Showing 35 changed files with 1,068 additions and 816 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@

package uk.gov.gchq.gaffer.data.generator;

import com.google.common.collect.ImmutableList;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand All @@ -33,106 +31,64 @@
import uk.gov.gchq.koryphe.impl.function.FunctionChain;
import uk.gov.gchq.koryphe.impl.function.IterableFunction;
import uk.gov.gchq.koryphe.impl.function.MapToTuple;
import uk.gov.gchq.koryphe.impl.function.ParseTime;
import uk.gov.gchq.koryphe.impl.function.ToBoolean;
import uk.gov.gchq.koryphe.impl.function.ToDouble;
import uk.gov.gchq.koryphe.impl.function.ToFloat;
import uk.gov.gchq.koryphe.impl.function.ToInteger;
import uk.gov.gchq.koryphe.impl.function.ToLong;
import uk.gov.gchq.koryphe.impl.function.ToString;
import uk.gov.gchq.koryphe.tuple.Tuple;

import java.io.Serializable;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

@Since("2.0.0")
@Summary("Generates elements from an openCypher CSV string")
public class OpenCypherCsvElementGenerator implements ElementGenerator<String>, Serializable {
@Summary("Generates elements from a CSV string")
public class CsvElementGenerator implements ElementGenerator<String>, Serializable {
private static final long serialVersionUID = -821376598172364516L;
private static final Logger LOGGER = LoggerFactory.getLogger(OpenCypherCsvElementGenerator.class);

public static final String NEPTUNE_VERTEX = ":ID";
public static final String NEO4J_VERTEX = "_id";
public static final String NEPTUNE_ENTITY_GROUP = ":LABEL";
public static final String NEO4J_ENTITY_GROUP = "_labels";
private static final Logger LOGGER = LoggerFactory.getLogger(CsvElementGenerator.class);

public static final String NEPTUNE_SOURCE = ":START_ID";
public static final String NEO4J_SOURCE = "_start";
public static final String NEPTUNE_DESTINATION = ":END_ID";
public static final String NEO4J_DESTINATION = "_end";

public static final String NEPTUNE_EDGE_GROUP = ":TYPE";
public static final String NEO4J_EDGE_GROUP = "_type";
private static final List<String> ELEMENT_COLUMN_NAMES = ImmutableList.of(NEPTUNE_VERTEX, NEPTUNE_ENTITY_GROUP, NEPTUNE_EDGE_GROUP, NEPTUNE_SOURCE, NEPTUNE_DESTINATION);
private String header;
private int firstRow = 0;
private Boolean trim = false;
private char delimiter = ',';
private String nullString = "";
private CsvFormat csvFormat;

@Override
public Iterable<? extends Element> apply(final Iterable<? extends String> strings) {
List<String> elementColumnNames = new ArrayList<String>(CsvFormat.getIdentifiers(csvFormat).values());
CsvLinesToMaps parseCsv = new CsvLinesToMaps()
.firstRow(1)
.trim(trim)
.nullString(nullString)
.delimiter(delimiter)
.parseHeader(header);
.firstRow(1)
.trim(trim)
.nullString(nullString)
.delimiter(delimiter)
.parseHeader(header);

IterableFunction<Map<String, Object>, Tuple<String>> toTuples = new IterableFunction<>(new MapToTuple<String>());

FunctionChain.Builder<Tuple<String>, Tuple<String>> transformTuplesBuilder = new FunctionChain.Builder<>();

ElementTupleDefinition entityDefinition = new ElementTupleDefinition(NEPTUNE_ENTITY_GROUP).vertex(NEPTUNE_VERTEX);
ElementTupleDefinition edgeDefinition = new ElementTupleDefinition(NEPTUNE_EDGE_GROUP)
.source(NEPTUNE_SOURCE)
.destination(NEPTUNE_DESTINATION)
.property("edge-id", NEPTUNE_VERTEX);
ElementTupleDefinition entityDefinition = new ElementTupleDefinition(csvFormat.getEntityGroup()).vertex(csvFormat.getVertex());
ElementTupleDefinition edgeDefinition = new ElementTupleDefinition(csvFormat.getEdgeGroup())
.source(csvFormat.getSource())
.destination(csvFormat.getDestination())
.property("edge-id", csvFormat.getVertex());
for (final String columnHeader : parseCsv.getHeader()) {
if (!ELEMENT_COLUMN_NAMES.contains(columnHeader)) {
if (!elementColumnNames.contains(columnHeader)) {
String propertyName = columnHeader.split(":")[0];
if (!PropertiesUtil.isValidName(propertyName)) {
propertyName = propertyName.replaceAll("_", "-");
propertyName = PropertiesUtil.stripInvalidCharacters(propertyName);
}
if (columnHeader.contains(":")) {
String typeName = columnHeader.split(":")[1];
HashMap<String, KorypheFunction<?, ?>> transformMappings = OpenCypherFormat.transformMappings;
KorypheFunction<?, ?> transform;
switch (typeName) {
case "DateTime":
transform = new ParseTime();
break;
case "Long":
transform = new ToLong();
break;
case "Byte":
case "Short":
case "Int":
transform = new ToInteger();
break;
case "Boolean":
transform = new ToBoolean();
break;
case "Float":
transform = new ToFloat();
break;
case "Double":
transform = new ToDouble();
break;
case "Char":
case "Date":
case "LocalDate":
case "LocalDateTime":
case "Duration":
case "Point":
case "String":
transform = new ToString();
break;
default:
throw new RuntimeException("Unsupported Type: " + typeName);
if (transformMappings.containsKey(typeName)) {
transform = transformMappings.get(typeName);
} else {
throw new RuntimeException("Unsupported Type: " + typeName);
}

transformTuplesBuilder = transformTuplesBuilder.execute(new String[]{columnHeader}, transform, new String[]{propertyName});
}

Expand All @@ -144,9 +100,9 @@ public Iterable<? extends Element> apply(final Iterable<? extends String> string
IterableFunction<Tuple<String>, Tuple<String>> transformTuples = new IterableFunction(transformTuplesBuilder.build());

TuplesToElements toElements = new TuplesToElements()
.element(entityDefinition)
.element(edgeDefinition)
.useGroupMapping(true);
.element(entityDefinition)
.element(edgeDefinition)
.useGroupMapping(true);
// Apply functions
final FunctionChain<Iterable<String>, Iterable<Element>> generator = new FunctionChain.Builder<Iterable<String>, Iterable<Element>>()
.execute(parseCsv)
Expand All @@ -157,22 +113,14 @@ public Iterable<? extends Element> apply(final Iterable<? extends String> string
return generator.apply((Iterable<String>) strings);
}

private String parseHeader(final String header) {
String parsedHeader = header.replace(NEO4J_VERTEX, NEPTUNE_VERTEX);
parsedHeader = parsedHeader.replace(NEO4J_ENTITY_GROUP, NEPTUNE_ENTITY_GROUP);
parsedHeader = parsedHeader.replace(NEO4J_EDGE_GROUP, NEPTUNE_EDGE_GROUP);
parsedHeader = parsedHeader.replace(NEO4J_SOURCE, NEPTUNE_SOURCE);
parsedHeader = parsedHeader.replace(NEO4J_DESTINATION, NEPTUNE_DESTINATION);
return parsedHeader;
}

public String getHeader() {
return header;
}


public void setHeader(final String header) {
this.header = parseHeader(header);
this.header = header;
}


Expand Down Expand Up @@ -214,42 +162,56 @@ public void setNullString(final String nullString) {
this.nullString = nullString;
}

public CsvFormat getCsvFormat() {
return csvFormat;
}

public void setCsvFormat(final CsvFormat csvFormat) {
this.csvFormat = csvFormat;
}

public static class Builder {
private String header;
private int firstRow = 0;
private Boolean trim = false;
private char delimiter = ',';
private String nullString = "";
private CsvFormat csvFormat;


public OpenCypherCsvElementGenerator.Builder header(final String header) {
public CsvElementGenerator.Builder header(final String header) {
this.header = header;
return this;
}
public OpenCypherCsvElementGenerator.Builder firstRow(final int firstRow) {

public CsvElementGenerator.Builder csvFormat(final CsvFormat csvFormat) {
this.csvFormat = csvFormat;
return this;
}
public CsvElementGenerator.Builder firstRow(final int firstRow) {
this.firstRow = firstRow;
return this;
}
public OpenCypherCsvElementGenerator.Builder trim(final boolean trim) {
public CsvElementGenerator.Builder trim(final boolean trim) {
this.trim = trim;
return this;
}
public OpenCypherCsvElementGenerator.Builder delimiter(final char delimiter) {
public CsvElementGenerator.Builder delimiter(final char delimiter) {
this.delimiter = delimiter;
return this;
}
public OpenCypherCsvElementGenerator.Builder nullString(final String nullString) {
public CsvElementGenerator.Builder nullString(final String nullString) {
this.nullString = nullString;
return this;
}
public OpenCypherCsvElementGenerator build() {
OpenCypherCsvElementGenerator generator = new OpenCypherCsvElementGenerator();
generator.setNullString(nullString);
generator.setDelimiter(delimiter);
generator.setHeader(header);
generator.setTrim(trim);
generator.setFirstRow(firstRow);
return generator;
public CsvElementGenerator build() {
CsvElementGenerator generator = new CsvElementGenerator();
generator.setNullString(nullString);
generator.setDelimiter(delimiter);
generator.setHeader(header);
generator.setTrim(trim);
generator.setFirstRow(firstRow);
generator.setCsvFormat(csvFormat);
return generator;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,13 @@ public abstract class CsvFormat {
public static final String ENTITY_GROUP = "ENTITY_GROUP";
public static final String EDGE_GROUP = "EDGE_GROUP";
private static LinkedHashMap<String, String> identifiers = new LinkedHashMap<String, String>();

public abstract String getVertex();
public abstract String getEntityGroup();
public abstract String getEdgeGroup();
public abstract String getSource();
public abstract String getDestination();

public static LinkedHashMap<String, String> getIdentifiers(final CsvFormat csvFormat) {
identifiers.put(String.valueOf(IdentifierType.VERTEX), csvFormat.getVertex());
identifiers.put(ENTITY_GROUP, csvFormat.getEntityGroup());
Expand All @@ -38,3 +40,4 @@ public static LinkedHashMap<String, String> getIdentifiers(final CsvFormat csvFo
return identifiers;
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

import org.codehaus.jackson.annotate.JsonIgnore;

public class Neo4jFormat extends CsvFormat {
public class Neo4jFormat extends OpenCypherFormat {

@JsonIgnore
@Override
Expand Down Expand Up @@ -46,3 +46,4 @@ public String getDestination() {
return "_end";
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@

package uk.gov.gchq.gaffer.data.generator;

import org.codehaus.jackson.annotate.JsonIgnore;

public class NeptuneFormat extends CsvFormat {
import com.fasterxml.jackson.annotation.JsonIgnore;

public class NeptuneFormat extends OpenCypherFormat {

@Override
@JsonIgnore
Expand Down Expand Up @@ -49,5 +50,4 @@ public String getSource() {
public String getDestination() {
return ":END_ID";
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/*
* Copyright 2022 Crown Copyright
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package uk.gov.gchq.gaffer.data.generator;

import uk.gov.gchq.koryphe.function.KorypheFunction;
import uk.gov.gchq.koryphe.impl.function.ParseTime;
import uk.gov.gchq.koryphe.impl.function.ToBoolean;
import uk.gov.gchq.koryphe.impl.function.ToDouble;
import uk.gov.gchq.koryphe.impl.function.ToFloat;
import uk.gov.gchq.koryphe.impl.function.ToInteger;
import uk.gov.gchq.koryphe.impl.function.ToLong;
import uk.gov.gchq.koryphe.impl.function.ToString;

import java.util.HashMap;

public abstract class OpenCypherFormat extends CsvFormat {

public static HashMap<String, KorypheFunction<?, ?>> transformMappings = new HashMap<String, KorypheFunction<?, ?>>() { {
put("String", new ToString());
put("Char", new ToString());
put("Date", new ToString());
put("LocalDate", new ToString());
put("LocalDateTime", new ToString());
put("Point", new ToString());
put("Duration", new ToString());
put("Int", new ToInteger());
put("Short", new ToInteger());
put("Byte", new ToInteger());
put("DateTime", new ParseTime());
put("Long", new ToLong());
put("Double", new ToDouble());
put("Float", new ToFloat());
put("Boolean", new ToBoolean());
} };

public static HashMap<String, String> typeMappings = new HashMap<String, String>() { {
put("String", "String");
put("Character", "Char");
put("Date", "Date");
put("LocalDate", "LocalDate");
put("LocalDateTime", "LocalDateTime");
put("Point", "Point");
put("Duration", "Duration");
put("Integer", "Int");
put("Short", "Short");
put("Byte", "Byte");
put("DateTime", "DateTime");
put("Long", "Long");
put("Double", "Double");
put("Float", "Float");
put("Boolean", "Boolean");
} };

}

Loading

0 comments on commit 2a33be8

Please sign in to comment.