Skip to content

Commit

Permalink
Bug-fix/import-headers-merge-attrs (#16396)
Browse files Browse the repository at this point in the history
Fixes #16337

When using a CSV headers file, attribute merging wasn't performed for the first line of the data file.

Make warning display target attribute instead of a memory address, and wrap attribute names in quotes.

Copy int/float string from buffer for merging attributes so that the representation matches the normal import. Floats previously had a fixed precision of 6 decimal places.
  • Loading branch information
Simran-B committed Jun 21, 2022
1 parent eba729b commit 6895cbd
Show file tree
Hide file tree
Showing 6 changed files with 210 additions and 11 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
devel
-----

* Fixed issue 16337: arangoimport with `--headers-file` and `--merge-attributes`
merges column names instead of row values on the first line of a CSV file.

Additionally, floating-point numbers are now merged using their standard
string representation instead of with a fixed precision of 6 decimal places.

* Now supporting projections on traversals. In AQL Traversal statements like
FOR v,e,p IN 1..3 OUTBOUND @start GRAPH @graph RETURN v.name
we will now detect attribute accesses on the data, in above example "v.name"
Expand Down
24 changes: 15 additions & 9 deletions client-tools/Import/ImportHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -889,10 +889,13 @@ void ImportHelper::addField(char const* field, size_t fieldLength, size_t row,
}

int64_t num = StringUtils::int64(field, fieldLength);
size_t bufPos = _lineBuffer.length();
_lineBuffer.appendInteger(num);
if (!_mergeAttributesInstructions.empty()) {
lookUpTableValue = std::to_string(num);
lookUpTableValue =
std::string(_lineBuffer.stringBuffer()->_buffer + bufPos,
_lineBuffer.length() - bufPos);
}
_lineBuffer.appendInteger(num);
} catch (...) {
// conversion failed
_lineBuffer.appendJsonEncoded(field, fieldLength);
Expand All @@ -907,10 +910,13 @@ void ImportHelper::addField(char const* field, size_t fieldLength, size_t row,
if (pos == fieldLength) {
bool failed = (num != num || num == HUGE_VAL || num == -HUGE_VAL);
if (!failed) {
size_t bufPos = _lineBuffer.length();
_lineBuffer.appendDecimal(num);
if (!_mergeAttributesInstructions.empty()) {
lookUpTableValue = std::to_string(num);
lookUpTableValue =
std::string(_lineBuffer.stringBuffer()->_buffer + bufPos,
_lineBuffer.length() - bufPos);
}
_lineBuffer.appendDecimal(num);
return;
}
}
Expand Down Expand Up @@ -971,18 +977,18 @@ void ImportHelper::addLastField(char const* field, size_t fieldLength,
// add --merge-attributes arguments
if (!_mergeAttributesInstructions.empty()) {
for (auto& [key, value] : _mergeAttributesInstructions) {
if (row == _rowsToSkip) {
if (row == _rowsToSkip && !_headersSeen) {
std::for_each(
value.begin(), value.end(),
[this, key = &key](Step const& attrProperties) {
[this, &key](Step const& attrProperties) {
if (!attrProperties.isLiteral) {
if (std::find(_columnNames.begin(), _columnNames.end(),
attrProperties.value) == _columnNames.end()) {
LOG_TOPIC("ab353", WARN, arangodb::Logger::FIXME)
<< "In --merge-attributes: No matching value for "
"attribute name "
<< attrProperties.value << " to populate attribute "
<< key;
"attribute name '"
<< attrProperties.value << "' to populate attribute '"
<< key << "'";
}
}
});
Expand Down
11 changes: 11 additions & 0 deletions js/client/modules/@arangodb/testsuites/importing.js
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,17 @@ const impTodos = [{
convert: true,
datatype: "value=string",
mergeAttributes: ["Id=[id]", "IdAndValue=[id]:[value]", "ValueAndId=value:[value]/id:[id]", "_key=[id][value]", "newAttr=[_key]"],
}, {
id: 'csvheadersmergeattributes',
data: tu.makePathUnix(fs.join(testPaths.importing[1], 'import-merge-attrs.csv')),
skipLines: 1,
headers: tu.makePathUnix(fs.join(testPaths.importing[1], 'import-merge-attrs-headers.csv')),
coll: 'UnitTestsImportCsvHeadersMergeAttributes',
type: 'csv',
create: 'true',
separator: ',',
convert: true,
mergeAttributes: ["Id=[id]", "IdAndValue=[id]:[value]", "ValueAndId=value:[value]/id:[id]", "_key=[id][value]", "newAttr=[_key]"],
}, {
id: 'csvmergeattributesInvalid',
data: tu.makePathUnix(fs.join(testPaths.importing[1], 'import-merge-attrs.csv')),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
id,value
8 changes: 6 additions & 2 deletions tests/js/common/test-data/import/import-merge-attrs.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"id","value"
id,value
1,null
2,false
3,true
Expand All @@ -8,4 +8,8 @@
7,2
8,123456.43
9,-13323.322
10,null
10,null
11,007
12,2e307
13,2e309
14,0.123456789
171 changes: 171 additions & 0 deletions tests/js/server/import/import.js
Original file line number Diff line number Diff line change
Expand Up @@ -949,13 +949,184 @@ function importTestSuite () {
"id": 10,
"newAttr": "10null",
"value": "null"
},
{
"Id": "11",
"IdAndValue": "11:007",
"ValueAndId": "value:007/id:11",
"_key": "11007",
"id": 11,
"newAttr": "11007",
"value": "007"
},
{
"Id": "12",
"IdAndValue": "12:2e307",
"ValueAndId": "value:2e307/id:12",
"_key": "122e307",
"id": 12,
"newAttr": "122e307",
"value": "2e307"
},
{
"Id": "13",
"IdAndValue": "13:2e309",
"ValueAndId": "value:2e309/id:13",
"_key": "132e309",
"id": 13,
"newAttr": "132e309",
"value": "2e309"
},
{
"Id": "14",
"IdAndValue": "14:0.123456789",
"ValueAndId": "value:0.123456789/id:14",
"_key": "140.123456789",
"id": 14,
"newAttr": "140.123456789",
"value": "0.123456789"
}
];

let actual = getQueryResults("FOR i IN UnitTestsImportCsvMergeAttributes SORT TO_NUMBER(i.id) RETURN i", true);
assertEqual(JSON.stringify(expected), JSON.stringify(actual));
},

////////////////////////////////////////////////////////////////////////////////
/// @brief test csv import with headers file and merging attributes
////////////////////////////////////////////////////////////////////////////////
testCsvImportHeadersMergeAttributes: function () {
let expected = [
{
"Id": "1",
"IdAndValue": "1:null",
"ValueAndId": "value:null/id:1",
"_key": "1null",
"id": 1,
"newAttr": "1null"
},
{
"Id": "2",
"IdAndValue": "2:false",
"ValueAndId": "value:false/id:2",
"_key": "2false",
"id": 2,
"newAttr": "2false",
"value": false
},
{
"Id": "3",
"IdAndValue": "3:true",
"ValueAndId": "value:true/id:3",
"_key": "3true",
"id": 3,
"newAttr": "3true",
"value": true
},
{
"Id": "4",
"IdAndValue": "4:-1",
"ValueAndId": "value:-1/id:4",
"_key": "4-1",
"id": 4,
"newAttr": "4-1",
"value": -1
},
{
"Id": "5",
"IdAndValue": "5:0",
"ValueAndId": "value:0/id:5",
"_key": "50",
"id": 5,
"newAttr": "50",
"value": 0
},
{
"Id": "6",
"IdAndValue": "6:1",
"ValueAndId": "value:1/id:6",
"_key": "61",
"id": 6,
"newAttr": "61",
"value": 1
},
{
"Id": "7",
"IdAndValue": "7:2",
"ValueAndId": "value:2/id:7",
"_key": "72",
"id": 7,
"newAttr": "72",
"value": 2
},
{
"Id": "8",
"IdAndValue": "8:123456.43",
"ValueAndId": "value:123456.43/id:8",
"_key": "8123456.43",
"id": 8,
"newAttr": "8123456.43",
"value": 123456.43
},
{
"Id": "9",
"IdAndValue": "9:-13323.322",
"ValueAndId": "value:-13323.322/id:9",
"_key": "9-13323.322",
"id": 9,
"newAttr": "9-13323.322",
"value": -13323.322
},
{
"Id": "10",
"IdAndValue": "10:null",
"ValueAndId": "value:null/id:10",
"_key": "10null",
"id": 10,
"newAttr": "10null"
},
{
"Id": "11",
"IdAndValue": "11:7",
"ValueAndId": "value:7/id:11",
"_key": "117",
"id": 11,
"newAttr": "117",
"value": 7
},
{
"Id": "12",
"IdAndValue": "12:2e+307",
"ValueAndId": "value:2e+307/id:12",
"_key": "122e+307",
"id": 12,
"newAttr": "122e+307",
"value": 2e+307
},
{
"Id": "13",
"IdAndValue": "13:2e309",
"ValueAndId": "value:2e309/id:13",
"_key": "132e309",
"id": 13,
"newAttr": "132e309",
"value": "2e309"
},
{
"Id": "14",
"IdAndValue": "14:0.123456789",
"ValueAndId": "value:0.123456789/id:14",
"_key": "140.123456789",
"id": 14,
"newAttr": "140.123456789",
"value": 0.123456789
}
];

let actual = getQueryResults("FOR i IN UnitTestsImportCsvHeadersMergeAttributes SORT TO_NUMBER(i.id) RETURN i", true);
assertEqual(JSON.stringify(expected), JSON.stringify(actual));
},

////////////////////////////////////////////////////////////////////////////////
/// @brief test csv with data and header in single csv file, small batch-size
////////////////////////////////////////////////////////////////////////////////
Expand Down

0 comments on commit 6895cbd

Please sign in to comment.