Skip to content

Commit ead1fd8

Browse files
committed
Changes to the CSV input files (see #3786 for the context).
- `"` is now the default string qualifier (it is no longer null), as it is in several other applications (e.g. Excel). - When a CSV file has headers, the number of columns read cannot exceed the number of headers.
1 parent bc6dd96 commit ead1fd8

File tree

3 files changed

+35
-69
lines changed

3 files changed

+35
-69
lines changed

msi.gama.core/src/msi/gama/util/file/GamaCSVFile.java

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,8 @@ public CSVInfo(final String propertyString) {
129129
public String getDocumentation() {
130130
final StringBuilder sb = new StringBuilder();
131131
sb.append("CSV File ").append(header ? "with header" : "no header").append(Strings.LN);
132-
sb.append("Dimensions: ").append(cols + " columns x " + rows + " rows").append(Strings.LN);
132+
sb.append("Dimensions: ").append(cols + " columns x " + (header ? rows - 1 : rows) + " rows")
133+
.append(Strings.LN);
133134
sb.append("Delimiter: ").append(delimiter).append(Strings.LN);
134135
sb.append("Contents type: ").append(type).append(Strings.LN);
135136
if (header && headers != null) {
@@ -142,8 +143,8 @@ public String getDocumentation() {
142143

143144
@Override
144145
public String getSuffix() {
145-
return "" + cols + "x" + rows + " | " + (header ? "with header" : "no header") + " | " + "delimiter: '"
146-
+ delimiter + "' | " + type;
146+
return "" + cols + "x" + (header ? rows - 1 : rows) + " | " + (header ? "with header" : "no header") + " | "
147+
+ "delimiter: '" + delimiter + "' | " + type;
147148
}
148149

149150
@Override
@@ -176,7 +177,7 @@ public void setHeaders(final String[] newHeaders) {
176177
String csvSeparator = null;
177178

178179
/** The text qualifier. */
179-
Character textQualifier = null;
180+
Character textQualifier = '"';
180181

181182
/** The contents type. */
182183
IType contentsType;
@@ -498,6 +499,8 @@ public void fillBuffer(final IScope scope) {
498499
// we remove one row so as to not read the headers as well
499500
// Cause for issue #3036
500501
userSize.y = userSize.y - 1;
502+
// Make sure that we do not read more columns than the number of headers
503+
userSize.x = headers.size();
501504
}
502505
// long t = System.currentTimeMillis();
503506
setBuffer(createMatrixFrom(scope, reader));

msi.gama.core/src/msi/gama/util/file/csv/CsvReader.java

Lines changed: 23 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ public class CsvReader implements Closeable {
6666
// private String rawRecord = "";
6767

6868
/** The headers holder. */
69-
final HeadersHolder headersHolder = new HeadersHolder();
69+
HeadersHolder headersHolder;
7070

7171
// these are all more or less global loop variables
7272
// to keep from needing to pass them all into various
@@ -399,7 +399,7 @@ public void setSkipEmptyRecords(final boolean skipEmptyRecords) {
399399
* @return The count of headers read in by a previous call to
400400
* {@link #readHeaders readHeaders()}.
401401
*/
402-
public int getHeaderCount() { return headersHolder.Length; }
402+
public int getHeaderCount() { return headersHolder == null ? 0 : headersHolder.Length(); }
403403

404404
/**
405405
* Returns the header values as a string array.
@@ -411,35 +411,15 @@ public void setSkipEmptyRecords(final boolean skipEmptyRecords) {
411411
public String[] getHeaders() throws IOException {
412412
checkClosed();
413413

414-
if (headersHolder.Headers == null) return null;
414+
if (headersHolder == null || headersHolder.Headers() == null) return null;
415415
// use clone here to prevent the outside code from
416416
// setting values on the array directly, which would
417417
// throw off the index lookup based on header name
418-
final String[] clone = new String[headersHolder.Length];
419-
System.arraycopy(headersHolder.Headers, 0, clone, 0, headersHolder.Length);
418+
final String[] clone = new String[headersHolder.Length()];
419+
System.arraycopy(headersHolder.Headers, 0, clone, 0, headersHolder.Length());
420420
return clone;
421421
}
422422

423-
/**
424-
* Sets the headers.
425-
*
426-
* @param headers
427-
* the new headers
428-
*/
429-
public void setHeaders(final String[] headers) {
430-
headersHolder.Headers = headers;
431-
432-
headersHolder.IndexByName.clear();
433-
434-
if (headers != null) {
435-
headersHolder.Length = headers.length;
436-
for (int i = 0; i < headersHolder.Length; i++) { headersHolder.IndexByName.put(headers[i], i); }
437-
} else {
438-
headersHolder.Length = 0;
439-
}
440-
441-
}
442-
443423
/**
444424
* Gets the values.
445425
*
@@ -514,12 +494,14 @@ public static class Stats {
514494
* the CSV separator
515495
*/
516496
Stats(final CsvReader reader, final String CSVsep) {
497+
// The double quote is now the default text qualifier (see #3786)
498+
reader.setTextQualifier('"');
517499
boolean firstLineHasNumber = false;
518-
String[] possibleHeaders = null;
500+
// String[] possibleHeaders = null;
519501
try {
520502
// firstLine
521503
final String s = reader.skipLine();
522-
possibleHeaders = processFirstLine(s, CSVsep);
504+
headers = processFirstLine(s, CSVsep);
523505
firstLineHasNumber = atLeastOneNumber;
524506
atLeastOneNumber = false;
525507
reader.setDelimiter(delimiter);
@@ -535,9 +517,12 @@ public static class Stats {
535517
}
536518
while (reader.readRecord()) { if (reader.columnsCount > cols) { cols = reader.columnsCount; } }
537519
} catch (final IOException e) {}
538-
if (!type.equals(firstLineType) || !firstLineHasNumber && atLeastOneNumber) { header = true; }
520+
if (!type.equals(firstLineType) || !firstLineHasNumber && atLeastOneNumber) {
521+
header = true;
522+
cols = headers.length;
523+
}
539524
// if ( header ) {
540-
headers = possibleHeaders;
525+
// headers = possibleHeaders;
541526
// }
542527
rows = (int) reader.currentRecord + 1;
543528
reader.close();
@@ -1281,19 +1266,18 @@ public boolean readHeaders() throws IOException {
12811266
// copy the header data from the column array
12821267
// to the header string array
12831268

1284-
headersHolder.Length = columnsCount;
1285-
1286-
headersHolder.Headers = new String[columnsCount];
1269+
String[] headers = new String[columnsCount];
1270+
HashMap indexByName = new HashMap();
12871271

1288-
for (int i = 0; i < headersHolder.Length; i++) {
1272+
for (int i = 0; i < columnsCount; i++) {
12891273
final String columnValue = get(i);
12901274

1291-
headersHolder.Headers[i] = columnValue;
1275+
headers[i] = columnValue;
12921276

12931277
// if there are duplicate header names, we will save the last one
1294-
headersHolder.IndexByName.put(columnValue, i);
1278+
indexByName.put(columnValue, i);
12951279
}
1296-
1280+
headersHolder = new HeadersHolder(headers, columnsCount, indexByName);
12971281
if (result) { currentRecord--; }
12981282

12991283
columnsCount = 0;
@@ -1453,7 +1437,7 @@ private void endRecord() throws IOException {
14531437
*/
14541438
public int getIndex(final String headerName) throws IOException {
14551439
checkClosed();
1456-
1440+
if (headersHolder == null) return -1;
14571441
final Object indexValue = headersHolder.IndexByName.get(headerName);
14581442

14591443
if (indexValue != null) return (Integer) indexValue;
@@ -1519,6 +1503,7 @@ public String skipLine() throws IOException {
15191503
/**
15201504
* Closes and releases all related resources.
15211505
*/
1506+
@Override
15221507
public void close() {
15231508
if (!closed) {
15241509
close(true);
@@ -1534,8 +1519,6 @@ private void close(final boolean closing) {
15341519
if (!closed) {
15351520
if (closing) {
15361521
charset = null;
1537-
headersHolder.Headers = null;
1538-
headersHolder.IndexByName = null;
15391522
dataBuffer.Buffer = null;
15401523
columnBuffer.Buffer = null;
15411524
// rawBuffer.Buffer = null;
@@ -1782,26 +1765,7 @@ public UserSettings() {
17821765
/**
17831766
* The Class HeadersHolder.
17841767
*/
1785-
private static class HeadersHolder {
1786-
1787-
/** The Headers. */
1788-
public String[] Headers;
1789-
1790-
/** The Length. */
1791-
public int Length;
1792-
1793-
/** The Index by name. */
1794-
public HashMap IndexByName;
1795-
1796-
/**
1797-
* Instantiates a new headers holder.
1798-
*/
1799-
public HeadersHolder() {
1800-
Headers = null;
1801-
Length = 0;
1802-
IndexByName = new HashMap();
1803-
}
1804-
}
1768+
private static record HeadersHolder(String[] Headers, int Length, HashMap IndexByName) {}
18051769

18061770
/**
18071771
* The Class StaticSettings.

msi.gama.core/src/msi/gaml/statements/create/CreateFromCSVDelegate.java

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
/*******************************************************************************************************
22
*
3-
* CreateFromCSVDelegate.java, in msi.gama.core, is part of the source code of the
4-
* GAMA modeling and simulation platform (v.1.9.2).
3+
* CreateFromCSVDelegate.java, in msi.gama.core, is part of the source code of the GAMA modeling and simulation platform
4+
* (v.1.9.2).
55
*
66
* (c) 2007-2023 UMI 209 UMMISCO IRD/SU & Partners (IRIT, MIAT, TLU, CTU)
77
*
88
* Visit https://github.com/gama-platform/gama for license information and contacts.
9-
*
9+
*
1010
********************************************************************************************************/
1111
package msi.gaml.statements.create;
1212

@@ -74,14 +74,13 @@ public boolean createFrom(final IScope scope, final List<Map<String, Object>> in
7474
headers = source.getAttributes(scope);
7575
} else {
7676
headers = new ArrayList<>();
77-
for (int j = 0; j < cols; j++) {
78-
headers.add(String.valueOf(j));
79-
}
77+
for (int j = 0; j < cols; j++) { headers.add(String.valueOf(j)); }
8078
}
8179
for (int i = 0; i < rows; i++) {
8280
final Map<String, Object> map = GamaMapFactory.create(hasHeader ? Types.STRING : Types.INT, Types.NO_TYPE);
8381
final IList vals = mat.getRow(i);
8482
for (int j = 0; j < cols; j++) {
83+
// see issue #3786
8584
map.put(headers.get(j), vals.get(j));
8685
}
8786
// CSV attributes are mixed with the attributes of agents

0 commit comments

Comments
 (0)