Skip to content

Commit

Permalink
Changes to the CSV input files (see #3786 for the context).
Browse files Browse the repository at this point in the history
- `"` is now the default string qualifier (it is no longer null),
as it is in several software packages (e.g. Excel).
- when a CSV file has headers, the number of columns read cannot exceed
the number of headers
  • Loading branch information
AlexisDrogoul committed Apr 25, 2023
1 parent bc6dd96 commit ead1fd8
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 69 deletions.
11 changes: 7 additions & 4 deletions msi.gama.core/src/msi/gama/util/file/GamaCSVFile.java
Expand Up @@ -129,7 +129,8 @@ public CSVInfo(final String propertyString) {
public String getDocumentation() {
final StringBuilder sb = new StringBuilder();
sb.append("CSV File ").append(header ? "with header" : "no header").append(Strings.LN);
sb.append("Dimensions: ").append(cols + " columns x " + rows + " rows").append(Strings.LN);
sb.append("Dimensions: ").append(cols + " columns x " + (header ? rows - 1 : rows) + " rows")
.append(Strings.LN);
sb.append("Delimiter: ").append(delimiter).append(Strings.LN);
sb.append("Contents type: ").append(type).append(Strings.LN);
if (header && headers != null) {
Expand All @@ -142,8 +143,8 @@ public String getDocumentation() {

@Override
public String getSuffix() {
return "" + cols + "x" + rows + " | " + (header ? "with header" : "no header") + " | " + "delimiter: '"
+ delimiter + "' | " + type;
return "" + cols + "x" + (header ? rows - 1 : rows) + " | " + (header ? "with header" : "no header") + " | "
+ "delimiter: '" + delimiter + "' | " + type;
}

@Override
Expand Down Expand Up @@ -176,7 +177,7 @@ public void setHeaders(final String[] newHeaders) {
String csvSeparator = null;

/** The text qualifier. */
Character textQualifier = null;
Character textQualifier = '"';

/** The contents type. */
IType contentsType;
Expand Down Expand Up @@ -498,6 +499,8 @@ public void fillBuffer(final IScope scope) {
// we remove one row so as to not read the headers as well
// Cause for issue #3036
userSize.y = userSize.y - 1;
// Make sure that we do not read more columns than the number of headers
userSize.x = headers.size();
}
// long t = System.currentTimeMillis();
setBuffer(createMatrixFrom(scope, reader));
Expand Down
82 changes: 23 additions & 59 deletions msi.gama.core/src/msi/gama/util/file/csv/CsvReader.java
Expand Up @@ -66,7 +66,7 @@ public class CsvReader implements Closeable {
// private String rawRecord = "";

/** The headers holder. */
final HeadersHolder headersHolder = new HeadersHolder();
HeadersHolder headersHolder;

// these are all more or less global loop variables
// to keep from needing to pass them all into various
Expand Down Expand Up @@ -399,7 +399,7 @@ public void setSkipEmptyRecords(final boolean skipEmptyRecords) {
* @return The count of headers read in by a previous call to
* {@link msi.gama.util.file.csv.csvreader.CsvReader#readHeaders readHeaders()}.
*/
public int getHeaderCount() { return headersHolder.Length; }
public int getHeaderCount() { return headersHolder == null ? 0 : headersHolder.Length(); }

/**
* Returns the header values as a string array.
Expand All @@ -411,35 +411,15 @@ public void setSkipEmptyRecords(final boolean skipEmptyRecords) {
public String[] getHeaders() throws IOException {
checkClosed();

if (headersHolder.Headers == null) return null;
if (headersHolder == null || headersHolder.Headers() == null) return null;
// use clone here to prevent the outside code from
// setting values on the array directly, which would
// throw off the index lookup based on header name
final String[] clone = new String[headersHolder.Length];
System.arraycopy(headersHolder.Headers, 0, clone, 0, headersHolder.Length);
final String[] clone = new String[headersHolder.Length()];
System.arraycopy(headersHolder.Headers, 0, clone, 0, headersHolder.Length());
return clone;
}

/**
 * Replaces the headers stored in the headers holder and rebuilds its name-to-index lookup.
 *
 * @param headers
 *            the new header names; when {@code null}, the lookup is emptied and the header count is set to 0
 */
public void setHeaders(final String[] headers) {
    headersHolder.Headers = headers;
    // The previous lookup is always discarded, even when no new headers are supplied
    headersHolder.IndexByName.clear();

    final int count = headers == null ? 0 : headers.length;
    headersHolder.Length = count;

    // If a name appears several times, the last occurrence wins
    int column = 0;
    while (column < count) {
        headersHolder.IndexByName.put(headers[column], column);
        column++;
    }
}

/**
* Gets the values.
*
Expand Down Expand Up @@ -514,12 +494,14 @@ public static class Stats {
* the CS vsep
*/
Stats(final CsvReader reader, final String CSVsep) {
// By default now (see #3786)
reader.setTextQualifier('"');
boolean firstLineHasNumber = false;
String[] possibleHeaders = null;
// String[] possibleHeaders = null;
try {
// firstLine
final String s = reader.skipLine();
possibleHeaders = processFirstLine(s, CSVsep);
headers = processFirstLine(s, CSVsep);
firstLineHasNumber = atLeastOneNumber;
atLeastOneNumber = false;
reader.setDelimiter(delimiter);
Expand All @@ -535,9 +517,12 @@ public static class Stats {
}
while (reader.readRecord()) { if (reader.columnsCount > cols) { cols = reader.columnsCount; } }
} catch (final IOException e) {}
if (!type.equals(firstLineType) || !firstLineHasNumber && atLeastOneNumber) { header = true; }
if (!type.equals(firstLineType) || !firstLineHasNumber && atLeastOneNumber) {
header = true;
cols = headers.length;
}
// if ( header ) {
headers = possibleHeaders;
// headers = possibleHeaders;
// }
rows = (int) reader.currentRecord + 1;
reader.close();
Expand Down Expand Up @@ -1281,19 +1266,18 @@ public boolean readHeaders() throws IOException {
// copy the header data from the column array
// to the header string array

headersHolder.Length = columnsCount;

headersHolder.Headers = new String[columnsCount];
String[] headers = new String[columnsCount];
HashMap indexByName = new HashMap();

for (int i = 0; i < headersHolder.Length; i++) {
for (int i = 0; i < columnsCount; i++) {
final String columnValue = get(i);

headersHolder.Headers[i] = columnValue;
headers[i] = columnValue;

// if there are duplicate header names, we will save the last one
headersHolder.IndexByName.put(columnValue, i);
indexByName.put(columnValue, i);
}

headersHolder = new HeadersHolder(headers, columnsCount, indexByName);
if (result) { currentRecord--; }

columnsCount = 0;
Expand Down Expand Up @@ -1453,7 +1437,7 @@ private void endRecord() throws IOException {
*/
public int getIndex(final String headerName) throws IOException {
checkClosed();

if (headersHolder == null) return -1;
final Object indexValue = headersHolder.IndexByName.get(headerName);

if (indexValue != null) return (Integer) indexValue;
Expand Down Expand Up @@ -1519,6 +1503,7 @@ public String skipLine() throws IOException {
/**
* Closes and releases all related resources.
*/
@Override
public void close() {
if (!closed) {
close(true);
Expand All @@ -1534,8 +1519,6 @@ private void close(final boolean closing) {
if (!closed) {
if (closing) {
charset = null;
headersHolder.Headers = null;
headersHolder.IndexByName = null;
dataBuffer.Buffer = null;
columnBuffer.Buffer = null;
// rawBuffer.Buffer = null;
Expand Down Expand Up @@ -1782,26 +1765,7 @@ public UserSettings() {
/**
* The Class HeadersHolder.
*/
private static class HeadersHolder {

    /** The header names, or {@code null} when no headers have been set. */
    public String[] Headers;

    /** The number of headers. */
    public int Length;

    /** Maps a header name to the index of its column. Typed (no longer a raw {@code HashMap}). */
    public HashMap<String, Integer> IndexByName;

    /**
     * Instantiates a new headers holder with no headers and an empty name-to-index lookup.
     */
    public HeadersHolder() {
        Headers = null;
        Length = 0;
        IndexByName = new HashMap<>();
    }
}
/**
 * Immutable snapshot of the headers read from a CSV source.
 *
 * @param Headers
 *            the header names, in column order
 * @param Length
 *            the number of headers
 * @param IndexByName
 *            maps a header name to the index of its column (typed, no longer a raw {@code HashMap})
 */
private static record HeadersHolder(String[] Headers, int Length, HashMap<String, Integer> IndexByName) {}

/**
* The Class StaticSettings.
Expand Down
@@ -1,12 +1,12 @@
/*******************************************************************************************************
*
* CreateFromCSVDelegate.java, in msi.gama.core, is part of the source code of the
* GAMA modeling and simulation platform (v.1.9.2).
* CreateFromCSVDelegate.java, in msi.gama.core, is part of the source code of the GAMA modeling and simulation platform
* (v.1.9.2).
*
* (c) 2007-2023 UMI 209 UMMISCO IRD/SU & Partners (IRIT, MIAT, TLU, CTU)
*
* Visit https://github.com/gama-platform/gama for license information and contacts.
*
*
********************************************************************************************************/
package msi.gaml.statements.create;

Expand Down Expand Up @@ -74,14 +74,13 @@ public boolean createFrom(final IScope scope, final List<Map<String, Object>> in
headers = source.getAttributes(scope);
} else {
headers = new ArrayList<>();
for (int j = 0; j < cols; j++) {
headers.add(String.valueOf(j));
}
for (int j = 0; j < cols; j++) { headers.add(String.valueOf(j)); }
}
for (int i = 0; i < rows; i++) {
final Map<String, Object> map = GamaMapFactory.create(hasHeader ? Types.STRING : Types.INT, Types.NO_TYPE);
final IList vals = mat.getRow(i);
for (int j = 0; j < cols; j++) {
// see issue #3786
map.put(headers.get(j), vals.get(j));
}
// CSV attributes are mixed with the attributes of agents
Expand Down

0 comments on commit ead1fd8

Please sign in to comment.