diff --git a/msi.gama.core/src/msi/gama/common/preferences/GamaPreferences.java b/msi.gama.core/src/msi/gama/common/preferences/GamaPreferences.java index 547cbcbb32..62e037824c 100644 --- a/msi.gama.core/src/msi/gama/common/preferences/GamaPreferences.java +++ b/msi.gama.core/src/msi/gama/common/preferences/GamaPreferences.java @@ -40,6 +40,7 @@ import msi.gama.util.GamaMapFactory; import msi.gama.util.file.GenericFile; import msi.gama.util.file.IGamaFile; +import msi.gama.util.file.csv.AbstractCSVManipulator; import msi.gaml.compilation.GAML; import msi.gaml.compilation.kernel.GamaMetaModel; import msi.gaml.operators.Cast; @@ -59,6 +60,10 @@ * The Class GamaPreferences. */ +/** + * The Class GamaPreferences. + */ + /** * The Class GamaPreferences. */ @@ -352,10 +357,12 @@ public static class Runtime { // public static final Pref CORE_DELAY_STEP = create("pref_experiment_default_step", /** The Constant CORE_SYNC. */ // "Default step for the delay slider (in sec.)", 0.001, IType.FLOAT, true).in(NAME, EXECUTION).disabled(); - + public static final Pref CORE_SLIDER_TYPE = create("pref_experiment_type_slider", - "Set the step duration slider incrementation to linear. If false set to logarithmic", true, IType.BOOL, true).in(NAME, EXECUTION); - + "Set the step duration slider incrementation to linear. If false set to logarithmic", true, IType.BOOL, + true).in(NAME, EXECUTION); + + /** The Constant CORE_SYNC. */ public static final Pref CORE_SYNC = create("pref_display_synchronized", "Synchronize outputs with the simulation", false, IType.BOOL, true) .in(NAME, EXECUTION); @@ -815,6 +822,16 @@ public static class External { if (codes.isEmpty()) return false; return true; }); + + /** The Constant CSV_STRING_QUALIFIER. */ + public static final Pref CSV_STRING_QUALIFIER = GamaPreferences + .create("pref_csv_string_qualifier", "Default separator for strings", String.valueOf(AbstractCSVManipulator.Letters.QUOTE), IType.STRING, true) + .in(NAME, "CSV Files"); + + /** The Constant CSV_SEPARATOR. */ + public static final Pref CSV_SEPARATOR = + GamaPreferences.create("pref_csv_separator", "Default separator for fields", String.valueOf(AbstractCSVManipulator.Letters.COMMA), IType.STRING, true) + .in(GamaPreferences.External.NAME, "CSV Files"); } /** diff --git a/msi.gama.core/src/msi/gama/outputs/FileOutput.java b/msi.gama.core/src/msi/gama/outputs/FileOutput.java index 93c8778034..79171eb361 100644 --- a/msi.gama.core/src/msi/gama/outputs/FileOutput.java +++ b/msi.gama.core/src/msi/gama/outputs/FileOutput.java @@ -1,12 +1,11 @@ /******************************************************************************************************* * - * FileOutput.java, in msi.gama.core, is part of the source code of the - * GAMA modeling and simulation platform (v.1.9.2). + * FileOutput.java, in msi.gama.core, is part of the source code of the GAMA modeling and simulation platform (v.1.9.2). * * (c) 2007-2023 UMI 209 UMMISCO IRD/SU & Partners (IRIT, MIAT, TLU, CTU) * * Visit https://github.com/gama-platform/gama for license information and contacts. - * + * ********************************************************************************************************/ package msi.gama.outputs; @@ -19,7 +18,6 @@ import java.util.Arrays; import java.util.Calendar; import java.util.List; -import java.util.stream.Collectors; import msi.gama.common.interfaces.IKeyword; import msi.gama.kernel.experiment.IExperimentPlan; @@ -34,6 +32,7 @@ import msi.gama.runtime.IScope; import msi.gama.runtime.exceptions.GamaRuntimeException; import msi.gama.util.IMap; +import msi.gama.util.file.csv.AbstractCSVManipulator; import msi.gaml.compilation.GAML; import msi.gaml.descriptions.IDescription; import msi.gaml.expressions.IExpression; @@ -46,6 +45,7 @@ *

* A particular output file especially design for the batch experiment output *

+ * * @author drogoul */ @symbol ( @@ -125,55 +125,55 @@ public FileOutput(/* final ISymbol context, */final IDescription desc) { /** The file. */ File file = null; - + /** The file name. */ String fileName = ""; - + /** The rewrite. */ boolean rewrite = false; - + /** The header. */ String header = ""; - + /** The footer. */ String footer = ""; - + /** The last value. */ Object lastValue = null; - + /** The last values. */ List lastValues = null; - + /** The logged batch param. */ List loggedBatchParam = null; - + /** The solution. */ ParametersSet solution = null; - + /** The expression text. */ private String expressionText = null; - + /** The data. */ private IExpression data; - + /** The Constant LOG_FOLDER. */ private static final String LOG_FOLDER = "log"; - + /** The Constant XMLHeader. */ private static final String XMLHeader = ""; - + /** The Constant XML. */ private static final int XML = 1; - + /** The Constant CSV. */ private static final int CSV = 2; - + /** The Constant TEXT. */ private static final int TEXT = 0; - + /** The Constant extensions. */ private static final List extensions = Arrays.asList("txt", "xml", "csv"); - + /** The type. */ private int type; @@ -198,7 +198,8 @@ private void createExpression() { /** * Creates the header. * - * @throws GamaRuntimeException the gama runtime exception + * @throws GamaRuntimeException + * the gama runtime exception */ private void createHeader() throws GamaRuntimeException { final IExpression exp = getFacet(IKeyword.HEADER); @@ -212,7 +213,8 @@ private void createHeader() throws GamaRuntimeException { /** * Creates the footer. * - * @throws GamaRuntimeException the gama runtime exception + * @throws GamaRuntimeException + * the gama runtime exception */ private void createFooter() throws GamaRuntimeException { final IExpression exp = getFacet(IKeyword.FOOTER); @@ -226,7 +228,8 @@ private void createFooter() throws GamaRuntimeException { /** * Creates the rewrite. * - * @throws GamaRuntimeException the gama runtime exception + * @throws GamaRuntimeException + * the gama runtime exception */ private void createRewrite() throws GamaRuntimeException { final IExpression exp = getFacet(IKeyword.REWRITE); @@ -307,11 +310,16 @@ public boolean init(final IScope scope) throws GamaRuntimeException { /** * Instantiates a new file output. * - * @param name the name - * @param expr the expr - * @param columns the columns - * @param exp the exp - * @throws GamaRuntimeException the gama runtime exception + * @param name + * the name + * @param expr + * the expr + * @param columns + * the columns + * @param exp + * the exp + * @throws GamaRuntimeException + * the gama runtime exception */ public FileOutput(final String name, final String expr, final List columns, final IExperimentPlan exp) throws GamaRuntimeException { @@ -380,7 +388,8 @@ private void createFileName(final IScope scope) throws GamaRuntimeException { /** * Refresh expression. * - * @throws GamaRuntimeException the gama runtime exception + * @throws GamaRuntimeException + * the gama runtime exception */ public void refreshExpression() throws GamaRuntimeException { // in case the file writer persists over different simulations (like in @@ -407,11 +416,12 @@ public boolean step(final IScope scope) { public void update() throws GamaRuntimeException { writeToFile(getScope().getClock().getCycle()); } - + /** * Do write report and close. * - * @param report the report + * @param report + * the report */ public void doWriteReportAndClose(final String report) { switch (type) { @@ -429,19 +439,21 @@ public void doWriteReportAndClose(final String report) { break; } } - + /** * Main method to write down a set of given values for a single a point in the parameter space + * * @param sol * @param outputs * @throws GamaRuntimeException */ - public void doRefreshWriteAndClose(final ParametersSet sol, final IMap outputs) throws GamaRuntimeException { + public void doRefreshWriteAndClose(final ParametersSet sol, final IMap outputs) + throws GamaRuntimeException { setSolution(sol); if (outputs == null || outputs.isEmpty()) { - if (!getScope().step(this).passed()) { return; } + if (!getScope().step(this).passed()) return; } else { - this.lastValues = outputs.values().stream().toList(); //setLastValue(fitness); + this.lastValues = outputs.values().stream().toList(); // setLastValue(fitness); } // compute(getOwnScope(), 0l); switch (type) { @@ -462,11 +474,15 @@ public void doRefreshWriteAndClose(final ParametersSet sol, final IMap loggedBatchParam) { this.loggedBatchParam = loggedBatchParam; } + public void setLoggedBatchParam(final List loggedBatchParam) { + this.loggedBatchParam = loggedBatchParam; + } /** * Gets the solution. @@ -601,7 +626,8 @@ private String getFooter() { /** * Sets the solution. * - * @param solution the new solution + * @param solution + * the new solution */ public void setSolution(final ParametersSet solution) { this.solution = solution; } @@ -622,7 +648,9 @@ public void writeHeaderAndClose() { break; case CSV: final StringBuilder s = new StringBuilder(loggedBatchParam.size() * 8); - for (final String var : loggedBatchParam) { s.append(var).append(','); } + for (final String var : loggedBatchParam) { + s.append(var).append(AbstractCSVManipulator.getDefaultDelimiter()); + } if (getFacet(IKeyword.DATA) != null) { s.append(getLiteral(IKeyword.DATA)); } else { diff --git a/msi.gama.core/src/msi/gama/outputs/MonitorOutput.java b/msi.gama.core/src/msi/gama/outputs/MonitorOutput.java index 156726fe99..dc98cbbdac 100644 --- a/msi.gama.core/src/msi/gama/outputs/MonitorOutput.java +++ b/msi.gama.core/src/msi/gama/outputs/MonitorOutput.java @@ -148,7 +148,8 @@ private void setColor(final IExpression facet) { /** * Sets the color. * - * @param gamaColor the new color + * @param gamaColor + * the new color */ public void setColor(final GamaColor gamaColor) { color = gamaColor; @@ -272,7 +273,7 @@ public void saveHistory() { monitorFolder + "/" + "monitor_" + getName() + "_cycle_" + getScope().getClock().getCycle() + ".csv"; file = FileUtils.constructAbsoluteFilePath(getScope(), file, false); try (final BufferedWriter bw = new BufferedWriter(new FileWriter(file)); - final CsvWriter w = new CsvWriter(bw, CsvWriter.Letters.COMMA)) { + final CsvWriter w = new CsvWriter(bw)) { for (final Object o : history) { String[] strings = null; if (o instanceof Number) { diff --git a/msi.gama.core/src/msi/gama/runtime/benchmark/BenchmarkCSVExporter.java b/msi.gama.core/src/msi/gama/runtime/benchmark/BenchmarkCSVExporter.java index 89719501b0..0ac1cd7d02 100644 --- a/msi.gama.core/src/msi/gama/runtime/benchmark/BenchmarkCSVExporter.java +++ b/msi.gama.core/src/msi/gama/runtime/benchmark/BenchmarkCSVExporter.java @@ -1,12 +1,12 @@ /******************************************************************************************************* * - * BenchmarkCSVExporter.java, in msi.gama.core, is part of the source code of the - * GAMA modeling and simulation platform (v.1.9.2). + * BenchmarkCSVExporter.java, in msi.gama.core, is part of the source code of the GAMA modeling and simulation platform + * (v.1.9.2). * * (c) 2007-2023 UMI 209 UMMISCO IRD/SU & Partners (IRIT, MIAT, TLU, CTU) * * Visit https://github.com/gama-platform/gama for license information and contacts. - * + * ********************************************************************************************************/ package msi.gama.runtime.benchmark; @@ -32,15 +32,17 @@ * The Class BenchmarkCSVExporter. */ public class BenchmarkCSVExporter { - + /** The Constant exportFolder. */ private static final String exportFolder = "benchmarks"; /** * Save. * - * @param experiment the experiment - * @param records the records + * @param experiment + * the experiment + * @param records + * the records */ public void save(final IExperimentPlan experiment, final Benchmark records) { final IScope scope = experiment.getExperimentScope(); @@ -53,8 +55,9 @@ public void save(final IExperimentPlan experiment, final Benchmark records) { return; } final IMap scopes = GamaMapFactory.wrap(Types.NO_TYPE, Types.NO_TYPE, records); - final String exportFileName = FileUtils.constructAbsoluteFilePath(scope, exportFolder + "/" - + experiment.getModel().getName() + "_benchmark_" + Instant.now().toString().replace(':', '_') + ".csv", false); + final String exportFileName = + FileUtils.constructAbsoluteFilePath(scope, exportFolder + "/" + experiment.getModel().getName() + + "_benchmark_" + Instant.now().toString().replace(':', '_') + ".csv", false); final List headers = new ArrayList<>(); final List> contents = new ArrayList<>(); @@ -64,7 +67,7 @@ public void save(final IExperimentPlan experiment, final Benchmark records) { headers.add("Invocations in " + scopeRecord); }); contents.add(headers); - records.tree.visit(Order.PRE_ORDER, (n) -> { + records.tree.visit(Order.PRE_ORDER, n -> { final IBenchmarkable r = n.getData(); final List line = new ArrayList<>(); contents.add(line); @@ -78,10 +81,7 @@ public void save(final IExperimentPlan experiment, final Benchmark records) { try (final CsvWriter writer = new CsvWriter(exportFileName)) { writer.setDelimiter(';'); - writer.setUseTextQualifier(false); - for (final List ss : contents) { - writer.writeRecord(ss.toArray(new String[ss.size()])); - } + for (final List ss : contents) { writer.writeRecord(ss.toArray(new String[ss.size()])); } } catch (final IOException e) { throw GamaRuntimeException.create(e, scope); } diff --git a/msi.gama.core/src/msi/gama/util/file/GamaCSVFile.java b/msi.gama.core/src/msi/gama/util/file/GamaCSVFile.java index 47ac38b24d..d4b58e3f67 100644 --- a/msi.gama.core/src/msi/gama/util/file/GamaCSVFile.java +++ b/msi.gama.core/src/msi/gama/util/file/GamaCSVFile.java @@ -10,11 +10,12 @@ ********************************************************************************************************/ package msi.gama.util.file; -import static org.apache.commons.lang3.StringUtils.splitByWholeSeparatorPreserveAllTokens; - +import java.io.FileNotFoundException; import java.io.IOException; import java.util.Arrays; +import org.apache.commons.lang3.StringUtils; + import msi.gama.common.geometry.Envelope3D; import msi.gama.metamodel.shape.GamaPoint; import msi.gama.precompiler.GamlAnnotations.doc; @@ -26,6 +27,7 @@ import msi.gama.runtime.exceptions.GamaRuntimeException; import msi.gama.util.GamaListFactory; import msi.gama.util.IList; +import msi.gama.util.file.csv.AbstractCSVManipulator.Letters; import msi.gama.util.file.csv.CsvReader; import msi.gama.util.matrix.GamaFloatMatrix; import msi.gama.util.matrix.GamaIntMatrix; @@ -55,6 +57,50 @@ @SuppressWarnings ({ "unchecked", "rawtypes" }) public class GamaCSVFile extends GamaFile, Object> implements IFieldMatrixProvider { + /** + * The Class StringAnalysis. + */ + private static class StringAnalysis { + + /** The is float. */ + boolean isFloat = true; + + /** The is int. */ + boolean isInt = true; + + /** The is number sequence. */ + boolean isNumberSequence = true; + + /** + * Instantiates a new string analysis. + * + * @param s + * the s + */ + StringAnalysis(final String s) { + + for (final char c : s.toCharArray()) { + final boolean isDigit = Character.isDigit(c); + if (!isDigit) { + if (c == '.') { + isInt = false; + } else if (Character.isLetter(c)) { + isInt = false; + isFloat = false; + isNumberSequence = false; + break; + } else if (c == Letters.COMMA || c == Letters.SEMICOLUMN || c == Letters.PIPE || c == Letters.COLUMN + || c == Letters.SLASH || Character.isWhitespace(c) || c == Letters.QUOTE) { + isInt = false; + isFloat = false; + } + } + } + if (isInt && isFloat) { isFloat = false; } + } + + } + /** * The Class CSVInfo. */ @@ -67,16 +113,13 @@ public static class CSVInfo extends GamaFileMetaData { public int rows; /** The header. */ - public boolean header; + public boolean header, atLeastOneNumber; /** The delimiter. */ public Character delimiter; - // - // /** The qualifier. */ - // public Character qualifier; /** The type. */ - public final IType type; + public IType type, firstLineType; /** The headers. */ public String[] headers; @@ -93,13 +136,119 @@ public static class CSVInfo extends GamaFileMetaData { */ public CSVInfo(final String fileName, final long modificationStamp, final String CSVsep) { super(modificationStamp); - final CsvReader.Stats s = CsvReader.getStats(fileName, CSVsep); - cols = s.cols; - rows = s.rows; - header = s.header; - delimiter = s.delimiter; - type = s.type; - headers = s.headers; + try (CsvReader reader = new CsvReader(fileName)) { + process(reader, CSVsep); + } catch (FileNotFoundException e) {} + } + + /** + * Process. + * + * @param reader + * the reader + * @param CSVsep + * the CS vsep + */ + public void process(final CsvReader reader, final String CSVsep) { + // By default now (see #3786) + // reader.setTextQualifier(AbstractCSVManipulator.getDefaultQualifier()); + boolean firstLineHasNumber = false; + try { + // firstLine + final String s = reader.skipLine(); + headers = processFirstLine(s, CSVsep); + firstLineHasNumber = atLeastOneNumber; + atLeastOneNumber = false; + reader.setDelimiter(delimiter); + // secondLine + + if (!reader.readRecord()) { + // We only have one line + type = firstLineType; + rows = 1; + } else { + // We process the second line + type = processRecord(reader.getValues()); + } + while (reader.readRecord()) { if (reader.columnsCount > cols) { cols = reader.columnsCount; } } + } catch (final IOException e) {} + if (!type.equals(firstLineType) || !firstLineHasNumber && atLeastOneNumber) { + header = true; + cols = headers.length; + } + rows = (int) reader.currentRecord + 1; + reader.close(); + } + + /** + * Process first line. + * + * @param line + * the line + * @param CSVsep + * the CS vsep + * @return the string[] + */ + private String[] processFirstLine(final String line, final String CSVsep) { + if (CSVsep != null && !CSVsep.isEmpty()) { + delimiter = CSVsep.charAt(0); + } else { + String[] s = StringUtils.splitByWholeSeparatorPreserveAllTokens(line, ","); + if (s.length != 1 + || s[0].indexOf(' ') == -1 && s[0].indexOf(';') == -1 && s[0].indexOf(Letters.TAB) == -1) { + // We are likely dealing with a unicolum file + delimiter = Letters.COMMA; + } else { + // there should be another delimiter + s = StringUtils.splitByWholeSeparatorPreserveAllTokens(line, ";"); + if (s.length == 1) { + // Try with tab + s = StringUtils.splitByWholeSeparatorPreserveAllTokens(line, "" + Letters.TAB); + if (s.length == 1) { + s = StringUtils.splitByWholeSeparatorPreserveAllTokens(line, "" + Letters.SPACE); + if (s.length == 1) { + delimiter = Letters.PIPE; + } else { + delimiter = Letters.SPACE; + } + } else { + delimiter = Letters.TAB; + } + } else { + delimiter = ';'; + } + } + } + final String[] s2 = StringUtils.splitByWholeSeparatorPreserveAllTokens(line, delimiter.toString()); + firstLineType = processRecord(s2); + return s2; + } + + /** + * Process record. + * + * @param values + * the values + * @return the i type + */ + private IType processRecord(final String[] values) { + // Fix for #3294 + if (values.length > cols) { cols = values.length; } + IType temp = null; + for (final String s : values) { + final StringAnalysis sa = new StringAnalysis(s); + atLeastOneNumber = sa.isFloat || sa.isInt || sa.isNumberSequence; + if (sa.isInt) { + if (temp == null) { temp = Types.INT; } + } else if (sa.isFloat) { + if (temp == null || temp == Types.INT) { temp = Types.FLOAT; } + } else { + temp = Types.NO_TYPE; + } + } + // in case nothing has been read (i.e. empty file) + if (temp == null) { temp = Types.NO_TYPE; } + return temp; } /** @@ -117,7 +266,7 @@ public CSVInfo(final String propertyString) { delimiter = segments[4].charAt(0); type = Types.get(segments[5]); if (header) { - headers = splitByWholeSeparatorPreserveAllTokens(segments[6], SUB_DELIMITER); + headers = StringUtils.splitByWholeSeparatorPreserveAllTokens(segments[6], SUB_DELIMITER); } else { headers = new String[cols]; @@ -163,14 +312,6 @@ public String toPropertyString() { + delimiter + DELIMITER + type + (header ? DELIMITER + String.join(SUB_DELIMITER, headers) : ""); } - /** - * @param header2 - */ - public void setHeaders(final String[] newHeaders) { - header = newHeaders != null; - headers = newHeaders; - } - } /** The csv separator. */ @@ -627,7 +768,8 @@ public void forceHeader(final Boolean asBool) { * * @return the boolean */ - public Boolean hasHeader() { + public Boolean hasHeader(final IScope scope) { + fillBuffer(scope); return hasHeader == null ? false : hasHeader; } diff --git a/msi.gama.core/src/msi/gama/util/file/csv/AbstractCSVManipulator.java b/msi.gama.core/src/msi/gama/util/file/csv/AbstractCSVManipulator.java new file mode 100644 index 0000000000..0ab413c138 --- /dev/null +++ b/msi.gama.core/src/msi/gama/util/file/csv/AbstractCSVManipulator.java @@ -0,0 +1,161 @@ +/******************************************************************************************************* + * + * AbstractCSVManipulator.java, in msi.gama.core, is part of the source code of the GAMA modeling and simulation + * platform (v.1.9.2). + * + * (c) 2007-2023 UMI 209 UMMISCO IRD/SU & Partners (IRIT, MIAT, TLU, CTU) + * + * Visit https://github.com/gama-platform/gama for license information and contacts. + * + ********************************************************************************************************/ +package msi.gama.util.file.csv; + +import java.io.Closeable; +import java.io.IOException; +import java.util.Map; + +import msi.gama.common.preferences.GamaPreferences; + +/** + * The Class AbstractCSVManipulator. + */ +public abstract class AbstractCSVManipulator implements Closeable { + + /** The replacements. */ + public static Map REPLACEMENTS = + Map.of(';', ',', ',', ';', ' ', ';', '|', ';', ':', ';', '\t', ';'); + + /** The Constant MAX_BUFFER_SIZE. */ + public static final int MAX_BUFFER_SIZE = 1024; + + /** The Constant MAX_FILE_BUFFER_SIZE. */ + public static final int MAX_FILE_BUFFER_SIZE = 4 * 1024; + + /** The Constant INITIAL_COLUMN_COUNT. */ + public static final int INITIAL_COLUMN_COUNT = 10; + + /** The Constant INITIAL_COLUMN_BUFFER_SIZE. */ + public static final int INITIAL_COLUMN_BUFFER_SIZE = 50; + + /** + * The Class Letters. + */ + /** + * The Class Letters. + */ + public static class Letters { + + /** The Constant LF. */ + public static final char LF = '\n'; + + /** The Constant CR. */ + public static final char CR = '\r'; + + /** The Constant QUOTE. */ + public static final char QUOTE = '"'; + + /** The Constant COMMA. */ + public static final char COMMA = ','; + + /** The Constant SPACE. */ + public static final char SPACE = ' '; + + /** The Constant TAB. */ + public static final char TAB = '\t'; + + /** The Constant POUND. */ + public static final char POUND = '#'; + + /** The Constant BACKSLASH. */ + public static final char BACKSLASH = '\\'; + + /** The Constant NULL. */ + public static final char NULL = '\0'; + + /** The Constant BACKSPACE. */ + public static final char BACKSPACE = '\b'; + + /** The Constant FORM_FEED. */ + public static final char FORM_FEED = '\f'; + + /** The Constant ESCAPE. */ + public static final char ESCAPE = '\u001B'; // ASCII/ANSI escape + + /** The Constant VERTICAL_TAB. */ + public static final char VERTICAL_TAB = '\u000B'; + + /** The Constant ALERT. */ + public static final char ALERT = '\u0007'; + + /** The Constant PIPE. */ + public static final char PIPE = '|'; + + /** The Constant SEMICOLUMN. */ + public static final char SEMICOLUMN = ';'; + + /** The Constant COLUMN. */ + public static final char COLUMN = ':'; + + /** The Constant SLASH. */ + public static final char SLASH = '/'; + } + + /** The first column. */ + protected boolean firstColumn = true; + + /** The file name. */ + protected String fileName = null; + + /** The Text qualifier. */ + public Character textQualifier = getDefaultQualifier(); + + /** The Delimiter. */ + public char delimiter = getDefaultDelimiter(); + + /** + * Gets the default delimiter. + * + * @return the default delimiter + */ + public static char getDefaultDelimiter() { + String del = GamaPreferences.External.CSV_SEPARATOR.getValue(); + if (del == null || del.isEmpty()) return Letters.COMMA; + return del.charAt(0); + } + + /** + * Gets the default qualifier. + * + * @return the default qualifier + */ + public static char getDefaultQualifier() { + String del = GamaPreferences.External.CSV_STRING_QUALIFIER.getValue(); + if (del == null || del.isEmpty()) return Letters.QUOTE; + return del.charAt(0); + } + + @Override + public abstract void close() throws IOException; + + /** + * Sets the character to use as the column delimiter. Default is comma, ','. + * + * @param delimiter + * The character to use as the column delimiter. + */ + public void setDelimiter(final char delimiter) { this.delimiter = delimiter; } + + /** + * Sets the character to use as a text qualifier in the data. + * + * @param textQualifier + * The character to use as a text qualifier in the data. + */ + public void setTextQualifier(final Character textQualifier) { this.textQualifier = textQualifier; } + + /** + * End record. + */ + public abstract void endRecord() throws IOException; + +} diff --git a/msi.gama.core/src/msi/gama/util/file/csv/CsvReader.java b/msi.gama.core/src/msi/gama/util/file/csv/CsvReader.java index 661cb2ae71..7d0416dd55 100644 --- a/msi.gama.core/src/msi/gama/util/file/csv/CsvReader.java +++ b/msi.gama.core/src/msi/gama/util/file/csv/CsvReader.java @@ -10,41 +10,30 @@ package msi.gama.util.file.csv; import java.io.BufferedReader; -import java.io.Closeable; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; -import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.nio.charset.Charset; -import java.text.NumberFormat; import java.util.HashMap; import org.apache.commons.lang3.StringUtils; -import msi.gaml.types.IType; -import msi.gaml.types.Types; - /** * A stream based parser for parsing delimited text data from a file or a stream. */ -@SuppressWarnings ({ "unchecked", "rawtypes" }) -public class CsvReader implements Closeable { - - /** The file name. */ - private String fileName = null; - - /** The user settings. */ - // this holds all the values for switches that the user is allowed to set - private final UserSettings userSettings = new UserSettings(); - /** The charset. */ - private Charset charset = null; +/** + * The Class CsvReader. + */ - /** The use custom record delimiter. */ - private boolean useCustomRecordDelimiter = false; +/** + * The Class CsvReader. + */ +@SuppressWarnings ({ "unchecked", "rawtypes" }) +public class CsvReader extends AbstractCSVManipulator { /** The input stream. */ private Reader inputStream = null; @@ -56,14 +45,10 @@ public class CsvReader implements Closeable { private final DataBuffer dataBuffer = new DataBuffer(); /** The column buffer. */ - private final ColumnBuffer columnBuffer = new ColumnBuffer(); - - // private final RawRecordBuffer rawBuffer = new RawRecordBuffer(); + private char[] columnBuffer = new char[INITIAL_COLUMN_BUFFER_SIZE]; - /** The is qualified. */ - private boolean[] isQualified = null; - - // private String rawRecord = ""; + /** The column buffer position. */ + private int columnBufferPosition = 0; /** The headers holder. */ HeadersHolder headersHolder; @@ -75,66 +60,20 @@ public class CsvReader implements Closeable { /** The started column. */ boolean startedColumn = false; - /** The started with qualifier. */ - boolean startedWithQualifier = false; - /** The has more data. */ boolean hasMoreData = true; - /** The last letter. */ - char lastLetter = '\0'; - /** The has read next line. */ boolean hasReadNextLine = false; /** The columns count. */ - int columnsCount = 0; + public int columnsCount = 0; /** The current record. */ - long currentRecord = 0; + public long currentRecord = 0; /** The values. */ - String[] values = new String[StaticSettings.INITIAL_COLUMN_COUNT]; - - /** The initialized. */ - boolean initialized = false; - - /** The closed. */ - boolean closed = false; - - /** - * Double up the text qualifier to represent an occurance of the text qualifier. - */ - public static final int ESCAPE_MODE_DOUBLED = 1; - - /** - * Use a backslash character before the text qualifier to represent an occurance of the text qualifier. - */ - public static final int ESCAPE_MODE_BACKSLASH = 2; - - /** - * Creates a {@link msi.gama.util.file.csv.csvreader.CsvReader CsvReader} object using a file as the data source. - * - * @param fileName - * The path to the file to use as the data source. - * @param delimiter - * The character to use as the column delimiter. - * @param charset - * The {@link java.nio.charset.Charset Charset} to use while parsing the data. - */ - public CsvReader(final String fileName, final char delimiter, final Charset charset) throws FileNotFoundException { - if (fileName == null) throw new IllegalArgumentException("Parameter fileName can not be null."); - - if (charset == null) throw new IllegalArgumentException("Parameter charset can not be null."); - - if (!new File(fileName).exists()) throw new FileNotFoundException("File " + fileName + " does not exist."); - - this.fileName = fileName; - this.userSettings.Delimiter = delimiter; - this.charset = charset; - - isQualified = new boolean[values.length]; - } + String[] values = new String[INITIAL_COLUMN_COUNT]; /** * Creates a {@link msi.gama.util.file.csv.csvreader.CsvReader CsvReader} object using a file as the data @@ -146,7 +85,10 @@ public CsvReader(final String fileName, final char delimiter, final Charset char * The character to use as the column delimiter. */ public CsvReader(final String fileName, final char delimiter) throws FileNotFoundException { - this(fileName, delimiter, Charset.forName("UTF-8")); + if (fileName == null) throw new IllegalArgumentException("Parameter fileName can not be null."); + if (!new File(fileName).exists()) throw new FileNotFoundException("File " + fileName + " does not exist."); + this.fileName = fileName; + this.delimiter = delimiter; } /** @@ -157,7 +99,7 @@ public CsvReader(final String fileName, final char delimiter) throws FileNotFoun * The path to the file to use as the data source. */ public CsvReader(final String fileName) throws FileNotFoundException { - this(fileName, Letters.COMMA); + this(fileName, getDefaultDelimiter()); } /** @@ -170,13 +112,8 @@ public CsvReader(final String fileName) throws FileNotFoundException { * The character to use as the column delimiter. */ public CsvReader(final Reader inputStream, final char delimiter) { - if (inputStream == null) throw new IllegalArgumentException("Parameter inputStream can not be null."); - this.inputStream = inputStream; - this.userSettings.Delimiter = delimiter; - initialized = true; - - isQualified = new boolean[values.length]; + this.delimiter = delimiter; } /** @@ -187,204 +124,9 @@ public CsvReader(final Reader inputStream, final char delimiter) { * The stream to use as the data source. */ public CsvReader(final Reader inputStream) { - this(inputStream, Letters.COMMA); + this(inputStream, getDefaultDelimiter()); } - /** - * Constructs a {@link msi.gama.util.file.csv.csvreader.CsvReader CsvReader} object using an - * {@link java.io.InputStream InputStream} object as the data source. Uses a comma as the column delimiter. - * - * @param inputStream - * The stream to use as the data source. - * @param charset - * The {@link java.nio.charset.Charset Charset} to use while parsing the data. - */ - public CsvReader(final InputStream inputStream, final Charset charset) { - this(new InputStreamReader(inputStream, charset)); - } - - /** - * Gets whether leading and trailing whitespace characters are being trimmed from non-textqualified column data. - * Default is true. - * - * @return Whether leading and trailing whitespace characters are being trimmed from non-textqualified column data. - */ - public boolean getTrimWhitespace() { return userSettings.TrimWhitespace; } - - /** - * Sets whether leading and trailing whitespace characters should be trimmed from non-textqualified column data or - * not. Default is true. - * - * @param trimWhitespace - * Whether leading and trailing whitespace characters should be trimmed from non-textqualified column - * data or not. - */ - public void setTrimWhitespace(final boolean trimWhitespace) { userSettings.TrimWhitespace = trimWhitespace; } - - /** - * Gets the character being used as the column delimiter. Default is comma, ','. - * - * @return The character being used as the column delimiter. - */ - public char getDelimiter() { return userSettings.Delimiter; } - - /** - * Sets the character to use as the column delimiter. Default is comma, ','. - * - * @param delimiter - * The character to use as the column delimiter. - */ - public void setDelimiter(final char delimiter) { userSettings.Delimiter = delimiter; } - - /** - * Gets the record delimiter. - * - * @return the record delimiter - */ - public char getRecordDelimiter() { return userSettings.RecordDelimiter; } - - /** - * Sets the character to use as the record delimiter. - * - * @param recordDelimiter - * The character to use as the record delimiter. Default is combination of standard end of line - * characters for Windows, Unix, or Mac. - */ - public void setRecordDelimiter(final char recordDelimiter) { - useCustomRecordDelimiter = true; - userSettings.RecordDelimiter = recordDelimiter; - } - - // /** - // * Gets the character to use as a text qualifier in the data. - // * - // * @return The character to use as a text qualifier in the data. - // */ - // public char getTextQualifier() { - // return userSettings.TextQualifier; - // } - - /** - * Sets the character to use as a text qualifier in the data. - * - * @param textQualifier - * The character to use as a text qualifier in the data. - */ - public void setTextQualifier(final Character textQualifier) { userSettings.TextQualifier = textQualifier; } - - /** - * Whether text qualifiers will be used while parsing or not. - * - * @return Whether text qualifiers will be used while parsing or not. - */ - public boolean getUseTextQualifier() { return userSettings.UseTextQualifier; } - - /** - * Sets whether text qualifiers will be used while parsing or not. - * - * @param useTextQualifier - * Whether to use a text qualifier while parsing or not. - */ - public void setUseTextQualifier(final boolean useTextQualifier) { - userSettings.UseTextQualifier = useTextQualifier; - } - - /** - * Gets the character being used as a comment signal. - * - * @return The character being used as a comment signal. - */ - public char getComment() { return userSettings.Comment; } - - /** - * Sets the character to use as a comment signal. - * - * @param comment - * The character to use as a comment signal. - */ - public void setComment(final char comment) { userSettings.Comment = comment; } - - /** - * Gets whether comments are being looked for while parsing or not. - * - * @return Whether comments are being looked for while parsing or not. - */ - public boolean getUseComments() { return userSettings.UseComments; } - - /** - * Sets whether comments are being looked for while parsing or not. - * - * @param useComments - * Whether comments are being looked for while parsing or not. - */ - public void setUseComments(final boolean useComments) { userSettings.UseComments = useComments; } - - /** - * Gets the current way to escape an occurance of the text qualifier inside qualified data. - * - * @return The current way to escape an occurance of the text qualifier inside qualified data. - */ - public int getEscapeMode() { return userSettings.EscapeMode; } - - /** - * Sets the current way to escape an occurance of the text qualifier inside qualified data. - * - * @param escapeMode - * The way to escape an occurance of the text qualifier inside qualified data. - * @exception IllegalArgumentException - * When an illegal value is specified for escapeMode. - */ - public void setEscapeMode(final int escapeMode) throws IllegalArgumentException { - if (escapeMode != ESCAPE_MODE_DOUBLED && escapeMode != ESCAPE_MODE_BACKSLASH) - throw new IllegalArgumentException("Parameter escapeMode must be a valid value."); - - userSettings.EscapeMode = escapeMode; - } - - /** - * Gets the skip empty records. - * - * @return the skip empty records - */ - public boolean getSkipEmptyRecords() { return userSettings.SkipEmptyRecords; } - - /** - * Sets the skip empty records. - * - * @param skipEmptyRecords - * the new skip empty records - */ - public void setSkipEmptyRecords(final boolean skipEmptyRecords) { - userSettings.SkipEmptyRecords = skipEmptyRecords; - } - - /** - * Safety caution to prevent the parser from using large amounts of memory in the case where parsing settings like - * file encodings don't end up matching the actual format of a file. This switch can be turned off if the file - * format is known and tested. With the switch off, the max column lengths and max column count per record supported - * by the parser will greatly increase. Default is true. - * - * @return The current setting of the safety switch. - */ - public boolean getSafetySwitch() { return userSettings.SafetySwitch; } - - /** - * Safety caution to prevent the parser from using large amounts of memory in the case where parsing settings like - * file encodings don't end up matching the actual format of a file. This switch can be turned off if the file - * format is known and tested. With the switch off, the max column lengths and max column count per record supported - * by the parser will greatly increase. Default is true. - * - * @param safetySwitch - */ - public void setSafetySwitch(final boolean safetySwitch) { userSettings.SafetySwitch = safetySwitch; } - - /** - * Gets the count of columns found in this record. - * - * @return The count of columns found in this record. - */ - public int getColumnCount() { return columnsCount; } - /** * Gets the index of the current record. * @@ -399,7 +141,7 @@ public void setSkipEmptyRecords(final boolean skipEmptyRecords) { * @return The count of headers read in by a previous call to * {@link msi.gama.util.file.csv.csvreader.CsvReader#readHeaders readHeaders()}. */ - public int getHeaderCount() { return headersHolder == null ? 0 : headersHolder.Length(); } + public int getHeaderCount() { return headersHolder == null ? 0 : headersHolder.names.length; } /** * Returns the header values as a string array. @@ -408,15 +150,13 @@ public void setSkipEmptyRecords(final boolean skipEmptyRecords) { * @exception IOException * Thrown if this object has already been closed. */ - public String[] getHeaders() throws IOException { - checkClosed(); - - if (headersHolder == null || headersHolder.Headers() == null) return null; + public String[] getHeaders() { + if (headersHolder == null || headersHolder.names() == null) return null; // use clone here to prevent the outside code from // setting values on the array directly, which would // throw off the index lookup based on header name - final String[] clone = new String[headersHolder.Length()]; - System.arraycopy(headersHolder.Headers, 0, clone, 0, headersHolder.Length()); + final String[] clone = new String[headersHolder.names.length]; + System.arraycopy(headersHolder.names, 0, clone, 0, headersHolder.names.length); return clone; } @@ -427,9 +167,7 @@ public String[] getHeaders() throws IOException { * @throws IOException * Signals that an I/O exception has occurred. */ - public String[] getValues() throws IOException { - checkClosed(); - + public String[] getValues() { // need to return a clone, and can't use clone because values.Length // might be greater than columnsCount final String[] clone = new String[columnsCount]; @@ -437,239 +175,6 @@ public String[] getValues() throws IOException { return clone; } - /** - * Returns the current column value for a given column index. - * - * @param columnIndex - * The index of the column. - * @return The current column value. - * @exception IOException - * Thrown if this object has already been closed. - */ - public String get(final int columnIndex) throws IOException { - checkClosed(); - - if (columnIndex > -1 && columnIndex < columnsCount) return values[columnIndex]; - return ""; - } - - /** - * The Class Stats. - */ - public static class Stats { - - /** The delimiter. */ - public Character delimiter; - - /** The header. */ - public boolean header; - - /** The rows. */ - public int rows; - - /** The cols. */ - public int cols; - - /** The type. */ - public IType type = Types.NO_TYPE; - - /** The headers. */ - public String[] headers = null; - - // /** The qualifier. */ - // public Character qualifier; - - /** The first line type. */ - private IType firstLineType = Types.NO_TYPE; - - /** The at least one number. */ - private boolean atLeastOneNumber; - - /** - * Instantiates a new stats. - * - * @param reader - * the reader - * @param CSVsep - * the CS vsep - */ - Stats(final CsvReader reader, final String CSVsep) { - // By default now (see #3786) - reader.setTextQualifier('"'); - boolean firstLineHasNumber = false; - // String[] possibleHeaders = null; - try { - // firstLine - final String s = reader.skipLine(); - headers = processFirstLine(s, CSVsep); - firstLineHasNumber = atLeastOneNumber; - atLeastOneNumber = false; - reader.setDelimiter(delimiter); - // secondLine - - if (!reader.readRecord()) { - // We only have one line - type = firstLineType; - rows = 1; - } else { - // We process the second line - type = processRecord(reader.getValues()); - } - while (reader.readRecord()) { if (reader.columnsCount > cols) { cols = reader.columnsCount; } } - } catch (final IOException e) {} - if (!type.equals(firstLineType) || !firstLineHasNumber && atLeastOneNumber) { - header = true; - cols = headers.length; - } - // if ( header ) { - // headers = possibleHeaders; - // } - rows = (int) reader.currentRecord + 1; - reader.close(); - // log(); - } - - /** - * Process first line. - * - * @param line - * the line - * @param CSVsep - * the CS vsep - * @return the string[] - */ - private String[] processFirstLine(final String line, final String CSVsep) { - if (CSVsep != null && !CSVsep.isEmpty()) { - delimiter = CSVsep.charAt(0); - } else { - String[] s = StringUtils.splitByWholeSeparatorPreserveAllTokens(line, ","); - if (s.length != 1 - || s[0].indexOf(' ') == -1 && s[0].indexOf(';') == -1 && s[0].indexOf(Letters.TAB) == -1) { - // We are likely dealing with a unicolum file - delimiter = Letters.COMMA; - } else { - // there should be another delimiter - s = StringUtils.splitByWholeSeparatorPreserveAllTokens(line, ";"); - if (s.length == 1) { - // Try with tab - s = StringUtils.splitByWholeSeparatorPreserveAllTokens(line, "" + Letters.TAB); - if (s.length == 1) { - s = StringUtils.splitByWholeSeparatorPreserveAllTokens(line, "" + Letters.SPACE); - if (s.length == 1) { - delimiter = Letters.PIPE; - } else { - delimiter = Letters.SPACE; - } - } else { - delimiter = Letters.TAB; - } - } else { - delimiter = ';'; - } - } - } - final String[] s2 = StringUtils.splitByWholeSeparatorPreserveAllTokens(line, delimiter.toString()); - firstLineType = processRecord(s2); - return s2; - } - - /** - * The Class StringAnalysis. - */ - private static class StringAnalysis { - - /** The is float. */ - boolean isFloat = true; - - /** The is int. */ - boolean isInt = true; - - /** The is number sequence. */ - boolean isNumberSequence = true; - - /** - * Instantiates a new string analysis. - * - * @param s - * the s - */ - StringAnalysis(final String s) { - - for (final char c : s.toCharArray()) { - final boolean isDigit = Character.isDigit(c); - if (!isDigit) { - if (c == '.') { - isInt = false; - } else if (Character.isLetter(c)) { - isInt = false; - isFloat = false; - isNumberSequence = false; - break; - } else if (c == ',' || c == ';' || c == '|' || c == ':' || c == '/' - || Character.isWhitespace(c)) { - isInt = false; - isFloat = false; - } - } - } - if (isInt && isFloat) { isFloat = false; } - } - - } - - /** - * Process record. - * - * @param values - * the values - * @return the i type - */ - private IType processRecord(final String[] values) { - // Fix for #3294 - if (values.length > cols) { cols = values.length; } - IType temp = null; - for (final String s : values) { - final StringAnalysis sa = new StringAnalysis(s); - atLeastOneNumber = sa.isFloat || sa.isInt || sa.isNumberSequence; - if (sa.isInt) { - if (temp == null) { temp = Types.INT; } - } else if (sa.isFloat) { - if (temp == null || temp == Types.INT) { temp = Types.FLOAT; } - } else { - temp = Types.NO_TYPE; - } - - } - // in case nothing has been read (i.e. empty file) - if (temp == null) { temp = Types.NO_TYPE; } - return temp; - } - - // private void log() { - // DEBUG.LOG("CSV stats: #rows " + rows + " #cols " + cols + " - // type " + type + " parsed in " - // + (System.currentTimeMillis() - startTime) + " ms"); - // } - } - - /** - * Gets the stats. - * - * @param initial - * the initial - * @param CSVsep - * the CS vsep - * @return the stats - */ - public static Stats getStats(final String initial, final String CSVsep) { - try { - return new Stats(new CsvReader(initial), CSVsep); - } catch (final FileNotFoundException e1) { - return null; - } - - } - /** * Reads another record. * @@ -678,521 +183,136 @@ public static Stats getStats(final String initial, final String CSVsep) { * Thrown if an error occurs while reading data from the source stream. */ public boolean readRecord() throws IOException { - checkClosed(); - - columnsCount = 0; - // rawBuffer.Position = 0; - - // dataBuffer.LineStart = dataBuffer.Position; - + this.columnsCount = 0; hasReadNextLine = false; - + boolean startedWithQualifier = false; + char lastLetter = '\0'; // check to see if we've already found the end of data - if (hasMoreData) { // loop over the data stream until the end of data is found // or the end of the record is found - do { - if (dataBuffer.Position == dataBuffer.Count) { + if (dataBuffer.position == dataBuffer.count) { checkDataLength(); } else { startedWithQualifier = false; - // grab the current letter as a char - - char currentLetter = dataBuffer.Buffer[dataBuffer.Position]; - - if (userSettings.UseTextQualifier && userSettings.TextQualifier != null - && currentLetter == userSettings.TextQualifier.charValue()) { + char currentLetter = dataBuffer.buffer[dataBuffer.position]; + if (textQualifier != null && currentLetter == textQualifier.charValue()) { // this will be a text qualified column, so // we need to set startedWithQualifier to make it // enter the seperate branch to handle text // qualified columns - lastLetter = currentLetter; - // read qualified startedColumn = true; - dataBuffer.ColumnStart = dataBuffer.Position + 1; + dataBuffer.columnStart = dataBuffer.position + 1; startedWithQualifier = true; boolean lastLetterWasQualifier = false; - - char escapeChar = userSettings.TextQualifier; - - if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH) { escapeChar = Letters.BACKSLASH; } - boolean eatingTrailingJunk = false; boolean lastLetterWasEscape = false; - boolean readingComplexEscape = false; - int escape = ComplexEscape.UNICODE; - int escapeLength = 0; - char escapeValue = (char) 0; - - dataBuffer.Position++; - + dataBuffer.position++; do { - if (dataBuffer.Position == dataBuffer.Count) { + if (dataBuffer.position == dataBuffer.count) { checkDataLength(); } else { // grab the current letter as a char - - currentLetter = dataBuffer.Buffer[dataBuffer.Position]; - + currentLetter = dataBuffer.buffer[dataBuffer.position]; if (eatingTrailingJunk) { - dataBuffer.ColumnStart = dataBuffer.Position + 1; - - if (currentLetter == userSettings.Delimiter) { - endColumn(); - } else if (useCustomRecordDelimiter ? currentLetter == userSettings.RecordDelimiter - : currentLetter == Letters.CR || currentLetter == Letters.LF) { - endColumn(); + dataBuffer.columnStart = dataBuffer.position + 1; + if (currentLetter == delimiter) { + endColumn(startedWithQualifier); + } else if (currentLetter == Letters.CR || currentLetter == Letters.LF) { + endColumn(startedWithQualifier); endRecord(); } - } else if (readingComplexEscape) { - escapeLength++; - - switch (escape) { - case ComplexEscape.UNICODE: - escapeValue *= (char) 16; - escapeValue += hexToDec(currentLetter); - - if (escapeLength == 4) { readingComplexEscape = false; } - - break; - case ComplexEscape.OCTAL: - escapeValue *= (char) 8; - escapeValue += (char) (currentLetter - '0'); - - if (escapeLength == 3) { readingComplexEscape = false; } - - break; - case ComplexEscape.DECIMAL: - escapeValue *= (char) 10; - escapeValue += (char) (currentLetter - '0'); - - if (escapeLength == 3) { readingComplexEscape = false; } - - break; - case ComplexEscape.HEX: - escapeValue *= (char) 16; - escapeValue += hexToDec(currentLetter); - - if (escapeLength == 2) { readingComplexEscape = false; } - - break; - } - - if (!readingComplexEscape) { - appendLetter(escapeValue); - } else { - dataBuffer.ColumnStart = dataBuffer.Position + 1; - } - } else if (currentLetter == userSettings.TextQualifier.charValue()) { + } else if (currentLetter == textQualifier.charValue()) { if (lastLetterWasEscape) { lastLetterWasEscape = false; lastLetterWasQualifier = false; } else { updateCurrentValue(); - - if (userSettings.EscapeMode == ESCAPE_MODE_DOUBLED) { - lastLetterWasEscape = true; - } - + lastLetterWasEscape = true; lastLetterWasQualifier = true; } - } else if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH && lastLetterWasEscape) { - switch (currentLetter) { - case 'n': - appendLetter(Letters.LF); - break; - case 'r': - appendLetter(Letters.CR); - break; - case 't': - appendLetter(Letters.TAB); - break; - case 'b': - appendLetter(Letters.BACKSPACE); - break; - case 'f': - appendLetter(Letters.FORM_FEED); - break; - case 'e': - appendLetter(Letters.ESCAPE); - break; - case 'v': - appendLetter(Letters.VERTICAL_TAB); - break; - case 'a': - appendLetter(Letters.ALERT); - break; - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - escape = ComplexEscape.OCTAL; - readingComplexEscape = true; - escapeLength = 1; - escapeValue = (char) (currentLetter - '0'); - dataBuffer.ColumnStart = dataBuffer.Position + 1; - break; - case 'u': - case 'x': - case 'o': - case 'd': - case 'U': - case 'X': - case 'O': - case 'D': - switch (currentLetter) { - case 'u': - case 'U': - escape = ComplexEscape.UNICODE; - break; - case 'x': - case 'X': - escape = ComplexEscape.HEX; - break; - case 'o': - case 'O': - escape = ComplexEscape.OCTAL; - break; - case 'd': - case 'D': - escape = ComplexEscape.DECIMAL; - break; - } - - readingComplexEscape = true; - escapeLength = 0; - escapeValue = (char) 0; - dataBuffer.ColumnStart = dataBuffer.Position + 1; - - break; - default: - break; - } - - lastLetterWasEscape = false; - - // can only happen for ESCAPE_MODE_BACKSLASH - } else if (currentLetter == escapeChar) { - updateCurrentValue(); - lastLetterWasEscape = true; } else if (lastLetterWasQualifier) { - if (currentLetter == userSettings.Delimiter) { - endColumn(); - } else if (useCustomRecordDelimiter ? currentLetter == userSettings.RecordDelimiter - : currentLetter == Letters.CR || currentLetter == Letters.LF) { - endColumn(); - + if (currentLetter == delimiter) { + endColumn(startedWithQualifier); + } else if (currentLetter == Letters.CR || currentLetter == Letters.LF) { + endColumn(startedWithQualifier); endRecord(); } else { - dataBuffer.ColumnStart = dataBuffer.Position + 1; - + dataBuffer.columnStart = dataBuffer.position + 1; eatingTrailingJunk = true; } - // make sure to clear the flag for next // run of the loop - lastLetterWasQualifier = false; } - // keep track of the last letter because we need // it for several key decisions - lastLetter = currentLetter; - - if (startedColumn) { - dataBuffer.Position++; - - if (userSettings.SafetySwitch && dataBuffer.Position - dataBuffer.ColumnStart - + columnBuffer.Position > 100000) { - close(); - - throw new IOException("Maximum column length of 100,000 exceeded in column " - + NumberFormat.getIntegerInstance().format(columnsCount) + " in record " - + NumberFormat.getIntegerInstance().format(currentRecord) - + ". Set the SafetySwitch property to false" - + " if you're expecting column lengths greater than 100,000 characters to" - + " avoid this error."); - } - } + if (startedColumn) { dataBuffer.position++; } } // end else - } while (hasMoreData && startedColumn); - } else if (currentLetter == userSettings.Delimiter) { + } else if (currentLetter == delimiter) { // we encountered a column with no data, so // just send the end column - lastLetter = currentLetter; - - endColumn(); - } else if (useCustomRecordDelimiter && currentLetter == userSettings.RecordDelimiter) { + endColumn(startedWithQualifier); + } else if (currentLetter == Letters.CR || currentLetter == Letters.LF) { // this will skip blank lines - if (startedColumn || columnsCount > 0 || !userSettings.SkipEmptyRecords) { - endColumn(); - + if (startedColumn || columnsCount > 0) { + endColumn(startedWithQualifier); endRecord(); - } else { - // dataBuffer.LineStart = dataBuffer.Position + 1; } - lastLetter = currentLetter; - } else if (!useCustomRecordDelimiter - && (currentLetter == Letters.CR || currentLetter == Letters.LF)) { - // this will skip blank lines - if (startedColumn || columnsCount > 0 || !userSettings.SkipEmptyRecords - && (currentLetter == Letters.CR || lastLetter != Letters.CR)) { - endColumn(); - - endRecord(); - } else { - // dataBuffer.LineStart = dataBuffer.Position + 1; - } - - lastLetter = currentLetter; - } else if (userSettings.UseComments && columnsCount == 0 && currentLetter == userSettings.Comment) { - // encountered a comment character at the beginning of - // the line so just ignore the rest of the line - - lastLetter = currentLetter; - - skipLine(); - } else if (userSettings.TrimWhitespace - && (currentLetter == Letters.SPACE || currentLetter == Letters.TAB)) { + } else if (currentLetter == Letters.SPACE || currentLetter == Letters.TAB) { // do nothing, this will trim leading whitespace // for both text qualified columns and non - startedColumn = true; - dataBuffer.ColumnStart = dataBuffer.Position + 1; + dataBuffer.columnStart = dataBuffer.position + 1; } else { // since the letter wasn't a special letter, this // will be the first letter of our current column - startedColumn = true; - dataBuffer.ColumnStart = dataBuffer.Position; - boolean lastLetterWasBackslash = false; - boolean readingComplexEscape = false; - int escape = ComplexEscape.UNICODE; - int escapeLength = 0; - char escapeValue = (char) 0; - + dataBuffer.columnStart = dataBuffer.position; boolean firstLoop = true; - do { - if (!firstLoop && dataBuffer.Position == dataBuffer.Count) { + if (!firstLoop && dataBuffer.position == dataBuffer.count) { checkDataLength(); } else { if (!firstLoop) { // grab the current letter as a char - currentLetter = dataBuffer.Buffer[dataBuffer.Position]; + currentLetter = dataBuffer.buffer[dataBuffer.position]; } - - if (!userSettings.UseTextQualifier && userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH - && currentLetter == Letters.BACKSLASH) { - if (lastLetterWasBackslash) { - lastLetterWasBackslash = false; - } else { - updateCurrentValue(); - lastLetterWasBackslash = true; - } - } else if (readingComplexEscape) { - escapeLength++; - - switch (escape) { - case ComplexEscape.UNICODE: - escapeValue *= (char) 16; - escapeValue += hexToDec(currentLetter); - - if (escapeLength == 4) { readingComplexEscape = false; } - - break; - case ComplexEscape.OCTAL: - escapeValue *= (char) 8; - escapeValue += (char) (currentLetter - '0'); - - if (escapeLength == 3) { readingComplexEscape = false; } - - break; - case ComplexEscape.DECIMAL: - escapeValue *= (char) 10; - escapeValue += (char) (currentLetter - '0'); - - if (escapeLength == 3) { readingComplexEscape = false; } - - break; - case ComplexEscape.HEX: - escapeValue *= (char) 16; - escapeValue += hexToDec(currentLetter); - - if (escapeLength == 2) { readingComplexEscape = false; } - - break; - } - - if (!readingComplexEscape) { - appendLetter(escapeValue); - } else { - dataBuffer.ColumnStart = dataBuffer.Position + 1; - } - } else if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH && lastLetterWasBackslash) { - switch (currentLetter) { - case 'n': - appendLetter(Letters.LF); - break; - case 'r': - appendLetter(Letters.CR); - break; - case 't': - appendLetter(Letters.TAB); - break; - case 'b': - appendLetter(Letters.BACKSPACE); - break; - case 'f': - appendLetter(Letters.FORM_FEED); - break; - case 'e': - appendLetter(Letters.ESCAPE); - break; - case 'v': - appendLetter(Letters.VERTICAL_TAB); - break; - case 'a': - appendLetter(Letters.ALERT); - break; - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - escape = ComplexEscape.OCTAL; - readingComplexEscape = true; - escapeLength = 1; - escapeValue = (char) (currentLetter - '0'); - dataBuffer.ColumnStart = dataBuffer.Position + 1; - break; - case 'u': - case 'x': - case 'o': - case 'd': - case 'U': - case 'X': - case 'O': - case 'D': - switch (currentLetter) { - case 'u': - case 'U': - escape = ComplexEscape.UNICODE; - break; - case 'x': - case 'X': - escape = ComplexEscape.HEX; - break; - case 'o': - case 'O': - escape = ComplexEscape.OCTAL; - break; - case 'd': - case 'D': - escape = ComplexEscape.DECIMAL; - break; - } - - readingComplexEscape = true; - escapeLength = 0; - escapeValue = (char) 0; - dataBuffer.ColumnStart = dataBuffer.Position + 1; - - break; - default: - break; - } - - lastLetterWasBackslash = false; - } else if (currentLetter == userSettings.Delimiter) { - endColumn(); - } else if (useCustomRecordDelimiter ? currentLetter == userSettings.RecordDelimiter - : currentLetter == Letters.CR || currentLetter == Letters.LF) { - endColumn(); - + if (currentLetter == delimiter) { + endColumn(startedWithQualifier); + } else if (currentLetter == Letters.CR || currentLetter == Letters.LF) { + endColumn(startedWithQualifier); endRecord(); } - - // keep track of the last letter because we need - // it for several key decisions - lastLetter = currentLetter; firstLoop = false; - - if (startedColumn) { - dataBuffer.Position++; - - if (userSettings.SafetySwitch && dataBuffer.Position - dataBuffer.ColumnStart - + columnBuffer.Position > 100000) { - close(); - - throw new IOException("Maximum column length of 100,000 exceeded in column " - + NumberFormat.getIntegerInstance().format(columnsCount) + " in record " - + NumberFormat.getIntegerInstance().format(currentRecord) - + ". Set the SafetySwitch property to false" - + " if you're expecting column lengths greater than 100,000 characters to" - + " avoid this error."); - } - } + if (startedColumn) { dataBuffer.position++; } } // end else } while (hasMoreData && startedColumn); } - if (hasMoreData) { dataBuffer.Position++; } + if (hasMoreData) { dataBuffer.position++; } } // end else } while (hasMoreData && !hasReadNextLine); // check to see if we hit the end of the file // without processing the current record - if (startedColumn || lastLetter == userSettings.Delimiter) { - endColumn(); - + if (startedColumn || lastLetter == delimiter) { + endColumn(startedWithQualifier); endRecord(); } } - // if ( userSettings.CaptureRawRecord ) { - // if ( hasMoreData ) { - // if ( rawBuffer.Position == 0 ) { - // rawRecord = - // new String(dataBuffer.Buffer, dataBuffer.LineStart, - // dataBuffer.Position - dataBuffer.LineStart - - // 1); - // } else { - // rawRecord = - // new String(rawBuffer.Buffer, 0, rawBuffer.Position) + - // new String(dataBuffer.Buffer, dataBuffer.LineStart, - // dataBuffer.Position - - // dataBuffer.LineStart - 1); - // } - // } else { - // // for hasMoreData to ever be false, all data would have had to - // // have been - // // copied to the raw buffer - // rawRecord = new String(rawBuffer.Buffer, 0, rawBuffer.Position); - // } - // } else { - // rawRecord = ""; - // } - return hasReadNextLine; } @@ -1201,56 +321,24 @@ public boolean readRecord() throws IOException { * Thrown if an error occurs while reading data from the source stream. */ private void checkDataLength() throws IOException { - if (!initialized) { - if (fileName != null) { - inputStream = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), charset), - StaticSettings.MAX_FILE_BUFFER_SIZE); - } - - charset = null; - initialized = true; + if (inputStream == null && fileName != null) { + inputStream = + new BufferedReader(new InputStreamReader(new FileInputStream(fileName), Charset.forName("UTF-8")), + MAX_FILE_BUFFER_SIZE); } - updateCurrentValue(); - - // if ( userSettings.CaptureRawRecord && dataBuffer.Count > 0 ) { - // if ( rawBuffer.Buffer.length - rawBuffer.Position < dataBuffer.Count - // - dataBuffer.LineStart ) { - // int newLength = - // rawBuffer.Buffer.length + - // FastMath.max(dataBuffer.Count - dataBuffer.LineStart, - // rawBuffer.Buffer.length); - // - // char[] holder = new char[newLength]; - // - // System.arraycopy(rawBuffer.Buffer, 0, holder, 0, rawBuffer.Position); - // - // rawBuffer.Buffer = holder; - // } - // - // System.arraycopy(dataBuffer.Buffer, dataBuffer.LineStart, - // rawBuffer.Buffer, rawBuffer.Position, - // dataBuffer.Count - dataBuffer.LineStart); - // - // rawBuffer.Position += dataBuffer.Count - dataBuffer.LineStart; - // } - try { - dataBuffer.Count = inputStream.read(dataBuffer.Buffer, 0, dataBuffer.Buffer.length); + dataBuffer.count = inputStream.read(dataBuffer.buffer, 0, dataBuffer.buffer.length); } catch (final IOException ex) { close(); - throw ex; } // if no more data could be found, set flag stating that // the end of the data was found - - if (dataBuffer.Count == -1) { hasMoreData = false; } - - dataBuffer.Position = 0; - // dataBuffer.LineStart = 0; - dataBuffer.ColumnStart = 0; + if (dataBuffer.count == -1) { hasMoreData = false; } + dataBuffer.position = 0; + dataBuffer.columnStart = 0; } /** @@ -1262,167 +350,97 @@ private void checkDataLength() throws IOException { */ public boolean readHeaders() throws IOException { final boolean result = readRecord(); - // copy the header data from the column array // to the header string array - String[] headers = new String[columnsCount]; HashMap indexByName = new HashMap(); - for (int i = 0; i < columnsCount; i++) { - final String columnValue = get(i); - + final String columnValue = values[i]; headers[i] = columnValue; - // if there are duplicate header names, we will save the last one indexByName.put(columnValue, i); } - headersHolder = new HeadersHolder(headers, columnsCount, indexByName); + headersHolder = new HeadersHolder(headers, indexByName); if (result) { currentRecord--; } - - columnsCount = 0; - + this.columnsCount = 0; return result; } /** - * @exception IOException - * Thrown if a very rare extreme exception occurs during parsing, normally resulting from improper - * data format. + * End column. + * + * @param startedWithQualifier + * the started with qualifier */ - private void endColumn() throws IOException { + private void endColumn(final boolean startedWithQualifier) { String currentValue = ""; - // must be called before setting startedColumn = false if (startedColumn) { - if (columnBuffer.Position == 0) { - if (dataBuffer.ColumnStart < dataBuffer.Position) { - int lastLetter = dataBuffer.Position - 1; - - if (userSettings.TrimWhitespace && !startedWithQualifier) { - while (lastLetter >= dataBuffer.ColumnStart && (dataBuffer.Buffer[lastLetter] == Letters.SPACE - || dataBuffer.Buffer[lastLetter] == Letters.TAB)) { + if (columnBufferPosition == 0) { + if (dataBuffer.columnStart < dataBuffer.position) { + int lastLetter = dataBuffer.position - 1; + if (!startedWithQualifier) { + while (lastLetter >= dataBuffer.columnStart && (dataBuffer.buffer[lastLetter] == Letters.SPACE + || dataBuffer.buffer[lastLetter] == Letters.TAB)) { lastLetter--; } } - - currentValue = new String(dataBuffer.Buffer, dataBuffer.ColumnStart, - lastLetter - dataBuffer.ColumnStart + 1); + currentValue = new String(dataBuffer.buffer, dataBuffer.columnStart, + lastLetter - dataBuffer.columnStart + 1); } } else { updateCurrentValue(); - - int lastLetter = columnBuffer.Position - 1; - - if (userSettings.TrimWhitespace && !startedWithQualifier) { - while (lastLetter >= 0 && (columnBuffer.Buffer[lastLetter] == Letters.SPACE - || columnBuffer.Buffer[lastLetter] == Letters.TAB)) { + int lastLetter = columnBufferPosition - 1; + if (!startedWithQualifier) { + while (lastLetter >= 0 + && (columnBuffer[lastLetter] == Letters.SPACE || columnBuffer[lastLetter] == Letters.TAB)) { lastLetter--; } } - - currentValue = new String(columnBuffer.Buffer, 0, lastLetter + 1); + currentValue = new String(columnBuffer, 0, lastLetter + 1); } } - columnBuffer.Position = 0; - + columnBufferPosition = 0; startedColumn = false; - - if (columnsCount >= 100000 && userSettings.SafetySwitch) { - close(); - - throw new IOException("Maximum column count of 100,000 exceeded in record " - + NumberFormat.getIntegerInstance().format(currentRecord) - + ". Set the SafetySwitch property to false" - + " if you're expecting more than 100,000 columns per record to" + " avoid this error."); - } - // check to see if our current holder array for // column chunks is still big enough to handle another // column chunk - if (columnsCount == values.length) { // holder array needs to grow to be able to hold another column final int newLength = values.length * 2; - final String[] holder = new String[newLength]; - System.arraycopy(values, 0, holder, 0, values.length); - values = holder; - - final boolean[] qualifiedHolder = new boolean[newLength]; - - System.arraycopy(isQualified, 0, qualifiedHolder, 0, isQualified.length); - - isQualified = qualifiedHolder; } - values[columnsCount] = StringUtils.trimToEmpty(currentValue); - - isQualified[columnsCount] = startedWithQualifier; - currentValue = ""; - - columnsCount++; - } - - /** - * Append letter. - * - * @param letter - * the letter - */ - private void appendLetter(final char letter) { - if (columnBuffer.Position == columnBuffer.Buffer.length) { - final int newLength = columnBuffer.Buffer.length * 2; - - final char[] holder = new char[newLength]; - - System.arraycopy(columnBuffer.Buffer, 0, holder, 0, columnBuffer.Position); - - columnBuffer.Buffer = holder; - } - columnBuffer.Buffer[columnBuffer.Position++] = letter; - dataBuffer.ColumnStart = dataBuffer.Position + 1; + this.columnsCount = columnsCount + 1; } /** * Update current value. */ private void updateCurrentValue() { - if (startedColumn && dataBuffer.ColumnStart < dataBuffer.Position) { - if (columnBuffer.Buffer.length - columnBuffer.Position < dataBuffer.Position - dataBuffer.ColumnStart) { - final int newLength = columnBuffer.Buffer.length - + Math.max(dataBuffer.Position - dataBuffer.ColumnStart, columnBuffer.Buffer.length); - + if (startedColumn && dataBuffer.columnStart < dataBuffer.position) { + if (columnBuffer.length - columnBufferPosition < dataBuffer.position - dataBuffer.columnStart) { + final int newLength = columnBuffer.length + + Math.max(dataBuffer.position - dataBuffer.columnStart, columnBuffer.length); final char[] holder = new char[newLength]; - - System.arraycopy(columnBuffer.Buffer, 0, holder, 0, columnBuffer.Position); - - columnBuffer.Buffer = holder; + System.arraycopy(columnBuffer, 0, holder, 0, columnBufferPosition); + columnBuffer = holder; } - - System.arraycopy(dataBuffer.Buffer, dataBuffer.ColumnStart, columnBuffer.Buffer, columnBuffer.Position, - dataBuffer.Position - dataBuffer.ColumnStart); - - columnBuffer.Position += dataBuffer.Position - dataBuffer.ColumnStart; + System.arraycopy(dataBuffer.buffer, dataBuffer.columnStart, columnBuffer, columnBufferPosition, + dataBuffer.position - dataBuffer.columnStart); + columnBufferPosition += dataBuffer.position - dataBuffer.columnStart; } - - dataBuffer.ColumnStart = dataBuffer.Position + 1; + dataBuffer.columnStart = dataBuffer.position + 1; } - /** - * @exception IOException - * Thrown if an error occurs while reading data from the source stream. - */ - private void endRecord() throws IOException { - // this flag is used as a loop exit condition - // during parsing - + @Override + public void endRecord() { + // this flag is used as a loop exit condition during parsing hasReadNextLine = true; - currentRecord++; } @@ -1436,10 +454,8 @@ private void endRecord() throws IOException { * Thrown if this object has already been closed. */ public int getIndex(final String headerName) throws IOException { - checkClosed(); if (headersHolder == null) return -1; - final Object indexValue = headersHolder.IndexByName.get(headerName); - + final Object indexValue = headersHolder.indexes.get(headerName); if (indexValue != null) return (Integer) indexValue; return -1; } @@ -1453,50 +469,29 @@ public int getIndex(final String headerName) throws IOException { * Thrown if an error occurs while reading data from the source stream. */ public String skipLine() throws IOException { - checkClosed(); - // clear public column values for current line - - columnsCount = 0; - + this.columnsCount = 0; StringBuilder skippedLine = new StringBuilder(); - if (hasMoreData) { boolean foundEol = false; do { - if (dataBuffer.Position == dataBuffer.Count) { + if (dataBuffer.position == dataBuffer.count) { checkDataLength(); } else { - // skippedLine = true; - // grab the current letter as a char - - final char currentLetter = dataBuffer.Buffer[dataBuffer.Position]; - + final char currentLetter = dataBuffer.buffer[dataBuffer.position]; if (currentLetter == Letters.CR || currentLetter == Letters.LF) { foundEol = true; } - // keep track of the last letter because we need // it for several key decisions - - lastLetter = currentLetter; - if (!foundEol) { - skippedLine.append(lastLetter); - dataBuffer.Position++; + skippedLine.append(currentLetter); + dataBuffer.position++; } - } // end else } while (hasMoreData && !foundEol); - - columnBuffer.Position = 0; - - // dataBuffer.LineStart = dataBuffer.Position + 1; + columnBufferPosition = 0; } - - // rawBuffer.Position = 0; - // rawRecord = ""; - return skippedLine.toString(); } @@ -1505,90 +500,14 @@ public String skipLine() throws IOException { */ @Override public void close() { - if (!closed) { - close(true); - - closed = true; - } - } - - /** - * - */ - private void close(final boolean closing) { - if (!closed) { - if (closing) { - charset = null; - dataBuffer.Buffer = null; - columnBuffer.Buffer = null; - // rawBuffer.Buffer = null; - } - + if (inputStream != null) { + dataBuffer.buffer = null; + columnBuffer = null; try { - if (initialized) { inputStream.close(); } - } catch (final Exception e) { - // just eat the exception - } - + if (inputStream != null) { inputStream.close(); } + } catch (final Exception e) {} inputStream = null; - - closed = true; - } - } - - /** - * @exception IOException - * Thrown if this object has already been closed. - */ - private void checkClosed() throws IOException { - if (closed) throw new IOException("This instance of the CsvReader class has already been closed."); - } - - /** - * - */ - @Override - protected void finalize() { - close(false); - } - - /** - * The Class ComplexEscape. - */ - private static class ComplexEscape { - - /** The Constant UNICODE. */ - private static final int UNICODE = 1; - - /** The Constant OCTAL. */ - private static final int OCTAL = 2; - - /** The Constant DECIMAL. */ - private static final int DECIMAL = 3; - - /** The Constant HEX. */ - private static final int HEX = 4; - } - - /** - * Hex to dec. - * - * @param hex - * the hex - * @return the char - */ - private static char hexToDec(final char hex) { - char result; - - if (hex >= 'a') { - result = (char) (hex - 'a' + 10); - } else if (hex >= 'A') { - result = (char) (hex - 'A' + 10); - } else { - result = (char) (hex - '0'); } - - return result; } /** @@ -1597,194 +516,28 @@ private static char hexToDec(final char hex) { private static class DataBuffer { /** The Buffer. */ - public char[] Buffer; + public char[] buffer = new char[MAX_BUFFER_SIZE]; /** The Position. */ - public int Position; - - // / - // / How much usable data has been read into the stream, - // / which will not always be as long as Buffer.Length. - /** The Count. */ - // / - public int Count; - - // / - // / The position of the cursor in the buffer when the - // / current column was started or the last time data - // / was moved out to the column buffer. - /** The Column start. */ - // / - public int ColumnStart; - - // public int LineStart; + public int position; /** - * Instantiates a new data buffer. + * The Count. How much usable data has been read into the stream, which will not always be as long as + * Buffer.Length. */ - public DataBuffer() { - Buffer = new char[StaticSettings.MAX_BUFFER_SIZE]; - Position = 0; - Count = 0; - ColumnStart = 0; - // LineStart = 0; - } - } - - /** - * The Class ColumnBuffer. - */ - private static class ColumnBuffer { - - /** The Buffer. */ - public char[] Buffer; - - /** The Position. */ - public int Position; + public int count; /** - * Instantiates a new column buffer. + * The Column start. The position of the cursor in the buffer when the current column was started or the last + * time data was moved out to the column buffer */ - public ColumnBuffer() { - Buffer = new char[StaticSettings.INITIAL_COLUMN_BUFFER_SIZE]; - Position = 0; - } - } - - /** - * The Class Letters. - */ - private static class Letters { - - /** The Constant LF. */ - public static final char LF = '\n'; - - /** The Constant CR. */ - public static final char CR = '\r'; - - /** The Constant QUOTE. */ - public static final char QUOTE = '"'; - - /** The Constant COMMA. */ - public static final char COMMA = ','; - - /** The Constant SPACE. */ - public static final char SPACE = ' '; - - /** The Constant TAB. */ - public static final char TAB = '\t'; - - /** The Constant POUND. */ - public static final char POUND = '#'; - - /** The Constant BACKSLASH. */ - public static final char BACKSLASH = '\\'; - - /** The Constant NULL. */ - public static final char NULL = '\0'; - - /** The Constant BACKSPACE. */ - public static final char BACKSPACE = '\b'; - - /** The Constant FORM_FEED. */ - public static final char FORM_FEED = '\f'; + public int columnStart; - /** The Constant ESCAPE. */ - public static final char ESCAPE = '\u001B'; // ASCII/ANSI escape - - /** The Constant VERTICAL_TAB. */ - public static final char VERTICAL_TAB = '\u000B'; - - /** The Constant ALERT. */ - public static final char ALERT = '\u0007'; - - /** The Constant PIPE. */ - public static final char PIPE = '|'; - } - - /** - * The Class UserSettings. - */ - private static class UserSettings { - - // having these as publicly accessible members will prevent - // the overhead of the method call that exists on properties - // public boolean CaseSensitive; - - /** The Text qualifier. */ - public Character TextQualifier; - - /** The Trim whitespace. */ - public boolean TrimWhitespace; - - /** The Use text qualifier. */ - public boolean UseTextQualifier; - - /** The Delimiter. */ - public char Delimiter; - - /** The Record delimiter. */ - public char RecordDelimiter; - - /** The Comment. */ - public char Comment; - - /** The Use comments. */ - public boolean UseComments; - - /** The Escape mode. */ - public int EscapeMode; - - /** The Safety switch. */ - public boolean SafetySwitch; - - /** The Skip empty records. */ - public boolean SkipEmptyRecords; - - // public boolean CaptureRawRecord; - - /** - * Instantiates a new user settings. - */ - public UserSettings() { - // CaseSensitive = true; - TextQualifier = null; - TrimWhitespace = true; - UseTextQualifier = true; - Delimiter = Letters.COMMA; - RecordDelimiter = Letters.NULL; - Comment = Letters.POUND; - UseComments = false; - EscapeMode = CsvReader.ESCAPE_MODE_DOUBLED; - SafetySwitch = true; - SkipEmptyRecords = true; - // CaptureRawRecord = true; - } } /** * The Class HeadersHolder. */ - private static record HeadersHolder(String[] Headers, int Length, HashMap IndexByName) {} + private record HeadersHolder(String[] names, HashMap indexes) {} - /** - * The Class StaticSettings. - */ - private static class StaticSettings { - - // these are static instead of final so they can be changed in unit test - // isn't visible outside this class and is only accessed once during - /** The Constant MAX_BUFFER_SIZE. */ - // CsvReader construction - public static final int MAX_BUFFER_SIZE = 1024; - - /** The Constant MAX_FILE_BUFFER_SIZE. */ - public static final int MAX_FILE_BUFFER_SIZE = 4 * 1024; - - /** The Constant INITIAL_COLUMN_COUNT. */ - public static final int INITIAL_COLUMN_COUNT = 10; - - /** The Constant INITIAL_COLUMN_BUFFER_SIZE. */ - public static final int INITIAL_COLUMN_BUFFER_SIZE = 50; - } } \ No newline at end of file diff --git a/msi.gama.core/src/msi/gama/util/file/csv/CsvWriter.java b/msi.gama.core/src/msi/gama/util/file/csv/CsvWriter.java index bd708a8f90..b8269a1b8f 100644 --- a/msi.gama.core/src/msi/gama/util/file/csv/CsvWriter.java +++ b/msi.gama.core/src/msi/gama/util/file/csv/CsvWriter.java @@ -1,66 +1,30 @@ /******************************************************************************************************* * - * CsvWriter.java, in msi.gama.core, is part of the source code of the - * GAMA modeling and simulation platform (v.1.9.2). + * CsvWriter.java, in msi.gama.core, is part of the source code of the GAMA modeling and simulation platform (v.1.9.2). * * (c) 2007-2023 UMI 209 UMMISCO IRD/SU & Partners (IRIT, MIAT, TLU, CTU) * * Visit https://github.com/gama-platform/gama for license information and contacts. - * + * ********************************************************************************************************/ package msi.gama.util.file.csv; import java.io.BufferedWriter; -import java.io.Closeable; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStreamWriter; import java.io.Writer; import java.nio.charset.Charset; -import java.util.HashMap; -import java.util.Map; /** * A stream based writer for writing delimited text data to a file or a stream. */ @SuppressWarnings ({ "unchecked", "rawtypes" }) -public class CsvWriter implements Closeable { +public class CsvWriter extends AbstractCSVManipulator { /** The output stream. */ private Writer outputStream = null; - /** The file name. */ - private String fileName = null; - - /** The first column. */ - private boolean firstColumn = true; - - /** The use custom record delimiter. */ - private boolean useCustomRecordDelimiter = false; - - /** The charset. */ - private Charset charset = null; - - /** The user settings. */ - // this holds all the values for switches that the user is allowed to set - private final UserSettings userSettings = new UserSettings(); - - /** The initialized. */ - private boolean initialized = false; - - /** The closed. */ - private boolean closed = false; - - /** - * Double up the text qualifier to represent an occurrence of the text qualifier. - */ - public static final int ESCAPE_MODE_DOUBLED = 1; - - /** - * Use a backslash character before the text qualifier to represent an occurrence of the text qualifier. - */ - public static final int ESCAPE_MODE_BACKSLASH = 2; - /** * Creates a {@link msi.gama.util.file.csv.csvreader.CsvWriter CsvWriter} object using a file as the data * destination. @@ -72,14 +36,10 @@ public class CsvWriter implements Closeable { * @param charset * The {@link java.nio.charset.Charset Charset} to use while writing the data. */ - public CsvWriter(final String fileName, final char delimiter, final Charset charset) { + public CsvWriter(final String fileName, final char delimiter) { if (fileName == null) throw new IllegalArgumentException("Parameter fileName can not be null."); - - if (charset == null) throw new IllegalArgumentException("Parameter charset can not be null."); - this.fileName = fileName; - userSettings.Delimiter = delimiter; - this.charset = charset; + this.delimiter = delimiter; } /** @@ -91,7 +51,7 @@ public CsvWriter(final String fileName, final char delimiter, final Charset char * The path to the file to output the data. */ public CsvWriter(final String fileName) { - this(fileName, Letters.COMMA, Charset.forName("UTF-8")); + this(fileName, getDefaultDelimiter()); } /** @@ -104,136 +64,20 @@ public CsvWriter(final String fileName) { */ public CsvWriter(final Writer outputStream, final char delimiter) { if (outputStream == null) throw new IllegalArgumentException("Parameter outputStream can not be null."); - this.outputStream = outputStream; - userSettings.Delimiter = delimiter; - initialized = true; - } - - /** - * Gets the character being used as the column delimiter. - * - * @return The character being used as the column delimiter. - */ - public char getDelimiter() { return userSettings.Delimiter; } - - /** - * Sets the character to use as the column delimiter. - * - * @param delimiter - * The character to use as the column delimiter. - */ - public void setDelimiter(final char delimiter) { userSettings.Delimiter = delimiter; } - - /** - * Gets the record delimiter. - * - * @return the record delimiter - */ - public char getRecordDelimiter() { return userSettings.RecordDelimiter; } - - /** - * Sets the character to use as the record delimiter. - * - * @param recordDelimiter - * The character to use as the record delimiter. Default is combination of standard end of line - * characters for Windows, Unix, or Mac. - */ - public void setRecordDelimiter(final char recordDelimiter) { - useCustomRecordDelimiter = true; - userSettings.RecordDelimiter = recordDelimiter; + this.delimiter = delimiter; } /** - * Gets the character to use as a text qualifier in the data. - * - * @return The character to use as a text qualifier in the data. - */ - public char getTextQualifier() { return userSettings.TextQualifier; } - - /** - * Sets the character to use as a text qualifier in the data. - * - * @param textQualifier - * The character to use as a text qualifier in the data. - */ - public void setTextQualifier(final char textQualifier) { userSettings.TextQualifier = textQualifier; } - - /** - * Whether text qualifiers will be used while writing data or not. + * Instantiates a new csv writer. * - * @return Whether text qualifiers will be used while writing data or not. - */ - public boolean getUseTextQualifier() { return userSettings.UseTextQualifier; } - - /** - * Sets whether text qualifiers will be used while writing data or not. - * - * @param useTextQualifier - * Whether to use a text qualifier while writing data or not. + * @param outputStream + * the output stream. */ - public void setUseTextQualifier(final boolean useTextQualifier) { - userSettings.UseTextQualifier = useTextQualifier; + public CsvWriter(final Writer outputStream) { + this(outputStream, getDefaultDelimiter()); } - /** - * Gets the escape mode. - * - * @return the escape mode - */ - public int getEscapeMode() { return userSettings.EscapeMode; } - - /** - * Sets the escape mode. - * - * @param escapeMode the new escape mode - */ - public void setEscapeMode(final int escapeMode) { userSettings.EscapeMode = escapeMode; } - - /** - * Sets the comment. - * - * @param comment the new comment - */ - public void setComment(final char comment) { userSettings.Comment = comment; } - - /** - * Gets the comment. - * - * @return the comment - */ - public char getComment() { return userSettings.Comment; } - - /** - * Whether fields will be surrounded by the text qualifier even if the qualifier is not necessarily needed to escape - * this field. - * - * @return Whether fields will be forced to be qualified or not. - */ - public boolean getForceQualifier() { return userSettings.ForceQualifier; } - - /** - * Use this to force all fields to be surrounded by the text qualifier even if the qualifier is not necessarily - * needed to escape this field. Default is false. - * - * @param forceQualifier - * Whether to force the fields to be qualified or not. - */ - public void setForceQualifier(final boolean forceQualifier) { userSettings.ForceQualifier = forceQualifier; } - - /** The replacements. */ - private static Map REPLACEMENTS = new HashMap() { - - { - put(';', ','); - put(',', ';'); - put(' ', ';'); - put('|', ';'); - put(':', ';'); - put('\t', ';'); - } - }; - /** * Writes another column of data to this record. * @@ -246,105 +90,16 @@ public void setUseTextQualifier(final boolean useTextQualifier) { */ public void write(final String c, final boolean changeDelimiter) throws IOException { String content = c; - checkClosed(); - checkInit(); - if (content == null) { content = ""; } - - if (!firstColumn) { outputStream.write(userSettings.Delimiter); } - - boolean textQualify = userSettings.ForceQualifier; - + if (!firstColumn) { outputStream.write(delimiter); } if (changeDelimiter && content.length() > 0) { - content = content.replace(userSettings.Delimiter, REPLACEMENTS.get(userSettings.Delimiter)); + content = content.replace(delimiter, REPLACEMENTS.get(delimiter)); } - - if (!textQualify && userSettings.UseTextQualifier && (content.indexOf(userSettings.TextQualifier) > -1 - || content.indexOf(userSettings.Delimiter) > -1 - || !useCustomRecordDelimiter && (content.indexOf(Letters.LF) > -1 || content.indexOf(Letters.CR) > -1) - || useCustomRecordDelimiter && content.indexOf(userSettings.RecordDelimiter) > -1 - || firstColumn && content.length() > 0 && content.charAt(0) == userSettings.Comment || - // check for empty first column, which if on its own - // line must - // be qualified or the line will be skipped - firstColumn && content.length() == 0)) { textQualify = true; } - - if (userSettings.UseTextQualifier && !textQualify && content.length() > 0 /* && preserveSpaces */ ) { - final char firstLetter = content.charAt(0); - - if (firstLetter == Letters.SPACE || firstLetter == Letters.TAB) { textQualify = true; } - - if (!textQualify && content.length() > 1) { - final char lastLetter = content.charAt(content.length() - 1); - - if (lastLetter == Letters.SPACE || lastLetter == Letters.TAB) { textQualify = true; } - } - } - - if (textQualify) { - outputStream.write(userSettings.TextQualifier); - - if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH) { - content = replace(content, "" + Letters.BACKSLASH, "" + Letters.BACKSLASH + Letters.BACKSLASH); - content = replace(content, "" + userSettings.TextQualifier, - "" + Letters.BACKSLASH + userSettings.TextQualifier); - } else { - content = replace(content, "" + userSettings.TextQualifier, - "" + userSettings.TextQualifier + userSettings.TextQualifier); - } - } else if (userSettings.EscapeMode == ESCAPE_MODE_BACKSLASH) { - content = replace(content, "" + Letters.BACKSLASH, "" + Letters.BACKSLASH + Letters.BACKSLASH); - content = replace(content, "" + userSettings.Delimiter, "" + Letters.BACKSLASH + userSettings.Delimiter); - - if (useCustomRecordDelimiter) { - content = replace(content, "" + userSettings.RecordDelimiter, - "" + Letters.BACKSLASH + userSettings.RecordDelimiter); - } else { - content = replace(content, "" + Letters.CR, "" + Letters.BACKSLASH + Letters.CR); - content = replace(content, "" + Letters.LF, "" + Letters.BACKSLASH + Letters.LF); - } - - if (firstColumn && content.length() > 0 && content.charAt(0) == userSettings.Comment) { - if (content.length() > 1) { - content = "" + Letters.BACKSLASH + userSettings.Comment + content.substring(1); - } else { - content = "" + Letters.BACKSLASH + userSettings.Comment; - } - } - } - outputStream.write(content); - - if (textQualify) { outputStream.write(userSettings.TextQualifier); } - firstColumn = false; } - /** - * Write comment. - * - * @param commentText the comment text - * @throws IOException Signals that an I/O exception has occurred. - */ - public void writeComment(final String commentText) throws IOException { - checkClosed(); - - checkInit(); - - outputStream.write(userSettings.Comment); - - outputStream.write(commentText); - - if (useCustomRecordDelimiter) { - outputStream.write(userSettings.RecordDelimiter); - } else { - outputStream.write(System.lineSeparator()); - } - - firstColumn = true; - } - /** * Writes a new record using the passed in array of values. * @@ -360,7 +115,6 @@ public void writeComment(final String commentText) throws IOException { public void writeRecord(final String[] values, final boolean changeDelimiter) throws IOException { if (values != null && values.length > 0) { for (final String value : values) { write(value, changeDelimiter); } - endRecord(); } } @@ -384,17 +138,10 @@ public void writeRecord(final String[] values) throws IOException { * @exception IOException * Thrown if an error occurs while writing data to the destination stream. */ + @Override public void endRecord() throws IOException { - checkClosed(); - checkInit(); - - if (useCustomRecordDelimiter) { - outputStream.write(userSettings.RecordDelimiter); - } else { - outputStream.write(System.lineSeparator()); - } - + outputStream.write(System.lineSeparator()); firstColumn = true; } @@ -402,12 +149,9 @@ public void endRecord() throws IOException { * */ private void checkInit() throws IOException { - if (!initialized) { - if (fileName != null) { - outputStream = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fileName), charset)); - } - - initialized = true; + if (outputStream == null && fileName != null) { + outputStream = new BufferedWriter( + new OutputStreamWriter(new FileOutputStream(fileName), Charset.forName("UTF-8"))); } } @@ -416,147 +160,36 @@ private void checkInit() throws IOException { */ @Override public void close() { - if (!closed) { - close(true); - - closed = true; - } - } - - /** - * - */ - private void close(final boolean closing) { - if (!closed) { - if (closing) { charset = null; } - - try { - if (initialized) { outputStream.close(); } - } catch (final Exception e) { - // just eat the exception - } - - outputStream = null; - - closed = true; - } - } - - /** - * - */ - private void checkClosed() throws IOException { - if (closed) throw new IOException("This instance of the CsvWriter class has already been closed."); - } - - /** - * - */ - @Override - protected void finalize() { - close(false); - } - - /** - * The Class Letters. - */ - public static class Letters { - - /** The Constant LF. */ - public static final char LF = '\n'; - - /** The Constant CR. */ - public static final char CR = '\r'; - - /** The Constant QUOTE. */ - public static final char QUOTE = '"'; - - /** The Constant COMMA. */ - public static final char COMMA = ','; - - /** The Constant SPACE. */ - public static final char SPACE = ' '; - - /** The Constant TAB. */ - public static final char TAB = '\t'; - - /** The Constant POUND. */ - public static final char POUND = '#'; - - /** The Constant BACKSLASH. */ - public static final char BACKSLASH = '\\'; - - /** The Constant NULL. */ - public static final char NULL = '\0'; - } - - /** - * The Class UserSettings. - */ - private static class UserSettings { - - // having these as publicly accessible members will prevent - /** The Text qualifier. */ - // the overhead of the method call that exists on properties - public char TextQualifier; - - /** The Use text qualifier. */ - public boolean UseTextQualifier; - - /** The Delimiter. */ - public char Delimiter; - - /** The Record delimiter. */ - public char RecordDelimiter; - - /** The Comment. */ - public char Comment; - - /** The Escape mode. */ - public int EscapeMode; - - /** The Force qualifier. */ - public boolean ForceQualifier; - - /** - * Instantiates a new user settings. - */ - public UserSettings() { - TextQualifier = Letters.QUOTE; - UseTextQualifier = false; /* was true */ - Delimiter = Letters.COMMA; - RecordDelimiter = Letters.NULL; - Comment = Letters.POUND; - EscapeMode = ESCAPE_MODE_DOUBLED; - ForceQualifier = false; - } + try { + if (outputStream != null) { outputStream.close(); } + } catch (final Exception e) {} + outputStream = null; } /** * Replace. * - * @param original the original - * @param pattern the pattern - * @param replace the replace + * @param original + * the original + * @param pattern + * the pattern + * @param replace + * the replace * @return the string */ public static String replace(final String original, final String pattern, final String replace) { final int len = pattern.length(); int found = original.indexOf(pattern); - if (found > -1) { final StringBuilder sb = new StringBuilder(); int start = 0; - while (found != -1) { sb.append(original.substring(start, found)); sb.append(replace); start = found + len; found = original.indexOf(pattern, start); } - sb.append(original.substring(start)); - return sb.toString(); } return original; diff --git a/msi.gama.core/src/msi/gaml/statements/create/CreateFromCSVDelegate.java b/msi.gama.core/src/msi/gaml/statements/create/CreateFromCSVDelegate.java index 50ced72a8c..8c7bce0c18 100644 --- a/msi.gama.core/src/msi/gaml/statements/create/CreateFromCSVDelegate.java +++ b/msi.gama.core/src/msi/gaml/statements/create/CreateFromCSVDelegate.java @@ -62,7 +62,7 @@ public boolean createFrom(final IScope scope, final List> in final GamaCSVFile source = (GamaCSVFile) input; final IExpression header = statement.getHeader(); if (header != null) { source.forceHeader(Cast.asBool(scope, header.value(scope))); } - final boolean hasHeader = source.hasHeader(); + final boolean hasHeader = source.hasHeader(scope); final IMatrix mat = source.getContents(scope); if (mat == null || mat.isEmpty(scope)) return false; int rows = mat.getRows(scope); @@ -81,7 +81,9 @@ public boolean createFrom(final IScope scope, final List> in final IList vals = mat.getRow(i); for (int j = 0; j < cols; j++) { // see issue #3786 - map.put(headers.get(j), vals.get(j)); + String s = clean(headers.get(j)); + Object v = vals.get(j); + map.put(s, v); } // CSV attributes are mixed with the attributes of agents statement.fillWithUserInit(scope, map); @@ -90,6 +92,24 @@ public boolean createFrom(final IScope scope, final List> in return true; } + /** + * Clean. + * + * @param text + * the text + * @return the string + */ + private static String clean(String text) { + // // strips off all non-ASCII characters + // text = text.replaceAll("[^\\x00-\\x7F]", ""); + // erases all the ASCII control characters + text = text.replaceAll("[\\p{Cntrl}&&[^\r\n\t]]", ""); + // removes non-printable characters from Unicode + text = text.replaceAll("\\p{C}", ""); + + return text.trim(); + } + /** * Method fromFacetType() * diff --git a/ummisco.gama.ui.experiment/src/ummisco/gama/ui/views/inspectors/PopulationInspectView.java b/ummisco.gama.ui.experiment/src/ummisco/gama/ui/views/inspectors/PopulationInspectView.java index c27e5322b8..fcef0b4e83 100644 --- a/ummisco.gama.ui.experiment/src/ummisco/gama/ui/views/inspectors/PopulationInspectView.java +++ b/ummisco.gama.ui.experiment/src/ummisco/gama/ui/views/inspectors/PopulationInspectView.java @@ -861,7 +861,6 @@ public void saveAsCSV() { // AD 2/1/16 Replaces the comma by ';' to properly output points and // lists writer.setDelimiter(';'); - writer.setUseTextQualifier(false); final List contents = new ArrayList<>(); final String[] headers = new String[columns.length]; @@ -950,6 +949,4 @@ public void widgetDisposed(final DisposeEvent e) { @Override public void pauseChanged() {} - - } diff --git a/ummisco.gama.ui.viewers/src/ummisco/gama/ui/viewers/csv/MultiPageCSVEditor.java b/ummisco.gama.ui.viewers/src/ummisco/gama/ui/viewers/csv/MultiPageCSVEditor.java index 4ce0793d01..d532ac8ddf 100644 --- a/ummisco.gama.ui.viewers/src/ummisco/gama/ui/viewers/csv/MultiPageCSVEditor.java +++ b/ummisco.gama.ui.viewers/src/ummisco/gama/ui/viewers/csv/MultiPageCSVEditor.java @@ -45,6 +45,7 @@ import org.eclipse.ui.part.FileEditorInput; import org.eclipse.ui.part.MultiPageEditorPart; +import msi.gama.util.file.csv.AbstractCSVManipulator.Letters; import ummisco.gama.ui.menus.GamaMenu; import ummisco.gama.ui.resources.IGamaIcons; import ummisco.gama.ui.utils.WorkbenchHelper; @@ -524,7 +525,7 @@ protected void fillMenu() { @Override public void widgetSelected(final SelectionEvent e1) { - refreshWithDelimiter(','); + refreshWithDelimiter(Letters.COMMA); } }, null); @@ -532,7 +533,7 @@ public void widgetSelected(final SelectionEvent e1) { @Override public void widgetSelected(final SelectionEvent e1) { - refreshWithDelimiter(';'); + refreshWithDelimiter(Letters.SEMICOLUMN); } }, null); @@ -540,7 +541,7 @@ public void widgetSelected(final SelectionEvent e1) { @Override public void widgetSelected(final SelectionEvent e1) { - refreshWithDelimiter(' '); + refreshWithDelimiter(Letters.SPACE); } }, null); @@ -548,7 +549,7 @@ public void widgetSelected(final SelectionEvent e1) { @Override public void widgetSelected(final SelectionEvent e1) { - refreshWithDelimiter('\t'); + refreshWithDelimiter(Letters.TAB); } }, null); @@ -556,7 +557,7 @@ public void widgetSelected(final SelectionEvent e1) { @Override public void widgetSelected(final SelectionEvent e1) { - refreshWithDelimiter(':'); + refreshWithDelimiter(Letters.COLUMN); } }, null); @@ -564,7 +565,7 @@ public void widgetSelected(final SelectionEvent e1) { @Override public void widgetSelected(final SelectionEvent e1) { - refreshWithDelimiter('|'); + refreshWithDelimiter(Letters.PIPE); } diff --git a/ummisco.gama.ui.viewers/src/ummisco/gama/ui/viewers/csv/model/CSVModel.java b/ummisco.gama.ui.viewers/src/ummisco/gama/ui/viewers/csv/model/CSVModel.java index e13ffaca9a..897c3e1595 100644 --- a/ummisco.gama.ui.viewers/src/ummisco/gama/ui/viewers/csv/model/CSVModel.java +++ b/ummisco.gama.ui.viewers/src/ummisco/gama/ui/viewers/csv/model/CSVModel.java @@ -23,9 +23,9 @@ import msi.gama.runtime.GAMA; import msi.gama.util.file.GamaCSVFile.CSVInfo; import msi.gama.util.file.IGamaFileMetaData; +import msi.gama.util.file.csv.AbstractCSVManipulator; import msi.gama.util.file.csv.CsvReader; import msi.gama.util.file.csv.CsvWriter; -import msi.gama.util.file.csv.CsvWriter.Letters; import ummisco.gama.dev.utils.DEBUG; import ummisco.gama.ui.interfaces.IRefreshHandler; import ummisco.gama.ui.utils.WorkbenchHelper; @@ -81,7 +81,6 @@ public void setFirstLineHeader(final boolean header) { info.header = header; saveMetaData(); } - // ResourceRefreshHandler.discardMetaData(file); } /** @@ -104,30 +103,12 @@ public void setCustomDelimiter(final char c) { saveMetaData(); } - /** - * Get the character that defines comment lines - * - * @return the comment line starting character. If no comments are allowed in this file, then Character.UNASSIGNED - * constant must be returned; - * - */ - public char getCommentChar() { return Character.UNASSIGNED; } - /** * Get custom text qualifier to use as a text qualifier in the data * * @return the text qualifier character to use as a text qualifier in the data */ - public Character getTextQualifier() { return Character.UNASSIGNED; } - - /** - * check if the text qualifier has to be use for all fields or not - * - * @return true if the text qualifier is to be used for all data fields - */ - public boolean useQualifier() { - return getTextQualifier() != null; - } + public Character getTextQualifier() { return AbstractCSVManipulator.getDefaultQualifier(); } /** * @param text @@ -142,20 +123,8 @@ public void setInput(final String text) { */ protected CsvReader initializeReader(final Reader reader) { final CsvReader csvReader = new CsvReader(reader); - final char customDelimiter = getCustomDelimiter(); csvReader.setDelimiter(customDelimiter); - - final char commentChar = getCommentChar(); - if (commentChar != Character.UNASSIGNED) { - csvReader.setComment(commentChar); - // prevent loss of comment in csv source file - csvReader.setUseComments(false); - } - - csvReader.setTextQualifier(Letters.QUOTE); - csvReader.setUseTextQualifier(true); - return csvReader; } @@ -186,15 +155,12 @@ protected void readLines(final Reader reader) { final String[] rowValues = csvReader.getValues(); if (rowValues.length > info.cols) { info.cols = rowValues.length; } final CSVRow csvRow = new CSVRow(rowValues, this); - if (!rowValues[0].startsWith(String.valueOf(getCommentChar()))) { - if (info.header && !setHeader) { - setHeader = true; - csvRow.setHeader(true); - getInfo().headers = new String[getInfo().cols]; - for (int i = 0; i < getInfo().cols; i++) { getInfo().headers[i] = rowValues[i]; } - } - } else { - csvRow.setCommentLine(true); + + if (info.header && !setHeader) { + setHeader = true; + csvRow.setHeader(true); + getInfo().headers = new String[getInfo().cols]; + for (int i = 0; i < getInfo().cols; i++) { getInfo().headers[i] = rowValues[i]; } } rows.add(csvRow); } @@ -225,22 +191,6 @@ protected void readLines(final Reader reader) { // Helper method on rows management // ---------------------------------- - /** - * @param row - */ - // public void duplicateRow(final CSVRow row) { - // CSVRow newRow = new CSVRow(row, this); - // CSVInfo info = getInfo(); - // int indexRow = findRow(row); - // if ( indexRow != -1 ) { - // rows.add(indexRow, newRow); - // } else { - // addRow(newRow); - // } - // info.rows++; - // saveMetaData(); - // } - /** * */ @@ -301,12 +251,8 @@ public Object[] getArrayRows(final boolean includeCommentLine) { // filter header and comment rows final ArrayList myrows = new ArrayList<>(); for (final CSVRow row : rows) { - // should we return the comment line - if (row.isCommentLine()) { - if (includeCommentLine) { myrows.add(row); } - } // we do not add the header line - else if (!row.isHeader()) { myrows.add(row); } + if (!row.isHeader()) { myrows.add(row); } } return myrows.toArray(); } @@ -374,13 +320,7 @@ public void removeColumn(final int colIndex) { info.headers = cols.toArray(new String[cols.size()]); } info.cols--; - for (final CSVRow row : rows) { - if (!row.isCommentLine()) { - // DEBUG.LOG("remove elmt:[" + colIndex + "] in row [" - // + row + "]"); - row.removeElementAt(colIndex); - } - } + for (final CSVRow row : rows) { row.removeElementAt(colIndex); } saveMetaData(); } @@ -420,8 +360,6 @@ protected CsvWriter initializeWriter(final Writer writer) { final char delimiter = getCustomDelimiter(); final CsvWriter csvWriter = new CsvWriter(writer, delimiter); csvWriter.setTextQualifier(getTextQualifier()); - csvWriter.setForceQualifier(useQualifier()); - csvWriter.setComment(getCommentChar()); return csvWriter; } @@ -431,14 +369,7 @@ protected CsvWriter initializeWriter(final Writer writer) { public String getTextRepresentation() { try (final StringWriter sw = new StringWriter(); final CsvWriter clw = initializeWriter(sw);) { - - for (final CSVRow row : rows) { - if (row.isCommentLine()) { - clw.writeComment(row.getComment()); - } else { - clw.writeRecord(row.getEntriesAsArray()); - } - } + for (final CSVRow row : rows) { clw.writeRecord(row.getEntriesAsArray()); } return sw.toString(); } catch (final Exception e) { DEBUG.ERR("cannot write csv file"); diff --git a/ummisco.gama.ui.viewers/src/ummisco/gama/ui/viewers/csv/model/CSVRow.java b/ummisco.gama.ui.viewers/src/ummisco/gama/ui/viewers/csv/model/CSVRow.java index 8ddf611a5a..78b7253b5d 100644 --- a/ummisco.gama.ui.viewers/src/ummisco/gama/ui/viewers/csv/model/CSVRow.java +++ b/ummisco.gama.ui.viewers/src/ummisco/gama/ui/viewers/csv/model/CSVRow.java @@ -1,12 +1,12 @@ /******************************************************************************************************* * - * CSVRow.java, in ummisco.gama.ui.viewers, is part of the source code of the - * GAMA modeling and simulation platform (v.1.9.2). + * CSVRow.java, in ummisco.gama.ui.viewers, is part of the source code of the GAMA modeling and simulation platform + * (v.1.9.2). * * (c) 2007-2023 UMI 209 UMMISCO IRD/SU & Partners (IRIT, MIAT, TLU, CTU) * * Visit https://github.com/gama-platform/gama for license information and contacts. - * + * ********************************************************************************************************/ package ummisco.gama.ui.viewers.csv.model; @@ -14,6 +14,7 @@ import java.util.Arrays; import java.util.LinkedList; import java.util.List; +import java.util.Objects; /** * Represents a row made of String elements @@ -29,9 +30,6 @@ public class CSVRow { /** Row changes listener */ private final IRowChangesListener listener; - /** track of commented line */ - private boolean isCommentLine; - /** The is header. */ private boolean isHeader; @@ -66,25 +64,19 @@ public CSVRow(final String[] lineElements, final IRowChangesListener listener) { */ public static CSVRow createEmptyLine(final int nbOfColumns, final IRowChangesListener listener) { final List line = new LinkedList<>(); - for (int i = 0; i < nbOfColumns; i++) { - line.add(""); - } + for (int i = 0; i < nbOfColumns; i++) { line.add(""); } return new CSVRow(line, listener); } /** * @return */ - public ArrayList getEntries() { - return entries; - } + public ArrayList getEntries() { return entries; } /** * @return */ - public String[] getEntriesAsArray() { - return entries.toArray(new String[entries.size()]); - } + public String[] getEntriesAsArray() { return entries.toArray(new String[entries.size()]); } /** * @param elementIndex @@ -105,7 +97,7 @@ public void setRowEntry(final int elementIndex, final String elementString) { * @return the element at a given index */ public String getElementAt(final int index) { - if (index >= entries.size()) { return ""; } + if (index >= entries.size()) return ""; return entries.get(index); } @@ -114,9 +106,7 @@ public String getElementAt(final int index) { * * @return number of elements in this row */ - public int getNumberOfElements() { - return entries.size(); - } + public int getNumberOfElements() { return entries.size(); } /** * @param element @@ -134,50 +124,27 @@ public void removeElementAt(final int index) { entries.remove(index); } - /** - * Sets the comment line. - * - * @param comment the new comment line - */ - public void setCommentLine(final boolean comment) { - isCommentLine = comment; - } - - /** - * Checks if is comment line. - * - * @return true, if is comment line - */ - public boolean isCommentLine() { - return isCommentLine; - } - /** * Sets the header. * - * @param header the new header + * @param header + * the new header */ - public void setHeader(final boolean header) { - isHeader = header; - } + public void setHeader(final boolean header) { isHeader = header; } /** * Checks if is header. * * @return true, if is header */ - public boolean isHeader() { - return isHeader; - } + public boolean isHeader() { return isHeader; } /** * Gets the comment. * * @return the comment */ - public String getComment() { - return entries.get(0).substring(1); - } + public String getComment() { return entries.get(0).substring(1); } /** * Give the String representation of a CSVRow object. @@ -199,10 +166,7 @@ public String toString() { */ @Override public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + (entries == null ? 0 : entries.hashCode()); - return result; + return Objects.hash(entries); } /** @@ -224,7 +188,7 @@ public boolean equals(final Object anObject) { * (!(getElementAt(i).equals(thisRow.getElementAt(i)))) { return false; } } return true; */ - if (this == anObject) { return true; } + if (this == anObject) return true; return false; }