From 850762fc165804d8278528de183cba2f0cea1695 Mon Sep 17 00:00:00 2001
From: Joakim Wennergren
Date: Fri, 9 Mar 2018 20:17:38 +0100
Subject: [PATCH] Depressing pref test added

Adds a 'p' flag to CsvGen that times the CSV parsers (ParseStandard,
ParseRaw, ParseRawBuffered) against a 20-column, 200000-row CSV that is
generated in memory. The generator is extracted into GenerateCsv so it can
write to any TextWriter with a configurable separator. CsvSettings.Parse
now uses ParseStandard when FieldWidths is null, and the quote checks in
ParseRaw and ParseStandard consult UseQuotes.

Review fixes folded into this copy of the patch:
- RandomNonAsciiString drew its characters from Alphabet, which left the
  new NonAscii constant unused and returned plain ASCII; it now indexes
  NonAscii.
- PerfTestCsvParsers rewound the MemoryStream without flushing the
  StreamWriter, so the writer's buffered tail of the generated CSV never
  reached the stream the parsers read. A Flush() call was added (hunk
  header and diffstat counts adjusted by one line).

Review notes (not changed here):
- With UseQuotes == null, ParseRaw tests "sb.Length > 0" while
  ParseStandard tests "(inQuotes || sb.Length == 0)". These look
  inconsistent; confirm which one is intended.
- Generic type arguments appear to be missing in this copy (for example
  "Cast()" and "new List()"); they are left exactly as received.
- Line breaks and indentation were reconstructed from a collapsed copy;
  use a whitespace-tolerant apply if the context does not match.
---
 CsvGen/Helpers.cs           | 10 ++++
 CsvGen/Program.cs           | 92 +++++++++++++++++++++++++++++--------
 CsvQuery/Csv/CsvSettings.cs |  6 +--
 Tests/CsvSettingsFacts.cs   |  4 +-
 4 files changed, 88 insertions(+), 24 deletions(-)

diff --git a/CsvGen/Helpers.cs b/CsvGen/Helpers.cs
index 5722792..6374d33 100644
--- a/CsvGen/Helpers.cs
+++ b/CsvGen/Helpers.cs
@@ -7,6 +7,7 @@
     public static class Helpers
     {
         private const string Alphabet = "abcdefghijklmnopqrstuvwxyz";
+        private const string NonAscii = "กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ";
 
         public static IEnumerable Interspace(this IEnumerable enumerable, T separator)
         {
@@ -29,6 +30,15 @@ public static string RandomString(this Random r, int length)
             return new string(chars);
         }
 
+        public static string RandomNonAsciiString(this Random r, int length)
+        {
+            var chars = new char[length];
+            for (var i = 0; i < length; i++)
+                chars[i] = NonAscii[r.Next(NonAscii.Length)];
+            chars[0] = char.ToUpperInvariant(chars[0]);
+            return new string(chars);
+        }
+
         public static char Char(this Random r)
         {
             return Alphabet[r.Next(Alphabet.Length)];
diff --git a/CsvGen/Program.cs b/CsvGen/Program.cs
index cc53fa2..fafc2ef 100644
--- a/CsvGen/Program.cs
+++ b/CsvGen/Program.cs
@@ -4,6 +4,7 @@
     using System.Collections.Generic;
     using System.IO;
     using System.Linq;
+    using System.Text;
     using CsvQuery.Csv;
     using CsvQuery.Tools;
 
@@ -28,33 +29,42 @@ private static void Main()
             if (Param.Flag('P'))
             {
                 PerformanceTest();
-                return;
             }
+            else if (Param.Flag('p'))
+            {
+                PerfTestCsvParsers();
+            }
+            else
+            {
+                var rowsToCreate = Param.Get('n', 100000);
+                var columns = Param.Get('c', 10);
+                var filename = Param.FirstOr($"random{rowsToCreate}x{columns}.csv");
+
+                using (var fs = new StreamWriter(filename))
+                    GenerateCsv(columns, rowsToCreate, fs);
+            }
+        }
+
+        private static void GenerateCsv(int columns, int rowsToCreate, TextWriter fs, string separator=",")
+        {
             var r = new Random();
-            var rowsToCreate = Param.Get('n', 100000);
-            var columns = Param.Get('c', 10);
-            var filename = Param.FirstOr($"random{rowsToCreate}x{columns}.csv");
+            var columnTypes = Enumerable.Range(0, columns).Select(x => x < 7 ? x : r.Next(7)).Cast().ToList();
+
+            // Headers
+            foreach (var str in columnTypes.Select(x => r.RandomString(5) + x.ToString()).Interspace(separator))
+                fs.Write(str);
+            fs.WriteLine();
 
-            var columnTypes = Enumerable.Range(0, columns).Select(x => x < 7 ? x : r.Next(7)).Cast()
-                .ToList();
-            using (var fs = new StreamWriter(filename))
+            // Rows
+            for (var l = 0; l < rowsToCreate; l++)
             {
-                // Headers
-                foreach (var str in columnTypes.Select(x => r.RandomString(5) + x.ToString()).Interspace(","))
+                foreach (var str in columnTypes.Select(x => r.GenColumn(x)).Interspace(separator))
                     fs.Write(str);
                 fs.WriteLine();
-
-                // Rows
-                for (var l = 0; l < rowsToCreate; l++)
-                {
-                    foreach (var str in columnTypes.Select(x => r.GenColumn(x)).Interspace(","))
-                        fs.Write(str);
-                    fs.WriteLine();
-                }
             }
         }
 
-        static void PerformanceTest()
+        private static void PerformanceTest()
         {
             var timer = new DiagnosticTimer();
             var data = new List();
@@ -69,7 +79,51 @@ static void PerformanceTest()
             var result = new CsvColumnTypes(data, null);
             Console.WriteLine(timer.LastCheckpoint("Anlyzed"));
             Console.WriteLine(result);
-            Console.WriteLine("Column 1: " + result.Columns[0].DataType.ToString());
+            Console.WriteLine("Column 1: " + result.Columns[0].DataType);
+        }
+
+        private static void PerfTestCsvParsers()
+        {
+            // Prepare huge csv
+            var mm = new MemoryStream();
+            var sw = new StreamWriter(mm);
+
+            GenerateCsv(20,200000,sw);
+            sw.Flush(); // push the writer's buffered tail into mm before rewinding
+
+            mm.Position = 0;
+            var sr = new StreamReader(mm, Encoding.UTF8, false, 1024*16, true);
+            var count = 0;
+
+            CsvSettings csvSettings = new CsvSettings(',');
+
+            var timer = new DiagnosticTimer();
+            //foreach (var line in csvSettings.ParseVB(sr))
+            //    count++;
+            //timer.Checkpoint($"VB ({count})");
+
+            mm.Position = 0;
+            count = 0;
+            timer.Checkpoint("NOP");
+            foreach (var line in csvSettings.ParseStandard(sr))
+                count++;
+            timer.Checkpoint($"Standard ({count})");
+
+            mm.Position = 0;
+            count = 0;
+            timer.Checkpoint("NOP");
+            foreach (var line in csvSettings.ParseRaw(sr))
+                count++;
+            timer.Checkpoint($"ParseRaw ({count})");
+
+            mm.Position = 0;
+            count = 0;
+            timer.Checkpoint("NOP");
+            foreach (var line in csvSettings.ParseRawBuffered(sr))
+                count++;
+            var result = timer.LastCheckpoint($"ParseRawBuffered ({count})");
+
+            Console.WriteLine(result);
         }
     }
 }
\ No newline at end of file
diff --git a/CsvQuery/Csv/CsvSettings.cs b/CsvQuery/Csv/CsvSettings.cs
index 0f67510..f2da777 100644
--- a/CsvQuery/Csv/CsvSettings.cs
+++ b/CsvQuery/Csv/CsvSettings.cs
@@ -95,7 +95,7 @@ public IEnumerable ParseVB(TextReader reader)
         public IEnumerable Parse(TextReader reader)
         {
             if (this.FieldWidths == null)
-                return this.ParseRawBuffered(reader);
+                return this.ParseStandard(reader);
             return this.ParseVB(reader);
         }
 
@@ -118,7 +118,7 @@ public IEnumerable ParseRaw(TextReader reader)
                     continue;
                 }
 
-                if (c == '"' && useQuotes)
+                if (c == '"' && (this.UseQuotes == true || this.UseQuotes == null && sb.Length > 0))
                 {
                     inQuotes = !inQuotes;
                     if (inQuotes && sb.Length > 0) sb.Append('"');
@@ -238,7 +238,7 @@ public IEnumerable ParseStandard(TextReader reader)
                 if (line.Length > 0 && line[0] == this.CommentCharacter) continue;
                 foreach (var c in line)
                 {
-                    if (c == '"')
+                    if (c == '"' && (this.UseQuotes == true || this.UseQuotes == null && (inQuotes || sb.Length == 0)))
                     {
                         inQuotes = !inQuotes;
                         if (inQuotes && sb.Length > 0) sb.Append('"');
diff --git a/Tests/CsvSettingsFacts.cs b/Tests/CsvSettingsFacts.cs
index 665d29c..f0d7b90 100644
--- a/Tests/CsvSettingsFacts.cs
+++ b/Tests/CsvSettingsFacts.cs
@@ -49,8 +49,8 @@ public void CanParseDifferent(string newline, char separator, bool quoted)
                 new[] {"3", "12", "1,3\""},
                 new[] {"4", "2", "3"}
             };
-            var csvText =string.Join(newline, indata.Select(x => string.Join(separator.ToString(), x.Select(l=>quoted?$"\"{l.Replace("\"","\"\"")}\"":l))));
-            var set = new CsvSettings { Separator = separator, TextQualifier =quoted? '"' :default(char),HasHeader = false};
+            var csvText = string.Join(newline, indata.Select(x => string.Join(separator.ToString(), x.Select(l => quoted ? $"\"{l.Replace("\"", "\"\"")}\"" : l))));
+            var set = new CsvSettings {Separator = separator, TextQualifier = quoted ? '"' : default(char), HasHeader = false};
 
             // Act
             var data = set.Parse(csvText);