Skip to content

Commit

Permalink
Depressing perf test added
Browse files Browse the repository at this point in the history
  • Loading branch information
Joakim Wennergren committed Mar 9, 2018
1 parent 8770853 commit 850762f
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 24 deletions.
10 changes: 10 additions & 0 deletions CsvGen/Helpers.cs
Expand Up @@ -7,6 +7,7 @@
public static class Helpers
{
private const string Alphabet = "abcdefghijklmnopqrstuvwxyz";
private const string NonAscii = "กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ";

public static IEnumerable<T> Interspace<T>(this IEnumerable<T> enumerable, T separator)
{
Expand All @@ -29,6 +30,15 @@ public static string RandomString(this Random r, int length)
return new string(chars);
}

/// <summary>
/// Builds a random string whose characters are all drawn from the
/// <c>NonAscii</c> character set.
/// </summary>
/// <param name="r">Random source used to pick each character.</param>
/// <param name="length">Number of characters to generate.</param>
/// <returns>
/// A string of <paramref name="length"/> characters taken from <c>NonAscii</c>;
/// an empty string when <paramref name="length"/> is zero.
/// </returns>
public static string RandomNonAsciiString(this Random r, int length)
{
    var chars = new char[length];
    for (var i = 0; i < length; i++)
        chars[i] = NonAscii[r.Next(NonAscii.Length)]; // sample the non-ASCII set, not Alphabet

    // No first-character capitalisation here: the NonAscii set holds Thai letters,
    // which have no upper-case form, and touching chars[0] would throw for length 0.
    return new string(chars);
}

public static char Char(this Random r)
{
return Alphabet[r.Next(Alphabet.Length)];
Expand Down
91 changes: 72 additions & 19 deletions CsvGen/Program.cs
Expand Up @@ -4,6 +4,7 @@
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using CsvQuery.Csv;
using CsvQuery.Tools;

Expand All @@ -28,33 +29,42 @@ private static void Main()
if (Param.Flag('P'))
{
PerformanceTest();
return;
}
else if (Param.Flag('p'))
{
PerfTestCsvParsers();
}
else
{
var rowsToCreate = Param.Get('n', 100000);
var columns = Param.Get('c', 10);
var filename = Param.FirstOr($"random{rowsToCreate}x{columns}.csv");

using (var fs = new StreamWriter(filename))
GenerateCsv(columns, rowsToCreate, fs);
}
}

/// <summary>
/// Writes a randomly generated CSV document (one header line followed by data rows)
/// to the supplied writer.
/// </summary>
/// <param name="columns">Number of columns per row.</param>
/// <param name="rowsToCreate">Number of data rows written after the header line.</param>
/// <param name="fs">Destination writer. It is neither flushed nor disposed here; the caller owns it.</param>
/// <param name="separator">Text written between adjacent fields; defaults to a comma.</param>
private static void GenerateCsv(int columns, int rowsToCreate, TextWriter fs, string separator = ",")
{
    var r = new Random();

    // The first seven columns take type values 0..6 in order; any further column gets a
    // random value in that range. NOTE(review): assumes CsvColumnType defines values 0..6 - confirm.
    var columnTypes = Enumerable.Range(0, columns)
        .Select(x => x < 7 ? x : r.Next(7))
        .Cast<CsvColumnType>()
        .ToList();

    // Headers: five random letters followed by the column type name
    foreach (var str in columnTypes.Select(x => r.RandomString(5) + x.ToString()).Interspace(separator))
        fs.Write(str);
    fs.WriteLine();

    // Rows: one generated value per column, joined by the separator
    for (var l = 0; l < rowsToCreate; l++)
    {
        foreach (var str in columnTypes.Select(x => r.GenColumn(x)).Interspace(separator))
            fs.Write(str);
        fs.WriteLine();
    }
}

static void PerformanceTest()
private static void PerformanceTest()
{
var timer = new DiagnosticTimer();
var data = new List<string[]>();
Expand All @@ -69,7 +79,50 @@ static void PerformanceTest()
var result = new CsvColumnTypes(data, null);
Console.WriteLine(timer.LastCheckpoint("Anlyzed"));
Console.WriteLine(result);
Console.WriteLine("Column 1: " + result.Columns[0].DataType.ToString());
Console.WriteLine("Column 1: " + result.Columns[0].DataType);
}

/// <summary>
/// Generates a large CSV (20 columns, 200000 rows) in memory, runs each CSV parser
/// over the same data and prints the timing summary produced by the timer.
/// </summary>
private static void PerfTestCsvParsers()
{
    using (var mm = new MemoryStream())
    using (var sw = new StreamWriter(mm))
    {
        // Prepare huge csv
        GenerateCsv(20, 200000, sw);

        // StreamWriter buffers its output; without this flush the last part of the
        // generated text would never reach the MemoryStream and every parser would
        // be measured on truncated input.
        sw.Flush();

        mm.Position = 0;
        using (var sr = new StreamReader(mm, Encoding.UTF8, false, 1024 * 16, true))
        {
            var count = 0;

            var csvSettings = new CsvSettings(',');

            var timer = new DiagnosticTimer();
            //foreach (var line in csvSettings.ParseVB(sr))
            //    count++;
            //timer.Checkpoint($"VB ({count})");

            // Rewind before each run. The reader keeps its own buffer, so it has to be
            // discarded whenever the underlying stream position is moved.
            mm.Position = 0;
            sr.DiscardBufferedData();
            count = 0;
            timer.Checkpoint("NOP"); // keeps the rewind out of the measured interval
            foreach (var line in csvSettings.ParseStandard(sr))
                count++;
            timer.Checkpoint($"Standard ({count})");

            mm.Position = 0;
            sr.DiscardBufferedData();
            count = 0;
            timer.Checkpoint("NOP");
            foreach (var line in csvSettings.ParseRaw(sr))
                count++;
            timer.Checkpoint($"ParseRaw ({count})");

            mm.Position = 0;
            sr.DiscardBufferedData();
            count = 0;
            timer.Checkpoint("NOP");
            foreach (var line in csvSettings.ParseRawBuffered(sr))
                count++;
            var result = timer.LastCheckpoint($"ParseRawBuffered ({count})");

            Console.WriteLine(result);
        }
    }
}
}
}
6 changes: 3 additions & 3 deletions CsvQuery/Csv/CsvSettings.cs
Expand Up @@ -95,7 +95,7 @@ public IEnumerable<string[]> ParseVB(TextReader reader)
/// <summary>
/// Parses CSV data from <paramref name="reader"/>, picking the parser according to
/// whether <c>FieldWidths</c> is configured.
/// </summary>
/// <param name="reader">Source of the text to parse.</param>
/// <returns>The parsed rows, each as an array of field values.</returns>
public IEnumerable<string[]> Parse(TextReader reader)
{
    // No field widths configured: use the standard parser.
    if (this.FieldWidths == null)
        return this.ParseStandard(reader);

    // Field widths are set: delegate to ParseVB.
    return this.ParseVB(reader);
}

Expand All @@ -118,7 +118,7 @@ public IEnumerable<string[]> ParseRaw(TextReader reader)
continue;
}

if (c == '"' && useQuotes)
if (c == '"' && (this.UseQuotes == true || this.UseQuotes == null && sb.Length > 0))
{
inQuotes = !inQuotes;
if (inQuotes && sb.Length > 0) sb.Append('"');
Expand Down Expand Up @@ -238,7 +238,7 @@ public IEnumerable<string[]> ParseStandard(TextReader reader)
if (line.Length > 0 && line[0] == this.CommentCharacter) continue;
foreach (var c in line)
{
if (c == '"')
if (c == '"' && (this.UseQuotes == true || this.UseQuotes == null && (inQuotes || sb.Length == 0)))
{
inQuotes = !inQuotes;
if (inQuotes && sb.Length > 0) sb.Append('"');
Expand Down
4 changes: 2 additions & 2 deletions Tests/CsvSettingsFacts.cs
Expand Up @@ -49,8 +49,8 @@ public void CanParseDifferent(string newline, char separator, bool quoted)
new[] {"3", "12", "1,3\""},
new[] {"4", "2", "3"}
};
var csvText =string.Join(newline, indata.Select(x => string.Join(separator.ToString(), x.Select(l=>quoted?$"\"{l.Replace("\"","\"\"")}\"":l))));
var set = new CsvSettings { Separator = separator, TextQualifier =quoted? '"' :default(char),HasHeader = false};
var csvText = string.Join(newline, indata.Select(x => string.Join(separator.ToString(), x.Select(l => quoted ? $"\"{l.Replace("\"", "\"\"")}\"" : l))));
var set = new CsvSettings {Separator = separator, TextQualifier = quoted ? '"' : default(char), HasHeader = false};

// Act
var data = set.Parse(csvText);
Expand Down

0 comments on commit 850762f

Please sign in to comment.