Skip to content

Commit

Permalink
Convert BigQuery field validation from a regex to hand-written code
Browse files Browse the repository at this point in the history
This is entirely for the sake of performance. Benchmark results on my machine:

netcoreapp2.0:
- Short: 180ns => 9ns
- Long: 939ns => 96ns

netcoreapp3.1:
- Short: 159ns => 8ns
- Long: 887ns => 79ns

net461:
- Short: 197ns => 8ns
- Long: 1425ns => 86ns

Fixes #4975
  • Loading branch information
jskeet committed May 15, 2020
1 parent 372df03 commit 575c61a
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 10 deletions.
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFrameworks>netcoreapp2.0;net461</TargetFrameworks>
<TargetFrameworks Condition=" '$(OS)' != 'Windows_NT' ">netcoreapp2.0</TargetFrameworks>
<TargetFrameworks>netcoreapp2.0;netcoreapp3.1;net461</TargetFrameworks>
<OutputType>Exe</OutputType>
<IsPackable>False</IsPackable>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<LangVersion>7.2</LangVersion>
</PropertyGroup>

<ItemGroup>
<PackageReference Include="BenchmarkDotNet" Version="0.10.14" />
<PackageReference Include="BenchmarkDotNet" Version="0.12.1" />
<ProjectReference Include="../Google.Cloud.BigQuery.V2/Google.Cloud.BigQuery.V2.csproj" />
</ItemGroup>

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using BenchmarkDotNet.Attributes;

namespace Google.Cloud.BigQuery.V2.Benchmarks
{
    /// <summary>
    /// Benchmarks for <c>TableSchemaBuilder.ValidateFieldName</c>, measured with a
    /// short and a long field name. Both names consist solely of ASCII letters,
    /// digits and underscores and start with a letter.
    /// </summary>
    public class TableSchemaBuilderBenchmark
    {
        // Value passed as the paramName argument in every benchmark.
        private const string ParameterName = "param";

        // 3-character field name.
        private const string ShortFieldName = "abc";

        // 63-character field name mixing lower case, digits, underscore and upper case.
        private const string LongFieldName = "abcdefghijklmnopqrstuvwxyz1234567890_ABCDEFGHIJKLMNOPQRSTUVWXYZ";

        /// <summary>
        /// Validates the short field name.
        /// </summary>
        [Benchmark]
        public void ValidateFieldName_Short()
        {
            TableSchemaBuilder.ValidateFieldName(ShortFieldName, ParameterName);
        }

        /// <summary>
        /// Validates the long field name.
        /// </summary>
        [Benchmark]
        public void ValidateFieldName_Long()
        {
            TableSchemaBuilder.ValidateFieldName(LongFieldName, ParameterName);
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@
// limitations under the License.
using System.Runtime.CompilerServices;

[assembly: InternalsVisibleTo("Google.Cloud.BigQuery.V2.Tests,PublicKey=0024000004800000940000000602000000240000525341310004000001000100afab79952ee22215f12b4e09337e65509c943fbc22d7006bc371d581d0f0ebf0da5d8039aab2607fb68a138a5d80a71bc02b7ebf586dbe1f2493c0ab20423ababfd15ce74d2264a6b37745f3658f016abaad662182aaef634a60f1346fcc45343acab5b6781535a3134818e13fac895a6c106c0480e34bbb06cb123e5583d8d2")]
[assembly: InternalsVisibleTo("Google.Cloud.BigQuery.V2.Tests,PublicKey=0024000004800000940000000602000000240000525341310004000001000100afab79952ee22215f12b4e09337e65509c943fbc22d7006bc371d581d0f0ebf0da5d8039aab2607fb68a138a5d80a71bc02b7ebf586dbe1f2493c0ab20423ababfd15ce74d2264a6b37745f3658f016abaad662182aaef634a60f1346fcc45343acab5b6781535a3134818e13fac895a6c106c0480e34bbb06cb123e5583d8d2")]
[assembly: InternalsVisibleTo("Google.Cloud.BigQuery.V2.Benchmarks,PublicKey=0024000004800000940000000602000000240000525341310004000001000100afab79952ee22215f12b4e09337e65509c943fbc22d7006bc371d581d0f0ebf0da5d8039aab2607fb68a138a5d80a71bc02b7ebf586dbe1f2493c0ab20423ababfd15ce74d2264a6b37745f3658f016abaad662182aaef634a60f1346fcc45343acab5b6781535a3134818e13fac895a6c106c0480e34bbb06cb123e5583d8d2")]
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ public struct BigQueryNumeric : IEquatable<BigQueryNumeric>, IComparable<BigQuer

private static readonly BigInteger s_integerScalingFactor = new BigInteger(1_000_000_000L);
// TODO: Don't require a 0 before the decimal point.
// TODO: Replace with manual validation if we find this is a performance bottleneck (as it was with field name validation).
private static readonly Regex s_validation = new Regex(@"^-?[0-9]+\.?[0-9]*$");

// Note: the following properties must be declared *after* s_maxValue and s_minValue. Initialization order matters.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,6 @@ namespace Google.Cloud.BigQuery.V2
/// </remarks>
public sealed class TableSchemaBuilder : IEnumerable
{
// From BigQuery documentation:
// The name must contain only letters (a-z, A-Z), numbers (0-9),
// or underscores (_), and must start with a letter or underscore.
// The maximum length is 128 characters.
private static readonly Regex s_fieldNamePattern = new Regex("^[a-zA-Z_][a-zA-Z0-9_]{0,127}$");
private readonly List<TableFieldSchema> _fields = new List<TableFieldSchema>();

/// <summary>
Expand Down Expand Up @@ -76,7 +71,38 @@ public void Add(string name, BigQueryDbType type, BigQueryFieldMode mode = BigQu
internal static void ValidateFieldName(string name, string paramName)
{
    // Validates a BigQuery field name supplied by a caller.
    //   name:      the candidate field name.
    //   paramName: the caller's parameter name, forwarded to GaxPreconditions
    //              (presumably for use in the exception it raises - confirm against GaxPreconditions).

    // The null check must come first: IsValidFieldName dereferences name.
    GaxPreconditions.CheckNotNull(name, paramName);

    // Only the hand-written check is performed. A regex check
    // (^[a-zA-Z_][a-zA-Z0-9_]{0,127}$) used to run here too, but it was redundant:
    // every name accepted by IsValidFieldName also matches that regex, so running
    // both only added the regex's cost without rejecting anything extra.
    GaxPreconditions.CheckArgument(IsValidFieldName(name), paramName, "Invalid field name '{0}'", name);
}

// From BigQuery documentation:
// The name must contain only letters (a-z, A-Z), numbers (0-9),
// or underscores (_), and must start with a letter or underscore.
// The maximum length is 128 characters.
// This was originally a regular expression, but the manual code is very significantly faster.
// (Roughly 10x faster with the benchmarks I've run.)
private static bool IsValidFieldName(string name)
{
    // Returns true when name is 1 to 128 characters long, contains only ASCII
    // letters, ASCII digits or underscores, and does not start with a digit.
    // The caller is expected to have rejected null already; name is dereferenced here.
    int length = name.Length;
    if (length == 0 || length > 128)
    {
        return false;
    }

    for (int index = 0; index < length; index++)
    {
        char current = name[index];

        bool isLetter = (current >= 'a' && current <= 'z') || (current >= 'A' && current <= 'Z');
        if (isLetter || current == '_')
        {
            // Letters and underscores are acceptable in every position.
            continue;
        }

        // Digits are acceptable everywhere except the first position.
        bool isDigit = current >= '0' && current <= '9';
        if (isDigit && index > 0)
        {
            continue;
        }

        return false;
    }

    return true;
}

/// <summary>
Expand Down

0 comments on commit 575c61a

Please sign in to comment.