From 4deb6a47f4b8b8a46043a45ac572a4d138793954 Mon Sep 17 00:00:00 2001
From: Luigi Mori <l@isidora.org>
Date: Fri, 7 Dec 2018 14:34:40 +0100
Subject: [PATCH 01/14] Adds support for quoted types and use pythonic
 console_script

Signed-off-by: Luigi Mori <l@isidora.org>
---
 bigquery_schema_generator/generate_schema.py | 20 ++++++++++++++++++++
 scripts/generate-schema                      |  1 -
 setup.py                                     |  9 +++++++--
 3 files changed, 27 insertions(+), 3 deletions(-)
 delete mode 100755 scripts/generate-schema

diff --git a/bigquery_schema_generator/generate_schema.py b/bigquery_schema_generator/generate_schema.py
index 84be658..bb7d2a4 100755
--- a/bigquery_schema_generator/generate_schema.py
+++ b/bigquery_schema_generator/generate_schema.py
@@ -55,6 +55,9 @@ class SchemaGenerator:
     # Detect a TIME field of the form [H]H:[M]M:[S]S[.DDDDDD]
     TIME_MATCHER = re.compile(r'^\d{1,2}:\d{1,2}:\d{1,2}(\.\d{1,6})?$')
 
+    INTEGER_MATCHER = re.compile(r'[-]?^\d+$')
+    FLOAT_MATCHER = re.compile(r'[-]?^\d+\.\d+$')
+
     def __init__(self,
                  keep_nulls=False,
                  debugging_interval=1000,
@@ -240,6 +243,7 @@ def get_schema_entry(self, key, value):
         object, instead of a primitive.
         """
         value_mode, value_type = self.infer_bigquery_type(value)
+
         if value_type == 'RECORD':
             # recursively figure out the RECORD
             fields = OrderedDict()
@@ -326,6 +330,12 @@ def infer_value_type(self, value):
                 return 'DATE'
             elif self.TIME_MATCHER.match(value):
                 return 'TIME'
+            elif self.INTEGER_MATCHER.match(value):
+                return 'QINTEGER' # quoted integer
+            elif self.FLOAT_MATCHER.match(value):
+                return 'QFLOAT' # quoted float
+            elif value.lower() in ['true', 'false']:
+                return 'QBOOLEAN' # quoted boolean
             else:
                 return 'STRING'
         # Python 'bool' is a subclass of 'int' so we must check it first
@@ -412,8 +422,16 @@ def convert_type(atype, btype):
         return atype
     if atype == 'INTEGER' and btype == 'FLOAT':
         return 'FLOAT'
+    if atype == 'QINTEGER' and btype == 'QFLOAT':
+        return 'QFLOAT'
     if atype == 'FLOAT' and btype == 'INTEGER':
         return 'FLOAT'
+    if atype == 'QFLOAT' and btype == 'QINTEGER':
+        return 'QFLOAT'
+    if atype in ['QINTEGER', 'QFLOAT', 'QBOOLEAN'] and btype == 'STRING':
+        return 'STRING'
+    if atype == 'STRING' and btype in ['QINTEGER', 'QFLOAT', 'QBOOLEAN']:
+        return 'STRING'
     if is_string_type(atype) and is_string_type(btype):
         return 'STRING'
     return None
@@ -466,6 +484,8 @@ def flatten_schema_map(schema_map, keep_nulls=False):
                 else:
                     # Recursively flatten the sub-fields of a RECORD entry.
                     new_value = flatten_schema_map(value, keep_nulls)
+            elif key == 'type' and value in ['QINTEGER', 'QFLOAT', 'QBOOLEAN']:
+                new_value = value[1:]
             else:
                 new_value = value
             new_info[key] = new_value
diff --git a/scripts/generate-schema b/scripts/generate-schema
deleted file mode 100755
index 3865080..0000000
--- a/scripts/generate-schema
+++ /dev/null
@@ -1 +0,0 @@
-python3 -m bigquery_schema_generator.generate_schema "$@"
diff --git a/setup.py b/setup.py
index 65d7ed7..f230034 100644
--- a/setup.py
+++ b/setup.py
@@ -22,5 +22,10 @@
       author_email='brian@xparks.net',
       license='Apache 2.0',
       packages=['bigquery_schema_generator'],
-      scripts=['scripts/generate-schema'],
-      python_requires='~=3.5')
+      python_requires='~=3.5',
+      entry_points={
+          'console_scripts': [
+            'generate-schema = bigquery_schema_generator.generate_schema:main'
+        ]
+      }
+)

From 88f567dcc8ec17c418caa77839e7176d03f7ff3a Mon Sep 17 00:00:00 2001
From: Luigi Mori <l@isidora.org>
Date: Fri, 7 Dec 2018 14:43:51 +0100
Subject: [PATCH 02/14] Adds fix for string types

Signed-off-by: Luigi Mori <l@isidora.org>
---
 bigquery_schema_generator/generate_schema.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/bigquery_schema_generator/generate_schema.py b/bigquery_schema_generator/generate_schema.py
index bb7d2a4..671490e 100755
--- a/bigquery_schema_generator/generate_schema.py
+++ b/bigquery_schema_generator/generate_schema.py
@@ -440,8 +440,7 @@ def convert_type(atype, btype):
 def is_string_type(thetype):
     """Returns true if the type is one of: STRING, TIMESTAMP, DATE, or
     TIME."""
-    return (thetype == 'STRING' or thetype == 'TIMESTAMP' or
-            thetype == 'DATE' or thetype == 'TIME')
+    return thetype in ['STRING', 'TIMESTAMP', 'DATE', 'TIME', 'QINTEGER', 'QFLOAT', 'QBOOLEAN']
 
 
 def flatten_schema_map(schema_map, keep_nulls=False):

From 47aa01438219ce4d9861c357193d315db4457cc3 Mon Sep 17 00:00:00 2001
From: Luigi Mori <l@isidora.org>
Date: Fri, 7 Dec 2018 15:00:52 +0100
Subject: [PATCH 03/14] Tuned Date Regex

Signed-off-by: Luigi Mori <l@isidora.org>
---
 bigquery_schema_generator/generate_schema.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bigquery_schema_generator/generate_schema.py b/bigquery_schema_generator/generate_schema.py
index 671490e..b96ee8a 100755
--- a/bigquery_schema_generator/generate_schema.py
+++ b/bigquery_schema_generator/generate_schema.py
@@ -50,7 +50,7 @@ class SchemaGenerator:
         r'(([+-]\d{1,2}(:\d{1,2})?)|Z)?$')
 
     # Detect a DATE field of the form YYYY-[M]M-[D]D.
-    DATE_MATCHER = re.compile(r'^\d{4}-\d{1,2}-\d{1,2}$')
+    DATE_MATCHER = re.compile(r'^\d{4}-(?:0[1-9]|1[012])-(?:0[1-9]|[12][0-9]|3[01])$')
 
     # Detect a TIME field of the form [H]H:[M]M:[S]S[.DDDDDD]
     TIME_MATCHER = re.compile(r'^\d{1,2}:\d{1,2}:\d{1,2}(\.\d{1,6})?$')

From cc927d3d9639b16641d79381e43991cf27e9c6e2 Mon Sep 17 00:00:00 2001
From: Luigi Mori <l@isidora.org>
Date: Fri, 7 Dec 2018 23:48:14 +0100
Subject: [PATCH 04/14] Improved DATE regex and modified tests

Signed-off-by: Luigi Mori <l@isidora.org>
---
 bigquery_schema_generator/generate_schema.py |  2 +-
 tests/test_generate_schema.py                |  1 +
 tests/testdata.txt                           | 79 ++++++++++++++++++++
 3 files changed, 81 insertions(+), 1 deletion(-)

diff --git a/bigquery_schema_generator/generate_schema.py b/bigquery_schema_generator/generate_schema.py
index b96ee8a..d8e66e5 100755
--- a/bigquery_schema_generator/generate_schema.py
+++ b/bigquery_schema_generator/generate_schema.py
@@ -50,7 +50,7 @@ class SchemaGenerator:
         r'(([+-]\d{1,2}(:\d{1,2})?)|Z)?$')
 
     # Detect a DATE field of the form YYYY-[M]M-[D]D.
-    DATE_MATCHER = re.compile(r'^\d{4}-(?:0[1-9]|1[012])-(?:0[1-9]|[12][0-9]|3[01])$')
+    DATE_MATCHER = re.compile(r'^\d{4}-(?:[1-9]|0[1-9]|1[012])-(?:[1-9]|0[1-9]|[12][0-9]|3[01])$')
 
     # Detect a TIME field of the form [H]H:[M]M:[S]S[.DDDDDD]
     TIME_MATCHER = re.compile(r'^\d{1,2}:\d{1,2}:\d{1,2}(\.\d{1,6})?$')
diff --git a/tests/test_generate_schema.py b/tests/test_generate_schema.py
index b239b3b..5e9fe15 100755
--- a/tests/test_generate_schema.py
+++ b/tests/test_generate_schema.py
@@ -81,6 +81,7 @@ def test_date_matcher_valid(self):
     def test_date_matcher_invalid(self):
         self.assertFalse(SchemaGenerator.DATE_MATCHER.match('17-05-22'))
         self.assertFalse(SchemaGenerator.DATE_MATCHER.match('2017-111-22'))
+        self.assertFalse(SchemaGenerator.DATE_MATCHER.match('1988-00-00'))
 
     def test_time_matcher_valid(self):
         self.assertTrue(SchemaGenerator.TIME_MATCHER.match('12:33:01'))
diff --git a/tests/testdata.txt b/tests/testdata.txt
index 977f619..6b0d4d3 100644
--- a/tests/testdata.txt
+++ b/tests/testdata.txt
@@ -477,3 +477,82 @@ SCHEMA
   }
 ]
 END
+
+# QINTEGER, QFLOAT, QBOOLEAN
+DATA
+{ "qi" : "1", "qf": "1", "qb": "true" }
+{ "qi" : "2", "qf": "1.1", "qb": "True" }
+{ "qi" : "3", "qf": "2", "qb": "false" }
+SCHEMA
+[
+  {
+    "mode": "NULLABLE",
+    "name": "qi",
+    "type": "INTEGER"
+  },
+  {
+    "mode": "NULLABLE",
+    "name": "qf",
+    "type": "FLOAT"
+  },
+  {
+    "mode": "NULLABLE",
+    "name": "qb",
+    "type": "BOOLEAN"
+  }
+]
+END
+
+# From STRING to [QINTEGER, QFLOAT, QBOOLEAN] = STRING
+DATA
+{ "qi" : "foo", "qf": "bar", "qb": "foo2" }
+{ "qi" : "2", "qf": "1.1", "qb": "True" }
+SCHEMA
+[
+  {
+    "mode": "NULLABLE",
+    "name": "qi",
+    "type": "STRING"
+  },
+  {
+    "mode": "NULLABLE",
+    "name": "qf",
+    "type": "STRING"
+  },
+  {
+    "mode": "NULLABLE",
+    "name": "qb",
+    "type": "STRING"
+  }
+]
+END
+
+# QINTEGER -> QFLOAT -> STRING
+DATA
+{ "qn" : "1" }
+{ "qn" : "1.1" }
+{ "qn" : "test" }
+SCHEMA
+[
+  {
+    "mode": "NULLABLE",
+    "name": "qn",
+    "type": "STRING"
+  }
+]
+END
+
+# QBOOLEAN -> STRING
+DATA
+{ "qb" : "true" }
+{ "qb" : "False" }
+{ "qb" : "test" }
+SCHEMA
+[
+  {
+    "mode": "NULLABLE",
+    "name": "qb",
+    "type": "STRING"
+  }
+]
+END

From 04fe116ea275a514e7f6844ef9e23d74ac808c00 Mon Sep 17 00:00:00 2001
From: Brian Park <brian@xparks.net>
Date: Fri, 7 Dec 2018 22:22:08 -0800
Subject: [PATCH 05/14] Add conversion between quoted types and their
 equivalent unquoted types (e.g. QINTEGER + INTEGER), and cross conversions
 (QINTEGER + FLOAT, or QFLOAT + INTEGER); add extensive unit tests to cover
 all combinations of conversions

---
 bigquery_schema_generator/generate_schema.py | 70 ++++++++++++---
 tests/test_generate_schema.py                | 60 ++++++++++++-
 tests/testdata.txt                           | 95 +++++++++++++++++++-
 3 files changed, 209 insertions(+), 16 deletions(-)

diff --git a/bigquery_schema_generator/generate_schema.py b/bigquery_schema_generator/generate_schema.py
index d8e66e5..5c033b8 100755
--- a/bigquery_schema_generator/generate_schema.py
+++ b/bigquery_schema_generator/generate_schema.py
@@ -413,34 +413,80 @@ def run(self):
 
 def convert_type(atype, btype):
     """Return the compatible type between 'atype' and 'btype'. Return 'None'
-    if there is no compatible type. Type conversions are:
-
-    * INTEGER, FLOAT => FLOAT
-    * DATE, TIME, TIMESTAMP, STRING => STRING
+    if there is no compatible type. Type conversions (in order of precedence)
+    are:
+
+    * type + type => type
+    * [Q]BOOLEAN + [Q]BOOLEAN => BOOLEAN
+    * [Q]INTEGER + [Q]INTEGER => INTEGER
+    * [Q]FLOAT + [Q]FLOAT => FLOAT
+    * QINTEGER + QFLOAT = QFLOAT
+    * QFLOAT + QINTEGER = QFLOAT
+    * [Q]INTEGER + [Q]FLOAT => FLOAT (except QINTEGER + QFLOAT)
+    * [Q]FLOAT + [Q]INTEGER => FLOAT (except QFLOAT + QINTEGER)
+    * (DATE, TIME, TIMESTAMP, QBOOLEAN, QINTEGER, QFLOAT, STRING) +
+        (DATE, TIME, TIMESTAMP, QBOOLEAN, QINTEGER, QFLOAT, STRING) => STRING
     """
+    # type + type => type
     if atype == btype:
         return atype
-    if atype == 'INTEGER' and btype == 'FLOAT':
+
+    # [Q]BOOLEAN + [Q]BOOLEAN => BOOLEAN
+    if atype == 'BOOLEAN' and btype == 'QBOOLEAN':
+        return 'BOOLEAN'
+    if atype == 'QBOOLEAN' and btype == 'BOOLEAN':
+        return 'BOOLEAN'
+
+    # [Q]INTEGER + [Q]INTEGER => INTEGER
+    if atype == 'QINTEGER' and btype == 'INTEGER':
+        return 'INTEGER'
+    if atype == 'INTEGER' and btype == 'QINTEGER':
+        return 'INTEGER'
+
+    # [Q]FLOAT + [Q]FLOAT => FLOAT
+    if atype == 'QFLOAT' and btype == 'FLOAT':
+        return 'FLOAT'
+    if atype == 'FLOAT' and btype == 'QFLOAT':
         return 'FLOAT'
+
+    # QINTEGER + QFLOAT => QFLOAT
     if atype == 'QINTEGER' and btype == 'QFLOAT':
         return 'QFLOAT'
-    if atype == 'FLOAT' and btype == 'INTEGER':
-        return 'FLOAT'
+
+    # QFLOAT + QINTEGER => QFLOAT
     if atype == 'QFLOAT' and btype == 'QINTEGER':
         return 'QFLOAT'
-    if atype in ['QINTEGER', 'QFLOAT', 'QBOOLEAN'] and btype == 'STRING':
-        return 'STRING'
-    if atype == 'STRING' and btype in ['QINTEGER', 'QFLOAT', 'QBOOLEAN']:
-        return 'STRING'
+
+    # [Q]INTEGER + [Q]FLOAT => FLOAT (except QINTEGER + QFLOAT => QFLOAT)
+    if atype == 'INTEGER' and btype == 'FLOAT':
+        return 'FLOAT'
+    if atype == 'INTEGER' and btype == 'QFLOAT':
+        return 'FLOAT'
+    if atype == 'QINTEGER' and btype == 'FLOAT':
+        return 'FLOAT'
+
+    # [Q]FLOAT + [Q]INTEGER => FLOAT (except # QFLOAT + QINTEGER => QFLOAT)
+    if atype == 'FLOAT' and btype == 'INTEGER':
+        return 'FLOAT'
+    if atype == 'FLOAT' and btype == 'QINTEGER':
+        return 'FLOAT'
+    if atype == 'QFLOAT' and btype == 'INTEGER':
+        return 'FLOAT'
+
+    # All remaining combination of:
+    # (DATE, TIME, TIMESTAMP, QBOOLEAN, QINTEGER, QFLOAT, STRING) +
+    #   (DATE, TIME, TIMESTAMP, QBOOLEAN, QINTEGER, QFLOAT, STRING) => STRING
     if is_string_type(atype) and is_string_type(btype):
         return 'STRING'
+
     return None
 
 
 def is_string_type(thetype):
     """Returns true if the type is one of: STRING, TIMESTAMP, DATE, or
     TIME."""
-    return thetype in ['STRING', 'TIMESTAMP', 'DATE', 'TIME', 'QINTEGER', 'QFLOAT', 'QBOOLEAN']
+    return thetype in ['STRING', 'TIMESTAMP', 'DATE', 'TIME',
+        'QINTEGER', 'QFLOAT', 'QBOOLEAN']
 
 
 def flatten_schema_map(schema_map, keep_nulls=False):
diff --git a/tests/test_generate_schema.py b/tests/test_generate_schema.py
index 5e9fe15..8b37dfc 100755
--- a/tests/test_generate_schema.py
+++ b/tests/test_generate_schema.py
@@ -103,8 +103,11 @@ def test_infer_value_type(self):
                          generator.infer_value_type('2018-02-08T12:34:56'))
         self.assertEqual('STRING', generator.infer_value_type('abc'))
         self.assertEqual('BOOLEAN', generator.infer_value_type(True))
+        self.assertEqual('QBOOLEAN', generator.infer_value_type('True'))
         self.assertEqual('INTEGER', generator.infer_value_type(1))
+        self.assertEqual('QINTEGER', generator.infer_value_type('2'))
         self.assertEqual('FLOAT', generator.infer_value_type(2.0))
+        self.assertEqual('QFLOAT', generator.infer_value_type('3.0'))
         self.assertEqual('RECORD', generator.infer_value_type({
             'a': 1,
             'b': 2
@@ -236,16 +239,64 @@ def test_infer_array_type(self):
 
     def test_convert_type(self):
         # no conversion needed
+        self.assertEqual('BOOLEAN', convert_type('BOOLEAN', 'BOOLEAN'))
         self.assertEqual('INTEGER', convert_type('INTEGER', 'INTEGER'))
         self.assertEqual('FLOAT', convert_type('FLOAT', 'FLOAT'))
         self.assertEqual('STRING', convert_type('STRING', 'STRING'))
-        self.assertEqual('BOOLEAN', convert_type('BOOLEAN', 'BOOLEAN'))
         self.assertEqual('DATE', convert_type('DATE', 'DATE'))
         self.assertEqual('RECORD', convert_type('RECORD', 'RECORD'))
 
-        # conversions
+        # quoted and unquoted versions of the same type
+        self.assertEqual('BOOLEAN', convert_type('BOOLEAN', 'QBOOLEAN'))
+        self.assertEqual('BOOLEAN', convert_type('QBOOLEAN', 'BOOLEAN'))
+        self.assertEqual('INTEGER', convert_type('INTEGER', 'QINTEGER'))
+        self.assertEqual('INTEGER', convert_type('QINTEGER', 'INTEGER'))
+        self.assertEqual('FLOAT', convert_type('FLOAT', 'QFLOAT'))
+        self.assertEqual('FLOAT', convert_type('QFLOAT', 'FLOAT'))
+
+        # [Q]INTEGER and [Q]FLOAT conversions
         self.assertEqual('FLOAT', convert_type('INTEGER', 'FLOAT'))
+        self.assertEqual('FLOAT', convert_type('INTEGER', 'QFLOAT'))
+        self.assertEqual('FLOAT', convert_type('QINTEGER', 'FLOAT'))
+        self.assertEqual('QFLOAT', convert_type('QINTEGER', 'QFLOAT'))
         self.assertEqual('FLOAT', convert_type('FLOAT', 'INTEGER'))
+        self.assertEqual('FLOAT', convert_type('FLOAT', 'QINTEGER'))
+        self.assertEqual('FLOAT', convert_type('QFLOAT', 'INTEGER'))
+        self.assertEqual('QFLOAT', convert_type('QFLOAT', 'QINTEGER'))
+
+        # quoted and STRING conversions
+        self.assertEqual('STRING', convert_type('STRING', 'QBOOLEAN'))
+        self.assertEqual('STRING', convert_type('STRING', 'QINTEGER'))
+        self.assertEqual('STRING', convert_type('STRING', 'QFLOAT'))
+        self.assertEqual('STRING', convert_type('QBOOLEAN', 'STRING'))
+        self.assertEqual('STRING', convert_type('QINTEGER', 'STRING'))
+        self.assertEqual('STRING', convert_type('QFLOAT', 'STRING'))
+
+        # quoted and DATE conversions
+        self.assertEqual('STRING', convert_type('DATE', 'QBOOLEAN'))
+        self.assertEqual('STRING', convert_type('DATE', 'QINTEGER'))
+        self.assertEqual('STRING', convert_type('DATE', 'QFLOAT'))
+        self.assertEqual('STRING', convert_type('QBOOLEAN', 'DATE'))
+        self.assertEqual('STRING', convert_type('QINTEGER', 'DATE'))
+        self.assertEqual('STRING', convert_type('QFLOAT', 'DATE'))
+
+        # quoted and TIME conversions
+        self.assertEqual('STRING', convert_type('TIME', 'QBOOLEAN'))
+        self.assertEqual('STRING', convert_type('TIME', 'QINTEGER'))
+        self.assertEqual('STRING', convert_type('TIME', 'QFLOAT'))
+        self.assertEqual('STRING', convert_type('QBOOLEAN', 'TIME'))
+        self.assertEqual('STRING', convert_type('QINTEGER', 'TIME'))
+        self.assertEqual('STRING', convert_type('QFLOAT', 'TIME'))
+
+        # quoted and TIMESTAMP conversions
+        self.assertEqual('STRING', convert_type('TIMESTAMP', 'QBOOLEAN'))
+        self.assertEqual('STRING', convert_type('TIMESTAMP', 'QINTEGER'))
+        self.assertEqual('STRING', convert_type('TIMESTAMP', 'QFLOAT'))
+        self.assertEqual('STRING', convert_type('QBOOLEAN', 'TIMESTAMP'))
+        self.assertEqual('STRING', convert_type('QINTEGER', 'TIMESTAMP'))
+        self.assertEqual('STRING', convert_type('QFLOAT', 'TIMESTAMP'))
+
+        # DATE, TIME, and TIMESTAMP conversions
         self.assertEqual('STRING', convert_type('DATE', 'TIME'))
         self.assertEqual('STRING', convert_type('DATE', 'TIMESTAMP'))
         self.assertEqual('STRING', convert_type('DATE', 'STRING'))
@@ -255,6 +306,11 @@ def test_convert_type(self):
 
         # no conversion possible
         self.assertEqual(None, convert_type('INTEGER', 'BOOLEAN'))
+        self.assertEqual(None, convert_type('QINTEGER', 'BOOLEAN'))
+        self.assertEqual(None, convert_type('INTEGER', 'QBOOLEAN'))
+        self.assertEqual(None, convert_type('FLOAT', 'BOOLEAN'))
+        self.assertEqual(None, convert_type('QFLOAT', 'BOOLEAN'))
+        self.assertEqual(None, convert_type('FLOAT', 'QBOOLEAN'))
         self.assertEqual(None, convert_type('FLOAT', 'STRING'))
         self.assertEqual(None, convert_type('STRING', 'BOOLEAN'))
         self.assertEqual(None, convert_type('BOOLEAN', 'DATE'))
diff --git a/tests/testdata.txt b/tests/testdata.txt
index 6b0d4d3..a03cad3 100644
--- a/tests/testdata.txt
+++ b/tests/testdata.txt
@@ -480,9 +480,9 @@ END
 
 # QINTEGER, QFLOAT, QBOOLEAN
 DATA
-{ "qi" : "1", "qf": "1", "qb": "true" }
+{ "qi" : "1", "qf": "1.0", "qb": "true" }
 { "qi" : "2", "qf": "1.1", "qb": "True" }
-{ "qi" : "3", "qf": "2", "qb": "false" }
+{ "qi" : "3", "qf": "2.0", "qb": "false" }
 SCHEMA
 [
   {
@@ -503,6 +503,49 @@ SCHEMA
 ]
 END
 
+# QINTEGER, QFLOAT, QBOOLEAN -> INTEGER, FLOAT, BOOLEAN
+DATA
+{ "qi" : "1", "qf": "1.0", "qb": "true" }
+{ "qi" : 2, "qf": 2.0, "qb": false }
+SCHEMA
+[
+  {
+    "mode": "NULLABLE",
+    "name": "qi",
+    "type": "INTEGER"
+  },
+  {
+    "mode": "NULLABLE",
+    "name": "qf",
+    "type": "FLOAT"
+  },
+  {
+    "mode": "NULLABLE",
+    "name": "qb",
+    "type": "BOOLEAN"
+  }
+]
+END
+
+# mixed [Q]INTEGER, [Q]FLOAT
+DATA
+{ "qf_i" : "1.0", "qi_f": "2" }
+{ "qf_i" : 1.1, "qi_f": 2.1 }
+SCHEMA
+[
+  {
+    "mode": "NULLABLE",
+    "name": "qf_i",
+    "type": "FLOAT"
+  },
+  {
+    "mode": "NULLABLE",
+    "name": "qi_f",
+    "type": "FLOAT"
+  }
+]
+END
+
 # From STRING to [QINTEGER, QFLOAT, QBOOLEAN] = STRING
 DATA
 { "qi" : "foo", "qf": "bar", "qb": "foo2" }
@@ -556,3 +599,51 @@ SCHEMA
   }
 ]
 END
+
+# DATE, TIME, DATETIME
+DATA
+{ "qd" : "2018-12-07", "qt": "21:52:00", "qdt": "2018-12-07T21:52:00-08:00" }
+SCHEMA
+[
+  {
+    "mode": "NULLABLE",
+    "name": "qd",
+    "type": "DATE"
+  },
+  {
+    "mode": "NULLABLE",
+    "name": "qt",
+    "type": "TIME"
+  },
+  {
+    "mode": "NULLABLE",
+    "name": "qdt",
+    "type": "TIMESTAMP"
+  }
+]
+END
+
+# DATE, TIME, DATETIME + [QINTEGER, QFLOAT, QBOOLEAN] => STRING
+DATA
+{ "qd" : "2018-12-07", "qt": "21:52:00", "qdt": "2018-12-07T21:52:00-08:00" }
+{ "qd" : "1", "qt": "1.1", "qdt": "true" }
+SCHEMA
+[
+  {
+    "mode": "NULLABLE",
+    "name": "qd",
+    "type": "STRING"
+  },
+  {
+    "mode": "NULLABLE",
+    "name": "qt",
+    "type": "STRING"
+  },
+  {
+    "mode": "NULLABLE",
+    "name": "qdt",
+    "type": "STRING"
+  }
+]
+END
+

From 4e101d8cdbb4144474b0cab58b1e3e82b4a46c0e Mon Sep 17 00:00:00 2001
From: Luigi Mori <l@isidora.org>
Date: Tue, 11 Dec 2018 13:25:18 +0100
Subject: [PATCH 06/14] Fix negative number support in Q REs and adds unit
 tests

Signed-off-by: Luigi Mori <l@isidora.org>
---
 bigquery_schema_generator/generate_schema.py | 4 ++--
 tests/test_generate_schema.py                | 5 +++++
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/bigquery_schema_generator/generate_schema.py b/bigquery_schema_generator/generate_schema.py
index 5c033b8..5bb1a79 100755
--- a/bigquery_schema_generator/generate_schema.py
+++ b/bigquery_schema_generator/generate_schema.py
@@ -55,8 +55,8 @@ class SchemaGenerator:
     # Detect a TIME field of the form [H]H:[M]M:[S]S[.DDDDDD]
     TIME_MATCHER = re.compile(r'^\d{1,2}:\d{1,2}:\d{1,2}(\.\d{1,6})?$')
 
-    INTEGER_MATCHER = re.compile(r'[-]?^\d+$')
-    FLOAT_MATCHER = re.compile(r'[-]?^\d+\.\d+$')
+    INTEGER_MATCHER = re.compile(r'^[-]?\d+$')
+    FLOAT_MATCHER = re.compile(r'^[-]?\d+\.\d+$')
 
     def __init__(self,
                  keep_nulls=False,
diff --git a/tests/test_generate_schema.py b/tests/test_generate_schema.py
index 8b37dfc..03ee8c8 100755
--- a/tests/test_generate_schema.py
+++ b/tests/test_generate_schema.py
@@ -104,10 +104,15 @@ def test_infer_value_type(self):
         self.assertEqual('STRING', generator.infer_value_type('abc'))
         self.assertEqual('BOOLEAN', generator.infer_value_type(True))
         self.assertEqual('QBOOLEAN', generator.infer_value_type('True'))
+        self.assertEqual('QBOOLEAN', generator.infer_value_type('False'))
+        self.assertEqual('QBOOLEAN', generator.infer_value_type('true'))
+        self.assertEqual('QBOOLEAN', generator.infer_value_type('false'))
         self.assertEqual('INTEGER', generator.infer_value_type(1))
         self.assertEqual('QINTEGER', generator.infer_value_type('2'))
+        self.assertEqual('QINTEGER', generator.infer_value_type('-1000'))
         self.assertEqual('FLOAT', generator.infer_value_type(2.0))
         self.assertEqual('QFLOAT', generator.infer_value_type('3.0'))
+        self.assertEqual('QFLOAT', generator.infer_value_type('-5.4'))
         self.assertEqual('RECORD', generator.infer_value_type({
             'a': 1,
             'b': 2

From 6c26248b36d1be4355389ce2a812d42b30571e41 Mon Sep 17 00:00:00 2001
From: Brian Park <brian@xparks.net>
Date: Fri, 7 Dec 2018 10:22:48 -0800
Subject: [PATCH 07/14] README.md: fix typo 'theirs' to 'their'

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b1961f6..d4a30e7 100644
--- a/README.md
+++ b/README.md
@@ -238,7 +238,7 @@ $ generate-schema --debugging_interval 50 < file.data.json > file.schema.json
 
 Instead of printing out the BigQuery schema, the `--debugging_map` prints out
 the bookkeeping metadata map which is used internally to keep track of the
-various fields and theirs types that were inferred using the data file. This
+various fields and their types that were inferred using the data file. This
 flag is intended to be used for debugging.
 
 ```

From d559594a6ec759301eccffa118cd77b7a2121ad9 Mon Sep 17 00:00:00 2001
From: Brian Park <brian@xparks.net>
Date: Fri, 7 Dec 2018 10:26:37 -0800
Subject: [PATCH 08/14] README.md: change 'downgrades to STRING' to 'upgrades
 to STRING' since STRING is the super type of TIME, DATE, TIMESTAMP; makes
 this usage consistent with other parts of that section

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index d4a30e7..3cc339f 100644
--- a/README.md
+++ b/README.md
@@ -282,7 +282,7 @@ compatibility rules implemented by **bq load**:
       upgraded to a `FLOAT`
     * the reverse does not happen, once a field is a `FLOAT`, it will remain a
       `FLOAT`
-* conflicting `TIME`, `DATE`, `TIMESTAMP` types downgrades to `STRING`
+* conflicting `TIME`, `DATE`, `TIMESTAMP` types upgrades to `STRING`
     * if a field is determined to have one type of "time" in one record, then
       subsequently a different "time" type, then the field will be assigned a
       `STRING` type

From e06987ff2752899c1c0601674adf5f06856a1992 Mon Sep 17 00:00:00 2001
From: Brian Park <brian@xparks.net>
Date: Tue, 11 Dec 2018 11:50:32 -0800
Subject: [PATCH 09/14] README.md: add documentation about additional type
 inference of strings which are semantically INTEGER or FLOAT

---
 README.md | 41 +++++++++++++++++++++++++++++------------
 1 file changed, 29 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index 3cc339f..fed1749 100644
--- a/README.md
+++ b/README.md
@@ -109,9 +109,9 @@ This is essentially what the `generate-schema` command does.
 
 **3) Python script**
 
-If you retrieved this code from its [GitHub
-repository](https://github.com/bxparks/bigquery-schema-generator), then you can invoke
-the Python script directly:
+If you retrieved this code from its
+[GitHub repository](https://github.com/bxparks/bigquery-schema-generator),
+then you can invoke the Python script directly:
 ```
 $ ./generate_schema.py < file.data.json > file.schema.json
 ```
@@ -121,21 +121,33 @@ $ ./generate_schema.py < file.data.json > file.schema.json
 The resulting schema file can be given to the **bq load** command using the
 `--schema` flag:
 ```
+
 $ bq load --source_format NEWLINE_DELIMITED_JSON \
         --ignore_unknown_values \
         --schema file.schema.json \
         mydataset.mytable \
         file.data.json
 ```
-
 where `mydataset.mytable` is the target table in BigQuery.
 
-A useful flag for **bq load** is `--ignore_unknown_values`, which causes **bq load**
-to ignore fields in the input data which are not defined in the schema. When
-`generate_schema.py` detects an inconsistency in the definition of a particular
-field in the input data, it removes the field from the schema definition.
-Without the `--ignore_unknown_values`, the **bq load** fails when the
-inconsistent data record is read.
+For debugging purposes, here is the equivalent `bq load` command using schema
+autodetection:
+
+```
+$ bq load --source_format NEWLINE_DELIMITED_JSON \
+    --ignore_unknown_values \
+    --autodetect \
+    mydataset.mytable \
+    file.data.json
+```
+
+A useful flag for `bq load` is `--ignore_unknown_values`, which causes `bq
+load` to ignore fields in the input data which are not defined in the schema.
+When `generate_schema.py` detects an inconsistency in the definition of a
+particular field in the input data, it removes the field from the schema
+definition. Without the `--ignore_unknown_values`, the `bq load` fails when
+the inconsistent data record is read. Another useful flag during development and
+debugging is `--replace` which replaces any existing BigQuery table.
 
 After the BigQuery table is loaded, the schema can be retrieved using:
 
@@ -299,6 +311,10 @@ compatibility rules implemented by **bq load**:
     * we follow the same logic as **bq load** and always infer these as
       `TIMESTAMP`
 
+The BigQuery loader also looks inside strings to determine if they are actually
+INTEGER or FLOAT types instead. Luigi Mori (jtschichold@) added additional logic
+to replicate the type conversion logic used by `bq load` for these strings.
+
 ## Examples
 
 Here is an example of a single JSON data record on the STDIN (the `^D` below
@@ -392,9 +408,10 @@ tested it on:
 * Ubuntu 16.04, Python 3.5.2
 * MacOS 10.13.2, [Python 3.6.4](https://www.python.org/downloads/release/python-364/)
 
-## Author
+## Authors
 
-Created by Brian T. Park (brian@xparks.net).
+* Created by Brian T. Park (brian@xparks.net).
+* Additional type inference logic by Luigi Mori (jtschichold@).
 
 ## License
 

From 2a03c91efbc05e27365cc539bd709501cabb191b Mon Sep 17 00:00:00 2001
From: Brian Park <brian@xparks.net>
Date: Mon, 17 Dec 2018 10:39:33 -0800
Subject: [PATCH 10/14] Beautify generate_schema.py through yapf3 formatter

---
 bigquery_schema_generator/generate_schema.py | 35 +++++++++++---------
 tests/test_generate_schema.py                | 12 ++++---
 2 files changed, 27 insertions(+), 20 deletions(-)

diff --git a/bigquery_schema_generator/generate_schema.py b/bigquery_schema_generator/generate_schema.py
index 5bb1a79..7a6675b 100755
--- a/bigquery_schema_generator/generate_schema.py
+++ b/bigquery_schema_generator/generate_schema.py
@@ -50,7 +50,8 @@ class SchemaGenerator:
         r'(([+-]\d{1,2}(:\d{1,2})?)|Z)?$')
 
     # Detect a DATE field of the form YYYY-[M]M-[D]D.
-    DATE_MATCHER = re.compile(r'^\d{4}-(?:[1-9]|0[1-9]|1[012])-(?:[1-9]|0[1-9]|[12][0-9]|3[01])$')
+    DATE_MATCHER = re.compile(
+        r'^\d{4}-(?:[1-9]|0[1-9]|1[012])-(?:[1-9]|0[1-9]|[12][0-9]|3[01])$')
 
     # Detect a TIME field of the form [H]H:[M]M:[S]S[.DDDDDD]
     TIME_MATCHER = re.compile(r'^\d{1,2}:\d{1,2}:\d{1,2}(\.\d{1,6})?$')
@@ -139,8 +140,8 @@ def deduce_schema_for_line(self, json_object, schema_map):
             schema_entry = schema_map.get(key)
             try:
                 new_schema_entry = self.get_schema_entry(key, value)
-                merged_schema_entry = self.merge_schema_entry(schema_entry,
-                                                              new_schema_entry)
+                merged_schema_entry = self.merge_schema_entry(
+                    schema_entry, new_schema_entry)
             except Exception as e:
                 self.log_error(str(e))
                 continue
@@ -203,8 +204,8 @@ def merge_schema_entry(self, old_schema_entry, new_schema_entry):
             elif old_mode == 'REPEATED' and new_mode == 'NULLABLE':
                 # TODO: Maybe remove this warning output. It was helpful during
                 # development, but maybe it's just natural.
-                self.log_error('Leaving schema for "%s" as REPEATED RECORD' %
-                               old_name)
+                self.log_error(
+                    'Leaving schema for "%s" as REPEATED RECORD' % old_name)
 
             # RECORD type needs a recursive merging of sub-fields. We merge into
             # the 'old_schema_entry' which assumes that the 'old_schema_entry'
@@ -244,6 +245,7 @@ def get_schema_entry(self, key, value):
         """
         value_mode, value_type = self.infer_bigquery_type(value)
 
+        # yapf: disable
         if value_type == 'RECORD':
             # recursively figure out the RECORD
             fields = OrderedDict()
@@ -288,6 +290,7 @@ def get_schema_entry(self, key, value):
                                             ('name', key),
                                             ('type', value_type),
                                         ]))])
+        # yapf: enable
         return schema_entry
 
     def infer_bigquery_type(self, node_value):
@@ -304,8 +307,8 @@ def infer_bigquery_type(self, node_value):
         array_type = self.infer_array_type(node_value)
         if not array_type:
             raise Exception(
-                "All array elements must be the same compatible type: %s"
-                % node_value)
+                "All array elements must be the same compatible type: %s" %
+                node_value)
 
         # Disallow array of special types (with '__' not supported).
         # EXCEPTION: allow (REPEATED __empty_record) ([{}]) because it is
@@ -331,11 +334,11 @@ def infer_value_type(self, value):
             elif self.TIME_MATCHER.match(value):
                 return 'TIME'
             elif self.INTEGER_MATCHER.match(value):
-                return 'QINTEGER' # quoted integer
+                return 'QINTEGER'  # quoted integer
             elif self.FLOAT_MATCHER.match(value):
-                return 'QFLOAT' # quoted float
+                return 'QFLOAT'  # quoted float
             elif value.lower() in ['true', 'false']:
-                return 'QBOOLEAN' # quoted boolean
+                return 'QBOOLEAN'  # quoted boolean
             else:
                 return 'STRING'
         # Python 'bool' is a subclass of 'int' so we must check it first
@@ -485,8 +488,9 @@ def convert_type(atype, btype):
 def is_string_type(thetype):
     """Returns true if the type is one of: STRING, TIMESTAMP, DATE, or
     TIME."""
-    return thetype in ['STRING', 'TIMESTAMP', 'DATE', 'TIME',
-        'QINTEGER', 'QFLOAT', 'QBOOLEAN']
+    return thetype in [
+        'STRING', 'TIMESTAMP', 'DATE', 'TIME', 'QINTEGER', 'QFLOAT', 'QBOOLEAN'
+    ]
 
 
 def flatten_schema_map(schema_map, keep_nulls=False):
@@ -496,8 +500,8 @@ def flatten_schema_map(schema_map, keep_nulls=False):
     data.
     """
     if not isinstance(schema_map, dict):
-        raise Exception("Unexpected type '%s' for schema_map" %
-                        type(schema_map))
+        raise Exception(
+            "Unexpected type '%s' for schema_map" % type(schema_map))
 
     # Build the BigQuery schema from the internal 'schema_map'.
     schema = []
@@ -575,7 +579,8 @@ def main():
         default=1000)
     parser.add_argument(
         '--debugging_map',
-        help='Print the metadata schema_map instead of the schema for debugging',
+        help=
+        'Print the metadata schema_map instead of the schema for debugging',
         action="store_true")
     args = parser.parse_args()
 
diff --git a/tests/test_generate_schema.py b/tests/test_generate_schema.py
index 03ee8c8..54988d2 100755
--- a/tests/test_generate_schema.py
+++ b/tests/test_generate_schema.py
@@ -195,8 +195,9 @@ def test_infer_array_type(self):
         self.assertEqual('FLOAT', generator.infer_array_type([1.0, 2.0]))
         self.assertEqual('BOOLEAN', generator.infer_array_type([True, False]))
         self.assertEqual('STRING', generator.infer_array_type(['a', 'b']))
-        self.assertEqual(
-            'DATE', generator.infer_array_type(['2018-02-09', '2018-02-10']))
+        self.assertEqual('DATE',
+                         generator.infer_array_type(
+                             ['2018-02-09', '2018-02-10']))
         self.assertEqual('TIME',
                          generator.infer_array_type(['10:44:00', '10:44:01']))
         self.assertEqual('TIMESTAMP',
@@ -210,8 +211,9 @@ def test_infer_array_type(self):
         self.assertEqual('__empty_array__', generator.infer_array_type([[]]))
 
         # Mixed TIME, DATE, TIMESTAMP converts to STRING
-        self.assertEqual(
-            'STRING', generator.infer_array_type(['2018-02-09', '10:44:00']))
+        self.assertEqual('STRING',
+                         generator.infer_array_type(['2018-02-09',
+                                                     '10:44:00']))
         self.assertEqual('STRING',
                          generator.infer_array_type(
                              ['2018-02-09T11:00:00', '10:44:00']))
@@ -328,6 +330,7 @@ def test_is_string_type(self):
         self.assertTrue(is_string_type('TIME'))
 
     def test_sort_schema(self):
+        # yapf: disable
         unsorted = [{
             "mode": "REPEATED",
             "name": "a",
@@ -347,7 +350,6 @@ def test_sort_schema(self):
             "type": "STRING"
         }]
 
-        # yapf: disable
         expected = [
             OrderedDict([
                 ("mode", "REPEATED"),

From ec6febb9728f465967f28688e7f15ec685900676 Mon Sep 17 00:00:00 2001
From: Brian Park <brian@xparks.net>
Date: Mon, 17 Dec 2018 10:40:04 -0800
Subject: [PATCH 11/14] Update comments in testdata.txt for consistency

---
 tests/testdata.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/testdata.txt b/tests/testdata.txt
index a03cad3..df75626 100644
--- a/tests/testdata.txt
+++ b/tests/testdata.txt
@@ -527,7 +527,7 @@ SCHEMA
 ]
 END
 
-# mixed [Q]INTEGER, [Q]FLOAT
+# [Q]INTEGER + [Q]FLOAT -> FLOAT
 DATA
 { "qf_i" : "1.0", "qi_f": "2" }
 { "qf_i" : 1.1, "qi_f": 2.1 }
@@ -546,7 +546,7 @@ SCHEMA
 ]
 END
 
-# From STRING to [QINTEGER, QFLOAT, QBOOLEAN] = STRING
+# STRING + [QINTEGER, QFLOAT, QBOOLEAN] -> STRING
 DATA
 { "qi" : "foo", "qf": "bar", "qb": "foo2" }
 { "qi" : "2", "qf": "1.1", "qb": "True" }
@@ -623,7 +623,7 @@ SCHEMA
 ]
 END
 
-# DATE, TIME, DATETIME + [QINTEGER, QFLOAT, QBOOLEAN] => STRING
+# DATE, TIME, DATETIME + [QINTEGER, QFLOAT, QBOOLEAN] -> STRING
 DATA
 { "qd" : "2018-12-07", "qt": "21:52:00", "qdt": "2018-12-07T21:52:00-08:00" }
 { "qd" : "1", "qt": "1.1", "qdt": "true" }

From 063505a840b15abddaf5089d8f4e538e62fb35da Mon Sep 17 00:00:00 2001
From: Brian Park <brian@xparks.net>
Date: Mon, 17 Dec 2018 10:55:39 -0800
Subject: [PATCH 12/14] README.md: Add examples of QBOOLEAN, QINTEGER, and
 QFLOAT types

---
 README.md | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index fed1749..9b03283 100644
--- a/README.md
+++ b/README.md
@@ -311,9 +311,11 @@ compatibility rules implemented by **bq load**:
     * we follow the same logic as **bq load** and always infer these as
       `TIMESTAMP`
 
-The BigQuery loader also looks inside strings to determine if they are actually
-INTEGER or FLOAT types instead. Luigi Mori (jtschichold@) added additional logic
-to replicate the type conversion logic used by `bq load` for these strings.
+The BigQuery loader looks inside string values to determine if they are actually
+BOOLEAN, INTEGER or FLOAT types instead. In other words, `"True"` is considered
+a BOOLEAN type, `"1"` is considered an INTEGER type, and `"2.1"` is considered a
+FLOAT type. Luigi Mori (jtschichold@) added additional logic to replicate the
+type conversion logic used by `bq load` for these strings.
 
 ## Examples
 
@@ -403,6 +405,7 @@ took 77s on a Dell Precision M4700 laptop with an Intel Core i7-3840QM CPU @
 This project was initially developed on Ubuntu 17.04 using Python 3.5.3. I have
 tested it on:
 
+* Ubuntu 18.04, Python 3.6.7
 * Ubuntu 17.10, Python 3.6.3
 * Ubuntu 17.04, Python 3.5.3
 * Ubuntu 16.04, Python 3.5.2

From 1f4679fbe46c86b64a57fcfbefdca112bb635b94 Mon Sep 17 00:00:00 2001
From: Brian Park <brian@xparks.net>
Date: Mon, 17 Dec 2018 10:57:53 -0800
Subject: [PATCH 13/14] CHANGELOG.md: change to 4-space indents for MD files
 per GitHub guide

---
 CHANGELOG.md | 38 ++++++++++++++++++++------------------
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f74ae59..e6dc34b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,29 +1,31 @@
 # Changelog
 
 * 0.2.1 (2018-07-18)
-  * Add `anonymizer.py` script to create anonymized data files for benchmarking.
-  * Add benchmark numbers to README.md.
-  * Add `DEVELOPER.md` file to record how to upload to PyPI.
-  * Fix some minor warnings from pylint3.
+    * Add `anonymizer.py` script to create anonymized data files for
+      benchmarking.
+    * Add benchmark numbers to README.md.
+    * Add `DEVELOPER.md` file to record how to upload to PyPI.
+    * Fix some minor warnings from pylint3.
 * 0.2.0 (2018-02-10)
-  * Add support for `DATE` and `TIME` types.
-  * Update type conversion rules to be more compatible with **bq load**.
-    * Allow `DATE`, `TIME` and `TIMESTAMP` to gracefully degrade to `STRING`.
-    * Allow type conversions of elements within arrays
-      (e.g. array of `INTEGER` and `FLOAT`, or array of mixed `DATE`, `TIME`, or
-      `TIMESTAMP` elements).
-    * Better detection of invalid values (e.g. arrays of arrays).
+    * Add support for `DATE` and `TIME` types.
+    * Update type conversion rules to be more compatible with **bq load**.
+        * Allow `DATE`, `TIME` and `TIMESTAMP` to gracefully degrade to
+          `STRING`.
+        * Allow type conversions of elements within arrays
+          (e.g. array of `INTEGER` and `FLOAT`, or array of mixed `DATE`,
+          `TIME`, or `TIMESTAMP` elements).
+        * Better detection of invalid values (e.g. arrays of arrays).
 * 0.1.6 (2018-01-26)
-  * Pass along command line arguments to `generate-schema`.
+    * Pass along command line arguments to `generate-schema`.
 * 0.1.5 (2018-01-25)
-  * Updated installation instructions for MacOS.
+    * Updated installation instructions for MacOS.
 * 0.1.4 (2018-01-23)
-  * Attempt #3 to fix exception during pip3 install.
+    * Attempt #3 to fix exception during pip3 install.
 * 0.1.3 (2018-01-23)
-  * Attempt #2 to fix exception during pip3 install.
+    * Attempt #2 to fix exception during pip3 install.
 * 0.1.2 (2018-01-23)
-  * Attemp to fix exception during pip3 install. Didn't work. Pulled.
+    * Attempt to fix exception during pip3 install. Didn't work. Pulled.
 * 0.1.1 (2018-01-03)
-  * Install `generate-schema` script in `/usr/local/bin`
+    * Install `generate-schema` script in `/usr/local/bin`
 * 0.1 (2018-01-02)
-  * Iniitial release to PyPI.
+    * Initial release to PyPI.

From 1678335842d7536464d0fa113ca3d9cfd6ff1745 Mon Sep 17 00:00:00 2001
From: Brian Park <brian@xparks.net>
Date: Mon, 17 Dec 2018 10:58:33 -0800
Subject: [PATCH 14/14] Bump version to 0.3

---
 CHANGELOG.md | 6 ++++++
 README.md    | 2 +-
 setup.py     | 2 +-
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e6dc34b..b203f02 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,11 @@
 # Changelog
 
+* 0.3 (2018-12-17)
+    * Tighten TIMESTAMP and DATE validation (thanks jtschichold@).
+    * Inspect the internals of STRING values to infer BOOLEAN, INTEGER or FLOAT
+      types (thanks jtschichold@).
+    * Handle conversion of these string types when mixed with their non-quoted
+      equivalents, matching the conversion logic followed by 'bq load'.
 * 0.2.1 (2018-07-18)
     * Add `anonymizer.py` script to create anonymized data files for
       benchmarking.
diff --git a/README.md b/README.md
index 9b03283..28c7c4c 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@ Usage:
 $ generate-schema < file.data.json > file.schema.json
 ```
 
-Version: 0.2.1 (2018-07-18)
+Version: 0.3 (2018-12-17)
 
 ## Background
 
diff --git a/setup.py b/setup.py
index f230034..4541557 100644
--- a/setup.py
+++ b/setup.py
@@ -14,7 +14,7 @@
         long_description = 'BigQuery schema generator.'
 
 setup(name='bigquery-schema-generator',
-      version='0.2.1',
+      version='0.3',
       description='BigQuery schema generator',
       long_description=long_description,
       url='https://github.com/bxparks/bigquery-schema-generator',