From eb058b6eb526d1b1f486c45cad9858f382200757 Mon Sep 17 00:00:00 2001 From: Nick Pellegrino Date: Mon, 24 Dec 2018 11:33:28 -0500 Subject: [PATCH 1/4] use properties to determine database type --- commcare_export/writers.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/commcare_export/writers.py b/commcare_export/writers.py index c90ddc75..73ce6ed2 100644 --- a/commcare_export/writers.py +++ b/commcare_export/writers.py @@ -269,15 +269,27 @@ def __enter__(self): def __exit__(self, exc_type, exc_val, exc_tb): self.connection.close() + @property + def is_postgres(self): + return 'postgres' in self.db_url + + @property + def is_mysql(self): + return 'mysql' in self.db_url + + @property + def is_mssql(self): + return 'mssql' in self.db_url + @property def max_column_length(self): - if 'postgres' in self.db_url: + if self.is_postgres: # https://www.postgresql.org/docs/current/static/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS return 63 - if 'mysql' in self.db_url: + if self.is_mysql: # https://dev.mysql.com/doc/refman/8.0/en/identifiers.html return 64 - if 'mssql' in self.db_url: + if self.is_mssql: # https://docs.microsoft.com/en-us/sql/relational-databases/databases/database-identifiers?view=sql-server-2017 return 128 raise Exception("Unknown database dialect: {}".format(self.db_url)) From a5a422136b39563f602d7bace23496d0b3885ce5 Mon Sep 17 00:00:00 2001 From: Nick Pellegrino Date: Mon, 24 Dec 2018 11:34:13 -0500 Subject: [PATCH 2/4] never change primary key column type --- commcare_export/writers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commcare_export/writers.py b/commcare_export/writers.py index 73ce6ed2..30bd989b 100644 --- a/commcare_export/writers.py +++ b/commcare_export/writers.py @@ -455,7 +455,7 @@ def get_cols(): self.metadata.clear() self.metadata.reflect() columns = get_cols() - else: + elif not columns[column].primary_key: current_ty = columns[column].type new_type = None if self.strict_types: From c19d9812a22d9c92625f66ea800f2f865c91efaf Mon Sep 17 00:00:00 2001 From: Nick Pellegrino Date: Mon, 24 Dec 2018 11:35:36 -0500 Subject: [PATCH 3/4] for string data, pick a column type appropriate for that database --- commcare_export/writers.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/commcare_export/writers.py b/commcare_export/writers.py index 30bd989b..07478ac4 100644 --- a/commcare_export/writers.py +++ b/commcare_export/writers.py @@ -340,14 +340,20 @@ def best_type_for(self, val): if isinstance(val, int): return sqlalchemy.Integer() elif isinstance(val, six.string_types): - # Notes on the conversions between various string types: - # 1. PostgreSQL is the best; you can use TEXT everywhere and it works like a charm. - # 2. MySQL cannot build an index on TEXT due to the lack of a field length, so we - # try to use VARCHAR when possible. - if len(val) < self.MAX_VARCHAR_LEN: # FIXME: Is 255 an interesting cutoff? - return sqlalchemy.Unicode( max(len(val), self.MIN_VARCHAR_LEN), collation=self.collation) - else: + if self.is_postgres: + # PostgreSQL is the best; you can use TEXT everywhere and it works like a charm. return sqlalchemy.UnicodeText(collation=self.collation) + elif self.is_mysql: + # MySQL cannot build an index on TEXT due to the lack of a field length, so we + # try to use VARCHAR when possible. + if len(val) < self.MAX_VARCHAR_LEN: # FIXME: Is 255 an interesting cutoff? + return sqlalchemy.Unicode( max(len(val), self.MIN_VARCHAR_LEN), collation=self.collation) + else: + return sqlalchemy.UnicodeText(collation=self.collation) + elif self.is_mssql: + return sqlalchemy.NVARCHAR(collation=self.collation) + else: + raise Exception("Unknown database dialect: {}".format(self.db_url)) else: # We do not have a name for "bottom" in SQL aka the type whose least upper bound # with any other type is the other type. From 876c46d69fc3aab849260950cfc03020175da0d5 Mon Sep 17 00:00:00 2001 From: Nick Pellegrino Date: Mon, 24 Dec 2018 11:35:59 -0500 Subject: [PATCH 4/4] lint --- commcare_export/writers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/commcare_export/writers.py b/commcare_export/writers.py index 07478ac4..b026431b 100644 --- a/commcare_export/writers.py +++ b/commcare_export/writers.py @@ -346,8 +346,8 @@ def best_type_for(self, val): elif self.is_mysql: # MySQL cannot build an index on TEXT due to the lack of a field length, so we # try to use VARCHAR when possible. - if len(val) < self.MAX_VARCHAR_LEN: # FIXME: Is 255 an interesting cutoff? - return sqlalchemy.Unicode( max(len(val), self.MIN_VARCHAR_LEN), collation=self.collation) + if len(val) < self.MAX_VARCHAR_LEN: # FIXME: Is 255 an interesting cutoff? + return sqlalchemy.Unicode(max(len(val), self.MIN_VARCHAR_LEN), collation=self.collation) else: return sqlalchemy.UnicodeText(collation=self.collation) elif self.is_mssql: