Permalink
Browse files

unicode: Converted the template output and database I/O interfaces to

understand unicode strings. All tests pass (except for one commented out with
"XFAIL"), but this is untested with database servers using non-UTF8, non-ASCII
encodings on the server.


git-svn-id: http://code.djangoproject.com/svn/django/branches/unicode@4971 bcc190cf-cafb-0310-a4f2-bffc1f526a37
  • Loading branch information...
1 parent 232b7ac commit b493b7e3cf09eb0df5c460e8ca7b6a934e40e43c @malcolmt malcolmt committed Apr 9, 2007
@@ -81,7 +81,7 @@ def cursor(self):
kwargs = {
'conv': django_conversions,
'charset': 'utf8',
- 'use_unicode': False,
+ 'use_unicode': True,
}
if settings.DATABASE_USER:
kwargs['user'] = settings.DATABASE_USER
@@ -89,6 +89,7 @@ def cursor(self):
'db': settings.DATABASE_NAME,
'passwd': settings.DATABASE_PASSWORD,
'conv': django_conversions,
+ 'use_unicode': True,
}
if settings.DATABASE_HOST.startswith('/'):
kwargs['unix_socket'] = settings.DATABASE_HOST
@@ -101,6 +102,7 @@ def cursor(self):
cursor = self.connection.cursor()
if self.connection.get_server_info() >= '4.1':
cursor.execute("SET NAMES 'utf8'")
+ cursor.execute("SET CHARACTER SET 'utf8'")
else:
cursor = self.connection.cursor()
if settings.DEBUG:
@@ -4,7 +4,9 @@
Requires psycopg 1: http://initd.org/projects/psycopg1
"""
+from django.utils.encoding import smart_str, smart_unicode
from django.db.backends import util
+from django.db.backends.postgresql.encodings import ENCODING_MAP
try:
import psycopg as Database
except ImportError, e:
@@ -20,30 +22,28 @@
# Import copy of _thread_local.py from Python 2.4
from django.utils._threading_local import local
-def smart_basestring(s, charset):
- if isinstance(s, unicode):
- return s.encode(charset)
- return s
-
class UnicodeCursorWrapper(object):
"""
A thin wrapper around psycopg cursors that allows them to accept Unicode
strings as params.
This is necessary because psycopg doesn't apply any DB quoting to
parameters that are Unicode strings. If a param is Unicode, this will
- convert it to a bytestring using DEFAULT_CHARSET before passing it to
- psycopg.
+ convert it to a bytestring using database client's encoding before passing
+ it to psycopg.
+
+ All results retrieved from the database are converted into Unicode strings
+ before being returned to the caller.
"""
def __init__(self, cursor, charset):
self.cursor = cursor
self.charset = charset
def execute(self, sql, params=()):
- return self.cursor.execute(sql, [smart_basestring(p, self.charset) for p in params])
+ return self.cursor.execute(smart_str(sql, self.charset), [smart_str(p, self.charset, True) for p in params])
def executemany(self, sql, param_list):
- new_param_list = [tuple([smart_basestring(p, self.charset) for p in params]) for params in param_list]
+ # Encode each param exactly as execute() does: the third argument to
+ # smart_str (presumably strings_only -- confirm against
+ # django.utils.encoding) is passed so that non-string params are handed
+ # to psycopg untouched rather than being coerced to bytestrings.
+ new_param_list = [tuple([smart_str(p, self.charset, True) for p in params]) for params in param_list]
return self.cursor.executemany(sql, new_param_list)
def __getattr__(self, attr):
@@ -53,6 +53,7 @@ def __getattr__(self, attr):
return getattr(self.cursor, attr)
postgres_version = None
+client_encoding = None
class DatabaseWrapper(local):
def __init__(self, **kwargs):
@@ -82,11 +83,21 @@ def cursor(self):
cursor = self.connection.cursor()
if set_tz:
cursor.execute("SET TIME ZONE %s", [settings.TIME_ZONE])
- cursor = UnicodeCursorWrapper(cursor, settings.DEFAULT_CHARSET)
+ if not settings.DATABASE_CHARSET:
+ cursor.execute("SHOW client_encoding")
+ encoding = ENCODING_MAP[cursor.fetchone()[0]]
+ else:
+ encoding = settings.DATABASE_CHARSET
+ cursor = UnicodeCursorWrapper(cursor, encoding)
+ global client_encoding
+ if not client_encoding:
+ # We assume the client encoding isn't going to change for random
+ # reasons.
+ client_encoding = encoding
global postgres_version
if not postgres_version:
cursor.execute("SELECT version()")
- postgres_version = [int(val) for val in cursor.fetchone()[0].split()[1].split('.')]
+ postgres_version = [int(val) for val in cursor.fetchone()[0].split()[1].split('.')]
if settings.DEBUG:
return util.CursorDebugWrapper(cursor, self)
return cursor
@@ -148,7 +159,7 @@ def get_random_function_sql():
def get_deferrable_sql():
return " DEFERRABLE INITIALLY DEFERRED"
-
+
def get_fulltext_search_sql(field_name):
raise NotImplementedError
@@ -162,20 +173,21 @@ def get_sql_flush(style, tables, sequences):
"""Return a list of SQL statements required to remove all data from
all tables in the database (without actually removing the tables
themselves) and put the database in an empty 'initial' state
-
- """
+
+ """
if tables:
if postgres_version[0] >= 8 and postgres_version[1] >= 1:
- # Postgres 8.1+ can do 'TRUNCATE x, y, z...;'. In fact, it *has to* in order to be able to
- # truncate tables referenced by a foreign key in any other table. The result is a
- # single SQL TRUNCATE statement.
+ # Postgres 8.1+ can do 'TRUNCATE x, y, z...;'. In fact, it *has to*
+ # in order to be able to truncate tables referenced by a foreign
+ # key in any other table. The result is a single SQL TRUNCATE
+ # statement.
sql = ['%s %s;' % \
(style.SQL_KEYWORD('TRUNCATE'),
style.SQL_FIELD(', '.join([quote_name(table) for table in tables]))
)]
else:
- # Older versions of Postgres can't do TRUNCATE in a single call, so they must use
- # a simple delete.
+ # Older versions of Postgres can't do TRUNCATE in a single call, so
+ # they must use a simple delete.
sql = ['%s %s %s;' % \
(style.SQL_KEYWORD('DELETE'),
style.SQL_KEYWORD('FROM'),
@@ -237,7 +249,15 @@ def get_sql_sequence_reset(style, model_list):
style.SQL_KEYWORD('FROM'),
style.SQL_TABLE(f.m2m_db_table())))
return output
-
+
+def typecast_string(s):
+ """
+ Cast all returned strings to unicode strings.
+ """
+ if not s:
+ return s
+ return smart_unicode(s, client_encoding)
+
# Register these custom typecasts, because Django expects dates/times to be
# in Python's native (standard-library) datetime/time format, whereas psycopg
# use mx.DateTime by default.
@@ -248,6 +268,7 @@ def get_sql_sequence_reset(style, model_list):
Database.register_type(Database.new_type((1083,1266), "TIME", util.typecast_time))
Database.register_type(Database.new_type((1114,1184), "TIMESTAMP", util.typecast_timestamp))
Database.register_type(Database.new_type((16,), "BOOLEAN", util.typecast_boolean))
+Database.register_type(Database.new_type(Database.types[1043].values, 'STRING', typecast_string))
OPERATOR_MAPPING = {
'exact': '= %s',
@@ -0,0 +1,42 @@
+# Mapping between PostgreSQL encodings and Python codec names. This mapping
+# doesn't exist in psycopg, so we have to maintain it by hand (using
+# information from section 21.2.1 in the PostgreSQL manual).
+ENCODING_MAP = {
+ "BIG5": 'big5-tw',
+ "EUC_CN": 'gb2312',
+ "EUC_JP": 'euc_jp',
+ "EUC_KR": 'euc_kr',
+ "GB18030": 'gb18030',
+ "GBK": 'gbk',
+ "ISO_8859_5": 'iso8859_5',
+ "ISO_8859_6": 'iso8859_6',
+ "ISO_8859_7": 'iso8859_7',
+ "ISO_8859_8": 'iso8859_8',
+ "JOHAB": 'johab',
+ "KOI8": 'koi8_r',
+ "KOI8R": 'koi8_r',
+ "LATIN1": 'latin_1',
+ "LATIN2": 'iso8859_2',
+ "LATIN3": 'iso8859_3',
+ "LATIN4": 'iso8859_4',
+ "LATIN5": 'iso8859_9',
+ "LATIN6": 'iso8859_10',
+ "LATIN7": 'iso8859_13',
+ "LATIN8": 'iso8859_14',
+ "LATIN9": 'iso8859_15',
+ "SJIS": 'shift_jis',
+ "SQL_ASCII": 'ascii',
+ "UHC": 'cp949',
+ "UTF8": 'utf-8',
+ "WIN866": 'cp866',
+ "WIN874": 'cp874',
+ "WIN1250": 'cp1250',
+ "WIN1251": 'cp1251',
+ "WIN1252": 'cp1252',
+ "WIN1256": 'cp1256',
+ "WIN1258": 'cp1258',
+
+ # Unsupported (no equivalents in codecs module):
+ # EUC_TW
+ # LATIN10
+}
@@ -7,6 +7,7 @@
from django.db.backends import util
try:
import psycopg2 as Database
+ import psycopg2.extensions
except ImportError, e:
from django.core.exceptions import ImproperlyConfigured
raise ImproperlyConfigured, "Error loading psycopg2 module: %s" % e
@@ -20,6 +21,8 @@
# Import copy of _thread_local.py from Python 2.4
from django.utils._threading_local import local
+psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
+
postgres_version = None
class DatabaseWrapper(local):
@@ -47,6 +50,7 @@ def cursor(self):
conn_string += " port=%s" % settings.DATABASE_PORT
self.connection = Database.connect(conn_string, **self.options)
self.connection.set_isolation_level(1) # make transactions transparent to all cursors
+ self.connection.set_client_encoding('UTF8')
cursor = self.connection.cursor()
cursor.tzinfo_factory = None
if set_tz:
@@ -26,14 +26,6 @@
Database.register_converter("timestamp", util.typecast_timestamp)
Database.register_converter("TIMESTAMP", util.typecast_timestamp)
-def utf8rowFactory(cursor, row):
- def utf8(s):
- if type(s) == unicode:
- return s.encode("utf-8")
- else:
- return s
- return [utf8(r) for r in row]
-
try:
# Only exists in Python 2.4+
from threading import local
@@ -60,7 +52,6 @@ def cursor(self):
self.connection.create_function("django_extract", 2, _sqlite_extract)
self.connection.create_function("django_date_trunc", 2, _sqlite_date_trunc)
cursor = self.connection.cursor(factory=SQLiteCursorWrapper)
- cursor.row_factory = utf8rowFactory
if settings.DEBUG:
return util.CursorDebugWrapper(cursor, self)
else:
@@ -76,8 +67,9 @@ def _rollback(self):
def close(self):
from django.conf import settings
- # If database is in memory, closing the connection destroys the database.
- # To prevent accidental data loss, ignore close requests on an in-memory db.
+ # If database is in memory, closing the connection destroys the
+ # database. To prevent accidental data loss, ignore close requests on
+ # an in-memory db.
if self.connection is not None and settings.DATABASE_NAME != ":memory:":
self.connection.close()
self.connection = None
@@ -153,10 +145,10 @@ def get_pk_default_value():
return "NULL"
def get_sql_flush(style, tables, sequences):
- """Return a list of SQL statements required to remove all data from
- all tables in the database (without actually removing the tables
- themselves) and put the database in an empty 'initial' state
-
+ """
+ Return a list of SQL statements required to remove all data from all tables
+ in the database (without actually removing the tables themselves) and put
+ the database in an empty 'initial' state.
"""
# NB: The generated SQL below is specific to SQLite
# Note: The DELETE FROM... SQL generated below works for SQLite databases
@@ -174,7 +166,7 @@ def get_sql_sequence_reset(style, model_list):
"Returns a list of the SQL statements to reset sequences for the given models."
# No sequence reset required
return []
-
+
def _sqlite_date_trunc(lookup_type, dt):
try:
dt = util.typecast_timestamp(dt)
@@ -204,3 +196,4 @@ def _sqlite_date_trunc(lookup_type, dt):
'istartswith': "LIKE %s ESCAPE '\\'",
'iendswith': "LIKE %s ESCAPE '\\'",
}
+
Oops, something went wrong.

0 comments on commit b493b7e

Please sign in to comment.