Skip to content

Commit

Permalink
UTF-8 Multibyte (utf8mb4) support
Browse files Browse the repository at this point in the history
Add utf8mb4 connection charset support in MySQL, MySQLi and PDOMySQL drivers
  • Loading branch information
Nicholas K. Dionysopoulos committed Jun 13, 2015
1 parent e692048 commit e3c8bda
Show file tree
Hide file tree
Showing 5 changed files with 181 additions and 7 deletions.
19 changes: 19 additions & 0 deletions libraries/joomla/database/driver.php
Expand Up @@ -146,6 +146,12 @@ abstract class JDatabaseDriver extends JDatabase implements JDatabaseInterface
*/
protected $utf = true;

/**
* @var boolean True if the database engine supports UTF-8 Multibyte (utf8mb4) character encoding.
* @since CMS 3.5.0
*/
protected $utf8mb4 = false;

/**
* @var integer The database error number
* @since 11.1
Expand Down Expand Up @@ -971,6 +977,19 @@ public function hasUTFSupport()
return $this->utf;
}

/**
* Determine whether the database engine supports the UTF-8 Multibyte (utf8mb4) character encoding. This applies to
* MySQL databases.
*
* This is a plain accessor: it returns the current value of the protected $utf8mb4 flag and performs no detection
* of its own.
*
* @return boolean True if the database engine supports UTF-8 Multibyte.
*
* @since CMS 3.5.0
*/
public function hasUTF8mb4Support()
{
return $this->utf8mb4;
}

/**
* Get the version of the database connector
*
Expand Down
58 changes: 55 additions & 3 deletions libraries/joomla/database/driver/mysql.php
Expand Up @@ -100,8 +100,11 @@ public function connect()
$this->select($this->options['database']);
}

// Set charactersets (needed for MySQL 4.1.2+).
$this->setUTF();
// Pre-populate the UTF-8 Multibyte compatibility flag based on the client library version
$this->utf8mb4 = $this->serverClaimsUtf8mb4Support();

// Set the character set (needed for MySQL 4.1.2+).
$this->utf = $this->setUTF();

// Turn MySQL profiling ON in debug mode:
if ($this->debug && $this->hasProfiling())
Expand Down Expand Up @@ -380,9 +383,34 @@ public function select($database)
*/
public function setUTF()
{
    // If UTF is not supported there is nothing to negotiate; report failure immediately.
    if (!$this->utf)
    {
        return false;
    }

    // Make sure we're connected to the server before touching the connection charset.
    $this->connect();

    // Prefer the multibyte utf8mb4 charset when the compatibility flag says it is available,
    // otherwise use plain utf8.
    $charset = $this->utf8mb4 ? 'utf8mb4' : 'utf8';

    // Warnings are suppressed on purpose: a failure here is expected on some setups and is
    // handled by the fallback below rather than surfaced to the caller.
    $result = @mysql_set_charset($charset, $this->connection);

    /**
     * If the utf8mb4 charset could not be set then the server doesn't support utf8mb4 despite the flag claiming
     * otherwise. This happens on old MySQL server versions (less than 5.5.3) using the mysqlnd PHP driver. Since
     * mysqlnd masks the server version and reports only its own we can not be sure if the server actually does
     * support UTF-8 Multibyte (i.e. it's MySQL 5.5.3 or later). In that case clear the flag and retry with plain
     * utf8 so the connection still ends up with a UTF-8 charset.
     */
    if (!$result && $this->utf8mb4)
    {
        $this->utf8mb4 = false;
        $result = @mysql_set_charset('utf8', $this->connection);
    }

    // True when a charset (utf8mb4 or the utf8 fallback) was applied, false otherwise.
    return $result;
}

/**
Expand Down Expand Up @@ -463,4 +491,28 @@ private function hasProfiling()
return false;
}
}

/**
 * Guess from the MySQL client library version whether UTF-8 Multibyte (utf8mb4) can be used.
 *
 * Only the client library version string is inspected here; the server itself is not queried.
 * libmysql supports utf8mb4 since 5.5.3 (same version as the MySQL server). mysqlnd supports utf8mb4 since 5.0.9.
 *
 * @return boolean True if the client library version is recent enough for utf8mb4.
 *
 * @since CMS 3.5.0
 */
private function serverClaimsUtf8mb4Support()
{
    $clientInfo = mysql_get_client_info();

    // Anything that does not identify itself as mysqlnd is treated as libmysql and compared as-is.
    if (strpos($clientInfo, 'mysqlnd') === false)
    {
        return version_compare($clientInfo, '5.5.3', '>=');
    }

    // Strip the leading non-digit prefix and any trailing text, keeping only the dotted version number.
    $mysqlndVersion = preg_replace('/^\D+([\d.]+).*/', '$1', $clientInfo);

    return version_compare($mysqlndVersion, '5.0.9', '>=');
}
}
64 changes: 61 additions & 3 deletions libraries/joomla/database/driver/mysqli.php
Expand Up @@ -25,6 +25,12 @@ class JDatabaseDriverMysqli extends JDatabaseDriver
*/
public $name = 'mysqli';

/**
* @var mysqli The database connection resource.
* @since 11.1
*/
protected $connection;

/**
* The character(s) used to quote SQL statement names such as table names or field names,
* etc. The child classes should define this as necessary. If a single character string the
Expand Down Expand Up @@ -178,8 +184,11 @@ public function connect()
$this->select($this->options['database']);
}

// Set charactersets (needed for MySQL 4.1.2+).
$this->setUTF();
// Pre-populate the UTF-8 Multibyte compatibility flag based on the client library version
$this->utf8mb4 = $this->serverClaimsUtf8mb4Support();

// Set the character set (needed for MySQL 4.1.2+).
$this->utf = $this->setUTF();

// Turn MySQL profiling ON in debug mode:
if ($this->debug && $this->hasProfiling())
Expand Down Expand Up @@ -670,9 +679,34 @@ public function select($database)
*/
public function setUTF()
{
    // If UTF is not supported there is nothing to negotiate; report failure immediately.
    if (!$this->utf)
    {
        return false;
    }

    // Make sure we're connected to the server before touching the connection charset.
    $this->connect();

    // Prefer the multibyte utf8mb4 charset when the compatibility flag says it is available,
    // otherwise use plain utf8.
    $charset = $this->utf8mb4 ? 'utf8mb4' : 'utf8';

    // Warnings are suppressed on purpose: a failure here is expected on some setups and is
    // handled by the fallback below rather than surfaced to the caller.
    $result = @$this->connection->set_charset($charset);

    /**
     * If the utf8mb4 charset could not be set then the server doesn't support utf8mb4 despite the flag claiming
     * otherwise. This happens on old MySQL server versions (less than 5.5.3) using the mysqlnd PHP driver. Since
     * mysqlnd masks the server version and reports only its own we can not be sure if the server actually does
     * support UTF-8 Multibyte (i.e. it's MySQL 5.5.3 or later). In that case clear the flag and retry with plain
     * utf8 so the connection still ends up with a UTF-8 charset.
     */
    if (!$result && $this->utf8mb4)
    {
        $this->utf8mb4 = false;
        $result = @$this->connection->set_charset('utf8');
    }

    // True when a charset (utf8mb4 or the utf8 fallback) was applied, false otherwise.
    return $result;
}

/**
Expand Down Expand Up @@ -866,4 +900,28 @@ private function hasProfiling()
return false;
}
}

/**
 * Guess from the MySQLi client library version whether UTF-8 Multibyte (utf8mb4) can be used.
 *
 * Only the client library version string is inspected here; the server itself is not queried.
 * libmysql supports utf8mb4 since 5.5.3 (same version as the MySQL server). mysqlnd supports utf8mb4 since 5.0.9.
 *
 * @return boolean True if the client library version is recent enough for utf8mb4.
 *
 * @since CMS 3.5.0
 */
private function serverClaimsUtf8mb4Support()
{
    $clientInfo = mysqli_get_client_info();

    // Anything that does not identify itself as mysqlnd is treated as libmysql and compared as-is.
    if (strpos($clientInfo, 'mysqlnd') === false)
    {
        return version_compare($clientInfo, '5.5.3', '>=');
    }

    // Strip the leading non-digit prefix and any trailing text, keeping only the dotted version number.
    $mysqlndVersion = preg_replace('/^\D+([\d.]+).*/', '$1', $clientInfo);

    return version_compare($mysqlndVersion, '5.0.9', '>=');
}
}
6 changes: 6 additions & 0 deletions libraries/joomla/database/driver/pdo.php
Expand Up @@ -25,6 +25,12 @@ abstract class JDatabaseDriverPdo extends JDatabaseDriver
*/
public $name = 'pdo';

/**
* @var PDO The database connection resource.
* @since 12.1
*/
protected $connection;

/**
* The character(s) used to quote SQL statement names such as table names or field names,
* etc. The child classes should define this as necessary. If a single character string the
Expand Down
41 changes: 40 additions & 1 deletion libraries/joomla/database/driver/pdomysql.php
Expand Up @@ -64,10 +64,23 @@ class JDatabaseDriverPdomysql extends JDatabaseDriverPdo
*/
public function __construct($options)
{
/**
* Pre-populate the UTF-8 Multibyte compatibility flag. Unfortunately PDO won't report the server version
* unless we're connected to it and we cannot connect to it unless we know if it supports utf8mb4 which requires
* us knowing the server version. Between this chicken and egg issue we _assume_ it's supported and we'll just
* catch any problems at connection time.
*/
$this->utf8mb4 = true;

// Get some basic values from the options.
$options['driver'] = 'mysql';
$options['charset'] = (isset($options['charset'])) ? $options['charset'] : 'utf8';

if ($this->utf8mb4 && ($options['charset'] == 'utf8'))
{
$options['charset'] = 'utf8mb4';
}

$this->charset = $options['charset'];

// Finalize initialisation.
Expand All @@ -84,7 +97,33 @@ public function __construct($options)
*/
public function connect()
{
parent::connect();
try
{
// Try to connect to MySQL
parent::connect();
}
catch (\RuntimeException $e)
{
// If the connection failed but not because of the wrong character set bubble up the exception
if (!$this->utf8mb4 || ($this->options['charset'] != 'utf8mb4'))
{
throw $e;
}

/**
* If the connection failed and I was trying to use the utf8mb4 charset then it is likely that the server
* doesn't support utf8mb4 despite claiming otherwise.
*
* This happens on old MySQL server versions (less than 5.5.3) using the mysqlnd PHP driver. Since mysqlnd
* masks the server version and reports only its own we can not be sure if the server actually does support
* UTF-8 Multibyte (i.e. it's MySQL 5.5.3 or later). Since the utf8mb4 charset is undefined in this case we
* catch the error and determine that utf8mb4 is not supported!
*/
$this->utf8mb4 = false;
$this->options['charset'] = 'utf8';

parent::connect();
}

$this->connection->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
$this->connection->setAttribute(PDO::ATTR_EMULATE_PREPARES, true);
Expand Down

0 comments on commit e3c8bda

Please sign in to comment.