Skip to content

Commit

Permalink
UTF-8 Multibyte (utf8mb4) support
Browse files Browse the repository at this point in the history
Added alterTableCharacterSet method to JDatabaseDriver to allow Joomla! to transparently convert tables to utf8/utf8mb4
  • Loading branch information
Nicholas K. Dionysopoulos committed Jun 13, 2015
1 parent e5b0383 commit e2621e3
Showing 1 changed file with 179 additions and 0 deletions.
179 changes: 179 additions & 0 deletions libraries/joomla/database/driver.php
Expand Up @@ -461,6 +461,98 @@ public function alterDbCharacterSet($dbName)
return $this->execute();
}

/**
 * Alter a table's character set, using the queries returned by getAlterTableCharacterSet().
 *
 * The conversion is wrapped in a transaction when transactionStart() succeeds. When starting a transaction
 * throws, the table is locked for the duration of the conversion instead. On failure the transaction is rolled
 * back (or the tables are unlocked) before the error is reported.
 *
 * Errors raised while building the list of queries are not caught here and always propagate.
 *
 * @param   string   $tableName  The name of the table to alter
 * @param   boolean  $rethrow    True to rethrow database exceptions. Default: false (exceptions are suppressed
 *                               and false is returned instead)
 *
 * @return  boolean  True if successful, false if there was nothing to do or a suppressed error occurred
 *
 * @since   CMS 3.5.0
 * @throws  RuntimeException  If the table name is null or an empty string
 * @throws  Exception         Relayed from the database layer if a database error occurs and $rethrow == true
 */
public function alterTableCharacterSet($tableName, $rethrow = false)
{
	if (is_null($tableName) || $tableName === '')
	{
		throw new RuntimeException('Table name must not be empty.');
	}

	$queries = $this->getAlterTableCharacterSet($tableName);

	if (empty($queries))
	{
		return false;
	}

	// Track which protection is currently active so the failure handler knows what to undo
	$hasTransaction = false;
	$hasLock        = false;

	try
	{
		try
		{
			$this->transactionStart();
			$hasTransaction = true;
		}
		catch (Exception $e)
		{
			// Starting a transaction failed: fall back to locking the table
			$this->lockTable($tableName);
			$hasLock = true;
		}

		foreach ($queries as $query)
		{
			$this->setQuery($query)->execute();
		}

		if ($hasTransaction)
		{
			// $hasTransaction stays true on purpose: a failed commit must still be rolled back below
			$this->transactionCommit();
		}
		else
		{
			// Clear the flag first so a failing unlock is not retried by the failure handler
			$hasLock = false;
			$this->unlockTables();
		}
	}
	catch (Exception $e)
	{
		try
		{
			if ($hasTransaction)
			{
				$this->transactionRollback();
			}
			elseif ($hasLock)
			{
				$this->unlockTables();
			}
		}
		catch (Exception $cleanupException)
		{
			// Deliberately ignored: the original failure ($e) is the error worth reporting
		}

		if ($rethrow)
		{
			throw $e;
		}

		return false;
	}

	return true;
}

/**
* Connects to the database if needed.
*
Expand Down Expand Up @@ -631,6 +723,93 @@ protected function getAlterDbCharacterSet($dbName)
return 'ALTER DATABASE ' . $this->quoteName($dbName) . ' CHARACTER SET `' . $charset .'`';
}

/**
 * Get the query strings to alter the character set and collation of a table.
 *
 * The first query converts the whole table to the target character set (utf8mb4 when $this->utf8mb4 is set,
 * utf8 otherwise) with the matching _general_ci collation. When the table has columns whose collation is not
 * the general one, a second query redefines those columns so that they keep their collation type under the new
 * character set.
 *
 * Note that this method is not a pure query builder: it runs a SHOW FULL COLUMNS query against the table.
 *
 * @param   string  $tableName  The name of the table
 *
 * @return  string[]  The queries required to alter the table's character set and collation
 *
 * @since   CMS 3.5.0
 */
public function getAlterTableCharacterSet($tableName)
{
	$charset   = $this->utf8mb4 ? 'utf8mb4' : 'utf8';
	$collation = $charset . '_general_ci';

	$quotedTableName = $this->quoteName($tableName);

	$queries   = array();
	$queries[] = "ALTER TABLE $quotedTableName CONVERT TO CHARACTER SET $charset COLLATE $collation";

	/**
	 * We also need to convert each text column, modifying their character set and collation. This allows us to
	 * change, for example, a utf8_bin collated column to a utf8mb4_bin collated column.
	 */
	$this->setQuery("SHOW FULL COLUMNS FROM $quotedTableName");
	$columns    = $this->loadAssocList();
	$columnMods = array();

	if (is_array($columns))
	{
		foreach ($columns as $column)
		{
			// Make sure we are redefining only columns which do support a collation
			$col = (object) $column;

			if (empty($col->Collation))
			{
				continue;
			}

			// Default new collation: utf8_general_ci or utf8mb4_general_ci
			$newCollation   = $collation;
			$collationParts = explode('_', $col->Collation);

			/**
			 * A collation name is the character set followed by one or more suffix parts (utf8_bin,
			 * utf8_unicode_ci, utf8_unicode_520_ci, ...). Replace only the leading character set and keep every
			 * suffix part, so that utf8_bin becomes utf8mb4_bin and utf8_unicode_520_ci becomes
			 * utf8mb4_unicode_520_ci rather than a truncated, non-existent name.
			 */
			if (count($collationParts) >= 2)
			{
				array_shift($collationParts);
				$newCollation = $charset . '_' . implode('_', $collationParts);
			}

			// If the old and new collation is the same we don't have to change the collation type
			if (strtolower($newCollation) == strtolower($col->Collation))
			{
				continue;
			}

			$null = $col->Null == 'YES' ? 'NULL' : 'NOT NULL';

			// q() is the driver's shorthand for quote(), which escapes the value and adds the surrounding quotes
			// itself; wrapping its result in another pair of quotes would produce invalid SQL.
			$default = is_null($col->Default) ? '' : 'DEFAULT ' . $this->q($col->Default);

			$columnMods[] = "MODIFY COLUMN `{$col->Field}` {$col->Type} CHARACTER SET $charset COLLATE $newCollation $null $default";
		}
	}

	if (count($columnMods))
	{
		// NOTE(review): the table-level CHARACTER SET clause is appended without a separating comma - confirm the
		// target MySQL versions accept this form.
		$queries[] = "ALTER TABLE $quotedTableName " .
			implode(',', $columnMods) .
			" CHARACTER SET $charset COLLATE $collation";
	}

	return $queries;
}

/**
* Return the query string to create new Database.
* Each database driver, other than MySQL, needs to override this method to return the correct string.
Expand Down

0 comments on commit e2621e3

Please sign in to comment.