Skip to content

Commit

Permalink
MySQL now creates all indexes correctly, this replaces the old method…
Browse files Browse the repository at this point in the history
… of trying to guess which columns needs indexing from cardinality
  • Loading branch information
Elliot Chance committed Jun 18, 2010
1 parent 3864746 commit bd21d3b
Show file tree
Hide file tree
Showing 4 changed files with 129 additions and 29 deletions.
13 changes: 13 additions & 0 deletions backend/example.pl
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,19 @@ sub backend_NAME_table_column_exists {
}


# mbz_index_exists($index_name)
# Check if an index already exists.
# @param $index_name The name of the index to look for.
# @return 1 if the index exists, otherwise 0.
sub backend_NAME_index_exists {
my $index_name = $_[0];

# your code here

return 0;
}


# mbz_load_data()
# Load the data from the mbdump files into the tables.
sub backend_NAME_load_data {
Expand Down
120 changes: 91 additions & 29 deletions backend/mysql.pl
Original file line number Diff line number Diff line change
Expand Up @@ -114,41 +114,79 @@ sub backend_mysql_update_schema {
}


# We can't always use the CreateIndexes.sql script provided by MusicBrainz because it has
# PostgreSQL specific functions. Instead we use a cardinality calculation to determine the need for
# an index.
sub backend_mysql_update_index {
# go through each table
$sth = $dbh->prepare('show tables');
# backend_mysql_get_column_type($table_name, $col_name)
# Get the MySQL column type.
# @param $table_name The name of the table.
# @param $col_name The name of the column to fetch the type.
# @return MySQL column type.
sub backend_mysql_get_column_type {
my ($table_name, $col_name) = @_;

my $sth = $dbh->prepare("describe `$table_name`");
$sth->execute();
$start = time();
while(@result = $sth->fetchrow_array()) {
next if($result[0] eq $g_pending || $result[0] eq $g_pendingdata);
return $result[1] if($result[0] eq $col_name);
}

return "";
}


# backend_mysql_update_index()
# Attemp to pull as much relevant information from CreateIndexes.sql as we can. MySQL does not
# support function indexes so we will skip those. Any indexes created already on the database will
# be left intact.
# @return Always 1.
sub backend_mysql_update_index {
open(SQL, "temp/CreateIndexes.sql");
chomp(my @lines = <SQL>);

foreach my $line (@lines) {
$line = mbz_trim($line);
my $pos_index = index($line, 'INDEX ');
my $pos_on = index($line, 'ON ');

print "Indexing $result[0]\n";
$sth2 = $dbh->prepare("\\d \"" . $result[0] . "\"");
$sth2->execute();
while(@result2 = $sth2->fetchrow_array()) {
$start2 = time();
if($result2[3] eq "" && $result2[1] ne "text") {
print " Calculating cardinality of $result2[0]... ";
$sth_card = $dbh->prepare("select count(1)/(select count(1) from \"$result[0]\") ".
"from (select distinct \"$result2[0]\" from \"$result[0]\") as t");
$sth_card->execute();
my @card = $sth_card->fetchrow_array();
if($card[0] >= 0.01) {
print "$card[0] (Yes)\n";
print " Adding index $result[0].$result2[0]...";
mbz_do_sql("create index $result[0]_" . $result2[0] .
" on \"$result[0]\"(\"$result2[0]\")");
print " Done (", mbz_format_time(time() - $start2), ", ",
mbz_format_time(time() - $start), " total)\n";
} else {
print "$card[0] (No)\n";
}
# skip blank lines, comments, psql settings and lines that arn't any use to us.
next if($line eq '' || substr($line, 0, 2) eq '--' || substr($line, 0, 1) eq "\\" ||
$pos_index < 0);

# skip function-based indexes.
next if($line =~ /.*\(.*\(.*\)\)/);

# get the names
my $index_name = mbz_trim(substr($line, $pos_index + 6, index($line, ' ', $pos_index + 7) -
$pos_index - 6));
my $table_name = mbz_trim(substr($line, $pos_on + 3, index($line, ' ', $pos_on + 4) -
$pos_on - 3));
my $cols = substr($line, index($line, '(') + 1, index($line, ')') - index($line, '(') - 1);

# see if the index aleady exists, if so skip
next if(mbz_index_exists($index_name));

# split and clean column names. this is also a good time to find out there type, if its
# TEXT then MySQL requires and index length.
my @columns = split(",", $cols);
for(my $i = 0; $i < @columns; ++$i) {
if(backend_mysql_get_column_type($table_name, mbz_trim($columns[$i])) eq 'text') {
$columns[$i] = "`" . mbz_trim($columns[$i]) . "`(32)";
} else {
$columns[$i] = "`" . mbz_trim($columns[$i]) . "`";
}
}

# now we construct the index back together in case there was changes along the way
$new_line = substr($line, 0, $pos_index) . "INDEX `$index_name` ON `$table_name` (";
$new_line .= join(",", @columns) . ")";

# all looks good so far ... create the index
mbz_do_sql($new_line);

print "$new_line\n";
}

close(SQL);
exit(0);
return 1;
}


Expand All @@ -170,6 +208,30 @@ sub backend_mysql_table_exists {
}


# mbz_index_exists($index_name)
# Check if an index already exists.
# @param $index_name The name of the index to look for.
# @return 1 if the index exists, otherwise 0.
sub backend_mysql_index_exists {
my $index_name = $_[0];

# yes I know this is a highly inefficent way to do it, but its simple and is only called on
# schema changes.
my $sth = $dbh->prepare("show tables");
$sth->execute();
while(@result = $sth->fetchrow_array()) {
my $sth2 = $dbh->prepare("show indexes from `$result[0]`");
$sth2->execute();
while(@result2 = $sth2->fetchrow_array()) {
return 1 if($result2[2] eq $index_name);
}
}

# the index was not found
return 0;
}


# mbz_table_column_exists($table_name, $col_name)
# Check if a table already has a column.
# @param $table_name The name of the table to look for.
Expand Down
13 changes: 13 additions & 0 deletions backend/postgresql.pl
Original file line number Diff line number Diff line change
Expand Up @@ -239,5 +239,18 @@ sub backend_postgresql_create_extra_tables {
}


# mbz_index_exists($index_name)
# Check if an index already exists.
# @param $index_name The name of the index to look for.
# @return 1 if the index exists, otherwise 0.
sub backend_postgresql_index_exists {
my $index_name = $_[0];

# TODO: incomplete

return 0;
}


# be nice
return 1;
12 changes: 12 additions & 0 deletions src/functions.pl
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,18 @@ sub mbz_update_index {
}


# mbz_update_index()
# This subroutine is just a controller that redirects to the index exists for the RDBMS we are
# using.
# @param $index_name The name of the index to look for.
# @return Passthru from backend_DB_index_exists().
sub mbz_index_exists {
# use the subroutine appropriate for the RDBMS
my $index_name = $_[0];
return eval("backend_$g_db_rdbms" . "_index_exists(\"$index_name\");");
}


# mbz_create_extra_tables()
# This subroutine is just a controller that redirects to the create extra tables for the RDBMS we
# are using.
Expand Down

0 comments on commit bd21d3b

Please sign in to comment.