Skip to content
Browse files

Added the git URLs to download the non-NGS and MySQL is able to build…

… the schema and indexes from non-NGS.
  • Loading branch information...
1 parent bd21d3b commit c6611967e8d2efc1b43656bf0f75c5dcdd02ee40 Elliot Chance committed
Showing with 160 additions and 87 deletions.
  1. +2 −0 .gitignore
  2. +9 −0 backend/example.pl
  3. +18 −2 backend/mysql.pl
  4. +50 −0 backend/postgresql.pl
  5. +18 −6 settings.pl
  6. +63 −79 src/functions.pl
View
2 .gitignore
@@ -1,2 +1,4 @@
.AppleDouble*
:*
+cloc.pl
+sloc.sh
View
9 backend/example.pl
@@ -89,6 +89,15 @@ sub backend_NAME_load_data {
}
+# mbz_load_pending($id)
+# Load Pending and PendingData from the downloaded replication into the respective tables. This
+# function is different to mbz_load_data that loads the raw mbdump/ whole tables.
+# @param $id The current replication number. See mbz_get_current_replication().
+# @return Always 1.
+sub backend_NAME_load_pending {
+}
+
+
# mbz_create_extra_tables()
# The mbzdb plugins use a basic key-value table to hold information such as settings.
# @see mbz_set_key(), mbz_get_key().
View
20 backend/mysql.pl
@@ -62,6 +62,7 @@ sub backend_mysql_update_schema {
# do not create the table if it already exists
if(!mbz_table_exists($table)) {
$stmt = "CREATE TABLE `$table` (dummycolumn int)";
+ $stmt .= " engine=$g_mysql_engine" if($g_mysql_engine ne '');
$stmt .= " tablespace $g_tablespace" if($g_tablespace ne '');
}
} elsif(substr($line, 0, 1) eq " " || substr($line, 0, 1) eq "\t") {
@@ -160,6 +161,10 @@ sub backend_mysql_update_index {
$pos_on - 3));
my $cols = substr($line, index($line, '(') + 1, index($line, ')') - index($line, '(') - 1);
+ # PostgreSQL will put double-quotes around some entity names, we have to remove these
+ $index_name = mbz_remove_quotes($index_name);
+ $table_name = mbz_remove_quotes($table_name);
+
	# see if the index already exists, if so skip
next if(mbz_index_exists($index_name));
@@ -168,9 +173,9 @@ sub backend_mysql_update_index {
my @columns = split(",", $cols);
for(my $i = 0; $i < @columns; ++$i) {
if(backend_mysql_get_column_type($table_name, mbz_trim($columns[$i])) eq 'text') {
- $columns[$i] = "`" . mbz_trim($columns[$i]) . "`(32)";
+ $columns[$i] = "`" . mbz_trim(mbz_remove_quotes($columns[$i])) . "`(32)";
} else {
- $columns[$i] = "`" . mbz_trim($columns[$i]) . "`";
+ $columns[$i] = "`" . mbz_trim(mbz_remove_quotes($columns[$i])) . "`";
}
}
@@ -270,10 +275,21 @@ sub backend_mysql_create_extra_tables {
"name varchar(255) not null primary key," .
"value text" .
")";
+ $sql .= " engine=$g_mysql_engine" if($g_mysql_engine ne '');
$sql .= " tablespace $g_tablespace" if($g_tablespace ne "");
return mbz_do_sql($sql);
}
+# mbz_load_pending($id)
+# Load Pending and PendingData from the downloaded replication into the respective tables. This
+# function is different to mbz_load_data that loads the raw mbdump/ whole tables.
+# @param $id The current replication number. See mbz_get_current_replication().
+# @return Always 1.
+sub backend_mysql_load_pending {
+ return 1;
+}
+
+
# be nice
return 1;
View
50 backend/postgresql.pl
@@ -252,5 +252,55 @@ sub backend_postgresql_index_exists {
}
+# mbz_load_pending($id)
+# Load Pending and PendingData from the downloaded replication into the respective tables. This
+# function is different to mbz_load_data that loads the raw mbdump/ whole tables.
+# @param $id The current replication number. See mbz_get_current_replication().
+# @return Always 1.
+sub backend_postgresql_load_pending {
+ $id = $_[0];
+
+ # make sure there are no pending transactions before cleanup
+ $temp = $dbh->prepare("SELECT count(1) FROM $g_pending");
+ $temp->execute;
+ @row = $temp->fetchrow_array();
+ $temp->finish;
+ return -1 if($row[0] ne '0');
+
+	# perform cleanup (makes sure there are no left-over records in the PendingData table)
+ $dbh->do("DELETE FROM $g_pending");
+
+ # load Pending and PendingData
+ print localtime() . ": Loading pending tables... ";
+
+ open(TABLEDUMP, "replication/$id/mbdump/Pending")
+ or warn("Error: cannot open file 'replication/$id/mbdump/Pending'\n");
+ $dbh->do("COPY $g_pending FROM STDIN");
+ while($readline = <TABLEDUMP>) {
+ $dbh->pg_putcopydata($readline);
+ }
+ close(TABLEDUMP);
+ $dbh->pg_putcopyend();
+
+ open(TABLEDUMP, "replication/$id/mbdump/PendingData")
+ or warn("Error: cannot open file 'replication/$id/mbdump/PendingData'\n");
+ $dbh->do("COPY $g_pendingdata FROM STDIN");
+ while($readline = <TABLEDUMP>) {
+ $dbh->pg_putcopydata($readline);
+ }
+ close(TABLEDUMP);
+ $dbh->pg_putcopyend();
+
+ print "Done\n";
+
+ # PLUGIN_beforereplication()
+ foreach my $plugin (@g_active_plugins) {
+ eval($plugin . "_beforereplication($id)") or warn($!);
+ }
+
+ return 1;
+}
+
+
# be nice
return 1;
View
24 settings.pl
@@ -16,10 +16,10 @@
$g_db_pass = 'abcd1234';
# The name of the database to use.
-$g_db_name = 'mbzdb_ngs';
+$g_db_name = 'mbzdb_old';
# Use NGS (Next Generation Schema)?
-$g_use_ngs = 1;
+$g_use_ngs = 0;
#############################
@@ -30,6 +30,10 @@
# with contrib/cube installed. So if you are unsure leave this as 0.
$g_contrib_cube = 0;
+# The engine to use when creating tables with MySQL. Set this to "" if you want to use the MySQL
+# default storage engine.
+$g_mysql_engine = 'InnoDB';
+
# Server host, use 'localhost' if the database is on the same server as this script.
$g_db_host = 'localhost';
@@ -51,10 +55,18 @@
# Schema. This is where the SQL scripts to create the schema come from, only edit this if you know
# what you're doing.
$schema_base = 'http://git.musicbrainz.org/gitweb/?p=musicbrainz-server/core.git;a=blob_plain';
-$g_schema_url = "$schema_base;f=admin/sql/CreateTables.sql;hb=master";
-$g_index_url = "$schema_base;f=admin/sql/CreateIndexes.sql;hb=master";
-$g_pk_url = "$schema_base;f=admin/sql/CreatePrimaryKeys.sql;hb=master";
-$g_func_url = "$schema_base;f=admin/sql/CreateFunctions.sql;hb=master";
+$hb = ($g_use_ngs ? 'master' : '6b70f50c57401fc07140dcbb242550b7e5ebfa31');
+$g_schema_url = "$schema_base;f=admin/sql/CreateTables.sql;hb=$hb";
+$g_index_url = "$schema_base;f=admin/sql/CreateIndexes.sql;hb=$hb";
+$g_pk_url = "$schema_base;f=admin/sql/CreatePrimaryKeys.sql;hb=$hb";
+$g_func_url = "$schema_base;f=admin/sql/CreateFunctions.sql;hb=$hb";
+
+# Replication URLs
+# TODO: add the URLs for NGS when they are available.
+if($g_use_ngs) {
+} else {
+ $g_rep_url = "ftp://ftp.musicbrainz.org/pub/musicbrainz/data/replication";
+}
# Kill the update script if a duplicate error (i.e. a duplicate unique key) occurs. It is
# recommended you leave this at 0.
View
142 src/functions.pl
@@ -162,20 +162,24 @@ sub mbz_get_key {
sub mbz_raw_download {
print "Logging into MusicBrainz FTP...\n";
- # find out the latest NGS
- my $latest = "";
- my $host = 'ftp.musicbrainz.org';
- my $ftp = Net::FTP->new($host, Timeout => 60)
- or die "Cannot contact $host: $!";
- $ftp->login('anonymous') or die "Can't login ($host): " . $ftp->message;
- $ftp->cwd('/pub/musicbrainz/data/ngs/')
- or die "Can't change directory ($host): " . $ftp->message;
- my @ls = $ftp->ls('-lr');
- my @parts = split(' ', $ls[0]);
- $latest = pop(@parts);
- print "The latest is mbdump is '$latest'\n";
- $ftp->cwd("/pub/musicbrainz/data/ngs/$latest")
- or die "Can't change directory (ftp.musicbrainz.org): " . $ftp->message;
+ if($g_use_ngs) {
+ # find out the latest NGS
+ my $latest = "";
+ my $host = 'ftp.musicbrainz.org';
+ my $ftp = Net::FTP->new($host, Timeout => 60)
+ or die "Cannot contact $host: $!";
+ $ftp->login('anonymous') or die "Can't login ($host): " . $ftp->message;
+ $ftp->cwd('/pub/musicbrainz/data/ngs/')
+ or die "Can't change directory ($host): " . $ftp->message;
+ my @ls = $ftp->ls('-lr');
+ my @parts = split(' ', $ls[0]);
+ $latest = pop(@parts);
+ print "The latest is mbdump is '$latest'\n";
+ $ftp->cwd("/pub/musicbrainz/data/ngs/$latest")
+ or die "Can't change directory (ftp.musicbrainz.org): " . $ftp->message;
+ } else {
+ # TODO: get non-NGS downloads.
+ }
	# these are the files we need to download, there are more but they're not required.
my @files = (
@@ -320,6 +324,16 @@ sub mbz_update_index {
}
+# mbz_load_pending()
+# This subroutine is just a controller that redirects to the load pending for the RDBMS we are
+# using.
+# @return Passthru from backend_DB_load_pending().
+sub mbz_load_pending {
+ # use the subroutine appropriate for the RDBMS
+ return eval("backend_$g_db_rdbms" . "_load_pending();");
+}
+
+
# mbz_update_index()
# This subroutine is just a controller that redirects to the index exists for the RDBMS we are
# using.
@@ -427,11 +441,11 @@ sub mbz_unpack_data {
$packed = substr($packed, $+[0]);
- if (defined $v) {
+ if(defined($v)) {
my $t = '';
- while (length $v) {
- $t .= "\\", next if $v =~ s/\A\\\\//;
- $t .= "'", next if $v =~ s/\A\\'// or $v =~ s/\A''//;
+ while(length($v)) {
+ $t .= "\\", next if($v =~ s/\A\\\\//);
+ $t .= "'", next if($v =~ s/\A\\'// or $v =~ s/\A''//);
$t .= substr($v, 0, 1, '');
}
$v = $t;
@@ -529,12 +543,35 @@ sub mbz_get_count {
# mbz_run_transactions()
-# TODO: fix description
-# PLEASE NOTE: Each XID is a transaction, however for this function we run the replication
-# statements inderpendantly in case the user is not using the InnoDB engine.
+#
+# The replications work by first loading a Pending and PendingData table. Each Pending record is
+# a single replication action that joins to one or two records in the PendingData table. The
+# PendingData table for any given replication record will have a raw data record and key record
+# which is indicated by IsKey. The raw data record is used for INSERT and UPDATE as the new data to
+# be inserted whereas the key record is used to specify the columns for the WHERE clause to be used
+# in UPDATE and DELETE statements.
+#
+# Multiple replications are grouped into a single transaction with the XID column. For example a
+# transaction would include the INSERT of a release and all the tracks for that album. The
+# transaction support isn't implemented yet as the data given from MusicBrainz is assumed to be
+# correct because it has already been passed the constraint checks. There may be some benefit to
+# speed if the whole hour's replication is wrapped into a single transaction but this can be left
+# for some time in the future. It is however important that the Pending data run in the correct
+# order specified by SeqId.
+#
+# Those wishing to keep the replication data can use the pendinglog plugin which will put all the
+# incoming replications into a separate table that will grow over time. The pendinglog plugin uses
+# a single table that uses one record per one replication item regardless of the replication action
+# taken.
+#
+# This subroutine could possibly be moved to backend specific so that each RDBMS can impose its own
+# optimised rules; however, the SQL will always be the same, so for now I'll keep it generic SQL for
+# all backend databases.
+#
+# @note Each XID is a transaction, however for this function we run the replication statements
+# independently in case the user is not using the InnoDB storage engine with MySQL.
# @return Always 1.
sub mbz_run_transactions {
- # TODO: some of this is database specific but dont move the whole subroutine, thats too messy.
my $rep_handle = $dbh->prepare("select * from $g_pending left join $g_pendingdata ".
"on $g_pending.\"SeqId\"=$g_pendingdata.\"SeqId\" ".
"order by $g_pending.\"SeqId\", \"IsKey\" desc");
@@ -616,56 +653,6 @@ sub mbz_run_transactions {
}
-# mbz_load_pending($id)
-# Load Pending and PendingData from the downaloded replciation into the respective tables. This
-# function is different to mbz_load_data that loads the raw mbdump/ whole tables.
-# @param $id The current replication number. See mbz_get_current_replication().
-# @return Always 1.
-sub mbz_load_pending {
- $id = $_[0];
-
- # make sure there are no pending transactions before cleanup
- $temp = $dbh->prepare("SELECT count(1) FROM $g_pending");
- $temp->execute;
- @row = $temp->fetchrow_array();
- $temp->finish;
- return -1 if($row[0] ne '0');
-
- # perform cleanup (makes sure there no left over records in the PendingData table)
- $dbh->do("DELETE FROM $g_pending");
-
- # load Pending and PendingData
- print localtime() . ": Loading pending tables... ";
-
- open(TABLEDUMP, "replication/$id/mbdump/Pending")
- or warn("Error: cannot open file 'replication/$id/mbdump/Pending'\n");
- $dbh->do("COPY $g_pending FROM STDIN");
- while($readline = <TABLEDUMP>) {
- $dbh->pg_putcopydata($readline);
- }
- close(TABLEDUMP);
- $dbh->pg_putcopyend();
-
- open(TABLEDUMP, "replication/$id/mbdump/PendingData")
- or warn("Error: cannot open file 'replication/$id/mbdump/PendingData'\n");
- $dbh->do("COPY $g_pendingdata FROM STDIN");
- while($readline = <TABLEDUMP>) {
- $dbh->pg_putcopydata($readline);
- }
- close(TABLEDUMP);
- $dbh->pg_putcopyend();
-
- print "Done\n";
-
- # PLUGIN_beforereplication()
- foreach my $plugin (@g_active_plugins) {
- eval($plugin . "_beforereplication($id)") or warn($!);
- }
-
- return 1;
-}
-
-
# mbz_unzip_replication($id)
# Unzip downloaded replication.
# @param $id The current replication number. See mbz_get_current_replication().
@@ -742,15 +729,12 @@ sub mbz_download_replication {
my $id = $_[0];
print "===== $id =====\n";
- # its possible the script was exited by the user or a crash during
- # downloading or decompression, for this reason we always download
- # the latest copy.
+	# it's possible the script was exited by the user or a crash during downloading or decompression,
+ # for this reason we always download the latest copy.
print localtime() . ": Downloading... ";
$localfile = "replication/replication-$id.tar.bz2";
- $url = "ftp://ftp.musicbrainz.org/pub/musicbrainz/data/replication/replication-$id.tar.bz2";
- $ua = LWP::UserAgent->new();
- $request = HTTP::Request->new('GET', $url);
- $resp = $ua->request($request, $localfile);
+ $url = "$g_rep_url/replication-$id.tar.bz2";
+ my $resp = mbz_download_file($url, $localfile);
$found = 0;
use HTTP::Status qw( RC_OK RC_NOT_FOUND RC_NOT_MODIFIED );

0 comments on commit c661196

Please sign in to comment.
Something went wrong with that request. Please try again.