forked from elliotchance/mbzdb
/
postgresql.pl
executable file
·338 lines (282 loc) · 11.2 KB
/
postgresql.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
# mbz_connect()
# Make database connection. It will set the global $dbh and it will return it.
# $g_db_name, $g_db_host, $g_db_port, $g_db_user and $g_db_pass are supplied by settings.pl.
# @return $dbh
sub backend_postgresql_connect {
$dbh = DBI->connect("dbi:Pg:dbname=$g_db_name;host=$g_db_host;port=$g_db_port",
$g_db_user, $g_db_pass);
return $dbh;
}
# mbz_update_index()
# @return Always 1.
sub backend_postgresql_update_index {
print $L{'downloadschema'};
mbz_download_schema();
print $L{'done'} . "\n";
# we attempt to create language, load all the native functions and indexes. If the create
# language or functions fail they will ultimatly be skipped.
# for PostgreSQL we need to try CREATE LANGUAGE
backend_postgresql_create_plpgsql() if($g_db_rdbms eq 'postgresql');
open(SQL, "replication/CreateFunctions.sql");
chomp(my @lines = <SQL>);
my $full = "";
foreach my $line (@lines) {
# skip blank lines and single bracket lines
next if($line eq "" || substr($line, 0, 2) eq "--" || substr($line, 0, 1) eq "\\" ||
$line eq "BEGIN;" || $line eq "COMMIT;");
$full .= "$line\n";
if(index($line, 'plpgsql') > 0) {
#print "$full\n";
mbz_do_sql("begin");
mbz_do_sql($full, 'nodie');
mbz_do_sql("commit");
$full = "";
}
}
close(SQL);
open(SQL, "replication/CreateIndexes.sql");
chomp(my @lines = <SQL>);
foreach my $line (@lines) {
# skip blank lines and single bracket lines
next if($line eq "" || substr($line, 0, 2) eq "--" || substr($line, 0, 1) eq "\\" ||
substr($line, 0, 5) eq "BEGIN");
# no need to create the index if it already exists
my $pos_index = index($line, 'INDEX ');
my $index_name = mbz_trim(substr($line, $pos_index + 6, index($line, ' ', $pos_index + 7) -
$pos_index - 6));
next if(backend_postgresql_index_exists($index_name));
print "$line\n";
mbz_do_sql($line, 'nodie');
}
close(SQL);
open(SQL, "replication/CreatePrimaryKeys.sql");
chomp(my @lines = <SQL>);
foreach my $line (@lines) {
# skip blank lines and single bracket lines
next if($line eq "" || substr($line, 0, 2) eq "--" || substr($line, 0, 1) eq "\\" ||
substr($line, 0, 5) eq "BEGIN");
# no need to create the index if it already exists
my $pos_index = index($line, 'CONSTRAINT ');
my $index_name = mbz_trim(substr($line, $pos_index + 11, index($line, ' ', $pos_index + 12) -
$pos_index - 11));
next if(backend_postgresql_index_exists($index_name));
print "$line\n";
mbz_do_sql($line, 'nodie');
}
close(SQL);
}
# backend_postgresql_update_schema()
# Attempt to update the scheme from the current version to a new version by creating a table with a
# dummy field, altering the tables by adding one field at a time them removing the dummy field. The
# idea is that given any schema and SQL file the new table fields will be added, the same fields
# will result in an error and the table will be left unchanged and fields and tables that have been
# removed from the new schema will not be removed from the current schema.
# This is a crude way of doing it. The field order in each table after it's altered will not be
# retained from the new schema however the field order should not have a big bearing on the usage
# of the database because name based and column ID in scripts that use the database will remain the
# same.
# It would be nice if this subroutine had a makeover so that it would check items before attempting
# to create (and replace) them. This is just so all the error messages and so nasty.
# @return Always 1.
sub backend_postgresql_update_schema_file {
open(SQL, $_[0]);
chomp(my @lines = <SQL>);
my $table = "";
foreach my $line (@lines) {
# skip blank lines and single bracket lines
next if($line eq "" || $line eq "(" || substr($line, 0, 1) eq "\\");
my $stmt = "";
if(substr($line, 0, 12) eq "CREATE TABLE") {
$table = mbz_remove_quotes(substr($line, 13, length($line)));
if(substr($table, length($table) - 1, 1) eq '(') {
$table = substr($table, 0, length($table) - 1);
}
$table = mbz_trim($table);
print $L{'table'} . " $table\n";
$stmt = "CREATE TABLE \"$table\" (dummycolumn int)";
$stmt .= " tablespace $g_tablespace" if($g_tablespace ne '');
} elsif(substr($line, 0, 1) eq " " || substr($line, 0, 1) eq "\t") {
my @parts = split(" ", $line);
for($i = 0; $i < @parts; ++$i) {
if(substr($parts[$i], 0, 2) eq "--") {
@parts = @parts[0 .. ($i - 1)];
last;
}
# because the original MusicBrainz database is PostgreSQL we only need to make
# minimal changes to the SQL.
if(uc(substr($parts[$i], 0, 4)) eq "CUBE" && !$g_contrib_cube) {
$parts[$i] = "TEXT";
}
}
if(substr(reverse($parts[@parts - 1]), 0, 1) eq ",") {
$parts[@parts - 1] = substr($parts[@parts - 1], 0, length($parts[@parts - 1]) - 1);
}
next if($parts[0] eq "CHECK" || $parts[0] eq "CONSTRAINT" || $parts[0] eq "");
$parts[0] = mbz_remove_quotes($parts[0]);
$stmt = "ALTER TABLE \"$table\" ADD \"$parts[0]\" " .
join(" ", @parts[1 .. @parts - 1]);
} elsif(substr($line, 0, 2) eq ");") {
$stmt = "ALTER TABLE \"$table\" DROP dummycolumn";
}
if($stmt ne "") {
# if this statement fails its hopefully because the field exists
$dbh->do($stmt) or print "";
}
}
close(SQL);
return 1;
}
sub backend_postgresql_update_schema {
backend_postgresql_update_schema_file("replication/CreateTables.sql");
backend_postgresql_update_schema_file("replication/ReplicationSetup.sql");
}
# backend_postgresql_table_exists($tablename)
# Check if a table already exists.
# @note This must support searching for VIEWs as well. mbz_table_exists() is used for testing if
# tables and views exist.
# @param $table_name The name of the table to look for.
# @return 1 if the table exists, otherwise 0.
sub backend_postgresql_table_exists {
# TODO: I don't know if this is checking for views.
my $sth = $dbh->prepare("select count(1) as count from information_schema.tables ".
"where table_name='$_[0]'");
$sth->execute();
my $result = $sth->fetchrow_hashref();
return $result->{'count'};
}
# mbz_table_column_exists($table_name, $col_name)
# Check if a table already has a column.
# @param $table_name The name of the table to look for.
# @param $col_name The column name in the table.
# @return 1 if the table column exists, otherwise 0.
sub backend_postgresql_table_column_exists {
my ($table_name, $col_name) = @_;
# TODO: incomplete
return 0;
}
# mbz_load_data()
# Load the data from the mbdump files into the tables.
# @return Always 1, but if something bad goes wrong like a file cannot be opened it will issue a
# die().
sub backend_postgresql_load_data {
my $temp_time = time();
opendir(DIR, "mbdump") || die "Can't open ./mbdump: $!";
@files = sort(grep { $_ ne '.' and $_ ne '..' } readdir(DIR));
$count = @files;
$i = 1;
foreach my $file (@files) {
my $t1 = time();
$table = $file;
next if($table eq "blank.file" || substr($table, 0, 1) eq '.');
print "\n" . localtime() . ": Loading data into '$file' ($i of $count)...\n";
# make sure the table exists
next if(!mbz_table_exists($table));
open(TABLEDUMP, "mbdump/$file") or warn("Error: cannot open file 'mbdump/$file'\n");
my $sth2 = $dbh->prepare("select count(1) from information_schema.columns ".
"where table_name='$table'");
$sth2->execute();
my $result2 = $sth2->fetchrow_hashref();
$dbh->do("COPY $table FROM STDIN");
while($readline = <TABLEDUMP>) {
chomp($readline);
# crop to make postgres happy
my @cols = split(' ', $readline);
$dbh->pg_putcopydata(join(' ', @cols[0 .. ($result2->{'count'} - 1)]) . "\n");
}
close(TABLEDUMP);
$dbh->pg_putcopyend();
my $t2 = time();
print "Done (" . mbz_format_time($t2 - $t1) . ")\n";
++$i;
}
closedir(DIR);
my $t2 = time();
print "\nComplete (" . mbz_format_time($t2 - $temp_time) . ")\n";
return 1;
}
# backend_postgresql_create_extra_tables()
# The mbzdb plugins use a basic key-value table to hold information such as settings.
# @see mbz_set_key(), mbz_get_key().
# @return Passthru from $dbh::do().
sub backend_postgresql_create_extra_tables {
# no need to if the table already exists
return 1 if(mbz_table_exists("kv"));
$sql = "CREATE TABLE kv (" .
"name varchar(255) not null primary key," .
"value text" .
")";
$sql .= " tablespace $g_tablespace" if($g_tablespace ne "");
return mbz_do_sql($sql);
}
# mbz_index_exists($index_name)
# Check if an index already exists.
# @param $index_name The name of the index to look for.
# @return 1 if the index exists, otherwise 0.
sub backend_postgresql_index_exists {
my $sth = $dbh->prepare("select count(*) from (".
"select constraint_name from information_schema.key_column_usage ".
"where constraint_name='$_[0]' union all select indexname from pg_indexes ".
"where indexname='$_[0]') as t");
$sth->execute();
my $result = $sth->fetchrow_hashref();
return $result->{'count'};
}
# mbz_load_pending($id)
# Load Pending and PendingData from the downaloded replication into the respective tables. This
# function is different to mbz_load_data that loads the raw mbdump/ whole tables.
# @param $id The current replication number. See mbz_get_current_replication().
# @return Always 1.
sub backend_postgresql_load_pending {
$id = $_[0];
my $pending = mbz_escape_entity($g_pending);
my $pendingdata = mbz_escape_entity($g_pendingdata);
# make sure there are no pending transactions before cleanup
$temp = $dbh->prepare("SELECT count(1) FROM $pending");
$temp->execute;
@row = $temp->fetchrow_array();
$temp->finish;
return -1 if($row[0] ne '0');
# perform cleanup (makes sure there no left over records in the PendingData table)
$dbh->do("DELETE FROM $pending");
# load Pending and PendingData
print localtime() . ": Loading pending tables... ";
open(TABLEDUMP, "replication/$id/mbdump/$g_pendingfile")
or warn("Error: cannot open file 'replication/$id/mbdump/$g_pendingfile'\n");
$dbh->do("COPY $pending FROM STDIN");
while($readline = <TABLEDUMP>) {
$dbh->pg_putcopydata($readline);
}
close(TABLEDUMP);
$dbh->pg_putcopyend();
open(TABLEDUMP, "replication/$id/mbdump/$g_pendingdatafile")
or warn("Error: cannot open file 'replication/$id/mbdump/$g_pendingdatafile'\n");
$dbh->do("COPY $pendingdata FROM STDIN");
while($readline = <TABLEDUMP>) {
$dbh->pg_putcopydata($readline);
}
close(TABLEDUMP);
$dbh->pg_putcopyend();
print "Done\n";
# PLUGIN_beforereplication()
foreach my $plugin (@g_active_plugins) {
my $function_name = "${plugin}_beforereplication";
(\&$function_name)->($id) || die($!);
}
return 1;
}
# mbz_escape_entity($entity)
# Wnen dealing with table and column names that contain upper and lowercase letters some databases
# require the table name to be encapsulated. PostgreSQL uses double-quotes.
# @return A new encapsulated entity.
sub backend_postgresql_escape_entity {
my $entity = $_[0];
return "\"$entity\"";
}
sub backend_postgresql_create_plpgsql {
my $sth = $dbh->prepare("SELECT count(*) FROM pg_catalog.pg_language WHERE lanname='plpgsql'");
$sth->execute();
my @row = $sth->fetchrow_array();
mbz_do_sql("CREATE LANGUAGE plpgsql") if($row[0] == 0)
}
# be nice
return 1;