New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Dump and restore Stochastic Event Set #1300
Changes from 12 commits
d0ecad0
735913d
9351be0
050dac1
fcd0432
00409d2
773172e
ec2b185
dd82f1a
ec46f67
d2f8a98
11342c1
9317bf7
2407520
1beea6f
47f7a04
ebb7ac9
0ed3cd7
2513cd1
4b8cc9b
cb015d3
db386a0
2367c51
b853685
0ffd443
a725726
a9c3364
8925e60
3c207bb
9eda7e3
36d6d68
95f54de
69f228d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
GRANT ALL ON SCHEMA admin TO oq_admin; | ||
GRANT ALL ON SCHEMA htemp TO oq_admin; | ||
GRANT ALL ON SCHEMA hzrdi TO oq_admin; | ||
GRANT ALL ON SCHEMA hzrdr TO oq_admin; | ||
GRANT ALL ON SCHEMA riski TO oq_admin; | ||
GRANT ALL ON SCHEMA riskr TO oq_admin; | ||
GRANT ALL ON SCHEMA uiapi TO oq_admin; |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -43,7 +43,6 @@ | |
import os | ||
import shutil | ||
import tarfile | ||
import gzip | ||
import argparse | ||
import psycopg2 | ||
import tempfile | ||
|
@@ -60,7 +59,7 @@ def _tuplestr(tup): | |
class Copier(object): | ||
""" | ||
Small wrapper around a psycopg2 cursor, which a .copy method | ||
writing directly to .gz files. It remembers the copied filenames, | ||
writing directly to csv files. It remembers the copied filenames, | ||
which are stored in the attribute .filenames. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's the reason why we are not generating .gz files? This makes a lot of difference in terms of disk space occupation. Notice that this tool is intended to be used even for large outputs. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This work is experimental. So, I did not want to add too many feature. I prefer at this moment to let the user zip or not zip the result. however, I see where you are coming from. Therefore, I have added a ticket https://bugs.launchpad.net/oq-engine/+bug/1248584 |
||
""" | ||
def __init__(self, psycopg2_cursor): | ||
|
@@ -79,22 +78,20 @@ def fetchall(self, query, *args): | |
|
||
def copy(self, query, dest, name, mode): | ||
""" | ||
Performs a COPY TO/FROM operation. <Works directly with gzipped files. | ||
Performs a COPY TO/FROM operation. <Works directly with csv files. | ||
|
||
:param str query: the COPY query | ||
:param str dest: the destination directory | ||
:param str name: the destination file name (no .gz) | ||
:param str name: the destination file name | ||
:param chr mode: 'w' (for COPY TO) or 'r' (for COPY FROM) | ||
""" | ||
fname = os.path.join(dest, name + '.gz') | ||
fname = os.path.join(dest, name) | ||
log.info('%s\n(-> %s)', query, fname) | ||
TIMESTAMP = 1378800715.0 # some fake timestamp | ||
# here is some trick to avoid storing filename and timestamp info | ||
with open(fname, mode) as fileobj: | ||
with gzip.GzipFile('', fileobj=fileobj, mtime=TIMESTAMP) as z: | ||
self._cursor.copy_expert(query, z) | ||
if fname not in self.filenames: | ||
self.filenames.append(fname) | ||
self._cursor.copy_expert(query, fileobj) | ||
if fname not in self.filenames: | ||
self.filenames.append(fname) | ||
|
||
|
||
class HazardDumper(object): | ||
|
@@ -107,51 +104,39 @@ class HazardDumper(object): | |
print hd.mktar() # generate a tarfile | ||
""" | ||
|
||
def __init__(self, conn, outdir=None, format='text'): | ||
def __init__(self, conn, outdir=None): | ||
self.conn = conn | ||
self.curs = Copier(conn.cursor()) | ||
self.format = format | ||
# there is no binary format for geography in postgis 1.5, | ||
# this is why we are requiring text format | ||
assert format == 'text', format | ||
if outdir: | ||
if os.path.exists(outdir): | ||
# cleanup previously dumped archives, if any | ||
for fname in os.listdir(outdir): | ||
if fname.endswith('.gz'): | ||
os.remove(os.path.join(outdir, fname)) | ||
else: | ||
os.mkdir(outdir) | ||
outdir = outdir or "/tmp/hc-dump" | ||
if os.path.exists(outdir): | ||
# cleanup previously dumped archives, if any | ||
for fname in os.listdir(outdir): | ||
if fname.endswith('.csv'): | ||
os.remove(os.path.join(outdir, fname)) | ||
else: | ||
outdir = tempfile.mkdtemp(prefix='hazard_calculation-') | ||
os.mkdir(outdir) | ||
self.outdir = outdir | ||
|
||
def hazard_calculation(self, ids): | ||
"""Dump hazard_calculation, lt_realization, hazard_site""" | ||
self.curs.copy( | ||
"""copy (select * from uiapi.hazard_calculation where id in %s) | ||
to stdout with (format '%s')""" % (ids, self.format), | ||
to stdout | ||
with (format 'csv', header true, encoding 'utf8')""" % ids, | ||
self.outdir, 'uiapi.hazard_calculation.csv', 'w') | ||
self.curs.copy( | ||
"""copy (select * from hzrdr.lt_realization | ||
where hazard_calculation_id in %s) | ||
to stdout with (format '%s')""" % (ids, self.format), | ||
where hazard_calculation_id in %s) | ||
to stdout | ||
with (format 'csv', header true, encoding 'utf8')""" % ids, | ||
self.outdir, 'hzrdr.lt_realization.csv', 'w') | ||
self.curs.copy( | ||
"""copy (select * from hzrdi.hazard_site | ||
where hazard_calculation_id in %s) | ||
to stdout with (format '%s')""" % (ids, self.format), | ||
where hazard_calculation_id in %s) | ||
to stdout | ||
with (format 'csv', header true, encoding 'utf8')""" % ids, | ||
self.outdir, 'hzrdi.hazard_site.csv', 'w') | ||
|
||
def performance(self, *job_ids): | ||
"""Dump performance""" | ||
ids = _tuplestr(job_ids) | ||
self.oq_job(ids) | ||
self.curs.copy( | ||
"""copy (select * from uiapi.performance where oq_job_id in %s) | ||
to stdout with (format '%s')""" % (ids, self.format), | ||
self.outdir, 'uiapi.performance.csv', 'w') | ||
|
||
def oq_job(self, ids): | ||
"""Dump hazard_calculation, oq_job""" | ||
hc_ids = self.curs.tuplestr( | ||
|
@@ -163,71 +148,81 @@ def oq_job(self, ids): | |
self.hazard_calculation(hc_ids) | ||
self.curs.copy( | ||
"""copy (select * from uiapi.oq_job where id in %s) | ||
to stdout with (format '%s')""" % (ids, self.format), | ||
to stdout | ||
with (format 'csv', header true, encoding 'utf8')""" % ids, | ||
self.outdir, 'uiapi.oq_job.csv', 'w') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is a lot of duplication, probably a template for the "copy" SQL query is a good idea. |
||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. While dumping the performance table is not related to the load/save functionality, it is very common that we want to save the performance info about one computation before deleting it. This is the reason why the functionality was there. |
||
def output(self, ids): | ||
"""Dump output""" | ||
self.curs.copy( | ||
"""copy (select * from uiapi.output where id in %s) | ||
to stdout with (format '%s')""" % (ids, self.format), | ||
to stdout | ||
with (format 'csv', header true, encoding 'utf8')""" % ids, | ||
self.outdir, 'uiapi.output.csv', 'w') | ||
|
||
def hazard_curve(self, output): | ||
"""Dump hazard_curve, hazard_curve_data""" | ||
self.curs.copy( | ||
"""copy (select * from hzrdr.hazard_curve where output_id in %s) | ||
to stdout with (format '%s')""" % (output, self.format), | ||
to stdout | ||
with (format 'csv', header true, encoding 'utf8')""" % output, | ||
self.outdir, 'hzrdr.hazard_curve.csv', 'a') | ||
|
||
ids = self.curs.tuplestr( | ||
'select id from hzrdr.hazard_curve where output_id in %s' % output) | ||
|
||
self.curs.copy( | ||
"""copy (select * from hzrdr.hazard_curve_data | ||
where hazard_curve_id in {}) | ||
to stdout with (format '{}')""".format(ids, self.format), | ||
where hazard_curve_id in {}) | ||
to stdout | ||
with (format 'csv', header true, encoding 'utf8')""".format( | ||
ids), | ||
self.outdir, 'hzrdr.hazard_curve_data.csv', 'a') | ||
|
||
def gmf(self, output): | ||
"""Dump gmf, gmf_data""" | ||
self.curs.copy( | ||
"""copy (select * from hzrdr.gmf | ||
where output_id in %s) | ||
to stdout with (format '%s')""" % (output, self.format), | ||
where output_id in %s) | ||
to stdout | ||
with (format 'csv', header true, encoding 'utf8')""" % output, | ||
self.outdir, 'hzrdr.gmf.csv', 'a') | ||
|
||
coll_ids = self.curs.tuplestr('select id from hzrdr.gmf ' | ||
'where output_id in %s' % output) | ||
self.curs.copy( | ||
"""copy (select * from hzrdr.gmf_data | ||
where gmf_id in %s) | ||
to stdout with (format '%s')""" % (coll_ids, self.format), | ||
where gmf_id in %s) | ||
to stdout | ||
with (format 'csv', header true, encoding 'utf8')""" % coll_ids, | ||
self.outdir, 'hzrdr.gmf_data.csv', 'a') | ||
|
||
def ses(self, output): | ||
"""Dump ses_collection, ses, ses_rupture""" | ||
self.curs.copy( | ||
"""copy (select * from hzrdr.ses_collection | ||
where output_id in %s) | ||
to stdout with (format '%s')""" % (output, self.format), | ||
where output_id in %s) | ||
to stdout | ||
with (format 'csv', header true, encoding 'utf8')""" % output, | ||
self.outdir, 'hzrdr.ses_collection.csv', 'a') | ||
|
||
coll_ids = self.curs.tuplestr('select id from hzrdr.ses_collection ' | ||
'where output_id in %s' % output) | ||
self.curs.copy( | ||
"""copy (select * from hzrdr.ses | ||
where ses_collection_id in %s) | ||
to stdout with (format '%s')""" % (coll_ids, self.format), | ||
where ses_collection_id in %s) | ||
to stdout | ||
with (format 'csv', header true, encoding 'utf8')""" % coll_ids, | ||
self.outdir, 'hzrdr.ses.csv', 'a') | ||
|
||
ses_ids = self.curs.tuplestr( | ||
'select id from hzrdr.ses where ses_collection_id in %s' | ||
% coll_ids) | ||
self.curs.copy( | ||
"""copy (select * from hzrdr.ses_rupture | ||
where ses_id in %s) | ||
to stdout with (format '%s')""" % (ses_ids, self.format), | ||
where ses_id in %s) | ||
to stdout | ||
with (format 'csv', header true, encoding 'utf8')""" % ses_ids, | ||
self.outdir, 'hzrdr.ses_rupture.csv', 'a') | ||
|
||
def dump(self, *hazard_calculation_ids): | ||
|
@@ -268,7 +263,7 @@ def dump(self, *hazard_calculation_ids): | |
self.output(_tuplestr(all_outs)) | ||
for output_type, output_ids in outputs: | ||
ids = _tuplestr(output_ids) | ||
print "Dumping %s %s" % (output_type, ids) | ||
print "Dumping %s %s in %s" % (output_type, ids, self.outdir) | ||
if output_type in ['hazard_curve', 'hazard_curve_multi']: | ||
self.hazard_curve(ids) | ||
elif output_type in ('gmf', 'gmf_scenario'): | ||
|
@@ -298,30 +293,41 @@ def mktar(self): | |
|
||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If the functionality is intended to be called from bin/openquake only we should remove the main. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Removed There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. And perhaps also adding a comment/docstring saying that the dump/restore library is used in bin/openquake. |
||
def main(hazard_calculation_id, outdir=None, | ||
host='localhost', dbname='openquake', | ||
user='admin', password='', port=None): | ||
host=None, dbname=None, user=None, password=None, port=None): | ||
""" | ||
Dump a hazard_calculation and its relative outputs | ||
""" | ||
from openquake.engine.db.models import set_django_settings_module | ||
set_django_settings_module() | ||
from django.conf import settings | ||
default_cfg = settings.DATABASES['default'] | ||
host = host or default_cfg.get('HOST', 'localhost') | ||
dbname = dbname or default_cfg.get('NAME', 'openquake') | ||
user = default_cfg.get('USER', 'oq_admin') | ||
password = default_cfg.get('PASSWORD', 'openquake') | ||
port = port or str(default_cfg.get('PORT', 5432)) | ||
# this is not using the predefined Django connections since | ||
# the typical use case is to dump from a remote database | ||
logging.basicConfig(level=logging.INFO) | ||
logging.basicConfig(level=logging.WARN) | ||
conn = psycopg2.connect( | ||
host=host, database=dbname, user=user, password=password, port=port) | ||
hc = HazardDumper(conn, outdir) | ||
hc.dump(hazard_calculation_id) | ||
log.info('Written %s' % hc.outdir) | ||
conn.close() | ||
return hc.outdir | ||
|
||
|
||
if __name__ == '__main__': | ||
p = argparse.ArgumentParser() | ||
|
||
p.add_argument('hazard_calculation_id') | ||
p.add_argument('outdir') | ||
p.add_argument('host', nargs='?', default='localhost') | ||
p.add_argument('dbname', nargs='?', default='openquake') | ||
p.add_argument('user', nargs='?', default='oq_admin') | ||
p.add_argument('password', nargs='?', default='openquake') | ||
p.add_argument('port', nargs='?', default='5432') | ||
p.add_argument('outdir', nargs='?') | ||
p.add_argument('host', nargs='?') | ||
p.add_argument('dbname', nargs='?') | ||
p.add_argument('user', nargs='?') | ||
p.add_argument('password', nargs='?') | ||
p.add_argument('port', nargs='?') | ||
arg = p.parse_args() | ||
main(arg.hazard_calculation_id, arg.outdir, arg.host, | ||
arg.dbname, arg.user, arg.password, arg.port) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
After yesterday discussion, I think we should change the names to --save-hazard-calculation and --load-hazard-calculation. Paul and Matteo like those names better. Also restore_hazards.py and dump_hazards.py could be renamed.