Skip to content

Commit

Permalink
Merge pull request #171 from pagreene/add-ref-data
Browse files Browse the repository at this point in the history
Refactor Content Updates
  • Loading branch information
pagreene committed May 25, 2021
2 parents dccb4f0 + dd2a286 commit 053df45
Show file tree
Hide file tree
Showing 7 changed files with 948 additions and 633 deletions.
35 changes: 24 additions & 11 deletions indra_db/cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,15 +101,12 @@ def pipeline_stats(task):
@click.argument("sources", nargs=-1,
type=click.Choice(["pubmed", "pmc_oa", "manuscripts"]),
required=False)
@click.option("-n", "--num_procs", type=int, default=1,
help=('Select the number of processors to use during this '
'operation. Default is 1.'))
@click.option('-c', '--continuing', is_flag=True,
help=('Continue uploading or updating, picking up where you left '
'off.'))
@click.option('-d', '--debug', is_flag=True,
help='Run with debugging level output.')
def content_run(task, sources, num_procs, continuing, debug):
def content_run(task, sources, continuing, debug):
"""Upload/update text refs and content on the database.
\b
Expand Down Expand Up @@ -144,26 +141,42 @@ def content_run(task, sources, num_procs, continuing, debug):
for ContentManager in selected_managers:
if task == 'upload':
print(f"Uploading {ContentManager.my_source}.")
ContentManager().populate(db, num_procs, continuing)
ContentManager().populate(db, continuing)
elif task == 'update':
print(f"Updating {ContentManager.my_source}")
ContentManager().update(db, num_procs)
ContentManager().update(db)


@main.command()
def content_list():
@click.option('-l', '--long', is_flag=True,
help="Include a list of the most recently added content for all "
"source types.")
def content_list(long):
"""List the current knowledge sources and their status."""
# Import what is needed.
import tabulate
from sqlalchemy import func
from .content import Pubmed, PmcOA, Manuscripts

content_managers = [Pubmed, PmcOA, Manuscripts]
db = get_db('primary')

# Generate the rows.
rows = [(cm.my_source, format_date(cm.get_latest_update(db)))
for cm in content_managers]
print(tabulate.tabulate(rows, ('Source', 'Last Updated'),
tablefmt='simple'))
source_updates = {cm.my_source: format_date(cm.get_latest_update(db))
for cm in content_managers}
if long:
print("This may take a while...", end='', flush=True)
q = (db.session.query(db.TextContent.source,
func.max(db.TextContent.insert_date))
.group_by(db.TextContent.source))
rows = [(src, source_updates.get(src, '-'), format_date(last_insert))
for src, last_insert in q.all()]
headers = ('Source', 'Last Updated', 'Latest Insert')
print("\r", end='')
else:
rows = [(k, v) for k, v in source_updates.items()]
headers = ('Source', 'Last Updated')
print(tabulate.tabulate(rows, headers, tablefmt='simple'))


@main.command()
Expand Down

0 comments on commit 053df45

Please sign in to comment.