Skip to content

Commit

Permalink
Merge pull request #2009 from antgonza/fix-doi
Browse files Browse the repository at this point in the history
WIP: Fix DOI issues
  • Loading branch information
josenavas committed Dec 16, 2016
2 parents c5c38a6 + 4151658 commit c0b5a6f
Show file tree
Hide file tree
Showing 18 changed files with 1,488 additions and 1,124 deletions.
66 changes: 19 additions & 47 deletions qiita_db/study.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,8 @@ def get_info(cls, study_ids=None, info_cols=None):
qdb.util.get_table_cols('study'),
qdb.util.get_table_cols('study_status'),
qdb.util.get_table_cols('timeseries_type'),
qdb.util.get_table_cols('study_publication')))
# placeholder for table study_publication
['publications']))

if info_cols is None:
info_cols = _info_cols
Expand All @@ -254,19 +255,19 @@ def get_info(cls, study_ids=None, info_cols=None):

with qdb.sql_connection.TRN:
sql = """SELECT {0}
FROM (
qiita.study
JOIN qiita.timeseries_type USING (timeseries_type_id)
LEFT JOIN (
SELECT study_id, array_agg(
publication_doi ORDER BY publication_doi)
AS publication_doi
FROM qiita.study
LEFT JOIN (
SELECT study_id,
array_agg(row_to_json((publication, is_doi), true))
AS publications
FROM qiita.study_publication
GROUP BY study_id) sp USING (study_id)
JOIN qiita.study_portal USING (study_id)
JOIN qiita.portal_type USING (portal_type_id))
GROUP BY study_id)
AS full_publications
USING (study_id)
JOIN qiita.timeseries_type USING (timeseries_type_id)
JOIN qiita.study_portal USING (study_id)
JOIN qiita.portal_type USING (portal_type_id)
WHERE portal = %s""".format(search_cols)

args = [qiita_config.portal]
if study_ids is not None:
sql = "{0} AND study_id IN %s".format(sql)
Expand Down Expand Up @@ -645,11 +646,9 @@ def publications(self):
list of all the DOI and pubmed ids
"""
with qdb.sql_connection.TRN:
sql = """SELECT doi, pubmed_id
FROM qiita.publication p
JOIN qiita.study_publication sp
ON sp.publication_doi = p.doi
WHERE sp.study_id = %s"""
sql = """SELECT publication, is_doi
FROM qiita.study_publication
WHERE study_id = %s"""
qdb.sql_connection.TRN.add(sql, [self._id])
return qdb.sql_connection.TRN.execute_fetchindex()

Expand Down Expand Up @@ -677,37 +676,10 @@ def publications(self, values):
qdb.sql_connection.TRN.add(sql, [self._id])

# Set the new ones
sql = """INSERT INTO qiita.publication (doi, pubmed_id)
SELECT %s, %s
WHERE NOT EXISTS(
SELECT doi FROM qiita.publication WHERE doi = %s)"""
sql_args = [(doi, pmid, doi) for doi, pmid in values]
qdb.sql_connection.TRN.add(sql, sql_args, many=True)

sql = """INSERT INTO qiita.study_publication
(study_id, publication_doi)
VALUES (%s, %s)"""
sql_args = [[self._id, doi] for doi, _ in values]
qdb.sql_connection.TRN.add(sql, sql_args, many=True)
qdb.sql_connection.TRN.execute()

def add_publications(self, publications):
"""Add publications to study
Parameters
----------
publications : list of (str, str)
A list with the (DOI, pubmed id) to associate with the study
"""
with qdb.sql_connection.TRN:
sql = """INSERT INTO qiita.publication (doi, pubmed_id)
VALUES (%s, %s)"""
qdb.sql_connection.TRN.add(sql, publications, many=True)

sql = """INSERT INTO qiita.study_publication
(study_id, publication_doi)
VALUES (%s, %s)"""
sql_args = [[self.id, doi] for doi, _ in publications]
(study_id, publication, is_doi)
VALUES (%s, %s, %s)"""
sql_args = [[self._id, pub, is_doi] for pub, is_doi in values]
qdb.sql_connection.TRN.add(sql, sql_args, many=True)
qdb.sql_connection.TRN.execute()

Expand Down
19 changes: 19 additions & 0 deletions qiita_db/support_files/patches/46.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
-- Dec 3, 2016
-- Modify qiita.study_publication so studies can have string
-- dois and pubmed ids
--
-- After this patch the table no longer points at qiita.publication: the
-- identifier column is renamed to the generic "publication" and a new
-- "is_doi" flag records which kind of identifier each row holds.


-- dropping PRIMARY KEY ( study_id, publication_doi )
-- (constraint name idx_study_publication_0)
ALTER TABLE qiita.study_publication DROP CONSTRAINT idx_study_publication_0;

-- dropping FOREIGN KEY ( study_id ) REFERENCES qiita.study( study_id )
-- NOTE(review): the qiita-db.dbs schema shipped with this change still
-- declares fk_study_publication_study on this table -- confirm whether this
-- constraint is meant to stay dropped or should be re-created.
ALTER TABLE qiita.study_publication DROP CONSTRAINT fk_study_publication_study;

-- dropping FOREIGN KEY ( publication_doi ) REFERENCES qiita.publication( doi )
-- so the column can hold values that are not rows of qiita.publication
ALTER TABLE qiita.study_publication DROP CONSTRAINT fk_study_publication;

-- renaming publication_doi to publication
ALTER TABLE qiita.study_publication RENAME publication_doi TO publication;

-- adding a new column so we know if the publication is doi or pubmedid
-- (no default is given, so existing rows get NULL in is_doi)
ALTER TABLE qiita.study_publication ADD COLUMN is_doi boolean;
32 changes: 32 additions & 0 deletions qiita_db/support_files/patches/python_patches/46.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import qiita_db as qdb


# Re-populate qiita.study_publication using the (publication, is_doi,
# study_id) layout: each previously linked (doi, pubmed_id) pair becomes up
# to two rows, one for the DOI and one for the pubmed id.
# Everything runs inside a single transaction block so the delete and the
# re-inserts are issued together.
with qdb.sql_connection.TRN:
    # selecting all doi/pubmedids currently attached to a study, leaving out
    # the DOIs that are listed in qiita.software_publication
    sql = """SELECT p.doi, pubmed_id, study_id
             FROM qiita.study_publication AS sp
                LEFT JOIN qiita.publication AS p ON (sp.publication = p.doi)
             WHERE p.doi NOT IN (
                SELECT publication_doi FROM qiita.software_publication)"""
    qdb.sql_connection.TRN.add(sql)

    pubs = qdb.sql_connection.TRN.execute_fetchindex()

    # deleting all references to start from scratch
    sql = """DELETE FROM qiita.study_publication"""
    qdb.sql_connection.TRN.add(sql)
    qdb.sql_connection.TRN.execute()

    # reinserting following the new structure; the statement is the same for
    # every row, so it is built once outside the loop
    sql = """INSERT INTO qiita.study_publication
                (publication, is_doi, study_id)
             VALUES (%s, %s, %s)"""
    for doi, pid, sid in pubs:
        to_insert = []
        if doi is not None:
            to_insert.append([doi, True, sid])
        # Only add the pubmed id when there is one. The previous check,
        # `pid not in to_insert`, compared a scalar against a list of
        # [value, flag, study] rows and was therefore always true, which
        # queued a NULL publication whenever pubmed_id was NULL.
        if pid is not None:
            to_insert.append([pid, False, sid])

        # nothing to store for this link: skip the round trip
        if to_insert:
            qdb.sql_connection.TRN.add(sql, to_insert, many=True)
            qdb.sql_connection.TRN.execute()
18 changes: 3 additions & 15 deletions qiita_db/support_files/qiita-db.dbs
Original file line number Diff line number Diff line change
Expand Up @@ -1726,23 +1726,11 @@ Controlled Vocabulary]]></comment>
</table>
<table name="study_publication" >
<column name="study_id" type="bigint" jt="-5" mandatory="y" />
<column name="publication_doi" type="varchar" jt="12" mandatory="y" />
<index name="idx_study_publication" unique="NORMAL" >
<column name="study_id" />
</index>
<index name="idx_study_publication" unique="NORMAL" >
<column name="publication_doi" />
</index>
<index name="idx_study_publication_0" unique="PRIMARY_KEY" >
<column name="study_id" />
<column name="publication_doi" />
</index>
<column name="publication" type="varchar" jt="12" mandatory="y" />
<column name="is_doi" type="bool" jt="-7" />
<fk name="fk_study_publication_study" to_schema="qiita" to_table="study" >
<fk_column name="study_id" pk="study_id" />
</fk>
<fk name="fk_study_publication" to_schema="qiita" to_table="publication" >
<fk_column name="publication_doi" pk="doi" />
</fk>
</table>
<table name="study_sample" >
<comment>Required info for each sample. One row is one sample.</comment>
Expand Down Expand Up @@ -1909,7 +1897,6 @@ Controlled Vocabulary]]></comment>
<entity schema="qiita" name="study_sample" color="d0def5" x="1455" y="150" />
<entity schema="qiita" name="publication" color="b2cdf7" x="2175" y="795" />
<entity schema="qiita" name="timeseries_type" color="c0d4f3" x="2235" y="555" />
<entity schema="qiita" name="study_publication" color="b2cdf7" x="1980" y="705" />
<entity schema="qiita" name="oauth_identifiers" color="b7c8e3" x="2490" y="720" />
<entity schema="qiita" name="oauth_software" color="b2cdf7" x="2340" y="720" />
<entity schema="qiita" name="analysis" color="d0def5" x="225" y="885" />
Expand Down Expand Up @@ -1944,6 +1931,7 @@ Controlled Vocabulary]]></comment>
<entity schema="qiita" name="processing_job" color="b2cdf7" x="1980" y="1335" />
<entity schema="qiita" name="default_workflow_edge_connections" color="b2cdf7" x="2340" y="1455" />
<entity schema="qiita" name="processing_job_validator" color="b2cdf7" x="2190" y="1380" />
<entity schema="qiita" name="study_publication" color="b2cdf7" x="1980" y="705" />
<group name="Group_analyses" color="c4e0f9" >
<comment>analysis tables</comment>
<entity schema="qiita" name="analysis" />
Expand Down
Loading

0 comments on commit c0b5a6f

Please sign in to comment.