From 9231a6f47b9b1c92b898844b6af9c6390a5d2c27 Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Fri, 19 Dec 2014 11:00:43 +0100 Subject: [PATCH 01/39] NASA ADS query : initial commit --- astroquery/nasa_ads/__init__.py | 69 +++ astroquery/nasa_ads/core.py | 645 ++++++++++++++++++++++ astroquery/nasa_ads/tests/__init__.py | 0 astroquery/nasa_ads/tests/test_nasaads.py | 0 docs/nasa_ads/nasa_ads.rst | 47 ++ 5 files changed, 761 insertions(+) create mode 100644 astroquery/nasa_ads/__init__.py create mode 100644 astroquery/nasa_ads/core.py create mode 100644 astroquery/nasa_ads/tests/__init__.py create mode 100644 astroquery/nasa_ads/tests/test_nasaads.py create mode 100644 docs/nasa_ads/nasa_ads.rst diff --git a/astroquery/nasa_ads/__init__.py b/astroquery/nasa_ads/__init__.py new file mode 100644 index 0000000000..cb5cd010c1 --- /dev/null +++ b/astroquery/nasa_ads/__init__.py @@ -0,0 +1,69 @@ +# Licensed under a 3-clause BSD style license - see LICENSE.rst +""" +SAO/NASA ADS Query Tool +----------------------------------- + +:Author: Magnus Vilhelm Persson (magnusp@vilhelm.nu) + +""" +from astropy import config as _config + + +class Conf(_config.ConfigNamespace): + """ + Configuration parameters for `astroquery.nasa_ads`. 
+ """ + mirror_urls = _config.ConfigItem( + ['http://adswww.harvard.edu/', + 'http://cdsads.u-strasbg.fr/', + 'http://ukads.nottingham.ac.uk/', + 'http://esoads.eso.org/', + 'http://ads.ari.uni-heidelberg.de/' + 'http://ads.inasan.ru/', + 'http://ads.mao.kiev.ua/', + 'http://ads.astro.puc.cl/', + 'http://ads.nao.ac.jp/', + 'http://ads.bao.ac.cn/', + 'http://ads.iucaa.ernet.in/', + 'http://ads.arsip.lipi.go.id/', + 'http://saaoads.chpc.ac.za/', + 'http://ads.on.br/'], + 'SAO/NASA ADS mirrors around the world' + ) + + advanced_url = _config.ConfigItem( + 'abstract_service.html', + 'Path for advanced query' + ) + + simple_url = _config.ConfigItem( + 'abstract_service.html', + 'Path for advanced query' + ) + + timeout = _config.ConfigItem( + 60, + 'Time limit for connecting to ADS server.' + ) + lines_limit = _config.ConfigItem( + 1000, + 'Limit to number of hits exported.' + ) + +conf = Conf() + +from .core import ADS, ADSClass + +__all__ = ['ADS', 'ADSClass', + 'Conf', 'conf', + ] + + + + + + + +""" +advanced_q = 'abstract_service.html' +""" diff --git a/astroquery/nasa_ads/core.py b/astroquery/nasa_ads/core.py new file mode 100644 index 0000000000..e68d7001d0 --- /dev/null +++ b/astroquery/nasa_ads/core.py @@ -0,0 +1,645 @@ +#! /usr/bin/env python +# -*- coding: utf-8 -*- +# +# adslib.py +# +# Module to search the ads +# +# Copyright 2012 Magnus Persson +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +# MA 02110-1301, USA. +# +# version 0.0.1a + +""" +Script to search the NASA ADS directory + +Need : o scipy + o mechanize module (standard in Python >2.6/2.7?) + o urllib2 module (standard Python module, required(?) by mechanize) + o beautiful soup/xml (xml standard in Python >2.6/2.7?) + +""" + +""" +ADSlib - Python Module to interact with NASA ADS +at + +http://adswww.harvard.edu/ + +OR one of the mirrors + +http://cdsads.u-strasbg.fr/ +http://ukads.nottingham.ac.uk/ +http://esoads.eso.org/ +http://ads.ari.uni-heidelberg.de/ +http://ads.inasan.ru/ +http://ads.mao.kiev.ua/ +http://ads.astro.puc.cl/ +http://ads.nao.ac.jp/ +http://ads.bao.ac.cn/ +http://ads.iucaa.ernet.in/ +http://ads.arsip.lipi.go.id/ +http://saaoads.chpc.ac.za/ +http://ads.on.br/ + + + + +""" + +""" +----[ Change log ]---- + +* 2012 Dec 15 + Code cleanup. + +* 2012 Oct 29 + Now only uses mechanize module(!) Yay. + +* 2012 Oct 02 + File created. 
+ + + +""" + +""" +NOTES: +# advanced search query +abstract_service.html + +# quick search +index.html + +""" + + +mirrors = [ + 'http://adswww.harvard.edu/', + 'http://cdsads.u-strasbg.fr/', + 'http://ukads.nottingham.ac.uk/', + 'http://esoads.eso.org/', + 'http://ads.ari.uni-heidelberg.de/' + 'http://ads.inasan.ru/', + 'http://ads.nao.ac.jp/', + 'http://ads.iucaa.ernet.in/', + 'http://ads.arsip.lipi.go.id/', + 'http://saaoads.chpc.ac.za/', + 'http://ads.on.br/' + ] + +advanced_q = 'abstract_service.html' + +def search(query, **kwargs): + """ + query : Normal string to ADS + or dictionary for advanced search + + """ + + + ### test code to get it up and running + + # main access + # TODO : either access via Z39.50 or via URLlib/mecahnise etc + + # wishlist + # TODO : simple search + # TODO : advanced search + # TODO : browse + + + import locale + # this reads the environment and inits the right locale + locale.setlocale(locale.LC_ALL, "") + + + try: + # the mechanize module exports urllib2 as well... + import mechanize + import urllib + except (ImportError): + print 'You need the \"mechanize\" and urllib module' + ' for this script to work.' + + try: + from BeautifulSoup import BeautifulSoup as bfs + except (ImportError): + print 'You need the BeautifulSoup module...' + + + import scipy + import sys + + #from string import lower, upper + # search URL + # http://adsabs.harvard.edu/cgi-bin/nph-basic_connect?qsearch=The+Search+String + + # to parse the search string from "The Search String" to "The+Search+String" + # urllib.quote(url, safe=":/") + + ############################################ + ######## GET THE FORM + + #~ Ping to know which server to use. 
+ working_mirror = 0 + + got_reply = 0 + while not got_reply: + try: + # try to get the form + response = mechanize.urlopen(mirrors[working_mirror] + types_q[search_type]) + except mechanize.URLError: + # if we can't get it, try another mirror + if not i < len(mirrors): + break + else: + working_mirror += 1 + pass + else: + got_reply = True + + if not got_reply and working_mirror >= len(mirrors): + # TODO log output + sys.stderr.write('ERROR : You have to be connected to the internet to access the NASA ADS database and it has to be online (on all mirrors).') + else: + # TODO log output + print ('got reply from : {0}'.format(mirrors[working_mirror])) + + + + + #~ Then check if going for the advanced interface. + #~ advanced = int((type(query) == type({})) + if 'advanced' in kwargs: + # ADVANCED QUERY + # + # Should I use http://adsabs.harvard.edu/abstract_service.html + # or the full ADS Labs? + response = mechanize.urlopen(mirrors[working_mirror] + advanced_q) + forms = mechanize.ParseResponse(response, backwards_compat=False) + response.close() + form = forms[0] + #~ if arg.has_key('dbg_snd_form'): # for test purposes + #~ return form + #~ form['qsearch'] = '^Persson 2012' + + ######## SUBMIT FORM + #~ clicked_form = form.click() + + #~ result = mechanize.urlopen(clicked_form) + + pass + + elif not 'advanced' in kwargs: + # SIMPLE QUERY + baseurl = (mirrors[working_mirror] + + 'cgi-bin/nph-basic_connect?qsearch=') + + result = mechanize.urlopen( urllib.quote(baseurl + query, safe = ":/=?^") ) + # test below + data = urllib.urlencode({'qsearch' : '^Persson'}) + baseurl = (mirrors[working_mirror] + + 'cgi-bin/nph-basic_connect?') + f = urllib.urlopen(baseurl, data) + ############################################ + ######## PARSE RESULTS + + page = result.readlines() + result.close() + + # start parsing the results + t = bfs(' '.join(page)) + tables = t.findAll('table') + + r = tables[1].findAll('td')[0] + y = r.findAll('strong')[0].contents[0] + nres = int(y) + if 
nres<1: + return 0 + + # get table with results + resulttable = tables[2] + # get the rows of the table + rows = resulttable.findAll('tr') + # get each result entry per list item + entries = [rows[i:i+3][1:] for i in scipy.arange(2,57,3)][:-1] + + ############################################ + ######## GET RESULTLIST + + ###### the problem with this is that web is in UNICODE, + # ie. Jørgensen, æ and åäö and ßü etc are represented by funny numbers and '\' + + #resultlist = [_Result(i) for i in entries] + return _Resultlist(entries) + + +############################################ +######## DEFINE RESULT(S) OBJECT + + +class _Resultlist: + """ + Internal object to represent the result list + """ + def __init__(self, entries): + self.resultlist = [_Result(i) for i in entries] + def sort(self,sortkey = 'author', reverse_bool = False): + from operator import itemgetter, attrgetter + #~ sorted(resultlist, key=attrgetter('author'), reverse=True) + return sorted(self.resultlist, key=attrgetter(sortkey), reverse = reverse_bool) + def __str__(self): + printlist = [] + for i in self.resultlist[:-1]: + printlist.append('Author : {0.author}\n' + 'Title : {0.title}\n' + 'Score : {0.ads_score}\n'.format(i)) + return '\n'.join(printlist) + +class _Result: + """ + Internal object to represent each result + """ + def __init__(self, entry): + #~ def __init__(self, author, + #~ authors, + #~ title, + #~ score, + #~ bibcode, + #~ pubdate, + #~ links): + #~ self.author = author + #~ self.authorlist = authors + #~ self.title = title + #~ self.score = score + #~ self.bibcode = bibcode + #~ self.pubdate = pubdate # parse? 
+ #~ self.links = links # dictionary of all the links + # + td_tags0 = entry[0].findAll('td') + self.bibcode = td_tags0[1].findAll('input')[0]['value'].encode('UTF-8') + self.url_abstract_page = td_tags0[1].findAll('a')[0]['href'].encode('UTF-8') + self.ads_score = float(td_tags0[3].contents[0].encode('UTF-8')) + self.rank = 100 - self.ads_score + self.pubdate = td_tags0[4].contents[0].string.encode('UTF-8') + self.pubday = self.pubdate[:2] + self.pubyear = self.pubdate[3:] + # + self.links = dict() + for link in td_tags0[5].findAll('a'): + self.links[link.string.encode()] = link['href'].encode('UTF-8') + # + td_tags1 = entry[1].findAll('td') + + # second part of the result entry + self.title = td_tags1[3].contents[0].string.encode('UTF-8') + # still in unicode + # TODO need to convert to normal UTF, not unicode + authors = td_tags1[1].contents[0].encode('UTF-8').split(';') + if authors[-1] == ' ': + # so, if the last entry in the authorlist is empty, means + # it split a ';', which in turn means there are more + # authors, need to add that part... + authors[-1] = td_tags1[1].contents[1].contents[0].encode('UTF-8') + ', COAuth' + # + self.authors = [i.split(',') for i in authors] + self.author = ', '.join(self.authors[0]) + # + #~ self. 
+ def __repr__(self): + return repr([self.author, self.authors, self.title, self.url_abstract_page, self.ads_score, self.links, self.bibcode, self.pubdate]) + def _returnlist_(self): + return [self.author, self.authors, self.title, self.url_abstract_page, self.ads_score, self.links, self.bibcode, self.pubdate] + +#~ # second part of the result entry +#~ title = td_tags1[3].contents[0].string.replace(u'\xa0', u' ').encode() +#~ # still in unicode +#~ # TODO need to convert to normal UTF, not unicode +#~ authors = td_tags1[1].string.replace(u'\xa0', u' ').encode().split(';') +#~ authors = [i.split(',') for i in authors] +#~ author = authors[0] + + + + +############################################ +######## RETURN SORTABLE OBJECT LIST + +############################################ +######## HOW TO SORT RESULTS +# needs Python 2.6 at least +#~ from operator import itemgetter, attrgetter +#~ +#~ # now to sort it, just use one of the keys +#~ # score, high to low +#~ sorted(resultlist, key=attrgetter('author'), reverse=True) +#~ +#~ # cmp=locale.strcoll new and untested addition +#~ +#~ # authors alphabetical order first and then by score +#~ # i.e. sort by score if same first author +#~ sorted(resultlist, key=attrgetter('ads_score','authors'), reverse=True) + + +######################################################################## +######## NOTES + +### FIELDS +# bibcode +# title +# authors +# score +# pubdate +# possilbe (quick)links : +# A Abstract +# C CITATIONS +# D On-line Data +# E EJOURNAL +# F Printable Article +# G Gif Images +# H HEP/Spires Information +# I Author Comments +# L Library Entries +# M Multimedia +# N NED Objects +# O Associated Articles +# P PDS datasets +# R REFERENCES +# S SIMBAD Objects +# T TOC +# U Also read +# X arXiv e-print +# Z Abstract Custom + + +""" + + +6.3.4 - Embedded Queries + +This section describes how the abstract service can be accessed from embedded forms. 
The URL for submitting embedded forms is: + +http://adsabs.harvard.edu/cgi-bin/abs_connect + +The syntax is: + +... + +where parami are the names of the parameters and vali are their values. There are no spaces allowed in a URL. Any blanks need to be encoded as a '+' (e.g. between author last and first names). The list of the possible parameters and their possible values is available to build queries. It is advisable to use only the more basic parameters for such queries since the more complicated parameters are more likely to change with future versions of the search system. + +One use of this is for including a link to the bibliography for a particular author in a document. + +To do so, use the following syntax: + +http://adsabs.harvard.edu/cgi-bin/abs_connect?author=last,+f.&return_req=no_params + +This sets the author=last, f, and prevents the listing of parameters at the bottom of the page (return_req=no_params). + +If you want to specify the author middle initial in addition to the first initial, use exact author matching (&aut_xct=YES). + +To build a search for two different formats of author names, enter the two author arguments separated with a semicolon. + +http://adsabs.harvard.edu/cgi-bin/abs_connect?author=last,+f.m.;last,+first+m.&aut_xct=YES&return_req=no_params + +Such a link will always provide access to the latest bibliography of an author without the need to update anything. + +Sometimes such a list includes articles by somebody else with the same name. You can exclude specific articles from the results list with the command + +exclude=bibcode1,bibcode2,... + +You can also include specific articles with the command + +include=bibcode1,bibcode2,... + +This allows for finely customized bibliographies. 
+ + + +List of ADS query parameter keywords + +author list of semicolon separated authornames as last, f +object list of semicolon separated object names +keyword list of semicolon separated keywords +start_mon starting month as integer (Jan == 1, Dec == 12) +start_year starting year as integer (4 digits) +end_mon ending month as integer (Jan == 1, Dec == 12) +end_year ending year as integer (4 digits) +start_entry_day start entry day of month as integer +start_entry_mon start entry month as integer +start_entry_year start entry year as integer +end_entry_day start entry day of month as integer +end_entry_mon start entry month as integer +end_entry_year start entry year as integer +title title words, any non-alpha-numeric character separates +text abstract words, any non-alpha-numeric character separates +fulltext OCRd fulltext, any non-alpha-numeric character separates +affiliation affiliation words, any non-alpha-numeric character separates +bibcode bibcode for partial bibcode search. If a bibcode is +specified, no other search will be done +nr_to_return how many abstracts to return (default is 50, max 500) +start_nr where to start returning in list of retrieved abstracts +default is 1 +aut_wt floating point weight for author search, default: 1.0 +obj_wt floating point weight for object search, default: 1.0 +kwd_wt floating point weight for keyword search, default: 1.0 +ttl_wt floating point weight for title search, default: 0.3 +txt_wt floating point weight for text search, default: 3.0 +full_wt floating point weight for full search, default: 3.0 +aff_wt floating point weight for affiliation search, default: 1.0 +aut_syn author synonym replacement. aut_syn="YES" turns it on (default is on) +ttl_syn title synonym replacement. ttl_syn="YES" turns it on (default is on) +txt_syn text synonym replacement. txt_syn="YES" turns it on (default is on) +full_syn full text synonym replacement. 
full_syn="YES" turns it on (default is on) +aff_syn affiliation synonym replacement. aff_syn="YES" turns it on (default is on) +aut_wgt authors used for weighting. aut_wgt="YES" turns it on (default is on) +obj_wgt objects used for weighting. obj_wgt="YES" turns it on (default is on) +kwd_wgt keywords used for weighting. kwd_wgt="YES" turns it on (default is on) +ttl_wgt title used for weighting. ttl_wgt="YES" turns it on (default is on) +txt_wgt text used for weighting. txt_wgt="YES" turns it on (default is on) +full_wgt full text used for weighting. full_wgt="YES" turns it on (default is on) +aff_wgt affiliation used for weighting. aff_wgt="YES" turns it on (default is on) +aut_sco authors weighted scoring. aut_sco="YES" turns it on (default is off) +kwd_sco keywords weighted scoring. kwd_sco="YES" turns it on (default is off) +ttl_sco title weighted scoring. ttl_sco="YES" turns it on (default is on) +txt_sco text weighted scoring. txt_sco="YES" turns it on (default is on) +full_sco text weighted scoring. full_sco="YES" turns it on (default is on) +aff_sco affiliation weighted scoring. aff_sco="YES" turns it on (default is off) +aut_req authors required for results. aut_req="YES" turns it on (default is off) +obj_req objects required for results. obj_req="YES" turns it on (default is off) +kwd_req keywords required for results. kwd_req="YES" turns it on (default is off) +ttl_req title required for results. ttl_req="YES" turns it on (default is off) +txt_req text required for results. txt_req="YES" turns it on (default is off) +full_req text required for results. full_req="YES" turns it on (default is off) +aff_req affiliation required for results. 
aff_req="YES" turns it on (default is off) +aut_logic +obj_logic +kwd_logic +ttl_logic +txt_logic +full_logic +aff_logic Combination logic: +xxx_logic="AND": combine with AND +xxx_logic="OR": combine with OR (default) +xxx_logic="SIMPLE": simple logic (use +, -) +xxx_logic="BOOL": full boolean logic +xxx_logic="FULLMATCH": do AND query in the selected field +and calculate the score according to how many words in +the field of the selected reference were matched by +the query +return_req requested return: +return_req="result" : return results (default) +return_req="form" : return new query form +return_req="no_params": return results +set default parameters, don't display params +db_key which database to query: db_key="AST" : Astronomy(default) +"PRE": arXiv e-prints +"PHY": Physics, "GEN": General, CFA: CfA Preprints +atcl_only select only OCR pages from articles +jou_pick specify which journals to select: +jou_pick="ALL" : return all journals (default) +jou_pick="NO" : return only refereed journals +jou_pick="EXCL" : return only non-refereed journals +ref_stems list of comma-separated ADS bibstems to return, e.g. ref_stems="ApJ..,AJ..." +min_score minimum score of returned abstracts +(floating point, default 0.0) +data_link return only entries with data. +data_link="YES" turns it on, default is off +abstract return only entries with abstracts. +abstract="YES" turns it on, default is off +alt_abs return only entries with alternate abstracts. +alt_abs="YES" turns it on, default is off +aut_note return only entries with author notes. +aut_note="YES" turns it on, default is off +article return only entries with articles. +article="YES" turns it on, default is off +article_link return only entries with electronic articles. +article_link="YES" turns it on, default is off +simb_obj return only entries with simbad objects. +simb_obj="YES" turns it on, default is off +ned_obj return only entries with ned objects. 
+ned_obj="YES" turns it on, default is off +gpndb_obj return only entries with gpndb objects. +gpndb_obj="YES" turns it on, default is off +lib_link return only entries with library links. +lib_link="YES" turns it on, default is off +data_and return only entries with all selected data available. +data_and="ALL": no selection, return all refs (default) +data_and="NO" : return entries with AT LEAST ONE of the +data items selected with the above flags +data_and="YES": return only entries that have ALL links +selected with the above flags +version version number for the query form +data_type data type to return +data_type="HTML" return regular list (default) +data_type="PORTABLE" return portable tagged format +data_type="PLAINTEXT" return plain text +data_type="BIBTEX" return bibtex format +data_type="BIBTEXPLUS" return bibtex with abstract +data_type="ENDNOTE" return ENDNOTE format +data_type="DUBLINCORE" return DUBLINCORE format +data_type="XML" return XML format +data_type="SHORT_XML" return short XML format (no abstract) +data_type="VOTABLE" return VOTable format +data_type="RSS" return RSS format +mail_link return only entries with mailorder. +mail_link="YES" turns it on, default is off +toc_link return only entries with tocorder. +toc_link="YES" turns it on, default is off +pds_link return only entries with pds data. +pds_link="YES" turns it on, default is off +multimedia_link return only entries with multimedia data. +multimedia_link="YES" turns it on, default is off +spires_link return only entries with spires data. +spires_link="YES" turns it on, default is off +group_and return only entries from all selected groups. +group_and="ALL":no selection (default) +group_and="NO" :return entries that are in at least one grp +group_and="YES":return only entries from ALL groups +selected with group_bits +group_sel which group to select, e.g. group_sel="Chandra,HST" +ref_link return only entries with reference links. 
+ref_link="YES" turns it on, default is off +citation_link return only entries with citation links. +citation_link="YES" turns it on, default is off +gif_link return only entries with scanned articles links. +open_link return only entries with open access. +aut_xct exact author search. aut_xct="YES" turns it on +lpi_query lpi_query="YES" query for LPI objects, default is off +sim_query sim_query="YES" query for SIMBAD objects, default is on +ned_query ned_query="YES" query for NED objects, default is on +iau_query iau_query="YES" query for IAU objects, default is off +sort sort options: +"SCORE": sort by score +"AUTHOR": sort by first author +"NDATE": sort by date (most recent first +"ODATE": sort by date (oldest first) +"BIBCODE": sort by bibcode +"ENTRY": sort by entry date in the database +"PAGE": sort by page number +"RPAGE": reverse sort by page number +"CITATIONS": sort by citation count (replaces +score with number of citations) +"NORMCITATIONS": sort by normalized citation count +(replaces score with number of normalized citations) +"AUTHOR_CNT": sort by author count +query_type what to return: query_type=PAPERS returns regular records (default) +query_type=CITES returns citations to selected records +query_type=REFS returns references in selected records +query_type=ALSOREADS returns also-reads in selected records +return_fmt return format: return_fmt="LONG": return full abstract +return_fmt="SHORT": return short listing (default) +type where to return the data (screen, file, printer, etc) +defaultset use default settings (same as ret_req=no_params +but displays query parameters on short form) +format Custom reference format +charset character set for text output +year year field for bibcode matching +bibstem bibstem field for bibcode matching +volume volume field for bibcode matching +page page field for bibcode matching +associated_link return only entries with associated articles. 
+associated_link="YES" turns it on, default is off +ar_link return only entries with AR links. +ar_link="YES" turns it on, default is off +tables return results with table formatting (overrides pref.) +email_ret email_ret="YES": return query result via email +exclude exclude=bibcode1[,bibcode2...]: exclude specified bibcodes +from results list +include include=bibcode1[,bibcode2...]: include specified bibcodes +in results list +selectfrom selectfrom=bibcode1[,bibcode2...]: include only bibcodes +from specified bibcode list +RA Right ascension for cone search +DEC Declination for cone search +SR Search radius for cone search (default is 10 arcmin) +method form method of query form: GET or POST +nfeedback number of records to use in feedback queries +doi DOI +preprint_link return only entries with preprint data. +preprint_link="YES" turns it on, default is off +refstr reference string to resolve +mimetype mimetype of returned page (default depends on data_type) +qsearch if set, quick search box is displayed in HTML output +arxiv_sel which arxiv categories to select +article_sel select only articles (not catalogs, abstracts, etc) +adsobj_query search object names in abstract text + +""" + + + diff --git a/astroquery/nasa_ads/tests/__init__.py b/astroquery/nasa_ads/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/astroquery/nasa_ads/tests/test_nasaads.py b/astroquery/nasa_ads/tests/test_nasaads.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docs/nasa_ads/nasa_ads.rst b/docs/nasa_ads/nasa_ads.rst new file mode 100644 index 0000000000..e326dd1335 --- /dev/null +++ b/docs/nasa_ads/nasa_ads.rst @@ -0,0 +1,47 @@ +.. doctest-skip-all + +.. 
_astroquery.nasa_ads: + +**************************************** +NASA ADS Queries (`astroquery.nasa_ads`) +**************************************** + +Getting Started +=============== + +This module provides an interface to the online `SAO/NASA Astrophysics Data System`_ + + +Examples +======== + + +Search works by specific identifier +----------------------------------- + + +Get links +--------- + + +Download publisher/ArXiv PDF +---------------------------- + + +Get Bibtex +---------- + + + + + + + +Reference/API +============= + +#.. automodapi:: astroquery.nasa_ads:no-inheritance-diagram: + +.. _nasa_ads: http://adsabs.harvard.edu/ +.. _SAO/NASA Astrophysics Data System: http://adsabs.harvard.edu/ + From 89bfc0079bf72ea8565cefbc52e6dcd19e63ba7d Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Fri, 19 Dec 2014 11:05:40 +0100 Subject: [PATCH 02/39] NASA ADS query : test format RST to open link in new window --- docs/nasa_ads/nasa_ads.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/nasa_ads/nasa_ads.rst b/docs/nasa_ads/nasa_ads.rst index e326dd1335..7e53f02180 100644 --- a/docs/nasa_ads/nasa_ads.rst +++ b/docs/nasa_ads/nasa_ads.rst @@ -43,5 +43,5 @@ Reference/API #.. automodapi:: astroquery.nasa_ads:no-inheritance-diagram: .. _nasa_ads: http://adsabs.harvard.edu/ -.. _SAO/NASA Astrophysics Data System: http://adsabs.harvard.edu/ +.. _SAO/NASA Astrophysics Data System: raw::html SAO/NASA Astrophysics Data System From 9dc6b7d4df138c6b2344d3a10be40ca08f01ecc5 Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Fri, 19 Dec 2014 11:06:45 +0100 Subject: [PATCH 03/39] NASA ADS query : test format RST to open link in new window --- docs/nasa_ads/nasa_ads.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/nasa_ads/nasa_ads.rst b/docs/nasa_ads/nasa_ads.rst index 7e53f02180..66e5046bbe 100644 --- a/docs/nasa_ads/nasa_ads.rst +++ b/docs/nasa_ads/nasa_ads.rst @@ -43,5 +43,6 @@ Reference/API #.. 
automodapi:: astroquery.nasa_ads:no-inheritance-diagram: .. _nasa_ads: http://adsabs.harvard.edu/ -.. _SAO/NASA Astrophysics Data System: raw::html SAO/NASA Astrophysics Data System +.. _SAO/NASA Astrophysics Data System: raw::html + SAO/NASA Astrophysics Data System From cfa1c4d77abe6459365a4bcf1776f77addf1c1ff Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Fri, 19 Dec 2014 11:10:24 +0100 Subject: [PATCH 04/39] NASA ADS query, rst doc : couldn't get link 2 open in new window --- docs/nasa_ads/nasa_ads.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/nasa_ads/nasa_ads.rst b/docs/nasa_ads/nasa_ads.rst index 66e5046bbe..e326dd1335 100644 --- a/docs/nasa_ads/nasa_ads.rst +++ b/docs/nasa_ads/nasa_ads.rst @@ -43,6 +43,5 @@ Reference/API #.. automodapi:: astroquery.nasa_ads:no-inheritance-diagram: .. _nasa_ads: http://adsabs.harvard.edu/ -.. _SAO/NASA Astrophysics Data System: raw::html - SAO/NASA Astrophysics Data System +.. _SAO/NASA Astrophysics Data System: http://adsabs.harvard.edu/ From 8fa5f78c6252fa2d555f5bbbac935419332ee4dd Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Fri, 19 Dec 2014 11:15:27 +0100 Subject: [PATCH 05/39] NASA ADS query - rst doc : new test with links... --- docs/nasa_ads/nasa_ads.rst | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/docs/nasa_ads/nasa_ads.rst b/docs/nasa_ads/nasa_ads.rst index e326dd1335..efbe530d1b 100644 --- a/docs/nasa_ads/nasa_ads.rst +++ b/docs/nasa_ads/nasa_ads.rst @@ -9,7 +9,21 @@ NASA ADS Queries (`astroquery.nasa_ads`) Getting Started =============== -This module provides an interface to the online `SAO/NASA Astrophysics Data System`_ +This module provides an interface to the online |adslink|. 
+It will check all the ADS mirrors, currently given by + + http://adswww.harvard.edu/|br| + http://cdsads.u-strasbg.fr/|br| + http://ukads.nottingham.ac.uk/|br| + http://esoads.eso.org/|br| + http://ads.ari.uni-heidelberg.de/|br| + http://ads.inasan.ru/|br| + http://ads.nao.ac.jp/|br| + http://ads.iucaa.ernet.in/|br| + http://ads.arsip.lipi.go.id/|br| + http://saaoads.chpc.ac.za/|br| + http://ads.on.br/|br| + Examples @@ -43,5 +57,14 @@ Reference/API #.. automodapi:: astroquery.nasa_ads:no-inheritance-diagram: .. _nasa_ads: http://adsabs.harvard.edu/ -.. _SAO/NASA Astrophysics Data System: http://adsabs.harvard.edu/ +.. |adslink| raw:: html + + SAO/NASA Astrophysics Data System + +.. |br| raw:: html + +
+ + + From 256006cfb0da4374dbaea169c11ed6612d02c247 Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Fri, 19 Dec 2014 12:24:11 +0100 Subject: [PATCH 06/39] NASA ADS query - rst doc : new test with links --- docs/nasa_ads/nasa_ads.rst | 42 ++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/docs/nasa_ads/nasa_ads.rst b/docs/nasa_ads/nasa_ads.rst index efbe530d1b..2cc59fb533 100644 --- a/docs/nasa_ads/nasa_ads.rst +++ b/docs/nasa_ads/nasa_ads.rst @@ -9,21 +9,30 @@ NASA ADS Queries (`astroquery.nasa_ads`) Getting Started =============== -This module provides an interface to the online |adslink|. +This module provides an interface to the online `SAO/NASA Astrophysics Data System`_. It will check all the ADS mirrors, currently given by - http://adswww.harvard.edu/|br| - http://cdsads.u-strasbg.fr/|br| - http://ukads.nottingham.ac.uk/|br| - http://esoads.eso.org/|br| - http://ads.ari.uni-heidelberg.de/|br| - http://ads.inasan.ru/|br| - http://ads.nao.ac.jp/|br| - http://ads.iucaa.ernet.in/|br| - http://ads.arsip.lipi.go.id/|br| - http://saaoads.chpc.ac.za/|br| - http://ads.on.br/|br| + http://adswww.harvard.edu/ + http://cdsads.u-strasbg.fr/ + + http://ukads.nottingham.ac.uk/ + + http://esoads.eso.org/ + + http://ads.ari.uni-heidelberg.de/ + + http://ads.inasan.ru/ + + http://ads.nao.ac.jp/ + + http://ads.iucaa.ernet.in/ + + http://ads.arsip.lipi.go.id/ + + http://saaoads.chpc.ac.za/ + + http://ads.on.br/ Examples @@ -57,14 +66,7 @@ Reference/API #.. automodapi:: astroquery.nasa_ads:no-inheritance-diagram: .. _nasa_ads: http://adsabs.harvard.edu/ -.. |adslink| raw:: html - - SAO/NASA Astrophysics Data System - -.. |br| raw:: html - -
- +.. _SAO/NASA Astrophysics Data System: http://adsabs.harvard.edu/ From c776832e9ad8bf7bcde100667bbc799cf3b5f84f Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Fri, 19 Dec 2014 12:30:27 +0100 Subject: [PATCH 07/39] NASA ADS query - rst doc : new test with links --- docs/nasa_ads/nasa_ads.rst | 37 ++++++++++++++----------------------- 1 file changed, 14 insertions(+), 23 deletions(-) diff --git a/docs/nasa_ads/nasa_ads.rst b/docs/nasa_ads/nasa_ads.rst index 2cc59fb533..472ec54461 100644 --- a/docs/nasa_ads/nasa_ads.rst +++ b/docs/nasa_ads/nasa_ads.rst @@ -10,29 +10,20 @@ Getting Started =============== This module provides an interface to the online `SAO/NASA Astrophysics Data System`_. -It will check all the ADS mirrors, currently given by - - http://adswww.harvard.edu/ - - http://cdsads.u-strasbg.fr/ - - http://ukads.nottingham.ac.uk/ - - http://esoads.eso.org/ - - http://ads.ari.uni-heidelberg.de/ - - http://ads.inasan.ru/ - - http://ads.nao.ac.jp/ - - http://ads.iucaa.ernet.in/ - - http://ads.arsip.lipi.go.id/ - - http://saaoads.chpc.ac.za/ - - http://ads.on.br/ +It will check all the ADS mirrors, currently given by the following list: + +- http://adswww.harvard.edu/ +- http://cdsads.u-strasbg.fr/ +- http://ukads.nottingham.ac.uk/ +- http://esoads.eso.org/ +- http://ads.ari.uni-heidelberg.de/ +- http://ads.inasan.ru/ +- http://ads.nao.ac.jp/ +- http://ads.iucaa.ernet.in/ +- http://ads.arsip.lipi.go.id/ +- http://saaoads.chpc.ac.za/ +- http://ads.on.br/ + Examples From 6c994821997cbfcabf7d2aedc8d4501500d62b53 Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Fri, 19 Dec 2014 15:44:56 +0100 Subject: [PATCH 08/39] initial code dump nothing working yet. 
some copy-paste from astroquery.splatalogue --- astroquery/nasa_ads/__init__.py | 24 +- astroquery/nasa_ads/core.py | 480 ++++++++------------------------ 2 files changed, 116 insertions(+), 388 deletions(-) diff --git a/astroquery/nasa_ads/__init__.py b/astroquery/nasa_ads/__init__.py index cb5cd010c1..04fcd43cd5 100644 --- a/astroquery/nasa_ads/__init__.py +++ b/astroquery/nasa_ads/__init__.py @@ -13,12 +13,12 @@ class Conf(_config.ConfigNamespace): """ Configuration parameters for `astroquery.nasa_ads`. """ - mirror_urls = _config.ConfigItem( + servers = _config.ConfigItem( ['http://adswww.harvard.edu/', 'http://cdsads.u-strasbg.fr/', 'http://ukads.nottingham.ac.uk/', 'http://esoads.eso.org/', - 'http://ads.ari.uni-heidelberg.de/' + 'http://ads.ari.uni-heidelberg.de/', 'http://ads.inasan.ru/', 'http://ads.mao.kiev.ua/', 'http://ads.astro.puc.cl/', @@ -30,24 +30,20 @@ class Conf(_config.ConfigNamespace): 'http://ads.on.br/'], 'SAO/NASA ADS mirrors around the world' ) - + advanced_url = _config.ConfigItem( 'abstract_service.html', 'Path for advanced query' ) simple_url = _config.ConfigItem( - 'abstract_service.html', + 'index.html', 'Path for advanced query' ) timeout = _config.ConfigItem( 60, - 'Time limit for connecting to ADS server.' - ) - lines_limit = _config.ConfigItem( - 1000, - 'Limit to number of hits exported.' + 'Time limit for connecting to ADS server' ) conf = Conf() @@ -57,13 +53,3 @@ class Conf(_config.ConfigNamespace): __all__ = ['ADS', 'ADSClass', 'Conf', 'conf', ] - - - - - - - -""" -advanced_q = 'abstract_service.html' -""" diff --git a/astroquery/nasa_ads/core.py b/astroquery/nasa_ads/core.py index e68d7001d0..4a1ad2aeed 100644 --- a/astroquery/nasa_ads/core.py +++ b/astroquery/nasa_ads/core.py @@ -1,108 +1,138 @@ -#! 
/usr/bin/env python -# -*- coding: utf-8 -*- -# -# adslib.py -# -# Module to search the ads -# -# Copyright 2012 Magnus Persson -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, -# MA 02110-1301, USA. -# -# version 0.0.1a - +# Licensed under a 3-clause BSD style license - see LICENSE.rst """ -Script to search the NASA ADS directory +Module to search the SAO/NASA Astrophysics Data System -Need : o scipy - o mechanize module (standard in Python >2.6/2.7?) - o urllib2 module (standard Python module, required(?) by mechanize) - o beautiful soup/xml (xml standard in Python >2.6/2.7?) +:author: Magnus Persson """ -""" -ADSlib - Python Module to interact with NASA ADS -at +import warnings +# example warning +# warnings.warn("Band was specified, so blabla is overridden") +from astropy.io import ascii +from astropy import units as u +from ..query import BaseQuery +from ..utils import commons, async_to_sync +from ..utils.docstr_chompers import prepend_docstr_noreturns +from . 
import conf -http://adswww.harvard.edu/ -OR one of the mirrors -http://cdsads.u-strasbg.fr/ -http://ukads.nottingham.ac.uk/ -http://esoads.eso.org/ -http://ads.ari.uni-heidelberg.de/ -http://ads.inasan.ru/ -http://ads.mao.kiev.ua/ -http://ads.astro.puc.cl/ -http://ads.nao.ac.jp/ -http://ads.bao.ac.cn/ -http://ads.iucaa.ernet.in/ -http://ads.arsip.lipi.go.id/ -http://saaoads.chpc.ac.za/ -http://ads.on.br/ +__all__ = ['Splatalogue', 'SplatalogueClass'] -""" +@async_to_sync +class ADSClass(BaseQuery): + SERVERS = conf.servers + QUERY_ADVANCED_URL = conf.advanced_url + QUERY_SIMPLE_URL = conf.simple_url + TIMEOUT = conf.timeout + # global constant, not user-configurable + def __init__(self, **kwargs): + """ + Initialize a ADS query class with default arguments set. + Any default keyword arguments (see `query_lines`) can be + overridden here. + """ + self.data = self._default_kwargs() + self.set_default_options(**kwargs) + + def set_default_options(self, **kwargs): + """ + Modify the default options. + """ + self.data.update(self._parse_kwargs(**kwargs)) + def _default_kwargs(self): + kwargs = dict(advanced = False,) + return self._parse_kwargs(**kwargs) + + def _parse_kwargs(self, search=""): + """ + The ADS service returns ... + -""" -----[ Change log ]---- + Parameters + ---------- -* 2012 Dec 15 - Code cleanup. + Other Parameters + ---------------- + -* 2012 Oct 29 - Now only uses mechanize module(!) Yay. + Returns + ------- + Dictionary of the parameters to send to the SPLAT page + payload : dict + A dictionary of keywords + """ -* 2012 Oct 02 - File created. 
+ payload = {'submit': 'Search', + 'frequency_units': 'GHz', + } + payload['qsearch'] = simple -""" + return payload -""" -NOTES: -# advanced search query -abstract_service.html + def _validate_simple_kwargs(self, min_frequency=None, max_frequency=None, + band='any', **kwargs): + """ + Check that a simple search query is input + """ + if band == 'any': + if min_frequency is None or max_frequency is None: + raise ValueError("Must specify a simple search string.") + @prepend_docstr_noreturns("\n" + _parse_kwargs.__doc__) + def query_simple_async(self, simple, **kwargs): + """ + Returns + ------- + response : `requests.Response` + The response of the HTTP request. + """ + # have to chomp this kwd here... + get_query_payload = (kwargs.pop('get_query_payload') + if 'get_query_payload' in kwargs + else False) + self._validate_kwargs(simple, **kwargs) + + if hasattr(self, 'data'): + data_payload = self.data.copy() + data_payload.update(self._parse_kwargs(min_frequency=min_frequency, + max_frequency=max_frequency, + **kwargs)) + else: + data_payload = self._default_kwargs() + data_payload.update(self._parse_kwargs(min_frequency=min_frequency, + max_frequency=max_frequency, + **kwargs)) + + if get_query_payload: + return data_payload + + response = commons.send_request( + self.QUERY_URL, + data_payload, + self.TIMEOUT) + + self.response = response + + return response +ADS = ADSClass() + + + + + + +######################################################################## -# quick search -index.html -""" -mirrors = [ - 'http://adswww.harvard.edu/', - 'http://cdsads.u-strasbg.fr/', - 'http://ukads.nottingham.ac.uk/', - 'http://esoads.eso.org/', - 'http://ads.ari.uni-heidelberg.de/' - 'http://ads.inasan.ru/', - 'http://ads.nao.ac.jp/', - 'http://ads.iucaa.ernet.in/', - 'http://ads.arsip.lipi.go.id/', - 'http://saaoads.chpc.ac.za/', - 'http://ads.on.br/' - ] -advanced_q = 'abstract_service.html' def search(query, **kwargs): """ @@ -243,7 +273,7 @@ def search(query, **kwargs): 
######## GET RESULTLIST ###### the problem with this is that web is in UNICODE, - # ie. Jørgensen, æ and åäö and ßü etc are represented by funny numbers and '\' + # ie. special chars are represented by funny numbers and '\' #resultlist = [_Result(i) for i in entries] return _Resultlist(entries) @@ -354,292 +384,4 @@ def _returnlist_(self): #~ # authors alphabetical order first and then by score #~ # i.e. sort by score if same first author #~ sorted(resultlist, key=attrgetter('ads_score','authors'), reverse=True) - - ######################################################################## -######## NOTES - -### FIELDS -# bibcode -# title -# authors -# score -# pubdate -# possilbe (quick)links : -# A Abstract -# C CITATIONS -# D On-line Data -# E EJOURNAL -# F Printable Article -# G Gif Images -# H HEP/Spires Information -# I Author Comments -# L Library Entries -# M Multimedia -# N NED Objects -# O Associated Articles -# P PDS datasets -# R REFERENCES -# S SIMBAD Objects -# T TOC -# U Also read -# X arXiv e-print -# Z Abstract Custom - - -""" - - -6.3.4 - Embedded Queries - -This section describes how the abstract service can be accessed from embedded forms. The URL for submitting embedded forms is: - -http://adsabs.harvard.edu/cgi-bin/abs_connect - -The syntax is: - -... - -where parami are the names of the parameters and vali are their values. There are no spaces allowed in a URL. Any blanks need to be encoded as a '+' (e.g. between author last and first names). The list of the possible parameters and their possible values is available to build queries. It is advisable to use only the more basic parameters for such queries since the more complicated parameters are more likely to change with future versions of the search system. - -One use of this is for including a link to the bibliography for a particular author in a document. 
- -To do so, use the following syntax: - -http://adsabs.harvard.edu/cgi-bin/abs_connect?author=last,+f.&return_req=no_params - -This sets the author=last, f, and prevents the listing of parameters at the bottom of the page (return_req=no_params). - -If you want to specify the author middle initial in addition to the first initial, use exact author matching (&aut_xct=YES). - -To build a search for two different formats of author names, enter the two author arguments separated with a semicolon. - -http://adsabs.harvard.edu/cgi-bin/abs_connect?author=last,+f.m.;last,+first+m.&aut_xct=YES&return_req=no_params - -Such a link will always provide access to the latest bibliography of an author without the need to update anything. - -Sometimes such a list includes articles by somebody else with the same name. You can exclude specific articles from the results list with the command - -exclude=bibcode1,bibcode2,... - -You can also include specific articles with the command - -include=bibcode1,bibcode2,... - -This allows for finely customized bibliographies. 
- - - -List of ADS query parameter keywords - -author list of semicolon separated authornames as last, f -object list of semicolon separated object names -keyword list of semicolon separated keywords -start_mon starting month as integer (Jan == 1, Dec == 12) -start_year starting year as integer (4 digits) -end_mon ending month as integer (Jan == 1, Dec == 12) -end_year ending year as integer (4 digits) -start_entry_day start entry day of month as integer -start_entry_mon start entry month as integer -start_entry_year start entry year as integer -end_entry_day start entry day of month as integer -end_entry_mon start entry month as integer -end_entry_year start entry year as integer -title title words, any non-alpha-numeric character separates -text abstract words, any non-alpha-numeric character separates -fulltext OCRd fulltext, any non-alpha-numeric character separates -affiliation affiliation words, any non-alpha-numeric character separates -bibcode bibcode for partial bibcode search. If a bibcode is -specified, no other search will be done -nr_to_return how many abstracts to return (default is 50, max 500) -start_nr where to start returning in list of retrieved abstracts -default is 1 -aut_wt floating point weight for author search, default: 1.0 -obj_wt floating point weight for object search, default: 1.0 -kwd_wt floating point weight for keyword search, default: 1.0 -ttl_wt floating point weight for title search, default: 0.3 -txt_wt floating point weight for text search, default: 3.0 -full_wt floating point weight for full search, default: 3.0 -aff_wt floating point weight for affiliation search, default: 1.0 -aut_syn author synonym replacement. aut_syn="YES" turns it on (default is on) -ttl_syn title synonym replacement. ttl_syn="YES" turns it on (default is on) -txt_syn text synonym replacement. txt_syn="YES" turns it on (default is on) -full_syn full text synonym replacement. 
full_syn="YES" turns it on (default is on) -aff_syn affiliation synonym replacement. aff_syn="YES" turns it on (default is on) -aut_wgt authors used for weighting. aut_wgt="YES" turns it on (default is on) -obj_wgt objects used for weighting. obj_wgt="YES" turns it on (default is on) -kwd_wgt keywords used for weighting. kwd_wgt="YES" turns it on (default is on) -ttl_wgt title used for weighting. ttl_wgt="YES" turns it on (default is on) -txt_wgt text used for weighting. txt_wgt="YES" turns it on (default is on) -full_wgt full text used for weighting. full_wgt="YES" turns it on (default is on) -aff_wgt affiliation used for weighting. aff_wgt="YES" turns it on (default is on) -aut_sco authors weighted scoring. aut_sco="YES" turns it on (default is off) -kwd_sco keywords weighted scoring. kwd_sco="YES" turns it on (default is off) -ttl_sco title weighted scoring. ttl_sco="YES" turns it on (default is on) -txt_sco text weighted scoring. txt_sco="YES" turns it on (default is on) -full_sco text weighted scoring. full_sco="YES" turns it on (default is on) -aff_sco affiliation weighted scoring. aff_sco="YES" turns it on (default is off) -aut_req authors required for results. aut_req="YES" turns it on (default is off) -obj_req objects required for results. obj_req="YES" turns it on (default is off) -kwd_req keywords required for results. kwd_req="YES" turns it on (default is off) -ttl_req title required for results. ttl_req="YES" turns it on (default is off) -txt_req text required for results. txt_req="YES" turns it on (default is off) -full_req text required for results. full_req="YES" turns it on (default is off) -aff_req affiliation required for results. 
aff_req="YES" turns it on (default is off) -aut_logic -obj_logic -kwd_logic -ttl_logic -txt_logic -full_logic -aff_logic Combination logic: -xxx_logic="AND": combine with AND -xxx_logic="OR": combine with OR (default) -xxx_logic="SIMPLE": simple logic (use +, -) -xxx_logic="BOOL": full boolean logic -xxx_logic="FULLMATCH": do AND query in the selected field -and calculate the score according to how many words in -the field of the selected reference were matched by -the query -return_req requested return: -return_req="result" : return results (default) -return_req="form" : return new query form -return_req="no_params": return results -set default parameters, don't display params -db_key which database to query: db_key="AST" : Astronomy(default) -"PRE": arXiv e-prints -"PHY": Physics, "GEN": General, CFA: CfA Preprints -atcl_only select only OCR pages from articles -jou_pick specify which journals to select: -jou_pick="ALL" : return all journals (default) -jou_pick="NO" : return only refereed journals -jou_pick="EXCL" : return only non-refereed journals -ref_stems list of comma-separated ADS bibstems to return, e.g. ref_stems="ApJ..,AJ..." -min_score minimum score of returned abstracts -(floating point, default 0.0) -data_link return only entries with data. -data_link="YES" turns it on, default is off -abstract return only entries with abstracts. -abstract="YES" turns it on, default is off -alt_abs return only entries with alternate abstracts. -alt_abs="YES" turns it on, default is off -aut_note return only entries with author notes. -aut_note="YES" turns it on, default is off -article return only entries with articles. -article="YES" turns it on, default is off -article_link return only entries with electronic articles. -article_link="YES" turns it on, default is off -simb_obj return only entries with simbad objects. -simb_obj="YES" turns it on, default is off -ned_obj return only entries with ned objects. 
-ned_obj="YES" turns it on, default is off -gpndb_obj return only entries with gpndb objects. -gpndb_obj="YES" turns it on, default is off -lib_link return only entries with library links. -lib_link="YES" turns it on, default is off -data_and return only entries with all selected data available. -data_and="ALL": no selection, return all refs (default) -data_and="NO" : return entries with AT LEAST ONE of the -data items selected with the above flags -data_and="YES": return only entries that have ALL links -selected with the above flags -version version number for the query form -data_type data type to return -data_type="HTML" return regular list (default) -data_type="PORTABLE" return portable tagged format -data_type="PLAINTEXT" return plain text -data_type="BIBTEX" return bibtex format -data_type="BIBTEXPLUS" return bibtex with abstract -data_type="ENDNOTE" return ENDNOTE format -data_type="DUBLINCORE" return DUBLINCORE format -data_type="XML" return XML format -data_type="SHORT_XML" return short XML format (no abstract) -data_type="VOTABLE" return VOTable format -data_type="RSS" return RSS format -mail_link return only entries with mailorder. -mail_link="YES" turns it on, default is off -toc_link return only entries with tocorder. -toc_link="YES" turns it on, default is off -pds_link return only entries with pds data. -pds_link="YES" turns it on, default is off -multimedia_link return only entries with multimedia data. -multimedia_link="YES" turns it on, default is off -spires_link return only entries with spires data. -spires_link="YES" turns it on, default is off -group_and return only entries from all selected groups. -group_and="ALL":no selection (default) -group_and="NO" :return entries that are in at least one grp -group_and="YES":return only entries from ALL groups -selected with group_bits -group_sel which group to select, e.g. group_sel="Chandra,HST" -ref_link return only entries with reference links. 
-ref_link="YES" turns it on, default is off -citation_link return only entries with citation links. -citation_link="YES" turns it on, default is off -gif_link return only entries with scanned articles links. -open_link return only entries with open access. -aut_xct exact author search. aut_xct="YES" turns it on -lpi_query lpi_query="YES" query for LPI objects, default is off -sim_query sim_query="YES" query for SIMBAD objects, default is on -ned_query ned_query="YES" query for NED objects, default is on -iau_query iau_query="YES" query for IAU objects, default is off -sort sort options: -"SCORE": sort by score -"AUTHOR": sort by first author -"NDATE": sort by date (most recent first -"ODATE": sort by date (oldest first) -"BIBCODE": sort by bibcode -"ENTRY": sort by entry date in the database -"PAGE": sort by page number -"RPAGE": reverse sort by page number -"CITATIONS": sort by citation count (replaces -score with number of citations) -"NORMCITATIONS": sort by normalized citation count -(replaces score with number of normalized citations) -"AUTHOR_CNT": sort by author count -query_type what to return: query_type=PAPERS returns regular records (default) -query_type=CITES returns citations to selected records -query_type=REFS returns references in selected records -query_type=ALSOREADS returns also-reads in selected records -return_fmt return format: return_fmt="LONG": return full abstract -return_fmt="SHORT": return short listing (default) -type where to return the data (screen, file, printer, etc) -defaultset use default settings (same as ret_req=no_params -but displays query parameters on short form) -format Custom reference format -charset character set for text output -year year field for bibcode matching -bibstem bibstem field for bibcode matching -volume volume field for bibcode matching -page page field for bibcode matching -associated_link return only entries with associated articles. 
-associated_link="YES" turns it on, default is off -ar_link return only entries with AR links. -ar_link="YES" turns it on, default is off -tables return results with table formatting (overrides pref.) -email_ret email_ret="YES": return query result via email -exclude exclude=bibcode1[,bibcode2...]: exclude specified bibcodes -from results list -include include=bibcode1[,bibcode2...]: include specified bibcodes -in results list -selectfrom selectfrom=bibcode1[,bibcode2...]: include only bibcodes -from specified bibcode list -RA Right ascension for cone search -DEC Declination for cone search -SR Search radius for cone search (default is 10 arcmin) -method form method of query form: GET or POST -nfeedback number of records to use in feedback queries -doi DOI -preprint_link return only entries with preprint data. -preprint_link="YES" turns it on, default is off -refstr reference string to resolve -mimetype mimetype of returned page (default depends on data_type) -qsearch if set, quick search box is displayed in HTML output -arxiv_sel which arxiv categories to select -article_sel select only articles (not catalogs, abstracts, etc) -adsobj_query search object names in abstract text - -""" - - - From e6dc2303201265f1fbfec521ddaabd02b18c2d42 Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Fri, 19 Dec 2014 15:46:04 +0100 Subject: [PATCH 09/39] fix import --- astroquery/nasa_ads/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/astroquery/nasa_ads/core.py b/astroquery/nasa_ads/core.py index 4a1ad2aeed..97f03eb0a5 100644 --- a/astroquery/nasa_ads/core.py +++ b/astroquery/nasa_ads/core.py @@ -20,7 +20,7 @@ -__all__ = ['Splatalogue', 'SplatalogueClass'] +__all__ = ['ADS', 'ADSClass'] @async_to_sync From a1af1528a54ebc0045e05801af1ac3bf9f800534 Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Mon, 12 Jan 2015 16:09:32 +0100 Subject: [PATCH 10/39] NASA ADS simple text search working. Results NOT parsed. 
--- astroquery/nasa_ads/__init__.py | 118 ++++++++++++----- astroquery/nasa_ads/core.py | 223 ++++++++++++++++++-------------- 2 files changed, 210 insertions(+), 131 deletions(-) diff --git a/astroquery/nasa_ads/__init__.py b/astroquery/nasa_ads/__init__.py index 04fcd43cd5..c728c312f3 100644 --- a/astroquery/nasa_ads/__init__.py +++ b/astroquery/nasa_ads/__init__.py @@ -6,50 +6,98 @@ :Author: Magnus Vilhelm Persson (magnusp@vilhelm.nu) """ -from astropy import config as _config +#~ from astropy.config import ConfigurationItem + +from astropy import config as _config class Conf(_config.ConfigNamespace): """ Configuration parameters for `astroquery.nasa_ads`. """ - servers = _config.ConfigItem( - ['http://adswww.harvard.edu/', - 'http://cdsads.u-strasbg.fr/', - 'http://ukads.nottingham.ac.uk/', - 'http://esoads.eso.org/', - 'http://ads.ari.uni-heidelberg.de/', - 'http://ads.inasan.ru/', - 'http://ads.mao.kiev.ua/', - 'http://ads.astro.puc.cl/', - 'http://ads.nao.ac.jp/', - 'http://ads.bao.ac.cn/', - 'http://ads.iucaa.ernet.in/', - 'http://ads.arsip.lipi.go.id/', - 'http://saaoads.chpc.ac.za/', - 'http://ads.on.br/'], - 'SAO/NASA ADS mirrors around the world' - ) + server = _config.ConfigItem( + 'http://adswww.harvard.edu', + 'SAO/NASA ADS main server.' 
+ ) + mirrors = _config.ConfigItem( + ['http://cdsads.u-strasbg.fr', + 'http://ukads.nottingham.ac.uk', + 'http://esoads.eso.org', + 'http://ads.ari.uni-heidelberg.de', + 'http://ads.inasan.ru', + 'http://ads.mao.kiev.ua', + 'http://ads.astro.puc.cl', + 'http://ads.nao.ac.jp', + 'http://ads.bao.ac.cn', + 'http://ads.iucaa.ernet.in', + 'http://ads.arsip.lipi.go.id', + 'http://saaoads.chpc.ac.za', + 'http://ads.on.br'], + 'SAO/NASA ADS mirrors around the world' + ) + advanced_path = _config.ConfigItem( + '/cgi-bin/nph-abs_connect', + 'Path for advanced query' + ) + + simple_path = _config.ConfigItem( + '/cgi-bin/nph-basic_connect', + 'Path for simple query' + ) - advanced_url = _config.ConfigItem( - 'abstract_service.html', - 'Path for advanced query' - ) + timeout = _config.ConfigItem( + 60, + 'Time limit for connecting to ADS server' + ) - simple_url = _config.ConfigItem( - 'index.html', - 'Path for advanced query' - ) +conf = Conf() - timeout = _config.ConfigItem( - 60, - 'Time limit for connecting to ADS server' - ) -conf = Conf() +from .core import ADSClass, ADS -from .core import ADS, ADSClass +__all__ = ['ADSClass', 'ADS', + 'Conf', 'conf'] -__all__ = ['ADS', 'ADSClass', - 'Conf', 'conf', - ] +#~ class Conf(_config.ConfigNamespace): + #~ """ + #~ Configuration parameters for `astroquery.nasa_ads`. 
+ #~ """ + #~ servers = _config.ConfigItem( + #~ ['http://adswww.harvard.edu', + #~ 'http://cdsads.u-strasbg.fr', + #~ 'http://ukads.nottingham.ac.uk', + #~ 'http://esoads.eso.org', + #~ 'http://ads.ari.uni-heidelberg.de', + #~ 'http://ads.inasan.ru', + #~ 'http://ads.mao.kiev.ua', + #~ 'http://ads.astro.puc.cl', + #~ 'http://ads.nao.ac.jp', + #~ 'http://ads.bao.ac.cn', + #~ 'http://ads.iucaa.ernet.in', + #~ 'http://ads.arsip.lipi.go.id', + #~ 'http://saaoads.chpc.ac.za', + #~ 'http://ads.on.br'], + #~ 'SAO/NASA ADS mirrors around the world' + #~ ) + #~ + #~ advanced_url = _config.ConfigurationItem( + #~ '/cgi-bin/nph-abs_connect', + #~ 'Path for advanced query' + #~ ) + #~ + #~ simple_url = _config.ConfigurationItem( + #~ '/cgi-bin/nph-basic_connect', + #~ 'Path for simple query' + #~ ) +#~ + #~ timeout = _config.ConfigurationItem( + #~ 60, + #~ 'Time limit for connecting to ADS server' + #~ ) +#~ +#~ conf = Conf() +#~ +#~ from .core import ADS, ADSClass +#~ +#~ __all__ = ['ADS', 'ADSClass', + #~ 'Conf', 'conf'] diff --git a/astroquery/nasa_ads/core.py b/astroquery/nasa_ads/core.py index 97f03eb0a5..aa8ea8fc6a 100644 --- a/astroquery/nasa_ads/core.py +++ b/astroquery/nasa_ads/core.py @@ -6,126 +6,157 @@ """ -import warnings +#~ import warnings # example warning # warnings.warn("Band was specified, so blabla is overridden") -from astropy.io import ascii -from astropy import units as u +#~ from astropy.io import ascii +#~ from astropy import units as u from ..query import BaseQuery -from ..utils import commons, async_to_sync +#~ from ..utils import commons, async_to_sync from ..utils.docstr_chompers import prepend_docstr_noreturns from . 
import conf - - +from ..utils.class_or_instance import class_or_instance +from ..utils import commons, async_to_sync __all__ = ['ADS', 'ADSClass'] - - +#~ +#~ @async_to_sync class ADSClass(BaseQuery): - SERVERS = conf.servers - QUERY_ADVANCED_URL = conf.advanced_url - QUERY_SIMPLE_URL = conf.simple_url + + ####### FROM SPLATALOGUE + SERVER = conf.server + QUERY_ADVANCED_PATH = conf.advanced_path + QUERY_SIMPLE_PATH = conf.simple_path TIMEOUT = conf.timeout - # global constant, not user-configurable - def __init__(self, **kwargs): - """ - Initialize a ADS query class with default arguments set. - Any default keyword arguments (see `query_lines`) can be - overridden here. - """ - self.data = self._default_kwargs() - self.set_default_options(**kwargs) - def set_default_options(self, **kwargs): - """ - Modify the default options. - """ - self.data.update(self._parse_kwargs(**kwargs)) - def _default_kwargs(self): - kwargs = dict(advanced = False,) - return self._parse_kwargs(**kwargs) - - def _parse_kwargs(self, search=""): - """ - The ADS service returns ... - + QUERY_SIMPLE_URL = SERVER + QUERY_SIMPLE_PATH + QUERY_ADVANCED_URL = SERVER + QUERY_ADVANCED_PATH - Parameters - ---------- - - Other Parameters - ---------------- - - - Returns - ------- - Dictionary of the parameters to send to the SPLAT page - payload : dict - A dictionary of keywords - """ - - payload = {'submit': 'Search', - 'frequency_units': 'GHz', - } - - payload['qsearch'] = simple - - - return payload - - def _validate_simple_kwargs(self, min_frequency=None, max_frequency=None, - band='any', **kwargs): - """ - Check that a simple search query is input - """ - if band == 'any': - if min_frequency is None or max_frequency is None: - raise ValueError("Must specify a simple search string.") - @prepend_docstr_noreturns("\n" + _parse_kwargs.__doc__) - def query_simple_async(self, simple, **kwargs): - """ - Returns - ------- - response : `requests.Response` - The response of the HTTP request. 
- """ - # have to chomp this kwd here... - get_query_payload = (kwargs.pop('get_query_payload') - if 'get_query_payload' in kwargs - else False) - self._validate_kwargs(simple, **kwargs) - - if hasattr(self, 'data'): - data_payload = self.data.copy() - data_payload.update(self._parse_kwargs(min_frequency=min_frequency, - max_frequency=max_frequency, - **kwargs)) - else: - data_payload = self._default_kwargs() - data_payload.update(self._parse_kwargs(min_frequency=min_frequency, - max_frequency=max_frequency, - **kwargs)) + ######## FROM API DOCS + def __init__(self, *args): + """ set some parameters """ + pass + + @class_or_instance + def query_simple(self, query_string, get_query_payload=False): + + request_payload = self._args_to_payload(query_string) + + response = commons.send_request(self.QUERY_SIMPLE_URL, request_payload, self.TIMEOUT) + # primarily for debug purposes, but also useful if you want to send + # someone a URL linking directly to the data if get_query_payload: - return data_payload + return request_payload - response = commons.send_request( - self.QUERY_URL, - data_payload, - self.TIMEOUT) + return response - self.response = response + def _parse_result(self, result): + # do something, probably with regexp's + return result - return response -ADS = ADSClass() + def _args_to_payload(self, query_string): + # convert arguments to a valid requests payload + # i.e. a dictionary + return {'qsearch' : query_string} +ADS = ADSClass() + +#~ + #~ # global constant, not user-configurable + #~ def __init__(self, **kwargs): + #~ """ + #~ Initialize a ADS query class with default arguments set. + #~ Any default keyword arguments (see `query_lines`) can be + #~ overridden here. + #~ """ + #~ self.data = self._default_kwargs() + #~ self.set_default_options(**kwargs) + #~ + #~ def set_default_options(self, **kwargs): + #~ """ + #~ Modify the default options. 
+ #~ """ + #~ self.data.update(self._parse_kwargs(**kwargs)) + #~ + #~ def _default_kwargs(self): + #~ kwargs = dict() + #~ return self._parse_kwargs(**kwargs) +#~ + #~ def _parse_kwargs(self, search=""): + #~ """ + #~ The ADS service returns. + #~ + #~ Parameters + #~ ---------- + #~ + #~ Other Parameters + #~ ---------------- + #~ + #~ Returns + #~ ------- + #~ Dictionary of the parameters to send to the SPLAT page + #~ payload : dict + #~ A dictionary of keywords + #~ """ +#~ + #~ payload = { 'qsearch': search } +#~ + #~ return payload +#~ + #~ def _validate_simple_kwargs(self, search=None, **kwargs): + #~ """ + #~ Check that a simple search query is input + #~ """ + #~ if search is None: + #~ raise ValueError("Must specify a search string.") + #~ + #~ @prepend_docstr_noreturns("\n" + _parse_kwargs.__doc__) + #~ def query_simple_async(self, search, **kwargs): + #~ """ + #~ Returns + #~ ------- + #~ response : `requests.Response` + #~ The response of the HTTP request. + #~ """ + #~ # have to chomp this kwd here... 
+ #~ get_query_payload = (kwargs.pop('get_query_payload') + #~ if 'get_query_payload' in kwargs + #~ else False) + #~ self._validate_kwargs(simple, **kwargs) +#~ + #~ if hasattr(self, 'data'): + #~ data_payload = self.data.copy() + #~ data_payload.update(self._parse_kwargs(min_frequency=min_frequency, + #~ max_frequency=max_frequency, + #~ **kwargs)) + #~ else: + #~ data_payload = self._default_kwargs() + #~ data_payload.update(self._parse_kwargs(min_frequency=min_frequency, + #~ max_frequency=max_frequency, + #~ **kwargs)) +#~ + #~ if get_query_payload: + #~ return data_payload +#~ + #~ response = commons.send_request( + #~ self.QUERY_URL, + #~ data_payload, + #~ self.TIMEOUT) +#~ + #~ self.response = response +#~ + #~ return response + + + ######################################################################## From acd282b72bd8b748667119a710783ca3306dec2c Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Mon, 12 Jan 2015 16:17:16 +0100 Subject: [PATCH 11/39] Error in documentation for API. query_region_async function should return response, or parsed version of response --- docs/api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api.rst b/docs/api.rst index 7cc0728496..1630b4aed4 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -158,7 +158,7 @@ Directory Structure:: if get_query_payload: return request_payload - return result + return response @class_or_instance def get_images_async(self, *args): From d525a2de8372ea33ec6b1c39e59dad622e682374 Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Mon, 12 Jan 2015 17:16:49 +0100 Subject: [PATCH 12/39] NASA ADS : Started parsing simple query XML results into AstroPy table. 
--- astroquery/nasa_ads/__init__.py | 10 ++- astroquery/nasa_ads/core.py | 122 +++++++------------------------- 2 files changed, 34 insertions(+), 98 deletions(-) diff --git a/astroquery/nasa_ads/__init__.py b/astroquery/nasa_ads/__init__.py index c728c312f3..9ccc9ff2e9 100644 --- a/astroquery/nasa_ads/__init__.py +++ b/astroquery/nasa_ads/__init__.py @@ -37,12 +37,16 @@ class Conf(_config.ConfigNamespace): ) advanced_path = _config.ConfigItem( '/cgi-bin/nph-abs_connect', - 'Path for advanced query' + 'Path for advanced query (unconfirmed)' ) + #~ simple_path = _config.ConfigItem( + #~ '/cgi-bin/nph-basic_connect', + #~ 'Path for simple query' + #~ ) simple_path = _config.ConfigItem( - '/cgi-bin/nph-basic_connect', - 'Path for simple query' + '/cgi-bin/basic_connect', + 'Path for simple query (return XML)' ) timeout = _config.ConfigItem( diff --git a/astroquery/nasa_ads/core.py b/astroquery/nasa_ads/core.py index aa8ea8fc6a..afb4409117 100644 --- a/astroquery/nasa_ads/core.py +++ b/astroquery/nasa_ads/core.py @@ -6,19 +6,20 @@ """ -#~ import warnings +import warnings # example warning # warnings.warn("Band was specified, so blabla is overridden") #~ from astropy.io import ascii #~ from astropy import units as u from ..query import BaseQuery -#~ from ..utils import commons, async_to_sync -from ..utils.docstr_chompers import prepend_docstr_noreturns +from ..utils import commons, async_to_sync +#~ from ..utils.docstr_chompers import prepend_docstr_noreturns from . 
import conf from ..utils.class_or_instance import class_or_instance from ..utils import commons, async_to_sync +from BeautifulSoup import BeautifulSoup as bfs __all__ = ['ADS', 'ADSClass'] #~ @@ -46,22 +47,40 @@ def query_simple(self, query_string, get_query_payload=False): request_payload = self._args_to_payload(query_string) response = commons.send_request(self.QUERY_SIMPLE_URL, request_payload, self.TIMEOUT) - + # primarily for debug purposes, but also useful if you want to send # someone a URL linking directly to the data if get_query_payload: return request_payload - return response + return self._parse_response(response) - def _parse_result(self, result): + def _parse_result(self, response): # do something, probably with regexp's - return result + + adssoup_raw = bfs(response.text) + adssoup_cooked = adssoup_raw.findAll('record') + + # number of hits + nhits = len(adssoup_cooked) + if nhits == 0: + warnings.warn("No hits for {0}".format(self.)) + return None + + """ + Developer, how do you get list with all the fields? + Like this: + [tag.name for tag in adssoup_cooked[0].findAll()[0]] + """ + + + #~ return result + return None def _args_to_payload(self, query_string): # convert arguments to a valid requests payload # i.e. a dictionary - return {'qsearch' : query_string} + return {'qsearch' : query_string, 'data_type' : 'XML'} @@ -69,93 +88,6 @@ def _args_to_payload(self, query_string): ADS = ADSClass() -#~ - #~ # global constant, not user-configurable - #~ def __init__(self, **kwargs): - #~ """ - #~ Initialize a ADS query class with default arguments set. - #~ Any default keyword arguments (see `query_lines`) can be - #~ overridden here. - #~ """ - #~ self.data = self._default_kwargs() - #~ self.set_default_options(**kwargs) - #~ - #~ def set_default_options(self, **kwargs): - #~ """ - #~ Modify the default options. 
- #~ """ - #~ self.data.update(self._parse_kwargs(**kwargs)) - #~ - #~ def _default_kwargs(self): - #~ kwargs = dict() - #~ return self._parse_kwargs(**kwargs) -#~ - #~ def _parse_kwargs(self, search=""): - #~ """ - #~ The ADS service returns. - #~ - #~ Parameters - #~ ---------- - #~ - #~ Other Parameters - #~ ---------------- - #~ - #~ Returns - #~ ------- - #~ Dictionary of the parameters to send to the SPLAT page - #~ payload : dict - #~ A dictionary of keywords - #~ """ -#~ - #~ payload = { 'qsearch': search } -#~ - #~ return payload -#~ - #~ def _validate_simple_kwargs(self, search=None, **kwargs): - #~ """ - #~ Check that a simple search query is input - #~ """ - #~ if search is None: - #~ raise ValueError("Must specify a search string.") - #~ - #~ @prepend_docstr_noreturns("\n" + _parse_kwargs.__doc__) - #~ def query_simple_async(self, search, **kwargs): - #~ """ - #~ Returns - #~ ------- - #~ response : `requests.Response` - #~ The response of the HTTP request. - #~ """ - #~ # have to chomp this kwd here... 
- #~ get_query_payload = (kwargs.pop('get_query_payload') - #~ if 'get_query_payload' in kwargs - #~ else False) - #~ self._validate_kwargs(simple, **kwargs) -#~ - #~ if hasattr(self, 'data'): - #~ data_payload = self.data.copy() - #~ data_payload.update(self._parse_kwargs(min_frequency=min_frequency, - #~ max_frequency=max_frequency, - #~ **kwargs)) - #~ else: - #~ data_payload = self._default_kwargs() - #~ data_payload.update(self._parse_kwargs(min_frequency=min_frequency, - #~ max_frequency=max_frequency, - #~ **kwargs)) -#~ - #~ if get_query_payload: - #~ return data_payload -#~ - #~ response = commons.send_request( - #~ self.QUERY_URL, - #~ data_payload, - #~ self.TIMEOUT) -#~ - #~ self.response = response -#~ - #~ return response - - ######################################################################## From b3652fe7c1435b2762db7ec600bd04813ee748b6 Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Tue, 13 Jan 2015 13:41:32 +0100 Subject: [PATCH 13/39] NASA ADS : Initial parsing supported. --- astroquery/nasa_ads/__init__.py | 12 +- astroquery/nasa_ads/core.py | 281 +------------------------------- 2 files changed, 18 insertions(+), 275 deletions(-) diff --git a/astroquery/nasa_ads/__init__.py b/astroquery/nasa_ads/__init__.py index 9ccc9ff2e9..5bf30f69e0 100644 --- a/astroquery/nasa_ads/__init__.py +++ b/astroquery/nasa_ads/__init__.py @@ -10,11 +10,14 @@ #~ from astropy.config import ConfigurationItem from astropy import config as _config +from .utils import * + class Conf(_config.ConfigNamespace): """ Configuration parameters for `astroquery.nasa_ads`. """ + server = _config.ConfigItem( 'http://adswww.harvard.edu', 'SAO/NASA ADS main server.' 
@@ -57,6 +60,11 @@ class Conf(_config.ConfigNamespace): conf = Conf() +conf.adsfields = ['bibcode', 'title', 'author', 'affiliation', + 'journal', 'volume', 'pubdate', 'page', 'lastpage', 'keywords', 'keyword', + 'origin', 'copyright', 'link', 'name', 'url', 'count', 'score', 'citations', + 'abstract', 'doi', 'eprindit'] + from .core import ADSClass, ADS __all__ = ['ADSClass', 'ADS', @@ -78,9 +86,9 @@ class Conf(_config.ConfigNamespace): #~ 'http://ads.nao.ac.jp', #~ 'http://ads.bao.ac.cn', #~ 'http://ads.iucaa.ernet.in', - #~ 'http://ads.arsip.lipi.go.id', + #~ 'http://ads.arsip.lipi.go.id', #~ 'http://saaoads.chpc.ac.za', - #~ 'http://ads.on.br'], + #~ 'http://ads.on.br'], #~ 'SAO/NASA ADS mirrors around the world' #~ ) #~ diff --git a/astroquery/nasa_ads/core.py b/astroquery/nasa_ads/core.py index afb4409117..357979ded8 100644 --- a/astroquery/nasa_ads/core.py +++ b/astroquery/nasa_ads/core.py @@ -15,6 +15,7 @@ from ..utils import commons, async_to_sync #~ from ..utils.docstr_chompers import prepend_docstr_noreturns from . 
import conf +#~ from .utils import * from ..utils.class_or_instance import class_or_instance from ..utils import commons, async_to_sync @@ -22,8 +23,7 @@ from BeautifulSoup import BeautifulSoup as bfs __all__ = ['ADS', 'ADSClass'] -#~ -#~ + @async_to_sync class ADSClass(BaseQuery): @@ -43,7 +43,7 @@ def __init__(self, *args): @class_or_instance def query_simple(self, query_string, get_query_payload=False): - + self.query_string = query_string request_payload = self._args_to_payload(query_string) response = commons.send_request(self.QUERY_SIMPLE_URL, request_payload, self.TIMEOUT) @@ -55,27 +55,25 @@ def query_simple(self, query_string, get_query_payload=False): return self._parse_response(response) - def _parse_result(self, response): + def _parse_response(self, response): # do something, probably with regexp's adssoup_raw = bfs(response.text) adssoup_cooked = adssoup_raw.findAll('record') + result = adssoup_cooked # number of hits nhits = len(adssoup_cooked) if nhits == 0: - warnings.warn("No hits for {0}".format(self.)) + warnings.warn("No hits for the search \'{0}\'".format(self.query_string)) return None """ Developer, how do you get list with all the fields? 
Like this: - [tag.name for tag in adssoup_cooked[0].findAll()[0]] + [tag.name for tag in adssoup_cooked[0].findAll()] """ - - - #~ return result - return None + return result def _args_to_payload(self, query_string): # convert arguments to a valid requests payload @@ -84,267 +82,4 @@ def _args_to_payload(self, query_string): - - ADS = ADSClass() - - - -######################################################################## - - - - - - -def search(query, **kwargs): - """ - query : Normal string to ADS - or dictionary for advanced search - - """ - - - ### test code to get it up and running - - # main access - # TODO : either access via Z39.50 or via URLlib/mecahnise etc - - # wishlist - # TODO : simple search - # TODO : advanced search - # TODO : browse - - - import locale - # this reads the environment and inits the right locale - locale.setlocale(locale.LC_ALL, "") - - - try: - # the mechanize module exports urllib2 as well... - import mechanize - import urllib - except (ImportError): - print 'You need the \"mechanize\" and urllib module' - ' for this script to work.' - - try: - from BeautifulSoup import BeautifulSoup as bfs - except (ImportError): - print 'You need the BeautifulSoup module...' - - - import scipy - import sys - - #from string import lower, upper - # search URL - # http://adsabs.harvard.edu/cgi-bin/nph-basic_connect?qsearch=The+Search+String - - # to parse the search string from "The Search String" to "The+Search+String" - # urllib.quote(url, safe=":/") - - ############################################ - ######## GET THE FORM - - #~ Ping to know which server to use. 
- working_mirror = 0 - - got_reply = 0 - while not got_reply: - try: - # try to get the form - response = mechanize.urlopen(mirrors[working_mirror] + types_q[search_type]) - except mechanize.URLError: - # if we can't get it, try another mirror - if not i < len(mirrors): - break - else: - working_mirror += 1 - pass - else: - got_reply = True - - if not got_reply and working_mirror >= len(mirrors): - # TODO log output - sys.stderr.write('ERROR : You have to be connected to the internet to access the NASA ADS database and it has to be online (on all mirrors).') - else: - # TODO log output - print ('got reply from : {0}'.format(mirrors[working_mirror])) - - - - - #~ Then check if going for the advanced interface. - #~ advanced = int((type(query) == type({})) - if 'advanced' in kwargs: - # ADVANCED QUERY - # - # Should I use http://adsabs.harvard.edu/abstract_service.html - # or the full ADS Labs? - response = mechanize.urlopen(mirrors[working_mirror] + advanced_q) - forms = mechanize.ParseResponse(response, backwards_compat=False) - response.close() - form = forms[0] - #~ if arg.has_key('dbg_snd_form'): # for test purposes - #~ return form - #~ form['qsearch'] = '^Persson 2012' - - ######## SUBMIT FORM - #~ clicked_form = form.click() - - #~ result = mechanize.urlopen(clicked_form) - - pass - - elif not 'advanced' in kwargs: - # SIMPLE QUERY - baseurl = (mirrors[working_mirror] + - 'cgi-bin/nph-basic_connect?qsearch=') - - result = mechanize.urlopen( urllib.quote(baseurl + query, safe = ":/=?^") ) - # test below - data = urllib.urlencode({'qsearch' : '^Persson'}) - baseurl = (mirrors[working_mirror] + - 'cgi-bin/nph-basic_connect?') - f = urllib.urlopen(baseurl, data) - ############################################ - ######## PARSE RESULTS - - page = result.readlines() - result.close() - - # start parsing the results - t = bfs(' '.join(page)) - tables = t.findAll('table') - - r = tables[1].findAll('td')[0] - y = r.findAll('strong')[0].contents[0] - nres = int(y) - if 
nres<1: - return 0 - - # get table with results - resulttable = tables[2] - # get the rows of the table - rows = resulttable.findAll('tr') - # get each result entry per list item - entries = [rows[i:i+3][1:] for i in scipy.arange(2,57,3)][:-1] - - ############################################ - ######## GET RESULTLIST - - ###### the problem with this is that web is in UNICODE, - # ie. special chars are represented by funny numbers and '\' - - #resultlist = [_Result(i) for i in entries] - return _Resultlist(entries) - - -############################################ -######## DEFINE RESULT(S) OBJECT - - -class _Resultlist: - """ - Internal object to represent the result list - """ - def __init__(self, entries): - self.resultlist = [_Result(i) for i in entries] - def sort(self,sortkey = 'author', reverse_bool = False): - from operator import itemgetter, attrgetter - #~ sorted(resultlist, key=attrgetter('author'), reverse=True) - return sorted(self.resultlist, key=attrgetter(sortkey), reverse = reverse_bool) - def __str__(self): - printlist = [] - for i in self.resultlist[:-1]: - printlist.append('Author : {0.author}\n' - 'Title : {0.title}\n' - 'Score : {0.ads_score}\n'.format(i)) - return '\n'.join(printlist) - -class _Result: - """ - Internal object to represent each result - """ - def __init__(self, entry): - #~ def __init__(self, author, - #~ authors, - #~ title, - #~ score, - #~ bibcode, - #~ pubdate, - #~ links): - #~ self.author = author - #~ self.authorlist = authors - #~ self.title = title - #~ self.score = score - #~ self.bibcode = bibcode - #~ self.pubdate = pubdate # parse? 
- #~ self.links = links # dictionary of all the links - # - td_tags0 = entry[0].findAll('td') - self.bibcode = td_tags0[1].findAll('input')[0]['value'].encode('UTF-8') - self.url_abstract_page = td_tags0[1].findAll('a')[0]['href'].encode('UTF-8') - self.ads_score = float(td_tags0[3].contents[0].encode('UTF-8')) - self.rank = 100 - self.ads_score - self.pubdate = td_tags0[4].contents[0].string.encode('UTF-8') - self.pubday = self.pubdate[:2] - self.pubyear = self.pubdate[3:] - # - self.links = dict() - for link in td_tags0[5].findAll('a'): - self.links[link.string.encode()] = link['href'].encode('UTF-8') - # - td_tags1 = entry[1].findAll('td') - - # second part of the result entry - self.title = td_tags1[3].contents[0].string.encode('UTF-8') - # still in unicode - # TODO need to convert to normal UTF, not unicode - authors = td_tags1[1].contents[0].encode('UTF-8').split(';') - if authors[-1] == ' ': - # so, if the last entry in the authorlist is empty, means - # it split a ';', which in turn means there are more - # authors, need to add that part... - authors[-1] = td_tags1[1].contents[1].contents[0].encode('UTF-8') + ', COAuth' - # - self.authors = [i.split(',') for i in authors] - self.author = ', '.join(self.authors[0]) - # - #~ self. 
- def __repr__(self): - return repr([self.author, self.authors, self.title, self.url_abstract_page, self.ads_score, self.links, self.bibcode, self.pubdate]) - def _returnlist_(self): - return [self.author, self.authors, self.title, self.url_abstract_page, self.ads_score, self.links, self.bibcode, self.pubdate] - -#~ # second part of the result entry -#~ title = td_tags1[3].contents[0].string.replace(u'\xa0', u' ').encode() -#~ # still in unicode -#~ # TODO need to convert to normal UTF, not unicode -#~ authors = td_tags1[1].string.replace(u'\xa0', u' ').encode().split(';') -#~ authors = [i.split(',') for i in authors] -#~ author = authors[0] - - - - -############################################ -######## RETURN SORTABLE OBJECT LIST - -############################################ -######## HOW TO SORT RESULTS -# needs Python 2.6 at least -#~ from operator import itemgetter, attrgetter -#~ -#~ # now to sort it, just use one of the keys -#~ # score, high to low -#~ sorted(resultlist, key=attrgetter('author'), reverse=True) -#~ -#~ # cmp=locale.strcoll new and untested addition -#~ -#~ # authors alphabetical order first and then by score -#~ # i.e. 
sort by score if same first author -#~ sorted(resultlist, key=attrgetter('ads_score','authors'), reverse=True) -######################################################################## From bcdb4d0350e58af8fc8bc9ab9a0535a63d4969d0 Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Thu, 15 Jan 2015 12:12:19 +0100 Subject: [PATCH 14/39] moved function to extract field from BFS structure to separate file utils.py --- astroquery/nasa_ads/utils.py | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 astroquery/nasa_ads/utils.py diff --git a/astroquery/nasa_ads/utils.py b/astroquery/nasa_ads/utils.py new file mode 100644 index 0000000000..f9ac553400 --- /dev/null +++ b/astroquery/nasa_ads/utils.py @@ -0,0 +1,10 @@ + + + + +def get_field(record, field): + value = record.findAll(field) + if len(value) == 0: + return "" + else: + return value[0].text.encode("utf-8") From 8c5bed62f08303a8f25d4b202e44f7f7d635d2bf Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Sun, 15 Feb 2015 12:53:09 +0100 Subject: [PATCH 15/39] moved over to standard Python xml library, started parsing results into a AstroPy Table instance. 
--- astroquery/nasa_ads/__init__.py | 4 ++-- astroquery/nasa_ads/core.py | 24 +++++++++++++++++++----- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/astroquery/nasa_ads/__init__.py b/astroquery/nasa_ads/__init__.py index 5bf30f69e0..cd9ff1bc0d 100644 --- a/astroquery/nasa_ads/__init__.py +++ b/astroquery/nasa_ads/__init__.py @@ -53,7 +53,7 @@ class Conf(_config.ConfigNamespace): ) timeout = _config.ConfigItem( - 60, + 120, 'Time limit for connecting to ADS server' ) @@ -63,7 +63,7 @@ class Conf(_config.ConfigNamespace): conf.adsfields = ['bibcode', 'title', 'author', 'affiliation', 'journal', 'volume', 'pubdate', 'page', 'lastpage', 'keywords', 'keyword', 'origin', 'copyright', 'link', 'name', 'url', 'count', 'score', 'citations', - 'abstract', 'doi', 'eprindit'] + 'abstract', 'doi', 'eprintid'] from .core import ADSClass, ADS diff --git a/astroquery/nasa_ads/core.py b/astroquery/nasa_ads/core.py index 357979ded8..e86bb8017d 100644 --- a/astroquery/nasa_ads/core.py +++ b/astroquery/nasa_ads/core.py @@ -16,11 +16,14 @@ #~ from ..utils.docstr_chompers import prepend_docstr_noreturns from . 
import conf #~ from .utils import * +from astropy.table import Table, Column from ..utils.class_or_instance import class_or_instance from ..utils import commons, async_to_sync -from BeautifulSoup import BeautifulSoup as bfs +#~ from BeautifulSoup import BeautifulSoup as bfs + +from xml.dom import minidom __all__ = ['ADS', 'ADSClass'] @@ -42,7 +45,7 @@ def __init__(self, *args): pass @class_or_instance - def query_simple(self, query_string, get_query_payload=False): + def query_simple(self, query_string, get_query_payload=False, get_raw_response=False): self.query_string = query_string request_payload = self._args_to_payload(query_string) @@ -52,10 +55,21 @@ def query_simple(self, query_string, get_query_payload=False): # someone a URL linking directly to the data if get_query_payload: return request_payload - - return self._parse_response(response) - + if get_raw_response: + return response + # parse the XML response into Beautiful Soup + #~ response_bfs = self._parse_response_to_bfs(response) + # + #self._parse_bfs_to_table(response_bfs) + self._parse_response(response) + + return response + def _parse_response(self, response): + xmlrepr = minidom.parseString(response.text.encode('utf-8')) + + + def _parse_response_to_bfs(self, response): # do something, probably with regexp's adssoup_raw = bfs(response.text) From a5ca7c7a031a5b08a9fd470882bf1d719eb95e08 Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Sun, 15 Mar 2015 15:25:22 +0100 Subject: [PATCH 16/39] now results are parsed into a Astropy Table table --- astroquery/nasa_ads/__init__.py | 1 - astroquery/nasa_ads/core.py | 125 ++++++++++++++++++++++++++------ astroquery/nasa_ads/utils.py | 21 ++++-- 3 files changed, 116 insertions(+), 31 deletions(-) diff --git a/astroquery/nasa_ads/__init__.py b/astroquery/nasa_ads/__init__.py index cd9ff1bc0d..7bbe4d5ebd 100644 --- a/astroquery/nasa_ads/__init__.py +++ b/astroquery/nasa_ads/__init__.py @@ -10,7 +10,6 @@ #~ from astropy.config import ConfigurationItem from 
astropy import config as _config -from .utils import * class Conf(_config.ConfigNamespace): diff --git a/astroquery/nasa_ads/core.py b/astroquery/nasa_ads/core.py index e86bb8017d..f4f0b9ade7 100644 --- a/astroquery/nasa_ads/core.py +++ b/astroquery/nasa_ads/core.py @@ -20,7 +20,7 @@ from ..utils.class_or_instance import class_or_instance from ..utils import commons, async_to_sync - +from .utils import * #~ from BeautifulSoup import BeautifulSoup as bfs from xml.dom import minidom @@ -61,34 +61,50 @@ def query_simple(self, query_string, get_query_payload=False, get_raw_response=F #~ response_bfs = self._parse_response_to_bfs(response) # #self._parse_bfs_to_table(response_bfs) - self._parse_response(response) + resulttable = self._parse_response(response) - return response + return resulttable def _parse_response(self, response): xmlrepr = minidom.parseString(response.text.encode('utf-8')) + # Check if there are any results! + # get the list of hits + hitlist = xmlrepr.childNodes[0].childNodes + hitlist = hitlist[1::2] # every second hit is a "line break" - def _parse_response_to_bfs(self, response): - # do something, probably with regexp's - - adssoup_raw = bfs(response.text) - adssoup_cooked = adssoup_raw.findAll('record') - result = adssoup_cooked - - # number of hits - nhits = len(adssoup_cooked) - if nhits == 0: - warnings.warn("No hits for the search \'{0}\'".format(self.query_string)) - return None + # Parse the results + # first single items + titles = get_data_from_xml(hitlist, 'title') + bibcode = get_data_from_xml(hitlist, 'bibcode') + journal = get_data_from_xml(hitlist, 'journal') + volume = get_data_from_xml(hitlist, 'volume') + pubdate = get_data_from_xml(hitlist, 'pubdate') + page = get_data_from_xml(hitlist, 'page') + score = get_data_from_xml(hitlist, 'score') + citations = get_data_from_xml(hitlist, 'citations') + abstract = get_data_from_xml(hitlist, 'abstract') + doi = get_data_from_xml(hitlist, 'DOI') + eprintid = get_data_from_xml(hitlist, 
'eprintid') + #~ = get_data_from_xml(hitlist, '') + authors = get_data_from_xml(hitlist, 'author') + + t = Table() + t['title'] = titles + t['bibcode'] = bibcode + t['journal'] = journal + t['volume'] = volume + t['pubdate'] = pubdate + t['page'] = page + t['score'] = score + t['citations'] = citations + t['abstract'] = abstract + t['doi'] = doi + t['eprintid'] = eprintid + t['authors'] = authors - """ - Developer, how do you get list with all the fields? - Like this: - [tag.name for tag in adssoup_cooked[0].findAll()] - """ - return result - + return t + def _args_to_payload(self, query_string): # convert arguments to a valid requests payload # i.e. a dictionary @@ -97,3 +113,68 @@ def _args_to_payload(self, query_string): ADS = ADSClass() + + +""" +typical fields available: + +[u'bibcode', + u'title', + u'author', + u'author', + u'author', + u'affiliation', + u'journal', + u'volume', + u'pubdate', + u'page', + u'keywords', + u'keyword', + u'keyword', + u'keyword', + u'keyword', + u'keyword', + u'origin', + u'link', + u'name', + u'url', + u'link', + u'name', + u'url', + u'link', + u'name', + u'url', + u'link', + u'name', + u'url', + u'link', + u'name', + u'url', + u'count', + u'link', + u'name', + u'url', + u'count', + u'link', + u'name', + u'url', + u'count', + u'link', + u'name', + u'url', + u'link', + u'name', + u'url', + u'count', + u'link', + u'name', + u'url', + u'url', + u'score', + u'citations', + u'abstract', + u'doi', + u'eprintid'] + + +""" diff --git a/astroquery/nasa_ads/utils.py b/astroquery/nasa_ads/utils.py index f9ac553400..c3c98eab38 100644 --- a/astroquery/nasa_ads/utils.py +++ b/astroquery/nasa_ads/utils.py @@ -1,10 +1,15 @@ - - -def get_field(record, field): - value = record.findAll(field) - if len(value) == 0: - return "" - else: - return value[0].text.encode("utf-8") +def get_data_from_xml(doclist, fieldname, nohitreturn=None): + result = [] + for element in doclist: + fieldlist = element.getElementsByTagName(fieldname) + try: + tmp = 
fieldlist[0] + except IndexError: + fields = [nohitreturn] + fields = [] + for field in fieldlist: # this is useful for e.g. author field + fields.append(field.childNodes[0].data.encode("utf-8")) + result.append(fields) + return result From d74d87380cf786bff03ae30388c8860915247915 Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Fri, 19 Dec 2014 11:00:43 +0100 Subject: [PATCH 17/39] NASA ADS query : initial commit --- astroquery/nasa_ads/__init__.py | 69 +++ astroquery/nasa_ads/core.py | 645 ++++++++++++++++++++++ astroquery/nasa_ads/tests/__init__.py | 0 astroquery/nasa_ads/tests/test_nasaads.py | 0 docs/nasa_ads/nasa_ads.rst | 47 ++ 5 files changed, 761 insertions(+) create mode 100644 astroquery/nasa_ads/__init__.py create mode 100644 astroquery/nasa_ads/core.py create mode 100644 astroquery/nasa_ads/tests/__init__.py create mode 100644 astroquery/nasa_ads/tests/test_nasaads.py create mode 100644 docs/nasa_ads/nasa_ads.rst diff --git a/astroquery/nasa_ads/__init__.py b/astroquery/nasa_ads/__init__.py new file mode 100644 index 0000000000..cb5cd010c1 --- /dev/null +++ b/astroquery/nasa_ads/__init__.py @@ -0,0 +1,69 @@ +# Licensed under a 3-clause BSD style license - see LICENSE.rst +""" +SAO/NASA ADS Query Tool +----------------------------------- + +:Author: Magnus Vilhelm Persson (magnusp@vilhelm.nu) + +""" +from astropy import config as _config + + +class Conf(_config.ConfigNamespace): + """ + Configuration parameters for `astroquery.nasa_ads`. 
+ """ + mirror_urls = _config.ConfigItem( + ['http://adswww.harvard.edu/', + 'http://cdsads.u-strasbg.fr/', + 'http://ukads.nottingham.ac.uk/', + 'http://esoads.eso.org/', + 'http://ads.ari.uni-heidelberg.de/' + 'http://ads.inasan.ru/', + 'http://ads.mao.kiev.ua/', + 'http://ads.astro.puc.cl/', + 'http://ads.nao.ac.jp/', + 'http://ads.bao.ac.cn/', + 'http://ads.iucaa.ernet.in/', + 'http://ads.arsip.lipi.go.id/', + 'http://saaoads.chpc.ac.za/', + 'http://ads.on.br/'], + 'SAO/NASA ADS mirrors around the world' + ) + + advanced_url = _config.ConfigItem( + 'abstract_service.html', + 'Path for advanced query' + ) + + simple_url = _config.ConfigItem( + 'abstract_service.html', + 'Path for advanced query' + ) + + timeout = _config.ConfigItem( + 60, + 'Time limit for connecting to ADS server.' + ) + lines_limit = _config.ConfigItem( + 1000, + 'Limit to number of hits exported.' + ) + +conf = Conf() + +from .core import ADS, ADSClass + +__all__ = ['ADS', 'ADSClass', + 'Conf', 'conf', + ] + + + + + + + +""" +advanced_q = 'abstract_service.html' +""" diff --git a/astroquery/nasa_ads/core.py b/astroquery/nasa_ads/core.py new file mode 100644 index 0000000000..e68d7001d0 --- /dev/null +++ b/astroquery/nasa_ads/core.py @@ -0,0 +1,645 @@ +#! /usr/bin/env python +# -*- coding: utf-8 -*- +# +# adslib.py +# +# Module to search the ads +# +# Copyright 2012 Magnus Persson +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +# MA 02110-1301, USA. +# +# version 0.0.1a + +""" +Script to search the NASA ADS directory + +Need : o scipy + o mechanize module (standard in Python >2.6/2.7?) + o urllib2 module (standard Python module, required(?) by mechanize) + o beautiful soup/xml (xml standard in Python >2.6/2.7?) + +""" + +""" +ADSlib - Python Module to interact with NASA ADS +at + +http://adswww.harvard.edu/ + +OR one of the mirrors + +http://cdsads.u-strasbg.fr/ +http://ukads.nottingham.ac.uk/ +http://esoads.eso.org/ +http://ads.ari.uni-heidelberg.de/ +http://ads.inasan.ru/ +http://ads.mao.kiev.ua/ +http://ads.astro.puc.cl/ +http://ads.nao.ac.jp/ +http://ads.bao.ac.cn/ +http://ads.iucaa.ernet.in/ +http://ads.arsip.lipi.go.id/ +http://saaoads.chpc.ac.za/ +http://ads.on.br/ + + + + +""" + +""" +----[ Change log ]---- + +* 2012 Dec 15 + Code cleanup. + +* 2012 Oct 29 + Now only uses mechanize module(!) Yay. + +* 2012 Oct 02 + File created. 
+ + + +""" + +""" +NOTES: +# advanced search query +abstract_service.html + +# quick search +index.html + +""" + + +mirrors = [ + 'http://adswww.harvard.edu/', + 'http://cdsads.u-strasbg.fr/', + 'http://ukads.nottingham.ac.uk/', + 'http://esoads.eso.org/', + 'http://ads.ari.uni-heidelberg.de/' + 'http://ads.inasan.ru/', + 'http://ads.nao.ac.jp/', + 'http://ads.iucaa.ernet.in/', + 'http://ads.arsip.lipi.go.id/', + 'http://saaoads.chpc.ac.za/', + 'http://ads.on.br/' + ] + +advanced_q = 'abstract_service.html' + +def search(query, **kwargs): + """ + query : Normal string to ADS + or dictionary for advanced search + + """ + + + ### test code to get it up and running + + # main access + # TODO : either access via Z39.50 or via URLlib/mecahnise etc + + # wishlist + # TODO : simple search + # TODO : advanced search + # TODO : browse + + + import locale + # this reads the environment and inits the right locale + locale.setlocale(locale.LC_ALL, "") + + + try: + # the mechanize module exports urllib2 as well... + import mechanize + import urllib + except (ImportError): + print 'You need the \"mechanize\" and urllib module' + ' for this script to work.' + + try: + from BeautifulSoup import BeautifulSoup as bfs + except (ImportError): + print 'You need the BeautifulSoup module...' + + + import scipy + import sys + + #from string import lower, upper + # search URL + # http://adsabs.harvard.edu/cgi-bin/nph-basic_connect?qsearch=The+Search+String + + # to parse the search string from "The Search String" to "The+Search+String" + # urllib.quote(url, safe=":/") + + ############################################ + ######## GET THE FORM + + #~ Ping to know which server to use. 
+ working_mirror = 0 + + got_reply = 0 + while not got_reply: + try: + # try to get the form + response = mechanize.urlopen(mirrors[working_mirror] + types_q[search_type]) + except mechanize.URLError: + # if we can't get it, try another mirror + if not i < len(mirrors): + break + else: + working_mirror += 1 + pass + else: + got_reply = True + + if not got_reply and working_mirror >= len(mirrors): + # TODO log output + sys.stderr.write('ERROR : You have to be connected to the internet to access the NASA ADS database and it has to be online (on all mirrors).') + else: + # TODO log output + print ('got reply from : {0}'.format(mirrors[working_mirror])) + + + + + #~ Then check if going for the advanced interface. + #~ advanced = int((type(query) == type({})) + if 'advanced' in kwargs: + # ADVANCED QUERY + # + # Should I use http://adsabs.harvard.edu/abstract_service.html + # or the full ADS Labs? + response = mechanize.urlopen(mirrors[working_mirror] + advanced_q) + forms = mechanize.ParseResponse(response, backwards_compat=False) + response.close() + form = forms[0] + #~ if arg.has_key('dbg_snd_form'): # for test purposes + #~ return form + #~ form['qsearch'] = '^Persson 2012' + + ######## SUBMIT FORM + #~ clicked_form = form.click() + + #~ result = mechanize.urlopen(clicked_form) + + pass + + elif not 'advanced' in kwargs: + # SIMPLE QUERY + baseurl = (mirrors[working_mirror] + + 'cgi-bin/nph-basic_connect?qsearch=') + + result = mechanize.urlopen( urllib.quote(baseurl + query, safe = ":/=?^") ) + # test below + data = urllib.urlencode({'qsearch' : '^Persson'}) + baseurl = (mirrors[working_mirror] + + 'cgi-bin/nph-basic_connect?') + f = urllib.urlopen(baseurl, data) + ############################################ + ######## PARSE RESULTS + + page = result.readlines() + result.close() + + # start parsing the results + t = bfs(' '.join(page)) + tables = t.findAll('table') + + r = tables[1].findAll('td')[0] + y = r.findAll('strong')[0].contents[0] + nres = int(y) + if 
nres<1: + return 0 + + # get table with results + resulttable = tables[2] + # get the rows of the table + rows = resulttable.findAll('tr') + # get each result entry per list item + entries = [rows[i:i+3][1:] for i in scipy.arange(2,57,3)][:-1] + + ############################################ + ######## GET RESULTLIST + + ###### the problem with this is that web is in UNICODE, + # ie. Jørgensen, æ and åäö and ßü etc are represented by funny numbers and '\' + + #resultlist = [_Result(i) for i in entries] + return _Resultlist(entries) + + +############################################ +######## DEFINE RESULT(S) OBJECT + + +class _Resultlist: + """ + Internal object to represent the result list + """ + def __init__(self, entries): + self.resultlist = [_Result(i) for i in entries] + def sort(self,sortkey = 'author', reverse_bool = False): + from operator import itemgetter, attrgetter + #~ sorted(resultlist, key=attrgetter('author'), reverse=True) + return sorted(self.resultlist, key=attrgetter(sortkey), reverse = reverse_bool) + def __str__(self): + printlist = [] + for i in self.resultlist[:-1]: + printlist.append('Author : {0.author}\n' + 'Title : {0.title}\n' + 'Score : {0.ads_score}\n'.format(i)) + return '\n'.join(printlist) + +class _Result: + """ + Internal object to represent each result + """ + def __init__(self, entry): + #~ def __init__(self, author, + #~ authors, + #~ title, + #~ score, + #~ bibcode, + #~ pubdate, + #~ links): + #~ self.author = author + #~ self.authorlist = authors + #~ self.title = title + #~ self.score = score + #~ self.bibcode = bibcode + #~ self.pubdate = pubdate # parse? 
+ #~ self.links = links # dictionary of all the links + # + td_tags0 = entry[0].findAll('td') + self.bibcode = td_tags0[1].findAll('input')[0]['value'].encode('UTF-8') + self.url_abstract_page = td_tags0[1].findAll('a')[0]['href'].encode('UTF-8') + self.ads_score = float(td_tags0[3].contents[0].encode('UTF-8')) + self.rank = 100 - self.ads_score + self.pubdate = td_tags0[4].contents[0].string.encode('UTF-8') + self.pubday = self.pubdate[:2] + self.pubyear = self.pubdate[3:] + # + self.links = dict() + for link in td_tags0[5].findAll('a'): + self.links[link.string.encode()] = link['href'].encode('UTF-8') + # + td_tags1 = entry[1].findAll('td') + + # second part of the result entry + self.title = td_tags1[3].contents[0].string.encode('UTF-8') + # still in unicode + # TODO need to convert to normal UTF, not unicode + authors = td_tags1[1].contents[0].encode('UTF-8').split(';') + if authors[-1] == ' ': + # so, if the last entry in the authorlist is empty, means + # it split a ';', which in turn means there are more + # authors, need to add that part... + authors[-1] = td_tags1[1].contents[1].contents[0].encode('UTF-8') + ', COAuth' + # + self.authors = [i.split(',') for i in authors] + self.author = ', '.join(self.authors[0]) + # + #~ self. 
+ def __repr__(self): + return repr([self.author, self.authors, self.title, self.url_abstract_page, self.ads_score, self.links, self.bibcode, self.pubdate]) + def _returnlist_(self): + return [self.author, self.authors, self.title, self.url_abstract_page, self.ads_score, self.links, self.bibcode, self.pubdate] + +#~ # second part of the result entry +#~ title = td_tags1[3].contents[0].string.replace(u'\xa0', u' ').encode() +#~ # still in unicode +#~ # TODO need to convert to normal UTF, not unicode +#~ authors = td_tags1[1].string.replace(u'\xa0', u' ').encode().split(';') +#~ authors = [i.split(',') for i in authors] +#~ author = authors[0] + + + + +############################################ +######## RETURN SORTABLE OBJECT LIST + +############################################ +######## HOW TO SORT RESULTS +# needs Python 2.6 at least +#~ from operator import itemgetter, attrgetter +#~ +#~ # now to sort it, just use one of the keys +#~ # score, high to low +#~ sorted(resultlist, key=attrgetter('author'), reverse=True) +#~ +#~ # cmp=locale.strcoll new and untested addition +#~ +#~ # authors alphabetical order first and then by score +#~ # i.e. sort by score if same first author +#~ sorted(resultlist, key=attrgetter('ads_score','authors'), reverse=True) + + +######################################################################## +######## NOTES + +### FIELDS +# bibcode +# title +# authors +# score +# pubdate +# possilbe (quick)links : +# A Abstract +# C CITATIONS +# D On-line Data +# E EJOURNAL +# F Printable Article +# G Gif Images +# H HEP/Spires Information +# I Author Comments +# L Library Entries +# M Multimedia +# N NED Objects +# O Associated Articles +# P PDS datasets +# R REFERENCES +# S SIMBAD Objects +# T TOC +# U Also read +# X arXiv e-print +# Z Abstract Custom + + +""" + + +6.3.4 - Embedded Queries + +This section describes how the abstract service can be accessed from embedded forms. 
The URL for submitting embedded forms is: + +http://adsabs.harvard.edu/cgi-bin/abs_connect + +The syntax is: + +... + +where parami are the names of the parameters and vali are their values. There are no spaces allowed in a URL. Any blanks need to be encoded as a '+' (e.g. between author last and first names). The list of the possible parameters and their possible values is available to build queries. It is advisable to use only the more basic parameters for such queries since the more complicated parameters are more likely to change with future versions of the search system. + +One use of this is for including a link to the bibliography for a particular author in a document. + +To do so, use the following syntax: + +http://adsabs.harvard.edu/cgi-bin/abs_connect?author=last,+f.&return_req=no_params + +This sets the author=last, f, and prevents the listing of parameters at the bottom of the page (return_req=no_params). + +If you want to specify the author middle initial in addition to the first initial, use exact author matching (&aut_xct=YES). + +To build a search for two different formats of author names, enter the two author arguments separated with a semicolon. + +http://adsabs.harvard.edu/cgi-bin/abs_connect?author=last,+f.m.;last,+first+m.&aut_xct=YES&return_req=no_params + +Such a link will always provide access to the latest bibliography of an author without the need to update anything. + +Sometimes such a list includes articles by somebody else with the same name. You can exclude specific articles from the results list with the command + +exclude=bibcode1,bibcode2,... + +You can also include specific articles with the command + +include=bibcode1,bibcode2,... + +This allows for finely customized bibliographies. 
+ + + +List of ADS query parameter keywords + +author list of semicolon separated authornames as last, f +object list of semicolon separated object names +keyword list of semicolon separated keywords +start_mon starting month as integer (Jan == 1, Dec == 12) +start_year starting year as integer (4 digits) +end_mon ending month as integer (Jan == 1, Dec == 12) +end_year ending year as integer (4 digits) +start_entry_day start entry day of month as integer +start_entry_mon start entry month as integer +start_entry_year start entry year as integer +end_entry_day start entry day of month as integer +end_entry_mon start entry month as integer +end_entry_year start entry year as integer +title title words, any non-alpha-numeric character separates +text abstract words, any non-alpha-numeric character separates +fulltext OCRd fulltext, any non-alpha-numeric character separates +affiliation affiliation words, any non-alpha-numeric character separates +bibcode bibcode for partial bibcode search. If a bibcode is +specified, no other search will be done +nr_to_return how many abstracts to return (default is 50, max 500) +start_nr where to start returning in list of retrieved abstracts +default is 1 +aut_wt floating point weight for author search, default: 1.0 +obj_wt floating point weight for object search, default: 1.0 +kwd_wt floating point weight for keyword search, default: 1.0 +ttl_wt floating point weight for title search, default: 0.3 +txt_wt floating point weight for text search, default: 3.0 +full_wt floating point weight for full search, default: 3.0 +aff_wt floating point weight for affiliation search, default: 1.0 +aut_syn author synonym replacement. aut_syn="YES" turns it on (default is on) +ttl_syn title synonym replacement. ttl_syn="YES" turns it on (default is on) +txt_syn text synonym replacement. txt_syn="YES" turns it on (default is on) +full_syn full text synonym replacement. 
full_syn="YES" turns it on (default is on) +aff_syn affiliation synonym replacement. aff_syn="YES" turns it on (default is on) +aut_wgt authors used for weighting. aut_wgt="YES" turns it on (default is on) +obj_wgt objects used for weighting. obj_wgt="YES" turns it on (default is on) +kwd_wgt keywords used for weighting. kwd_wgt="YES" turns it on (default is on) +ttl_wgt title used for weighting. ttl_wgt="YES" turns it on (default is on) +txt_wgt text used for weighting. txt_wgt="YES" turns it on (default is on) +full_wgt full text used for weighting. full_wgt="YES" turns it on (default is on) +aff_wgt affiliation used for weighting. aff_wgt="YES" turns it on (default is on) +aut_sco authors weighted scoring. aut_sco="YES" turns it on (default is off) +kwd_sco keywords weighted scoring. kwd_sco="YES" turns it on (default is off) +ttl_sco title weighted scoring. ttl_sco="YES" turns it on (default is on) +txt_sco text weighted scoring. txt_sco="YES" turns it on (default is on) +full_sco text weighted scoring. full_sco="YES" turns it on (default is on) +aff_sco affiliation weighted scoring. aff_sco="YES" turns it on (default is off) +aut_req authors required for results. aut_req="YES" turns it on (default is off) +obj_req objects required for results. obj_req="YES" turns it on (default is off) +kwd_req keywords required for results. kwd_req="YES" turns it on (default is off) +ttl_req title required for results. ttl_req="YES" turns it on (default is off) +txt_req text required for results. txt_req="YES" turns it on (default is off) +full_req text required for results. full_req="YES" turns it on (default is off) +aff_req affiliation required for results. 
aff_req="YES" turns it on (default is off) +aut_logic +obj_logic +kwd_logic +ttl_logic +txt_logic +full_logic +aff_logic Combination logic: +xxx_logic="AND": combine with AND +xxx_logic="OR": combine with OR (default) +xxx_logic="SIMPLE": simple logic (use +, -) +xxx_logic="BOOL": full boolean logic +xxx_logic="FULLMATCH": do AND query in the selected field +and calculate the score according to how many words in +the field of the selected reference were matched by +the query +return_req requested return: +return_req="result" : return results (default) +return_req="form" : return new query form +return_req="no_params": return results +set default parameters, don't display params +db_key which database to query: db_key="AST" : Astronomy(default) +"PRE": arXiv e-prints +"PHY": Physics, "GEN": General, CFA: CfA Preprints +atcl_only select only OCR pages from articles +jou_pick specify which journals to select: +jou_pick="ALL" : return all journals (default) +jou_pick="NO" : return only refereed journals +jou_pick="EXCL" : return only non-refereed journals +ref_stems list of comma-separated ADS bibstems to return, e.g. ref_stems="ApJ..,AJ..." +min_score minimum score of returned abstracts +(floating point, default 0.0) +data_link return only entries with data. +data_link="YES" turns it on, default is off +abstract return only entries with abstracts. +abstract="YES" turns it on, default is off +alt_abs return only entries with alternate abstracts. +alt_abs="YES" turns it on, default is off +aut_note return only entries with author notes. +aut_note="YES" turns it on, default is off +article return only entries with articles. +article="YES" turns it on, default is off +article_link return only entries with electronic articles. +article_link="YES" turns it on, default is off +simb_obj return only entries with simbad objects. +simb_obj="YES" turns it on, default is off +ned_obj return only entries with ned objects. 
+ned_obj="YES" turns it on, default is off +gpndb_obj return only entries with gpndb objects. +gpndb_obj="YES" turns it on, default is off +lib_link return only entries with library links. +lib_link="YES" turns it on, default is off +data_and return only entries with all selected data available. +data_and="ALL": no selection, return all refs (default) +data_and="NO" : return entries with AT LEAST ONE of the +data items selected with the above flags +data_and="YES": return only entries that have ALL links +selected with the above flags +version version number for the query form +data_type data type to return +data_type="HTML" return regular list (default) +data_type="PORTABLE" return portable tagged format +data_type="PLAINTEXT" return plain text +data_type="BIBTEX" return bibtex format +data_type="BIBTEXPLUS" return bibtex with abstract +data_type="ENDNOTE" return ENDNOTE format +data_type="DUBLINCORE" return DUBLINCORE format +data_type="XML" return XML format +data_type="SHORT_XML" return short XML format (no abstract) +data_type="VOTABLE" return VOTable format +data_type="RSS" return RSS format +mail_link return only entries with mailorder. +mail_link="YES" turns it on, default is off +toc_link return only entries with tocorder. +toc_link="YES" turns it on, default is off +pds_link return only entries with pds data. +pds_link="YES" turns it on, default is off +multimedia_link return only entries with multimedia data. +multimedia_link="YES" turns it on, default is off +spires_link return only entries with spires data. +spires_link="YES" turns it on, default is off +group_and return only entries from all selected groups. +group_and="ALL":no selection (default) +group_and="NO" :return entries that are in at least one grp +group_and="YES":return only entries from ALL groups +selected with group_bits +group_sel which group to select, e.g. group_sel="Chandra,HST" +ref_link return only entries with reference links. 
+ref_link="YES" turns it on, default is off +citation_link return only entries with citation links. +citation_link="YES" turns it on, default is off +gif_link return only entries with scanned articles links. +open_link return only entries with open access. +aut_xct exact author search. aut_xct="YES" turns it on +lpi_query lpi_query="YES" query for LPI objects, default is off +sim_query sim_query="YES" query for SIMBAD objects, default is on +ned_query ned_query="YES" query for NED objects, default is on +iau_query iau_query="YES" query for IAU objects, default is off +sort sort options: +"SCORE": sort by score +"AUTHOR": sort by first author +"NDATE": sort by date (most recent first +"ODATE": sort by date (oldest first) +"BIBCODE": sort by bibcode +"ENTRY": sort by entry date in the database +"PAGE": sort by page number +"RPAGE": reverse sort by page number +"CITATIONS": sort by citation count (replaces +score with number of citations) +"NORMCITATIONS": sort by normalized citation count +(replaces score with number of normalized citations) +"AUTHOR_CNT": sort by author count +query_type what to return: query_type=PAPERS returns regular records (default) +query_type=CITES returns citations to selected records +query_type=REFS returns references in selected records +query_type=ALSOREADS returns also-reads in selected records +return_fmt return format: return_fmt="LONG": return full abstract +return_fmt="SHORT": return short listing (default) +type where to return the data (screen, file, printer, etc) +defaultset use default settings (same as ret_req=no_params +but displays query parameters on short form) +format Custom reference format +charset character set for text output +year year field for bibcode matching +bibstem bibstem field for bibcode matching +volume volume field for bibcode matching +page page field for bibcode matching +associated_link return only entries with associated articles. 
+associated_link="YES" turns it on, default is off +ar_link return only entries with AR links. +ar_link="YES" turns it on, default is off +tables return results with table formatting (overrides pref.) +email_ret email_ret="YES": return query result via email +exclude exclude=bibcode1[,bibcode2...]: exclude specified bibcodes +from results list +include include=bibcode1[,bibcode2...]: include specified bibcodes +in results list +selectfrom selectfrom=bibcode1[,bibcode2...]: include only bibcodes +from specified bibcode list +RA Right ascension for cone search +DEC Declination for cone search +SR Search radius for cone search (default is 10 arcmin) +method form method of query form: GET or POST +nfeedback number of records to use in feedback queries +doi DOI +preprint_link return only entries with preprint data. +preprint_link="YES" turns it on, default is off +refstr reference string to resolve +mimetype mimetype of returned page (default depends on data_type) +qsearch if set, quick search box is displayed in HTML output +arxiv_sel which arxiv categories to select +article_sel select only articles (not catalogs, abstracts, etc) +adsobj_query search object names in abstract text + +""" + + + diff --git a/astroquery/nasa_ads/tests/__init__.py b/astroquery/nasa_ads/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/astroquery/nasa_ads/tests/test_nasaads.py b/astroquery/nasa_ads/tests/test_nasaads.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/docs/nasa_ads/nasa_ads.rst b/docs/nasa_ads/nasa_ads.rst new file mode 100644 index 0000000000..e326dd1335 --- /dev/null +++ b/docs/nasa_ads/nasa_ads.rst @@ -0,0 +1,47 @@ +.. doctest-skip-all + +.. 
_astroquery.nasa_ads: + +**************************************** +NASA ADS Queries (`astroquery.nasa_ads`) +**************************************** + +Getting Started +=============== + +This module provides an interface to the online `SAO/NASA Astrophysics Data System`_ + + +Examples +======== + + +Search works by specific identifier +----------------------------------- + + +Get links +--------- + + +Download publisher/ArXiv PDF +---------------------------- + + +Get Bibtex +---------- + + + + + + + +Reference/API +============= + +#.. automodapi:: astroquery.nasa_ads:no-inheritance-diagram: + +.. _nasa_ads: http://adsabs.harvard.edu/ +.. _SAO/NASA Astrophysics Data System: http://adsabs.harvard.edu/ + From ee153fbc27b43455ec4aae5acd2d204116bc6879 Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Fri, 19 Dec 2014 11:05:40 +0100 Subject: [PATCH 18/39] NASA ADS query : test format RST to open link in new window --- docs/nasa_ads/nasa_ads.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/nasa_ads/nasa_ads.rst b/docs/nasa_ads/nasa_ads.rst index e326dd1335..7e53f02180 100644 --- a/docs/nasa_ads/nasa_ads.rst +++ b/docs/nasa_ads/nasa_ads.rst @@ -43,5 +43,5 @@ Reference/API #.. automodapi:: astroquery.nasa_ads:no-inheritance-diagram: .. _nasa_ads: http://adsabs.harvard.edu/ -.. _SAO/NASA Astrophysics Data System: http://adsabs.harvard.edu/ +.. _SAO/NASA Astrophysics Data System: raw::html SAO/NASA Astrophysics Data System From 6e383f8b502247fd6a9fd975647bf403811fe774 Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Fri, 19 Dec 2014 11:06:45 +0100 Subject: [PATCH 19/39] NASA ADS query : test format RST to open link in new window --- docs/nasa_ads/nasa_ads.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/nasa_ads/nasa_ads.rst b/docs/nasa_ads/nasa_ads.rst index 7e53f02180..66e5046bbe 100644 --- a/docs/nasa_ads/nasa_ads.rst +++ b/docs/nasa_ads/nasa_ads.rst @@ -43,5 +43,6 @@ Reference/API #.. 
automodapi:: astroquery.nasa_ads:no-inheritance-diagram: .. _nasa_ads: http://adsabs.harvard.edu/ -.. _SAO/NASA Astrophysics Data System: raw::html SAO/NASA Astrophysics Data System +.. _SAO/NASA Astrophysics Data System: raw::html + SAO/NASA Astrophysics Data System From e3140d50f99b88760c456d33101a8d19bd7f4e5e Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Fri, 19 Dec 2014 11:10:24 +0100 Subject: [PATCH 20/39] NASA ADS query, rst doc : couldn't get link 2 open in new window --- docs/nasa_ads/nasa_ads.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/nasa_ads/nasa_ads.rst b/docs/nasa_ads/nasa_ads.rst index 66e5046bbe..e326dd1335 100644 --- a/docs/nasa_ads/nasa_ads.rst +++ b/docs/nasa_ads/nasa_ads.rst @@ -43,6 +43,5 @@ Reference/API #.. automodapi:: astroquery.nasa_ads:no-inheritance-diagram: .. _nasa_ads: http://adsabs.harvard.edu/ -.. _SAO/NASA Astrophysics Data System: raw::html - SAO/NASA Astrophysics Data System +.. _SAO/NASA Astrophysics Data System: http://adsabs.harvard.edu/ From 69f0753c4e0b83213b184a9454f931afa3fad172 Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Fri, 19 Dec 2014 11:15:27 +0100 Subject: [PATCH 21/39] NASA ADS query - rst doc : new test with links... --- docs/nasa_ads/nasa_ads.rst | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/docs/nasa_ads/nasa_ads.rst b/docs/nasa_ads/nasa_ads.rst index e326dd1335..efbe530d1b 100644 --- a/docs/nasa_ads/nasa_ads.rst +++ b/docs/nasa_ads/nasa_ads.rst @@ -9,7 +9,21 @@ NASA ADS Queries (`astroquery.nasa_ads`) Getting Started =============== -This module provides an interface to the online `SAO/NASA Astrophysics Data System`_ +This module provides an interface to the online |adslink|. 
+It will check all the ADS mirrors, currently given by + + http://adswww.harvard.edu/|br| + http://cdsads.u-strasbg.fr/|br| + http://ukads.nottingham.ac.uk/|br| + http://esoads.eso.org/|br| + http://ads.ari.uni-heidelberg.de/|br| + http://ads.inasan.ru/|br| + http://ads.nao.ac.jp/|br| + http://ads.iucaa.ernet.in/|br| + http://ads.arsip.lipi.go.id/|br| + http://saaoads.chpc.ac.za/|br| + http://ads.on.br/|br| + Examples @@ -43,5 +57,14 @@ Reference/API #.. automodapi:: astroquery.nasa_ads:no-inheritance-diagram: .. _nasa_ads: http://adsabs.harvard.edu/ -.. _SAO/NASA Astrophysics Data System: http://adsabs.harvard.edu/ +.. |adslink| raw:: html + + SAO/NASA Astrophysics Data System + +.. |br| raw:: html + +
+ + + From 78ede102d9dc581101e68800f8077b15b9359887 Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Fri, 19 Dec 2014 12:24:11 +0100 Subject: [PATCH 22/39] NASA ADS query - rst doc : new test with links --- docs/nasa_ads/nasa_ads.rst | 42 ++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/docs/nasa_ads/nasa_ads.rst b/docs/nasa_ads/nasa_ads.rst index efbe530d1b..2cc59fb533 100644 --- a/docs/nasa_ads/nasa_ads.rst +++ b/docs/nasa_ads/nasa_ads.rst @@ -9,21 +9,30 @@ NASA ADS Queries (`astroquery.nasa_ads`) Getting Started =============== -This module provides an interface to the online |adslink|. +This module provides an interface to the online `SAO/NASA Astrophysics Data System`_. It will check all the ADS mirrors, currently given by - http://adswww.harvard.edu/|br| - http://cdsads.u-strasbg.fr/|br| - http://ukads.nottingham.ac.uk/|br| - http://esoads.eso.org/|br| - http://ads.ari.uni-heidelberg.de/|br| - http://ads.inasan.ru/|br| - http://ads.nao.ac.jp/|br| - http://ads.iucaa.ernet.in/|br| - http://ads.arsip.lipi.go.id/|br| - http://saaoads.chpc.ac.za/|br| - http://ads.on.br/|br| + http://adswww.harvard.edu/ + http://cdsads.u-strasbg.fr/ + + http://ukads.nottingham.ac.uk/ + + http://esoads.eso.org/ + + http://ads.ari.uni-heidelberg.de/ + + http://ads.inasan.ru/ + + http://ads.nao.ac.jp/ + + http://ads.iucaa.ernet.in/ + + http://ads.arsip.lipi.go.id/ + + http://saaoads.chpc.ac.za/ + + http://ads.on.br/ Examples @@ -57,14 +66,7 @@ Reference/API #.. automodapi:: astroquery.nasa_ads:no-inheritance-diagram: .. _nasa_ads: http://adsabs.harvard.edu/ -.. |adslink| raw:: html - - SAO/NASA Astrophysics Data System - -.. |br| raw:: html - -
- +.. _SAO/NASA Astrophysics Data System: http://adsabs.harvard.edu/ From 84975b06bc80d42b8fe1b5dc193bbf59819413f6 Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Fri, 19 Dec 2014 12:30:27 +0100 Subject: [PATCH 23/39] NASA ADS query - rst doc : new test with links --- docs/nasa_ads/nasa_ads.rst | 37 ++++++++++++++----------------------- 1 file changed, 14 insertions(+), 23 deletions(-) diff --git a/docs/nasa_ads/nasa_ads.rst b/docs/nasa_ads/nasa_ads.rst index 2cc59fb533..472ec54461 100644 --- a/docs/nasa_ads/nasa_ads.rst +++ b/docs/nasa_ads/nasa_ads.rst @@ -10,29 +10,20 @@ Getting Started =============== This module provides an interface to the online `SAO/NASA Astrophysics Data System`_. -It will check all the ADS mirrors, currently given by - - http://adswww.harvard.edu/ - - http://cdsads.u-strasbg.fr/ - - http://ukads.nottingham.ac.uk/ - - http://esoads.eso.org/ - - http://ads.ari.uni-heidelberg.de/ - - http://ads.inasan.ru/ - - http://ads.nao.ac.jp/ - - http://ads.iucaa.ernet.in/ - - http://ads.arsip.lipi.go.id/ - - http://saaoads.chpc.ac.za/ - - http://ads.on.br/ +It will check all the ADS mirrors, currently given by the following list: + +- http://adswww.harvard.edu/ +- http://cdsads.u-strasbg.fr/ +- http://ukads.nottingham.ac.uk/ +- http://esoads.eso.org/ +- http://ads.ari.uni-heidelberg.de/ +- http://ads.inasan.ru/ +- http://ads.nao.ac.jp/ +- http://ads.iucaa.ernet.in/ +- http://ads.arsip.lipi.go.id/ +- http://saaoads.chpc.ac.za/ +- http://ads.on.br/ + Examples From b74df42a4de285330faf1798bc7bba6c8947b96b Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Fri, 19 Dec 2014 15:44:56 +0100 Subject: [PATCH 24/39] initial code dump nothing working yet. 
some copy-paste from astroquery.splatalogue --- astroquery/nasa_ads/__init__.py | 24 +- astroquery/nasa_ads/core.py | 480 ++++++++------------------------ 2 files changed, 116 insertions(+), 388 deletions(-) diff --git a/astroquery/nasa_ads/__init__.py b/astroquery/nasa_ads/__init__.py index cb5cd010c1..04fcd43cd5 100644 --- a/astroquery/nasa_ads/__init__.py +++ b/astroquery/nasa_ads/__init__.py @@ -13,12 +13,12 @@ class Conf(_config.ConfigNamespace): """ Configuration parameters for `astroquery.nasa_ads`. """ - mirror_urls = _config.ConfigItem( + servers = _config.ConfigItem( ['http://adswww.harvard.edu/', 'http://cdsads.u-strasbg.fr/', 'http://ukads.nottingham.ac.uk/', 'http://esoads.eso.org/', - 'http://ads.ari.uni-heidelberg.de/' + 'http://ads.ari.uni-heidelberg.de/', 'http://ads.inasan.ru/', 'http://ads.mao.kiev.ua/', 'http://ads.astro.puc.cl/', @@ -30,24 +30,20 @@ class Conf(_config.ConfigNamespace): 'http://ads.on.br/'], 'SAO/NASA ADS mirrors around the world' ) - + advanced_url = _config.ConfigItem( 'abstract_service.html', 'Path for advanced query' ) simple_url = _config.ConfigItem( - 'abstract_service.html', + 'index.html', 'Path for advanced query' ) timeout = _config.ConfigItem( 60, - 'Time limit for connecting to ADS server.' - ) - lines_limit = _config.ConfigItem( - 1000, - 'Limit to number of hits exported.' + 'Time limit for connecting to ADS server' ) conf = Conf() @@ -57,13 +53,3 @@ class Conf(_config.ConfigNamespace): __all__ = ['ADS', 'ADSClass', 'Conf', 'conf', ] - - - - - - - -""" -advanced_q = 'abstract_service.html' -""" diff --git a/astroquery/nasa_ads/core.py b/astroquery/nasa_ads/core.py index e68d7001d0..4a1ad2aeed 100644 --- a/astroquery/nasa_ads/core.py +++ b/astroquery/nasa_ads/core.py @@ -1,108 +1,138 @@ -#! 
/usr/bin/env python -# -*- coding: utf-8 -*- -# -# adslib.py -# -# Module to search the ads -# -# Copyright 2012 Magnus Persson -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, -# MA 02110-1301, USA. -# -# version 0.0.1a - +# Licensed under a 3-clause BSD style license - see LICENSE.rst """ -Script to search the NASA ADS directory +Module to search the SAO/NASA Astrophysics Data System -Need : o scipy - o mechanize module (standard in Python >2.6/2.7?) - o urllib2 module (standard Python module, required(?) by mechanize) - o beautiful soup/xml (xml standard in Python >2.6/2.7?) +:author: Magnus Persson """ -""" -ADSlib - Python Module to interact with NASA ADS -at +import warnings +# example warning +# warnings.warn("Band was specified, so blabla is overridden") +from astropy.io import ascii +from astropy import units as u +from ..query import BaseQuery +from ..utils import commons, async_to_sync +from ..utils.docstr_chompers import prepend_docstr_noreturns +from . 
import conf -http://adswww.harvard.edu/ -OR one of the mirrors -http://cdsads.u-strasbg.fr/ -http://ukads.nottingham.ac.uk/ -http://esoads.eso.org/ -http://ads.ari.uni-heidelberg.de/ -http://ads.inasan.ru/ -http://ads.mao.kiev.ua/ -http://ads.astro.puc.cl/ -http://ads.nao.ac.jp/ -http://ads.bao.ac.cn/ -http://ads.iucaa.ernet.in/ -http://ads.arsip.lipi.go.id/ -http://saaoads.chpc.ac.za/ -http://ads.on.br/ +__all__ = ['Splatalogue', 'SplatalogueClass'] -""" +@async_to_sync +class ADSClass(BaseQuery): + SERVERS = conf.servers + QUERY_ADVANCED_URL = conf.advanced_url + QUERY_SIMPLE_URL = conf.simple_url + TIMEOUT = conf.timeout + # global constant, not user-configurable + def __init__(self, **kwargs): + """ + Initialize a ADS query class with default arguments set. + Any default keyword arguments (see `query_lines`) can be + overridden here. + """ + self.data = self._default_kwargs() + self.set_default_options(**kwargs) + + def set_default_options(self, **kwargs): + """ + Modify the default options. + """ + self.data.update(self._parse_kwargs(**kwargs)) + def _default_kwargs(self): + kwargs = dict(advanced = False,) + return self._parse_kwargs(**kwargs) + + def _parse_kwargs(self, search=""): + """ + The ADS service returns ... + -""" -----[ Change log ]---- + Parameters + ---------- -* 2012 Dec 15 - Code cleanup. + Other Parameters + ---------------- + -* 2012 Oct 29 - Now only uses mechanize module(!) Yay. + Returns + ------- + Dictionary of the parameters to send to the SPLAT page + payload : dict + A dictionary of keywords + """ -* 2012 Oct 02 - File created. 
+ payload = {'submit': 'Search', + 'frequency_units': 'GHz', + } + payload['qsearch'] = simple -""" + return payload -""" -NOTES: -# advanced search query -abstract_service.html + def _validate_simple_kwargs(self, min_frequency=None, max_frequency=None, + band='any', **kwargs): + """ + Check that a simple search query is input + """ + if band == 'any': + if min_frequency is None or max_frequency is None: + raise ValueError("Must specify a simple search string.") + @prepend_docstr_noreturns("\n" + _parse_kwargs.__doc__) + def query_simple_async(self, simple, **kwargs): + """ + Returns + ------- + response : `requests.Response` + The response of the HTTP request. + """ + # have to chomp this kwd here... + get_query_payload = (kwargs.pop('get_query_payload') + if 'get_query_payload' in kwargs + else False) + self._validate_kwargs(simple, **kwargs) + + if hasattr(self, 'data'): + data_payload = self.data.copy() + data_payload.update(self._parse_kwargs(min_frequency=min_frequency, + max_frequency=max_frequency, + **kwargs)) + else: + data_payload = self._default_kwargs() + data_payload.update(self._parse_kwargs(min_frequency=min_frequency, + max_frequency=max_frequency, + **kwargs)) + + if get_query_payload: + return data_payload + + response = commons.send_request( + self.QUERY_URL, + data_payload, + self.TIMEOUT) + + self.response = response + + return response +ADS = ADSClass() + + + + + + +######################################################################## -# quick search -index.html -""" -mirrors = [ - 'http://adswww.harvard.edu/', - 'http://cdsads.u-strasbg.fr/', - 'http://ukads.nottingham.ac.uk/', - 'http://esoads.eso.org/', - 'http://ads.ari.uni-heidelberg.de/' - 'http://ads.inasan.ru/', - 'http://ads.nao.ac.jp/', - 'http://ads.iucaa.ernet.in/', - 'http://ads.arsip.lipi.go.id/', - 'http://saaoads.chpc.ac.za/', - 'http://ads.on.br/' - ] -advanced_q = 'abstract_service.html' def search(query, **kwargs): """ @@ -243,7 +273,7 @@ def search(query, **kwargs): 
######## GET RESULTLIST ###### the problem with this is that web is in UNICODE, - # ie. Jørgensen, æ and åäö and ßü etc are represented by funny numbers and '\' + # ie. special chars are represented by funny numbers and '\' #resultlist = [_Result(i) for i in entries] return _Resultlist(entries) @@ -354,292 +384,4 @@ def _returnlist_(self): #~ # authors alphabetical order first and then by score #~ # i.e. sort by score if same first author #~ sorted(resultlist, key=attrgetter('ads_score','authors'), reverse=True) - - ######################################################################## -######## NOTES - -### FIELDS -# bibcode -# title -# authors -# score -# pubdate -# possilbe (quick)links : -# A Abstract -# C CITATIONS -# D On-line Data -# E EJOURNAL -# F Printable Article -# G Gif Images -# H HEP/Spires Information -# I Author Comments -# L Library Entries -# M Multimedia -# N NED Objects -# O Associated Articles -# P PDS datasets -# R REFERENCES -# S SIMBAD Objects -# T TOC -# U Also read -# X arXiv e-print -# Z Abstract Custom - - -""" - - -6.3.4 - Embedded Queries - -This section describes how the abstract service can be accessed from embedded forms. The URL for submitting embedded forms is: - -http://adsabs.harvard.edu/cgi-bin/abs_connect - -The syntax is: - -... - -where parami are the names of the parameters and vali are their values. There are no spaces allowed in a URL. Any blanks need to be encoded as a '+' (e.g. between author last and first names). The list of the possible parameters and their possible values is available to build queries. It is advisable to use only the more basic parameters for such queries since the more complicated parameters are more likely to change with future versions of the search system. - -One use of this is for including a link to the bibliography for a particular author in a document. 
- -To do so, use the following syntax: - -http://adsabs.harvard.edu/cgi-bin/abs_connect?author=last,+f.&return_req=no_params - -This sets the author=last, f, and prevents the listing of parameters at the bottom of the page (return_req=no_params). - -If you want to specify the author middle initial in addition to the first initial, use exact author matching (&aut_xct=YES). - -To build a search for two different formats of author names, enter the two author arguments separated with a semicolon. - -http://adsabs.harvard.edu/cgi-bin/abs_connect?author=last,+f.m.;last,+first+m.&aut_xct=YES&return_req=no_params - -Such a link will always provide access to the latest bibliography of an author without the need to update anything. - -Sometimes such a list includes articles by somebody else with the same name. You can exclude specific articles from the results list with the command - -exclude=bibcode1,bibcode2,... - -You can also include specific articles with the command - -include=bibcode1,bibcode2,... - -This allows for finely customized bibliographies. 
- - - -List of ADS query parameter keywords - -author list of semicolon separated authornames as last, f -object list of semicolon separated object names -keyword list of semicolon separated keywords -start_mon starting month as integer (Jan == 1, Dec == 12) -start_year starting year as integer (4 digits) -end_mon ending month as integer (Jan == 1, Dec == 12) -end_year ending year as integer (4 digits) -start_entry_day start entry day of month as integer -start_entry_mon start entry month as integer -start_entry_year start entry year as integer -end_entry_day start entry day of month as integer -end_entry_mon start entry month as integer -end_entry_year start entry year as integer -title title words, any non-alpha-numeric character separates -text abstract words, any non-alpha-numeric character separates -fulltext OCRd fulltext, any non-alpha-numeric character separates -affiliation affiliation words, any non-alpha-numeric character separates -bibcode bibcode for partial bibcode search. If a bibcode is -specified, no other search will be done -nr_to_return how many abstracts to return (default is 50, max 500) -start_nr where to start returning in list of retrieved abstracts -default is 1 -aut_wt floating point weight for author search, default: 1.0 -obj_wt floating point weight for object search, default: 1.0 -kwd_wt floating point weight for keyword search, default: 1.0 -ttl_wt floating point weight for title search, default: 0.3 -txt_wt floating point weight for text search, default: 3.0 -full_wt floating point weight for full search, default: 3.0 -aff_wt floating point weight for affiliation search, default: 1.0 -aut_syn author synonym replacement. aut_syn="YES" turns it on (default is on) -ttl_syn title synonym replacement. ttl_syn="YES" turns it on (default is on) -txt_syn text synonym replacement. txt_syn="YES" turns it on (default is on) -full_syn full text synonym replacement. 
full_syn="YES" turns it on (default is on) -aff_syn affiliation synonym replacement. aff_syn="YES" turns it on (default is on) -aut_wgt authors used for weighting. aut_wgt="YES" turns it on (default is on) -obj_wgt objects used for weighting. obj_wgt="YES" turns it on (default is on) -kwd_wgt keywords used for weighting. kwd_wgt="YES" turns it on (default is on) -ttl_wgt title used for weighting. ttl_wgt="YES" turns it on (default is on) -txt_wgt text used for weighting. txt_wgt="YES" turns it on (default is on) -full_wgt full text used for weighting. full_wgt="YES" turns it on (default is on) -aff_wgt affiliation used for weighting. aff_wgt="YES" turns it on (default is on) -aut_sco authors weighted scoring. aut_sco="YES" turns it on (default is off) -kwd_sco keywords weighted scoring. kwd_sco="YES" turns it on (default is off) -ttl_sco title weighted scoring. ttl_sco="YES" turns it on (default is on) -txt_sco text weighted scoring. txt_sco="YES" turns it on (default is on) -full_sco text weighted scoring. full_sco="YES" turns it on (default is on) -aff_sco affiliation weighted scoring. aff_sco="YES" turns it on (default is off) -aut_req authors required for results. aut_req="YES" turns it on (default is off) -obj_req objects required for results. obj_req="YES" turns it on (default is off) -kwd_req keywords required for results. kwd_req="YES" turns it on (default is off) -ttl_req title required for results. ttl_req="YES" turns it on (default is off) -txt_req text required for results. txt_req="YES" turns it on (default is off) -full_req text required for results. full_req="YES" turns it on (default is off) -aff_req affiliation required for results. 
aff_req="YES" turns it on (default is off) -aut_logic -obj_logic -kwd_logic -ttl_logic -txt_logic -full_logic -aff_logic Combination logic: -xxx_logic="AND": combine with AND -xxx_logic="OR": combine with OR (default) -xxx_logic="SIMPLE": simple logic (use +, -) -xxx_logic="BOOL": full boolean logic -xxx_logic="FULLMATCH": do AND query in the selected field -and calculate the score according to how many words in -the field of the selected reference were matched by -the query -return_req requested return: -return_req="result" : return results (default) -return_req="form" : return new query form -return_req="no_params": return results -set default parameters, don't display params -db_key which database to query: db_key="AST" : Astronomy(default) -"PRE": arXiv e-prints -"PHY": Physics, "GEN": General, CFA: CfA Preprints -atcl_only select only OCR pages from articles -jou_pick specify which journals to select: -jou_pick="ALL" : return all journals (default) -jou_pick="NO" : return only refereed journals -jou_pick="EXCL" : return only non-refereed journals -ref_stems list of comma-separated ADS bibstems to return, e.g. ref_stems="ApJ..,AJ..." -min_score minimum score of returned abstracts -(floating point, default 0.0) -data_link return only entries with data. -data_link="YES" turns it on, default is off -abstract return only entries with abstracts. -abstract="YES" turns it on, default is off -alt_abs return only entries with alternate abstracts. -alt_abs="YES" turns it on, default is off -aut_note return only entries with author notes. -aut_note="YES" turns it on, default is off -article return only entries with articles. -article="YES" turns it on, default is off -article_link return only entries with electronic articles. -article_link="YES" turns it on, default is off -simb_obj return only entries with simbad objects. -simb_obj="YES" turns it on, default is off -ned_obj return only entries with ned objects. 
-ned_obj="YES" turns it on, default is off -gpndb_obj return only entries with gpndb objects. -gpndb_obj="YES" turns it on, default is off -lib_link return only entries with library links. -lib_link="YES" turns it on, default is off -data_and return only entries with all selected data available. -data_and="ALL": no selection, return all refs (default) -data_and="NO" : return entries with AT LEAST ONE of the -data items selected with the above flags -data_and="YES": return only entries that have ALL links -selected with the above flags -version version number for the query form -data_type data type to return -data_type="HTML" return regular list (default) -data_type="PORTABLE" return portable tagged format -data_type="PLAINTEXT" return plain text -data_type="BIBTEX" return bibtex format -data_type="BIBTEXPLUS" return bibtex with abstract -data_type="ENDNOTE" return ENDNOTE format -data_type="DUBLINCORE" return DUBLINCORE format -data_type="XML" return XML format -data_type="SHORT_XML" return short XML format (no abstract) -data_type="VOTABLE" return VOTable format -data_type="RSS" return RSS format -mail_link return only entries with mailorder. -mail_link="YES" turns it on, default is off -toc_link return only entries with tocorder. -toc_link="YES" turns it on, default is off -pds_link return only entries with pds data. -pds_link="YES" turns it on, default is off -multimedia_link return only entries with multimedia data. -multimedia_link="YES" turns it on, default is off -spires_link return only entries with spires data. -spires_link="YES" turns it on, default is off -group_and return only entries from all selected groups. -group_and="ALL":no selection (default) -group_and="NO" :return entries that are in at least one grp -group_and="YES":return only entries from ALL groups -selected with group_bits -group_sel which group to select, e.g. group_sel="Chandra,HST" -ref_link return only entries with reference links. 
-ref_link="YES" turns it on, default is off -citation_link return only entries with citation links. -citation_link="YES" turns it on, default is off -gif_link return only entries with scanned articles links. -open_link return only entries with open access. -aut_xct exact author search. aut_xct="YES" turns it on -lpi_query lpi_query="YES" query for LPI objects, default is off -sim_query sim_query="YES" query for SIMBAD objects, default is on -ned_query ned_query="YES" query for NED objects, default is on -iau_query iau_query="YES" query for IAU objects, default is off -sort sort options: -"SCORE": sort by score -"AUTHOR": sort by first author -"NDATE": sort by date (most recent first -"ODATE": sort by date (oldest first) -"BIBCODE": sort by bibcode -"ENTRY": sort by entry date in the database -"PAGE": sort by page number -"RPAGE": reverse sort by page number -"CITATIONS": sort by citation count (replaces -score with number of citations) -"NORMCITATIONS": sort by normalized citation count -(replaces score with number of normalized citations) -"AUTHOR_CNT": sort by author count -query_type what to return: query_type=PAPERS returns regular records (default) -query_type=CITES returns citations to selected records -query_type=REFS returns references in selected records -query_type=ALSOREADS returns also-reads in selected records -return_fmt return format: return_fmt="LONG": return full abstract -return_fmt="SHORT": return short listing (default) -type where to return the data (screen, file, printer, etc) -defaultset use default settings (same as ret_req=no_params -but displays query parameters on short form) -format Custom reference format -charset character set for text output -year year field for bibcode matching -bibstem bibstem field for bibcode matching -volume volume field for bibcode matching -page page field for bibcode matching -associated_link return only entries with associated articles. 
-associated_link="YES" turns it on, default is off -ar_link return only entries with AR links. -ar_link="YES" turns it on, default is off -tables return results with table formatting (overrides pref.) -email_ret email_ret="YES": return query result via email -exclude exclude=bibcode1[,bibcode2...]: exclude specified bibcodes -from results list -include include=bibcode1[,bibcode2...]: include specified bibcodes -in results list -selectfrom selectfrom=bibcode1[,bibcode2...]: include only bibcodes -from specified bibcode list -RA Right ascension for cone search -DEC Declination for cone search -SR Search radius for cone search (default is 10 arcmin) -method form method of query form: GET or POST -nfeedback number of records to use in feedback queries -doi DOI -preprint_link return only entries with preprint data. -preprint_link="YES" turns it on, default is off -refstr reference string to resolve -mimetype mimetype of returned page (default depends on data_type) -qsearch if set, quick search box is displayed in HTML output -arxiv_sel which arxiv categories to select -article_sel select only articles (not catalogs, abstracts, etc) -adsobj_query search object names in abstract text - -""" - - - From b90fc84c5e235dbb95ffe4d9cd653b3f8392e46d Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Fri, 19 Dec 2014 15:46:04 +0100 Subject: [PATCH 25/39] fix import --- astroquery/nasa_ads/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/astroquery/nasa_ads/core.py b/astroquery/nasa_ads/core.py index 4a1ad2aeed..97f03eb0a5 100644 --- a/astroquery/nasa_ads/core.py +++ b/astroquery/nasa_ads/core.py @@ -20,7 +20,7 @@ -__all__ = ['Splatalogue', 'SplatalogueClass'] +__all__ = ['ADS', 'ADSClass'] @async_to_sync From 82cd81d652da3186dd19cf24b618cc7b61af6d83 Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Mon, 12 Jan 2015 16:09:32 +0100 Subject: [PATCH 26/39] NASA ADS simple text search working. Results NOT parsed. 
--- astroquery/nasa_ads/__init__.py | 118 ++++++++++++----- astroquery/nasa_ads/core.py | 223 ++++++++++++++++++-------------- 2 files changed, 210 insertions(+), 131 deletions(-) diff --git a/astroquery/nasa_ads/__init__.py b/astroquery/nasa_ads/__init__.py index 04fcd43cd5..c728c312f3 100644 --- a/astroquery/nasa_ads/__init__.py +++ b/astroquery/nasa_ads/__init__.py @@ -6,50 +6,98 @@ :Author: Magnus Vilhelm Persson (magnusp@vilhelm.nu) """ -from astropy import config as _config +#~ from astropy.config import ConfigurationItem + +from astropy import config as _config class Conf(_config.ConfigNamespace): """ Configuration parameters for `astroquery.nasa_ads`. """ - servers = _config.ConfigItem( - ['http://adswww.harvard.edu/', - 'http://cdsads.u-strasbg.fr/', - 'http://ukads.nottingham.ac.uk/', - 'http://esoads.eso.org/', - 'http://ads.ari.uni-heidelberg.de/', - 'http://ads.inasan.ru/', - 'http://ads.mao.kiev.ua/', - 'http://ads.astro.puc.cl/', - 'http://ads.nao.ac.jp/', - 'http://ads.bao.ac.cn/', - 'http://ads.iucaa.ernet.in/', - 'http://ads.arsip.lipi.go.id/', - 'http://saaoads.chpc.ac.za/', - 'http://ads.on.br/'], - 'SAO/NASA ADS mirrors around the world' - ) + server = _config.ConfigItem( + 'http://adswww.harvard.edu', + 'SAO/NASA ADS main server.' 
+ ) + mirrors = _config.ConfigItem( + ['http://cdsads.u-strasbg.fr', + 'http://ukads.nottingham.ac.uk', + 'http://esoads.eso.org', + 'http://ads.ari.uni-heidelberg.de', + 'http://ads.inasan.ru', + 'http://ads.mao.kiev.ua', + 'http://ads.astro.puc.cl', + 'http://ads.nao.ac.jp', + 'http://ads.bao.ac.cn', + 'http://ads.iucaa.ernet.in', + 'http://ads.arsip.lipi.go.id', + 'http://saaoads.chpc.ac.za', + 'http://ads.on.br'], + 'SAO/NASA ADS mirrors around the world' + ) + advanced_path = _config.ConfigItem( + '/cgi-bin/nph-abs_connect', + 'Path for advanced query' + ) + + simple_path = _config.ConfigItem( + '/cgi-bin/nph-basic_connect', + 'Path for simple query' + ) - advanced_url = _config.ConfigItem( - 'abstract_service.html', - 'Path for advanced query' - ) + timeout = _config.ConfigItem( + 60, + 'Time limit for connecting to ADS server' + ) - simple_url = _config.ConfigItem( - 'index.html', - 'Path for advanced query' - ) +conf = Conf() - timeout = _config.ConfigItem( - 60, - 'Time limit for connecting to ADS server' - ) -conf = Conf() +from .core import ADSClass, ADS -from .core import ADS, ADSClass +__all__ = ['ADSClass', 'ADS', + 'Conf', 'conf'] -__all__ = ['ADS', 'ADSClass', - 'Conf', 'conf', - ] +#~ class Conf(_config.ConfigNamespace): + #~ """ + #~ Configuration parameters for `astroquery.nasa_ads`. 
+ #~ """ + #~ servers = _config.ConfigItem( + #~ ['http://adswww.harvard.edu', + #~ 'http://cdsads.u-strasbg.fr', + #~ 'http://ukads.nottingham.ac.uk', + #~ 'http://esoads.eso.org', + #~ 'http://ads.ari.uni-heidelberg.de', + #~ 'http://ads.inasan.ru', + #~ 'http://ads.mao.kiev.ua', + #~ 'http://ads.astro.puc.cl', + #~ 'http://ads.nao.ac.jp', + #~ 'http://ads.bao.ac.cn', + #~ 'http://ads.iucaa.ernet.in', + #~ 'http://ads.arsip.lipi.go.id', + #~ 'http://saaoads.chpc.ac.za', + #~ 'http://ads.on.br'], + #~ 'SAO/NASA ADS mirrors around the world' + #~ ) + #~ + #~ advanced_url = _config.ConfigurationItem( + #~ '/cgi-bin/nph-abs_connect', + #~ 'Path for advanced query' + #~ ) + #~ + #~ simple_url = _config.ConfigurationItem( + #~ '/cgi-bin/nph-basic_connect', + #~ 'Path for simple query' + #~ ) +#~ + #~ timeout = _config.ConfigurationItem( + #~ 60, + #~ 'Time limit for connecting to ADS server' + #~ ) +#~ +#~ conf = Conf() +#~ +#~ from .core import ADS, ADSClass +#~ +#~ __all__ = ['ADS', 'ADSClass', + #~ 'Conf', 'conf'] diff --git a/astroquery/nasa_ads/core.py b/astroquery/nasa_ads/core.py index 97f03eb0a5..aa8ea8fc6a 100644 --- a/astroquery/nasa_ads/core.py +++ b/astroquery/nasa_ads/core.py @@ -6,126 +6,157 @@ """ -import warnings +#~ import warnings # example warning # warnings.warn("Band was specified, so blabla is overridden") -from astropy.io import ascii -from astropy import units as u +#~ from astropy.io import ascii +#~ from astropy import units as u from ..query import BaseQuery -from ..utils import commons, async_to_sync +#~ from ..utils import commons, async_to_sync from ..utils.docstr_chompers import prepend_docstr_noreturns from . 
import conf - - +from ..utils.class_or_instance import class_or_instance +from ..utils import commons, async_to_sync __all__ = ['ADS', 'ADSClass'] - - +#~ +#~ @async_to_sync class ADSClass(BaseQuery): - SERVERS = conf.servers - QUERY_ADVANCED_URL = conf.advanced_url - QUERY_SIMPLE_URL = conf.simple_url + + ####### FROM SPLATALOGUE + SERVER = conf.server + QUERY_ADVANCED_PATH = conf.advanced_path + QUERY_SIMPLE_PATH = conf.simple_path TIMEOUT = conf.timeout - # global constant, not user-configurable - def __init__(self, **kwargs): - """ - Initialize a ADS query class with default arguments set. - Any default keyword arguments (see `query_lines`) can be - overridden here. - """ - self.data = self._default_kwargs() - self.set_default_options(**kwargs) - def set_default_options(self, **kwargs): - """ - Modify the default options. - """ - self.data.update(self._parse_kwargs(**kwargs)) - def _default_kwargs(self): - kwargs = dict(advanced = False,) - return self._parse_kwargs(**kwargs) - - def _parse_kwargs(self, search=""): - """ - The ADS service returns ... - + QUERY_SIMPLE_URL = SERVER + QUERY_SIMPLE_PATH + QUERY_ADVANCED_URL = SERVER + QUERY_ADVANCED_PATH - Parameters - ---------- - - Other Parameters - ---------------- - - - Returns - ------- - Dictionary of the parameters to send to the SPLAT page - payload : dict - A dictionary of keywords - """ - - payload = {'submit': 'Search', - 'frequency_units': 'GHz', - } - - payload['qsearch'] = simple - - - return payload - - def _validate_simple_kwargs(self, min_frequency=None, max_frequency=None, - band='any', **kwargs): - """ - Check that a simple search query is input - """ - if band == 'any': - if min_frequency is None or max_frequency is None: - raise ValueError("Must specify a simple search string.") - @prepend_docstr_noreturns("\n" + _parse_kwargs.__doc__) - def query_simple_async(self, simple, **kwargs): - """ - Returns - ------- - response : `requests.Response` - The response of the HTTP request. 
- """ - # have to chomp this kwd here... - get_query_payload = (kwargs.pop('get_query_payload') - if 'get_query_payload' in kwargs - else False) - self._validate_kwargs(simple, **kwargs) - - if hasattr(self, 'data'): - data_payload = self.data.copy() - data_payload.update(self._parse_kwargs(min_frequency=min_frequency, - max_frequency=max_frequency, - **kwargs)) - else: - data_payload = self._default_kwargs() - data_payload.update(self._parse_kwargs(min_frequency=min_frequency, - max_frequency=max_frequency, - **kwargs)) + ######## FROM API DOCS + def __init__(self, *args): + """ set some parameters """ + pass + + @class_or_instance + def query_simple(self, query_string, get_query_payload=False): + + request_payload = self._args_to_payload(query_string) + + response = commons.send_request(self.QUERY_SIMPLE_URL, request_payload, self.TIMEOUT) + # primarily for debug purposes, but also useful if you want to send + # someone a URL linking directly to the data if get_query_payload: - return data_payload + return request_payload - response = commons.send_request( - self.QUERY_URL, - data_payload, - self.TIMEOUT) + return response - self.response = response + def _parse_result(self, result): + # do something, probably with regexp's + return result - return response -ADS = ADSClass() + def _args_to_payload(self, query_string): + # convert arguments to a valid requests payload + # i.e. a dictionary + return {'qsearch' : query_string} +ADS = ADSClass() + +#~ + #~ # global constant, not user-configurable + #~ def __init__(self, **kwargs): + #~ """ + #~ Initialize a ADS query class with default arguments set. + #~ Any default keyword arguments (see `query_lines`) can be + #~ overridden here. + #~ """ + #~ self.data = self._default_kwargs() + #~ self.set_default_options(**kwargs) + #~ + #~ def set_default_options(self, **kwargs): + #~ """ + #~ Modify the default options. 
+ #~ """ + #~ self.data.update(self._parse_kwargs(**kwargs)) + #~ + #~ def _default_kwargs(self): + #~ kwargs = dict() + #~ return self._parse_kwargs(**kwargs) +#~ + #~ def _parse_kwargs(self, search=""): + #~ """ + #~ The ADS service returns. + #~ + #~ Parameters + #~ ---------- + #~ + #~ Other Parameters + #~ ---------------- + #~ + #~ Returns + #~ ------- + #~ Dictionary of the parameters to send to the SPLAT page + #~ payload : dict + #~ A dictionary of keywords + #~ """ +#~ + #~ payload = { 'qsearch': search } +#~ + #~ return payload +#~ + #~ def _validate_simple_kwargs(self, search=None, **kwargs): + #~ """ + #~ Check that a simple search query is input + #~ """ + #~ if search is None: + #~ raise ValueError("Must specify a search string.") + #~ + #~ @prepend_docstr_noreturns("\n" + _parse_kwargs.__doc__) + #~ def query_simple_async(self, search, **kwargs): + #~ """ + #~ Returns + #~ ------- + #~ response : `requests.Response` + #~ The response of the HTTP request. + #~ """ + #~ # have to chomp this kwd here... 
+ #~ get_query_payload = (kwargs.pop('get_query_payload') + #~ if 'get_query_payload' in kwargs + #~ else False) + #~ self._validate_kwargs(simple, **kwargs) +#~ + #~ if hasattr(self, 'data'): + #~ data_payload = self.data.copy() + #~ data_payload.update(self._parse_kwargs(min_frequency=min_frequency, + #~ max_frequency=max_frequency, + #~ **kwargs)) + #~ else: + #~ data_payload = self._default_kwargs() + #~ data_payload.update(self._parse_kwargs(min_frequency=min_frequency, + #~ max_frequency=max_frequency, + #~ **kwargs)) +#~ + #~ if get_query_payload: + #~ return data_payload +#~ + #~ response = commons.send_request( + #~ self.QUERY_URL, + #~ data_payload, + #~ self.TIMEOUT) +#~ + #~ self.response = response +#~ + #~ return response + + + ######################################################################## From 5b43fd840c4e02121be5696773768356e2fa918f Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Mon, 12 Jan 2015 16:17:16 +0100 Subject: [PATCH 27/39] Error in documentation for API. query_region_async function should return response, or parsed version of response --- docs/api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api.rst b/docs/api.rst index 83adbf4849..e889a82f68 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -171,7 +171,7 @@ Directory Structure:: if get_query_payload: return request_payload - return result + return response @class_or_instance def get_images_async(self, *args): From 207702a14acbda4765c4d8e475ddd49b2c2f4086 Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Mon, 12 Jan 2015 17:16:49 +0100 Subject: [PATCH 28/39] NASA ADS : Started parsing simple query XML results into AstroPy table. 
--- astroquery/nasa_ads/__init__.py | 10 ++- astroquery/nasa_ads/core.py | 122 +++++++------------------------- 2 files changed, 34 insertions(+), 98 deletions(-) diff --git a/astroquery/nasa_ads/__init__.py b/astroquery/nasa_ads/__init__.py index c728c312f3..9ccc9ff2e9 100644 --- a/astroquery/nasa_ads/__init__.py +++ b/astroquery/nasa_ads/__init__.py @@ -37,12 +37,16 @@ class Conf(_config.ConfigNamespace): ) advanced_path = _config.ConfigItem( '/cgi-bin/nph-abs_connect', - 'Path for advanced query' + 'Path for advanced query (unconfirmed)' ) + #~ simple_path = _config.ConfigItem( + #~ '/cgi-bin/nph-basic_connect', + #~ 'Path for simple query' + #~ ) simple_path = _config.ConfigItem( - '/cgi-bin/nph-basic_connect', - 'Path for simple query' + '/cgi-bin/basic_connect', + 'Path for simple query (return XML)' ) timeout = _config.ConfigItem( diff --git a/astroquery/nasa_ads/core.py b/astroquery/nasa_ads/core.py index aa8ea8fc6a..afb4409117 100644 --- a/astroquery/nasa_ads/core.py +++ b/astroquery/nasa_ads/core.py @@ -6,19 +6,20 @@ """ -#~ import warnings +import warnings # example warning # warnings.warn("Band was specified, so blabla is overridden") #~ from astropy.io import ascii #~ from astropy import units as u from ..query import BaseQuery -#~ from ..utils import commons, async_to_sync -from ..utils.docstr_chompers import prepend_docstr_noreturns +from ..utils import commons, async_to_sync +#~ from ..utils.docstr_chompers import prepend_docstr_noreturns from . 
import conf from ..utils.class_or_instance import class_or_instance from ..utils import commons, async_to_sync +from BeautifulSoup import BeautifulSoup as bfs __all__ = ['ADS', 'ADSClass'] #~ @@ -46,22 +47,40 @@ def query_simple(self, query_string, get_query_payload=False): request_payload = self._args_to_payload(query_string) response = commons.send_request(self.QUERY_SIMPLE_URL, request_payload, self.TIMEOUT) - + # primarily for debug purposes, but also useful if you want to send # someone a URL linking directly to the data if get_query_payload: return request_payload - return response + return self._parse_response(response) - def _parse_result(self, result): + def _parse_result(self, response): # do something, probably with regexp's - return result + + adssoup_raw = bfs(response.text) + adssoup_cooked = adssoup_raw.findAll('record') + + # number of hits + nhits = len(adssoup_cooked) + if nhits == 0: + warnings.warn("No hits for {0}".format(self.)) + return None + + """ + Developer, how do you get list with all the fields? + Like this: + [tag.name for tag in adssoup_cooked[0].findAll()[0]] + """ + + + #~ return result + return None def _args_to_payload(self, query_string): # convert arguments to a valid requests payload # i.e. a dictionary - return {'qsearch' : query_string} + return {'qsearch' : query_string, 'data_type' : 'XML'} @@ -69,93 +88,6 @@ def _args_to_payload(self, query_string): ADS = ADSClass() -#~ - #~ # global constant, not user-configurable - #~ def __init__(self, **kwargs): - #~ """ - #~ Initialize a ADS query class with default arguments set. - #~ Any default keyword arguments (see `query_lines`) can be - #~ overridden here. - #~ """ - #~ self.data = self._default_kwargs() - #~ self.set_default_options(**kwargs) - #~ - #~ def set_default_options(self, **kwargs): - #~ """ - #~ Modify the default options. 
- #~ """ - #~ self.data.update(self._parse_kwargs(**kwargs)) - #~ - #~ def _default_kwargs(self): - #~ kwargs = dict() - #~ return self._parse_kwargs(**kwargs) -#~ - #~ def _parse_kwargs(self, search=""): - #~ """ - #~ The ADS service returns. - #~ - #~ Parameters - #~ ---------- - #~ - #~ Other Parameters - #~ ---------------- - #~ - #~ Returns - #~ ------- - #~ Dictionary of the parameters to send to the SPLAT page - #~ payload : dict - #~ A dictionary of keywords - #~ """ -#~ - #~ payload = { 'qsearch': search } -#~ - #~ return payload -#~ - #~ def _validate_simple_kwargs(self, search=None, **kwargs): - #~ """ - #~ Check that a simple search query is input - #~ """ - #~ if search is None: - #~ raise ValueError("Must specify a search string.") - #~ - #~ @prepend_docstr_noreturns("\n" + _parse_kwargs.__doc__) - #~ def query_simple_async(self, search, **kwargs): - #~ """ - #~ Returns - #~ ------- - #~ response : `requests.Response` - #~ The response of the HTTP request. - #~ """ - #~ # have to chomp this kwd here... 
- #~ get_query_payload = (kwargs.pop('get_query_payload') - #~ if 'get_query_payload' in kwargs - #~ else False) - #~ self._validate_kwargs(simple, **kwargs) -#~ - #~ if hasattr(self, 'data'): - #~ data_payload = self.data.copy() - #~ data_payload.update(self._parse_kwargs(min_frequency=min_frequency, - #~ max_frequency=max_frequency, - #~ **kwargs)) - #~ else: - #~ data_payload = self._default_kwargs() - #~ data_payload.update(self._parse_kwargs(min_frequency=min_frequency, - #~ max_frequency=max_frequency, - #~ **kwargs)) -#~ - #~ if get_query_payload: - #~ return data_payload -#~ - #~ response = commons.send_request( - #~ self.QUERY_URL, - #~ data_payload, - #~ self.TIMEOUT) -#~ - #~ self.response = response -#~ - #~ return response - - ######################################################################## From bd515131737411f99825f7cd1601cf046701b87a Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Tue, 13 Jan 2015 13:41:32 +0100 Subject: [PATCH 29/39] NASA ADS : Initial parsing supported. --- astroquery/nasa_ads/__init__.py | 12 +- astroquery/nasa_ads/core.py | 281 +------------------------------- 2 files changed, 18 insertions(+), 275 deletions(-) diff --git a/astroquery/nasa_ads/__init__.py b/astroquery/nasa_ads/__init__.py index 9ccc9ff2e9..5bf30f69e0 100644 --- a/astroquery/nasa_ads/__init__.py +++ b/astroquery/nasa_ads/__init__.py @@ -10,11 +10,14 @@ #~ from astropy.config import ConfigurationItem from astropy import config as _config +from .utils import * + class Conf(_config.ConfigNamespace): """ Configuration parameters for `astroquery.nasa_ads`. """ + server = _config.ConfigItem( 'http://adswww.harvard.edu', 'SAO/NASA ADS main server.' 
@@ -57,6 +60,11 @@ class Conf(_config.ConfigNamespace): conf = Conf() +conf.adsfields = ['bibcode', 'title', 'author', 'affiliation', + 'journal', 'volume', 'pubdate', 'page', 'lastpage', 'keywords', 'keyword', + 'origin', 'copyright', 'link', 'name', 'url', 'count', 'score', 'citations', + 'abstract', 'doi', 'eprindit'] + from .core import ADSClass, ADS __all__ = ['ADSClass', 'ADS', @@ -78,9 +86,9 @@ class Conf(_config.ConfigNamespace): #~ 'http://ads.nao.ac.jp', #~ 'http://ads.bao.ac.cn', #~ 'http://ads.iucaa.ernet.in', - #~ 'http://ads.arsip.lipi.go.id', + #~ 'http://ads.arsip.lipi.go.id',lastpage #~ 'http://saaoads.chpc.ac.za', - #~ 'http://ads.on.br'], + #~ 'http://ads.on.br'],lastpage #~ 'SAO/NASA ADS mirrors around the world' #~ ) #~ diff --git a/astroquery/nasa_ads/core.py b/astroquery/nasa_ads/core.py index afb4409117..357979ded8 100644 --- a/astroquery/nasa_ads/core.py +++ b/astroquery/nasa_ads/core.py @@ -15,6 +15,7 @@ from ..utils import commons, async_to_sync #~ from ..utils.docstr_chompers import prepend_docstr_noreturns from . 
import conf +#~ from .utils import * from ..utils.class_or_instance import class_or_instance from ..utils import commons, async_to_sync @@ -22,8 +23,7 @@ from BeautifulSoup import BeautifulSoup as bfs __all__ = ['ADS', 'ADSClass'] -#~ -#~ + @async_to_sync class ADSClass(BaseQuery): @@ -43,7 +43,7 @@ def __init__(self, *args): @class_or_instance def query_simple(self, query_string, get_query_payload=False): - + self.query_string = query_string request_payload = self._args_to_payload(query_string) response = commons.send_request(self.QUERY_SIMPLE_URL, request_payload, self.TIMEOUT) @@ -55,27 +55,25 @@ def query_simple(self, query_string, get_query_payload=False): return self._parse_response(response) - def _parse_result(self, response): + def _parse_response(self, response): # do something, probably with regexp's adssoup_raw = bfs(response.text) adssoup_cooked = adssoup_raw.findAll('record') + result = adssoup_cooked # number of hits nhits = len(adssoup_cooked) if nhits == 0: - warnings.warn("No hits for {0}".format(self.)) + warnings.warn("No hits for the search \'{0}\'".format(self.query_string)) return None """ Developer, how do you get list with all the fields? 
Like this: - [tag.name for tag in adssoup_cooked[0].findAll()[0]] + [tag.name for tag in adssoup_cooked[0].findAll()] """ - - - #~ return result - return None + return result def _args_to_payload(self, query_string): # convert arguments to a valid requests payload @@ -84,267 +82,4 @@ def _args_to_payload(self, query_string): - - ADS = ADSClass() - - - -######################################################################## - - - - - - -def search(query, **kwargs): - """ - query : Normal string to ADS - or dictionary for advanced search - - """ - - - ### test code to get it up and running - - # main access - # TODO : either access via Z39.50 or via URLlib/mecahnise etc - - # wishlist - # TODO : simple search - # TODO : advanced search - # TODO : browse - - - import locale - # this reads the environment and inits the right locale - locale.setlocale(locale.LC_ALL, "") - - - try: - # the mechanize module exports urllib2 as well... - import mechanize - import urllib - except (ImportError): - print 'You need the \"mechanize\" and urllib module' - ' for this script to work.' - - try: - from BeautifulSoup import BeautifulSoup as bfs - except (ImportError): - print 'You need the BeautifulSoup module...' - - - import scipy - import sys - - #from string import lower, upper - # search URL - # http://adsabs.harvard.edu/cgi-bin/nph-basic_connect?qsearch=The+Search+String - - # to parse the search string from "The Search String" to "The+Search+String" - # urllib.quote(url, safe=":/") - - ############################################ - ######## GET THE FORM - - #~ Ping to know which server to use. 
- working_mirror = 0 - - got_reply = 0 - while not got_reply: - try: - # try to get the form - response = mechanize.urlopen(mirrors[working_mirror] + types_q[search_type]) - except mechanize.URLError: - # if we can't get it, try another mirror - if not i < len(mirrors): - break - else: - working_mirror += 1 - pass - else: - got_reply = True - - if not got_reply and working_mirror >= len(mirrors): - # TODO log output - sys.stderr.write('ERROR : You have to be connected to the internet to access the NASA ADS database and it has to be online (on all mirrors).') - else: - # TODO log output - print ('got reply from : {0}'.format(mirrors[working_mirror])) - - - - - #~ Then check if going for the advanced interface. - #~ advanced = int((type(query) == type({})) - if 'advanced' in kwargs: - # ADVANCED QUERY - # - # Should I use http://adsabs.harvard.edu/abstract_service.html - # or the full ADS Labs? - response = mechanize.urlopen(mirrors[working_mirror] + advanced_q) - forms = mechanize.ParseResponse(response, backwards_compat=False) - response.close() - form = forms[0] - #~ if arg.has_key('dbg_snd_form'): # for test purposes - #~ return form - #~ form['qsearch'] = '^Persson 2012' - - ######## SUBMIT FORM - #~ clicked_form = form.click() - - #~ result = mechanize.urlopen(clicked_form) - - pass - - elif not 'advanced' in kwargs: - # SIMPLE QUERY - baseurl = (mirrors[working_mirror] + - 'cgi-bin/nph-basic_connect?qsearch=') - - result = mechanize.urlopen( urllib.quote(baseurl + query, safe = ":/=?^") ) - # test below - data = urllib.urlencode({'qsearch' : '^Persson'}) - baseurl = (mirrors[working_mirror] + - 'cgi-bin/nph-basic_connect?') - f = urllib.urlopen(baseurl, data) - ############################################ - ######## PARSE RESULTS - - page = result.readlines() - result.close() - - # start parsing the results - t = bfs(' '.join(page)) - tables = t.findAll('table') - - r = tables[1].findAll('td')[0] - y = r.findAll('strong')[0].contents[0] - nres = int(y) - if 
nres<1: - return 0 - - # get table with results - resulttable = tables[2] - # get the rows of the table - rows = resulttable.findAll('tr') - # get each result entry per list item - entries = [rows[i:i+3][1:] for i in scipy.arange(2,57,3)][:-1] - - ############################################ - ######## GET RESULTLIST - - ###### the problem with this is that web is in UNICODE, - # ie. special chars are represented by funny numbers and '\' - - #resultlist = [_Result(i) for i in entries] - return _Resultlist(entries) - - -############################################ -######## DEFINE RESULT(S) OBJECT - - -class _Resultlist: - """ - Internal object to represent the result list - """ - def __init__(self, entries): - self.resultlist = [_Result(i) for i in entries] - def sort(self,sortkey = 'author', reverse_bool = False): - from operator import itemgetter, attrgetter - #~ sorted(resultlist, key=attrgetter('author'), reverse=True) - return sorted(self.resultlist, key=attrgetter(sortkey), reverse = reverse_bool) - def __str__(self): - printlist = [] - for i in self.resultlist[:-1]: - printlist.append('Author : {0.author}\n' - 'Title : {0.title}\n' - 'Score : {0.ads_score}\n'.format(i)) - return '\n'.join(printlist) - -class _Result: - """ - Internal object to represent each result - """ - def __init__(self, entry): - #~ def __init__(self, author, - #~ authors, - #~ title, - #~ score, - #~ bibcode, - #~ pubdate, - #~ links): - #~ self.author = author - #~ self.authorlist = authors - #~ self.title = title - #~ self.score = score - #~ self.bibcode = bibcode - #~ self.pubdate = pubdate # parse? 
- #~ self.links = links # dictionary of all the links - # - td_tags0 = entry[0].findAll('td') - self.bibcode = td_tags0[1].findAll('input')[0]['value'].encode('UTF-8') - self.url_abstract_page = td_tags0[1].findAll('a')[0]['href'].encode('UTF-8') - self.ads_score = float(td_tags0[3].contents[0].encode('UTF-8')) - self.rank = 100 - self.ads_score - self.pubdate = td_tags0[4].contents[0].string.encode('UTF-8') - self.pubday = self.pubdate[:2] - self.pubyear = self.pubdate[3:] - # - self.links = dict() - for link in td_tags0[5].findAll('a'): - self.links[link.string.encode()] = link['href'].encode('UTF-8') - # - td_tags1 = entry[1].findAll('td') - - # second part of the result entry - self.title = td_tags1[3].contents[0].string.encode('UTF-8') - # still in unicode - # TODO need to convert to normal UTF, not unicode - authors = td_tags1[1].contents[0].encode('UTF-8').split(';') - if authors[-1] == ' ': - # so, if the last entry in the authorlist is empty, means - # it split a ';', which in turn means there are more - # authors, need to add that part... - authors[-1] = td_tags1[1].contents[1].contents[0].encode('UTF-8') + ', COAuth' - # - self.authors = [i.split(',') for i in authors] - self.author = ', '.join(self.authors[0]) - # - #~ self. 
- def __repr__(self): - return repr([self.author, self.authors, self.title, self.url_abstract_page, self.ads_score, self.links, self.bibcode, self.pubdate]) - def _returnlist_(self): - return [self.author, self.authors, self.title, self.url_abstract_page, self.ads_score, self.links, self.bibcode, self.pubdate] - -#~ # second part of the result entry -#~ title = td_tags1[3].contents[0].string.replace(u'\xa0', u' ').encode() -#~ # still in unicode -#~ # TODO need to convert to normal UTF, not unicode -#~ authors = td_tags1[1].string.replace(u'\xa0', u' ').encode().split(';') -#~ authors = [i.split(',') for i in authors] -#~ author = authors[0] - - - - -############################################ -######## RETURN SORTABLE OBJECT LIST - -############################################ -######## HOW TO SORT RESULTS -# needs Python 2.6 at least -#~ from operator import itemgetter, attrgetter -#~ -#~ # now to sort it, just use one of the keys -#~ # score, high to low -#~ sorted(resultlist, key=attrgetter('author'), reverse=True) -#~ -#~ # cmp=locale.strcoll new and untested addition -#~ -#~ # authors alphabetical order first and then by score -#~ # i.e. 
sort by score if same first author -#~ sorted(resultlist, key=attrgetter('ads_score','authors'), reverse=True) -######################################################################## From e86eca5858cbe694e68a4e38ad9a38f24cf2871c Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Thu, 15 Jan 2015 12:12:19 +0100 Subject: [PATCH 30/39] moved function to extract field from BFS structure to separate file utils.py --- astroquery/nasa_ads/utils.py | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 astroquery/nasa_ads/utils.py diff --git a/astroquery/nasa_ads/utils.py b/astroquery/nasa_ads/utils.py new file mode 100644 index 0000000000..f9ac553400 --- /dev/null +++ b/astroquery/nasa_ads/utils.py @@ -0,0 +1,10 @@ + + + + +def get_field(record, field): + value = record.findAll(field) + if len(value) == 0: + return "" + else: + return value[0].text.encode("utf-8") From 40daf15fa439d1563ba3279ef2301653a238bcc3 Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Sun, 15 Feb 2015 12:53:09 +0100 Subject: [PATCH 31/39] moved over to standard Python xml library, started parsing results into an AstroPy Table instance. 
--- astroquery/nasa_ads/__init__.py | 4 ++-- astroquery/nasa_ads/core.py | 24 +++++++++++++++++++----- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/astroquery/nasa_ads/__init__.py b/astroquery/nasa_ads/__init__.py index 5bf30f69e0..cd9ff1bc0d 100644 --- a/astroquery/nasa_ads/__init__.py +++ b/astroquery/nasa_ads/__init__.py @@ -53,7 +53,7 @@ class Conf(_config.ConfigNamespace): ) timeout = _config.ConfigItem( - 60, + 120, 'Time limit for connecting to ADS server' ) @@ -63,7 +63,7 @@ class Conf(_config.ConfigNamespace): conf.adsfields = ['bibcode', 'title', 'author', 'affiliation', 'journal', 'volume', 'pubdate', 'page', 'lastpage', 'keywords', 'keyword', 'origin', 'copyright', 'link', 'name', 'url', 'count', 'score', 'citations', - 'abstract', 'doi', 'eprindit'] + 'abstract', 'doi', 'eprintid'] from .core import ADSClass, ADS diff --git a/astroquery/nasa_ads/core.py b/astroquery/nasa_ads/core.py index 357979ded8..e86bb8017d 100644 --- a/astroquery/nasa_ads/core.py +++ b/astroquery/nasa_ads/core.py @@ -16,11 +16,14 @@ #~ from ..utils.docstr_chompers import prepend_docstr_noreturns from . 
import conf #~ from .utils import * +from astropy.table import Table, Column from ..utils.class_or_instance import class_or_instance from ..utils import commons, async_to_sync -from BeautifulSoup import BeautifulSoup as bfs +#~ from BeautifulSoup import BeautifulSoup as bfs + +from xml.dom import minidom __all__ = ['ADS', 'ADSClass'] @@ -42,7 +45,7 @@ def __init__(self, *args): pass @class_or_instance - def query_simple(self, query_string, get_query_payload=False): + def query_simple(self, query_string, get_query_payload=False, get_raw_response=False): self.query_string = query_string request_payload = self._args_to_payload(query_string) @@ -52,10 +55,21 @@ def query_simple(self, query_string, get_query_payload=False): # someone a URL linking directly to the data if get_query_payload: return request_payload - - return self._parse_response(response) - + if get_raw_response: + return response + # parse the XML response into Beautiful Soup + #~ response_bfs = self._parse_response_to_bfs(response) + # + #self._parse_bfs_to_table(response_bfs) + self._parse_response(response) + + return response + def _parse_response(self, response): + xmlrepr = minidom.parseString(response.text.encode('utf-8')) + + + def _parse_response_to_bfs(self, response): # do something, probably with regexp's adssoup_raw = bfs(response.text) From cc157a497916986f611b61af39832f96ef659ae0 Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Sun, 15 Mar 2015 15:25:22 +0100 Subject: [PATCH 32/39] now results are parsed into a Astropy Table table --- astroquery/nasa_ads/__init__.py | 1 - astroquery/nasa_ads/core.py | 125 ++++++++++++++++++++++++++------ astroquery/nasa_ads/utils.py | 21 ++++-- 3 files changed, 116 insertions(+), 31 deletions(-) diff --git a/astroquery/nasa_ads/__init__.py b/astroquery/nasa_ads/__init__.py index cd9ff1bc0d..7bbe4d5ebd 100644 --- a/astroquery/nasa_ads/__init__.py +++ b/astroquery/nasa_ads/__init__.py @@ -10,7 +10,6 @@ #~ from astropy.config import ConfigurationItem from 
astropy import config as _config -from .utils import * class Conf(_config.ConfigNamespace): diff --git a/astroquery/nasa_ads/core.py b/astroquery/nasa_ads/core.py index e86bb8017d..f4f0b9ade7 100644 --- a/astroquery/nasa_ads/core.py +++ b/astroquery/nasa_ads/core.py @@ -20,7 +20,7 @@ from ..utils.class_or_instance import class_or_instance from ..utils import commons, async_to_sync - +from .utils import * #~ from BeautifulSoup import BeautifulSoup as bfs from xml.dom import minidom @@ -61,34 +61,50 @@ def query_simple(self, query_string, get_query_payload=False, get_raw_response=F #~ response_bfs = self._parse_response_to_bfs(response) # #self._parse_bfs_to_table(response_bfs) - self._parse_response(response) + resulttable = self._parse_response(response) - return response + return resulttable def _parse_response(self, response): xmlrepr = minidom.parseString(response.text.encode('utf-8')) + # Check if there are any results! + # get the list of hits + hitlist = xmlrepr.childNodes[0].childNodes + hitlist = hitlist[1::2] # every second hit is a "line break" - def _parse_response_to_bfs(self, response): - # do something, probably with regexp's - - adssoup_raw = bfs(response.text) - adssoup_cooked = adssoup_raw.findAll('record') - result = adssoup_cooked - - # number of hits - nhits = len(adssoup_cooked) - if nhits == 0: - warnings.warn("No hits for the search \'{0}\'".format(self.query_string)) - return None + # Parse the results + # first single items + titles = get_data_from_xml(hitlist, 'title') + bibcode = get_data_from_xml(hitlist, 'bibcode') + journal = get_data_from_xml(hitlist, 'journal') + volume = get_data_from_xml(hitlist, 'volume') + pubdate = get_data_from_xml(hitlist, 'pubdate') + page = get_data_from_xml(hitlist, 'page') + score = get_data_from_xml(hitlist, 'score') + citations = get_data_from_xml(hitlist, 'citations') + abstract = get_data_from_xml(hitlist, 'abstract') + doi = get_data_from_xml(hitlist, 'DOI') + eprintid = get_data_from_xml(hitlist, 
'eprintid') + #~ = get_data_from_xml(hitlist, '') + authors = get_data_from_xml(hitlist, 'author') + + t = Table() + t['title'] = titles + t['bibcode'] = bibcode + t['journal'] = journal + t['volume'] = volume + t['pubdate'] = pubdate + t['page'] = page + t['score'] = score + t['citations'] = citations + t['abstract'] = abstract + t['doi'] = doi + t['eprintid'] = eprintid + t['authors'] = authors - """ - Developer, how do you get list with all the fields? - Like this: - [tag.name for tag in adssoup_cooked[0].findAll()] - """ - return result - + return t + def _args_to_payload(self, query_string): # convert arguments to a valid requests payload # i.e. a dictionary @@ -97,3 +113,68 @@ def _args_to_payload(self, query_string): ADS = ADSClass() + + +""" +typical fields available: + +[u'bibcode', + u'title', + u'author', + u'author', + u'author', + u'affiliation', + u'journal', + u'volume', + u'pubdate', + u'page', + u'keywords', + u'keyword', + u'keyword', + u'keyword', + u'keyword', + u'keyword', + u'origin', + u'link', + u'name', + u'url', + u'link', + u'name', + u'url', + u'link', + u'name', + u'url', + u'link', + u'name', + u'url', + u'link', + u'name', + u'url', + u'count', + u'link', + u'name', + u'url', + u'count', + u'link', + u'name', + u'url', + u'count', + u'link', + u'name', + u'url', + u'link', + u'name', + u'url', + u'count', + u'link', + u'name', + u'url', + u'url', + u'score', + u'citations', + u'abstract', + u'doi', + u'eprintid'] + + +""" diff --git a/astroquery/nasa_ads/utils.py b/astroquery/nasa_ads/utils.py index f9ac553400..c3c98eab38 100644 --- a/astroquery/nasa_ads/utils.py +++ b/astroquery/nasa_ads/utils.py @@ -1,10 +1,15 @@ - - -def get_field(record, field): - value = record.findAll(field) - if len(value) == 0: - return "" - else: - return value[0].text.encode("utf-8") +def get_data_from_xml(doclist, fieldname, nohitreturn=None): + result = [] + for element in doclist: + fieldlist = element.getElementsByTagName(fieldname) + try: + tmp = 
fieldlist[0] + except IndexError: + fields = [nohitreturn] + fields = [] + for field in fieldlist: # this is useful for e.g. author field + fields.append(field.childNodes[0].data.encode("utf-8")) + result.append(fields) + return result From 1a659ec27cbcc27b296c7e5bea90f14b4efa18ed Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Mon, 13 Apr 2015 09:55:03 +0200 Subject: [PATCH 33/39] added nasa_ads to index.rst and cleaned up the docs for nasa_ads.rst a bit --- docs/index.rst | 2 ++ docs/nasa_ads/nasa_ads.rst | 15 --------------- 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index ce8bd9f7d4..ab0a50b25a 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -132,6 +132,7 @@ The following modules have been completed using a common API: atomic/atomic.rst alma/alma.rst skyview/skyview.rst + nasa_ads/nasa_ads.rst These others are functional, but do not follow a common & consistent API: @@ -225,6 +226,7 @@ above categories. lamda/lamda.rst nist/nist.rst splatalogue/splatalogue.rst + nasa_ads/nasa_ads.rst Developer documentation diff --git a/docs/nasa_ads/nasa_ads.rst b/docs/nasa_ads/nasa_ads.rst index 472ec54461..1d20c526dd 100644 --- a/docs/nasa_ads/nasa_ads.rst +++ b/docs/nasa_ads/nasa_ads.rst @@ -10,21 +10,6 @@ Getting Started =============== This module provides an interface to the online `SAO/NASA Astrophysics Data System`_. 
-It will check all the ADS mirrors, currently given by the following list: - -- http://adswww.harvard.edu/ -- http://cdsads.u-strasbg.fr/ -- http://ukads.nottingham.ac.uk/ -- http://esoads.eso.org/ -- http://ads.ari.uni-heidelberg.de/ -- http://ads.inasan.ru/ -- http://ads.nao.ac.jp/ -- http://ads.iucaa.ernet.in/ -- http://ads.arsip.lipi.go.id/ -- http://saaoads.chpc.ac.za/ -- http://ads.on.br/ - - Examples ======== From c4b63c5d93b0b206d637589d97b122bba637423c Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Tue, 21 Apr 2015 15:26:18 +0200 Subject: [PATCH 34/39] added some docs and one test (my first test ever!). --- astroquery/nasa_ads/tests/test_nasaads.py | 5 +++++ docs/nasa_ads/nasa_ads.rst | 24 +++++++++++++++++++---- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/astroquery/nasa_ads/tests/test_nasaads.py b/astroquery/nasa_ads/tests/test_nasaads.py index e69de29bb2..3acde388d5 100644 --- a/astroquery/nasa_ads/tests/test_nasaads.py +++ b/astroquery/nasa_ads/tests/test_nasaads.py @@ -0,0 +1,5 @@ +from ... import nasa_ads + +def test_simple(patch_post): + x = nasa_ads.ADS.query_simple('^Persson Origin of water around deeply embedded low-mass protostars') + assert x[-1]['authors'][0] == 'Persson, M. V.' diff --git a/docs/nasa_ads/nasa_ads.rst b/docs/nasa_ads/nasa_ads.rst index 1d20c526dd..bdd54493d5 100644 --- a/docs/nasa_ads/nasa_ads.rst +++ b/docs/nasa_ads/nasa_ads.rst @@ -10,26 +10,42 @@ Getting Started =============== This module provides an interface to the online `SAO/NASA Astrophysics Data System`_. +At the moment only the "simple search", i.e. omni-box search is available, and only +a subset of the results are accessible. 
Examples ======== - Search works by specific identifier ----------------------------------- +from astroquery import nasa_ads as na +# the "^" makes ADS to return only papers where Persson +# is first author +results = na.ADS.query_simple('^Persson Origin of water\ + around deeply embedded low-mass protostars') results[0].title + +# to sort after publication date +results.sort(['pubdate']) + +# get the title of the last hit +title = results[-1]['title'][0] + +# printout the authors of the last hit +print results[-1]['authors'] + Get links --------- - +Not yet implemented. Download publisher/ArXiv PDF ---------------------------- - +Not yet implemented. Get Bibtex ---------- - +Not yet implemented. From 47cffb062cd82a4624bdb22400a57290a8f559ad Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Tue, 21 Apr 2015 15:43:13 +0200 Subject: [PATCH 35/39] changed the test, removed patch_post --- astroquery/nasa_ads/tests/test_nasaads.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/astroquery/nasa_ads/tests/test_nasaads.py b/astroquery/nasa_ads/tests/test_nasaads.py index 3acde388d5..04817787e8 100644 --- a/astroquery/nasa_ads/tests/test_nasaads.py +++ b/astroquery/nasa_ads/tests/test_nasaads.py @@ -1,5 +1,6 @@ from ... import nasa_ads +from astropy.tests.helper import pytest -def test_simple(patch_post): +def test_simple(): x = nasa_ads.ADS.query_simple('^Persson Origin of water around deeply embedded low-mass protostars') assert x[-1]['authors'][0] == 'Persson, M. V.' 
From 9f3e83ceca7ffa9164f4825def52dc90cd6d3f64 Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Tue, 21 Apr 2015 16:12:02 +0200 Subject: [PATCH 36/39] changed the test, added decorator remote_data --- astroquery/nasa_ads/tests/test_nasaads.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/astroquery/nasa_ads/tests/test_nasaads.py b/astroquery/nasa_ads/tests/test_nasaads.py index 04817787e8..9258d879e4 100644 --- a/astroquery/nasa_ads/tests/test_nasaads.py +++ b/astroquery/nasa_ads/tests/test_nasaads.py @@ -1,6 +1,7 @@ from ... import nasa_ads -from astropy.tests.helper import pytest +from astropy.tests.helper import remote_data +@remote_data def test_simple(): x = nasa_ads.ADS.query_simple('^Persson Origin of water around deeply embedded low-mass protostars') assert x[-1]['authors'][0] == 'Persson, M. V.' From bd3422424f92c456e40eaabd1b96d0452c06be8e Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Thu, 23 Apr 2015 14:31:12 +0200 Subject: [PATCH 37/39] corrected docs/nasa_ads/nasa_ads.rst code block --- astroquery/nasa_ads/core.py | 6 +++--- astroquery/nasa_ads/utils.py | 3 ++- docs/nasa_ads/nasa_ads.rst | 27 ++++++++++++++------------- 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/astroquery/nasa_ads/core.py b/astroquery/nasa_ads/core.py index f4f0b9ade7..9f1e418ae5 100644 --- a/astroquery/nasa_ads/core.py +++ b/astroquery/nasa_ads/core.py @@ -61,12 +61,12 @@ def query_simple(self, query_string, get_query_payload=False, get_raw_response=F #~ response_bfs = self._parse_response_to_bfs(response) # #self._parse_bfs_to_table(response_bfs) - resulttable = self._parse_response(response) + resulttable = self._parse_response(response.encode(results.encoding).decode('utf-8')) - return resulttable + return resulttable def _parse_response(self, response): - xmlrepr = minidom.parseString(response.text.encode('utf-8')) + xmlrepr = minidom.parseString(response.text) # Check if there are any results! 
# get the list of hits diff --git a/astroquery/nasa_ads/utils.py b/astroquery/nasa_ads/utils.py index c3c98eab38..3f963b0f2f 100644 --- a/astroquery/nasa_ads/utils.py +++ b/astroquery/nasa_ads/utils.py @@ -10,6 +10,7 @@ def get_data_from_xml(doclist, fieldname, nohitreturn=None): fields = [nohitreturn] fields = [] for field in fieldlist: # this is useful for e.g. author field - fields.append(field.childNodes[0].data.encode("utf-8")) + #~ fields.append(field.childNodes[0].data.decode("utf-8")) + fields.append(field.childNodes[0].data) result.append(fields) return result diff --git a/docs/nasa_ads/nasa_ads.rst b/docs/nasa_ads/nasa_ads.rst index bdd54493d5..033ef9fbb6 100644 --- a/docs/nasa_ads/nasa_ads.rst +++ b/docs/nasa_ads/nasa_ads.rst @@ -18,21 +18,22 @@ Examples Search works by specific identifier ----------------------------------- +.. code-block:: python -from astroquery import nasa_ads as na -# the "^" makes ADS to return only papers where Persson -# is first author -results = na.ADS.query_simple('^Persson Origin of water\ + from astroquery import nasa_ads as na + # the "^" makes ADS to return only papers where Persson + # is first author + results = na.ADS.query_simple('^Persson Origin of water\ around deeply embedded low-mass protostars') results[0].title - -# to sort after publication date -results.sort(['pubdate']) - -# get the title of the last hit -title = results[-1]['title'][0] - -# printout the authors of the last hit -print results[-1]['authors'] + + # to sort after publication date + results.sort(['pubdate']) + + # get the title of the last hit + title = results[-1]['title'][0] + + # printout the authors of the last hit + print results[-1]['authors'] Get links From ef9412fef835d300fafbc0a9924a49da73833e15 Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Thu, 23 Apr 2015 17:56:02 +0200 Subject: [PATCH 38/39] cleanup of comments --- astroquery/nasa_ads/__init__.py | 52 --------------- astroquery/nasa_ads/core.py | 115 
+++++--------------------------- astroquery/nasa_ads/utils.py | 6 +- 3 files changed, 22 insertions(+), 151 deletions(-) diff --git a/astroquery/nasa_ads/__init__.py b/astroquery/nasa_ads/__init__.py index 7bbe4d5ebd..f5cf998f7e 100644 --- a/astroquery/nasa_ads/__init__.py +++ b/astroquery/nasa_ads/__init__.py @@ -7,8 +7,6 @@ """ -#~ from astropy.config import ConfigurationItem - from astropy import config as _config @@ -41,16 +39,10 @@ class Conf(_config.ConfigNamespace): '/cgi-bin/nph-abs_connect', 'Path for advanced query (unconfirmed)' ) - - #~ simple_path = _config.ConfigItem( - #~ '/cgi-bin/nph-basic_connect', - #~ 'Path for simple query' - #~ ) simple_path = _config.ConfigItem( '/cgi-bin/basic_connect', 'Path for simple query (return XML)' ) - timeout = _config.ConfigItem( 120, 'Time limit for connecting to ADS server' @@ -68,47 +60,3 @@ class Conf(_config.ConfigNamespace): __all__ = ['ADSClass', 'ADS', 'Conf', 'conf'] - -#~ class Conf(_config.ConfigNamespace): - #~ """ - #~ Configuration parameters for `astroquery.nasa_ads`. 
- #~ """ - #~ servers = _config.ConfigItem( - #~ ['http://adswww.harvard.edu', - #~ 'http://cdsads.u-strasbg.fr', - #~ 'http://ukads.nottingham.ac.uk', - #~ 'http://esoads.eso.org', - #~ 'http://ads.ari.uni-heidelberg.de', - #~ 'http://ads.inasan.ru', - #~ 'http://ads.mao.kiev.ua', - #~ 'http://ads.astro.puc.cl', - #~ 'http://ads.nao.ac.jp', - #~ 'http://ads.bao.ac.cn', - #~ 'http://ads.iucaa.ernet.in', - #~ 'http://ads.arsip.lipi.go.id',lastpage - #~ 'http://saaoads.chpc.ac.za', - #~ 'http://ads.on.br'],lastpage - #~ 'SAO/NASA ADS mirrors around the world' - #~ ) - #~ - #~ advanced_url = _config.ConfigurationItem( - #~ '/cgi-bin/nph-abs_connect', - #~ 'Path for advanced query' - #~ ) - #~ - #~ simple_url = _config.ConfigurationItem( - #~ '/cgi-bin/nph-basic_connect', - #~ 'Path for simple query' - #~ ) -#~ - #~ timeout = _config.ConfigurationItem( - #~ 60, - #~ 'Time limit for connecting to ADS server' - #~ ) -#~ -#~ conf = Conf() -#~ -#~ from .core import ADS, ADSClass -#~ -#~ __all__ = ['ADS', 'ADSClass', - #~ 'Conf', 'conf'] diff --git a/astroquery/nasa_ads/core.py b/astroquery/nasa_ads/core.py index 9f1e418ae5..82e7b03e5d 100644 --- a/astroquery/nasa_ads/core.py +++ b/astroquery/nasa_ads/core.py @@ -7,21 +7,14 @@ """ import warnings -# example warning -# warnings.warn("Band was specified, so blabla is overridden") -#~ from astropy.io import ascii -#~ from astropy import units as u from ..query import BaseQuery from ..utils import commons, async_to_sync -#~ from ..utils.docstr_chompers import prepend_docstr_noreturns from . 
import conf -#~ from .utils import * from astropy.table import Table, Column from ..utils.class_or_instance import class_or_instance from ..utils import commons, async_to_sync from .utils import * -#~ from BeautifulSoup import BeautifulSoup as bfs from xml.dom import minidom @@ -29,8 +22,7 @@ @async_to_sync class ADSClass(BaseQuery): - - ####### FROM SPLATALOGUE + SERVER = conf.server QUERY_ADVANCED_PATH = conf.advanced_path QUERY_SIMPLE_PATH = conf.simple_path @@ -39,7 +31,6 @@ class ADSClass(BaseQuery): QUERY_SIMPLE_URL = SERVER + QUERY_SIMPLE_PATH QUERY_ADVANCED_URL = SERVER + QUERY_ADVANCED_PATH - ######## FROM API DOCS def __init__(self, *args): """ set some parameters """ pass @@ -50,17 +41,14 @@ def query_simple(self, query_string, get_query_payload=False, get_raw_response=F request_payload = self._args_to_payload(query_string) response = commons.send_request(self.QUERY_SIMPLE_URL, request_payload, self.TIMEOUT) - + # primarily for debug purposes, but also useful if you want to send # someone a URL linking directly to the data if get_query_payload: return request_payload if get_raw_response: return response - # parse the XML response into Beautiful Soup - #~ response_bfs = self._parse_response_to_bfs(response) - # - #self._parse_bfs_to_table(response_bfs) + # parse the XML response into AstroPy Table resulttable = self._parse_response(response.encode(response.encoding).decode('utf-8')) return resulttable @@ -73,22 +61,20 @@ def _parse_response(self, response): hitlist = xmlrepr.childNodes[0].childNodes hitlist = hitlist[1::2] # every second hit is a "line break" - # Parse the results - # first single items - titles = get_data_from_xml(hitlist, 'title') - bibcode = get_data_from_xml(hitlist, 'bibcode') - journal = get_data_from_xml(hitlist, 'journal') - volume = get_data_from_xml(hitlist, 'volume') - pubdate = get_data_from_xml(hitlist, 'pubdate') - page = get_data_from_xml(hitlist, 'page') - score = get_data_from_xml(hitlist, 'score') - citations = 
get_data_from_xml(hitlist, 'citations') - abstract = get_data_from_xml(hitlist, 'abstract') - doi = get_data_from_xml(hitlist, 'DOI') - eprintid = get_data_from_xml(hitlist, 'eprintid') - #~ = get_data_from_xml(hitlist, '') - authors = get_data_from_xml(hitlist, 'author') - + # Grab the various fields + titles = _get_data_from_xml(hitlist, 'title') + bibcode = _get_data_from_xml(hitlist, 'bibcode') + journal = _get_data_from_xml(hitlist, 'journal') + volume = _get_data_from_xml(hitlist, 'volume') + pubdate = _get_data_from_xml(hitlist, 'pubdate') + page = _get_data_from_xml(hitlist, 'page') + score = _get_data_from_xml(hitlist, 'score') + citations = _get_data_from_xml(hitlist, 'citations') + abstract = _get_data_from_xml(hitlist, 'abstract') + doi = _get_data_from_xml(hitlist, 'DOI') + eprintid = _get_data_from_xml(hitlist, 'eprintid') + authors = _get_data_from_xml(hitlist, 'author') + # put into AstroPy Table t = Table() t['title'] = titles t['bibcode'] = bibcode @@ -112,69 +98,4 @@ def _args_to_payload(self, query_string): -ADS = ADSClass() - - -""" -typical fields available: - -[u'bibcode', - u'title', - u'author', - u'author', - u'author', - u'affiliation', - u'journal', - u'volume', - u'pubdate', - u'page', - u'keywords', - u'keyword', - u'keyword', - u'keyword', - u'keyword', - u'keyword', - u'origin', - u'link', - u'name', - u'url', - u'link', - u'name', - u'url', - u'link', - u'name', - u'url', - u'link', - u'name', - u'url', - u'link', - u'name', - u'url', - u'count', - u'link', - u'name', - u'url', - u'count', - u'link', - u'name', - u'url', - u'count', - u'link', - u'name', - u'url', - u'link', - u'name', - u'url', - u'count', - u'link', - u'name', - u'url', - u'url', - u'score', - u'citations', - u'abstract', - u'doi', - u'eprintid'] - - -""" +ADS = ADSClass() \ No newline at end of file diff --git a/astroquery/nasa_ads/utils.py b/astroquery/nasa_ads/utils.py index 3f963b0f2f..b8da20a56f 100644 --- a/astroquery/nasa_ads/utils.py +++ 
b/astroquery/nasa_ads/utils.py @@ -1,6 +1,9 @@ -def get_data_from_xml(doclist, fieldname, nohitreturn=None): +def _get_data_from_xml(doclist, fieldname, nohitreturn=None): + """Get the fieldname (i.e. author, title etc) + from minidom.parseString().childNodes[0].childNodes list + """ result = [] for element in doclist: fieldlist = element.getElementsByTagName(fieldname) @@ -10,7 +13,6 @@ def get_data_from_xml(doclist, fieldname, nohitreturn=None): fields = [nohitreturn] fields = [] for field in fieldlist: # this is useful for e.g. author field - #~ fields.append(field.childNodes[0].data.decode("utf-8")) fields.append(field.childNodes[0].data) result.append(fields) return result From 2aae04ff497af434732674cf13381a7fba0dd430 Mon Sep 17 00:00:00 2001 From: Magnus Persson Date: Thu, 23 Apr 2015 17:57:09 +0200 Subject: [PATCH 39/39] cleanup of comments --- astroquery/nasa_ads/core.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/astroquery/nasa_ads/core.py b/astroquery/nasa_ads/core.py index 82e7b03e5d..b93f155b4d 100644 --- a/astroquery/nasa_ads/core.py +++ b/astroquery/nasa_ads/core.py @@ -98,4 +98,5 @@ def _args_to_payload(self, query_string): -ADS = ADSClass() \ No newline at end of file +ADS = ADSClass() +