In [46]:
import json

import bs4
import requests
from elsapy.elsclient import ElsClient
from elsapy.elsdoc import FullDoc
from pubmed_lookup import PubMedLookup, Publication

## Load configuration for Elsevier
# config.json must contain an 'apikey' entry (read below).  The `with` block
# guarantees the file handle is closed even if the JSON fails to parse.
with open("config.json", encoding="utf-8") as con_file:
    config = json.load(con_file)

## Initialize client for Elsevier
client = ElsClient(config['apikey'])

## scrape ABIDEII site for publication links

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed as if it were
# the manuscripts list.
r = requests.get(
    'http://fcon_1000.projects.nitrc.org/indi/abide/manuscripts.html',
    timeout=30,
)
r.raise_for_status()

soup = bs4.BeautifulSoup(r.text, 'html.parser')

## Get list of links that are not subjects lists,
## that have pubmed ids at end, without ieee papers

# NOTE: despite its name, `pmids` holds the full href of each link, not a bare
# id; the href must end in three digits to be kept.
# href=True skips <a> tags that have no href attribute, which would otherwise
# raise KeyError on link['href'].
pmids = [
    link['href']
    for link in soup.find_all('a', href=True)
    if link['href'][-3:].isdigit()
    and 'sublist' not in link['href']
    and not link['href'].startswith('http://ieeexplore.ieee.org')
]

# Text layout of one record in summary.txt for each source.  The trailing
# whitespace before the closing quotes is kept on purpose so that records
# appended by this version match the layout of records already in the file.
_PUBMED_TEMPLATE = """
TITLE:{title}
AUTHORS:{authors}
JOURNAL:{journal}
YEAR:{year}
URL:{url}
PUBMED:{pubmed}
ABSTRACT:\n{abstract}\n
                """

_PII_TEMPLATE = """
TITLE:{title}
AUTHORS:{authors}
JOURNAL:{journal}
YEAR:{year}
URL:{url}
PII:{sciencedirect}
ABSTRACT:\n{abstract}
                """


def _link_kind(url):
    """Return the second-to-last '/'-separated segment of *url*.

    For the links handled below this is 'pubmed' or 'pii'.  An empty string
    is returned when *url* contains no '/', so the caller falls through to
    its "unknown link" branch instead of raising IndexError.
    """
    segments = url.rsplit('/', 2)
    return segments[-2] if len(segments) >= 2 else ''


def _format_pubmed(publication):
    """Render a pubmed_lookup ``Publication`` with the PubMed template."""
    return _PUBMED_TEMPLATE.format(
        title=publication.title,
        authors=publication.authors,
        journal=publication.journal,
        year=publication.year,
        url=publication.url,
        pubmed=publication.pubmed_url,
        # repr() keeps the abstract on a single, quoted line in the output.
        abstract=repr(publication.abstract),
    )


def _format_pii(pii_doc, id_num):
    """Render an elsapy ``FullDoc`` (already read) with the PII template.

    Raises KeyError/TypeError/AttributeError when an expected 'coredata'
    field is missing or has an unexpected shape.
    """
    coredata = pii_doc.data['coredata']
    return _PII_TEMPLATE.format(
        title=pii_doc.title,
        authors='; '.join(a['$'] for a in coredata['dc:creator']),
        journal=coredata['prism:publicationName'],
        # prism:coverDate is expected to look like 'YYYY-MM-DD', so field 0
        # is the year (field 1, used previously, is the month).
        year=coredata['prism:coverDate'].split('-')[0],
        url=coredata['prism:url'],
        sciencedirect=id_num,
        abstract=coredata['dc:description'],
    )


# Append one summary record per publication link.  Links are dispatched on
# their second-to-last path segment: 'pubmed' goes through pubmed_lookup,
# 'pii' through the Elsevier client; anything else is reported as 'failed'.
# UTF-8 is set explicitly so non-ASCII titles/abstracts do not depend on the
# platform's default encoding.
with open('summary.txt', 'a', encoding='utf-8') as outfile:

    # NCBI will contact user by email if excessive queries are detected
    email = 'alistair.walsh@unimelb.edu.au'
    for url in pmids:
        interface = _link_kind(url)  # either 'pubmed' or 'pii' for supported links

        if interface == 'pubmed':
            print(url)
            try:
                # The lookup is inside the try as well, so a failure while
                # fetching one record (presumably a network call - confirm
                # against pubmed_lookup) skips that record instead of
                # aborting the whole run.
                lookup = PubMedLookup(url, email)
                publication = Publication(lookup)
                record = _format_pubmed(publication)
            except Exception as err:
                # Best-effort per record, but no longer a bare `except:` so
                # KeyboardInterrupt/SystemExit still stop the loop.
                print("###### couldn't get url = ", url, '->', repr(err))
                continue
            print(record, file=outfile)

        elif interface == 'pii':
            print(url)
            id_num = url.rsplit('/', 2)[-1]  # elsapy requires id number (pubmed_lookup requires url)
            ## ScienceDirect (full-text) document example using PII
            pii_doc = FullDoc(sd_pii=id_num)
            if not pii_doc.read(client):
                print("Read document failed.")
                continue
            try:
                record = _format_pii(pii_doc, id_num)
            except (KeyError, TypeError, AttributeError) as err:
                # Same best-effort policy as the pubmed branch: a document
                # with missing metadata is reported and skipped.
                print("###### couldn't get url = ", url, '->', repr(err))
                continue
            print(record, file=outfile)

            print("pii_doc.title: ", pii_doc.title)
            pii_doc.write()
        else:
            print('failed')

http://www.ncbi.nlm.nih.gov in pubmed
http://www.ncbi.nlm.nih.gov in pubmed
http://www.ncbi.nlm.nih.gov in pubmed
http://www.ncbi.nlm.nih.gov in pubmed
http://www.ncbi.nlm.nih.gov in pubmed
http://www.ncbi.nlm.nih.gov in pubmed
http://www.ncbi.nlm.nih.gov in pubmed
http://www.ncbi.nlm.nih.gov in pubmed
http://www.ncbi.nlm.nih.gov in pubmed
http://www.ncbi.nlm.nih.gov in pubmed
http://www.ncbi.nlm.nih.gov in pubmed
http://www.ncbi.nlm.nih.gov in pubmed
http://www.ncbi.nlm.nih.gov in pubmed
http://www.ncbi.nlm.nih.gov in pubmed
http://www.ncbi.nlm.nih.gov in pubmed
http://www.ncbi.nlm.nih.gov in pubmed
###### couldn't get url =  http://www.ncbi.nlm.nih.gov/pubmed/27358766
http://www.ncbi.nlm.nih.gov in pubmed
http://www.ncbi.nlm.nih.gov in pubmed
http://www.ncbi.nlm.nih.gov in pubmed
http://www.ncbi.nlm.nih.gov in pubmed
http://www.ncbi.nlm.nih.gov in pubmed
http://www.ncbi.nlm.nih.gov in pubmed
http://www.ncbi.nlm.nih.gov in pubmed
http://www.ncbi.nlm.nih.gov in pubmed
http://www.ncbi.n