In [1]:
import sys, signal
from PyQt4 import QtCore, QtGui, QtWebKit

class WebPage(QtWebKit.QWebPage):  
    def __init__(self, db):
        self._db = db
        QtWebKit.QWebPage.__init__(self)
        self.mainFrame().loadFinished.connect(self.handleLoadFinished)

    def process(self, items):
        self._items = iter(items)
        self.fetchNext() 
    
    def fetchNext(self):
        try:
            self._url, self._func = next(self._items)
            self.mainFrame().load(QtCore.QUrl(self._url))
        except StopIteration:
            return False
        return True
    
    def handleLoadFinished(self):
        self._func(self._url, self.mainFrame().toHtml())
        if not self.fetchNext():
            print('CONGRATULATIONS # processing complete')
            QtGui.qApp.quit()

    

In [2]:
# load the list of editions conveniently saved as json

import json

load_path = "data/ploscompbio_editions_throughSeptember2018.json"

with open(load_path, 'r') as infile:  
    list_of_editions = json.load(infile)
    
#print(list_of_editions)

In [3]:
# initialize database

import sqlite3

db = sqlite3.connect('data/plos_cb_abstracts.db')
cursor = db.cursor()
cursor.execute('''CREATE TABLE plos_cb_abstracts(id INTEGER PRIMARYKEY,
                    url TEXT, authors TEXT, date TEXT, title TEXT, abstract TEXT, author_summary TEXT)
                    ''')


db.commit()





In [None]:
# note - using global db variable here

import urllib
from bs4 import BeautifulSoup

def process_url(url,html):
    print(url)
    print('testing database entry...')
    
    # get the list of articles from the edition
    r = str(html.toAscii())
    edition_soup = BeautifulSoup(r,"lxml")
    edition_sections = edition_soup.select('.section')
    for section in edition_sections:
        sectionID = section.select('a')[0]['id']
        print(sectionID)
        if sectionID == "Research_Article":
            research_articles_section = section
    research_articles = research_articles_section.select('.item')
        
    #try:
    #    research_articles_section = edition_soup.select('.section')[4]
    #except:
    #    research_articles_section = edition_soup.select('.section')[3] # for the javascript loaded pages
    #research_articles = research_articles_section.select('.item')

    for sample_article in research_articles:
        url = sample_article.select('a')[1].text
        print(url)
        
        authors = sample_article.select('p')[0].text
        #authors = authors.split(',') # todo normalize names for whitespace, \n, etc # wait, no do this on removal from database

        date = sample_article.select('p')[1].text.split('|')[0]
        date = date.split('\n')[1] # todo express this as a datetime object (check formatting across articles)

        title = sample_article.select('a')[0].text
        journal = "PLOS Computational Biology" 
        
        print("requesting article...")
        # go the the article url and read text
        r = urllib.urlopen(url).read()
        article_soup = BeautifulSoup(r,"lxml")
        try:
            abstract = article_soup.select("div.abstract")[0].text[8:] # get abstract from html & pick off first 8 characters,
                                        # these characters read, "Abstract"
        except:
            abstract = "No abstract found"  # todo - the abstract is probably just in another section of unlabeled text
        try:
            author_summary = article_soup.select("div.abstract")[1].text[15:] # "author summary" 
        except: # a few articles don't have author summaries
            author_summary = "No author summary found" # todo - the author summary is probably just in another section of unlabeled text
        
        article_data_object = {"url":url, "authors":authors, "date":date, "title":title,"abstract":abstract,
                                  "author_summary":author_summary}
        
        # add article data object to a database        
        cursor.execute('''INSERT INTO plos_cb_abstracts(url, authors, date, title, abstract, author_summary)
                VALUES(:url, :authors, :date, :title, :abstract, :author_summary)''',
                article_data_object)

        db.commit()
        print('entry added to database')

In [None]:
edition_urls = [ed['url'] for ed in list_of_editions]
edition_urls = edition_urls

items = [ (url,process_url) for url in edition_urls]

signal.signal(signal.SIGINT, signal.SIG_DFL)
print('press ctrl+c to quit\n')
app = QtGui.QApplication(sys.argv)
webpage = WebPage(db)
webpage.process(items)
sys.exit(app.exec_())

db.close()
print('database closed')


press ctrl+c to quit

http://journals.plos.org/ploscompbiol/issue?id=10.1371/issue.pcbi.v14.i01
testing database entry...
Cover
Editorial
Education
Research_Article
https://doi.org/10.1371/journal.pcbi.1005973
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005931
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005953
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005949
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005968
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005951
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005943
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005930
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005944
requesting article...
entry added to database
https://doi.org/

entry added to database
https://doi.org/10.1371/journal.pcbi.1005959
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005987
requesting article...
entry added to database
http://journals.plos.org/ploscompbiol/issue?id=10.1371/issue.pcbi.v14.i03
testing database entry...
Cover
Editorial
Education
Research_Article
https://doi.org/10.1371/journal.pcbi.1006054
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1006078
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1006080
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005977
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005877
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1006070
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1006032
requesting article...
entry added to database
https://doi.or

entry added to database
https://doi.org/10.1371/journal.pcbi.1006101
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1006096
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1006094
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1006082
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1006060
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1006040
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005996
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1006079
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1006095
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1006067
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1006044
requesting article...

entry added to database
https://doi.org/10.1371/journal.pcbi.1006241
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1006208
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1006167
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1006204
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1006233
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1006222
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1006201
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1006145
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1006229
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1006195
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1006143
requesting article...

entry added to database
https://doi.org/10.1371/journal.pcbi.1005306
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005228
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005338
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005327
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005319
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005309
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005299
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005291
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005257
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005337
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005316
requesting article...

entry added to database
https://doi.org/10.1371/journal.pcbi.1005449
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005397
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005446
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005427
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005445
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005394
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005432
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005433
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005385
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005415
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005423
requesting article...

entry added to database
https://doi.org/10.1371/journal.pcbi.1005528
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005517
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005495
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005541
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005539
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005537
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005536
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005515
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005530
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005527
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005418
requesting article...

entry added to database
https://doi.org/10.1371/journal.pcbi.1005654
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005604
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005667
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005676
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005671
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005657
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005647
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005627
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005631
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005607
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005554
requesting article...

entry added to database
https://doi.org/10.1371/journal.pcbi.1005779
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005768
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005767
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005617
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005750
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005707
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005634
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005754
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005744
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005763
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005766
requesting article...

entry added to database
https://doi.org/10.1371/journal.pcbi.1005773
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005762
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005662
requesting article...
entry added to database
http://journals.plos.org/ploscompbiol/issue?id=10.1371/issue.pcbi.v13.i11
testing database entry...
Cover
Research_Article
https://doi.org/10.1371/journal.pcbi.1005793
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005878
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005864
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005851
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005806
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005860
requesting article...
entry added to database
https://doi.org/10.1371/journal.pc

entry added to database
https://doi.org/10.1371/journal.pcbi.1004715
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004664
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004690
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004709
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004710
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004714
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004712
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004654
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004681
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004701
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004663
requesting article...

entry added to database
https://doi.org/10.1371/journal.pcbi.1004775
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004754
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004768
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004793
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004801
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004813
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004823
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004814
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004818
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004734
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004822
requesting article...

entry added to database
https://doi.org/10.1371/journal.pcbi.1004849
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004855
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004856
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004861
requesting article...
entry added to database
http://journals.plos.org/ploscompbiol/issue?id=10.1371/issue.pcbi.v12.i05
testing database entry...
Cover
Message_from_ISCB
Perspective
Research_Article
https://doi.org/10.1371/journal.pcbi.1004949
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004905
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004910
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004925
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004962
requesting article...
entry added to database
http

entry added to database
https://doi.org/10.1371/journal.pcbi.1004904
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004876
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004963
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004970
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004980
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004958
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004974
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004817
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004840
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004898
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004924
requesting article...

entry added to database
https://doi.org/10.1371/journal.pcbi.1005080
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005062
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005082
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005041
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005073
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005095
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005021
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005023
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005076
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005033
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005045
requesting article...

entry added to database
https://doi.org/10.1371/journal.pcbi.1005148
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005152
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005158
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005160
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005161
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005164
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005166
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005167
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005169
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005175
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005177
requesting article...

entry added to database
https://doi.org/10.1371/journal.pcbi.1005294
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005295
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005296
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005298
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005300
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005302
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1005315
requesting article...
entry added to database
http://journals.plos.org/ploscompbiol/issue?id=10.1371/issue.pcbi.v11.i01
testing database entry...
Cover
Editorial
Message_from_ISCB
Perspective
Research_Article
https://doi.org/10.1371/journal.pcbi.1004077
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004039
requesting article...
entry added to dat

entry added to database
https://doi.org/10.1371/journal.pcbi.1004129
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004157
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004164
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004172
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004186
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004089
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004117
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004104
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004055
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004030
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004082
requesting article...

entry added to database
https://doi.org/10.1371/journal.pcbi.1004135
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004166
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004190
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004193
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004128
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004179
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004198
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004141
requesting article...
entry added to database
http://journals.plos.org/ploscompbiol/issue?id=10.1371/issue.pcbi.v11.i05
testing database entry...
Cover
Perspective
Topic_Page
Research_Article
Formal_Comment
https://doi.org/10.1371/journal.pcbi.1004301
requesting article...
entry added to datab

entry added to database
https://doi.org/10.1371/journal.pcbi.1004222
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004288
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004306
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004313
requesting article...
entry added to database
http://journals.plos.org/ploscompbiol/issue?id=10.1371/issue.pcbi.v11.i07
testing database entry...
Cover
Editorial
Review
Research_Article
https://doi.org/10.1371/journal.pcbi.1004266
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004379
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004425
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004360
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004383
requesting article...
entry added to database
https://doi.org/1

entry added to database
https://doi.org/10.1371/journal.pcbi.1004436
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004358
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004373
requesting article...
entry added to database
http://journals.plos.org/ploscompbiol/issue?id=10.1371/issue.pcbi.v11.i09
testing database entry...
Cover
Editorial
Perspective
Research_Article
https://doi.org/10.1371/journal.pcbi.1004498
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004391
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004523
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004392
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004335
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004402
requesting article...
entry added to database
https://doi.

entry added to database
https://doi.org/10.1371/journal.pcbi.1004518
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004520
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004551
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004553
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004292
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004430
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004453
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004542
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004560
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004289
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004314
requesting article...

entry added to database
https://doi.org/10.1371/journal.pcbi.1004554
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004567
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004581
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004599
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004540
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004556
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004571
requesting article...
entry added to database
http://journals.plos.org/ploscompbiol/issue?id=10.1371/issue.pcbi.v11.i12
testing database entry...
Cover
Review
Research_Article
https://doi.org/10.1371/journal.pcbi.1004483
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1004636
requesting article...
entry added to database
https://doi.org/10.1371/jou

entry added to database
https://doi.org/10.1371/journal.pcbi.1003428
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003424
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003400
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003440
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003426
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003435
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003439
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003432
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003457
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003438
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003416
requesting article...

entry added to database
https://doi.org/10.1371/journal.pcbi.1003491
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003526
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003538
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003549
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003528
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003515
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003502
requesting article...
entry added to database
http://journals.plos.org/ploscompbiol/issue?id=10.1371/issue.pcbi.v10.i04
testing database entry...
Cover
Editorial
Message_from_ISCB
Education
Perspective
Review
Research_Article
Correction
https://doi.org/10.1371/journal.pcbi.1003318
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003534
requesting a

entry added to database
https://doi.org/10.1371/journal.pcbi.1003638
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003641
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003630
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003636
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003634
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003605
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003642
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003578
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003654
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003626
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003639
requesting article...

entry added to database
https://doi.org/10.1371/journal.pcbi.1003705
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003707
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003711
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003714
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003715
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003720
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003725
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003726
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003729
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003733
requesting article...
entry added to database
https://doi.org/10.1371/journal.pcbi.1003683
requesting article...

IndexError: list index out of range

In [None]:
# PLOS makes this much easier in practice: https://www.plos.org/text-and-data-mining
# accessing the plos provided sqlite database - http://conference.scipy.org/proceedings/scipy2018/pdfs/elizabeth_seiver.pdf

# info for practicing spark: Googles and Microsofts free offering have already been described, so I'll add AWS. 750 hours of t2.micros. You could run 750 instances all at once for an hour if you liked. More than enough to practice Spark.
#But then unless your PC is underpowered then I'd guess you already have enough to practice. Spin up two 2 core VMs. An i5 should be able to handle that with ease.