# Making a DataFrame Containing Euler's Publications from a Webpage

## Make the usable screen area bigger

In [540]:
# Inject CSS that sets `.container` elements to 100% width so wide tables can
# use the whole browser window.
# `IPython.display` is the public import location for these names; importing
# them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

## Packages & definitions

In [541]:
# Necessary packages:
import requests
import re
import os
import shutil
import sys
import pandas as pd
pd.set_option("display.max_colwidth", 500)
pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 1000)

# If not yet installed, install these packages now.
# The install goes through `sys.executable -m pip` so the packages land in the
# environment of the interpreter that runs this kernel (a bare `python` on PATH
# may be a different installation), and a failed install raises an error
# instead of being silently ignored.
try:
    from bs4 import BeautifulSoup as bs
    import PyPDF2
except ImportError:
    import subprocess
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'bs4', 'PyPDF2'])
    from bs4 import BeautifulSoup as bs
    import PyPDF2

In [542]:
def makeSoup(url, timeout=30):
    """Download a web page and parse it into a BeautifulSoup tree.

    Parameters
    ----------
    url : str
        Address of the page to fetch.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout a stalled connection can block the notebook
        indefinitely.

    Returns
    -------
    bs4.BeautifulSoup
        The response body parsed with the built-in 'html.parser'.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status, so that an error page is
        never parsed as if it were real content.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return bs(response.content, 'html.parser')

In [543]:
def findPub(soup):
    """Extract PDF links and publication entries from one archive listing page.

    Parameters
    ----------
    soup : bs4.BeautifulSoup
        Parsed listing page; it must contain an element with id "series-home".

    Returns
    -------
    tuple of five lists
        pdfurls    -- href of the link inside every element with class "pdf"
        pdfnumbers -- number derived from each PDF href, as a string
        urls       -- href of the link inside every "article-listing" element
        titles     -- listing text with every "E<number>: " marker removed
        numbers    -- number parsed from the start of the listing text, as a string

    Raises
    ------
    IndexError
        If the page has no element with id "series-home".
    """
    pubs = soup.find_all(id="series-home")[0]
    publist = pubs.find_all(class_="article-listing")
    pdfs = pubs.find_all(class_="pdf")

    pdfurls = []
    pdfnumbers = []
    for pdf in pdfs:
        href = pdf.a['href']
        pdfurls.append(href)
        # The href carries a query value such as "article=1000&context=...";
        # dropping its first digit and adding one maps "1000" to "1".
        article_id = href.split('=')[1].split('&')[0]
        pdfnumbers.append(str(int(article_id[1:]) + 1))

    urls = []
    titles = []
    numbers = []
    for pub in publist:
        urls.append(pub.a['href'])
        text = pub.get_text()
        titles.append(re.sub(r'E\d{1,3}: ', '', text.replace('&pm', '±')))
        try:
            numbers.append(str(int(text.split(': ')[0].replace('E', ''))))
        except ValueError:
            # The text before the first ': ' is not a number. Only this case is
            # caught, so unrelated errors are no longer swallowed.
            # NOTE(review): '870' is the hard-coded fallback number; presumably
            # it stands for the one listing without an "E<number>" prefix - confirm.
            numbers.append('870')
    return pdfurls, pdfnumbers, urls, titles, numbers

In [544]:
def _paragraph_text(paragraph):
    """Return the bold link text of a paragraph, else its link text, else its plain text."""
    link = paragraph.a
    if link is None:
        return paragraph.get_text()
    bold = link.strong
    if bold is None:
        return link.get_text()
    return bold.get_text()


def findInfo(soup):
    """Extract the field headings and field values from one publication page.

    Parameters
    ----------
    soup : bs4.BeautifulSoup
        Parsed publication page; it must contain an element with id "alpha".

    Returns
    -------
    headings : list of str
        Text of every <h4> element inside the "alpha" element.
    texts : list of str
        One string per <p> element inside the "alpha" element: the text of a
        <strong> nested in the paragraph's first link if present, otherwise the
        link text, otherwise the paragraph's own text.

    Raises
    ------
    IndexError
        If the page has no element with id "alpha".

    Notes
    -----
    Missing child tags are detected with explicit ``None`` checks rather than
    blanket ``except:`` clauses, so unrelated errors are not hidden.
    """
    pubs = soup.find_all(id="alpha")[0]
    headings = [heading.get_text() for heading in pubs.find_all('h4')]
    texts = [_paragraph_text(paragraph) for paragraph in pubs.find_all('p')]
    return headings, texts

## Grabbing all publications

In [545]:
# Listing pages of the archive: the first page is index.html, the following
# ones are index.2.html up to index.9.html.
arcUrlList = ['https://scholarlycommons.pacific.edu/euler-works/index.html']
arcUrlList.extend(
    'https://scholarlycommons.pacific.edu/euler-works/index.' + str(page) + '.html'
    for page in range(2, 10)
)

In [546]:
# Download and parse every listing page (one request per entry of arcUrlList).
soupsPages = list(map(makeSoup, arcUrlList))

In [547]:
# Flatten the per-page results of findPub into five archive-wide lists.
pdfurls, pdfnumbers, urls, titles, numbers = [], [], [], [], []
for soup in soupsPages:
    pdfurl, pdfnumber, url, title, number = findPub(soup)
    pdfurls.extend(pdfurl)
    pdfnumbers.extend(pdfnumber)
    urls.extend(url)
    titles.extend(title)
    numbers.extend(number)

In [548]:
# One table per kind of link, both keyed by the publication number.
dfPDF = pd.DataFrame(dict(URLpdf=pdfurls, number=pdfnumbers))
dfPub = pd.DataFrame(dict(URLinfo=urls, title=titles, number=numbers))
# The outer merge keeps entries that appear in only one of the two tables.
# NOTE(review): 'number' holds strings, so any key sorting done by the merge is
# lexicographic, and later cells address rows by position - confirm the row
# order with the installed pandas version.
dfEulers = pd.merge(dfPub, dfPDF, how='outer', on='number')

In [549]:
# Copy the PDF link stored in row 867 into row 220, then drop the last row.
# NOTE(review): presumably row 867 is the trailing unmatched row produced by the
# outer merge and row 220 is the publication its PDF belongs to - confirm; both
# positions are hard-coded and depend on the row order of the merge.
# `.loc[row, column]` writes into the frame itself. The chained form
# `dfEulers.URLpdf[220] = ...` assigns through an intermediate Series, which
# pandas warns about and which has no effect when Copy-on-Write is enabled.
dfEulers.loc[220, 'URLpdf'] = dfEulers.loc[867, 'URLpdf']
# Not idempotent: every re-run of this cell removes one more row.
dfEulers = dfEulers.iloc[:-1]

## Gathering all the information about the publications

In [550]:
# Addresses of the individual publication pages, in table order.
urllist = dfEulers['URLinfo'].tolist()

In [498]:
# Slow step: one HTTP request per publication page. Be prepared to wait!
soups = list(map(makeSoup, urllist))

In [551]:
# Per publication page: the list of field headings and the list of field texts.
headings_, texts_ = [], []

for soup in soups:
    headings, texts = findInfo(soup)
    headings_.append(headings)
    texts_.append(texts)

In [552]:
# Turn each page into a {heading: text} record; heading k is paired with text k.
infos = []
for page_headings, page_texts in zip(headings_, texts_):
    # Indexing (rather than zipping) keeps the IndexError when a page has
    # fewer texts than headings.
    info = {heading: page_texts[pos] for pos, heading in enumerate(page_headings)}
    infos.append(info)

In [553]:
# One row per publication page, one column per heading found on any page.
dfInfo = pd.DataFrame(data=infos)
# Attach the page fields to the link table by row index (left join, the default).
dfEuler = dfEulers.join(other=dfInfo, how='left')

In [557]:
# Keep the page fields plus the two link columns, in presentation order.
dfEuler = dfEuler.loc[:, [
    'Enestrom Number',
    'Fuss Index',
    'Title',
    'English Title',
    'Authors',
    'Original Language',
    'Published as',
    'Published Date',
    'Written Date',
    'Original Source Citation',
    'Archive Notes',
    'Content Summary',
    'Opera Omnia Citation',
    'Record Created',
    'URLinfo',
    'URLpdf',
]]

In [558]:
# Display the assembled table as this cell's output.
dfEuler

Unnamed: 0,Enestrom Number,Fuss Index,Title,English Title,Authors,Original Language,Published as,Published Date,Written Date,Original Source Citation,Archive Notes,Content Summary,Opera Omnia Citation,Record Created,URLinfo,URLpdf
0,1,,Constructio linearum isochronarum in medio quocunque resistente,Construction of isochronous curves in a resistant medium,Leonhard Euler,Latin,Journal article,1726,1725,"Acta Eruditorum, Volume 1726, pp. 361-363.",,,"Series 2, Volume 6, pp.1-3.",2018-09-25,https://scholarlycommons.pacific.edu/euler-works/1,https://scholarlycommons.pacific.edu/cgi/viewcontent.cgi?article=1000&context=euler-works
1,2,500,Dissertatio physica de sono,Physical dissertation on sound,Leonhard Euler,Latin,Quarto book,1727,1727,"Basel: E. and J. R. Thurnisiorum, Volume 1727, pp. 208-226.","Euler wrote this Habilitation thesis as part of his application to the Physics chair of the University of Basel in 1727 (it was customary to present a paper of one's research, to demonstrate that one might be of service to the institution). It was reprinted in 1751 in a collection of papers edited by Albrecht von Haller in Göttingen.",Euler first explains his theory of what makes up the atmosphere; the basis of his theory lies in the theory of elasticity of the teacher he had before Johann Bernoulli. He also states (without proof) a formula for the speed of propagation and derives from it numerical values of the correct order of magnitude for air.,"Series 3, Volume 1, pp.181-196.",2018-09-25,https://scholarlycommons.pacific.edu/euler-works/2,https://scholarlycommons.pacific.edu/cgi/viewcontent.cgi?article=1001&context=euler-works
2,3,,Methodus inveniendi traiectorias reciprocas algebraicas,On a method for algebraic reciprocal trajectories,Leonhard Euler,Latin,Journal article,1727,1727,"Acta Eruditorum, Volume 1727, pp. 408-412.",,,"Series 1, Volume 27, pp.1-5.",2018-09-25,https://scholarlycommons.pacific.edu/euler-works/3,https://scholarlycommons.pacific.edu/cgi/viewcontent.cgi?article=1002&context=euler-works
3,4,601,"Meditationes super problemate nautico, quod illustrissima regia Parisiensis Academia Scientiarum proposuit","Thoughts on a nautical problem, proposed by the illustrious Royal Academy of Sciences in Paris",Leonhard Euler,Latin,Paris prize article,1728,1726,"Pièce qui ont remporté le prix de l'académie royale des sciences, Volume 1727, pp. 1-48.",,"Euler uses mechanics to design ships. He adopts Newton's law of resistance (which says that the pressure exerted by a fluid acting against a plane surface is proportional to the square of the speed, neglecting back pressure) by interpreting it as a statement about differential elements of surface. He calls this assumption ""the common hypothesis"" and uses it to calculate the total resistance, via integration. In this way, he arrives at definite answers (often in elegant, explicit forms) that ...","Series 2, Volume 20, pp.1-35.",2018-09-25,https://scholarlycommons.pacific.edu/euler-works/4,https://scholarlycommons.pacific.edu/cgi/viewcontent.cgi?article=1003&context=euler-works
4,5,387,Problematis traiectoriarum reciprocarum solutio,Solution to the problem of reciprocal trajectories,Leonhard Euler,Latin,Journal article,1729,1728,"Commentarii academiae scientiarum Petropolitanae, Volume 2, pp. 90-111.",,"This article contains Euler's first published use of complex numbers and a many-axis geometric construction. Euler also defines even functions f(x) as those for which f(x) = f(-x), perhaps the first use of this term.","Series 1, Volume 27, pp.6-23.",2018-09-25,https://scholarlycommons.pacific.edu/euler-works/5,https://scholarlycommons.pacific.edu/cgi/viewcontent.cgi?article=1004&context=euler-works
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
862,863,,"Seventeen letters from Euler to Johann I Bernoulli, 1727-1740","Seventeen letters from Euler to Johann I Bernoulli, 1727-1740",Leonhard Euler,"Latin, German",Published letter,1740,1740,Bibliotheca Mathematica.,E863,E64,,2018-09-25,https://scholarlycommons.pacific.edu/euler-works/863,https://scholarlycommons.pacific.edu/cgi/viewcontent.cgi?article=1862&context=euler-works
863,864,,"Three letters from Euler to Daniel Bernoulli, 1734-1740","Three letters from Euler to Daniel Bernoulli, 1734-1740",Leonhard Euler,German,Published letter,1907,1740,"Bibliotheca Mathematica, pp. 134-141, 145-153.","According to Eneström (as of 1913), the originals may be found in the Herzoglichen Library in Gotha.","As the title suggests, this is a collection of three letters from Euler to Daniel Bernoulli. The letters in question are dated 16 February 1734, November 1734, and 15 September 1740.",,2018-09-25,https://scholarlycommons.pacific.edu/euler-works/864,https://scholarlycommons.pacific.edu/cgi/viewcontent.cgi?article=1863&context=euler-works
864,865,,Several lines of a letter from Euler to the Royal Society dated 21 October/1 November 1768,Several lines of a letter from Euler to the Royal Society dated 21 October/1 November 1768,Leonhard Euler,English,Published letter,1908,1768,"The Royal Society. Some accounts of the letters and papers of the period 1741-1806 in the archives, by A. H. Church, pp. 16.",This work is likely a translation of Euler's original letter into English.,E364,,2018-09-25,https://scholarlycommons.pacific.edu/euler-works/865,https://scholarlycommons.pacific.edu/cgi/viewcontent.cgi?article=1864&context=euler-works
865,866,,Ein Brief Eulers an d'Alembert,A letter from Euler to d'Alembert,Leonhard Euler,French,Published letter,1911,1748,"Bibliotheca Mathematica, Volume 11, pp. 223-226.",,,,2018-09-25,https://scholarlycommons.pacific.edu/euler-works/866,https://scholarlycommons.pacific.edu/cgi/viewcontent.cgi?article=1865&context=euler-works


## Store the DataFrame

In [559]:
# Uncomment to write the table to disk as a semicolon-separated CSV file:
#dfEuler.to_csv('./data/dfEulerPub_v1.csv', sep = ';')