In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
# Necessary packages:
import requests
import re
import os
import shutil
import sys
import pandas as pd
pd.set_option("display.max_colwidth", 500)
pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 1000)

In [3]:
# If not yet installed, install these packages now:
try:
    import fitz
except ImportError:
    os.system('python -m pip install PyMuPDF')
    import fitz

# Euler opera by Euler Society

In [4]:
eulerSociety=pd.DataFrame()
eulerSociety=pd.read_excel("./data/euler_works_1.xls")
len(eulerSociety)

867

In [5]:
eulerSociety.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 867 entries, 0 to 866
Data columns (total 20 columns):
title                 867 non-null object
eng_title             732 non-null object
id_index_num          866 non-null float64
fuss_index            746 non-null object
language              867 non-null object
written date          813 non-null object
published_date        813 non-null object
presented_date        813 non-null object
pub_type              867 non-null object
document_type         867 non-null object
source_publication    867 non-null object
source_vol            844 non-null object
page_range            859 non-null object
oo_series             832 non-null float64
oo_volnum             832 non-null object
oo_fpage              795 non-null float64
oo_lpage              794 non-null float64
list_view_id_tag      866 non-null object
label                 867 non-null int64
calc_url              867 non-null object
dtypes: float64(4), int64(1), object(15)
memory usag

In [6]:
eulerSociety.head(3)

Unnamed: 0,title,eng_title,id_index_num,fuss_index,language,written date,published_date,presented_date,pub_type,document_type,source_publication,source_vol,page_range,oo_series,oo_volnum,oo_fpage,oo_lpage,list_view_id_tag,label,calc_url
0,Constructio linearum isochronarum in medio quocunque resistente,Construction of isochronous curves in a resistant medium,1.0,,Latin,1725,1725,1725,journal,article,Acta Eruditorum,1726,361-363,2.0,6,1.0,3.0,E1,1,https://scholarlycommons.pacific.edu/euler-works/1
1,Dissertatio physica de sono,Physical dissertation on sound,2.0,500.0,Latin,1727,1727,1727,quarto,book,Basel: E. and J. R. Thurnisiorum,1727,208-226,3.0,1,181.0,196.0,E2,2,https://scholarlycommons.pacific.edu/euler-works/2
2,Methodus inveniendi traiectorias reciprocas algebraicas,On a method for algebraic reciprocal trajectories,3.0,,Latin,1727,1727,1727,journal,article,Acta Eruditorum,1727,408-412,1.0,27,1.0,5.0,E3,3,https://scholarlycommons.pacific.edu/euler-works/3


# PDF of Euler OO Index

In [7]:
doc = fitz.open('./data/EulerOperaOmniaIndex.pdf')
texts = []
for i in range(doc.pageCount):
    page = doc.loadPage(i)
    texts.append(page.getText('text'))

In [8]:
gwtoc=doc.getToC()
document=[]
pagetext={}
regex = r"([a-zé])- "
subst = "\\1"
for pnr,page in enumerate(doc):  # iterate the document pages
    bloks=page.getTextBlocks()
    pagetext[pnr]=page.getText().replace("\n"," ")
    for paranr,blk in enumerate(bloks):
        blkr=blk[4].replace("\n", " ")
        blkr=re.sub(regex, subst, blkr, 0, re.MULTILINE)
        document.append({"p":pnr,"pn":paranr,"par":blkr})

In [9]:
t = pd.DataFrame(document)

In [10]:
t.head(200)

Unnamed: 0,p,pn,par
0,0,0,Comprehensive Index of Leonhard Euler’s Opera Omnia Series prima
1,0,1,Series secunda (in preparation)
2,0,2,Series tertia (in preparation)
3,0,3,Series quarta (in preparation) Suchen
4,0,4,"<image: Indexed(255,DeviceRGB), width 176, height 277, bpc 8>"
5,0,5,
6,0,6,May 2003 Birkhäuser Basel 1707 – St. Petersburg 1783
7,1,0,
8,1,1,Contents
9,1,2,I Short list of all volumes ordered by series II Comprehensive index ordered by volumes


## Euler OO I

In [11]:
EulerI = t[(t.p > 2)&(t.p < 7)]

In [12]:
EulerI = EulerI[(~EulerI.par.str.startswith(('Vol.')))&(EulerI.par.str.startswith(('I.','X','L','V','C')))&(EulerI.par.str.replace(' ', '') != '')].reset_index(drop = True)

In [13]:
vols = []
isbns = []
references = []
titles = []
pages = []
years = []
rests = []
for i in EulerI.par.iloc[::2]:
    vol = re.split('((?<=^I\.\d{1})\s|(?<=^I\.\d{2})\s|(?<=^I\.\d{1}-\d)\s|(?<=^I\.\d{2}-\d)\s)', i)[0].strip()
    rest = re.split('((?<=^I\.\d{1})\s|(?<=^I\.\d{2})\s|(?<=^I\.\d{1}-\d)\s|(?<=^I\.\d{2}-\d)\s)', i)[-1].strip()
    isbn = '3-7643-' + re.split('(?<=^\d{4}-[\dX]) ', rest)[0].strip()
    rest = re.split('(?<=^\d{4}-[\dX]) ', rest)[-1].strip()
    reference = re.split(' (?=E.\d{3})', rest, 1)[0].strip()
    title = re.split(' (?=E.\d{3})', rest, 1)[-1].strip()
    vols.append(vol)
    isbns.append(isbn)
    references.append(reference)
    titles.append(title)
for i in EulerI.par.iloc[1::2]:
    page = re.split(' (?=19\d{2})', i)[0].strip()
    year = re.split(' (?=19\d{2})', i)[1].strip()
    pages.append(page)
    years.append(year)

In [14]:
EulerI = pd.DataFrame({'vol': vols, 'isbn': isbns, 'reference': references, 'title': titles, 'page': pages, 'year': years})

In [15]:
EulerI.head()

Unnamed: 0,vol,isbn,reference,title,page,year
0,I.1,3-7643-1400-1,"Vollständige Anleitung zur Algebra. Mit den Zusätzen von Joseph Louis Lagrange. Herausgegeben von Heinrich Weber. (In German, 1 contribution in French; 5 Ill., 1 plate)","E.387, E.388, P.101, P.102, X.101, X.102","XCV, 651",1911
1,I.2,3-7643-1401-X,"Commentationes arithmeticae. Volumen primum. Edidit Ferdinand Rudio. (25 contributions in Latin, 1 in French)","E.026, E.029, E.036, E.054, E.098, E.100, E.134, E.152, E.158, E.164, E.167, E.175, E.191, E.228, E.241, E.242, E.243, E.244, E.253, E.255, E.256, E.262, E.270, E.271, E.272, E.279, P.103","XXXVII, 611",1915 [1978]
2,I.3,3-7643-1402-8,"Commentationes arithmeticae. Volumen secundum. Edidit Ferdinand Rudio (22 contributions in Latin, 1 in German and 3 in French)","E.283, E.323, E.369, E.394, E.405, E.427, E.428, E.445, E.449, E.451, E.452, E.454, E.461, E.466, E.467, E.474, E.498, E.515, E.523, E.541, E.542, E.552, E.554, E.708a, A.09, A.31, P.104","XXXVII, 543",1917 [1978]
3,I.4,3-7643-1403-6,Commentationes arithmeticae. Volumen tertium. Edidit Rudolf Fueter (In Latin; 2 Ill.),"E.556, E.557, E.558, E.559, E.560, E.564, E.566, E.586, E.591, E.596, E.598, E.610, E.683, E.696, E.699, E.702, E.708, E.713, E.715, E.716, E.718, E.719, E.725, E.732, E.739, E.744, P.105","XXXIII, 431",1941
4,I.5,3-7643-1404-4,"Commentationes arithmeticae. Volumen quartum. Edidit Rudolf Fueter (18 contributions in Latin, 5 in French)","E.748, E.753, E.754, E.755, E.758, E.763, E.764, E.769, E.772, E.773, E.774, E.775, E.776, E.777, E.778, E.792, E.793, E.796, E.797, E.798, E.799, E*1944, E*1944, P.106, P.107, P.108, I.101","LVII, 374",1944


In [16]:
#EulerI.to_csv('./data/EulerOperaOminaI_v1.csv', sep = ';', index=False)

## Euler OO II

In [17]:
EulerII = t[(t.p > 7)&(t.p < 36)]

In [18]:
EulerII = EulerII[(~EulerII.par.str.startswith(('No.')))&(EulerII.par.str.replace(' ', '') != '')].reset_index(drop = True)

In [19]:
s = [i for i in range(len(EulerII.par)) if EulerII.par[i].startswith(('P.','E.','X.','I.','A.','E*'))]

In [20]:
dfs = []
for i in range(len(s[:-1])):
    dfs.append(EulerII.iloc[s[i]:s[i+1]])
dfs.append(EulerII.iloc[s[-1]:])

In [21]:
rows = []
for i in dfs:
    rows.append('_'.join(i.par))
rows[191] = rows[191] + rows[192]
del rows[192]

In [22]:
nos = []
titles = []
sources = []
years = []
vols = []
pages = []
rests = []
for i in rows:
    no = re.split('((?<=^P\.\d{3})\s|(?<=^E\.\d{3})\s|(?<=^E\.\d{3}[a-z])\s|(?<=^X\.\d{3})\s|(?<=^I\.\d{3})\s|(?<=^A\.\d{2})\s|(?<=^E\*\d{4})\s)', i)[0].strip()
    rest = re.split('((?<=^P\.\d{3})\s|(?<=^E\.\d{3})\s|(?<=^E\.\d{3}[a-z])\s|(?<=^X\.\d{3})\s|(?<=^I\.\d{3})\s|(?<=^A\.\d{2})\s|(?<=^E\*\d{4})\s)', i)[-1].strip()
    page = re.split('\s((?=\d{3}–\d{3}$)|(?=\d{3}-\d{3}$)|(?=\d{3}– \d{3}$)|(?=[IVXLC]{1,7}–[IVXLC]{1,7}$)|(?=[IVXLC]{1,7}– [IVXLC]{1,7}$)|(?=[IVXLC]{1,7} –[IVXLC]{1,7}$)|(?=VII$)|(?=403$)|(?=371$)|(?=XI$)|(?=1–308$)|(?=250268–$)|(?=343$))', rest)[-1].strip().replace('– ', '–').replace(' –', '–')
    rest = re.split('\s((?=\d{3}–\d{3}$)|(?=\d{3}-\d{3}$)|(?=\d{3}– \d{3}$)|(?=[IVXLC]{1,7}–[IVXLC]{1,7}$)|(?=[IVXLC]{1,7}– [IVXLC]{1,7}$)|(?=[IVXLC]{1,7} –[IVXLC]{1,7}$)|(?=VII$)|(?=403$)|(?=371$)|(?=XI$)|(?=1–308$)|(?=250268–$)|(?=343$))', rest)[0].strip()
    if page == 'De motu gravium citissimo super curvis  specie datis  _N. Comm. Ac. Petrop.  17 (1772), 1773,   p. 488–504  _1773  I.25  0236–249':
        page = page[-7:].replace('– ', '–')
        rest = rest[:-9].replace('– ', '–')
    if page == '250268–':
        page = '250–268'
    vol = re.split('\s((?=I\.([1-9]|1[0-9]|2[0-9]$))|(?=I\.16-[1-2]$))', rest)[-1].strip()
    rest = re.split('\s((?=I\.([1-9]|1[0-9]|2[0-9]$))|(?=I\.16-[1-2]$))', rest)[0].strip()
    year = re.split('[\s_]((?=1[7-9]\d{2}$)|(?=1907– 1908$)|(?=17954))', rest)[-1].strip().strip('_').strip()
    if year == 'De radicibus aequationis infinitae  _2 4 _6 _0 1 ( 1) ( 1)( 2)( 3) _etc. ( 5) _x x n n n n n n x n n _= − + + + + + _− + + � _N. Acta Ac. Petrop. 9  (1791), 1795, p. 19–40  _17954':
        year = '1795'
    rest = re.split('[\s_]((?=1[7-9]\d{2}$)|(?=1907– 1908$)|(?=17954))', rest)[0].strip().strip('_').strip()
    source = re.split('\s_', rest)[-1].strip()
    title = re.split('\s_', rest)[0].strip()
    if title.startswith('Vorwort ') or title.startswith('Index ') or title == 'Übersicht über den Band 10 der ersten  Serie: Institutiones calculi differentialis  (Andreas Speiser)':
        source = ''
    nos.append(no)
    pages.append(page)
    vols.append(vol)
    years.append(year)
    rests.append(rest)
    sources.append(source)
    titles.append(title)

In [23]:
EulerII = pd.DataFrame({'no': nos, 'title': titles, 'source': sources, 'year': years, 'vol': vols, 'pages': pages})

In [24]:
EulerII.head()

Unnamed: 0,no,title,source,year,vol,pages
0,P.101,Vorwort zur Gesamtausgabe der Werke von Leonhard Euler (Ferdinand Rudio),,1911,I.1,IX–XLI
1,P.102,Aufruf zur Unterstützung der von der Schweiz. Naturforschenden Gesellschaft in Aussicht genommenen Herausgabe der Werke Leonhard Eulers durch Subskriptionen und durch Zeichnung von freiwilligen Beiträgen.,"Basel und Zürich, April 1909",1911,I.1,XXII–XXV
2,X.101,"Nicolaus Fuss: Lobrede auf Herrn Leonhard Euler, in der Versammlung der Kayserlichen Akademie der Wissenschaften zu St. Petersburg den 23. Octob. 1783 vorgelesen. Von dem Verfasser selbst aus dem französischen übersetzt etc.",Basel 1786,1786,I.1,XLIII–XCV
3,E.387,"Vollständige Anleitung zur Algebra. Erster Theil. Von den verschiedenen RechnungsArten, Verhältnissen und Proportionen. (ˇContent)",St. Petersburg 1770,1770,I.1,001–208
4,E.388,Vollständige Anleitung zur Algebra. Zweiter Theil. Von Auflösung algebraischer Gleichungen und der unbestimmten Analytic. (ˇContent),St. Petersburg 1770,1770,I.1,209–498


In [25]:
#EulerII.to_csv('./data/EulerOperaOminaII_v1.csv', sep = ';', index=False)

## Euler OO III

still to be done!