# Making a DataFrame Containing Euler's Correspondences from a Webpage

## Makes usable screen bigger

In [95]:
# Stretch the notebook's `.container` element to the full browser width.
# `display` and `HTML` come from the public `IPython.display` module; importing
# `display` from `IPython.core.display` is deprecated since IPython 7.14.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

## Packages & definitions

In [96]:
# Necessary packages:
import requests
import re
import os
import shutil
import sys
import pandas as pd
import urllib.request as urllib2
# Let long text cells and large tables render in full instead of being
# truncated in the notebook output.
pd.options.display.max_colwidth = 500
pd.options.display.max_rows = 500
pd.options.display.max_columns = 1000

# Beautiful Soup is required for parsing; if it is missing, install it into
# the interpreter that is running this notebook and retry the import.
try:
    from bs4 import BeautifulSoup as bs
except ImportError:
    import subprocess

    # `sys.executable -m pip` targets the active environment, whereas a bare
    # `python` may resolve to a different installation on PATH.
    # `beautifulsoup4` is the real distribution name (`bs4` on PyPI is only a
    # placeholder), and check_call raises if the installation fails instead of
    # silently continuing to a second ImportError.
    subprocess.check_call(
        [sys.executable, '-m', 'pip', 'install', 'beautifulsoup4']
    )
    from bs4 import BeautifulSoup as bs

In [97]:
def makeSoup(url, timeout=30):
    """Download a web page and parse it into a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server, forwarded to
            ``requests.get``. Without a timeout a stalled connection would
            block the notebook indefinitely.

    Returns:
        The response body parsed with the ``'html.parser'`` backend.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeouts.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an HTTP error rather than parsing an error page as if it
    # were the requested document.
    response.raise_for_status()
    return bs(response.content, 'html.parser')

In [111]:
def findCorres(soup, skip=22):
    """Extract correspondent names and per-entry summary data from a page.

    Args:
        soup: Parsed page; only its ``find_all`` and ``get_text`` methods are
            used.
        skip: Number of leading space-indented text lines to discard before
            the first entry. Defaults to 22, the value that used to be
            hard-coded (presumably the page header/navigation; confirm
            against the live page if the layout changes).

    Returns:
        A tuple ``(names, entries, times, letters)`` of lists:

        * ``names``   - stripped text of every ``<b>`` element.
        * ``entries`` - one ``'Corresponded with ...'`` sentence per kept line.
        * ``times``   - the four-digit numbers found in each entry, joined
          with ``' - '``.
        * ``letters`` - the first letter count of each entry as a string
          (``'one'`` is normalised to ``'1'``), or ``None`` when the entry
          contains no recognisable count.
    """
    # get_text() already gathers the text of nested tags (e.g. a link inside
    # the <b>), so no per-tag fallback is required.
    names = [tag.get_text().strip() for tag in soup.find_all('b')]

    year_pattern = re.compile(r'\d{4}')
    count_pattern = re.compile(r'(?<=\s)one|\d{1,3}(?= letter)')
    marker = ' Corresponded with '
    filler = '\xa0\xa0\xa0\xa0\xa0 '

    # Only lines that start with a space are considered entry lines.
    indented = [
        line.strip().replace(filler, '')
        for line in soup.get_text().split('\n')
        if line.startswith(' ')
    ]
    # Keep only the sentence part; whatever precedes the marker is dropped.
    entries = [
        'Corresponded with ' + line.split(marker)[-1]
        for line in indented[skip:]
    ]

    times = [' - '.join(year_pattern.findall(entry)) for entry in entries]

    letters = []
    for entry in entries:
        match = count_pattern.search(entry)
        if match is None:
            # Keep the lists aligned instead of aborting the whole scrape on
            # one entry without a count; pd.to_numeric turns None into NaN.
            letters.append(None)
            continue
        count = match.group(0)
        letters.append('1' if count == 'one' else count)

    return names, entries, times, letters

## Grabbing all correspondences

In [99]:
# Page listing Euler's correspondents (alphabetical, judging by the URL).
url = 'http://eulerarchive.maa.org/correspondence/correspondents-alph.html'
# Download the page and parse it into a soup object.
soup = makeSoup(url)

In [112]:
# Pull the four lists (names, summary sentences, year spans, letter counts)
# out of the parsed page.
names, entries, times, letters = findCorres(soup)

In [117]:
# Assemble the table, one column per extracted list; the lists must all have
# the same length for the constructor to succeed.
dfEuler = pd.DataFrame({'Name': names, 'Time': times, 'LetterCount': letters, 'Entries': entries})

In [119]:
# The counts were extracted as strings; convert the column to a numeric dtype.
dfEuler['LetterCount'] = pd.to_numeric(dfEuler['LetterCount'])

In [120]:
# Show the resulting table as the cell output.
dfEuler

Unnamed: 0,Name,Time,LetterCount,Entries
0,"Abbt, Thomas",1759,1,"Corresponded with Euler in 1759. The Euler-Abbt correspondence consists of only one letter, written by Abbt."
1,"Adami, Jacob",1746 - 1759,11,"Corresponded with Euler from 1746 to 1759. The Euler-Adami correspondence consists of 11 letters, none of which were written by Euler."
2,"Admiralty Office,",1765,1,"Corresponded with Euler in 1765. The Euler-Admiralty Office correspondence consists of only one letter, written by Admiralty Office."
3,"Aepinus, A.F.D.",1754,1,"Corresponded with Euler in 1754. The Euler-Aepinus correspondence consists of only one letter, written by Aepinus."
4,"Aepinus, Franz Ulrich Theodor",1754 - 1758,9,"Corresponded with Euler from 1754 to 1758. The Euler-Aepinus correspondence consists of 9 letters, none of which were written by Euler."
5,"d'Alembert, Jean le Rond",1746 - 1773,39,"Corresponded with Euler from 1746 to 1773. The Euler-d'Alembert correspondence consists of 39 letters, of which 14 were written by Euler."
6,"Argenson, Marc Antoine Rene de Voyer, Marquis de Paulmy d'",1755,1,"Corresponded with Euler in 1755. The Euler-d'Argenson correspondence consists of only one letter, written by d'Argenson."
7,"Aubar, S.",1776,1,"Corresponded with Euler in 1776. The Euler-Aubar correspondence consists of only one letter, written by Aubar."
8,"Baermann, George Friedrich",1745 - 1754,8,"Corresponded with Euler from 1745 to 1754. The Euler-Baermann correspondence consists of 8 letters, none of which were written by Euler."
9,"Bailly, Jean Sylvain",1766,1,"Corresponded with Euler in 1766. The Euler-Bailly correspondence consists of only one letter, written by Euler."


## Store the DataFrame

In [121]:
# Export is disabled; uncomment to write the table as a semicolon-separated
# CSV without the index column (the ./data directory must already exist).
#dfEuler.to_csv('./data/dfEulerCorres_v1.csv', sep = ';', index=False)