In [1]:
import time
from urllib.parse import urljoin

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# Faculty directory page of the UIUC math department; the profile cards
# scraped in the cells below come from this page.
uiuc_faculty_url = 'https://math.illinois.edu/directory/faculty'

# requests applies no timeout by default, so an unresponsive server would
# hang the cell forever. Also fail loudly on an HTTP error instead of
# silently parsing an error page (which would just yield zero cards).
directory_response = requests.get(uiuc_faculty_url, timeout=30)
directory_response.raise_for_status()
html_text = directory_response.text

In [3]:
def personal_website_text(url, timeout=30):
    """Return the lower-cased text content of the page at ``url``.

    Parameters
    ----------
    url : str
        Address of the page to download.
    timeout : float, optional
        Seconds to wait for the server before giving up. requests waits
        indefinitely when no timeout is given, which would stall the whole
        scrape on a single unresponsive page.

    Returns
    -------
    str
        The text extracted by BeautifulSoup (lxml parser), lower-cased so
        callers can do case-insensitive substring checks.

    Raises
    ------
    requests.exceptions.RequestException
        If the request fails or times out. HTTP error statuses are not
        raised; the body of an error page is returned as text.
    """
    response = requests.get(url, timeout=timeout)
    page = BeautifulSoup(response.text, 'lxml')
    return page.get_text().lower()

In [4]:
# Debugging aid: uncomment to inspect the raw directory HTML.
# print(html_text)

In [5]:
# Parse the downloaded directory HTML with the lxml parser.
soup = BeautifulSoup(markup=html_text, features='lxml')

In [22]:
# Every faculty member is rendered as an <article class="profile-card">.
cards = soup.find_all('article', class_='profile-card')

# Tunables for the scrape.
BASE_URL = 'https://math.illinois.edu'  # profile hrefs are resolved against this
PROGRESS_EVERY = 10                     # print a progress line every N people
REQUEST_DELAY_S = 1                     # pause between requests, in seconds

# lists for DataFrame (kept index-aligned: one entry per scraped person)
names = []
titles = []
websites = []
does_research = []
mentions_diff_eq = []

sentinel = 0     # number of people scraped so far
h = time.time()  # start time, used for the elapsed-time progress output

print('Starting!!!')

for card in cards:

    # Split the card text into lines; strip each one and drop blank or
    # whitespace-only lines so index 0 / 1 are really the name / title.
    prof_data = [line.strip() for line in card.text.split('\n') if line.strip()]
    link = card.find('a', class_='profile-card__link')

    # A card without a name or a profile link cannot be scraped; skip it
    # rather than crashing with IndexError / TypeError part-way through.
    if not prof_data or link is None or not link.get('href'):
        print('Skipping a card with no name or no profile link')
        continue

    name = prof_data[0]
    # Some cards may carry no title line; record an empty title then.
    title = prof_data[1] if len(prof_data) > 1 else ''
    # urljoin handles both site-relative and absolute hrefs.
    pw_url = urljoin(BASE_URL, link['href'])

    # get the profile page's text (lower-cased by the helper)
    pw_text = personal_website_text(pw_url)

    # Append to all five lists together, only after the fetch succeeded,
    # so the lists can never end up with different lengths.
    names.append(name)
    titles.append(title)
    websites.append(pw_url)
    # NOTE(review): in the recorded run every profile matched 'research'
    # (178 of 178), which suggests the word may come from the shared page
    # template rather than the person's own text - consider restricting the
    # search to the main content element.
    does_research.append('research' in pw_text)
    mentions_diff_eq.append('differential equation' in pw_text)

    # increase sentinel
    sentinel += 1

    # print progress every PROGRESS_EVERY faculty
    if sentinel % PROGRESS_EVERY == 0:
        print("{t:.2f}s: Done with {s} people...".format(t=time.time() - h, s=sentinel))

    # be polite to the server between requests
    time.sleep(REQUEST_DELAY_S)

print('Done!!!')

Starting!!!
13.42s: Done with 10 people...
27.83s: Done with 20 people...
43.39s: Done with 30 people...
59.66s: Done with 40 people...
75.97s: Done with 50 people...
92.62s: Done with 60 people...
109.43s: Done with 70 people...
125.90s: Done with 80 people...
141.75s: Done with 90 people...
157.68s: Done with 100 people...
173.51s: Done with 110 people...
190.16s: Done with 120 people...
209.92s: Done with 130 people...
251.59s: Done with 140 people...
271.15s: Done with 150 people...
289.28s: Done with 160 people...
307.70s: Done with 170 people...
Done!!!


In [23]:
# Assemble the per-person lists into one table: each list becomes a column
# and each position across the lists becomes a row.
d = dict(
    name=names,
    title=titles,
    website=websites,
    does_research=does_research,
    mentions_diff_eq=mentions_diff_eq,
)

df = pd.DataFrame(d)
df

Unnamed: 0,name,title,website,does_research,mentions_diff_eq
0,Scott Ahlgren,Professor,https://math.illinois.edu/directory/profile/sa...,True,False
1,Pierre Albin,Professor and Helen Corley Petit Scholar,https://math.illinois.edu/directory/profile/pa...,True,False
2,Matthew Ando,Professor of Mathematics and Associate Dean fo...,https://math.illinois.edu/directory/profile/mando,True,False
3,Jozsef Balog,Professor and J. Andrew and Susan Langan Scholar,https://math.illinois.edu/directory/profile/jobal,True,False
4,Yuliy Baryshnikov,Professor of Mathematics and Electrical and Co...,https://math.illinois.edu/directory/profile/ymb,True,False
...,...,...,...,...,...
173,Paul M. Weichsel,Professor Emeritus,https://math.illinois.edu/directory/profile/we...,True,False
174,Elliot Weinberg,Professor Emeritus,https://math.illinois.edu/directory/profile/ecw,True,False
175,Douglas B. West,Professor Emeritus,https://math.illinois.edu/directory/profile/dwest,True,False
176,John E. Wetzel,Professor Emeritus,https://math.illinois.edu/directory/profile/j-...,True,False


In [29]:
# How many profile pages contain the word 'research' (True) vs. not (False).
df.loc[:, 'does_research'].value_counts()

True    178
Name: does_research, dtype: int64

In [None]:
# How many profile pages contain the phrase 'differential equation'.
df.loc[:, 'mentions_diff_eq'].value_counts()

In [25]:
# Show only the people whose profile page mentions 'differential equation'.
df.loc[df['mentions_diff_eq']]

Unnamed: 0,name,title,website,does_research,mentions_diff_eq
9,Alfred Chong,Assistant Professor,https://math.illinois.edu/directory/profile/wf...,True,True
10,Lee DeVille,"Director of Graduate Studies, Professor",https://math.illinois.edu/directory/profile/rd...,True,True
17,M. Burak Erdoğan,Professor,https://math.illinois.edu/directory/profile/be...,True,True
26,Vera Mikyoung Hur,Professor and Philippe Tondeur ScholarBrad and...,https://math.illinois.edu/directory/profile/ve...,True,True
32,Kay Kirkpatrick,"Associate Professor, Blackwell Scholar, and He...",https://math.illinois.edu/directory/profile/kk...,True,True
33,Eduard-Wilhelm Kirr,Associate Professor,https://math.illinois.edu/directory/profile/ekirr,True,True
37,Richard S. Laugesen,Professor,https://math.illinois.edu/directory/profile/la...,True,True
49,Zoi Rapti,Associate Professor,https://math.illinois.edu/directory/profile/zr...,True,True
52,Renming Song,Professor,https://math.illinois.edu/directory/profile/rsong,True,True
59,Nikolaos Tzirakis,Professor,https://math.illinois.edu/directory/profile/tz...,True,True


In [28]:
# Save the full table and the differential-equation subset as CSV files.
# The row index is written as the first column (pandas default), so the
# subset keeps each person's row number from the full table.
df.to_csv(path_or_buf='uiuc_all.csv')
df.loc[df['mentions_diff_eq']].to_csv(path_or_buf='uiuc_diff_eq.csv')