# MATHCOUNTS

## Scrape Competition Locations

In [21]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

In [35]:
# Get all competitions from the MATHCOUNTS website.
#
# Walks the paginated dates/locations listing and collects one record per
# table row. The result is `df`, with the columns named in `columns`.

columns = ["Competition Name", "URL", "State", "Date", "Type", "City"]
data = []

siteRoot = "https://www.mathcounts.org"
listingUrl = siteRoot + "/dates-locations-coordinators?field_competition_state_value=All&page="
pageCount = 23       # listing pages requested: 0 .. 22
requestTimeout = 30  # seconds; without a timeout a stalled connection hangs the cell

# A single session reuses the connection across all listing pages.
with requests.Session() as session:
    for page in range(pageCount):
        pageUrl = listingUrl + str(page)
        response = session.get(pageUrl, timeout=requestTimeout)
        # Fail loudly on an HTTP error instead of parsing an error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        table = soup.find("table", {"class": "views-table"})
        body = table.find("tbody") if table is not None else None
        if body is None:
            # No results table on this page: nothing further to collect.
            break

        for row in body.find_all("tr"):
            rowData = row.find_all("td")
            link = rowData[0].find("a") if rowData else None
            if len(rowData) < 5 or link is None or not link.get("href"):
                # Rows without the five expected cells or without a detail link
                # cannot be used by the facility lookup that follows each URL.
                continue

            # Strip surrounding whitespace as well as newlines so the name is
            # cleaned the same way as the other fields.
            competitionName = rowData[0].get_text().replace("\n", "").strip()
            competitionUrl = siteRoot + link["href"]
            state = rowData[1].get_text().strip()
            date = rowData[2].get_text().strip()
            competitionType = rowData[3].get_text().strip()
            city = rowData[4].get_text().strip()

            data.append([competitionName, competitionUrl, state, date, competitionType, city])

df = pd.DataFrame(data, columns=columns)

In [36]:
# Preview the first five scraped competitions.
df.head(n=5)

Unnamed: 0,Competition Name,URL,State,Date,Type,City
0,Alaska State Competition,https://www.mathcounts.org/alaska-state-compet...,AK,,State,Canceled
1,Anchorage Chapter Competition,https://www.mathcounts.org/anchorage-chapter-c...,AK,02/08/2020,Chapter,Anchorage
2,Fairbanks Chapter Competition,https://www.mathcounts.org/fairbanks-chapter-c...,AK,02/15/2020,Chapter,Fairbanks
3,Remote Chapter Competition,https://www.mathcounts.org/remote-chapter-comp...,AK,02/08/2020,Chapter,Anchorage
4,Southeast AK Chapter Competition,https://www.mathcounts.org/southeast-ak-chapte...,AK,02/08/2020,Chapter,Juneau


In [38]:
# Filter to chapter locations by dropping every row whose Type is "State".
isStateLevel = df["Type"].eq("State")
df = df.loc[~isStateLevel]
df.head(n=5)

Unnamed: 0,Competition Name,URL,State,Date,Type,City
1,Anchorage Chapter Competition,https://www.mathcounts.org/anchorage-chapter-c...,AK,02/08/2020,Chapter,Anchorage
2,Fairbanks Chapter Competition,https://www.mathcounts.org/fairbanks-chapter-c...,AK,02/15/2020,Chapter,Fairbanks
3,Remote Chapter Competition,https://www.mathcounts.org/remote-chapter-comp...,AK,02/08/2020,Chapter,Anchorage
4,Southeast AK Chapter Competition,https://www.mathcounts.org/southeast-ak-chapte...,AK,02/08/2020,Chapter,Juneau
6,Birmingham Chapter Competition,https://www.mathcounts.org/birmingham-chapter-...,AL,02/29/2020,Chapter,Vestavia Hills


In [56]:
# Get facility locations for each competition.
#
# Requests every competition's detail page (row["URL"]) and reads the labelled
# fields inside the first <fieldset> on the page. Facility, street address and
# ZIP stay None when the page does not list them.

baseColumns = ["Competition Name", "URL", "State", "Date", "Type", "City"]
columns = baseColumns + ["Facility", "Street Address", "ZIP"]
data = []

# Field label on the page -> position among the three appended values.
labelToSlot = {
    "Competition Facility": 0,
    "Facility Street Address": 1,
    "Facility ZIP Code": 2,
}
requestTimeout = 30  # seconds; without a timeout a stalled connection hangs the cell

# A single session reuses the connection across all detail-page requests.
with requests.Session() as session:
    for _, row in df.iterrows():
        response = session.get(row["URL"], timeout=requestTimeout)
        # Fail loudly on an HTTP error instead of parsing an error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        details = [None, None, None]  # facility, street address, ZIP code

        fieldset = soup.find("fieldset")
        fields = fieldset.find_all("div", {"class": "field"}) if fieldset is not None else []

        for field in fields:
            labelTag = field.find("div", {"class": "field-label"})
            itemTag = field.find("div", {"class": "field-item"})
            if labelTag is None or itemTag is None:
                # A field without both a label and a value carries nothing usable.
                continue

            label = labelTag.text.replace(":", "").strip()
            slot = labelToSlot.get(label)
            if slot is not None:
                details[slot] = itemTag.text.strip()

        # Select the base columns by name rather than list(row): if `df` has
        # already been replaced by the nine-column frame built from `data`,
        # re-running this cell still produces nine values per record.
        data.append([row[name] for name in baseColumns] + details)

In [61]:
# Assemble the collected records into a frame laid out according to `columns`,
# then preview its first five rows.
df = pd.DataFrame(data=data, columns=columns)
df.head(n=5)

Unnamed: 0,Competition Name,URL,State,Date,Type,City,Facility,Street Address,ZIP
0,Anchorage Chapter Competition,https://www.mathcounts.org/anchorage-chapter-c...,AK,02/08/2020,Chapter,Anchorage,University of Alaska Anchorage,,
1,Fairbanks Chapter Competition,https://www.mathcounts.org/fairbanks-chapter-c...,AK,02/15/2020,Chapter,Fairbanks,University of Alaska Fairbanks,,
2,Remote Chapter Competition,https://www.mathcounts.org/remote-chapter-comp...,AK,02/08/2020,Chapter,Anchorage,UAA,3211 Providence Drive,99503.0
3,Southeast AK Chapter Competition,https://www.mathcounts.org/southeast-ak-chapte...,AK,02/08/2020,Chapter,Juneau,University of Alaska Juneau,,
4,Birmingham Chapter Competition,https://www.mathcounts.org/birmingham-chapter-...,AL,02/29/2020,Chapter,Vestavia Hills,Liberty Park Middle School,17035 Liberty Parkway,35242.0


In [63]:
# Print name, facility, street address and ZIP for every competition, one per line.
summaryColumns = ["Competition Name", "Facility", "Street Address", "ZIP"]
for record in df[summaryColumns].itertuples(index=False, name=None):
    print(*record)

Anchorage Chapter Competition  University of Alaska Anchorage None None
Fairbanks Chapter Competition  University of Alaska Fairbanks None None
Remote Chapter Competition  UAA 3211 Providence Drive 99503
Southeast AK Chapter Competition  University of Alaska Juneau None None
Birmingham Chapter Competition  Liberty Park Middle School 17035 Liberty Parkway 35242
Huntsville Chapter Competition  Discovery Middle School 1304 Hughes Rd 35758
Mobile/Southwest Chapter Competition  University of South Alabama Shelby Hall TBD
Montgomery Chapter Competition  AUM Campus None None
Muscle Shoals Chapter Competition  University of North Alabama Stevens Hall Auditorium None
Tuscaloosa Chapter Competition  HM Comer Hall 245 7th Ave 35487
Central Arkansas Chapter Competition  UALR 2801 S. University Avenue 72204
Northwest Arkansas Chapter Competition  Bell Engineering Center, University of Arkansas Fayetteville 800 W. Dickson St. 72701
Central Arizona Chapter Competition  Ninyo & Moore 3202 East Harbour