In [1]:
import requests
import re
import pandas
import time
import datetime
from bs4 import BeautifulSoup
from math import isnan
import dateutil.parser as dparser
from dateutil.parser import ParserError

In [2]:
 #Get the original page
def get_boe_page(url, timeout=30):
    """Download a BOE lookup page and return it as parsed HTML.

    Args:
        url: Address of the lookup form to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled server.

    Returns:
        A ``BeautifulSoup`` document built from the response body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here rather than later, when the hidden form fields turn out to be
    # missing from an error page.
    response.raise_for_status()
    # Name the parser explicitly: otherwise bs4 picks whichever parser happens
    # to be installed (and warns), and get_status already uses "html.parser".
    return BeautifulSoup(response.content, "html.parser")

In [3]:
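# Reference only (never executed): a lookup request captured from the browser as cURL.
# The form fields in --data-raw are the ones the POST in get_status sends.
# curl 'https://lookup.boe.ohio.gov/vtrapp/athens/avlookup.aspx' \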
#   -H 'authority: lookup.boe.ohio.gov' \
#   -H 'cache-control: max-age=0' \
#   -H 'origin: https://lookup.boe.ohio.gov' \
#   -H 'upgrade-insecure-requests: 1' \
#   -H 'dnt: 1' \
#   -H 'content-type: application/x-www-form-urlencoded' \
#   -H 'user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36' \
#   -H 'accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9' \
#   -H 'sec-fetch-site: same-origin' \
#   -H 'sec-fetch-mode: navigate' \
#   -H 'sec-fetch-user: ?1' \
#   -H 'sec-fetch-dest: document' \
#   -H 'referer: https://lookup.boe.ohio.gov/vtrapp/athens/avlookup.aspx' \
#   -H 'accept-language: en-US,en;q=0.9' \
#   -H 'cookie: _ga=GA1.2.1662054538.1572970731; __cfduid=d1db2dc7146ccac0c64541b5d5540b7fc1604168643; _gid=GA1.2.886396333.1604168645; _gat_gtag_UA_127176238_3=1' \
#   --data-raw '__LASTFOCUS=&__EVENTTARGET=&__EVENTARGUMENT=
#    &__VIEWSTATE=...
#    &__VIEWSTATEGENERATOR=B6786635
#    &__EVENTVALIDATION=...
#    &cmbelectionlist=20201103G
#    &frmLname=Lachman
#    &frmfname=Benjamin
#    &btnsubmit=Submit
#    &txt_byear=&txt_bmvid=&hid_byear=&hid_bmvid=&hid_app=' \
#   --compressed

In [23]:
# Scrape the state data we need to validate the form request
def get_status(base_url, fname, mname, lname, target_page, election_id='20201103G'):
    """Look up one voter's absentee-ballot status on a BOE lookup form.

    Posts the search form at ``base_url`` and reads the result table.

    Args:
        base_url: URL the search form is posted to.
        fname: First name sent as ``frmfname``.
        mname: Middle name. Only used to disambiguate when the search returns
            more than one record; a non-string value (e.g. the float NaN pandas
            produces for an empty CSV cell) is treated as "no middle name".
        lname: Last name sent as ``frmLname``.
        target_page: Parsed lookup page (as returned by ``get_boe_page``) from
            which the hidden ``__VIEWSTATE*`` / ``__EVENTVALIDATION`` values
            are read.
        election_id: Value sent as ``cmbelectionlist``.

    Returns:
        A dict that always contains ``ballot_requested`` (bool). When a
        matching record is found it may also contain ``applied_date``,
        ``type``, ``send_date``, ``received_date`` and ``ballot_verified``.
        Date values are ``datetime`` objects, or None when the cell text could
        not be parsed. If several records match, the last one wins.
    """
    # Hidden fields scraped from the form page; they are echoed back so the
    # server will validate the form request.
    viewstate = target_page.find('input', id='__VIEWSTATE')['value']
    eventvalidation = target_page.find('input', id='__EVENTVALIDATION')['value']
    viewstategenerator = target_page.find('input', id='__VIEWSTATEGENERATOR')['value']
    params = {'__EVENTTARGET': '', '__EVENTARGUMENT': '', '__VIEWSTATE': viewstate,
              '__VIEWSTATEGENERATOR': viewstategenerator,
              '__EVENTVALIDATION': eventvalidation,
              'cmbelectionlist': election_id,
              'frmLname': lname,
              'frmfname': fname,
              'btnsubmit': 'Submit'}

    # Use the validation data when making the request for all current applications
    r = requests.post(base_url, data=params)
    soup = BeautifulSoup(r.text, "html.parser")

    status = {'ballot_requested': False}

    results = soup.select_one('#numresults p')
    if results is None:
        return status

    match = re.search(r'Total Records found:\s?(\d+)', results.text)
    if match is None:
        # The summary paragraph exists but does not carry a record count.
        return status

    num_results = int(match.group(1))
    if num_results == 0:
        return status

    rows = soup.select('tr')
    cell_selector = '.oddrow~ td , .oddrow+ td , td:nth-child(1)'

    for i in range(num_results):
        # Each record occupies two consecutive table rows, starting at row 2:
        # a summary row followed by a description row.
        summary_idx = 2 + i * 2
        detail_idx = summary_idx + 1
        if summary_idx >= len(rows):
            # The reported count exceeds the rows actually on this page.
            break

        skip = False

        for cell in rows[summary_idx].select(cell_selector):
            ctype = cell['headers'][0]
            text = cell.text.strip(' \n\t\r')

            if ctype == 'Full_Name':
                # A single hit is accepted without checking the middle name.
                if num_results == 1:
                    continue

                if not isinstance(mname, str) or mname not in text:
                    skip = True
                    break
            elif ctype == 'App_Date':
                status['ballot_requested'] = True
                status['applied_date'] = _parse_date_or_none(text)
            elif ctype == 'App_Type':
                status['type'] = text.strip()
            elif ctype == 'Ballot_Processed':
                # A failed parse must clear send_date, not received_date.
                status['send_date'] = _parse_date_or_none(text, fuzzy=True)
            elif ctype == 'Ballot_Received':
                status['received_date'] = _parse_date_or_none(text, fuzzy=True)
            else:
                # Surface unexpected columns so layout changes get noticed.
                print(f'{ctype}: {cell}')

        if skip or detail_idx >= len(rows):
            continue

        for cell in rows[detail_idx].select(cell_selector):
            if cell['headers'][0] == 'Description':
                status['ballot_verified'] = 'Date Approved for Counting:' in cell.text

    return status


def _parse_date_or_none(text, fuzzy=False):
    """Parse ``text`` with dateutil; return None when it holds no parsable date."""
    try:
        return dparser.parse(text, fuzzy=fuzzy)
    except ParserError:
        return None

In [24]:
def progressBar(iterable, prefix = '', suffix = '', decimals = 1, length = 100, numitems = 0, fill = '█', printEnd = "\r"):
    """
    Wrap an iterable so that a text progress bar is redrawn as it is consumed.

    This is a generator: it yields ``(index, item)`` pairs (like ``enumerate``)
    and redraws the bar after the caller has finished with each item.

    @params:
        iterable    - Required  : items to iterate over (Iterable)
        prefix      - Optional  : prefix string (Str)
        suffix      - Optional  : suffix string (Str)
        decimals    - Optional  : positive number of decimals in percent complete (Int)
        length      - Optional  : character length of bar (Int)
        numitems    - Optional  : total number of items; when 0 the total is
                                  taken from len(iterable), so it must be given
                                  for generators and other unsized iterables (Int)
        fill        - Optional  : bar fill character (Str)
        printEnd    - Optional  : end character (e.g. "\\r", "\\r\\n") (Str)
    """

    total = numitems if numitems else len(iterable)

    # Progress Bar Printing Function
    def printProgressBar (iteration):
        if total:
            fraction = iteration / float(total)
            filledLength = int(length * iteration // total)
        else:
            # Nothing to iterate over: draw a full bar instead of dividing by zero.
            fraction = 1.0
            filledLength = length
        percent = ("{0:." + str(decimals) + "f}").format(100 * fraction)
        bar = fill * filledLength + '-' * (length - filledLength)
        print(f'\r{prefix} |{bar}| {percent}% {suffix}', end = printEnd)

    # Initial Call
    printProgressBar(0)
    # Update Progress Bar
    for i, item in enumerate(iterable):
        yield i, item
        printProgressBar(i + 1)
    # Print New Line on Complete
    print()
    

In [25]:
# Smoke test against the Athens County lookup form: fetch the page once to get
# the hidden form fields, then look up a single voter. The bare call on the
# last line makes the notebook display the returned status dict.
url = 'https://lookup.boe.ohio.gov/vtrapp/athens/avlookup.aspx'
response_soup = get_boe_page(url)
get_status(base_url=url, fname='BENJAMIN', mname='Joseph', lname='Lachman', target_page=response_soup)

{'ballot_requested': True,
 'applied_date': datetime.datetime(2020, 8, 18, 0, 0),
 'type': 'MAIL - ABSENTEE',
 'send_date': datetime.datetime(2020, 10, 6, 0, 0),
 'received_date': datetime.datetime(2020, 10, 21, 0, 0),
 'ballot_verified': True}

In [51]:
def add_statuses(csv_filename, boe_url, response, limit=None):
    """Look up every voter listed in a CSV and append the status columns.

    Args:
        csv_filename: Path of a CSV with ``FIRSTN``, ``MIDDLEN`` and ``LASTN``
            columns.
        boe_url: Lookup form URL passed to ``get_status`` for every row.
        response: Parsed lookup page passed to ``get_status`` as
            ``target_page``; the same page is reused for all rows.
        limit: Optional cap on the number of leading rows to process. None
            (the default) processes the whole file.

    Returns:
        A DataFrame holding the processed rows of the CSV side by side with
        one column per key returned by ``get_status``.
    """
    df = pandas.read_csv(csv_filename)

    num = len(df.index)
    if limit is not None:
        num = max(0, min(limit, num))
    subset = df[:num]

    statuses = []
    # Skip the loop for an empty selection: progressBar treats numitems=0 as
    # "unknown" and would call len() on the iterrows() generator.
    if num > 0:
        for _, (_, row) in progressBar(subset.iterrows(), prefix = 'Progress:', suffix = 'Complete', numitems=num):
            statuses.append(
                get_status(base_url=boe_url, fname=row['FIRSTN'], mname=row['MIDDLEN'],
                           lname=row['LASTN'], target_page=response)
            )

    # Build the frame once from the list of dicts; keys absent from a given
    # status become missing values in that row.
    status_df = pandas.DataFrame(statuses)

    return pandas.concat([subset, status_df], axis=1)

In [52]:
# County names as they appear in the lookup URL path and in the CSV file names.
counties = ['athens', 'hocking', 'jackson', 'meigs', 'morgan', 'vinton', 'washington']
# Result frames keyed by county name; a later cell reads this dict to summarise.
out = dict()

for county in counties:
    url = f'https://lookup.boe.ohio.gov/vtrapp/{county}/avlookup.aspx'
    # NOTE: `csv` here is just the input path string, not the stdlib csv module.
    csv = f'csv/{county}-absentee_list-2020_08_13-final.csv'
    # The output name embeds today's date, so a re-run on a later day writes a new file.
    csv_out = f'csv/{county}-absentee_results-{datetime.date.today()}.csv'
    
    # Fetched once per county; add_statuses reuses this page for every voter lookup.
    response_soup = get_boe_page(url)
    
    print(f'Counting result for: {county}')
    
    df = add_statuses(csv, url, response_soup)

    df.to_csv(csv_out)
    out[county] = df

Counting result for: athens
Progress: |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.0% Complete
Counting result for: hocking
Progress: |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.0% Complete
Counting result for: jackson
Progress: |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.0% Complete
Counting result for: meigs
Progress: |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.0% Complete
Counting result for: morgan
Progress: |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.0% Complete
Counting result for: vinton
Progress: |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.0% Complete
Counting result for: washington
Progress: |██████████████

In [53]:
# Per-county summary: row count, then how many voters requested a ballot and
# how many of those ballots were approved for counting.
for key, county_df in out.items():
    print(f'{key}: ({len(county_df.index)})')
    for column in ('ballot_requested', 'ballot_verified'):
        print(county_df[column].value_counts())
    print('\n')

athens: (2556)
False    1289
True     1267
Name: ballot_requested, dtype: int64
True     1189
False      78
Name: ballot_verified, dtype: int64


hocking: (1950)
True     1429
False     521
Name: ballot_requested, dtype: int64
True     1403
False      26
Name: ballot_verified, dtype: int64


jackson: (1950)
False    1898
True       52
Name: ballot_requested, dtype: int64
True     49
False     3
Name: ballot_verified, dtype: int64


meigs: (1369)
True     855
False    514
Name: ballot_requested, dtype: int64
True     832
False     23
Name: ballot_verified, dtype: int64


morgan: (788)
True     563
False    225
Name: ballot_requested, dtype: int64
True    563
Name: ballot_verified, dtype: int64


vinton: (1053)
True     627
False    426
Name: ballot_requested, dtype: int64
True     589
False     38
Name: ballot_verified, dtype: int64


washington: (1335)
True     673
False    662
Name: ballot_requested, dtype: int64
True    673
Name: ballot_verified, dtype: int64


