# Nigerian Election Scraper

This script uses Selenium to scrape images of election results for Lagos, Nigeria from the INEC website.
The starting page is https://inecelectionresults.ng/elections/63f8f25b594e164f8146a213?state=25.
The scraper will go through each of the 20 LGAs in Lagos.

In [None]:
import json
import logging
import os
import time
import urllib
import urllib.request

import requests as req
from selenium import webdriver
from selenium.common.exceptions import ElementClickInterceptedException
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

In [None]:
# Configure logging: everything the scraper reports goes to scraper.log.
logging.basicConfig(filename='scraper.log', level=logging.INFO, format='%(asctime)s:%(levelname)s:%(message)s')

# Load the scraping state. The scraping loop reads it as
# LGA_dict[LGA][ward]['link'] (ward page URL) and LGA_dict[LGA][ward]['done']
# (whether the ward has already been scraped).
with open('LGA_dict.json', 'r') as fp:
    LGA_dict = json.load(fp)

# set homepage url
homepage = 'https://inecelectionresults.ng'

# javascript snippet needed to scroll to element
js_code = "arguments[0].scrollIntoView();"

# Start selenium. The driver path is passed through a Service object: the
# positional executable-path form was deprecated in Selenium 4 and removed
# in 4.10.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
# Timeout handed to every WebDriverWait.
delay = 5
# Exceptions WebDriverWait should ignore while polling for an element.
ignored_exceptions = (NoSuchElementException, StaleElementReferenceException, ElementClickInterceptedException)
logging.info('Start selenium driver')

# Open each ward page that is not yet marked done, download the result image
# of every polling unit into images/<LGA>/<ward>/, and mark the ward as done
# once one file exists per clickable polling unit. Progress is written back
# to LGA_dict.json after every completed ward and again on exit, so an
# interrupted run can be resumed.

# XPath of the <div> that wraps each polling unit's button.
BUTTON_XPATH = '//div[@class="d-flex justify-content-between m-2 p-2 bg-light"]//div[button]'
# XPath of the <div> whose text is used as the polling unit name.
UNIT_NAME_XPATH = '//div[@class="d-flex justify-content-between m-2 p-2 bg-light"]/div[div]'
# Button text that the scraper treats as "a result can be opened".
VIEW_RESULT_TEXT = 'View result'
# Upper bound on passes over one ward. A ward that still fails afterwards is
# left with done == False so that a later run picks it up again, instead of
# blocking every remaining ward forever.
MAX_WARD_ATTEMPTS = 10


def _save_progress():
    """Write LGA_dict (including the 'done' flags) back to LGA_dict.json."""
    with open('LGA_dict.json', 'w') as fp:
        json.dump(LGA_dict, fp)


def _find_result_buttons():
    """Wait until a button wrapper is present, then return all of them."""
    WebDriverWait(driver, delay, ignored_exceptions=ignored_exceptions).until(
        EC.presence_of_element_located((By.XPATH, BUTTON_XPATH)))
    return driver.find_elements(By.XPATH, BUTTON_XPATH)


def _click_button(index, button):
    """Click *button*; on failure re-locate the button at *index* and retry once.

    Any exception raised by the second attempt propagates to the caller.
    """
    try:
        WebDriverWait(driver, delay, ignored_exceptions=ignored_exceptions).until(
            EC.element_to_be_clickable(button)).click()
    except Exception:
        # Not a bare ``except``: KeyboardInterrupt must reach the top-level
        # handler instead of being swallowed by the retry.
        fresh_buttons = driver.find_elements(By.XPATH, BUTTON_XPATH)
        driver.execute_script(js_code, fresh_buttons[index])
        WebDriverWait(driver, delay, ignored_exceptions=ignored_exceptions).until(
            EC.element_to_be_clickable(fresh_buttons[index])).click()


def _fetch_to_file(xpath, attribute, destination):
    """Download the URL stored in *attribute* of the element at *xpath*.

    A partially written *destination* is removed when the download fails, so
    it cannot be mistaken for a finished image later on.
    """
    url = WebDriverWait(driver, delay).until(
        EC.presence_of_element_located((By.XPATH, xpath))).get_attribute(attribute)
    try:
        urllib.request.urlretrieve(url, destination)
    except Exception:
        if os.path.exists(destination):
            os.remove(destination)
        raise


def _download_result_image(destination):
    """Save the result image to *destination*; return True on success.

    The image URL is looked up in an <iframe> first and in an <embed> second.
    """
    # option 1: image is in iframe
    try:
        logging.info('Try downloading image from iframe...')
        _fetch_to_file('//iframe', 'src', destination)
        logging.info('Image downloaded')
        return True
    except Exception as e:
        logging.error('Error downloading image: %s', e)
    # option 2: image is in embed
    try:
        logging.info('Image not in iframe. Trying embed...')
        _fetch_to_file('//embed', 'original-url', destination)
        logging.info('Image downloaded')
        return True
    except Exception as e:
        logging.error('Error downloading image: %s', e)
        return False


def _scrape_polling_unit(index, img_name, path, ward_url):
    """Download the result of one polling unit, or record that none is available.

    Errors are logged and the unit is skipped; they never abort the ward.
    """
    # The button list has to be looked up again on every iteration.
    try:
        buttons = _find_result_buttons()
    except Exception as e:
        logging.error('Error while waiting for buttons to appear: %s', e)
        return

    try:
        button = buttons[index]
        # Only buttons labelled "View result" lead to a result page.
        if button.text != VIEW_RESULT_TEXT:
            return
        driver.execute_script(js_code, button)
    except Exception as e:
        logging.error('Error scrolling to button: %s', e)
        return
    time.sleep(0.5)

    try:
        _click_button(index, button)
    except Exception as e:
        logging.error('Error clicking button: %s', e)
        return

    try:
        try:
            # wait for the result page to load
            time.sleep(1)
            saved = _download_result_image(path + img_name + '.jpg')
            if not saved:
                # Leave a marker file so the unit still counts as handled.
                with open(path + img_name + '.txt', 'w') as f:
                    f.write('Image not available')
                logging.info('Text file created')
        finally:
            # Always return to the ward page, otherwise every following unit
            # would time out waiting for its button.
            driver.get(ward_url)
        if saved:
            time.sleep(1)
    except Exception as e:
        logging.error('Error saving result for polling unit: %s', e)


def _scrape_ward_once(LGA, LGA_name, ward):
    """Make one pass over a ward; return True when the ward is complete.

    A ward is complete when the number of files in its image directory equals
    the number of clickable buttons on the ward page.
    """
    path = f'images/{LGA_name}/{ward}/'
    os.makedirs(path, exist_ok=True)
    ward_url = LGA_dict[LGA][ward]['link']
    driver.get(ward_url)

    buttons = _find_result_buttons()
    clickable_count = sum(1 for b in buttons if b.text == VIEW_RESULT_TEXT)
    # Polling unit names become file names, so path separators and colons
    # are replaced.
    polling_unit_names = [
        e.text.replace('/', '-').replace(':', '-').replace('\\', '-')
        for e in driver.find_elements(By.XPATH, UNIT_NAME_XPATH)
    ]
    logging.info('Number of polling units: %s', len(buttons))
    if len(polling_unit_names) < len(buttons):
        raise ValueError(
            f'found {len(buttons)} buttons but only {len(polling_unit_names)} polling unit names')

    for index, img_name in enumerate(polling_unit_names[:len(buttons)]):
        logging.info('Button number: %s', index)
        logging.info('Polling unit name: %s', img_name)
        # Exact file-name match: a substring test would treat a unit as done
        # whenever its name is contained in another unit's file name.
        existing = os.listdir(path)
        if img_name + '.jpg' in existing or img_name + '.txt' in existing:
            logging.info('Image already exists')
            continue
        _scrape_polling_unit(index, img_name, path, ward_url)

    return len(os.listdir(path)) == clickable_count


try:
    for LGA in LGA_dict:
        # create LGA name by replacing / and : with -
        LGA_name = LGA.replace('/', '-').replace(':', '-')
        logging.info('Scraping LGA: %s', LGA)
        for ward, ward_info in LGA_dict[LGA].items():
            # skip wards that have already been scraped
            if ward_info['done']:
                continue
            logging.info('Scraping ward: %s', ward)
            for _attempt in range(MAX_WARD_ATTEMPTS):
                try:
                    if _scrape_ward_once(LGA, LGA_name, ward):
                        ward_info['done'] = True
                        _save_progress()
                        logging.info('Ward %s scraped successfully', ward)
                        break
                except Exception as e:
                    logging.error('Error scraping ward %s: %s', ward, e)
            else:
                logging.error('Giving up on ward %s after %s attempts', ward, MAX_WARD_ATTEMPTS)
except KeyboardInterrupt:
    # A manual stop is an expected way to end a run; progress is saved below.
    logging.info('Interrupted by user')
finally:
    # Runs on normal completion, on interrupt and on unexpected errors, so
    # progress is saved and the browser is closed in every case.
    try:
        _save_progress()
    finally:
        driver.quit()