# Referendum Results

This notebook scrapes the local voting results of the United Kingdom's referendum on membership of the European Union, held on 23 June 2016, from the BBC News website.

In [4]:
# Report which interpreter the kernel runs on, and its version,
# for compatibility/reference
import sys

for interpreter_detail in (sys.executable, sys.version, sys.version_info):
    print(interpreter_detail)

/Applications/JupyterLab.app/Contents/Resources/jlab_server/bin/python
3.8.12 | packaged by conda-forge | (default, Sep 16 2021, 01:59:00) 
[Clang 11.1.0 ]
sys.version_info(major=3, minor=8, micro=12, releaselevel='final', serial=0)


In [5]:
# Install the beautifulsoup4 package using the current Jupyter kernel's own
# Python interpreter (sys.executable), so pip targets the kernel that is running
!{sys.executable} -m pip install beautifulsoup4

Defaulting to user installation because normal site-packages is not writeable
Collecting beautifulsoup4
  Using cached beautifulsoup4-4.10.0-py3-none-any.whl (97 kB)
Collecting soupsieve>1.2
  Using cached soupsieve-2.2.1-py3-none-any.whl (33 kB)
Installing collected packages: soupsieve, beautifulsoup4
Successfully installed beautifulsoup4-4.10.0 soupsieve-2.2.1


In [6]:
# Import requisite libraries for analysis
from collections import defaultdict
from decimal import Decimal
import string
import requests
from bs4 import BeautifulSoup

In [7]:
# Set a constant holding the uppercase ASCII letters A-Z; each letter is
# appended to the base URL to address one lettered results page
ALPHABET = string.ascii_uppercase
print(ALPHABET)

ABCDEFGHIJKLMNOPQRSTUVWXYZ


In [8]:
# Set the base URL of the BBC local-results pages; a single letter is appended
# to it to form the address of each page that gets requested
BASE_URL = 'https://www.bbc.co.uk/news/politics/eu_referendum/results/local/'

In [9]:
# Prepare the containers that the scraping cell fills in:
# - results maps a column name to the list of values collected for it
# - leave_votes / remain_votes hold the overall vote totals
results = defaultdict(list)
leave_votes = 0
remain_votes = 0

In [10]:
# Scrape the per-area results from each lettered index page on the BBC website.
# The status code returned by each page is printed for review.
# NOTE(review): the output recorded with this cell shows a 404 for every page,
# so that run scraped nothing -- confirm the URL scheme is still valid.


def _parse_vote_count(votes_div):
    """Return the integer vote count from the first text line of a votes <div>.

    Surrounding whitespace and thousands separators (commas) are removed
    before the conversion to ``int``.
    """
    return int(votes_div.string.strip().split('\n')[0].strip().replace(',', ''))


def _format_share(votes, total):
    """Return ``votes / total`` as a one-decimal percentage string.

    Returns ``'n/a'`` when ``total`` is zero, so an area without any counted
    votes cannot raise ``ZeroDivisionError``.
    """
    return f"{votes / total:.1%}" if total else 'n/a'


# Start from empty columns so that re-running this cell does not append
# duplicate rows to the lists collected by an earlier run.
results = defaultdict(list)

for letter in ALPHABET:
    page_url = f"{BASE_URL}{letter}"
    page_response = requests.get(page_url, timeout=5)
    print(f"URL: {page_url} | Status Code: {page_response.status_code}")
    if not page_response.ok:
        # Skip pages that answered with an HTTP error status (4xx/5xx)
        continue

    page_content = BeautifulSoup(page_response.content, "html.parser")
    areas = page_content.find_all('div', attrs={'class': 'eu-ref-result-bar'})
    for area in areas:
        # Look the vote <div>s up once; the first is read as the leave count
        # and the second as the remain count
        vote_divs = area.find_all('div', {'class': 'eu-ref-result-bar__votes'})
        cleaned_leave_votes = _parse_vote_count(vote_divs[0])
        cleaned_remain_votes = _parse_vote_count(vote_divs[1])
        area_votes = cleaned_leave_votes + cleaned_remain_votes

        results['area_name'].append(area.find('h3').get_text())
        results['leave_votes'].append(cleaned_leave_votes)
        results['remain_votes'].append(cleaned_remain_votes)
        results['area_votes'].append(area_votes)
        results['leave_percent'].append(_format_share(cleaned_leave_votes, area_votes))
        results['remain_percent'].append(_format_share(cleaned_remain_votes, area_votes))
        results['turnout'].append(
            area.find('div', {'class': 'eu-ref-result-bar__turnout'})
            .get_text().replace('Turnout: ', '')
        )

# Compute the grand totals once, after every page has been processed,
# instead of re-summing the whole list for each scraped area
leave_votes = sum(Decimal(num) for num in results['leave_votes'])
remain_votes = sum(Decimal(num) for num in results['remain_votes'])

# Make an empty scrape visible instead of silently reporting zero totals
if not results['area_name']:
    print("Warning: no area results were scraped; "
          "check BASE_URL and the status codes printed above.")

URL: https://www.bbc.co.uk/news/politics/eu_referendum/results/local/A | Status Code: 404
URL: https://www.bbc.co.uk/news/politics/eu_referendum/results/local/B | Status Code: 404
URL: https://www.bbc.co.uk/news/politics/eu_referendum/results/local/C | Status Code: 404
URL: https://www.bbc.co.uk/news/politics/eu_referendum/results/local/D | Status Code: 404
URL: https://www.bbc.co.uk/news/politics/eu_referendum/results/local/E | Status Code: 404
URL: https://www.bbc.co.uk/news/politics/eu_referendum/results/local/F | Status Code: 404
URL: https://www.bbc.co.uk/news/politics/eu_referendum/results/local/G | Status Code: 404
URL: https://www.bbc.co.uk/news/politics/eu_referendum/results/local/H | Status Code: 404
URL: https://www.bbc.co.uk/news/politics/eu_referendum/results/local/I | Status Code: 404
URL: https://www.bbc.co.uk/news/politics/eu_referendum/results/local/J | Status Code: 404
URL: https://www.bbc.co.uk/news/politics/eu_referendum/results/local/K | Status Code: 404
URL: https

In [11]:
# Print a totals summary for the number of leave and remain votes,
# formatted with thousands separators
print(f"Leave votes: {leave_votes:,}\nRemain votes: {remain_votes:,}")

Leave votes: 0
Remain votes: 0


In [12]:
# Zip together the individual components of an area's result, giving one tuple
# per area in the order: (area_name, leave_votes, leave_percent, remain_votes,
# remain_percent, area_votes, turnout).
# The zip is materialised as a list because a bare zip() is a one-shot
# iterator: after one pass it is exhausted, so re-running any cell that loops
# over it would silently see no rows.
area_results = list(zip(
    results['area_name'],
    results['leave_votes'],
    results['leave_percent'],
    results['remain_votes'],
    results['remain_percent'],
    results['area_votes'],
    results['turnout'],
))

In [13]:
# Gather the per-area result tuples into a list.
# Note: this rebinds `results` from the column mapping to a list of rows.
results = list(area_results)

In [14]:
# Display the collected list of per-area result tuples
results

[]