In [75]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys #gives us access to keys such as Enter, Backspace, Tab, Escape etc
from selenium.webdriver.support.ui import WebDriverWait as W
from selenium.webdriver.support import expected_conditions as E
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import pandas as pd
import time

In [37]:
#Search fields for api call
#$ curl -v -H "Accept: text/csv" -H "Authorization: Basic <>" "https://epc.opendatacommunities.org/api/v1/non-domestic/search?postcode=M1"

In [76]:
df_ec_cepc = pd.read_csv('./data/ec-certificates.csv')

In [77]:
#EXTRACTS ALL UNIQUE ZIPCODES WHERE CEPCs ARE FOUND
target_zipcodes = df_ec_cepc['POSTCODE'].sort_values().unique().tolist()
target_zipcodes_sample = target_zipcodes[4:25]
target_zipcodes_sample

['EC1A 2AT',
 'EC1A 2BN',
 'EC1A 2DH',
 'EC1A 2DJ',
 'EC1A 2DP',
 'EC1A 2EJ',
 'EC1A 2FD',
 'EC1A 2FG',
 'EC1A 4AS',
 'EC1A 4EN',
 'EC1A 4HD',
 'EC1A 4HJ',
 'EC1A 4HT',
 'EC1A 4HY',
 'EC1A 4JA',
 'EC1A 4JN',
 'EC1A 4JP',
 'EC1A 4JQ',
 'EC1A 4JR',
 'EC1A 4JU',
 'EC1A 4LX']

In [78]:
#INITIALIZE DATAFRAME + GET TO POSTCODE ENTRY PAGE
df_target = pd.DataFrame(columns = ['Address', 'Inspection Date', 'Inspection Level','Assessment software', 'Assessor’s declaration', 
              'F-Gas compliant date', 'Total effective rated output', 'System sampling', 
                'Treated floor area', 'Subsystems metered','Total estimated refrigerant charge' ,'Hyperlink'])

driver = webdriver.Chrome()
wait_time_out = 5
wait_variable = W(driver, wait_time_out)
#We now need to run our previous script for every ZIPCODE in this target_zipcodes list (108547 unique zipcodes)
driver.get("https://www.gov.uk/find-energy-certificate")
time.sleep(1)

try:
    accept_cookies_1 = W(driver, 2).until(
        EC.element_to_be_clickable(
            (By.CSS_SELECTOR, "#global-cookie-message > div.govuk-cookie-banner.js-banner-wrapper > div > div.govuk-button-group > button:nth-child(1)")
        )
    )
except NoSuchElementException:
    pass
accept_cookies_1.click()

#Get to zipcode entry page
start_now = driver.find_element(By.CSS_SELECTOR, "#get-started > a")
start_now.click()

#Accept analytical cookies
try:
    accept_cookies_2 = W(driver, 2).until(
        EC.element_to_be_clickable(
            (By.XPATH, '//*[@id="accept-button"]')
        )
    )
except NoSuchElementException:
    pass
accept_cookies_2.click()

#pick domestic / non_domestic , find element by id
click_domestic = driver.find_element(By.ID, "non_domestic")
click_domestic.click()
click_continue = driver.find_element(By.CSS_SELECTOR, "#main-content > form > fieldset > button")
click_continue.click()

In [79]:
#RUN OUR SCRAPING SCRIPT LOOPED FOR EVERY POSTCODE IN THE LIST - THIS SCRAPES "AC-CERT" occurences on every page.

for zipcode in target_zipcodes_sample:
    element = driver.find_element(By.ID, "postcode")
    element.clear()
    element.send_keys(zipcode)
    #element.send_keys(zipcode)
    element.send_keys(Keys.ENTER)  #same as hitting enter
    time.sleep(0.25)

    #will stop 
    links = []
    
    #if link(s) present, run loop, if no link present, go back() and run through next zip code
    link_amount = len(driver.find_elements(By.PARTIAL_LINK_TEXT, "AC-CERT\n"))
    isPresent = link_amount > 0
    print(len(driver.find_elements(By.PARTIAL_LINK_TEXT, "AC-CERT\n")))
    
    #this for loop creates a list of certificate links where link text contains CEPC\n
    if isPresent == True :
        for link in wait_variable.until(E.visibility_of_any_elements_located((By.PARTIAL_LINK_TEXT, "AC-CERT\n"))):
            links.append(link.get_attribute('href'))
        #for i in len(cepc_links):    
        #    driver.back()
        #    time.sleep(0.2)
        #this for loop runs through the list of links created above, scrapes the fields needed and append them to our DataFrame, handling missing data

        for link in links:
            pagedata = []
            driver.get(link)
            TITLE = driver.title
            print(TITLE)
            time.sleep(0.25)
            
            #Test presence of Address, scrape it
            try:
                address = driver.find_element(By.XPATH, '//*[@id="main-content"]/div/div[3]/div[2]/div/div[1]/span[1]')
                address2 = driver.find_element(By.XPATH, '//*[@id="main-content"]/div/div[3]/div[2]/div/div[1]/span[2]')
                address3 = driver.find_element(By.XPATH, '//*[@id="main-content"]/div/div[3]/div[2]/div/div[1]/span[3]')
                address4 = driver.find_element(By.XPATH, '//*[@id="main-content"]/div/div[3]/div[2]/div/div[1]/span[4]')
                full_address = address.text + ' ' + address2.text + ' ' + address3.text + ' ' + address4.text
                pagedata.append(full_address) 
                print(full_address)
            except NoSuchElementException:
                pagedata.append('NA')
                pass
            
            #Test presence of Inspection Date , scrape it
            try:
                inspection_date = driver.find_element(By.XPATH, '//*[@id="assessment_details"]/dl/div[1]/dd')
                pagedata.append(inspection_date.text) 
                print(inspection_date.text)
            except NoSuchElementException:
                pagedata.append('NA')
                pass

            #Test presence of Inspection Date , scrape it
            try:    
                inspection_level = driver.find_element(By.XPATH, '//*[@id="assessment_details"]/dl/div[2]/dd')
                pagedata.append(inspection_level.text)
                print(inspection_level.text)
            except NoSuchElementException:
                pagedata.append('NA')
                pass
            
            #Test presence of Inspection Date , scrape it
            try:    
                assessment_software = driver.find_element(By.XPATH, '//*[@id="assessment_details"]/dl/div[3]/dd')
                pagedata.append(assessment_software.text) 
                print(assessment_software.text)
            except NoSuchElementException:
                pagedata.append('NA')
                pass
            
            #Test presence of Inspection Date , scrape it
            try:
                assessment_declaration = driver.find_element(By.XPATH, '//*[@id="assessment_details"]/dl/div[4]/dd')
                pagedata.append(assessment_declaration.text) 
                print(assessment_declaration.text)
            except NoSuchElementException:
                pagedata.append('NA')
                pass
            
            #Test presence of Inspection Date , scrape it
            try:
                fgas_date = driver.find_element(By.XPATH, '//*[@id="assessment_details"]/dl/div[5]/dd')
                pagedata.append(fgas_date.text)
                print(fgas_date.text)
            except NoSuchElementException:
                pagedata.append('NA')
                pass
            
            #Test presence of Inspection Date , scrape it
            try:
                total_rated_output = driver.find_element(By.XPATH, '//*[@id="assessment_details"]/dl/div[6]/dd')
                pagedata.append(total_rated_output.text)
                print(total_rated_output.text)
            except NoSuchElementException:
                pagedata.append('NA')
                pass
            
            try:
                system_sampling = driver.find_element(By.XPATH, '//*[@id="assessment_details"]/dl/div[7]/dd')
                pagedata.append(system_sampling.text)
                print(system_sampling.text)
            except NoSuchElementException:
                pagedata.append('NA')
                pass

            try:
                treated_floor_area = driver.find_element(By.XPATH, '//*[@id="assessment_details"]/dl/div[8]/dd')
                pagedata.append(treated_floor_area.text)
                print(treated_floor_area.text)
            except NoSuchElementException:
                pagedata.append('NA')
                pass
            
            try:
                subsystems_metered = driver.find_element(By.XPATH, '//*[@id="assessment_details"]/dl/div[9]/dd')
                pagedata.append(subsystems_metered.text)
                print(subsystems_metered.text)
            except NoSuchElementException:
                pagedata.append('NA')
                pass
            
            try:
                tot_est_refrigerant_charge = driver.find_element(By.XPATH, '//*[@id="assessment_details"]/dl/div[10]/dd')
                pagedata.append(tot_est_refrigerant_charge.text)
                print(tot_est_refrigerant_charge.text)
            except NoSuchElementException:
                pagedata.append('NA')
                pass

            pagedata.append(link)

            df_target.loc[link] = pagedata
            print(df_target.loc[link])
         
        for i in range(0, link_amount + 1):
            driver.back()
            time.sleep(0.25)
    else:
        driver.back()

3
Air conditioning inspection certificate – Find an energy certificate – GOV.UK
Lovells 21 Holborn Viaduct LONDON EC1A 2AT
22 June 2017
Level 4
CLG, ACReport, v2.0
Not related to the owner/occupier or person who has technical control of the system or subcontractor.
14 July 2017
775 kW
Yes
5570 square metres
No
38 kg
Address                                      Lovells 21 Holborn Viaduct LONDON EC1A 2AT
Inspection Date                                                            22 June 2017
Inspection Level                                                                Level 4
Assessment software                                                 CLG, ACReport, v2.0
Assessor’s declaration                Not related to the owner/occupier or person wh...
F-Gas compliant date                                                       14 July 2017
Total effective rated output                                                     775 kW
System sampling                                                   

3
Air conditioning inspection certificate – Find an energy certificate – GOV.UK
18 January 2016
Level 4
CLG, ACReport, v2.0
Not related to the owner/occupier or person who has technical control of the system or subcontractor.
3 November 2015
12044 kW
No
78000 square metres
No
2400 kg
Address                                                                              NA
Inspection Date                                                         18 January 2016
Inspection Level                                                                Level 4
Assessment software                                                 CLG, ACReport, v2.0
Assessor’s declaration                Not related to the owner/occupier or person wh...
F-Gas compliant date                                                    3 November 2015
Total effective rated output                                                   12044 kW
System sampling                                                                      No
Treated flo

3
Air conditioning inspection certificate – Find an energy certificate – GOV.UK
20 July 2017
Level 4
Sterling Accreditation, Sterling e-Volve, v1.2
Not related to the owner/occupier or person who has technical control of the system or subcontractor.
Not Provided
5161 kW
Yes
44068 square metres
In part
1170 kg
Address                                                                              NA
Inspection Date                                                            20 July 2017
Inspection Level                                                                Level 4
Assessment software                      Sterling Accreditation, Sterling e-Volve, v1.2
Assessor’s declaration                Not related to the owner/occupier or person wh...
F-Gas compliant date                                                       Not Provided
Total effective rated output                                                    5161 kW
System sampling                                                          

Air conditioning inspection certificate – Find an energy certificate – GOV.UK
LAND SECURITIES 140 Aldersgate Street LONDON EC1A 4HY
9 December 2015
Level 4
Sterling Accreditation, Sterling e-Volve, v1.2
Not related to the owner/occupier or person who has technical control of the system or subcontractor.
7 December 2015
1504 kW
No
6536 square metres
No
335 kg
Address                               LAND SECURITIES 140 Aldersgate Street LONDON E...
Inspection Date                                                         9 December 2015
Inspection Level                                                                Level 4
Assessment software                      Sterling Accreditation, Sterling e-Volve, v1.2
Assessor’s declaration                Not related to the owner/occupier or person wh...
F-Gas compliant date                                                    7 December 2015
Total effective rated output                                                    1504 kW
System sampling        

Air conditioning inspection certificate – Find an energy certificate – GOV.UK
2 February 2022
Level 4
ACE Wizard, V1.0
Not related to the owner/occupier or person who has technical control of the system or subcontractor.
Not Provided
169 kW
Yes
1330 square metres
No
47 kg
Address                                                                              NA
Inspection Date                                                         2 February 2022
Inspection Level                                                                Level 4
Assessment software                                                    ACE Wizard, V1.0
Assessor’s declaration                Not related to the owner/occupier or person wh...
F-Gas compliant date                                                       Not Provided
Total effective rated output                                                     169 kW
System sampling                                                                     Yes
Treated floor area     

In [80]:
df_target

Unnamed: 0,Address,Inspection Date,Inspection Level,Assessment software,Assessor’s declaration,F-Gas compliant date,Total effective rated output,System sampling,Treated floor area,Subsystems metered,Total estimated refrigerant charge,Hyperlink
https://find-energy-certificate.service.gov.uk/energy-certificate/0260-8980-0673-8290-3060,Lovells 21 Holborn Viaduct LONDON EC1A 2AT,22 June 2017,Level 4,"CLG, ACReport, v2.0",Not related to the owner/occupier or person wh...,14 July 2017,775 kW,Yes,5570 square metres,No,38 kg,https://find-energy-certificate.service.gov.uk...
https://find-energy-certificate.service.gov.uk/energy-certificate/9587-6061-0079-0900-7401,Tesco 21 Holborn Viaduct LONDON EC1A 2AT,3 September 2013,Level 3,"Sterling Accreditation, Sterling e-Volve, v1.2",Contracted by the owner to provide other (non-...,Not Provided,13 kW,No,254 square metres,No,9 kg,https://find-energy-certificate.service.gov.uk...
https://find-energy-certificate.service.gov.uk/energy-certificate/0450-0968-8099-7692-4002,,14 February 2018,Level 3,"Sterling Accreditation, Sterling e-Volve, v1.2",Contracted by the owner to provide other Energ...,Not Provided,23 kW,Yes,217 square metres,No,9 kg,https://find-energy-certificate.service.gov.uk...
https://find-energy-certificate.service.gov.uk/energy-certificate/9200-5969-0648-4550-3064,,25 October 2018,Level 4,"Sterling Accreditation, Sterling e-Volve, v1.2",Not related to the owner/occupier or person wh...,Not Provided,170 kW,Yes,1879 square metres,No,108 kg,https://find-energy-certificate.service.gov.uk...
https://find-energy-certificate.service.gov.uk/energy-certificate/9700-1930-0638-2960-8034,,31 October 2018,Level 4,"CLG, ACReport, v2.0",Not related to the owner/occupier or person wh...,6 July 2018,2840 kW,Yes,21600 square metres,No,644 kg,https://find-energy-certificate.service.gov.uk...
https://find-energy-certificate.service.gov.uk/energy-certificate/0930-0163-9429-9126-8002,Hogan Lovells International Llp Atlantic House...,21 June 2017,Level 4,"CLG, ACReport, v2.0",Not related to the owner/occupier or person wh...,14 July 2017,4080 kW,Yes,24150 square metres,No,260 kg,https://find-energy-certificate.service.gov.uk...
https://find-energy-certificate.service.gov.uk/energy-certificate/0990-6941-0626-1820-1034,,18 January 2016,Level 4,"CLG, ACReport, v2.0",Not related to the owner/occupier or person wh...,3 November 2015,12044 kW,No,78000 square metres,No,2400 kg,https://find-energy-certificate.service.gov.uk...
https://find-energy-certificate.service.gov.uk/energy-certificate/9362-6076-0239-0400-0521,Fidelity Management & Research 1 St. Martin's ...,20 June 2017,Level 3,"Sterling Accreditation, Sterling e-Volve, v1.2",Contracted by the owner to provide air conditi...,17 June 2017,67 kW,No,350 square metres,No,44 kg,https://find-energy-certificate.service.gov.uk...
https://find-energy-certificate.service.gov.uk/energy-certificate/7423-9088-3565-3008-4933,,17 August 2021,Level 4,"Quidos, AIRS, v2.0",Not related to the owner/occupier or person wh...,Not Provided,7020 kW,Yes,25000 square metres,No,2386 kg,https://find-energy-certificate.service.gov.uk...
https://find-energy-certificate.service.gov.uk/energy-certificate/0694-9656-9760-2800-2103,,12 May 2019,Level 4,"Sterling Accreditation, Sterling e-Volve, v1.2",Not related to the owner/occupier or person wh...,29 January 2019,984 kW,Yes,5310 square metres,Yes,262 kg,https://find-energy-certificate.service.gov.uk...


In [74]:
df_target

Unnamed: 0,Address,Inspection Date,Inspection Level,Assessment software,Assessor’s declaration,F-Gas compliant date,Total effective rated output,System sampling,Treated floor area,Subsystems metered,Total estimated refrigerant charge,Hyperlink
https://find-energy-certificate.service.gov.uk/energy-certificate/0260-8980-0673-8290-3060,Lovells 21 Holborn Viaduct LONDON EC1A 2AT,22 June 2017,Level 4,"CLG, ACReport, v2.0",Not related to the owner/occupier or person wh...,14 July 2017,775 kW,Yes,5570 square metres,No,38 kg,https://find-energy-certificate.service.gov.uk...
https://find-energy-certificate.service.gov.uk/energy-certificate/9587-6061-0079-0900-7401,Tesco 21 Holborn Viaduct LONDON EC1A 2AT,3 September 2013,Level 3,"Sterling Accreditation, Sterling e-Volve, v1.2",Contracted by the owner to provide other (non-...,Not Provided,13 kW,No,254 square metres,No,9 kg,https://find-energy-certificate.service.gov.uk...
https://find-energy-certificate.service.gov.uk/energy-certificate/0450-0968-8099-7692-4002,,14 February 2018,Level 3,"Sterling Accreditation, Sterling e-Volve, v1.2",Contracted by the owner to provide other Energ...,Not Provided,23 kW,Yes,217 square metres,No,9 kg,https://find-energy-certificate.service.gov.uk...
https://find-energy-certificate.service.gov.uk/energy-certificate/9200-5969-0648-4550-3064,,25 October 2018,Level 4,"Sterling Accreditation, Sterling e-Volve, v1.2",Not related to the owner/occupier or person wh...,Not Provided,170 kW,Yes,1879 square metres,No,108 kg,https://find-energy-certificate.service.gov.uk...
https://find-energy-certificate.service.gov.uk/energy-certificate/9700-1930-0638-2960-8034,,31 October 2018,Level 4,"CLG, ACReport, v2.0",Not related to the owner/occupier or person wh...,6 July 2018,2840 kW,Yes,21600 square metres,No,644 kg,https://find-energy-certificate.service.gov.uk...
https://find-energy-certificate.service.gov.uk/energy-certificate/0930-0163-9429-9126-8002,Hogan Lovells International Llp Atlantic House...,21 June 2017,Level 4,"CLG, ACReport, v2.0",Not related to the owner/occupier or person wh...,14 July 2017,4080 kW,Yes,24150 square metres,No,260 kg,https://find-energy-certificate.service.gov.uk...
https://find-energy-certificate.service.gov.uk/energy-certificate/0990-6941-0626-1820-1034,,18 January 2016,Level 4,"CLG, ACReport, v2.0",Not related to the owner/occupier or person wh...,3 November 2015,12044 kW,No,78000 square metres,No,2400 kg,https://find-energy-certificate.service.gov.uk...
https://find-energy-certificate.service.gov.uk/energy-certificate/9362-6076-0239-0400-0521,Fidelity Management & Research 1 St. Martin's ...,20 June 2017,Level 3,"Sterling Accreditation, Sterling e-Volve, v1.2",Contracted by the owner to provide air conditi...,17 June 2017,67 kW,No,350 square metres,No,44 kg,https://find-energy-certificate.service.gov.uk...
https://find-energy-certificate.service.gov.uk/energy-certificate/7423-9088-3565-3008-4933,,17 August 2021,Level 4,"Quidos, AIRS, v2.0",Not related to the owner/occupier or person wh...,Not Provided,7020 kW,Yes,25000 square metres,No,2386 kg,https://find-energy-certificate.service.gov.uk...
https://find-energy-certificate.service.gov.uk/energy-certificate/0694-9656-9760-2800-2103,,12 May 2019,Level 4,"Sterling Accreditation, Sterling e-Volve, v1.2",Not related to the owner/occupier or person wh...,29 January 2019,984 kW,Yes,5310 square metres,Yes,262 kg,https://find-energy-certificate.service.gov.uk...


In [81]:
import os
os.makedirs('./data', exist_ok=True)  
df_target.to_csv('./data/output.csv')  