# DNB automation

Uploads a list of company names to the Dun and Bradstreet portal.

## Setup
Set up the Node version and activate the conda environment (which provides JupyterLab and Selenium):
```
nvm use 16
conda activate web
```

Set the DNB_PASSWORD as an environment variable before launching jupyter lab.
```
 DNB_PASSWORD='abc' jupyter lab
```
Note: the leading space before the command keeps it out of the shell history. To check that this is enabled, run `setopt` in zsh and verify that `histignorespace` is in the list.

## IMPORTANT: Always log out from DNB before closing the window
Otherwise you will not be able to log back in.

# Load modules

In [193]:
import os
import random
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# PART 1: Get list of companies with single match

In [9]:
# Read the startup catalogue from the Excel workbook.
startup_list = pd.read_excel('Catalonia Startups 2021.xlsx')

# Build the working table from the brand-name column ('MARCA'):
#   id      -> the row label of the workbook row (via reset_index)
#   name    -> the brand name that is typed into the DNB search
#   matches -> number of DNB results, empty (NaN) until Part 1 fills it in
# (The workbook also has a 'NOM DE LA STARTUP' column, which is not used here.)
startup_df = (
    startup_list['MARCA']
    .reset_index()
    .set_axis(['id', 'name'], axis=1)
    .assign(matches=np.nan)
)

## Threshold

In [10]:
# Position of the first row of startup_df to process
# (set e.g. start_pos = 1600 to resume a later slice).
start_pos = 0

# Number of rows to process in this run; a falsy value (None or 0) means
# "everything from start_pos to the end of startup_df".
batch_size = 10
batch_size = batch_size or len(startup_df) - start_pos

print(f"Positions: {start_pos}-{start_pos + batch_size}.")

Positions: 0-10.


## Get DNB Password

In [11]:
# Read the DNB password from the DNB_PASSWORD environment variable so it is never
# stored in the notebook. The variable must be set before launching Jupyter Lab,
# e.g.: export DNB_PASSWORD='abc'; jupyter lab
# os.environ[...] raises KeyError if the variable is not set.
password = os.environ['DNB_PASSWORD']

## Automation

In [12]:
# Part 1 automation: log in to DNB Hoovers, restrict the company search to
# Spain / Barcelona + Girona, then search each startup name on its own and store
# the number of results in startup_df['matches'] (-1 when the result counter
# could not be read).
print(f"Opening DNB App, calculating matches in positions: {start_pos}-{start_pos+batch_size}.")

# Open DNB on Chrome (the chromedriver path is specific to the author's machine)
s=Service('/Users/fer/Dropbox/datascience/selenium/chromedriver')
driver = webdriver.Chrome(service=s)
URL = "https://app.dnbhoovers.com/search/company"
driver.get(URL)

try:
    # Input username
    username = driver.find_element(by=By.CSS_SELECTOR, value='input#username')
    username.send_keys("fernando@aretian.com")
    login_btn = driver.find_element(by=By.CSS_SELECTOR, value='form#login button.continue-btn')
    login_btn.click()

    # Input password
    password_input = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, "input#password"))
    )
    password_input.send_keys(password)

    # Click login (the same button element is clicked again for the password step)
    login_btn.click()

    # Add country
    selector = '#rc_select_5'
    country_input = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))
    country_input.send_keys("Spain")

    selector = "div[label='Spain'][type='country'][title='Spain']"
    country_list = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))
    country_list.click()

    # Add provinces
    provinces = ['Barcelona, Spain', 'Girona, Spain']

    selector = '#rc_select_4'
    province_input = driver.find_element(by=By.CSS_SELECTOR, value=selector)

    for province in provinces:
        province_input.send_keys(province)
        selector = f"div[label='{province}'][type='stateOrProvince'][title='{province}']"
        province_list = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))
        province_list.click()

    # Locate Company Name input box
    selector = '#companyNameFilter > div:nth-child(3) > div:nth-child(1) > input'
    company_name_input = driver.find_element(by=By.CSS_SELECTOR, value=selector)
    # Locate Add Button
    selector = '#companyNameFilter > div:nth-child(3) > div.ant-col.spacer-x-small-left > button'
    add_btn = driver.find_element(by=By.CSS_SELECTOR, value=selector)
    # Selector of the element holding the number of results
    n_results_selector = '#bal-content > div.filter-results-header > div.sticky-outer-wrapper > div > div.ant-row.ant-row-space-between.ant-row-middle > div.ant-col.ant-col-8 > span > span'
    # Selector of the cross (remove) button of the last selected company name
    cross_selector = '#selected-filter-companyNameFilter > ul > li:last-child > svg'

    for index, company in startup_df[start_pos:start_pos+batch_size].iterrows():
        print(f"Progress: {(index-start_pos)/batch_size*100:.0f}% ({index}/{start_pos+batch_size}).", end="\r")
        # Add company name to input box
        company_name_input.send_keys(company['name'])

        # Click on Add button
        add_btn.click()

        # Check number of results. The counter text is parsed only when the wait
        # succeeded; on failure the -1 sentinel is kept instead of re-reading a
        # results_element that is undefined or left over from the previous company.
        try:
            results_element = WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, n_results_selector)))
        except Exception as e:
            print(f"{company['name']}. Exception: {e}.")
            n_results = -1
        else:
            # The counter may contain thousands separators, e.g. "1,234"
            n_results = int(results_element.text.replace(",",""))

        # Store results
        startup_df.loc[index, 'matches'] = n_results

        # Remove company from list: keep clicking the cross button until either
        # the button or the selected-filter container is gone. Both lookups are
        # inside the try so that an already-empty filter ends the cleanup instead
        # of aborting the whole run.
        while True:
            try:
                cross_btn = driver.find_element(by=By.CSS_SELECTOR, value=cross_selector)
                cross_btn.click()
                selector = '#selected-filter-companyNameFilter'
                company_filter = driver.find_element(by=By.CSS_SELECTOR, value=selector)
                time.sleep(0.5)
            except NoSuchElementException:
                break

    print("")
    print("Done.")

except Exception as e:
    # Any other failure stops the run; the browser stays open for the logout cell.
    print(e)

Opening DNB App, calculating matches in positions: 0-10.
Progress: 90% (9/10).
Done.


## Logout and Quit

In [13]:
# Log out of DNB and close the browser.
# The browser is closed only after the logout confirmation has been seen.

# Scroll back to the top of the page before opening the header menu
driver.execute_script("scrollBy(0,-10000);")

# Open the account menu in the page header
selector = '#main > section > div.headroom-wrapper > div > header > ul.ant-menu-overflow.ant-menu.ant-menu-root.ant-menu-horizontal.ant-menu-dark.side-nav-menu > li:nth-child(3) > div > span > span > svg'
account_btn = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))
account_btn.click()

# Click on logout button
selector = 'ul[data-menu-list="true"].ant-menu.ant-menu-sub.ant-menu-vertical > li:nth-child(4)'
logout_btn = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))
logout_btn.click()

# Wait (at most 60 s) for the confirmation heading instead of busy-looping forever
# behind a bare `except`. If the text never shows up, WebDriverWait raises
# TimeoutException, driver.quit() below is skipped and the window stays open so
# the logout can be completed by hand.
selector = '#content > h3'
logged_out = WebDriverWait(driver, 60).until(
    EC.text_to_be_present_in_element((By.CSS_SELECTOR, selector), 'You are now logged out.'))

driver.quit()

## Quit

In [None]:
# driver.quit()

## Concatenate all results

In [271]:
# Stitch the three partial runs into one table: rows [0, 700) from the first run,
# [700, 1600) from the second and [1600, end) from the third.
# NOTE(review): startup_df_0 / startup_df_1 / startup_df_2 are not defined anywhere
# in the visible notebook - presumably copies of startup_df saved after each run
# of Part 1; they must exist in the kernel before this cell is run.
final_df = pd.concat([startup_df_0[:700], startup_df_1[700:1600], startup_df_2[1600:]])

# index=False is the documented way to leave the row index out of the CSV
final_df.to_csv('startup_df.csv', index=False)

In [None]:
# View results: how many startups got each number of DNB matches
# (value_counts leaves out rows whose 'matches' is NaN by default).
final_df['matches'].value_counts()

In [282]:
# Keep only the startups whose name produced exactly one DNB result and save
# them; Parts 2 and 3 read this file back as their input.
startup_matched_df = final_df.loc[final_df['matches'] == 1]

# index=False is the documented way to leave the row index out of the CSV
startup_matched_df.to_csv('startup_matched_df.csv', index=False)

# PART 2: Download companies with single match

## Load data

In [144]:
# Load the single-match startups written to 'startup_matched_df.csv' in Part 1.
startup_df = pd.read_csv('startup_matched_df.csv')

## Select threshold

In [141]:
# Split the matched startups into batches of at most batch_size rows.
start_pos = 0
batch_size = 24 # This is the max size that can fit on a DNB results page (it is 25 but we need space for a placeholder company)
if not batch_size:
    # A falsy batch_size means "one batch with everything from start_pos on".
    # The size comes from startup_df, the frame loaded for this part and the one
    # that is actually batched below (not from Part 1's startup_matched_df).
    batch_size = len(startup_df) - start_pos
print(f"Positions: {start_pos}-{start_pos+batch_size}.")

startup_batches = [startup_df[i:i+batch_size] for i in range(start_pos, len(startup_df), batch_size)]

Positions: 0-24.


In [142]:
# Choose the batch to resume from after an error (0 = start from the beginning).
# restart_batch = 0
restart_batch = 15
if not restart_batch:
    restart_batch = 0
print(f"Restarting from batch: {restart_batch}.")

# Rebuild the full batch list from startup_df before dropping the finished
# batches. Slicing startup_batches in place would remove another restart_batch
# batches every time this cell is re-run.
startup_batches = [startup_df[i:i+batch_size] for i in range(start_pos, len(startup_df), batch_size)][restart_batch:]

Restarting from batch: 15.


## Login to DNB

In [134]:
print(f"Opening DNB App, getting all startups in: {start_pos}-{start_pos+batch_size}.")

# Launch Chrome through the local chromedriver binary and load the DNB search page.
URL = "https://app.dnbhoovers.com/search/company"
s = Service('/Users/fer/Dropbox/datascience/selenium/chromedriver')
driver = webdriver.Chrome(service=s)
driver.get(URL)

try:
    # A random pause of up to one second separates the individual login steps.
    time.sleep(random.random())

    # Step 1: type the account e-mail and press the continue button of the login form.
    username = driver.find_element(By.CSS_SELECTOR, 'input#username')
    username.send_keys("fernando@aretian.com")
    login_btn = driver.find_element(By.CSS_SELECTOR, 'form#login button.continue-btn')
    login_btn.click()

    time.sleep(random.random())

    # Step 2: once the password field is clickable, type the password read from
    # the DNB_PASSWORD environment variable.
    password_input = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable(
            (By.CSS_SELECTOR, "input#password")
        )
    )
    password_input.send_keys(password)

    time.sleep(random.random())

    # Step 3: press the same button element again to submit the password.
    login_btn.click()

except Exception as err:
    # Errors are only reported; the cell itself never raises.
    print(err)

Opening DNB App, getting all startups in: 0-25.


## Add country and province filter

In [135]:
def _pause():
    """Sleep for a random fraction of a second between two UI actions."""
    time.sleep(random.random())


def _clickable(css):
    """Wait up to 10 s until the element matching the CSS selector is clickable and return it."""
    return WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, css)))


try:
    _pause()

    # Country filter: type the country name, then pick the matching dropdown option.
    selector = '#rc_select_5'
    country_input = _clickable(selector)
    country_input.send_keys("Spain")

    _pause()

    selector = "div[label='Spain'][type='country'][title='Spain']"
    country_list = _clickable(selector)
    country_list.click()

    # Province filter: the same input box is reused for every province.
    provinces = ['Barcelona, Spain', 'Girona, Spain']

    selector = '#rc_select_4'
    province_input = driver.find_element(By.CSS_SELECTOR, selector)

    for province in provinces:

        _pause()

        province_input.send_keys(province)
        selector = f"div[label='{province}'][type='stateOrProvince'][title='{province}']"
        province_list = _clickable(selector)
        province_list.click()

    # The row labelled 0 of startup_df is used as the first company-name filter entry.
    company = startup_df.loc[0]

    _pause()

    # Type its name into the company-name box ...
    selector = '#companyNameFilter > div:nth-child(3) > div:nth-child(1) > input'
    company_name_input = driver.find_element(By.CSS_SELECTOR, selector)
    company_name_input.send_keys(company['name'])

    _pause()

    # ... and confirm it with the Add button.
    selector = '#companyNameFilter > div:nth-child(3) > div.ant-col.spacer-x-small-left > button'
    add_btn = driver.find_element(By.CSS_SELECTOR, selector)
    add_btn.click()

    _pause()

    # Open the results page.
    selector = '#bal-content > div.filter-results-header > div.sticky-outer-wrapper.sticky-results > div > div.ant-row.ant-row-space-between.ant-row-middle > div.ant-col.ant-col-16 > button'
    view_results_btn = _clickable(selector)
    view_results_btn.click()

    _pause()

except Exception as err:
    # Errors are only reported; the cell itself never raises.
    print(err)

## Get companies

Run from this block if the code gets stuck.

In [143]:
# For every batch: reset the company-name filter (keeping its first entry), add
# the batch's company names, select all results and add them to a saved DNB list.

# Index of the batch being processed. It stays None until the loop starts, so the
# error handler below never reads an undefined name or a value left over from an
# earlier run of this cell.
batch_i = None

try:
    # Wait until loading some results
    selector = '#main > section > main > section > main > main > section > main > div > div:nth-child(3) > div.result-container > div > ul > div:nth-child(1)'
    WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))

    # Add rest of companies one by one
    for batch_i, batch in enumerate(startup_batches):
        print(f"Batch: {batch_i+restart_batch}/{len(startup_batches)+restart_batch}.")

        time.sleep(random.random())

        # Remove all companies from list except first: keep clicking the cross of
        # the second entry until there is no second entry left
        print('Cleaninig company name filter.')
        cross_selector = '#selected-filter-companyNameFilter > ul > li:nth-child(2) > svg'
        while True:
            try:
                cross_btn = driver.find_element(by=By.CSS_SELECTOR, value=cross_selector)
                cross_btn.click()
                time.sleep(random.random())
            except NoSuchElementException:
                break

        # Add companies in current batch to company filter
        print(f"Adding companies to company filter. First company name: {batch.iloc[0]['name']}.")
        for index, company in batch.iterrows():

            # Extra one-second pause whenever the row label is a multiple of 5
            if index % 5 == 0:
                time.sleep(1)

            time.sleep(random.random())

            # Add company name to input box
            selector = '#companyNameFilter > div:nth-child(3) > div:nth-child(1) > input'
            company_name_input = driver.find_element(by=By.CSS_SELECTOR, value=selector)
            company_name_input.send_keys(company['name'])

            time.sleep(random.random())

            # Click on Add button (scrolled into view first)
            selector = '#companyNameFilter > div:nth-child(3) > div.ant-col.spacer-x-small-left > button'
            add_btn = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))
            driver.execute_script("return arguments[0].scrollIntoView(true);", add_btn)
            add_btn.click()

        time.sleep(random.random())

        # Select all companies
        print("Selecting all companies.")
        selector = '#main > section > main > section > main > main > section > main > div > div:nth-child(3) > div.sticky-outer-wrapper > div > div.ant-row.ant-row-space-between.search-results-header > div.ant-col.ant-col-11 > label > span'
        select_btn = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))
        select_btn.click()

        time.sleep(random.random())

        # Click on Create A List Dropdown menu
        print("Clicking on 'Create A List'.")
        selector = '#main > section > main > section > main > main > section > main > div > div:nth-child(3) > div.sticky-outer-wrapper > div > div.ant-row.ant-row-middle.results-row > div.ant-col.ant-col-16.result-actions > div.create-new-list > button'
        create_list_btn = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))
        create_list_btn.click()

        time.sleep(random.random())

        # Click on add to list (second item of the dropdown menu)
        print("Clicking on 'Add to List'.")
        selector = 'ul[role="menu"][tabindex="0"][data-menu-list="true"].ant-dropdown-menu.ant-dropdown-menu-root.ant-dropdown-menu-vertical.ant-dropdown-menu-light > li[role="menuitem"][tabindex="-1"].ant-dropdown-menu-item:nth-child(2) > span.ant-dropdown-menu-title-content'
        add_to_list_btn = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))
        add_to_list_btn.click()

        time.sleep(random.random())

        # Select Barcelona list (first checkbox in the modal)
        print("Clicking on 'Barcelona'.")
        selector = 'div.ant-modal-wrap > div > div.ant-modal-content > div.ant-modal-body > form > div > div.ant-col.ant-form-item-control > div > div > div > div > label:nth-child(1) > span.ant-checkbox'
        select_list_btn = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))
        select_list_btn.click()

        time.sleep(random.random())

        # Click on 'Add'
        print("Clicking on 'Add'.")
        selector = 'div.ant-modal-wrap > div > div.ant-modal-content > div.ant-modal-footer > button.ant-btn.ant-btn-primary'
        add_btn = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))
        add_btn.click()

        time.sleep(random.random())

        # Wait for success message: poll the notification text up to 10 times,
        # one second apart
        print("Waiting for success message.")
        selector = 'div > div > div > div.ant-notification-notice-description > div'
        success_text = 'Added items to Barcelona Innovation Triangle Startups successfully.'
        tries = 0
        success = 0
        while tries < 10:
            success_box = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, selector)))
            if success_box.text == success_text:
                print('Batch successfully added to company list.')
                success = 1
                break
            else:
                tries += 1
                time.sleep(1)

        if success == 0:
            print("There was an error adding batch to company list.")

        print("")

    print("")
    print("Done.")

except Exception as e:
    if batch_i is None:
        # The failure happened before the first batch was started
        print("There was an error before the first batch started.")
    else:
        print(f"There was an error while running batch {restart_batch+batch_i}.")
    print("Full error message:")
    print(e)

Progress: batch 15/16.
Cleaninig company name filter.
Adding companies to company filter. First company name: {batch.iloc[0]['name']}.
Adding companies to company filter. First company name: {batch.iloc[0]['name']}.
Adding companies to company filter. First company name: {batch.iloc[0]['name']}.
Adding companies to company filter. First company name: {batch.iloc[0]['name']}.
Adding companies to company filter. First company name: {batch.iloc[0]['name']}.
Adding companies to company filter. First company name: {batch.iloc[0]['name']}.
Adding companies to company filter. First company name: {batch.iloc[0]['name']}.
Adding companies to company filter. First company name: {batch.iloc[0]['name']}.
Adding companies to company filter. First company name: {batch.iloc[0]['name']}.
Adding companies to company filter. First company name: {batch.iloc[0]['name']}.
Adding companies to company filter. First company name: {batch.iloc[0]['name']}.
Adding companies to company filter. First company name:

## Logout and Quit


In [121]:
# Log out of DNB and close the browser.
# The browser is closed only after the logout confirmation has been seen.

# Scroll back to the top of the page before opening the header menu
driver.execute_script("scrollBy(0,-10000);")

# Open the account menu in the page header
selector = '#main > section > div.headroom-wrapper > div > header > ul.ant-menu-overflow.ant-menu.ant-menu-root.ant-menu-horizontal.ant-menu-dark.side-nav-menu > li:nth-child(3) > div > span > span > svg'
account_btn = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))
account_btn.click()

# Click on logout button
selector = 'ul[data-menu-list="true"].ant-menu.ant-menu-sub.ant-menu-vertical > li:nth-child(4)'
logout_btn = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))
logout_btn.click()

# Wait (at most 60 s) for the confirmation heading instead of busy-looping forever
# behind a bare `except`. If the text never shows up, WebDriverWait raises
# TimeoutException, driver.quit() below is skipped and the window stays open so
# the logout can be completed by hand.
selector = '#content > h3'
logged_out = WebDriverWait(driver, 60).until(
    EC.text_to_be_present_in_element((By.CSS_SELECTOR, selector), 'You are now logged out.'))

driver.quit()

## Quit

In [165]:
# Close the browser session. The logout cell above already ends with driver.quit().
# NOTE(review): presumably kept as a manual fallback for when that cell did not finish - confirm.
driver.quit()

# PART 3: Get DUNS numbers

## Load data

In [187]:
# Load the single-match startups written to 'startup_matched_df.csv' in Part 1.
startup_df = pd.read_csv('startup_matched_df.csv')

## Create batches

In [188]:
# Split the matched startups into batches of at most batch_size rows.
start_pos = 0
batch_size = 24 # This is the max size that can fit on a DNB results page (it is 25 but we need space for a placeholder company)
if not batch_size:
    # A falsy batch_size means "one batch with everything from start_pos on".
    # The size comes from startup_df, the frame loaded for this part and the one
    # that is actually batched below (not from Part 1's startup_matched_df).
    batch_size = len(startup_df) - start_pos
startup_batches = [startup_df[i:i+batch_size] for i in range(start_pos, len(startup_df), batch_size)]

## Select starting point

In [189]:
# Choose the batch to resume from after an error (0 = start from the beginning).
restart_batch = 0
# restart_batch = 30
if not restart_batch:
    restart_batch = 0
print(f"Restarting from batch: {restart_batch}.")

# Rebuild the full batch list from startup_df before dropping the finished
# batches. Slicing startup_batches in place would remove another restart_batch
# batches every time this cell is re-run.
startup_batches = [startup_df[i:i+batch_size] for i in range(start_pos, len(startup_df), batch_size)][restart_batch:]

Restarting from batch: 0.


## Login to DNB

In [194]:
print(f"Opening DNB App, getting all startups in: {start_pos}-{start_pos+batch_size}.")

# Launch Chrome through the local chromedriver binary and load the DNB search page.
URL = "https://app.dnbhoovers.com/search/company"
s = Service('/Users/fer/Dropbox/datascience/selenium/chromedriver')
driver = webdriver.Chrome(service=s)
driver.get(URL)

try:
    # A random pause of up to one second separates the individual login steps.
    time.sleep(random.random())

    # Step 1: type the account e-mail and press the continue button of the login form.
    username = driver.find_element(By.CSS_SELECTOR, 'input#username')
    username.send_keys("fernando@aretian.com")
    login_btn = driver.find_element(By.CSS_SELECTOR, 'form#login button.continue-btn')
    login_btn.click()

    time.sleep(random.random())

    # Step 2: once the password field is clickable, type the password read from
    # the DNB_PASSWORD environment variable.
    password_input = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable(
            (By.CSS_SELECTOR, "input#password")
        )
    )
    password_input.send_keys(password)

    time.sleep(random.random())

    # Step 3: press the same button element again to submit the password.
    login_btn.click()

except Exception as err:
    # Errors are only reported; the cell itself never raises.
    print(err)

Opening DNB App, getting all startups in: 0-24.


## Add country and province filter and first company

In [195]:
def _pause():
    """Sleep for a random fraction of a second between two UI actions."""
    time.sleep(random.random())


def _clickable(css):
    """Wait up to 10 s until the element matching the CSS selector is clickable and return it."""
    return WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, css)))


try:
    _pause()

    # Country filter: type the country name, then pick the matching dropdown option.
    selector = '#rc_select_5'
    country_input = _clickable(selector)
    country_input.send_keys("Spain")

    _pause()

    selector = "div[label='Spain'][type='country'][title='Spain']"
    country_list = _clickable(selector)
    country_list.click()

    # Province filter: the same input box is reused for every province.
    provinces = ['Barcelona, Spain', 'Girona, Spain']

    selector = '#rc_select_4'
    province_input = driver.find_element(By.CSS_SELECTOR, selector)

    for province in provinces:

        _pause()

        province_input.send_keys(province)
        selector = f"div[label='{province}'][type='stateOrProvince'][title='{province}']"
        province_list = _clickable(selector)
        province_list.click()

    # The row labelled 0 of startup_df is used as the first company-name filter entry.
    company = startup_df.loc[0]

    _pause()

    # Type its name into the company-name box ...
    selector = '#companyNameFilter > div:nth-child(3) > div:nth-child(1) > input'
    company_name_input = driver.find_element(By.CSS_SELECTOR, selector)
    company_name_input.send_keys(company['name'])

    _pause()

    # ... and confirm it with the Add button.
    selector = '#companyNameFilter > div:nth-child(3) > div.ant-col.spacer-x-small-left > button'
    add_btn = driver.find_element(By.CSS_SELECTOR, selector)
    add_btn.click()

    _pause()

    # Open the results page.
    selector = '#bal-content > div.filter-results-header > div.sticky-outer-wrapper.sticky-results > div > div.ant-row.ant-row-space-between.ant-row-middle > div.ant-col.ant-col-16 > button'
    view_results_btn = _clickable(selector)
    view_results_btn.click()

    _pause()

except Exception as err:
    # Errors are only reported; the cell itself never raises.
    print(err)

Message: 



## Get companies

Run from this block if the code gets stuck.

In [177]:
# For every company: reset the company-name filter (keeping its first entry), add
# the company, read the D-U-N-S text of the second result row and store it in
# startup_df['duns'].

# Index of the batch being processed. It stays None until the loop starts, so the
# error handler below never reads an undefined name or a value left over from an
# earlier run of this cell.
batch_i = None

try:
    # Wait until page fully loaded
    selector = '#main > section > main > section > main > main > section > main > div > div:nth-child(3) > div.result-container > div > ul > div:nth-child(1)'
    WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))

    # Main loop: add companies
    for batch_i, batch in enumerate(startup_batches):
        print(f"Batch: {batch_i+restart_batch}/{len(startup_batches)+restart_batch}.")

        time.sleep(random.random())

        # Add companies in current batch to company filter
        for index, company in batch.iterrows():

            time.sleep(random.random())

            # Remove all companies from list except first: keep clicking the cross
            # of the second entry until there is no second entry left
            print('Cleaninig company name filter.')
            cross_selector = '#selected-filter-companyNameFilter > ul > li:nth-child(2) > svg'
            while True:
                try:
                    cross_btn = driver.find_element(by=By.CSS_SELECTOR, value=cross_selector)
                    cross_btn.click()
                    time.sleep(random.random())
                except NoSuchElementException:
                    break

            time.sleep(random.random())

            # Add company name to input box
            print(f"Adding company: {company['name']}.")
            selector = '#companyNameFilter > div:nth-child(3) > div:nth-child(1) > input'
            company_name_input = driver.find_element(by=By.CSS_SELECTOR, value=selector)
            company_name_input.send_keys(company['name'])

            time.sleep(random.random())

            # Click on Add button (scrolled into view first)
            selector = '#companyNameFilter > div:nth-child(3) > div.ant-col.spacer-x-small-left > button'
            add_btn = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))
            driver.execute_script("return arguments[0].scrollIntoView(true);", add_btn)
            add_btn.click()

            time.sleep(random.random())

            # Get DUNS number from the second row of the results list
            selector = '#main > section > main > section > main > main > section > main > div > div:nth-child(3) > div.result-container > div > ul > div:nth-child(2) > li > div.ant-row.relative-day-ago > div.ant-col.ant-col-14 > div > div:nth-child(4)'
            duns_div = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, selector)))
            # Drop the first 9 characters of the element text
            # NOTE(review): presumably a fixed label in front of the number - confirm
            duns_number = duns_div.text[9:]
            startup_df.loc[startup_df['id'] == company['id'], 'duns'] = duns_number

            print(f"Company name: {company['name']}. DUNS: {duns_number}.")

    print("Done.")

except Exception as e:
    if batch_i is None:
        # The failure happened before the first batch was started
        print("There was an error before the first batch started.")
    else:
        print(f"There was an error while running batch {restart_batch+batch_i}.")
    print("Full error message:")
    print(e)

Batch: 30/31.
Cleaninig company name filter.
Adding company: Remote Eye.
Company name: Remote Eye. DUNS: 46-683-0956.
Cleaninig company name filter.
Adding company: Wild Audience.
Company name: Wild Audience. DUNS: 46-890-4142.
Cleaninig company name filter.
Adding company: WITECH.
Company name: WITECH. DUNS: 46-717-8873.
Cleaninig company name filter.
Adding company: Wisar.
Company name: Wisar. DUNS: 46-828-9158.
Cleaninig company name filter.
Adding company: WIZARDS ESPORTS CLUB.
Company name: WIZARDS ESPORTS CLUB. DUNS: 46-783-5417.
Cleaninig company name filter.
Adding company: Woffu.
Company name: Woffu. DUNS: 46-973-7207.
Cleaninig company name filter.
Adding company: Woman'sBack.
Company name: Woman'sBack. DUNS: 46-865-4039.
Cleaninig company name filter.
Adding company: Woodys Bcn.
Company name: Woodys Bcn. DUNS: 46-545-9432.
Cleaninig company name filter.
Adding company: Epinium.
Company name: Epinium. DUNS: 47-079-4420.
Cleaninig company name filter.
Adding company: Workmeter

## Logout and Quit

In [191]:
# Log out of DNB and close the browser.
# The browser is closed only after the logout confirmation has been seen.

# Scroll back to the top of the page before opening the header menu
driver.execute_script("scrollBy(0,-10000);")

# Open the account menu in the page header
selector = '#main > section > div.headroom-wrapper > div > header > ul.ant-menu-overflow.ant-menu.ant-menu-root.ant-menu-horizontal.ant-menu-dark.side-nav-menu > li:nth-child(3) > div > span > span > svg'
account_btn = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))
account_btn.click()

# Click on logout button
selector = 'ul[data-menu-list="true"].ant-menu.ant-menu-sub.ant-menu-vertical > li:nth-child(4)'
logout_btn = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))
logout_btn.click()

# Wait (at most 60 s) for the confirmation heading instead of busy-looping forever
# behind a bare `except`. If the text never shows up, WebDriverWait raises
# TimeoutException, driver.quit() below is skipped and the window stays open so
# the logout can be completed by hand.
selector = '#content > h3'
logged_out = WebDriverWait(driver, 60).until(
    EC.text_to_be_present_in_element((By.CSS_SELECTOR, selector), 'You are now logged out.'))

driver.quit()

## Quit

In [171]:
# Close the browser session. The logout cell above already ends with driver.quit().
# NOTE(review): presumably kept as a manual fallback for when that cell did not finish - confirm.
driver.quit()

# Check results match 

In [146]:
# Load the Excel file 'Barcelona Innovation Triangle Startups.xlsx'.
# NOTE(review): presumably the DNB export of the list filled in Part 2 - confirm.
dnb_df = pd.read_excel('Barcelona Innovation Triangle Startups.xlsx')

In [148]:
# Preview the first rows of the startup table.
startup_df.head()

Unnamed: 0,id,name,matches
0,1,011H,1.0
1,3,123SONAR INSTRUMENTS.,1.0
2,4,3D INTERACTIVES SOLUTIONS SL,1.0
3,6,3digital factory,1.0
4,10,A Thousand Colibris,1.0


In [147]:
# Preview the first rows of the table loaded from the Excel file.
dnb_df.head()

Unnamed: 0,Company Name,ANZSIC 2006 Code,ANZSIC 2006 Description,Address Line 1,Address Line 2,Address Line 3,Address Type,Advisory Fees (As Reported),Advisory Fees (EUR),Advisory Fees (GBP),...,US Metropolitan Area,US SIC 1987 Code,US SIC 1987 Description,Ultimate Parent Company,Ultimate Parent Country/Region,Ultimate Parent D-U-N-S® Number,Women Owned,Year Founded,techMark Membership,Direct Marketing Status
0,ADSMURAI SL.,5420,Software Publishing,"Paseo Gracia, 60 - P. 4",,,,,,,...,,7372,Prepackaged Software,,,,,2014.0,,Has Not Opted Out of Direct Marketing
1,011H SUSTAINABLE CONSTRUCTION S.L.,3019,Other Residential Building Construction,"Calle Avila, 126 - 138 P. 4 Pta. 2",,,,,,,...,,1522,"General Contractors-Residential Buildings, Oth...",,,,,,,Has Not Opted Out of Direct Marketing
2,AGROPTIMA SL.,5420,Software Publishing,"Pasaje Mules, P.Pb Pta.1",,,,,,,...,,7372,Prepackaged Software,,,,,,,Has Not Opted Out of Direct Marketing
3,AERVIO SOLUTIONS SOCIEDAD LIMITADA.,7220,Travel Agency and Tour Arrangement Services,"Avenida Diagonal, 626 - P. 3 Pta. 2 B",,,,,,,...,,4724,Travel Agencies,AERVIO SOLUTIONS SOCIEDAD LIMITADA.,Spain,467171008.0,,2016.0,,Has Not Opted Out of Direct Marketing
4,ABLE HUMAN MOTION S.L.,2412,Medical and Surgical Equipment Manufacturing,"Calle Sant Eudald, 57 - P. 1 Pta. 1",,,,,,,...,,3843,Dental Equipment and Supplies,,,,,,,Has Not Opted Out of Direct Marketing


# Extra code

In [40]:
# Scratch cell: add 200 company names, starting at position 1200, to the
# company-name filter of the page currently open in `driver`.
# NOTE(review): `startup_names` is only defined in a commented-out line of the
# data-loading cell, so it must be created by hand before this cell can run.
start_pos = 1200
for name in startup_names[start_pos:start_pos+200]:
    # Add company name to input box (the element is looked up again on every iteration)
    selector = '#companyNameFilter > div:nth-child(3) > div:nth-child(1) > input'
    company_name_input = driver.find_element(by=By.CSS_SELECTOR, value=selector)
    # company_name_input = WebDriverWait(driver, 10).until(
    #     EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))

    company_name_input.send_keys(name)

    # Click on Add button
    selector = '#companyNameFilter > div:nth-child(3) > div.ant-col.spacer-x-small-left > button'
    add_btn = driver.find_element(by=By.CSS_SELECTOR, value=selector)
    # add_btn = WebDriverWait(driver, 10).until(
    #     EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))
    add_btn.click()

In [141]:
        
#         if n_results == 1: # there is a single match
#             results[name] = n_results
#             print("Found one result.")
#             break
        
        
#         if n_results - n_results_prev > 1: # there are multiple matches for this company
#             print(f"Multiple matches for company {name}.")
#             # Remove company from list
#             selector = '#selected-filter-companyNameFilter > ul > li:last-child > svg'
#             cross_btn = driver.find_element(by=By.CSS_SELECTOR, value= selector)
#             cross_btn.click()
#             # Save company
#             multiple_matches.append(name)
            
#         # Update results
#         n_results_prev = n_results

        
    # Click Search
    
    # print('Done.')
#     # Sleep
#     time.sleep(10)
    
#     # Get logout popup
#     selector = '//*[@id="main"]/section/div[2]/div/header/ul[2]/li[3]'
#     account_btn = WebDriverWait(driver, 10).until(
#         EC.element_to_be_clickable((By.XPATH, selector)))
#     account_btn.click()
    
#     # Click on logout button
#     selector = 'ul[data-menu-list="true"].ant-menu.ant-menu-sub.ant-menu-vertical > li:nth-child(4)'
#     logout_btn = WebDriverWait(driver, 10).until(
#         EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))
#     logout_btn.click()
#     # driver.quit()
    
# except Exception as e:
#     print(e)
    
# finally:
#     # Sleep
#     time.sleep(10)