# Location Searching Using Google and Selenium

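This notebook uses Selenium to drive a headless Chrome browser and look up the geographic coordinates of the institutions listed by participants in the May MolSSI webinar series. Each institution is first searched on Google; when Google shows no coordinates, the institution's address is looked up instead and geocoded with the mapdevelopers.com geocode tool. The results are saved to `results.json` and plotted on a world map with Plotly.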

In [4]:
import os
import time

import pandas as pd
from selenium import webdriver

In [5]:
# Read institutions.csv and keep only its first column, as a pandas Series
institutions_table = pd.read_csv('institutions.csv')
institutions = institutions_table.iloc[:, 0]

In [8]:
# Headless Chrome options (no visible browser window)
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument("--headless")

# No browser is launched in this cell. The scraping cell below starts its own
# driver and rebinds `driver` before using it, so an instance created here was
# never used or closed and only left a stray headless Chrome process behind.

In [9]:
# Scrape latitude/longitude for every distinct institution.
#
# Per institution:
#   1. Google "<institution> coordinates" and read the first element with class
#      'Z0LcW', expected to hold text like "42.0565° N, 87.6753° W".
#   2. If that yields nothing usable, Google the bare institution name, read the
#      first element with class 'LrzXr' as an address, and geocode that address
#      with the mapdevelopers.com geocode tool.
#
# NOTE(review): the class names and element ids are tied to the pages' markup at
# the time this was written and may need updating. The `find_element(s)_by_*`
# helpers and the `executable_path` argument are the Selenium 3 API.

GEOCODE_WAIT_SECONDS = 3  # fixed pause that lets the geocoder page fill in its result


def _google_search(driver, query):
    """Load the Google home page and submit `query` through its search box."""
    driver.get('https://www.google.com')
    search_box = driver.find_element_by_name('q')
    search_box.send_keys(query)
    search_box.submit()


def _first_inner_html(driver, class_name):
    """Return the innerHTML of the first element with `class_name`, or None if there is none."""
    elements = driver.find_elements_by_class_name(class_name)
    return elements[0].get_attribute('innerHTML') if elements else None


def _parse_coordinates(text):
    """Convert text such as '42.0565° N, 87.6753° W' into signed (latitude, longitude) floats.

    Each value is assumed to end with a three-character suffix (degree sign,
    space, hemisphere letter). Values ending in 'S' or 'W' are negated.

    Returns:
        A (latitude, longitude) tuple, or None when `text` is empty, does not
        split into exactly two comma-separated parts, or a part is not numeric.
    """
    if not text:
        return None

    parts = [part.strip() for part in text.split(',')]
    if len(parts) != 2:
        return None

    converted = []
    for part in parts:
        try:
            value = float(part[:-3])  # drop the trailing "° N" style suffix
        except ValueError:
            return None
        converted.append(-value if part.endswith(('S', 'W')) else value)
    return converted[0], converted[1]


def _geocode_address(driver, address):
    """Look `address` up with the mapdevelopers.com geocode tool.

    Returns:
        A (latitude, longitude) tuple of floats, or None when the page does not
        show numeric values after the wait.
    """
    driver.get('https://www.mapdevelopers.com/geocode_tool.php')
    driver.find_element_by_id('address').send_keys(address)
    # The second 'btn-default' element is the button that starts the lookup
    driver.find_elements_by_class_name('btn-default')[1].click()

    # Wait for page to load
    time.sleep(GEOCODE_WAIT_SECONDS)

    try:
        latitude = float(driver.find_element_by_id("display_lat").get_attribute('innerHTML'))
        longitude = float(driver.find_element_by_id("display_lng").get_attribute('innerHTML'))
    except (TypeError, ValueError):
        return None
    return latitude, longitude


def _locate(driver, institution):
    """Return (latitude, longitude) for `institution`, or None if neither method finds it."""
    _google_search(driver, F'{institution} coordinates')
    located = _parse_coordinates(_first_inner_html(driver, 'Z0LcW'))
    if located is not None:
        return located

    print(F"Coordinates not found for {institution} - trying alternate method")

    _google_search(driver, F'{institution}')
    address = _first_inner_html(driver, 'LrzXr')
    if address is None:
        return None
    return _geocode_address(driver, address)


chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument("--headless")

# Fall back to a `chromedriver` found on PATH when EXECUTABLE_PATH is not set,
# instead of handing None to Selenium.
driver = webdriver.Chrome(
    executable_path=os.getenv('EXECUTABLE_PATH', 'chromedriver'),
    options=chrome_options,
)

coordinates = {'institution': [], 'latitude': [], 'longitude': []}
try:
    # dropna() keeps blank CSV rows from being searched as "nan coordinates"
    for institution in institutions.dropna().unique():
        located = _locate(driver, institution)
        if located is None:
            print(F"No coordinates found for {institution} - skipping")
            continue

        # Append all three values together, only once both numbers are known,
        # so the three lists always stay the same length.
        latitude, longitude = located
        coordinates['institution'].append(institution)
        coordinates['latitude'].append(latitude)
        coordinates['longitude'].append(longitude)
finally:
    # quit() ends the whole browser session even when a lookup raised;
    # close() would only close the current window.
    driver.quit()

Coordinates not found for Northwestern University - trying alternate method
Coordinates not found for Universidad de Guanajuato - trying alternate method
Coordinates not found for Fox Chase Cancer Center - trying alternate method
Coordinates not found for Research Institute of Influenza - trying alternate method
Coordinates not found for izmir biomedicine and genome center - trying alternate method
Coordinates not found for Acibadem University - trying alternate method
Coordinates not found for Bombay College of Pharmacy - trying alternate method
Coordinates not found for Izmir Biomedicine and Genome Center - trying alternate method
Coordinates not found for Akamara Biomedicine - trying alternate method


In [10]:
import json

# Serialise the collected coordinates dict and write it to results.json
with open('results.json', 'w+') as results_file:
    results_file.write(json.dumps(coordinates))

In [11]:
# Plotly imports
import plotly.graph_objs as go
import plotly.io as pio

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# Enable inline Plotly rendering for this notebook. Per the plotly.offline docs,
# connected=True loads plotly.js from the CDN rather than embedding it in the
# notebook, so displaying figures needs network access.
init_notebook_mode(connected=True)

In [16]:
# Marker appearance for every institution: red circle with a thin black outline
marker_style = dict(
    color='rgb(237, 28, 36)',
    size=10,
    opacity=0.8,
    symbol='circle',
    line=dict(width=1, color='rgb(0, 0, 0)'),
)

# One marker per scraped institution, labelled with the institution name
institution_trace = go.Scattergeo(
    mode='markers',
    lat=coordinates['latitude'],
    lon=coordinates['longitude'],
    text=coordinates['institution'],
    marker=marker_style,
)
data = [institution_trace]

# World map with light land fill and thin dark country/subunit borders
layout = {
    'geo': {
        'scope': 'world',
        'showland': True,
        'showframe': True,
        'landcolor': "rgb(250, 250, 250)",
        'subunitcolor': "rgb(57, 53, 54)",
        'countrycolor': "rgb(57, 53, 54)",
        'countrywidth': 0.5,
        'subunitwidth': 0.5,
    },
}

# Show the figure inline in the notebook
fig = go.Figure(data=data, layout=layout)
iplot(fig)

# Also export the same figure as an SVG file
pio.write_image(fig, 'may_webinar_locations_new.svg')