### Importar librerías necesarias

In [1]:
import numpy as np
import pandas as pd
import regex as re
import selenium.webdriver as webdriver
import time

from geopy.geocoders import Nominatim
from geopy import distance
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

### Extracción de datos: Función de web scraping

In [3]:
def _click_when_clickable(browser, locator, timeout):
    """Wait up to ``timeout`` seconds for ``locator`` to be clickable, then click it."""
    WebDriverWait(browser, timeout).until(
        EC.element_to_be_clickable(locator)
    ).click()


def _enter_address(browser, field_id, address, timeout):
    """Type ``address`` into input ``field_id`` and pick the first autocomplete suggestion."""
    field = WebDriverWait(browser, timeout).until(
        EC.presence_of_element_located((By.ID, field_id))
    )
    field.send_keys(address)
    _click_when_clickable(
        browser,
        (By.XPATH, "//div[@class='pac-container pac-logo hdpi']/div[1]"),
        timeout,
    )


def _geocode(geolocator, address):
    """Return ``(latitude, longitude)`` for ``address``; raise if it cannot be resolved."""
    location = geolocator.geocode(address)
    if location is None:
        # geocode() yields None when nothing matches; fail with a readable message
        # instead of an AttributeError on ``None.latitude``.
        raise ValueError("No se pudo geocodificar la dirección: {!r}".format(address))
    return (location.latitude, location.longitude)


def get_results(search):
    """Scrape one delivery price estimate from https://www.envigo.uy.

    A Chrome session is opened, the estimate form is filled in with the values
    in ``search``, and the displayed price is read back. The pickup and
    delivery addresses are also geocoded with Nominatim to compute the
    geodesic distance between them.

    Parameters
    ----------
    search : dict
        Must contain the keys:
        ``"retiro"`` (pickup address), ``"entrega"`` (delivery address),
        ``"tamano"`` (``"small"``, ``"medium"`` or ``"large"``),
        ``"cantidad_de_paquetes"`` (integer-like, 0 to 10),
        ``"opcion_de_entrega"`` (``"express"`` or ``"coordinado"``) and
        ``"cantidad_de_asistentes"`` (integer-like, 0 to 5).

    Returns
    -------
    list
        ``[lat_retiro, lng_retiro, lat_entrega, lng_entrega, km, precio]``
        where ``km`` is the geodesic distance in kilometres and ``precio`` is
        the text captured between ``$`` and ``IVA`` in the price label.

    Raises
    ------
    Exception
        With message ``'Valor no válido'`` when a form value is out of range
        or not one of the accepted options.
    ValueError
        When an address cannot be geocoded, when the price label does not
        have the expected format, or when a count is not integer-like.
    selenium exceptions
        When an expected page element does not appear within the timeout.
    """
    url = "https://www.envigo.uy"
    timeout = 15

    # --- Validate every input up front so bad rows fail before a browser starts.
    # Maps the package size to the position of its <label> inside #package-size.
    package_label_index = {"small": 1, "medium": 2, "large": 3}
    # Maps the delivery option to the position of its <label> in the form.
    delivery_label_index = {"express": 1, "coordinado": 2}

    tamano = search["tamano"]
    if tamano not in package_label_index:
        raise Exception('Valor no válido')

    n_paquetes = int(search["cantidad_de_paquetes"])
    if not 0 <= n_paquetes <= 10:
        raise Exception('Valor no válido')

    opcion_de_entrega = search["opcion_de_entrega"]
    if opcion_de_entrega not in delivery_label_index:
        raise Exception('Valor no válido')

    n_asistentes = int(search["cantidad_de_asistentes"])
    if not 0 <= n_asistentes <= 5:
        raise Exception('Valor no válido')

    # NOTE(review): newer Selenium releases expect a Service object rather than
    # a positional driver path -- confirm against the installed version.
    browser = webdriver.Chrome("./chromedriver")
    try:
        browser.get(url)

        # Dismiss the initial pop-up by clicking its close button.
        _click_when_clickable(
            browser, (By.XPATH, "//div[@id='modal-content']/div/button"), timeout
        )

        # Pickup and delivery addresses (first autocomplete suggestion each).
        _enter_address(browser, "estimate_pickup_address", search["retiro"], timeout)
        _enter_address(browser, "estimate_delivery_address", search["entrega"], timeout)

        # Coordinates come from Nominatim for the typed addresses; they are not
        # read back from the page.
        geolocator = Nominatim(user_agent="sanico")
        lat_retiro, lng_retiro = _geocode(geolocator, search["retiro"])
        lat_entrega, lng_entrega = _geocode(geolocator, search["entrega"])
        km = distance.geodesic(
            (lat_retiro, lng_retiro), (lat_entrega, lng_entrega)
        ).km

        # Package size: wait on the radio input, then click its label.
        WebDriverWait(browser, timeout).until(
            EC.element_to_be_clickable((By.ID, "estimate_package_size_" + tamano))
        )
        browser.find_element(
            By.XPATH,
            "//div[@id='package-size']/div/label[{}]".format(package_label_index[tamano]),
        ).click()

        # Number of packages. The validated integer is sent so that a float
        # such as 1.0 coming from pandas is typed as "1", not "1.0".
        campo_paquetes = WebDriverWait(browser, timeout).until(
            EC.presence_of_element_located((By.ID, "estimate_number_of_packages"))
        )
        campo_paquetes.send_keys(str(n_paquetes))

        # Delivery option.
        browser.find_element(
            By.XPATH,
            "//div[@id='estimate_form_container']/div[2]/div[14]/div/label[{}]".format(
                delivery_label_index[opcion_de_entrega]
            ),
        ).click()

        # Number of assistants; always sent as text (send_keys cannot iterate
        # over a numpy scalar).
        browser.find_element(By.ID, "estimate_laborer_number").send_keys(
            str(n_asistentes)
        )

        # Presumably gives the page time to refresh the estimate before it is
        # read -- TODO confirm whether a more specific wait condition exists.
        time.sleep(1)
        precio_txt = browser.find_element(By.ID, "estimate_value_label").text
        coincidencia = re.search(r"(?<=\$)\s(.*?)\s(?=IVA)", precio_txt)
        if coincidencia is None:
            raise ValueError(
                "No se pudo extraer el precio de: {!r}".format(precio_txt)
            )
        precio = coincidencia[1]

        resultado = [lat_retiro, lng_retiro, lat_entrega, lng_entrega, km, precio]
        print(resultado)
        return resultado
    finally:
        # Always close the browser, including on failure, so that failed rows
        # do not leave Chrome processes behind.
        browser.quit()


### Lectura de datos de entrada

In [15]:
# Load the table of searches to run (one row per quote request) and preview it.
path = "./datos_entrada.csv"
datos_entrada = pd.read_csv(filepath_or_buffer=path)
datos_entrada.head(5)

Unnamed: 0,retiro,entrega,tamano,opcion_de_entrega,cantidad_de_paquetes,cantidad_de_asistentes
0,"Arocena, Montevideo, Uruguay","Mantua, Montevideo, Uruguay",small,express,0,0
1,"Arocena, Montevideo, Uruguay","Mantua, Montevideo, Uruguay",small,express,0,1
2,"Arocena, Montevideo, Uruguay","Mantua, Montevideo, Uruguay",small,express,0,2
3,"Arocena, Montevideo, Uruguay","Mantua, Montevideo, Uruguay",small,express,0,3
4,"Arocena, Montevideo, Uruguay","Mantua, Montevideo, Uruguay",small,express,0,4


### Extracción de datos

In [None]:
# Run one scrape per input row. `inputs` keeps the values sent for each row and
# `resultados` keeps the matching output of get_results (or NaNs on failure).
inputs = []
resultados = []

# Columns copied from each input row into the search dict, in logging order.
campos = [
    "retiro",
    "entrega",
    "tamano",
    "cantidad_de_paquetes",
    "opcion_de_entrega",
    "cantidad_de_asistentes",
]

for _, fila in datos_entrada.iterrows():
    search = {campo: fila[campo] for campo in campos}
    input_row = list(search.values())
    inputs.append(input_row)
    print(input_row)

    try:
        resultados.append(get_results(search))
    except Exception as error:
        # Best-effort scrape: a failed row becomes six NaNs so the output stays
        # aligned with the input. Only Exception is caught, so interrupting the
        # kernel (KeyboardInterrupt) still stops the loop, and the reason for
        # each failure is shown instead of being silently discarded.
        print("Fallo en la búsqueda:", repr(error))
        resultados.append([np.nan] * 6)

# Reuse the input index so the later column-wise concat lines rows up exactly.
resultados_df = pd.DataFrame(resultados, index=datos_entrada.index)
resultados_df.head()

### Renombrar columnas

In [7]:
# Name the six positional values returned by get_results, in the order the
# function places them in its result list.
resultados_df.columns = [
    "lat_retiro", "lng_retiro",
    "lat_entrega", "lng_entrega",
    "km", "precio",
]
resultados_df.head()

Unnamed: 0,lat_retiro,lng_retiro,lat_entrega,lng_entrega,km,precio
0,-34.887204,-56.057894,-34.887471,-56.050905,0.639574,250
1,-34.887204,-56.057894,-34.887471,-56.050905,0.639574,860
2,-34.887204,-56.057894,-34.887471,-56.050905,0.639574,1470
3,-34.887204,-56.057894,-34.887471,-56.050905,0.639574,2080
4,-34.887204,-56.057894,-34.887471,-56.050905,0.639574,250


### Combinar datos de entrada con resultados

In [10]:
# Put each row's scraped values next to its inputs (column-wise, aligned on the index).
results = pd.concat((datos_entrada, resultados_df), axis="columns")
results

Unnamed: 0,retiro,entrega,tamano,opcion_de_entrega,cantidad_de_paquetes,cantidad_de_asistentes,lat_retiro,lng_retiro,lat_entrega,lng_entrega,km,precio
0,"Arocena, Montevideo, Uruguay","Mantua, Montevideo, Uruguay",small,express,0.0,0.0,-34.887204,-56.057894,-34.887471,-56.050905,0.639574,250
1,"Arocena, Montevideo, Uruguay","Mantua, Montevideo, Uruguay",small,express,0.0,1.0,-34.887204,-56.057894,-34.887471,-56.050905,0.639574,860
2,"Arocena, Montevideo, Uruguay","Mantua, Montevideo, Uruguay",small,express,0.0,2.0,-34.887204,-56.057894,-34.887471,-56.050905,0.639574,1470
3,"Arocena, Montevideo, Uruguay","Mantua, Montevideo, Uruguay",small,express,0.0,3.0,-34.887204,-56.057894,-34.887471,-56.050905,0.639574,2080
4,"Arocena, Montevideo, Uruguay","Mantua, Montevideo, Uruguay",small,express,0.0,4.0,-34.887204,-56.057894,-34.887471,-56.050905,0.639574,250
...,...,...,...,...,...,...,...,...,...,...,...,...
301,"Arocena, Montevideo, Uruguay","Mantua, Montevideo, Uruguay",large,coordinado,0.0,1.0,,,,,,
302,"Arocena, Montevideo, Uruguay","Mantua, Montevideo, Uruguay",large,coordinado,0.0,2.0,,,,,,
303,"Arocena, Montevideo, Uruguay","Mantua, Montevideo, Uruguay",large,coordinado,0.0,3.0,,,,,,
304,"Arocena, Montevideo, Uruguay","Mantua, Montevideo, Uruguay",large,coordinado,0.0,4.0,,,,,,


In [11]:
# Persist the combined inputs + results table (the index is written as the first column).
results.to_csv(path_or_buf="results.csv")