<a href="https://colab.research.google.com/github/cristianbossolasco/scraper-agencias-viajes/blob/main/scraper_busplus.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%%shell
# Ubuntu no longer distributes chromium-browser outside of snap
#
# Proposed solution: https://askubuntu.com/questions/1204571/how-to-install-chromium-without-snap

# Add debian buster
cat > /etc/apt/sources.list.d/debian.list <<'EOF'
deb [arch=amd64 signed-by=/usr/share/keyrings/debian-buster.gpg] http://deb.debian.org/debian buster main
deb [arch=amd64 signed-by=/usr/share/keyrings/debian-buster-updates.gpg] http://deb.debian.org/debian buster-updates main
deb [arch=amd64 signed-by=/usr/share/keyrings/debian-security-buster.gpg] http://deb.debian.org/debian-security buster/updates main
EOF

# Add keys
apt-key adv --keyserver keyserver.ubuntu.com --recv-keys DCC9EFBF77E11517
apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 648ACFD622F3D138
apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 112695A0E562B32A

apt-key export 77E11517 | gpg --dearmour -o /usr/share/keyrings/debian-buster.gpg
apt-key export 22F3D138 | gpg --dearmour -o /usr/share/keyrings/debian-buster-updates.gpg
apt-key export E562B32A | gpg --dearmour -o /usr/share/keyrings/debian-security-buster.gpg

# Prefer debian repo for chromium* packages only
# Note the double-blank lines between entries
cat > /etc/apt/preferences.d/chromium.pref << 'EOF'
Package: *
Pin: release a=eoan
Pin-Priority: 500


Package: *
Pin: origin "deb.debian.org"
Pin-Priority: 300


Package: chromium*
Pin: origin "deb.debian.org"
Pin-Priority: 700
EOF

# Install chromium and chromium-driver
apt-get update
apt-get install chromium chromium-driver

# Install selenium
pip install selenium

In [72]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import pandas as pd
import datetime

In [12]:
url = 'https://raw.githubusercontent.com/cristianbossolasco/scraper-agencias-viajes/main/busplus_files/paradas.csv'
df_paradas = pd.read_csv(url, delimiter = ';', encoding = 'unicode_escape')

In [21]:
df_paradas

Unnamed: 0,id,direccion,latitud,longitud,nombre
0,6,,-35137317600,-6.045238e+10,Bragado - Caminera (Buenos Aires - ARG)
1,7,,-35838385600,-6.187472e+10,Pehuajo - ESSO (Buenos Aires - ARG)
2,8,,-35636855700,-6.135356e+10,Carlos Casares - Shell (Buenos Aires - ARG)
3,10,,-34905308000,-5.795551e+10,La Plata - Terminal (Buenos Aires - ARG)
4,11,,-35451965600,-6.088127e+10,9 de Julio - Terminal (Buenos Aires - ARG)
...,...,...,...,...,...
2921,3916,RN+12+y+ACC+RP+16,-26491790000,-5.467446e+10,Puerto Piray Terminal (Misiones - ARG)
2922,3917,,-42028365026,-7.154611e+10,Cerro Radal - Lago Puelo (Chubut - ARG)
2923,3918,Rogelio+Martinez+2066,-32447752000,-6.440507e+10,BerrotarÃ¡n - Terminal (CÃ³rdoba - ARG) (Cordo...
2924,3919,,-41159480346,-7.128390e+10,270 viviendas - Bariloche (Rio Negro - ARG)


0

In [87]:
def get_data(origen_parada, nombre_origen_parada, destino_parada, nombre_destino_parada, fecha_ida):
  url = f'https://checkout.busplus.com.ar/servicios?origen_parada={origen_parada}&destino_parada={destino_parada}&fecha_ida={fecha_ida}&fecha_vuelta=&pasajeros=1&empresa=&cupondescuento='

  options = Options()
  options.add_argument("--headless")
  options.add_argument("--no-sandbox")

  options.headless = True

  driver = webdriver.Chrome("/usr/bin/chromedriver", options=options)

  driver.get(url)

  # get_attribute('innerHTML') para obtener el html

  items = []
  filas  = driver.find_elements(By.CLASS_NAME,"fila_servicio.servicio_fila.idatr")
  for fila in filas:
    if not "hide " in fila.get_attribute(name="class"):
      id = fila.get_attribute('id')

      empresa_img = fila.find_element(By.TAG_NAME,"td").find_element(By.TAG_NAME, 'img').get_attribute('src')
      salida = driver.find_element(By.XPATH, f'//*[@id="{id}"]/td[2]').text.replace('\n', ' ')
      llegada = driver.find_element(By.XPATH, f'//*[@id="{id}"]/td[4]').text.replace('\n', ' ')
      escala = driver.find_element(By.XPATH, f'//*[@id="{id}"]/td[5]/a').text.replace('\n', ' ')
      duracion = driver.find_element(By.XPATH, f'//*[@id="{id}"]/td[6]').text.replace('\n', ' ')
      categoria = driver.find_element(By.XPATH, f'//*[@id="{id}"]/td[7]/ul/li[1]').text
      butacas_libres = driver.find_element(By.XPATH, f'//*[@id="{id}"]/td[8]/ul/li[1]').text
      precio = driver.find_element(By.XPATH, f'//*[@id="{id}"]/td[9]/ul/li[1]/span').text


      item_json = {
          "origen_parada": origen_parada,
          "nombre_origen_parada": nombre_origen_parada,
          "destino_parada": destino_parada,
          "nombre_destino_parada": nombre_destino_parada,
          "fecha_ida": fecha_ida,
          "empresa_img": empresa_img,
          "salida": salida,
          "llegada": llegada,
          "escala": escala,
          "duracion": duracion,
          "categoria": categoria,
          "butacas_libres": butacas_libres,
          "precio": precio
      }

      items.append(item_json)

  driver.quit()
  return items


In [92]:
filas = df_paradas.shape[0]
filas = 5
id_origen = 28
nombre_origen = '(BUE) Retiro - Terminal (Capital Federal - ARG) (Buenos Aires - ARG)'
resultado = []

for i in range(filas):
  id_destino = df_paradas[i:i+1]['id'].iloc[0]
  if id_destino == id_origen: continue
  nombre_destino = df_paradas[i:i+1]['nombre'].iloc[0]

  date = datetime.datetime(2023,1,25,0,0,0)
  for j in range(3): 
      date += datetime.timedelta(days=1)
      year = str(date.year)
      month = ('00' + str(date.month))[-2:]
      day = ('00' + str(date.day))[-2:]

      fecha = f'{year}/{month}/{day}'
      
      print(id_destino, nombre_destino, fecha)
      
      result = get_data(28, nombre_origen, id_destino, nombre_destino, fecha)

      resultado.extend(result)


6 Bragado - Caminera (Buenos Aires - ARG) 2023/01/26


  options.headless = True
  driver = webdriver.Chrome("/usr/bin/chromedriver", options=options)


6 Bragado - Caminera (Buenos Aires - ARG) 2023/01/27
6 Bragado - Caminera (Buenos Aires - ARG) 2023/01/28
7 Pehuajo - ESSO (Buenos Aires - ARG) 2023/01/26
7 Pehuajo - ESSO (Buenos Aires - ARG) 2023/01/27
7 Pehuajo - ESSO (Buenos Aires - ARG) 2023/01/28
8 Carlos Casares - Shell (Buenos Aires - ARG) 2023/01/26
8 Carlos Casares - Shell (Buenos Aires - ARG) 2023/01/27
8 Carlos Casares - Shell (Buenos Aires - ARG) 2023/01/28
10 La Plata - Terminal (Buenos Aires - ARG) 2023/01/26
10 La Plata - Terminal (Buenos Aires - ARG) 2023/01/27
10 La Plata - Terminal (Buenos Aires - ARG) 2023/01/28
11 9 de Julio - Terminal (Buenos Aires - ARG) 2023/01/26
11 9 de Julio - Terminal (Buenos Aires - ARG) 2023/01/27
11 9 de Julio - Terminal (Buenos Aires - ARG) 2023/01/28


In [93]:
resultado

[{'origen_parada': 28,
  'nombre_origen_parada': '(BUE) Retiro - Terminal (Capital Federal - ARG) (Buenos Aires - ARG)',
  'destino_parada': 7,
  'nombre_destino_parada': 'Pehuajo - ESSO (Buenos Aires - ARG)',
  'fecha_ida': '2023/01/26',
  'empresa_img': 'https://checkout.busplus.com.ar/images/v2/empresas/1.png',
  'salida': '15:00 Jue 26/01',
  'llegada': '20:30 Jue 26/01',
  'escala': 'Directo',
  'duracion': '05:30hs',
  'categoria': 'Cama Ejecutivo',
  'butacas_libres': '8',
  'precio': '$6045'},
 {'origen_parada': 28,
  'nombre_origen_parada': '(BUE) Retiro - Terminal (Capital Federal - ARG) (Buenos Aires - ARG)',
  'destino_parada': 7,
  'nombre_destino_parada': 'Pehuajo - ESSO (Buenos Aires - ARG)',
  'fecha_ida': '2023/01/26',
  'empresa_img': 'https://checkout.busplus.com.ar/images/v2/empresas/1.png',
  'salida': '20:15 Jue 26/01',
  'llegada': '01:15 Vier 27/01',
  'escala': 'Directo',
  'duracion': '05:00hs',
  'categoria': 'Cama Ejecutivo',
  'butacas_libres': '17',
  'pre

In [94]:
dfItem = pd.DataFrame.from_records(resultado)
dfItem


Unnamed: 0,origen_parada,nombre_origen_parada,destino_parada,nombre_destino_parada,fecha_ida,empresa_img,salida,llegada,escala,duracion,categoria,butacas_libres,precio
0,28,(BUE) Retiro - Terminal (Capital Federal - ARG...,7,Pehuajo - ESSO (Buenos Aires - ARG),2023/01/26,https://checkout.busplus.com.ar/images/v2/empr...,15:00 Jue 26/01,20:30 Jue 26/01,Directo,05:30hs,Cama Ejecutivo,8.0,$6045
1,28,(BUE) Retiro - Terminal (Capital Federal - ARG...,7,Pehuajo - ESSO (Buenos Aires - ARG),2023/01/26,https://checkout.busplus.com.ar/images/v2/empr...,20:15 Jue 26/01,01:15 Vier 27/01,Directo,05:00hs,Cama Ejecutivo,17.0,$6045
2,28,(BUE) Retiro - Terminal (Capital Federal - ARG...,7,Pehuajo - ESSO (Buenos Aires - ARG),2023/01/27,https://checkout.busplus.com.ar/images/v2/empr...,15:00 Vier 27/01,20:30 Vier 27/01,Directo,05:30hs,Cama Ejecutivo,1.0,$6045
3,28,(BUE) Retiro - Terminal (Capital Federal - ARG...,7,Pehuajo - ESSO (Buenos Aires - ARG),2023/01/27,https://checkout.busplus.com.ar/images/v2/empr...,20:15 Vier 27/01,01:15 Sáb 28/01,Directo,05:00hs,Cama Ejecutivo,6.0,$6045
4,28,(BUE) Retiro - Terminal (Capital Federal - ARG...,7,Pehuajo - ESSO (Buenos Aires - ARG),2023/01/28,https://checkout.busplus.com.ar/images/v2/empr...,15:00 Sáb 28/01,20:30 Sáb 28/01,Directo,05:30hs,Cama Ejecutivo,6.0,$6045
5,28,(BUE) Retiro - Terminal (Capital Federal - ARG...,7,Pehuajo - ESSO (Buenos Aires - ARG),2023/01/28,https://checkout.busplus.com.ar/images/v2/empr...,20:15 Sáb 28/01,01:15 Dom 29/01,Directo,05:00hs,Cama Ejecutivo,22.0,$6045
6,28,(BUE) Retiro - Terminal (Capital Federal - ARG...,8,Carlos Casares - Shell (Buenos Aires - ARG),2023/01/26,https://checkout.busplus.com.ar/images/v2/empr...,15:00 Jue 26/01,19:55 Jue 26/01,Directo,04:55hs,Cama Ejecutivo,8.0,$5070
7,28,(BUE) Retiro - Terminal (Capital Federal - ARG...,8,Carlos Casares - Shell (Buenos Aires - ARG),2023/01/26,https://checkout.busplus.com.ar/images/v2/empr...,20:15 Jue 26/01,00:40 Vier 27/01,Directo,04:25hs,Cama Ejecutivo,17.0,$5070
8,28,(BUE) Retiro - Terminal (Capital Federal - ARG...,8,Carlos Casares - Shell (Buenos Aires - ARG),2023/01/27,https://checkout.busplus.com.ar/images/v2/empr...,15:00 Vier 27/01,19:55 Vier 27/01,Directo,04:55hs,Cama Ejecutivo,1.0,$5070
9,28,(BUE) Retiro - Terminal (Capital Federal - ARG...,8,Carlos Casares - Shell (Buenos Aires - ARG),2023/01/27,https://checkout.busplus.com.ar/images/v2/empr...,20:15 Vier 27/01,00:40 Sáb 28/01,Directo,04:25hs,Cama Ejecutivo,6.0,$5070
