## Extração de dados 

In [1]:
# Standard library imports
import random
import re
import time
from urllib.parse import urljoin

# Selenium imports
from selenium import webdriver
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

# Beautiful Soup imports
import bs4
from bs4 import BeautifulSoup

# Other third-party imports
import requests

# Local imports
import utils


In [2]:
# Start a Chrome session, letting webdriver_manager provide the chromedriver
# executable that the Service wraps.
chromedriver_path = ChromeDriverManager().install()
driver = webdriver.Chrome(service=Service(chromedriver_path))

# Give up on any page that takes longer than 60 seconds to load.
driver.set_page_load_timeout(60)

In [3]:
# Accessing blacktag: listing of events filtered by city (São Paulo)
url = 'https://blacktag.com.br/eventos?cidade=S%C3%A3o+Paulo'

# Using random user agents to avoid host suspicion
u_agent = random.choice(utils.user_agents)

# Every entry must be its own list item: a missing comma makes Python silently
# concatenate two adjacent string literals into a single bogus switch.
# NOTE(review): confirm that the storage/cookie switches below are recognised
# by the installed Chrome version.
arguments = [
    "--disable-cookies",
    "--disable-local-storage",
    "--disable-session-storage",
    "--block-third-party-cookies",
    f"--user-agent={u_agent}",
]

chrome_options = Options()
for arg in arguments:
    chrome_options.add_argument(arg)

# `driver` is already bound to a running Chrome session at this point; close it
# before rebinding the name so no orphaned browser process is left behind.
driver.quit()

driver = webdriver.Chrome(options=chrome_options)
# Carry over the page-load timeout that was configured on the replaced session.
driver.set_page_load_timeout(60)
# Let element lookups wait up to 10 seconds for the element to appear.
driver.implicitly_wait(10)

driver.get(url=url)

In [15]:
# Expand the listing by pressing the "load more" button until every event is shown.
timeout = 10
wait = WebDriverWait(driver, timeout)

load_more_locator = (By.ID, 'load-more-events')
# Event cards are the anchors that are later extracted from the page source.
event_card_locator = (By.CSS_SELECTOR, 'a.w-100')

# Only explicit waits are used from here on; Selenium's documentation advises
# against combining them with an implicit wait, so the implicit one is disabled.
driver.implicitly_wait(0)


def _load_more_visible(drv):
    """Return True while a displayed "load more" button is present on the page."""
    return any(btn.is_displayed() for btn in drv.find_elements(*load_more_locator))


# Give the button a chance to be rendered; a listing that fits on a single page
# never shows it, which is not an error.
try:
    wait.until(EC.presence_of_element_located(load_more_locator))
except TimeoutException:
    pass

while True:
    try:
        buttons = driver.find_elements(*load_more_locator)
        if not buttons or not buttons[0].is_displayed():
            break  # no visible button left: the whole listing is loaded

        cards_before = len(driver.find_elements(*event_card_locator))
        buttons[0].click()

        # Wait for the click to take effect (more cards, or the button gone)
        # instead of clicking again while the previous batch is still loading.
        wait.until(
            lambda drv: len(drv.find_elements(*event_card_locator)) > cards_before
            or not _load_more_visible(drv)
        )
    except StaleElementReferenceException:
        # The page re-rendered the element mid-check; look it up again.
        continue
    except TimeoutException:
        # The click produced nothing new within `timeout` seconds; stop here
        # rather than loop forever on a button that stays visible.
        break

# The browser is intentionally left open: its page source still has to be read
# after this cell. Call driver.quit() once that has been done.

In [5]:
BASE_URL = "https://blacktag.com.br"

# Capture the rendered listing, then release the browser session. To scrape
# again, re-run the cells that create the driver and load the listing.
try:
    listing_html = driver.page_source
finally:
    driver.quit()

soup = BeautifulSoup(listing_html, 'html.parser')

# href=True skips anchors without a link target, for which tag.get("href")
# would return None and break the URL construction below.
tags = soup.find_all("a", class_="w-100", href=True)

# urljoin copes with both site-relative and absolute hrefs; the "/ingressos"
# suffix points at each event's ticket page.
events = [urljoin(BASE_URL, tag["href"]) + "/ingressos" for tag in tags]
events

['https://blacktag.com.br/eventos/20385/quintafunk-a-noite-do-pecado/ingressos',
 'https://blacktag.com.br/eventos/20578/lancamento-med-tvii/ingressos',
 'https://blacktag.com.br/eventos/20635/sextou-open-bar-premium-10-05/ingressos',
 'https://blacktag.com.br/eventos/20604/audicao-album-rudies-flacko/ingressos',
 'https://blacktag.com.br/eventos/20483/rave-in-sp-noites-insanas-bday-guerreiro/ingressos',
 'https://blacktag.com.br/eventos/20787/pekando/ingressos',
 'https://blacktag.com.br/eventos/20475/cervejada-de-10-anos/ingressos',
 'https://blacktag.com.br/eventos/19225/amanha-te-conto/ingressos',
 'https://blacktag.com.br/eventos/20431/baile-de-gala-uninove-saude-4062/ingressos',
 'https://blacktag.com.br/eventos/20866/open-sunset-11-05-open-bar/ingressos',
 'https://blacktag.com.br/eventos/19531/pandora-studios-15-anos/ingressos',
 'https://blacktag.com.br/eventos/20634/open-double-open-bar-premium-c-dj-rody-11-05/ingressos',
 'https://blacktag.com.br/eventos/20432/baile-de-gala-

In [16]:
# Seed the results table: one {"url": ...} record per event, keyed by the
# event's position in `events`.
event_info = dict(enumerate({"url": event_url} for event_url in events))
print(event_info)

{0: {'url': 'https://blacktag.com.br/eventos/20385/quintafunk-a-noite-do-pecado/ingressos'}, 1: {'url': 'https://blacktag.com.br/eventos/20578/lancamento-med-tvii/ingressos'}, 2: {'url': 'https://blacktag.com.br/eventos/20635/sextou-open-bar-premium-10-05/ingressos'}, 3: {'url': 'https://blacktag.com.br/eventos/20604/audicao-album-rudies-flacko/ingressos'}, 4: {'url': 'https://blacktag.com.br/eventos/20483/rave-in-sp-noites-insanas-bday-guerreiro/ingressos'}, 5: {'url': 'https://blacktag.com.br/eventos/20787/pekando/ingressos'}, 6: {'url': 'https://blacktag.com.br/eventos/20475/cervejada-de-10-anos/ingressos'}, 7: {'url': 'https://blacktag.com.br/eventos/19225/amanha-te-conto/ingressos'}, 8: {'url': 'https://blacktag.com.br/eventos/20431/baile-de-gala-uninove-saude-4062/ingressos'}, 9: {'url': 'https://blacktag.com.br/eventos/20866/open-sunset-11-05-open-bar/ingressos'}, 10: {'url': 'https://blacktag.com.br/eventos/19531/pandora-studios-15-anos/ingressos'}, 11: {'url': 'https://blackta

In [17]:
# requests applies no timeout by default, so a stalled connection would hang
# the whole loop; cap every request at this many seconds.
REQUEST_TIMEOUT_S = 30

# First run of digits in the allotment text (the lot number).
LOT_PATTERN = r'\d+'
# Price with a decimal comma. '.' thousands separators are accepted as well
# (e.g. "1.200,00"), which a plain \d+,\d+ would truncate to "200,00".
PRICE_PATTERN = r'\d[\d.]*,\d+'


def _clean_text(node):
    """Return the stripped text of a BeautifulSoup node, or None if the node is missing."""
    return node.text.strip() if node is not None else None


def _first_match(pattern, text):
    """Return the first match of `pattern` in `text`.

    Falls back to `text` itself when nothing matches, and to None when `text`
    is None.
    """
    if text is None:
        return None
    found = re.search(pattern, text)
    return found.group(0) if found else text


def parse_event_page(html):
    """Extract the event details from the markup of an event's ticket page.

    Args:
        html: HTML of an ".../ingressos" page.

    Returns:
        A dict with the keys "name", "date" and "time" (each a stripped string,
        or None when the corresponding element is not in the markup) and
        "tickets", a dict mapping each ticket type to {"lot": ..., "price": ...}.
    """
    page = BeautifulSoup(html, 'html.parser')

    name = _clean_text(page.find("a", class_="text-dark"))

    # The first two matching paragraphs are read as date and time, in that order.
    schedule = page.find_all("p", class_="text-primary mb-sm-2 mb-1")
    date = _clean_text(schedule[0]) if len(schedule) > 0 else None
    start_time = _clean_text(schedule[1]) if len(schedule) > 1 else None

    tickets = {}
    for row in page.find_all("div", class_="col py-2 px-4"):
        ticket_type = _clean_text(row.find("div", class_="ml-md-2"))
        if ticket_type is None:
            continue  # container without a ticket name: not a ticket row

        lot_text = _clean_text(row.find("div", class_="col-md-3 col-12 allotment"))
        price_text = _clean_text(row.find("div", class_="col-md-3 col-12 price"))

        # NOTE(review): rows sharing the same ticket type overwrite each other,
        # because the ticket type is the dictionary key.
        tickets[ticket_type] = {
            "lot": _first_match(LOT_PATTERN, lot_text),
            "price": _first_match(PRICE_PATTERN, price_text),
        }

    return {"name": name, "date": date, "time": start_time, "tickets": tickets}


# Iterate over the records themselves so the loop stays in sync with
# `event_info` and does not rebind the notebook-level `url`.
for info in event_info.values():
    # Random pause between requests to keep the load on the host low.
    time.sleep(random.uniform(0.5, 1.5))

    headers = {
        'User-Agent': random.choice(utils.user_agents),
    }

    params = {
        'localStorageEnabled': 'false',
        'sessionStorageEnabled': 'false',
    }

    response = requests.get(
        info["url"],
        headers=headers,
        cookies={},
        params=params,
        timeout=REQUEST_TIMEOUT_S,
    )
    # Fail loudly on HTTP errors instead of parsing an error page into a
    # record full of empty fields.
    response.raise_for_status()

    info.update(parse_event_page(response.text))

In [18]:
# Display the records collected in `event_info` for inspection.
event_info

{0: {'url': 'https://blacktag.com.br/eventos/20385/quintafunk-a-noite-do-pecado/ingressos',
  'name': 'QuintaFunk: A Noite do Pecado',
  'date': 'Qui 09 de Mai',
  'time': '23:00',
  'tickets': {'ALUNO ESPM': {'lot': '4', 'price': '121,00'},
   'NÃO ALUNO': {'lot': '4', 'price': '136,00'}}},
 1: {'url': 'https://blacktag.com.br/eventos/20578/lancamento-med-tvii/ingressos',
  'name': 'MED T7 SUNSET',
  'date': 'Sex 10 de Mai',
  'time': '16:30',
  'tickets': {'Ingesso Festa de Lançamento - 1º Lote': {'lot': '1',
    'price': '100,00'}}},
 2: {'url': 'https://blacktag.com.br/eventos/20635/sextou-open-bar-premium-10-05/ingressos',
  'name': '#SEXTOU - OPEN BAR',
  'date': 'Sex 10 de Mai',
  'time': '20:00',
  'tickets': {'Antecipado': {'lot': '1', 'price': '59,90'}}},
 3: {'url': 'https://blacktag.com.br/eventos/20604/audicao-album-rudies-flacko/ingressos',
  'name': 'Audição álbum “Menino bom, Mlk Ruim”  Rudies Convida !  LIL GIELA  /  THE BOY  / BABY G2C / MOBTIES §',
  'date': 'Sex 10 