# Eventbrite Web Crawler

In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd


In [2]:
# Shared Chrome options: every driver launched in this notebook is created
# with this object, so the flags below apply to all browser sessions.
options = webdriver.ChromeOptions()
for chrome_flag in ('--headless', '--no-sandbox', '--disable-dev-shm-usage'):
    options.add_argument(chrome_flag)

# url = 'https://www.eventbrite.com/d/australia/business--events/?page=1'

def _element_text(parent, css_selector):
    """Return the stripped text of the first element matching css_selector under parent.

    Any lookup failure (element missing, stale reference, ...) yields ''.
    """
    try:
        return parent.find_element(By.CSS_SELECTOR, css_selector).text.strip()
    except Exception:
        return ''


def _fetch_event_description(event_url, timeout):
    """Open event_url in its own browser and return the text of 'p.summary'.

    Returns '' if the browser cannot be started, the page does not show
    'div.event-details__main' within `timeout` seconds, or the summary
    paragraph is missing. The browser is always shut down before returning.
    """
    try:
        detail_driver = webdriver.Chrome('chromedriver', options=options)
    except Exception:
        return ''
    try:
        detail_driver.get(event_url)
        WebDriverWait(detail_driver, timeout).until(
            EC.visibility_of_all_elements_located((By.CSS_SELECTOR, 'div.event-details__main'))
        )
        return _element_text(detail_driver, 'p.summary')
    except Exception:
        return ''
    finally:
        # Previously this browser was never closed, leaking one Chrome
        # process per event. Shutdown errors are ignored on purpose so a
        # failed quit cannot abort the rest of the listing page.
        try:
            detail_driver.quit()
        except Exception:
            pass


def eventbite_webpage_crawler(url, catagory, listing_timeout=1, detail_timeout=10):
    """Scrape one Eventbrite search-result page into a list of event dicts.

    Parameters
    ----------
    url : str
        Address of the search-result (listing) page to load.
    catagory : str
        Stored unchanged under the 'event_category' key of every record.
    listing_timeout : int or float, optional
        Seconds to wait for 'div.search-event-card-wrapper' to become visible
        on the listing page (default 1, the original hard-coded value).
    detail_timeout : int or float, optional
        Seconds to wait for 'div.event-details__main' on each event's own
        page (default 10, the original hard-coded value).

    Returns
    -------
    list of dict
        One dict per 'div.eds-event-card--consumer' element, with keys
        'event_name', 'event_time', 'event_location', 'event_category',
        'event_link' and 'event_description'. A field that cannot be read is
        the empty string.

    Raises
    ------
    Exception
        Whatever Selenium raises while starting the browser, loading `url`
        or waiting for the listing (e.g. a timeout). The listing browser is
        closed before the exception propagates.

    Notes
    -----
    NOTE(review): the sample output in this notebook contains rows with an
    empty name/time/location but a valid link, each followed by a fully
    populated row for the same link — presumably the selector also matches a
    hidden duplicate card. Confirm before filtering them out.
    NOTE(review): passing the driver path positionally to webdriver.Chrome is
    rejected by newer Selenium 4 releases — confirm the installed version.
    """
    events = []
    driver = webdriver.Chrome('chromedriver', options=options)
    try:
        driver.get(url)
        WebDriverWait(driver, listing_timeout).until(
            EC.visibility_of_all_elements_located((By.CSS_SELECTOR, 'div.search-event-card-wrapper'))
        )
        event_items = driver.find_elements(By.CSS_SELECTOR, 'div.eds-event-card--consumer')

        print(len(event_items))

        for event_item in event_items:
            # The name lookup stays inline so the progress print only
            # happens when the element was actually found.
            try:
                event_name = event_item.find_element(
                    By.CSS_SELECTOR, 'div.eds-event-card__formatted-name--is-clamped'
                ).text.strip()
                print(event_name)
            except Exception:
                event_name = ''

            event_time = _element_text(event_item, 'div.eds-event-card-content__sub-title')
            event_location = _element_text(event_item, 'div.card-text--truncated__one')

            try:
                event_link_text = event_item.find_element(
                    By.CSS_SELECTOR, 'a.eds-event-card-content__action-link'
                ).get_attribute('href').strip()
            except Exception:
                event_link_text = ''

            # Test the link *text* of this card: the element variable used
            # before could be left over from a previous iteration (or be
            # unbound on the first one), which launched a browser for an
            # empty URL.
            if event_link_text and event_name:
                event_description = _fetch_event_description(event_link_text, detail_timeout)
            else:
                event_description = ''

            events.append({
                'event_name': event_name,
                'event_time': event_time,
                'event_location': event_location,
                'event_category': catagory,
                'event_link': event_link_text,
                'event_description': event_description
            })
    finally:
        # Close the listing browser even when the wait above times out.
        driver.quit()
    return events

In [3]:
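# NOTE: sequential version of the crawl, superseded by the threaded cell below; kept for reference only.
# events = []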

# catagories = ['business--events',
#               'travel-and-outdoor--events',
#               'music--events',
#               'arts--events',
#               'auto-boat-and-air--events',
#               'spirituality--events',
#               'science-and-tech--events']

# pages = 15

# for catagory in catagories:
#     print(catagory)
#     for page_number in range(1, pages):     
#         url = f'https://www.eventbrite.com/d/australia/{catagory}/?page={page_number}'
#         print(url)
#         try:
#             new_events = eventbite_webpage_crawler(url,catagory)
#         except Exception:
#             pass
#         for e in new_events:
#             events.append(e)


In [4]:
import concurrent.futures

# Accumulates the records returned by every crawled page.
events = list()

# Category slugs substituted into the Eventbrite search URL.
catagories = [
    'business--events',
    'travel-and-outdoor--events',
    'music--events',
    'arts--events',
    'auto-boat-and-air--events',
    'spirituality--events',
    'science-and-tech--events',
]

# Exclusive upper bound of the page loop: the crawl iterates
# range(1, pages), so page number `pages` itself is not requested.
# NOTE(review): confirm whether 14 or 15 pages per category was intended.
pages = 15

def process_page(catagory, page_number):
    """Crawl one listing page of one category and return its event records.

    Builds the Eventbrite search URL for `catagory` / `page_number`, prints
    it as a progress marker and delegates to eventbite_webpage_crawler.

    The crawl is best-effort: if the crawler raises for any reason, the page
    is skipped and an empty list is returned, so one bad page cannot abort
    the whole run. The failure is printed instead of being silently dropped,
    so missing pages are visible in the cell output.

    Parameters
    ----------
    catagory : str
        Category slug inserted into the URL path.
    page_number : int
        Value of the `page` query parameter.

    Returns
    -------
    list
        The crawler's result, or [] if the crawler raised.
    """
    url = f'https://www.eventbrite.com/d/australia/{catagory}/?page={page_number}'
    print(url)
    try:
        return eventbite_webpage_crawler(url, catagory)
    except Exception as exc:
        print(f'Skipping {url}: {exc!r}')
        return []

# Fan the (category, page) jobs out over a thread pool, then merge each
# page's records into `events` as soon as that page finishes.
with concurrent.futures.ThreadPoolExecutor() as pool:
    pending = []
    for catagory in catagories:
        print(catagory)
        pending.extend(
            pool.submit(process_page, catagory, page_number)
            for page_number in range(1, pages)
        )

    # as_completed yields in completion order, so the order of `events`
    # is not tied to the submission order.
    for finished in concurrent.futures.as_completed(pending):
        events.extend(finished.result())

business--events
https://www.eventbrite.com/d/australia/business--events/?page=1
https://www.eventbrite.com/d/australia/business--events/?page=2
https://www.eventbrite.com/d/australia/business--events/?page=3
https://www.eventbrite.com/d/australia/business--events/?page=4
https://www.eventbrite.com/d/australia/business--events/?page=5
https://www.eventbrite.com/d/australia/business--events/?page=6
https://www.eventbrite.com/d/australia/business--events/?page=7
https://www.eventbrite.com/d/australia/business--events/?page=8
https://www.eventbrite.com/d/australia/business--events/?page=9
https://www.eventbrite.com/d/australia/business--events/?page=10
https://www.eventbrite.com/d/australia/business--events/?page=11
https://www.eventbrite.com/d/australia/business--events/?page=12
travel-and-outdoor--events
music--events
arts--events
auto-boat-and-air--events
spirituality--events
science-and-tech--events
40

Careers with Purpose Conversations event | Sydney
40

The Future of Built-To-Rent


# Meetup Webscraper

In [5]:
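# The Meetup scraper below ended up not working -- I think they rate-limited or banned my IP :(
# It is kept commented out for reference. If it is re-enabled, TimeoutException must first be
# imported (from selenium.common.exceptions); it is not among the notebook's imports.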

# driver = webdriver.Chrome('chromedriver', options=options)

# def scrape_event_details(url):
#     driver.get(url)
#     wait = WebDriverWait(driver, 20)
#     wait.until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, 'body')))

#     try:
#         event_description = driver.find_element(By.CSS_SELECTOR, 'div.p-description').text.strip()
#     except Exception:
#         event_description = ''

#     return {
#         'event_description': event_description
#     }

# url = 'https://www.meetup.com/find/events/?allMeetups=true&radius=Infinity&userFreeform=Australia&gcResults=Australia%7CAU'

# while url:
#     driver.get(url)
#     wait = WebDriverWait(driver, 20)
    
#     try:
#         wait.until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, 'div.event-listing-container')))
#         event_items = driver.find_elements(By.CSS_SELECTOR, 'li.list-item')

#         for event_item in event_items:
#             event_name = event_item.find_element(By.CSS_SELECTOR, 'span.event-group-name').text.strip()
#             event_time = event_item.find_element(By.CSS_SELECTOR, 'time').text.strip()
#             event_location = event_item.find_element(By.CSS_SELECTOR, 'address').text.strip()
#             event_link = event_item.find_element(By.CSS_SELECTOR, 'a.eventCard--link')
#             event_link_text = event_link.get_attribute('href').strip()

#             event_details = scrape_event_details(event_link_text)

#             event = {
#                 'event_name': event_name,
#                 'event_time': event_time,
#                 'event_location': event_location,
#                 'event_category': '',
#                 'event_link': event_link_text,
#                 'event_description': event_details['event_description']
#             }
#             events.append(event)

#         next_page = driver.find_elements(By.CSS_SELECTOR, 'a.page-link.next')
#         url = next_page[0].get_attribute('href') if next_page else None

#     except TimeoutException:
#         print("TimeoutException occurred. Skipping this page...")
#         break

# print(events)
# driver.quit()

In [6]:
# Dump every scraped record, one per line, as a quick sanity check.
if events:
    print('\n'.join(str(record) for record in events))

{'event_name': '', 'event_time': '', 'event_location': '', 'event_category': 'business--events', 'event_link': 'https://www.eventbrite.com.au/e/digital-marketing-ai-and-website-foundations-for-small-business-tickets-594273507057?aff=ebdssbdestsearch', 'event_description': ''}
{'event_name': 'Digital Marketing, AI and Website Foundations for Small Business', 'event_time': 'Tue, May 16, 9:30 AM', 'event_location': 'Wanneroo Civic Centre, Banksia Room 2 • Wanneroo, WA', 'event_category': 'business--events', 'event_link': 'https://www.eventbrite.com.au/e/digital-marketing-ai-and-website-foundations-for-small-business-tickets-594273507057?aff=ebdssbdestsearch', 'event_description': 'Learn how to elevate your online presence and reach your ideal customers through effective digital marketing and AI strategies.'}
{'event_name': '', 'event_time': '', 'event_location': '', 'event_category': 'business--events', 'event_link': 'https://www.eventbrite.com.au/e/thrive-together-tickets-559927677757?af

In [7]:
# Tabulate the scraped records: one row per event dict, one column per key.
df = pd.DataFrame(data=events)

df

Unnamed: 0,event_name,event_time,event_location,event_category,event_link,event_description
0,,,,business--events,https://www.eventbrite.com.au/e/digital-market...,
1,"Digital Marketing, AI and Website Foundations ...","Tue, May 16, 9:30 AM","Wanneroo Civic Centre, Banksia Room 2 • Wanner...",business--events,https://www.eventbrite.com.au/e/digital-market...,Learn how to elevate your online presence and ...
2,,,,business--events,https://www.eventbrite.com.au/e/thrive-togethe...,
3,Thrive Together,"Fri, May 19, 11:00 AM + 2 more events",Work Tank Coworking & Serviced Offices • Niddr...,business--events,https://www.eventbrite.com.au/e/thrive-togethe...,Thrive Together - A monthly networking event e...
4,,,,business--events,https://www.eventbrite.com.au/e/unsw-undergrad...,
...,...,...,...,...,...,...
3553,SciencePlay PD/PL for Primary and early second...,"Thu, Jun 8, 10:00 AM","SciencePlay Kids • Altona North, VIC",science-and-tech--events,https://www.eventbrite.com.au/e/scienceplay-pd...,We'll share some low-tech approaches to create...
3554,,,,science-and-tech--events,https://www.eventbrite.com.au/e/code-club-scra...,
3555,Code Club: Scratch - Cabramatta,"Tue, May 2, 5:00 PM + 7 more events","Whitlam Library Cabramatta • Cabramatta, NSW",science-and-tech--events,https://www.eventbrite.com.au/e/code-club-scra...,Begin your Code Club Australia Scratch journey...
3556,,,,science-and-tech--events,https://www.eventbrite.com.au/e/lego-bricq-ess...,


In [8]:
import os
from datetime import datetime

# Timestamp the snapshot so repeated crawls never overwrite each other.
current_date_time = datetime.now().strftime("%d-%m-%Y_%H-%M-%S")

# to_pickle does not create missing directories; make sure "data/" exists so
# a long crawl is not lost to a missing-directory error at save time.
os.makedirs("data", exist_ok=True)
df.to_pickle(f"data/df_request_{current_date_time}.pkl")

In [9]:
# Re-display the DataFrame that was pickled in the previous cell.
df

Unnamed: 0,event_name,event_time,event_location,event_category,event_link,event_description
0,,,,business--events,https://www.eventbrite.com.au/e/digital-market...,
1,"Digital Marketing, AI and Website Foundations ...","Tue, May 16, 9:30 AM","Wanneroo Civic Centre, Banksia Room 2 • Wanner...",business--events,https://www.eventbrite.com.au/e/digital-market...,Learn how to elevate your online presence and ...
2,,,,business--events,https://www.eventbrite.com.au/e/thrive-togethe...,
3,Thrive Together,"Fri, May 19, 11:00 AM + 2 more events",Work Tank Coworking & Serviced Offices • Niddr...,business--events,https://www.eventbrite.com.au/e/thrive-togethe...,Thrive Together - A monthly networking event e...
4,,,,business--events,https://www.eventbrite.com.au/e/unsw-undergrad...,
...,...,...,...,...,...,...
3553,SciencePlay PD/PL for Primary and early second...,"Thu, Jun 8, 10:00 AM","SciencePlay Kids • Altona North, VIC",science-and-tech--events,https://www.eventbrite.com.au/e/scienceplay-pd...,We'll share some low-tech approaches to create...
3554,,,,science-and-tech--events,https://www.eventbrite.com.au/e/code-club-scra...,
3555,Code Club: Scratch - Cabramatta,"Tue, May 2, 5:00 PM + 7 more events","Whitlam Library Cabramatta • Cabramatta, NSW",science-and-tech--events,https://www.eventbrite.com.au/e/code-club-scra...,Begin your Code Club Australia Scratch journey...
3556,,,,science-and-tech--events,https://www.eventbrite.com.au/e/lego-bricq-ess...,
