In [None]:
# Import necessary libraries
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup as bs
import pandas as pd
import time

# LinkedIn account credentials typed into the login form (left blank here).
username = ""
password = ""

# Events search results URL; a "&page=N" suffix is appended per page later on.
base_url = "https://www.linkedin.com/search/results/events/?origin=SWITCH_SEARCH_VERTICAL&sid=ie7"

# Start Chrome with a default Options object (no extra flags are set).
chrome_options = Options()
browser = webdriver.Chrome(options=chrome_options)

# Load the login form, then pause briefly so its fields exist in the DOM.
browser.get('https://www.linkedin.com/login')
time.sleep(2)

# Fill in both fields and submit the form through the password field.
username_field = browser.find_element(By.ID, "username")
username_field.send_keys(username)
password_field = browser.find_element(By.ID, "password")
password_field.send_keys(password)
password_field.submit()

# Fixed pause to let the post-login navigation finish.
time.sleep(5)

# Accumulates one dict per scraped event across all result pages.
events_data = []

# Seconds to wait after each scroll step so newly loaded content can render.
# It never changes, so it is defined once instead of on every page iteration.
SCROLL_PAUSE_TIME = 2


def _scroll_to_bottom(driver, pause):
    """Scroll the current page down until document.body.scrollHeight stops growing."""
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(pause)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            return
        last_height = new_height


def _text_of(element):
    """Return the whitespace-stripped text of a parsed element, or "" if it was not found."""
    return element.text.strip() if element is not None else ""


def _parse_event(container):
    """Build the record for one result container; fields that are missing become ""."""
    link = container.find("a", {"class": "app-aware-link"})
    return {
        "Event Name": _text_of(link),
        # Tag.get() returns the default instead of raising KeyError when the
        # anchor has no href attribute, so one odd link cannot abort the run.
        "Event Link": link.get("href", "") if link is not None else "",
        "Date": _text_of(
            container.find("div", {"class": "entity-result__primary-subtitle"})
        ),
        "Platform": _text_of(
            container.find("div", {"class": "entity-result__secondary-subtitle"})
        ),
        "About": _text_of(
            container.find("p", {"class": "entity-result__summary"})
        ),
        "Attendees": _text_of(
            container.find("span", {"class": "reusable-search-simple-insight__text"})
        ),
    }


# Main loop: visit result pages 1 through 10 and collect their events.
for page_number in range(1, 11):
    search_url = f"{base_url}&page={page_number}"  # Add the page number to the URL
    browser.get(search_url)

    # Fixed pause for the initial page load.
    time.sleep(5)

    # Scroll until the page height is stable so every result is in the DOM.
    _scroll_to_bottom(browser, SCROLL_PAUSE_TIME)

    # Parse the rendered page source with BeautifulSoup.
    events_page = browser.page_source
    linkedin_soup = bs(events_page, "html.parser")

    # Each matching div is treated as one event container.
    event_containers = linkedin_soup.find_all("div", {"class": "entity-result__divider"})

    for container in event_containers:
        event = _parse_event(container)
        # Skip containers that yielded no information at all.
        if any(event.values()):
            events_data.append(event)

# Export the collected events, then shut the browser down. The finally clause
# guarantees browser.quit() runs even if the export raises (for example when
# no Excel writer engine is installed or the target file cannot be written),
# so a failed export does not leave a Chrome process behind.
try:
    # Convert the data into a DataFrame
    events_df = pd.DataFrame(events_data)

    # Export the DataFrame to an Excel file in the current working directory
    excel_file = "linkedin_events.xlsx"
    events_df.to_excel(excel_file, index=False)
    print(f"Events data exported to {excel_file}")
finally:
    # Quit the browser
    browser.quit()
