In [None]:
#Selenium is a web testing framework that automates browser interactions.
#It provides a suite of tools to simulate user actions, interact with web elements,
#and perform automated testing of web applications. Selenium supports various programming languages 
#and is widely used for web scraping, testing, and automating repetitive tasks.

In [10]:
# import libraries
from selenium import webdriver #used for automating web browser interactions.
from selenium.webdriver.common.by import By  #The By class is used to locate elements on a web page.
from selenium.webdriver.support.ui import WebDriverWait#WebDriverWait is used for waiting until a certain condition is met before the execution.
from selenium.webdriver.support import expected_conditions as EC  # This module provides a set of predefined conditions to wait for.
from selenium.common.exceptions import TimeoutException# This exception is raised when an operation times out.
from tqdm import tqdm  # tqdm renders a progress bar in the console.
# It is not part of Selenium; here it shows progress while iterating over the profiles.
import csv

In [9]:

def _wait_for_text(driver, xpath, default, timeout=10):
    """Return the text of the first element matching ``xpath``.

    Waits up to ``timeout`` seconds for the element to be present in the DOM.
    If the wait times out, ``default`` is returned instead of raising.

    Args:
        driver: The Selenium WebDriver whose current page is searched.
        xpath: XPath expression locating the element.
        default: Placeholder returned when the element never appears.
        timeout: Maximum number of seconds to wait.

    Returns:
        The element's ``.text``, or ``default`` on ``TimeoutException``.
    """
    try:
        # .text stays inside the try, exactly where the inline stanzas had it;
        # only TimeoutException is handled, anything else propagates.
        return WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located((By.XPATH, xpath))
        ).text
    except TimeoutException:
        return default


def _scrape_profile(driver, profile):
    """Load one profile page and return its CSV row, or ``None`` if it never loads.

    Args:
        driver: The Selenium WebDriver used to load the page.
        profile: The account handle (the path segment after twitter.com/).

    Returns:
        ``[profile, bio, following_count, followers_count, location, website]``
        or ``None`` when the page-load wait times out.
    """
    driver.get(f'https://twitter.com/{profile}')

    # Page-load gate: wait for the bio element before scraping anything.
    # NOTE(review): because the gate is the bio element itself, a profile
    # without a bio appears to be skipped entirely rather than written with
    # bio='NA' -- confirm whether that is intended or whether the gate should
    # be an element every profile has.
    try:
        WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.XPATH, '//div[@data-testid="UserDescription"]'))
        )
    except TimeoutException:
        print(f"Timed out waiting for {profile} page to load.")
        return None

    bio = _wait_for_text(driver, '//div[@data-testid="UserDescription"]', 'NA')

    # NOTE(review): the two counts are located purely by position among all
    # role="link" anchors on the page (1st and 3rd match) -- presumably
    # fragile if the page layout changes; verify against the live page.
    following_count = _wait_for_text(driver, '(//a[@role="link"]/span/span)[1]', '-')
    followers_count = _wait_for_text(driver, '(//a[@role="link"]/span/span)[3]', '-')

    # Location and website are optional; each missing one costs the full wait.
    location = _wait_for_text(driver, '//span[@data-testid="UserLocation"]/span/span', 'NA')
    website = _wait_for_text(driver, '//a[@data-testid="UserUrl"]/span', 'NA')

    return [profile, bio, following_count, followers_count, location, website]


if __name__ == '__main__':
    # Setting up the Chrome WebDriver
    driver = webdriver.Chrome()

    # Everything after the driver is created runs under try/finally so the
    # browser process is closed even if opening the file, loading a page or
    # reading an element raises.
    try:
        # Creating a CSV file and loading the data
        with open('twitter_data.csv', 'w', newline='', encoding='utf-8') as csv_file:
            csv_writer = csv.writer(csv_file)
            csv_writer.writerow(['Profile', 'Bio', 'Following Count', 'Followers Count', 'Location', 'Website'])

            # Here are the profiles to extract data from
            profiles = ['GTNUK1', 'whatsapp', 'aacb_CBPTrade', 'aacbdotcom', 'AAWindowPRODUCT',
                        'aandb_kia', 'ABHomeInc', 'Abrepro', 'ACChristofiLtd',
                        'aeclothing1', 'wix', 'AGInsuranceLLC', 'chiproytx', 'Ron4California', 'myogiadityanath']

            # Iterating through the profiles
            for profile in tqdm(profiles):
                row = _scrape_profile(driver, profile)
                if row is None:
                    # Page never loaded; no row is written for this profile.
                    continue

                # Writing data to CSV file
                csv_writer.writerow(row)
    finally:
        # Closing the browser
        driver.quit()



100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [01:26<00:00,  5.77s/it]
