# In [2]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException, TimeoutException, WebDriverException
import os
import csv
import time
import threading
import re 

def setup_driver():
    """Build a Chrome WebDriver configured with the ``selenium`` profile dir.

    The path returned by ``ChromeDriverManager().install()`` is handed to the
    Selenium ``Service`` that backs the driver.

    Returns:
        The ``webdriver.Chrome`` instance, or ``None`` when a
        ``WebDriverException`` is raised during setup (the error is printed).
    """
    try:
        opts = Options()
        # Presumably keeps the browser session (login) between runs -- the
        # code only shows that a fixed profile directory is requested.
        opts.add_argument("user-data-dir=selenium")
        return webdriver.Chrome(
            service=Service(ChromeDriverManager().install()),
            options=opts,
        )
    except WebDriverException as e:
        print(f"Error setting up WebDriver: {e}")
        return None

def split_date_time_name(a):
    """Split a ``"[<first>, <second>] <name>:"`` prefix into its three parts.

    The input is matched from its start against ``[x, y] name:``. Despite the
    function's name, the parts are returned in the order they appear in the
    text; the caller in this file writes the second bracket field to the
    "Date" column and the first one to the "Time" column.

    Args:
        a: The prefix string to parse. ``None`` (or any non-string value) is
            accepted and treated as unparseable instead of raising.

    Returns:
        A 3-tuple ``(first_bracket_field, second_bracket_field, name)``, or
        ``(None, None, None)`` when the input does not match.
    """
    # A missing attribute reaches us as None; re.match would raise TypeError
    # on it, so report it the same way as any other unparseable input.
    if not isinstance(a, str):
        return None, None, None

    pattern = r'\[(.*?), (.*?)\] (.*?):'
    match = re.match(pattern, a)
    if match is None:
        return None, None, None
    return match.group(1), match.group(2), match.group(3)

def extract_messages_to_csv(driver, writer, group_name):
    """Write one row per parseable text message of the current page to *writer*.

    Waits up to 10 seconds for the message elements to be present. For each
    element, its ``data-pre-plain-text`` attribute is parsed with
    ``split_date_time_name`` and the message text is read from a nested span.
    Rows are written as
    ``[group_name, parsed[1], parsed[2], parsed[0], message_text]``.

    Args:
        driver: Selenium WebDriver; assumed to already show the target chat.
        writer: Object exposing ``writerow`` (e.g. a ``csv.writer``).
        group_name: Name stored in the first column and used in log output.

    Returns:
        None. A timeout while waiting for messages, and any problem with an
        individual message, is printed and otherwise ignored.
    """
    x_path_of_all_text_messages = '//div[@role = "row"]//div[contains(@class , "copyable-text")]'
    # Loop-invariant, so built once instead of once per message.
    text_message_xpath = './/span[@dir = "ltr"]/span'

    # Only the wait itself can time out; keep the try limited to it.
    try:
        message_elements = WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located((By.XPATH, x_path_of_all_text_messages))
        )
    except TimeoutException:
        print(f"Timeout waiting for messages in group: {group_name}")
        return

    for message_element in message_elements:
        # Pre-bind so the handlers below can always format it, even if
        # get_attribute itself is what failed.
        text_data = None
        try:
            text_data = message_element.get_attribute("data-pre-plain-text")
            if text_data is None:
                # Attribute absent: nothing to parse, so skip it explicitly
                # rather than letting the parser fail with a TypeError.
                print(f"Skipping message due to parsing error: {text_data}")
                continue

            text_message = message_element.find_element(By.XPATH, text_message_xpath)

            splitted_data = split_date_time_name(text_data)
            if not all(splitted_data):
                print(f"Skipping message due to parsing error: {text_data}")
                continue

            writer.writerow(
                [group_name, splitted_data[1], splitted_data[2], splitted_data[0], text_message.text]
            )
        except NoSuchElementException:
            print(f"Could not find text message element in message: {text_data}")
        except Exception as e:
            # Best-effort scrape: one bad message must not abort the rest.
            print(f"Error processing message: {e}")

def _xpath_literal(text):
    """Return *text* as an XPath 1.0 string literal, whatever quotes it holds."""
    if '"' not in text:
        return f'"{text}"'
    if "'" not in text:
        return f"'{text}'"
    # XPath 1.0 has no escape character; join the pieces around each double
    # quote with concat().
    pieces = ", '\"', ".join(f'"{part}"' for part in text.split('"'))
    return f"concat({pieces})"


def main(group_names=None, output_path='whatsapp_messages.csv', load_wait=20):
    """Open WhatsApp Web and dump the messages of each listed chat to a CSV.

    For every name in *group_names* the chat entry whose ``title`` equals that
    name is clicked, the function sleeps *load_wait* seconds, and
    ``extract_messages_to_csv`` appends the messages to the output file.

    Args:
        group_names: Iterable of chat titles to scrape. Defaults to
            ``["ScrapingTesting", "G1 Branch"]``.
        output_path: CSV file to (over)write. Defaults to
            ``'whatsapp_messages.csv'``.
        load_wait: Seconds to sleep after opening a chat before scraping.

    Returns:
        None. Returns early if the driver could not be created. Errors are
        printed rather than raised, and the driver is always quit.
    """
    if group_names is None:
        group_names = ["ScrapingTesting", "G1 Branch"]

    driver = setup_driver()
    if not driver:
        return

    try:
        driver.get("https://web.whatsapp.com/")

        with open(output_path, 'w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(["Group Name", "Date", "Sender Name", "Time", "Message"])

            for group_name in group_names:
                try:
                    # Quote the name safely so titles containing " or ' do
                    # not produce an invalid XPath expression.
                    x_path = f'//span[@dir = "auto" and @title ={_xpath_literal(group_name)}]'
                    # Long timeout: presumably leaves time to log in on the
                    # first run -- TODO confirm.
                    chathead_element = WebDriverWait(driver, 60).until(
                        EC.element_to_be_clickable((By.XPATH, x_path))
                    )
                    chathead_element.click()
                    time.sleep(load_wait)  # Allow time for messages to load
                    extract_messages_to_csv(driver, writer, group_name)
                except TimeoutException:
                    print(f"Could not find or click on group: {group_name}")
                except Exception as e:
                    print(f"Error processing group {group_name}: {e}")
                finally:
                    # Each group can take over a minute; push finished rows
                    # to disk so an interrupted run keeps them.
                    file.flush()
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
    finally:
        driver.quit()

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()