In [11]:
import time
import pandas as pd
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException, NoSuchElementException, WebDriverException
import random
from datetime import datetime, timedelta
import os

# Initialize the Selenium WebDriver
def init_driver():
    options = uc.ChromeOptions()
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    
    # Set the user data directory to store the session
    options.add_argument(r'--user-data-dir=C:\Users\Irfan Ullah\Desktop\sessions')

    driver = uc.Chrome(options=options)
    return driver

# Function to handle the process
def main_handler(username, password, univer):
    driver = init_driver()
    driver.get("https://www.facebook.com/")
    time.sleep(5)  # Allow page to load
    
    # Check if already logged in
    if "login" in driver.current_url:
        print(f"[ERROR] Login required for {username}. Skipping...")
        driver.quit()
        return
    else:
        print(f"[INFO] Using existing session for {username}.")
        
        # Load visited accounts from CSV file
        visited_accounts_file = "visited_accounts.csv"
        if os.path.exists(visited_accounts_file):
            visited_accounts = pd.read_csv(visited_accounts_file)
        else:
            visited_accounts = pd.DataFrame(columns=["link"])

        # Initialize message counter and timestamp
        message_counter = 0
        first_message_time = None
        
        # Iterate through each university name in the CSV file
        for index, row in univer.iterrows():
            try:
                university_name = row['university']
                print(f"[INFO] Searching for university: {university_name} for {username}.")
                
                # Navigate to the groups search URL
                driver.get(f"https://www.facebook.com/search/groups?q={university_name}")
                time.sleep(4)  # Allow page to load

                # Click on the filter to set it to public groups only
                set_public_groups_filter(driver)

                # Match university name with searched items and click on them
                match_and_click_searched_items(driver, univer, message_counter, first_message_time, visited_accounts, visited_accounts_file)

            except WebDriverException as e:
                if "invalid session id" in str(e):
                    print("[ERROR] Invalid session id. Reinitializing driver...")
                    driver.quit()
                    driver = init_driver()
                else:
                    print(f"[ERROR] Unexpected error while searching for university: {e}")

    driver.quit()

def set_public_groups_filter(driver):
    try:
        # Click on the filter to set it to public groups only
        public_groups_filter = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, "//input[@aria-label='Public groups']"))
        )
        public_groups_filter.click()
        time.sleep(5)  # Wait for the filter to apply

        print("[INFO] Set the filter to public groups only.")

    except TimeoutException:
        print("[ERROR] Timeout while waiting for the public groups filter to be clickable.")
    except NoSuchElementException:
        print("[ERROR] Public groups filter element not found.")
    except Exception as e:
        print(f"[ERROR] Error while setting the filter to public groups only: {e}")

def match_and_click_searched_items(driver, univer, message_counter, first_message_time, visited_accounts, visited_accounts_file):
    try:
        # Find all searched items
        searched_items = WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located((By.XPATH, "//div[@role='article']//a"))
        )

        # Extract links of matched items
        matched_links = []
        for item in searched_items:
            title = item.text
            for index, row in univer.iterrows():
                university_name = row['university']
                if university_name.lower() in title.lower():
                    matched_links.append(item.get_attribute('href'))
                    break

        # Limit to top 3 matched links
        matched_links = matched_links[:3]

        # Click on matched links one by one
        for link in matched_links:
            if link not in visited_accounts['link'].values:
                driver.get(link)
                time.sleep(5)  # Wait for the page to load
                print(f"[INFO] Clicked on the link: {link}")

                # Click on the "People" tab
                click_people_tab(driver)

                # Match university name with people in the "People" tab and click on their profile
                match_and_click_people(driver, univer, message_counter, first_message_time, visited_accounts, visited_accounts_file, link)

    except TimeoutException:
        print("[ERROR] Timeout while waiting for the searched items to be clickable.")
    except NoSuchElementException:
        print("[ERROR] Searched items element not found.")
    except Exception as e:
        print(f"[ERROR] Error while matching and clicking on the searched items: {e}")

def click_people_tab(driver):
    try:
        # Find the "People" tab
        people_tab = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, "//a[contains(@href, 'members') and contains(@role, 'tab')]"))
        )
        # Scroll the "People" tab into view
        driver.execute_script("arguments[0].scrollIntoView(false);", people_tab)
        time.sleep(1)  # Wait for the scrolling to complete

        # Click on the "People" tab
        people_tab.click()
        time.sleep(5)  # Wait for the people tab to load

        print("[INFO] Clicked on the 'People' tab.")

    except TimeoutException:
        print("[ERROR] Timeout while waiting for the 'People' tab to be clickable.")
    except NoSuchElementException:
        print("[ERROR] 'People' tab element not found.")
    except Exception as e:
        print(f"[ERROR] Error while clicking on the 'People' tab: {e}")

def match_and_click_people(driver, univer, message_counter, first_message_time, visited_accounts, visited_accounts_file, group_link):
    try:
        # Scroll and load more people
        last_height = driver.execute_script("return document.body.scrollHeight")
        while True:
            # Scroll down to the bottom
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)  # Wait for the page to load

            # Calculate new scroll height and compare with last scroll height
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

        # Find all people in the "People" tab
        people_items = WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, "div.x9f619.x1n2onr6.x1ja2u2z.x78zum5.xdt5ytf.x2lah0s.x193iq5w.x1gslohp.x12nagc.xzboxd6.x14l7nz5 a"))
        )

        # Debugging information
        print(f"[INFO] Found {len(people_items)} people items.")

        # Extract links of matched people
        matched_people_links = []
        for item in people_items:
            try:
                # Ensure the link is a profile link and not an invite button
                link = item.get_attribute('href')
                if link and ("profile.php?id=" in link or "/user/" in link or "/groups/" in link):
                    matched_people_links.append(link)
                else:
                    print(f"[DEBUG] Skipped non-profile link: {link}")
            except NoSuchElementException:
                print("[ERROR] Title element not found for a person item.")

        # Debugging information
        print(f"[INFO] Matched profile links: {matched_people_links}")

        # Skip the first link (group itself) and click on matched people links one by one
        for link in matched_people_links[1:4]:  # Skip the first link and limit to top 3
            if link not in visited_accounts['link'].values:
                driver.get(link)
                time.sleep(5)  # Wait for the profile page to load
                print(f"[INFO] Clicked on the profile link: {link}")

                # Add the profile link to the visited accounts CSV file
                visited_accounts = pd.concat([visited_accounts, pd.DataFrame({"link": [link]})], ignore_index=True)
                visited_accounts.to_csv(visited_accounts_file, index=False)

                # Click on the "View Profile" link
                click_view_profile(driver)

                # Check the default overview section for "studies" or "studied"
                check_overview_section(driver, univer, message_counter, first_message_time, visited_accounts, visited_accounts_file, link)

    except TimeoutException:
        print("[ERROR] Timeout while waiting for the people items to be clickable.")
    except NoSuchElementException:
        print("[ERROR] People items element not found.")
    except Exception as e:
        print(f"[ERROR] Error while matching and clicking on the people items: {e}")

def click_view_profile(driver):
    try:
        # Find the "View Profile" link
        view_profile_link = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, "//span[contains(text(),'View profile')]"))
        )
        view_profile_link.click()
        time.sleep(5)  # Wait for the profile page to load

        print("[INFO] Clicked on the 'View Profile' link.")

    except TimeoutException:
        print("[ERROR] Timeout while waiting for the 'View Profile' link to be clickable.")
    except NoSuchElementException:
        print("[ERROR] 'View Profile' link element not found.")
    except Exception as e:
        print(f"[ERROR] Error while clicking on the 'View Profile' link: {e}")

def send_message(driver, message, message_counter, first_message_time):
    try:
        # Check if the message limit is reached
        if message_counter >= 2:
            current_time = datetime.now()
            if first_message_time is None:
                first_message_time = current_time
            elapsed_time = current_time - first_message_time
            if elapsed_time < timedelta(hours=1):
                wait_time = timedelta(hours=1) - elapsed_time
                print(f"[INFO] Message limit reached. Waiting for {wait_time} before sending more messages.")
                time.sleep(wait_time.total_seconds())
                message_counter = 0
                first_message_time = datetime.now()

        # Find the message button
        message_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, "//span[@class='x1lliihq x6ikm8r x10wlt62 x1n2onr6 xlyipyv xuxw1ft'][normalize-space()='Message']"))
        )
        message_button.click()
        time.sleep(5)  # Wait for the message window to load

        # Find the message input box
        message_box = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, "//p[@class='xat24cr xdj266r']"))
        )
        
        messagebox_close = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//*[name()='path' and contains(@d,'m98.095 91')]")))
        # Type the message letter by letter
        for char in message:
            message_box.send_keys(char)
            time.sleep(random.uniform(0.5, 1))  # Adjust the delay as needed
        time.sleep(8)
# message_box.send_keys(Keys.RETURN) # Uncomment to send the message
        print("[INFO] Message sent successfully.")
        messagebox_close.click()
        print("[INFO] Message box closed successfully.")
        time.sleep(3)

        # Increment the message counter
        message_counter += 1

    except TimeoutException:
        print("[ERROR] Timeout while waiting for the message button or input box to be clickable.")
    except NoSuchElementException:
        print("[ERROR] Message button or input box element not found.")
    except Exception as e:
        print(f"[ERROR] Error while sending the message: {e}")

def smooth_scroll(driver, scroll_amount):
    driver.execute_script(f"window.scrollBy(0, {scroll_amount});")
    time.sleep(2)

def scroll_to_element_smoothly(driver, element):
    driver.execute_script("arguments[0].scrollIntoView({ behavior: 'smooth', block: 'center' });", element)
    time.sleep(2)

def extract_text_from_element(driver, xpath):
    try:
        element = driver.find_element(By.XPATH, xpath)
        return element.text.strip()
    except NoSuchElementException:
        return None

def check_overview_section(driver, univer, message_counter, first_message_time, visited_accounts, visited_accounts_file, profile_link):
    try:
        about_button = WebDriverWait(driver, 20).until(
            EC.element_to_be_clickable((By.XPATH, "//span[text()='About']"))
        )
        driver.execute_script("arguments[0].click();", about_button)
        time.sleep(7)
        print("Clicked About section.")

        studies_xpath = "//div[contains(@class, 'x13faqbe') and contains(@class, 'x78zum5') and contains(@class, 'xdt5ytf')]//span[contains(text(), 'Studies')]"
        studies_text = extract_text_from_element(driver, studies_xpath)
        print(studies_text)

        if studies_text:
            for index, row in univer.iterrows():
                university_name = row['university']
                if university_name.lower() in studies_text.lower():
                    print(f"[INFO] Found 'studies' at {university_name}' in the overview section.")
                    print("He/she is a student and we can message him/her.")
                    send_message(driver, "Hi, how are you?", message_counter, first_message_time)
                    break
            else:
                print("Not a Student.")
        else:
            print("Not a Student.")

        # Add the profile link to the visited accounts CSV file
        visited_accounts = pd.concat([visited_accounts, pd.DataFrame({"link": [profile_link]})], ignore_index=True)
        visited_accounts.to_csv(visited_accounts_file, index=False)

    except Exception as e:
        print(f"Error: {e}")

if __name__ == "__main__":
    # Load account credentials
    accounts = pd.read_csv("C:/Users/Irfan Ullah/Documents/main/staticfiles/account.csv")
    univer = pd.read_csv("C:/Users/Irfan Ullah/Documents/main/staticfiles/uniall.csv")  # Ensure correct path
    
    for i, row in accounts.iterrows():
        uname = row['username']
        pas = row['password']
        main_handler(uname, pas, univer)



ParserError: Error tokenizing data. C error: Expected 1 fields in line 60, saw 2


In [14]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import undetected_chromedriver as uc
from selenium.common.exceptions import NoSuchElementException
import pandas as pd

def init_driver():
    options = uc.ChromeOptions()
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    
    # Set the user data directory to store the session
    options.add_argument(r'--user-data-dir=C:\Users\Irfan Ullah\Desktop\sessions')

    driver = uc.Chrome(options=options)
    return driver

def match_and_click_people(driver, univer):
    # Find all people in the "People" tab
    people_items = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.CSS_SELECTOR, "div[data-visualcompletion='ignore-dynamic'] a"))
    )
    
    # Extract links of matched people
    matched_people_links = []
    
    for item in people_items:
        try:
            # For each item, we need to find the title inside that specific "person" item
            title_element = item.find_element(By.XPATH, "span.x193iq5w")  # Adjusted XPath
            title = title_element.text
            print(f"[DEBUG] Found title: {title}")
            
            # Loop through the DataFrame of universities to match titles
            for index, row in univer.iterrows():
                university_name = row['university']
                if university_name.lower() in title.lower():
                    link = item.get_attribute('href')
                    if link and isinstance(link, str):
                        matched_people_links.append(link)
                        break
                    else:
                        print("[ERROR] Invalid URL found for a person item.")
        except NoSuchElementException:
            print("[ERROR] Title element not found for a person item.")
    
    return matched_people_links


# Initialize the driver
driver = init_driver()
driver.get("https://www.facebook.com/groups/383691761755058/members")

# Example usage of the match_and_click_people function
# Replace 'univer' with a pandas DataFrame containing university names for matching
data = data = {'university': ['Abertay University', 'Aberystwyth University']}
univer = pd.DataFrame(data)

matched_links = match_and_click_people(driver, univer)
print(matched_links)


[ERROR] Title element not found for a person item.
[ERROR] Title element not found for a person item.
[ERROR] Title element not found for a person item.
[ERROR] Title element not found for a person item.
[ERROR] Title element not found for a person item.
[ERROR] Title element not found for a person item.
[ERROR] Title element not found for a person item.
[ERROR] Title element not found for a person item.
[ERROR] Title element not found for a person item.
[ERROR] Title element not found for a person item.
[ERROR] Title element not found for a person item.
[ERROR] Title element not found for a person item.
[ERROR] Title element not found for a person item.
[ERROR] Title element not found for a person item.
[ERROR] Title element not found for a person item.
[ERROR] Title element not found for a person item.
[ERROR] Title element not found for a person item.
[ERROR] Title element not found for a person item.
[ERROR] Title element not found for a person item.
[ERROR] Title element not found