In [3]:
import time
import csv
import os
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException  # Updated import
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options


# ------------------------------------------------------------------------------
# USER CONFIGURATION
# ------------------------------------------------------------------------------
INSTAGRAM_USERNAME = "educo2019"
INSTAGRAM_PASSWORD = "sol2019"
TARGET_USER = "Ajandsmart"
OUTPUT_FILE       = "followers_of_instagram.csv"
SCROLL_LIMIT      = 10                  # <-- how many scroll attempts to load more followers
TIMEOUT           = 15                  # <-- max wait time for element loads
# ------------------------------------------------------------------------------

def init_driver(headless=False):
    """
    Initializes the Chrome WebDriver using webdriver_manager to ensure
    the correct version of ChromeDriver is installed.
    """
    chrome_options = Options()
    if headless:
        chrome_options.add_argument("--headless")
    # Optional: reduce log noise
    chrome_options.add_experimental_option("excludeSwitches", ["enable-logging"])

    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=chrome_options)
    return driver

def login_to_instagram(driver, username, password, timeout=TIMEOUT):
    """
    Logs into Instagram and handles cookie banners or other pop-ups.
    Raises an Exception if login fails (e.g., incorrect credentials).
    """
    driver.get("https://www.instagram.com/accounts/login/")
    wait = WebDriverWait(driver, timeout)

    # 1. Handle cookie pop-up (if it appears)
    #    The text of the cookie button can vary by region
    try:
        cookie_button = wait.until(
            EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Allow essential')]"))
        )
        cookie_button.click()
        print("Cookie banner dismissed.")
    except:
        print("No (or different) cookie banner found.")

    # 2. Wait for login fields to appear
    try:
        username_input = wait.until(
            EC.visibility_of_element_located((By.NAME, "username"))
        )
        password_input = wait.until(
            EC.visibility_of_element_located((By.NAME, "password"))
        )
    except:
        # Fallback if NAME='username' or NAME='password' changed
        print("Could not locate login fields by NAME. Trying alternative locators.")
        username_input = wait.until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, "input[name='username']"))
        )
        password_input = wait.until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, "input[name='password']"))
        )

    # 3. Enter credentials & submit
    username_input.clear()
    username_input.send_keys(username)
    password_input.clear()
    password_input.send_keys(password)
    password_input.send_keys(Keys.RETURN)

    # 4. Wait a bit for the main feed or next prompt
    time.sleep(5)

    # 5. Handle "Save your login info?" or "Turn on Notifications?" if it appears
    #    This text can vary
    try:
        not_now_btn = wait.until(
            EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Not Now')]"))
        )
        not_now_btn.click()
        print("Dismissed 'Save Info'/'Notifications' prompt.")
    except:
        pass

    # 6. Final check to ensure we are logged in
    time.sleep(3)
    if "login" in driver.current_url.lower():
        raise Exception("Login may have failed. Still on /login page. Check credentials or new login prompts.")

def scrape_followers(driver, target_user, scroll_limit=SCROLL_LIMIT, timeout=TIMEOUT):
    """
    Navigate to target user's profile, open the followers list, then scroll
    to load and collect followers. Returns a list of usernames.
    """
    wait = WebDriverWait(driver, timeout)

    # 1. Go to the target user's profile
    profile_url = f"https://www.instagram.com/{target_user}/"
    driver.get(profile_url)
    time.sleep(3)

    # 2. Click the 'followers' link
    try:
        followers_link = wait.until(
            EC.element_to_be_clickable((By.PARTIAL_LINK_TEXT, "followers"))
        )
        followers_link.click()
    except:
        raise Exception(f"Could not find a followers link on profile: {profile_url}")

    # 3. Wait for the dialog containing the followers list
    time.sleep(2)
    try:
        wait.until(
            EC.visibility_of_element_located((By.XPATH, "//div[@role='dialog']"))
        )
    except:
        raise Exception("Followers dialog didn't appear. Possibly a private account or unexpected layout.")

    followers = set()
    last_height = 0

    for i in range(scroll_limit):
        # Each iteration, we re-locate the popup to avoid stale references
        try:
            popup = driver.find_element(By.XPATH, "//div[@role='dialog']")
        except:
            print("Could not re-locate the followers dialog. Possibly closed or changed.")
            break

        # Locate follower elements
        user_elems = popup.find_elements(By.XPATH, ".//a[contains(@href, '/') and not(contains(@href, '/accounts/'))]")
        for elem in user_elems:
            href = elem.get_attribute("href")
            if href and href.endswith("/"):
                username = href.split("/")[-2]
                if username:
                    followers.add(username)

        # Scroll
        try:
            driver.execute_script("arguments[0].scrollTop = arguments[0].scrollHeight", popup)
        except StaleElementReferenceException:
            print("Stale element while scrolling. Re-locating popup next iteration.")
            continue

        time.sleep(2)  # wait for more content to load

        # Check if new content loaded
        try:
            new_height = driver.execute_script("return arguments[0].scrollHeight", popup)
            if new_height == last_height:
                print("No more followers loaded. Stopping scroll.")
                break
            last_height = new_height
        except StaleElementReferenceException:
            print("Stale element while checking scrollHeight. Re-locating popup next iteration.")
            continue

    return list(followers)

def main():
    driver = init_driver(headless=False)  # If True, runs without opening a visible browser
    try:
        print("Logging in...")
        login_to_instagram(driver, INSTAGRAM_USERNAME, INSTAGRAM_PASSWORD)
        print("Login successful. Now scraping followers for:", TARGET_USER)

        followers_list = scrape_followers(driver, TARGET_USER, scroll_limit=SCROLL_LIMIT)
        print(f"Found {len(followers_list)} followers for user '{TARGET_USER}'.")

        # 5. Save to CSV
        if not os.path.exists("data"):
            os.makedirs("data")
        output_path = os.path.join("data", OUTPUT_FILE)

        with open(output_path, "w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow(["follower", "followed"])
            for follower in followers_list:
                writer.writerow([follower, TARGET_USER])

        print(f"Follower data saved to: {output_path}")

    finally:
        driver.quit()

if __name__ == "__main__":
    main()


Logging in...
No (or different) cookie banner found.
Login successful. Now scraping followers for: Ajandsmart
No more followers loaded. Stopping scroll.
Found 10 followers for user 'Ajandsmart'.
Follower data saved to: data/followers_of_instagram.csv


In [1]:
import time
import csv
import os
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import (
    StaleElementReferenceException,
    TimeoutException,
    WebDriverException
)
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options

# ------------------------------------------------------------------------------
# USER CONFIGURATION
# ------------------------------------------------------------------------------
INSTAGRAM_USERNAME = "educo2019"       # <-- Change to your Instagram username
INSTAGRAM_PASSWORD = "sol2019"        # <-- Change to your Instagram password
TARGET_USER       = "Ajandsmart"      # <-- The account whose followers you want to scrape
OUTPUT_FILE       = "followers_of_instagram.csv"
TIMEOUT           = 15                # Max wait time (seconds) for elements
# ------------------------------------------------------------------------------

def init_driver(headless=False):
    """
    Initializes the Chrome WebDriver using webdriver_manager to ensure
    the correct version of ChromeDriver is installed.
    Sets a higher script timeout to avoid 'script timeout' issues.
    """
    chrome_options = Options()
    if headless:
        chrome_options.add_argument("--headless")
    chrome_options.add_experimental_option("excludeSwitches", ["enable-logging"])

    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=chrome_options)

    # Increase script timeout to handle large/slow pages
    driver.set_script_timeout(60)  # 60 seconds, can adjust as needed

    return driver

def login_to_instagram(driver, username, password, timeout=TIMEOUT):
    """
    Logs into Instagram, handles cookie banners or prompts, and verifies login success.
    Raises an Exception if login fails.
    """
    driver.get("https://www.instagram.com/accounts/login/")
    wait = WebDriverWait(driver, timeout)

    # 1. Optional cookie banner
    try:
        cookie_button = wait.until(
            EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Allow essential')]"))
        )
        cookie_button.click()
        print("Cookie banner dismissed.")
    except:
        print("No (or different) cookie banner found.")

    # 2. Wait for login fields
    try:
        username_input = wait.until(EC.visibility_of_element_located((By.NAME, "username")))
        password_input = wait.until(EC.visibility_of_element_located((By.NAME, "password")))
    except:
        print("Could not locate login fields by NAME. Trying CSS selectors.")
        username_input = wait.until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, "input[name='username']"))
        )
        password_input = wait.until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, "input[name='password']"))
        )

    # 3. Enter credentials
    username_input.clear()
    username_input.send_keys(username)
    password_input.clear()
    password_input.send_keys(password)
    password_input.send_keys(Keys.RETURN)

    # 4. Wait for potential next steps
    time.sleep(5)

    # 5. Dismiss "Save your login info?" or "Turn on notifications?" pop-up if present
    try:
        not_now_btn = wait.until(
            EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Not Now')]"))
        )
        not_now_btn.click()
        print("Dismissed 'Save Info'/'Notifications' prompt.")
    except:
        pass

    # 6. Verify login success
    time.sleep(3)
    if "login" in driver.current_url.lower():
        raise Exception("Login may have failed. Still on /login page.")

def scrape_followers(driver, target_user, timeout=TIMEOUT):
    """
    1) Goes to target_user's profile,
    2) Clicks "followers," 
    3) Scrolls until no new followers appear (or Instagram stops loading),
    4) Returns a sorted list of follower usernames.
    """
    wait = WebDriverWait(driver, timeout)

    # 1. Open target user profile
    profile_url = f"https://www.instagram.com/{target_user}/"
    driver.get(profile_url)
    time.sleep(3)

    # 2. Click "followers" link
    try:
        followers_link = wait.until(
            EC.element_to_be_clickable((By.PARTIAL_LINK_TEXT, "followers"))
        )
        followers_link.click()
    except:
        raise Exception(f"Could not find 'followers' link on profile: {profile_url}")

    # 3. Wait for dialog
    time.sleep(2)
    try:
        wait.until(
            EC.visibility_of_element_located((By.XPATH, "//div[@role='dialog']"))
        )
    except:
        raise Exception("Followers dialog didn't appear. Possibly private account or layout changed.")

    # 4. Indefinite scroll until no new followers appear in multiple rounds
    followers = set()
    prev_count = 0
    same_count_repeats = 0
    MAX_SAME_COUNT_ROUNDS = 3  # adjust if needed

    while True:
        # Re-locate the popup each loop to avoid stale references
        try:
            popup = driver.find_element(By.XPATH, "//div[@role='dialog']")
        except:
            print("Could not re-locate followers dialog. Possibly closed.")
            break

        # Find follower elements
        user_elems = popup.find_elements(By.XPATH, ".//a[contains(@href, '/') and not(contains(@href, '/accounts/'))]")
        for elem in user_elems:
            # Instead of get_attribute("href"), use get_dom_attribute("href") to avoid script timeouts
            try:
                href = elem.get_dom_attribute("href")
            except (TimeoutException, WebDriverException):
                print("Timeout/WebDriver error getting href; skipping element.")
                continue

            if href and href.endswith("/"):
                username = href.split("/")[-2]
                if username:
                    followers.add(username)

        # Scroll the popup
        try:
            driver.execute_script("arguments[0].scrollTop = arguments[0].scrollHeight", popup)
        except StaleElementReferenceException:
            print("Stale element while scrolling. Will retry next iteration.")
            continue

        time.sleep(2)  # let new items load

        current_count = len(followers)
        print(f"Collected {current_count} followers so far...")

        if current_count == prev_count:
            same_count_repeats += 1
            if same_count_repeats >= MAX_SAME_COUNT_ROUNDS:
                print("No new followers found after multiple rounds. Stopping.")
                break
        else:
            same_count_repeats = 0

        prev_count = current_count

    return sorted(followers)

def main():
    driver = init_driver(headless=False)  # If True, browser won't be visible
    try:
        print("Logging in...")
        login_to_instagram(driver, INSTAGRAM_USERNAME, INSTAGRAM_PASSWORD)
        print("Login successful. Now scraping followers for:", TARGET_USER)

        followers_list = scrape_followers(driver, TARGET_USER)
        print(f"Found {len(followers_list)} total followers for user '{TARGET_USER}'.")

        # Save to CSV
        if not os.path.exists("data"):
            os.makedirs("data")
        output_path = os.path.join("data", OUTPUT_FILE)

        with open(output_path, "w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow(["follower", "followed"])
            for follower in followers_list:
                writer.writerow([follower, TARGET_USER])

        print(f"Follower data saved to: {output_path}")
    finally:
        driver.quit()

if __name__ == "__main__":
    main()


Logging in...
No (or different) cookie banner found.
Login successful. Now scraping followers for: Ajandsmart
Collected 0 followers so far...
Collected 0 followers so far...
Collected 10 followers so far...
Collected 10 followers so far...
Collected 10 followers so far...
Collected 10 followers so far...
No new followers found after multiple rounds. Stopping.
Found 10 total followers for user 'Ajandsmart'.
Follower data saved to: data/followers_of_instagram.csv


In [2]:
import time
import csv
import os
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import (
    StaleElementReferenceException,
    TimeoutException,
    WebDriverException
)
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options

# ------------------------------------------------------------------------------
# USER CONFIGURATION
# ------------------------------------------------------------------------------
INSTAGRAM_USERNAME = "educo2019"       # <-- Replace with your Instagram username
INSTAGRAM_PASSWORD = "sol2019"        # <-- Replace with your Instagram password
TARGET_USER       = "Ajandsmart"      # <-- The account whose followers you want to scrape
OUTPUT_FILE       = "followers_of_instagram.csv"
TIMEOUT           = 15                # Max wait time (seconds) for elements
SCROLL_ATTEMPTS   = 2000              # Max number of scrolling iterations
# ------------------------------------------------------------------------------

def init_driver(headless=False):
    """
    Initializes the Chrome WebDriver using webdriver_manager to ensure
    the correct version of ChromeDriver is installed.
    Sets a higher script timeout to avoid 'script timeout' issues.
    """
    chrome_options = Options()
    if headless:
        chrome_options.add_argument("--headless")
    chrome_options.add_experimental_option("excludeSwitches", ["enable-logging"])

    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=chrome_options)

    # Increase script timeout to handle large/slow pages
    driver.set_script_timeout(60)  # 60 seconds, can adjust as needed

    return driver

def login_to_instagram(driver, username, password, timeout=TIMEOUT):
    """
    Logs into Instagram, handles cookie banners or prompts, and verifies login success.
    Raises an Exception if login fails.
    """
    driver.get("https://www.instagram.com/accounts/login/")
    wait = WebDriverWait(driver, timeout)

    # 1. Optional cookie banner
    try:
        cookie_button = wait.until(
            EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Allow essential')]"))
        )
        cookie_button.click()
        print("Cookie banner dismissed.")
    except:
        print("No (or different) cookie banner found.")

    # 2. Wait for login fields
    try:
        username_input = wait.until(EC.visibility_of_element_located((By.NAME, "username")))
        password_input = wait.until(EC.visibility_of_element_located((By.NAME, "password")))
    except:
        print("Could not locate login fields by NAME. Trying CSS selectors.")
        username_input = wait.until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, "input[name='username']"))
        )
        password_input = wait.until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, "input[name='password']"))
        )

    # 3. Enter credentials
    username_input.clear()
    username_input.send_keys(username)
    password_input.clear()
    password_input.send_keys(password)
    password_input.send_keys(Keys.RETURN)

    # 4. Wait for potential next steps
    time.sleep(5)

    # 5. Dismiss "Save your login info?" or "Turn on notifications?" pop-up if present
    try:
        not_now_btn = wait.until(
            EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Not Now')]"))
        )
        not_now_btn.click()
        print("Dismissed 'Save Info'/'Notifications' prompt.")
    except:
        pass

    # 6. Verify login success
    time.sleep(3)
    if "login" in driver.current_url.lower():
        raise Exception("Login may have failed. Still on /login page.")

def scrape_followers(driver, target_user, timeout=TIMEOUT, scroll_attempts=SCROLL_ATTEMPTS):
    """
    1) Goes to target_user's profile,
    2) Clicks "followers," 
    3) Scrolls a fixed number of times (scroll_attempts), each time waiting 5 seconds,
    4) Returns a sorted list of follower usernames.
    
    NOTE: If Instagram doesn't load new followers, we won't get them, no matter how many times we scroll.
    """
    wait = WebDriverWait(driver, timeout)

    # 1. Open target user profile
    profile_url = f"https://www.instagram.com/{target_user}/"
    driver.get(profile_url)
    time.sleep(3)

    # 2. Click "followers" link
    try:
        followers_link = wait.until(
            EC.element_to_be_clickable((By.PARTIAL_LINK_TEXT, "followers"))
        )
        followers_link.click()
    except:
        raise Exception(f"Could not find 'followers' link on profile: {profile_url}")

    # 3. Wait for dialog
    time.sleep(2)
    try:
        wait.until(
            EC.visibility_of_element_located((By.XPATH, "//div[@role='dialog']"))
        )
    except:
        raise Exception("Followers dialog didn't appear. Possibly private account or layout changed.")

    followers = set()

    for i in range(scroll_attempts):
        # Re-locate the popup each loop to avoid stale references
        try:
            popup = driver.find_element(By.XPATH, "//div[@role='dialog']")
        except:
            print("Could not re-locate followers dialog. Possibly closed.")
            break

        # Find follower elements
        user_elems = popup.find_elements(By.XPATH, ".//a[contains(@href, '/') and not(contains(@href, '/accounts/'))]")
        for elem in user_elems:
            # Use get_dom_attribute("href") to avoid script timeouts
            try:
                href = elem.get_dom_attribute("href")
            except (TimeoutException, WebDriverException):
                print("Timeout/WebDriver error getting href; skipping element.")
                continue

            if href and href.endswith("/"):
                username = href.split("/")[-2]
                if username:
                    followers.add(username)

        # Scroll the popup
        try:
            driver.execute_script("arguments[0].scrollTop = arguments[0].scrollHeight", popup)
        except StaleElementReferenceException:
            print("Stale element while scrolling. Will retry next iteration.")
            continue

        # Increase sleep to give more time for new followers to load
        time.sleep(5)

        print(f"Scroll attempt {i+1}/{scroll_attempts} | Collected {len(followers)} followers so far...")

    return sorted(followers)

def main():
    driver = init_driver(headless=False)  # If True, browser won't be visible
    try:
        print("Logging in...")
        login_to_instagram(driver, INSTAGRAM_USERNAME, INSTAGRAM_PASSWORD)
        print("Login successful. Now scraping followers for:", TARGET_USER)

        followers_list = scrape_followers(driver, TARGET_USER)
        print(f"Found {len(followers_list)} total followers for user '{TARGET_USER}'.")

        # Save to CSV
        if not os.path.exists("data"):
            os.makedirs("data")
        output_path = os.path.join("data", OUTPUT_FILE)

        with open(output_path, "w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow(["follower", "followed"])
            for follower in followers_list:
                writer.writerow([follower, TARGET_USER])

        print(f"Follower data saved to: {output_path}")
    finally:
        driver.quit()

if __name__ == "__main__":
    main()


Logging in...
No (or different) cookie banner found.
Could not locate login fields by NAME. Trying CSS selectors.


TimeoutException: Message: 
Stacktrace:
0   chromedriver                        0x0000000104852168 chromedriver + 6177128
1   chromedriver                        0x000000010484982a chromedriver + 6141994
2   chromedriver                        0x00000001042d3e00 chromedriver + 417280
3   chromedriver                        0x00000001043254d7 chromedriver + 750807
4   chromedriver                        0x00000001043256f1 chromedriver + 751345
5   chromedriver                        0x00000001043756d4 chromedriver + 1078996
6   chromedriver                        0x000000010434b71d chromedriver + 907037
7   chromedriver                        0x0000000104372a22 chromedriver + 1067554
8   chromedriver                        0x000000010434b4c3 chromedriver + 906435
9   chromedriver                        0x0000000104317885 chromedriver + 694405
10  chromedriver                        0x00000001043184f1 chromedriver + 697585
11  chromedriver                        0x000000010480e950 chromedriver + 5900624
12  chromedriver                        0x00000001048129e1 chromedriver + 5917153
13  chromedriver                        0x00000001047e8e14 chromedriver + 5746196
14  chromedriver                        0x000000010481340b chromedriver + 5919755
15  chromedriver                        0x00000001047d7574 chromedriver + 5674356
16  chromedriver                        0x0000000104836e88 chromedriver + 6065800
17  chromedriver                        0x0000000104837050 chromedriver + 6066256
18  chromedriver                        0x00000001048493f1 chromedriver + 6140913
19  libsystem_pthread.dylib             0x00007ff816884df1 _pthread_start + 99
20  libsystem_pthread.dylib             0x00007ff816880857 thread_start + 15
