In [4]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
import time

# Build the shared headless Chrome session used by the functions below.
# The value returned by ChromeDriverManager().install() is handed to the
# Chrome service that launches the driver.
options = webdriver.ChromeOptions()
options.add_argument('--headless')
chrome_service = ChromeService(ChromeDriverManager().install())
driver = webdriver.Chrome(options=options, service=chrome_service)

# Function to extract comments from a Nairaland thread page
def extract_comments(url, timeout=10):
    """Open ``url`` in the module-level ``driver`` and collect comment text.

    Args:
        url: Address of the thread page to load.
        timeout: Maximum number of seconds to wait for elements with the
            CSS class ``narrow`` to be present on the page.

    Returns:
        A list of ``{'text': ...}`` dicts, one per matching element, in the
        order the elements were returned. The list is empty when the wait
        fails, and it omits any element whose text could not be read.
    """
    # Imported here so this function carries its own dependency.
    from selenium.common.exceptions import WebDriverException

    driver.get(url)

    # The explicit wait polls until the elements are present, so no fixed
    # sleep is needed in front of it.
    try:
        comment_sections = WebDriverWait(driver, timeout).until(
            EC.presence_of_all_elements_located((By.CLASS_NAME, 'narrow'))
        )
    except WebDriverException as e:
        # Covers the wait timing out as well as other driver-level failures.
        print(f"Error waiting for comment sections: {e}")
        return []

    comments = []
    for comment_div in comment_sections:
        # Only the element access can raise; keep the try body minimal.
        try:
            comment_text = comment_div.text.strip()
        except WebDriverException as e:
            # Best effort: report the failing element and keep going.
            print(f"Error extracting comment: {e}")
            continue
        comments.append({'text': comment_text})

    return comments

# Function to navigate through multiple pages of the thread
def scrape_all_comments(base_url, start_page=1, end_page=10):
    """Gather comments from pages ``start_page`` through ``end_page`` inclusive.

    Each page URL is ``base_url`` followed by ``/`` and the page number. The
    URL is passed to ``extract_comments`` and the per-page results are
    concatenated in page order. A progress line is printed after each page.

    Returns:
        One list holding every comment dict collected; empty when
        ``start_page`` is greater than ``end_page``.
    """
    # NOTE(review): confirm whether the site numbers thread pages from 0 or 1.
    numbered_urls = (
        (number, f"{base_url}/{number}")
        for number in range(start_page, end_page + 1)
    )

    collected = []
    for page_num, page_url in numbered_urls:
        collected.extend(extract_comments(page_url))
        print(f"Scraped page {page_num}")
    return collected

# Example usage
base_url = 'https://www.nairaland.com/1546964/konga-jumia-which-more-reliable'  # Provided thread base URL
start_page = 1  # Starting page number
end_page = 10  # Ending page number, adjust based on the total number of pages in the thread

# Run the scrape inside try/finally so the browser session is closed even
# when scraping or writing the CSV raises.
try:
    all_comments = scrape_all_comments(base_url, start_page, end_page)
    if all_comments:
        # One row per comment dict; the only column is 'text'.
        comments_df = pd.DataFrame(all_comments)
        comments_df.to_csv('nairaland_comments.csv', index=False)
        print("Scraping completed and saved to 'nairaland_comments.csv'")
    else:
        print("No comments found.")
finally:
    # Close the WebDriver
    driver.quit()


Scraped page 1
Scraped page 2
Scraped page 3
Scraped page 4
Scraped page 5
Scraped page 6
Scraped page 7
Scraped page 8
Scraped page 9
Scraped page 10
Scraping completed and saved to 'nairaland_comments.csv'
