## **Data Scraping**

##### **Import All Required Libraries**

In [1]:
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from convert_curr_code import convert_curr_codes
from selenium.webdriver.common.by import By
from datetime import datetime, timezone
from selenium import webdriver
import pandas as pd
import time

# Identifying tuples of payouts already collected; payout_info() checks and
# updates this set to get rid of duplicates.
scraped_entries: set = set()
# Final list of entries (one dict per payout); main() appends to it and
# passes it to export_data().
total_entries: list = []

##### **WebDriver Function**

In [2]:
def web_driver(headless = False):
    """
    Initializes the Selenium WebDriver and opens the target website.

    Args:
        headless: When True, Chrome is started with the '--headless' argument.

    Returns: WebDriver: The initialized WebDriver instance with the target
    page loaded. The cookie banner is dismissed when it shows up within
    10 seconds; otherwise the page is returned as-is.

    Raises: Any error raised while loading the page or clicking the banner
    is re-raised, after the browser has been closed so it does not leak.
    """
    # Local import: keeps this function self-contained without touching the
    # module-level import list.
    from selenium.common.exceptions import TimeoutException

    # Initialize WebDriver
    options = webdriver.ChromeOptions()

    # Headless option
    if headless:
        options.add_argument('--headless')

    driver = webdriver.Chrome(options=options)

    try:
        # Open the target website
        driver.get("https://www.primeopinion.com/")

        # Close the cookie alert. Wait until the button is actually clickable
        # (not merely present in the DOM) before clicking it.
        try:
            WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.ID, "onetrust-accept-btn-handler"))).click()
        except TimeoutException:
            # The banner did not appear in time; the page can still be
            # scraped, so carry on instead of aborting.
            print("Cookie banner not found, continuing without dismissing it.")
    except BaseException:
        # Cleanup-and-re-raise: never leave a Chrome process behind when
        # start-up fails or is interrupted.
        driver.quit()
        raise

    return driver

#### **Payout Information Function**

In [3]:
def payout_info(driver):
    """
    Extracts payout information like payout provider names, payout amounts,
    and currency code from the page currently loaded in `driver`.

    Only payouts that have not been seen before are returned. A payout is
    identified by (provider, amount, currency code); the timestamp is left
    out of the identity on purpose, because it is generated at scrape time
    and would make every poll look new.
    NOTE: two genuinely distinct payouts with the same provider, amount and
    currency are therefore treated as one.

    Args:
        driver: The Selenium WebDriver instance showing the target page.

    Returns: list of dict: Possibly empty. Each dictionary has the following keys:
            - "payout_provider": The name of the payout provider.
            - "payout_amount": The amount of the payout (float).
            - "payout_currency": The currency code of the payout.
            - "timestamp": The UTC ISO-8601 timestamp when the data was extracted.
    """
    # Storing required data.
    new_data = []

    try:
        # Explicit wait until the payout transactions are present.
        all_transactions = WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.CLASS_NAME, "card-list")))

        # List of all transactions.
        entries = WebDriverWait(all_transactions, 20).until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, '.card-item[data-v-eb25139e]')))
    except Exception as e:
        # Nothing could be located on this poll; report it and return an
        # empty list so callers always receive a list.
        print(f"Exception Handled, {e}")
        return new_data

    # Iterating on each payout.
    for entry in entries:
        # Each card is handled on its own so that one bad card (stale
        # element, unexpected text format, ...) does not throw away the
        # cards that were already collected in this poll.
        try:
            # Extracting payout provider name.
            payout_provider = WebDriverWait(entry, 20).until(
                EC.presence_of_element_located((By.CLASS_NAME, "card-item__label"))).text.strip()

            # Extracting payout amount + currency symbol.
            payout_amount = WebDriverWait(entry, 20).until(
                EC.presence_of_element_located((By.CLASS_NAME, "card-item__amount"))).text.strip()

            amount_parts = payout_amount.split()

            # Extracting amount in number and converting it into float data type.
            amount = float(amount_parts[0])

            # Converting currency symbol to currency code.
            currency_code = convert_curr_codes(amount_parts[1])
        except Exception as e:
            print(f"Exception Handled, {e}")
            continue

        # Identity used to get rid of duplicates (no timestamp, see docstring).
        entry_id = (payout_provider, amount, currency_code)
        if entry_id in scraped_entries:
            continue
        scraped_entries.add(entry_id)

        # Creating a dictionary entry and adding it into the list.
        new_data.append({
            "payout_provider": payout_provider,
            "payout_amount": amount,
            "payout_currency": currency_code,
            # Generating the timestamp for each payout.
            "timestamp": datetime.now(timezone.utc).isoformat()})

    # Returning new data.
    return new_data

##### **Export Data to CSV File**

In [4]:
def export_data(total_entries):
    """
    Writes the collected entries to the CSV file "ScrappedData.csv"
    in the current working directory.

    The entries are loaded into a pandas DataFrame and saved without
    the index column.
    Return: None
    """
    # Build the table and write it out in a single step.
    pd.DataFrame(total_entries).to_csv("ScrappedData.csv", index=False)

##### **Main Function**

In [5]:
def main():
    """
    Main function uses an infinite loop to repeatedly call the `payout_info` function,
    which extracts new payout transactions from the web page. The new data is printed for
    testing purposes and appended to the global list of total entries. The function
    handles keyboard interrupts to allow graceful termination of the scraper.

    On exit (interrupt or error) the collected entries are exported first and
    the browser is closed afterwards, so a failure while closing the browser
    cannot lose the scraped data.
    Returns: None
    """
    # Opening a WebDriver
    driver = web_driver()
    try:
        while True:
            # Extracting new data; a falsy result (None or an empty list)
            # means there is nothing new on this poll.
            new_data = payout_info(driver)

            for entry in new_data or []:
                # For testing.
                print(entry)

                # Appending all the entries to the final list.
                total_entries.append(entry)

            # Sleeping for 1.5 seconds so that new data gets loaded.
            time.sleep(1.5)

    # Exception handling for keyboard interrupt.
    except KeyboardInterrupt:
        print("Stopping the scraper...")

    finally:
        # Save the data before touching the browser; the nested finally
        # still guarantees the browser is closed if the export fails.
        try:
            export_data(total_entries)
        finally:
            driver.quit()

In [None]:
# Run the scraper only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()