In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from tqdm import tqdm
import math
import time
import os
import polars as pl
import concurrent.futures

def driversetup():
    """Create and return a headless Chrome WebDriver for the scraping cells.

    The browser is started with a fixed list of command-line switches (applied
    in the order listed below) and with Chrome's ``enable-logging`` switch
    excluded. After start-up, a script is executed that redefines
    ``navigator.webdriver`` to return ``undefined`` on the currently loaded
    document.

    Returns:
        The started ``webdriver.Chrome`` instance. The caller owns it and is
        responsible for calling ``quit()`` when finished.
    """
    chrome_flags = (
        '--headless',               # run Selenium in headless mode
        '--no-sandbox',
        '--disable-dev-shm-usage',  # overcome limited resource problems
        "lang=en",
        "start-maximized",          # open Browser in maximized mode
        "disable-infobars",         # disable infobars
        "--disable-extensions",     # disable extension
        # "--incognito",            # intentionally left disabled
        "--disable-blink-features=AutomationControlled",
    )

    chrome_options = webdriver.ChromeOptions()
    for flag in chrome_flags:
        chrome_options.add_argument(flag)
    chrome_options.add_experimental_option('excludeSwitches', ['enable-logging'])

    browser = webdriver.Chrome(options=chrome_options)
    # Mask the automation marker exposed to page scripts.
    browser.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined});")
    return browser

# FPDS ezSearch page used as the scraper's entry point; the query parameter
# (`q=`) is left empty because contract IDs are typed into the search box.
BASE_URL = 'https://www.fpds.gov/ezsearch/search.do?indexName=awardfull&templateName=1.5.3&s=FPDS.GOV&q='

# NOTE: despite its name, `cur_dir` is the *parent* of the working directory;
# the processed NAICS data lives in a sibling folder of the notebook's folder.
cur_dir = os.path.dirname(os.getcwd())
naics_data_dir = os.path.join(cur_dir, 'NAICS Processed')
df = pl.read_csv(os.path.join(naics_data_dir, '236220_second_proc.csv'))

# "Extent competed" values that the scraping loop records as 'Restricted'
# (with the bid count fixed at 1); any other value is recorded as 'Open'.
special_comp_params = ["Follow On to Competed Action",
                       "Not Available for Competition",
                       "Not Competed"]

# Preview the loaded data. Jupyter only renders the *last* expression of a
# cell, so this must come after the assignments above to be displayed.
df.head()

In [2]:
# Half-open [start, stop) window of `df` rows handled in this run; the same
# two bounds are also used to name the output CSV.
ranges = [40_000, 45_000]

In [4]:
# initial run through
SEARCH_BUTTON_XPATH = "/html/body/form/table/tbody/tr[2]/td[2]/input"
VIEW_LINK_XPATH = '/html/body/table[4]/tbody/tr/td[2]/table/tbody/tr[2]/td/table/tbody/tr/td/table[1]/tbody/tr[1]/td[2]/span/a[3]'


def _parse_bid_count(raw_value):
    """Convert the offers-received field value to an int.

    Returns None when the value is missing, blank or not an integer, so the
    collected bid counts never mix strings with integers.
    """
    try:
        return int(str(raw_value).strip())
    except (TypeError, ValueError):
        return None


def _lookup_contract(driver, contract_id, original_window):
    """Search for one contract and read its competition fields.

    Args:
        driver: WebDriver currently focused on the search page.
        contract_id: Value typed into the search box.
        original_window: Handle of the search window; focus is always
            returned to it before this function exits.

    Returns:
        A ``(competition_type, bid_count)`` tuple:
        ``('Restricted', 1)`` when the extent competed is one of
        ``special_comp_params``; ``('Open', n)`` otherwise, where ``n`` is the
        parsed offers-received value (``None`` if it cannot be parsed); and
        ``('Error', 0)`` when any page interaction fails.
    """
    try:
        search_input = driver.find_element("id", "searchText")
        search_input.clear()  # clear input
        search_input.send_keys(contract_id)  # send new contract
        time.sleep(0.1)
        # click search button
        driver.find_element(By.XPATH, SEARCH_BUTTON_XPATH).click()
        # open the first result's contract view (expected to open in a separate tab)
        driver.find_element(By.XPATH, VIEW_LINK_XPATH).click()

        # switch to the first window that is not the search window
        for window in driver.window_handles:
            if window != original_window:
                driver.switch_to.window(window)
                break

        # get extent competed
        extent = driver.find_element(By.CSS_SELECTOR, "#extentCompeted option:checked").text
        if extent in special_comp_params:
            return 'Restricted', 1

        # normal competition: read the number of bids
        offers_field = driver.find_element(By.ID, "numberOfOffersReceived")
        return 'Open', _parse_bid_count(offers_field.get_attribute("value"))
    except Exception:
        # `Exception` (not a bare except) so Ctrl-C / kernel interrupt still
        # stops the loop instead of being recorded as a failed row.
        return 'Error', 0
    finally:
        # Always close any extra tabs and return to the search window, even
        # after a failure; otherwise every following row would be searched
        # from the wrong tab and fail too.
        for window in driver.window_handles:
            if window != original_window:
                driver.switch_to.window(window)
                driver.close()
        driver.switch_to.window(original_window)


# The driver is intentionally left open after this cell so follow-up cells can
# reuse it; call driver.quit() once scraping is finished.
driver = driversetup()

comp_type = []
bids = []

driver.get(BASE_URL)
original_window = driver.current_window_handle

batch_rows = df[ranges[0]:ranges[1]]
for row in tqdm(batch_rows.iter_rows(named=True), total=len(batch_rows), desc="Processing rows"):
    row_comp_type, row_bids = _lookup_contract(driver, row['Contract ID'], original_window)
    # add to lists
    comp_type.append(row_comp_type)
    bids.append(row_bids)

Processing rows: 100%|█████████████████████████████████████████████████████████████| 5000/5000 [03:20<00:00, 24.95it/s]


In [None]:
# Slice into a new name instead of rebinding `df`: rebinding made this cell
# non-idempotent (a second run sliced the already-sliced frame down to empty).
batch_df = df[ranges[0]:ranges[1]]

# Fail early with a readable message if the scraping loop did not produce
# exactly one result per row (e.g. it was interrupted part-way).
if not (len(comp_type) == len(bids) == len(batch_df)):
    raise ValueError(
        f"Scrape results do not match the selected rows: {len(batch_df)} rows, "
        f"{len(comp_type)} competition types, {len(bids)} bid counts"
    )


def _as_int_or_none(value):
    """Coerce a scraped bid count to int, or None if it is not an integer."""
    try:
        return int(str(value).strip())
    except (TypeError, ValueError):
        return None


# Scraped bid counts may arrive as strings (read from an input's value
# attribute); normalise them so the column has a single integer dtype.
bid_counts = [_as_int_or_none(b) for b in bids]

merged_df = batch_df.with_columns([
    pl.Series('Competition Type', comp_type),
    pl.Series('Bids', bid_counts)
])

# Output file is named after the processed row window, e.g. "40000_45000.csv",
# and written to the current working directory.
save_dir = os.path.join(os.getcwd(), f'{ranges[0]}_{ranges[1]}.csv')
merged_df.write_csv(save_dir)