In [2]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import pandas as pd
import time, os, re

# ----- CONFIG -----
# Organisation to scrape: the display name used in messages, and the option
# value selected in the "OrganName" dropdown of the search form.
DEPARTMENT_NAME = "Orissa Lift Irrigation Corporation"
DEPARTMENT_VALUE = "120"

# Tender-status listing page (WebTenderStatusLists) on tendersodisha.gov.in.
URL = (
    "https://tendersodisha.gov.in/nicgep/app"
    "?page=WebTenderStatusLists&service=page"
)

# Upper bound on the number of tenders to scrape. None means no limit; set a
# small integer to cap a trial run.
MAX_RECORDS = None
# ------------------

def clean_value(raw: str) -> str:
    """Return only the digits and dots of *raw*, in their original order.

    Currency symbols, thousands separators, whitespace and every other
    character are dropped. The result is a string, not a number, and is
    empty when *raw* contains no digit or dot.
    """
    trimmed = raw.strip()
    # Collect each run of digits/dots and glue the runs back together.
    kept_runs = re.findall(r"[\d.]+", trimmed)
    return "".join(kept_runs)

def _fill_readonly_input(field_id, text):
    """Type *text* into the input with id *field_id* and return the element.

    The 'readonly' attribute is removed via JavaScript first so that the
    field accepts clear() and send_keys().
    """
    element = driver.find_element(By.ID, field_id)
    driver.execute_script("arguments[0].removeAttribute('readonly')", element)
    element.clear()
    element.send_keys(text)
    return element


# Launch Chrome and open the search page.
driver = webdriver.Chrome()
driver.maximize_window()
print(f"\nüîç Scraping tenders for: {DEPARTMENT_NAME}")
driver.get(URL)
time.sleep(2)  # fixed pause after navigation before touching the form

# Choose the organisation in the "OrganName" dropdown by its option value.
organisation_select = Select(driver.find_element(By.ID, "OrganName"))
organisation_select.select_by_value(DEPARTMENT_VALUE)
time.sleep(1)

# Published-date window for the search, written as dd/MM/yyyy.
published_from = "01/06/2024"
published_to = "11/07/2025"

# Fill the "from" field first, then the "to" field.
published_from_input = _fill_readonly_input("publishedFromDate", published_from)
published_to_input = _fill_readonly_input("publishedToDate", published_to)

time.sleep(1)

# Manual CAPTCHA once: save a screenshot of the CAPTCHA image, show it to the
# operator when the platform allows it, then type the operator's answer into
# the form and submit the search.
captcha_path = "captcha.png"
with open(captcha_path, "wb") as captcha_file:
    # The context manager closes the file before a viewer tries to open it.
    captcha_file.write(
        driver.find_element(By.ID, "captchaImage").screenshot_as_png
    )
try:
    # os.startfile exists only on Windows; on other platforms the attribute
    # lookup itself raises AttributeError.
    os.startfile(captcha_path)
except (AttributeError, OSError):
    # Best effort only: the operator can still open the saved image by hand.
    print(f"Could not open {captcha_path} automatically; open it manually.")
# Strip stray whitespace so an accidental space does not invalidate the answer.
driver.find_element(By.ID, "captchaText").send_keys(
    input("Enter CAPTCHA: ").strip()
)
driver.find_element(By.ID, "Search").click()

# Wait up to 25 seconds for the result table (id "tabList") to be present
# after the search was submitted.
try:
    WebDriverWait(driver, 25).until(
        EC.presence_of_element_located((By.ID, "tabList"))
    )
except TimeoutException:
    print("‚ùå Table failed to load ‚Äì bad CAPTCHA?")
    driver.quit()
    # Exit with a non-zero status: a bare SystemExit reports status 0, which
    # makes the failed run look successful to a calling shell or scheduler.
    raise SystemExit(1)

# Scraped records, number of tenders captured so far, and the first table-row
# index to visit on the current result page.
all_rows, done = [], 0
row_ptr = 1  # table-row pointer (skip header)

# Window that shows the result list. Detail pages open in extra windows, so
# this is the handle every row has to come back to.
list_window = driver.current_window_handle


def _read_detail_fields(drv):
    """Read the wanted fields from the detail page currently in focus.

    Every table row on the page is scanned and each pair of adjacent cells is
    treated as (label, value). A later match overwrites an earlier one.

    Returns:
        Tuple (publish, bid_end, tv, addr, loc, pin); a field that is not
        found stays "". tv is passed through clean_value().
    """
    publish = bid_end = tv = addr = loc = pin = ""
    for tr in drv.find_elements(By.XPATH, "//table//tr"):
        # Read each cell's text once; every .text access is a WebDriver call.
        texts = [
            cell.text.strip() for cell in tr.find_elements(By.TAG_NAME, "td")
        ]
        for label, value in zip(texts, texts[1:]):
            if "Publish Date" in label:
                publish = value
            elif "Bid Submission End Date" in label:
                bid_end = value
            elif "Tender Value" in label:
                tv = clean_value(value)
            elif label.startswith("Address"):
                addr = value
            elif label.startswith("Location"):
                loc = value
            elif label.startswith("Pincode"):
                pin = value
    return publish, bid_end, tv, addr, loc, pin


def _return_to_list(drv, list_handle):
    """Close every window except *list_handle* and go back to the result list.

    After the extra windows are closed, the "PageLink_15" link is clicked in
    the list window and the function waits for the "tabList" table.
    """
    for handle in drv.window_handles:
        if handle != list_handle:
            drv.switch_to.window(handle)
            drv.close()
    drv.switch_to.window(list_handle)
    WebDriverWait(drv, 10).until(
        EC.element_to_be_clickable((By.ID, "PageLink_15"))
    ).click()
    WebDriverWait(drv, 10).until(
        EC.presence_of_element_located((By.ID, "tabList"))
    )


while MAX_RECORDS is None or done < MAX_RECORDS:
    try:
        table_rows = driver.find_element(
            By.ID, "tabList"
        ).find_elements(By.XPATH, ".//tr[td]")
        # NOTE(review): the row count comes from ".//tr[td]" while each row is
        # fetched positionally with "//tr[idx+1]"; the two only line up for a
        # particular table layout -- confirm against the live page.
        for idx in range(row_ptr, len(table_rows) + 1):
            if MAX_RECORDS is not None and done >= MAX_RECORDS:
                break

            # Set once the status link was clicked, i.e. once the list window
            # may no longer show the result table.
            left_list_page = False
            try:
                row = driver.find_element(
                    By.XPATH, f"//table[@id='tabList']//tr[{idx+1}]"
                )
                cols = row.find_elements(By.TAG_NAME, "td")
                if len(cols) < 6:
                    continue

                sno, tender_id, title = map(
                    str.strip, [cols[0].text, cols[1].text, cols[2].text]
                )
                org_chain, stage, status = map(
                    str.strip, [cols[3].text, cols[4].text, cols[5].text]
                )

                # Open the status page through the link in the sixth column.
                driver.execute_script(
                    "arguments[0].click();", cols[5].find_element(By.TAG_NAME, "a")
                )
                # NOTE(review): assumes this click always navigates the list
                # window away from the results -- confirm.
                left_list_page = True
                WebDriverWait(driver, 10).until(
                    EC.element_to_be_clickable(
                        (By.XPATH, "//a[contains(text(),'View More Details')]")
                    )
                ).click()

                # The details open in a new window; switch to the newest one.
                WebDriverWait(driver, 10).until(
                    lambda d: len(d.window_handles) > 1
                )
                driver.switch_to.window(driver.window_handles[-1])

                publish, bid_end, tv, addr, loc, pin = _read_detail_fields(driver)

                all_rows.append(
                    {
                        "S.No": sno,
                        "Tender ID": tender_id,
                        "Title/Ref": title,
                        "Organisation Chain": org_chain,
                        "Stage": stage,
                        "Status": status,
                        "Publish Date": publish,
                        "Bid Submission End Date": bid_end,
                        "Tender Value (‚Çπ)": tv,
                        "Address": addr,
                        "Location": loc,
                        "Pincode": pin,
                    }
                )
                done += 1
                print(f"‚úÖ {sno} | {tender_id} | Pin:{pin}")

            except Exception as e:
                print(f"‚ö†Ô∏è  Row {idx} skipped ‚Äî {e}")

            # Return to the list after success AND after failure. Without this
            # a row that fails midway leaves the driver on the status page or
            # the detail window, and every following row fails as well.
            if left_list_page:
                try:
                    _return_to_list(driver, list_window)
                except Exception as e:
                    print(f"Row {idx}: could not return to the result list - {e}")

        # Next page if more records desired
        if MAX_RECORDS is None or done < MAX_RECORDS:
            try:
                driver.find_element(By.ID, "loadNext").click()
                time.sleep(2)
                row_ptr = 1
            except Exception:
                # Any failure to click "loadNext" ends the pagination.
                break
        else:
            break
    except Exception as outer:
        print(f"‚ùå Loop broken ‚Äî {outer}")
        break

# Write the collected records to an Excel file named after the configured
# department, so the filename follows DEPARTMENT_NAME instead of being
# hard-coded in two places.
output_path = f"odisha_tenders_by_{DEPARTMENT_NAME}.xlsx"
try:
    pd.DataFrame(all_rows).to_excel(output_path, index=False)
    print(f"\nüìÅ Results saved ‚Üí {output_path}")
finally:
    # Shut the browser down even when the export raises.
    driver.quit()



üîç Scraping tenders for: Orissa Lift Irrigation Corporation


Enter CAPTCHA:  ax7znh


‚úÖ 1. | 2025_OLIC_115330_1 | Pin:751012
‚úÖ 2. | 2025_OLIC_115325_1 | Pin:751012
‚úÖ 3. | 2025_OLIC_115284_1 | Pin:751012
‚úÖ 4. | 2025_OLIC_115281_1 | Pin:751012
‚úÖ 5. | 2025_OLIC_115279_1 | Pin:751012
‚úÖ 6. | 2025_OLIC_115272_1 | Pin:751012
‚úÖ 7. | 2025_OLIC_115270_1 | Pin:751012
‚úÖ 8. | 2025_OLIC_115265_1 | Pin:751012
‚úÖ 9. | 2025_OLIC_115262_1 | Pin:751012
‚úÖ 10. | 2025_OLIC_115228_9 | Pin:756056
‚úÖ 11. | 2025_OLIC_115228_8 | Pin:756056
‚úÖ 12. | 2025_OLIC_115228_7 | Pin:756056
‚úÖ 13. | 2025_OLIC_115228_6 | Pin:756056
‚úÖ 14. | 2025_OLIC_115228_5 | Pin:756056
‚úÖ 15. | 2025_OLIC_115228_44 | Pin:756056
‚úÖ 16. | 2025_OLIC_115228_43 | Pin:756056
‚úÖ 17. | 2025_OLIC_115228_42 | Pin:756056
‚úÖ 18. | 2025_OLIC_115228_41 | Pin:756056
‚úÖ 19. | 2025_OLIC_115228_40 | Pin:756056
‚úÖ 20. | 2025_OLIC_115228_4 | Pin:756056
‚úÖ 21. | 2025_OLIC_115228_39 | Pin:756056
‚úÖ 22. | 2025_OLIC_115228_38 | Pin:756056
‚úÖ 23. | 2025_OLIC_115228_37 | Pin:756056
‚úÖ 24. | 2025_OLIC_115228_36 | Pin