# Website Address
https://ceoandhra.nic.in/ceoap_new/ceo/index.html

### Libraries - Make sure the Chrome web browser is installed on your system
* pip install selenium
* pip install webdriver_manager
* pip install easyocr
* pip install torch
* pip install opencv-python

In [1]:
import os
import cv2
import numpy as np
from PIL import Image
import time
from matplotlib import pyplot as plt
import base64

In [2]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException, TimeoutException

In [3]:
def WAIT_TIME(t=0.5):
    """Pause execution for ``t`` seconds (half a second when omitted)."""
    time.sleep(t)

In [4]:
# Start the Chrome session used by every later cell. The value returned by
# webdriver_manager's ChromeDriverManager().install() is handed to the
# Selenium Service that launches the driver.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

In [5]:
# Entry page that the browser session opens first.
WEBSITE_URL = "https://ceoandhra.nic.in/ceoap_new/ceo/index.html"

In [6]:
# Load the entry page in the Chrome session.
driver.get(WEBSITE_URL)

In [7]:
# Link texts used to locate the menu entry and the sub-menu link to click.
tab_name = "Electoral Rolls"
sub_tab_name = "Final SSR Eroll-2024"

In [8]:
# Open the menu tab, then follow the sub-tab link.
tab = driver.find_element(By.XPATH, f"//a[contains(text(), '{tab_name}')]")
tab.click()

# Remember which windows exist before the click so the window opened by the
# link can be told apart from any that were already there.
handles_before = driver.window_handles

sub_tab = driver.find_element(By.XPATH, f"//a[contains(text(), '{sub_tab_name}')]")
sub_tab.click()

# The link opens a new window. Wait for it to exist instead of indexing
# window_handles right away, which fails if the browser has not created the
# window yet.
WebDriverWait(driver, 10).until(EC.new_window_is_opened(handles_before))
new_handles = [h for h in driver.window_handles if h not in handles_before]
driver.switch_to.window(new_handles[-1])

WAIT_TIME(2)

In [9]:
# Search parameters for the e-roll download form.
# To prompt for them interactively instead, uncomment the four lines below.

# state = input("Enter State: ")
# district = input("Enter District: ")
# assembly = input("Enter Assembly Constituency: ")
# language = input("Enter Language: ")

# Defaults: Andhra Pradesh, Anakapalli, Madugula, ENG.
# NOTE(review): `state` is not referenced by any later cell visible here;
# confirm whether it is still needed.

state = "Andhra Pradesh"
district = "Anakapalli"
assembly = "Madugula"
language = "ENG"

In [10]:
def show_current_window():
    """Print the URL of the window the driver is currently switched to."""
    current = driver.current_url
    print(current)


show_current_window()

https://voters.eci.gov.in/download-eroll?stateCode=S01


In [11]:
# Fixed 5-second pause to give the page time to load before the form is queried.
WAIT_TIME(5)

In [12]:
# Wait (up to 15 s) for the district <select> to be present instead of
# failing immediately when the page is slower than the preceding fixed sleep.
district_dropdown = WebDriverWait(driver, 15).until(
    EC.presence_of_element_located((By.NAME, "district"))
)

# Wrap the element so an option can be chosen by its visible label.
select = Select(district_dropdown)

# Raises NoSuchElementException if no option carries this label.
select.select_by_visible_text(district)

# Read back what the control reports as selected and print it as a check.
selected_option = select.first_selected_option

print(f"Selected option: {selected_option.text}")

Selected option: Anakapalli


In [13]:
# Assembly-constituency picker: a text input addressed by its element id.
# NOTE(review): the numbered id "react-select-2-input" looks auto-generated;
# confirm it stays stable across page versions.
dropdown_input = driver.find_element(By.ID, "react-select-2-input")

# Click the input to open the dropdown
dropdown_input.click()

# Wait for the dropdown options to appear
WAIT_TIME(1)

# Type the constituency name into the input
dropdown_input.send_keys(assembly)

# Press Enter to select the option
dropdown_input.send_keys(Keys.ENTER)

# Fixed pause before the next cell touches the page
WAIT_TIME(2)

In [14]:
# Language selector, located by its form name "langCd".
lang_dropdown = driver.find_element(By.NAME, "langCd")

# Create a Select object
select = Select(lang_dropdown)

# Select an option by its visible text
select.select_by_visible_text(language)

# Read back what the control reports as selected and print it as a check.
selected_option = select.first_selected_option

print(f"Selected option: {selected_option.text}")

# Fixed pause before the next cell touches the page
WAIT_TIME(2)

Selected option: ENG


In [15]:
# CAPTCHA handling: grab the image from the page, OCR it, and type the result
# into the form's input box.

def save_captcha(i=0):
    """Decode the CAPTCHA image shown on the page and write it to ``captcha.jpg``.

    The ``<img alt="captcha">`` element is expected to carry its picture
    inline in ``src`` as ``<header>,<base64 payload>``.

    Args:
        i: Accepted for backward compatibility with callers that pass an
            index; it does not influence the output file name.

    Raises:
        NoSuchElementException: if the CAPTCHA image is not on the page.
        ValueError: if ``src`` is missing or has no inline base64 payload.
    """
    captcha_element = driver.find_element(By.XPATH, "//img[@alt='captcha']")
    src = captcha_element.get_attribute('src') or ''

    # Everything after the first comma is the base64 payload.
    _header, separator, captcha_base64 = src.partition(',')
    if not separator or not captcha_base64:
        raise ValueError(
            "CAPTCHA image src does not contain inline base64 data: "
            f"{src[:60]!r}"
        )

    captcha_data = base64.b64decode(captcha_base64)

    # Always written to the same path; find_text() reads it back from there.
    captcha_path = 'captcha.jpg'
    with open(captcha_path, 'wb') as f:
        f.write(captcha_data)

    print(f"CAPTCHA saved at {captcha_path}")

def set_captcha_text(text):
    """Replace the contents of the CAPTCHA input field with ``text``."""
    field = driver.find_element(By.NAME, "captcha")
    # Remove any earlier attempt before typing the new value.
    field.clear()
    field.send_keys(text)

def refresh_and_save(i=0):
    """Click the page's refresh icon to get a new CAPTCHA, then save it.

    After the click this waits (up to 5 s) for the CAPTCHA image's ``src`` to
    differ from its previous value, so the picture that gets saved is the new
    one rather than the one that was showing before the refresh. If no change
    is seen in time, the currently displayed image is saved anyway.

    Args:
        i: Forwarded unchanged to ``save_captcha``.
    """
    captcha_locator = (By.XPATH, "//img[@alt='captcha']")
    try:
        previous_src = driver.find_element(*captcha_locator).get_attribute('src')
    except NoSuchElementException:
        # No image yet: any src that appears after the click counts as new.
        previous_src = None

    # Locate the refresh icon by its alt attribute and click it.
    element = driver.find_element(By.XPATH, "//img[@alt='refresh']")
    ActionChains(driver).move_to_element(element).click().perform()

    try:
        WebDriverWait(driver, 5).until(
            lambda drv: drv.find_element(*captcha_locator).get_attribute('src')
            not in (None, previous_src)
        )
    except TimeoutException:
        # Best effort: fall through and save whatever is displayed.
        print("CAPTCHA image did not change after refresh; saving the current one.")

    save_captcha(i=i)

def find_text():
    """Run the project's ``Image2Text`` model on ``captcha.jpg``.

    Returns:
        Whatever ``Image2Text().FindText`` returns for the image (presumably
        the decoded CAPTCHA string; confirm against ``infer``).

    Raises:
        FileNotFoundError: if ``captcha.jpg`` is missing or cannot be decoded.
    """
    # NOTE(review): CNNRNNModel is not referenced in this function; the import
    # is kept in case it is needed when the model is loaded. Confirm before
    # removing.
    from infer import CNNRNNModel, Image2Text

    # cv2.imread signals failure by returning None rather than raising, so
    # check before building the model and handing it an invalid image.
    img = cv2.imread('captcha.jpg')
    if img is None:
        raise FileNotFoundError(
            "captcha.jpg could not be read; call save_captcha() first"
        )

    img2text = Image2Text()
    return img2text.FindText(img)

In [16]:
# Dataset Preparation to train a model
# for i in range(0,5000):
#     refresh_and_save(i=i)
#     time.sleep(1)

In [17]:
# # plot images and ask text to resave the image with that name
# PATH = 'Images/'
# import os

# for i in os.listdir(PATH):
#     if i.endswith('.jpg'):
#         img = cv2.imread(PATH+i)
#         img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
#         plt.imshow(img)
#         plt.show()
#         text = input("Enter the text: ")
#         # save the image with the text
#         cv2.imwrite(PATH+text+'.jpg', img)

In [18]:
# Page-size selector of the results table, located by its CSS classes.
dropdown = driver.find_element(By.CSS_SELECTOR, 'select.search-bar.search-bar2')

# Create a Select object
select = Select(dropdown)

# Pick the final entry in the list; on the recorded run this was "Show 50 Records".
select.select_by_index(len(select.options) - 1)

# Verify the selection (optional)
selected_option = select.first_selected_option
print(f"Selected option: {selected_option.text}")

# Fixed pause after changing the page size (presumably so the table can
# reload; TODO confirm).
WAIT_TIME(2)

Selected option: Show 50 Records


In [19]:
# Quick check: print the text of the first table cell that has role="cell".
element = driver.find_element(By.XPATH, '//tr/td[@role="cell"]')
print(element.text)

1 - krishnampalem


In [20]:
# Collect every row of the results table body.
rows = driver.find_elements(By.XPATH, '//tbody/tr')

# Print the first-column text of each row (shown as "<number> - <name>").
# NOTE(review): `names` is initialised but never filled, because the
# extraction lines inside the loop are commented out.
names = []
for row in rows:
    cell_text = row.find_element(By.XPATH, './td[1]').text
    # Show the raw cell text
    print(cell_text)
    # name = cell_text.split(' - ')[1]
    # names.append(name)

# print(names)

1 - krishnampalem
2 - krishnampalem
3 - M.Koduru
4 - M.Koduru
5 - M. KODURU
6 - Tatiparthi
7 - Kasipuram
8 - Sagaram
9 - D.Suravaram
10 - D.Gottivada
11 - Jampena
12 - Veeranarayanam
13 - VEERANARAYANAM
14 - veeranarayanam
15 - Chintaluru
16 - M. Kotapadu
17 - M KOTAPADU
18 - M. KOTAPADU
19 - M.kotapadu vallapuram
20 - V.Madugula
21 - MADUGULA
22 - V.Madugula
23 - V.Madugula
24 - Madugula
25 - V.Madugula
26 - V MADUGULA
27 - V.Madugula
28 - V.Madugula
29 - MLAADUGU
30 - V.MADUGULA
31 - V.Madugula
32 - V.Madugula
33 - Jalampalli
34 - JALAMPALLI
35 - Jalampalli
36 - Narasayya peta
37 - NARASAYYAPETA H/O L PONNAVOLU
38 - Jammadevipeta
39 - JAMMADEVIPETA
40 - Satyavaram
41 - SATYAVARAM
42 - Gadirayi
43 - GADHIRAYI
44 - M.Krishnapuram
45 - Vommali
46 - VOMMALI
47 - vommali
48 - V.J. Puram
49 - kinthali
50 - Kinthali Vallapuram


In [21]:
def HandleCaptcha():
    """Save the displayed CAPTCHA, read it with OCR, and type it into the form."""
    save_captcha()
    decoded = find_text()
    print(f"CAPTCHA Text: {decoded}")
    set_captcha_text(decoded)

In [22]:
def get_row_name(row):
    """Return the text of the first ``<td>`` directly under ``row``."""
    first_cell = row.find_element(By.XPATH, './td[1]')
    return first_cell.text

In [23]:
def process_row(row):
    """Solve the CAPTCHA and click the third download icon of one table row.

    Args:
        row: The ``<tr>`` WebElement to process.

    Returns:
        True when the third download icon was clicked and the page's
        ``div.alert_global`` element appeared within 10 seconds. False when
        the row has fewer than three download icons, or when a Selenium
        lookup or wait failed. The row is not retried here; the caller
        decides what to do with a False result.
    """
    try:
        HandleCaptcha()

        # The first cell reads "<number> - <name>".
        cell = row.find_element(By.XPATH, ".//td[@role='cell']").text
        # partition() tolerates a cell without the " - " separator, where
        # tuple-unpacking a split() would raise ValueError.
        number, _, name = cell.partition(" - ")
        print(f"Number: {number.strip()}, Name: {name.strip()}")

        # Download icons are identified by their alt text.
        download_icons = row.find_elements(By.XPATH, ".//img[@alt='download icon']")

        if len(download_icons) < 3:
            print("Less than 3 download icons found in the row.")
            return False

        download_icons[2].click()
        print('Clicked the third download icon!')

        # The page answers the click with an alert container; wait for it.
        alert = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, 'div.alert_global'))
        )

    except (NoSuchElementException, TimeoutException) as e:
        print(f"Exception encountered: {e}")
        # Put a fresh CAPTCHA answer into the form for whoever acts next.
        # This is guarded because the lookup that just failed may fail again,
        # and an exception escaping from here would abort the whole run.
        try:
            HandleCaptcha()
        except (NoSuchElementException, TimeoutException) as recovery_error:
            print(f"Could not re-enter the CAPTCHA after the failure: {recovery_error}")
        time.sleep(2)  # Brief pause before the caller continues
        return False

    try:
        # The alert text lives in a child <div>, which may be absent.
        alert_message = alert.find_element(By.XPATH, './div')
        print('=' * 50)
        print(f'{alert_message.text}')
        print('-' * 50)
    except NoSuchElementException:
        print("No alert message displayed.")

    return True

In [24]:
# HandleCaptcha()
def DownloadFiles(every_nth=10, pause=20):
    """Process rows of the results table on the current page.

    Starting with the first row, every ``every_nth``-th row is passed to
    ``process_row``. Processing of the page stops at the first row for which
    ``process_row`` returns False.

    Args:
        every_nth: Step between processed rows. The default of 10 keeps the
            previous behaviour (rows 1, 11, 21, ...); pass 1 to process every
            row.
        pause: Seconds to wait after each successfully processed row.

    Raises:
        ValueError: if ``every_nth`` is smaller than 1.
    """
    if every_nth < 1:
        raise ValueError("every_nth must be a positive integer")

    rows = driver.find_elements(By.XPATH, "//tbody//tr")
    for row in rows[::every_nth]:
        if not process_row(row):
            print("Failed to process the row.")
            break

        cell_text = row.find_element(By.XPATH, './td[1]').text
        print(f"{cell_text} - Successfully processed the row.")
        print('=' * 50)
        WAIT_TIME(pause)

In [25]:
# Process the current page, then move to the next one until the ">" button
# is missing or disabled.
while True:
    DownloadFiles()

    buttons = driver.find_elements(By.CSS_SELECTOR, "button.control-btn")
    # First pagination control whose label is exactly ">", if any.
    button_to_click = next(
        (candidate for candidate in buttons if candidate.text.strip() == ">"),
        None,
    )

    if button_to_click is None:
        print(f'There is no button to click. Downloaded all files. from the {district} district , {assembly} assembly constituency.')
        break

    if button_to_click.get_attribute("disabled"):
        print(f'Downloaded all files. from the {district} district , {assembly} assembly constituency.')
        break

    button_to_click.click()
    print("Button clicked!")

    # Fixed pause so the next page can render before it is processed.
    WAIT_TIME(2)

CAPTCHA saved at captcha.jpg
CAPTCHA Text: h4s9c7
Number: 1, Name: krishnampalem
Clicked the third download icon!
No alert message displayed.
1 - krishnampalem - Successfully processed the row.
CAPTCHA saved at captcha.jpg
CAPTCHA Text: u4zgb7
Number: 11, Name: Jampena
Clicked the third download icon!
No alert message displayed.
11 - Jampena - Successfully processed the row.
CAPTCHA saved at captcha.jpg
CAPTCHA Text: wvhrta
Number: 21, Name: MADUGULA
Clicked the third download icon!
No alert message displayed.
21 - MADUGULA - Successfully processed the row.
CAPTCHA saved at captcha.jpg
CAPTCHA Text: heog8w
Number: 31, Name: V.Madugula
Clicked the third download icon!
No alert message displayed.
31 - V.Madugula - Successfully processed the row.
CAPTCHA saved at captcha.jpg
CAPTCHA Text: k8vj23
Number: 41, Name: SATYAVARAM
Clicked the third download icon!
No alert message displayed.
41 - SATYAVARAM - Successfully processed the row.
Button clicked!
CAPTCHA saved at captcha.jpg
CAPTCHA Tex

KeyboardInterrupt: 