In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
import pandas as pd
import requests
import threading

In [2]:
def is_url_valid(url):
    r=requests.get(url,verify=False)
    if r.status_code==404:
        return False
    else:
        return True

In [3]:
# Function to generate URLs and validate them concurrently
def generate_and_validate_urls(urls, url_type, range1, range2, base_url):
    for i in range(1, range1):
        for j in range(1, range2):
            url = f"{base_url}{url_type}{str(i).zfill(2)}{str(j)}"
            if is_url_valid(url):
                urls.append(url)
            else:
                break

In [4]:
base_url = "https://results.eci.gov.in/PcResultGenJune2024/Constituencywise"
all_urls = []

# Create threads for Union Territories and States
threads = []
threads.append(threading.Thread(target=generate_and_validate_urls, args=(all_urls, 'U', 10, 40, base_url)))
threads.append(threading.Thread(target=generate_and_validate_urls, args=(all_urls, 'S', 30, 80, base_url)))

# Start threads
for thread in threads:
    thread.start()

# Wait for all threads to complete
for thread in threads:
    thread.join()





In [5]:
print(f"Valid URLs collected: {len(all_urls)}")
if all_urls:
    for url in all_urls:
        print(url)
else:
    print("No valid URLs found.")



Valid URLs collected: 542
https://results.eci.gov.in/PcResultGenJune2024/ConstituencywiseU011
https://results.eci.gov.in/PcResultGenJune2024/ConstituencywiseS011
https://results.eci.gov.in/PcResultGenJune2024/ConstituencywiseS012
https://results.eci.gov.in/PcResultGenJune2024/ConstituencywiseS013
https://results.eci.gov.in/PcResultGenJune2024/ConstituencywiseU021
https://results.eci.gov.in/PcResultGenJune2024/ConstituencywiseS014
https://results.eci.gov.in/PcResultGenJune2024/ConstituencywiseS015
https://results.eci.gov.in/PcResultGenJune2024/ConstituencywiseU031
https://results.eci.gov.in/PcResultGenJune2024/ConstituencywiseS016
https://results.eci.gov.in/PcResultGenJune2024/ConstituencywiseU032
https://results.eci.gov.in/PcResultGenJune2024/ConstituencywiseS017
https://results.eci.gov.in/PcResultGenJune2024/ConstituencywiseS018
https://results.eci.gov.in/PcResultGenJune2024/ConstituencywiseS019
https://results.eci.gov.in/PcResultGenJune2024/ConstituencywiseU051
https://results.eci.go

In [6]:
len(all_urls)

542

In [7]:
driver = webdriver.Chrome()

# Lists to store the extracted data
all_candidate_names = []
all_party_names = []
all_evm_votes = []
all_postal_votes = []
all_total_votes = []
all_percent_votes = []
all_states=[]
all_constituency_codes=[]
all_constituency_names=[]

seen_rows = set()     #Preventing duplicates

for url in all_urls:
    driver.get(url)
    state_element = driver.find_element(By.XPATH, '//h2/span/strong')
    state_name = state_element.text.strip()
    
    constituency_info = driver.find_element(By.XPATH, '//h2/span').text.strip()
    constituency_code = constituency_info.split('-')[0]
    constituency_name =constituency_info.split('-')[1]
    constituency_code = constituency_code.strip()
    constituency_name = constituency_name.strip()
    constituency_name=constituency_name.split("(")[0]
    
    rows = driver.find_elements(By.XPATH, '//table//tbody//tr')

    for row in rows:
        columns = row.find_elements(By.TAG_NAME, 'td')
        if columns:
            row_data = (
                columns[1].text,
                columns[2].text,
                columns[3].text,
                columns[4].text,
                columns[5].text,
                columns[6].text,
                state_name,
                constituency_code,
                constituency_name,
                
            )
            
            # Check if the row is already seen
            if row_data not in seen_rows:
                seen_rows.add(row_data)  # Add the row to the set of seen rows
                all_candidate_names.append(columns[1].text)
                all_party_names.append(columns[2].text)
                all_evm_votes.append(columns[3].text)
                all_postal_votes.append(columns[4].text)
                all_total_votes.append(columns[5].text)
                all_percent_votes.append(columns[6].text)
                all_states.append(state_name)
                all_constituency_codes.append(constituency_code)
                all_constituency_names.append(constituency_name)

# Closing the WebDriver
driver.quit()

# Creating a DataFrame from the accumulated data
df = pd.DataFrame({
    'Year' : 2024,
    'Candidate Name': all_candidate_names,
    'State': all_states,
    'Constituency Name': all_constituency_names,
    'Constituency Code': all_constituency_codes,
    'Party Name': all_party_names,
    'EVM Votes': all_evm_votes,
    'Postal Votes': all_postal_votes,
    'Total Votes': all_total_votes,
    'Percent Votes': all_percent_votes,
    

    
    
})



In [8]:
df.head(5)

Unnamed: 0,Year,Candidate Name,State,Constituency Name,Constituency Code,Party Name,EVM Votes,Postal Votes,Total Votes,Percent Votes
0,2024,BISHNU PADA RAY,(Andaman & Nicobar Islands),Andaman & Nicobar Islands,1,Bharatiya Janata Party,102182,254,102436,50.58
1,2024,KULDEEP RAI SHARMA,(Andaman & Nicobar Islands),Andaman & Nicobar Islands,1,Indian National Congress,77829,211,78040,38.54
2,2024,MANOJ PAUL,(Andaman & Nicobar Islands),Andaman & Nicobar Islands,1,Andaman Nicobar Democratic Congress,8236,18,8254,4.08
3,2024,D AYYAPPAN,(Andaman & Nicobar Islands),Andaman & Nicobar Islands,1,Communist Party of India (Marxist),6009,8,6017,2.97
4,2024,V.K. ABDUL AZIZ,(Andaman & Nicobar Islands),Andaman & Nicobar Islands,1,Independent,2195,8,2203,1.09


In [13]:
df['State'] = df['State'].str.replace(r'[()]', '', regex=True)

In [14]:
df.head(5)

Unnamed: 0,Year,Candidate Name,State,Constituency Name,Constituency Code,Party Name,EVM Votes,Postal Votes,Total Votes,Percent Votes
0,2024,BISHNU PADA RAY,Andaman & Nicobar Islands,Andaman & Nicobar Islands,1,Bharatiya Janata Party,102182,254,102436,50.58
1,2024,KULDEEP RAI SHARMA,Andaman & Nicobar Islands,Andaman & Nicobar Islands,1,Indian National Congress,77829,211,78040,38.54
2,2024,MANOJ PAUL,Andaman & Nicobar Islands,Andaman & Nicobar Islands,1,Andaman Nicobar Democratic Congress,8236,18,8254,4.08
3,2024,D AYYAPPAN,Andaman & Nicobar Islands,Andaman & Nicobar Islands,1,Communist Party of India (Marxist),6009,8,6017,2.97
4,2024,V.K. ABDUL AZIZ,Andaman & Nicobar Islands,Andaman & Nicobar Islands,1,Independent,2195,8,2203,1.09


In [15]:
df.to_csv("EC_Result.csv", index=False)