In [4]:
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Base URL of the election results
base_url = 'https://results.eci.gov.in/PcResultGenJune2024/'

# URL of the main page
main_page_url = urljoin(base_url, 'index.htm')

# Fetch the main page content. requests applies no timeout by default, so an
# explicit one keeps a stalled connection from blocking the cell forever.
response = requests.get(main_page_url, timeout=30)

# Stop the cell if the request was not successful. An exception is raised
# instead of calling exit(), which in a notebook would end the kernel session
# and discard its state rather than just halting this cell.
if response.status_code != 200:
    raise RuntimeError(
        f"Failed to retrieve the main page. Status code: {response.status_code}"
    )

# Parse the main page content using BeautifulSoup
soup = BeautifulSoup(response.text, 'html.parser')

# Find all anchors carrying an href (string=True additionally filters on the
# anchor's text node)
table_links = soup.find_all('a', href=True, string=True)

# Keep only the links whose href mentions the party-wise result pages.
# urljoin resolves each href against base_url, so plain relative names behave
# exactly like concatenation while absolute or root-relative hrefs stay valid.
table_urls = [
    urljoin(base_url, link['href'])
    for link in table_links
    if 'partywisewinresultState' in link['href']
]
data_list = []  # List to store the extracted data

# Seconds to wait for each HTTP response. requests has no default timeout, so
# without this a single stalled connection would block the cell indefinitely.
request_timeout = 30

# One Session is shared by the whole crawl so connections to the host are
# reused instead of being re-established for every page.
with requests.Session() as session:
    for table_url in table_urls:
        print(f"Processing table page: {table_url}")

        # Fetch the table page. A network-level failure skips this page rather
        # than aborting the crawl and losing the rows collected so far.
        try:
            table_response = session.get(table_url, timeout=request_timeout)
        except requests.RequestException as error:
            print(f"Failed to retrieve the table page: {table_url}. Error: {error}")
            continue

        # Check if the request was successful
        if table_response.status_code != 200:
            print(f"Failed to retrieve the table page: {table_url}. Status code: {table_response.status_code}")
            continue

        # Parse the table page content
        table_soup = BeautifulSoup(table_response.text, 'html.parser')

        # Collect the links to the detailed candidate results pages
        candidate_urls = [
            urljoin(base_url, link['href'])
            for link in table_soup.find_all('a', href=True)
            if 'candidateswise' in link['href']
        ]

        for candidate_url in candidate_urls:
            # Fetch the candidate page content; skip it on a network failure
            try:
                candidate_response = session.get(candidate_url, timeout=request_timeout)
            except requests.RequestException as error:
                print(f"Failed to retrieve the candidate page: {candidate_url}. Error: {error}")
                continue

            # Check if the request was successful
            if candidate_response.status_code != 200:
                print(f"Failed to retrieve the candidate page: {candidate_url}. Status code: {candidate_response.status_code}")
                continue

            # Parse the candidate page content
            candidate_soup = BeautifulSoup(candidate_response.text, 'html.parser')

            # Find all divs with class "col-md-4 col-12"
            divs = candidate_soup.find_all('div', class_='col-md-4 col-12')

            # '-' is the placeholder recorded whenever a value cannot be found
            candidate_name = '-'
            party_name = '-'

            if len(divs) > 1:  # Ensure there is at least a second div
                # NOTE(review): the second matching div is assumed to hold the
                # trailing candidate - confirm against the page layout.
                second_div = divs[1]

                # Extract candidate name from <h5>
                candidate_name_tag = second_div.find('h5')
                if candidate_name_tag is not None:
                    candidate_name = candidate_name_tag.get_text(strip=True)

                # Extract party name from <h6>
                party_name_tag = second_div.find('h6')
                if party_name_tag is not None:
                    party_name = party_name_tag.get_text(strip=True)
            else:
                print(f"No second <div class='col-md-4 col-12'> found in the page: {candidate_url}")

            # Append the extracted data to the list
            data_list.append({
                'Trailing Candidate': candidate_name,
                'Trailing Party': party_name
            })

        print(f"Completed processing table page: {table_url}")

# Convert the list to a DataFrame. The columns are named explicitly so that an
# empty scrape still produces a frame with the expected header instead of a
# column-less one; for non-empty data the result is unchanged.
df = pd.DataFrame(data_list, columns=['Trailing Candidate', 'Trailing Party'])

# Display the DataFrame
print("Final extracted data:")
print(df)


Processing table page: https://results.eci.gov.in/PcResultGenJune2024/partywisewinresultState-369.htm
No second <div class='col-md-4 col-12'> found in the page: https://results.eci.gov.in/PcResultGenJune2024/candidateswise-S0624.htm
Completed processing table page: https://results.eci.gov.in/PcResultGenJune2024/partywisewinresultState-369.htm
Processing table page: https://results.eci.gov.in/PcResultGenJune2024/partywisewinresultState-742.htm
Completed processing table page: https://results.eci.gov.in/PcResultGenJune2024/partywisewinresultState-742.htm
Processing table page: https://results.eci.gov.in/PcResultGenJune2024/partywisewinresultState-1680.htm
Completed processing table page: https://results.eci.gov.in/PcResultGenJune2024/partywisewinresultState-1680.htm
Processing table page: https://results.eci.gov.in/PcResultGenJune2024/partywisewinresultState-140.htm
Completed processing table page: https://results.eci.gov.in/PcResultGenJune2024/partywisewinresultState-140.htm
Processing 

In [6]:
import os
# Destination folder for the export. The original machine-specific path stays
# the default; set the SCRAPE_OUTPUT_DIR environment variable to write
# elsewhere without editing the notebook.
output_directory = os.environ.get(
    'SCRAPE_OUTPUT_DIR',
    r'C:\Users\Khsak\Downloads\scrapped data',
)

# Create the folder if it is missing so to_excel does not fail on a fresh machine
os.makedirs(output_directory, exist_ok=True)

# NOTE(review): the file is named "leading_results" while the DataFrame columns
# are labelled "Trailing ..." - confirm which one is intended.
output_file = os.path.join(output_directory, 'leading_results.xlsx')

# index=True writes the DataFrame's row index as the first spreadsheet column
df.to_excel(output_file, index=True)