In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [3]:
# Scrape one constituency-wise candidate result page and save it as a CSV.
#
# Inputs : a page URL typed at the prompt.
# Outputs: `df` (one row per candidate) and a CSV file named after the state
#          and constituency found in the page heading.
# Raises : requests.RequestException on network/HTTP failure, ValueError when
#          the expected page structure is missing.

# Seconds to wait for the server before giving up instead of hanging the kernel.
REQUEST_TIMEOUT_SECONDS = 30

# Characters that cannot appear in file names on common platforms.
UNSAFE_FILENAME_CHARS = '\\/:*?"<>|'


def _clean_pc_name(raw_title):
    """Return the constituency name from the raw heading text.

    The heading is assumed to look like '<number> - <name>' (TODO confirm
    against the live page). Everything up to the first hyphen is dropped; a
    heading without any hyphen is used as-is instead of raising IndexError,
    and hyphens inside the name itself are preserved.
    """
    _prefix, hyphen, remainder = raw_title.partition('-')
    name_part = remainder if hyphen else raw_title
    return name_part.split('(')[0].strip()


def _split_votes(votes_text):
    """Split text shaped like '1,234 (+ 56)' into (total_votes, margin) strings.

    The total is whatever precedes the first '(' with thousands separators
    removed, so a bare count without a margin is still captured. The margin
    is returned unsigned, or '' when no parenthesised part is present.
    """
    total_part, paren, rest = votes_text.partition('(')
    total = total_part.strip().replace(',', '')
    margin_value = ''
    if paren and ')' in rest:
        margin_value = rest.split(')')[0].strip().replace('+', '').replace('-', '').strip()
    return total, margin_value


def _tag_text(parent, tag_name):
    """Return the stripped text of the first `tag_name` under `parent`, or ''."""
    found = parent.find(tag_name) if parent is not None else None
    return found.text.strip() if found is not None else ''


def _safe_filename_part(text):
    """Replace characters that are illegal in file names with underscores."""
    return ''.join('_' if ch in UNSAFE_FILENAME_CHARS else ch for ch in text)


# Define the URL
url = input("Enter CCR Link: ").strip()

# Fetch the webpage content; fail fast on timeouts and HTTP error statuses so
# an error page is never parsed as if it were a results page.
response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
response.raise_for_status()
html_content = response.content

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(html_content, 'html.parser')

# Extract state and parliamentary constituency name from the page heading
header = soup.find("div", class_="page-title")
if header is None:
    raise ValueError("Could not find the header with state and pc_name")

h2_tag = header.find("h2")
if h2_tag is None:
    raise ValueError("Could not find the h2 tag with state and pc_name information")

span_tag = h2_tag.find("span")
strong_tag = h2_tag.find("strong")
if span_tag is None or strong_tag is None:
    raise ValueError("Could not find the span or strong tag with state and pc_name information")

pc_name_raw = span_tag.get_text(strip=True).strip()
state = strong_tag.get_text(strip=True).strip('()').strip()
pc_name = _clean_pc_name(pc_name_raw)

# Locate the candidate cards inside the first 'row' container
table = soup.find('div', class_='row')
if table is None:
    raise ValueError("Could not find the 'row' container holding the candidate details")
candidate_divs = table.find_all('div', class_='col-md-4 col-12')

# Collect one record per candidate card
candidates = []
for candidate_div in candidate_divs:
    # Candidate's image URL ('' when the card has no image or no src attribute)
    img_tag = candidate_div.find('img')
    img_url = img_tag.get('src', '') if img_tag is not None else ''

    # Status (won/lost) is read from the first nested div, votes from the second;
    # either may be absent, in which case the field is left empty.
    status_div = candidate_div.find('div', class_='status')
    status_parts = status_div.find_all('div') if status_div is not None else []
    status = status_parts[0].text.strip() if status_parts else ''
    votes_info = status_parts[1].text.strip() if len(status_parts) > 1 else ''
    total_votes, margin = _split_votes(votes_info)

    # Candidate's name and party
    name_party_div = candidate_div.find('div', class_='nme-prty')
    candidate_name = _tag_text(name_party_div, 'h5')
    party_name = _tag_text(name_party_div, 'h6')

    candidates.append({
        'state': state,
        'pc_name': pc_name,
        'candidate': candidate_name,
        'party': party_name,
        'status': status,
        'total_votes': total_votes,
        'margin': margin,
        'image_url': img_url
    })

# An empty list would produce a column-less DataFrame and a KeyError below.
if not candidates:
    raise ValueError("No candidate entries were found on the page")

# Create a DataFrame from the list of candidates
df = pd.DataFrame(candidates)

# Convert total_votes to numeric; unparseable values become NaN and then 0
df['total_votes'] = pd.to_numeric(df['total_votes'], errors='coerce').fillna(0).astype(int)

# Broadcast the constituency-wide vote total onto every row
df['sum_totalvotes'] = df['total_votes'].sum()

# Build the file name: spaces in the constituency name become underscores and
# characters that are illegal in file names are replaced; case is preserved.
file_name = f"CCR_{_safe_filename_part(state)}_{_safe_filename_part(pc_name.replace(' ', '_'))}.csv"

# Save the DataFrame to a CSV file
df.to_csv(file_name, index=False)

print(f"Data extracted and saved to {file_name}")


Enter CCR Link: https://results.eci.gov.in/PcResultGenJune2024/candidateswise-S0112.htm
Data extracted and saved to CCR_Andhra Pradesh_Vijayawada.csv
