## Tamil Nadu Voter Roll Analysis (234 Assembly Constituencies)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# AC-wise gender-count pages on elections.tn.gov.in; the file names carry the
# date stamps 01012024 and 06012025.
url_2024 = "https://www.elections.tn.gov.in/ACwise_Gendercount_01012024.aspx"
url_2025 = "https://www.elections.tn.gov.in/ACwise_Gendercount_06012025.aspx"

# read_html returns a list with one DataFrame per HTML table it finds;
# only the first table of each page is kept.
raw_2024 = pd.read_html(io=url_2024)[0]
raw_2025 = pd.read_html(io=url_2025)[0]

# Preview the first rows of both tables as the cell output.
tuple(frame.head() for frame in (raw_2024, raw_2025))

In [None]:
# Build the 2024 per-constituency frame from the scraped table. Columns are
# picked by position: 1 = constituency label, 2-5 = male / female /
# third-gender / total counts.
# NOTE(review): the positions assume the page layout is unchanged — confirm
# against raw_2024.head().
df_2024 = pd.DataFrame({
    "AC_Raw": raw_2024.iloc[:, 1],
    "Male_2024": raw_2024.iloc[:, 2],
    "Female_2024": raw_2024.iloc[:, 3],
    "ThirdGender_2024": raw_2024.iloc[:, 4],
    "Total_2024": raw_2024.iloc[:, 5],
})

# The AC number is the first run of digits in the label (NaN when absent).
df_2024["AC_No"] = df_2024["AC_Raw"].str.extract(r"(\d+)", expand=False)

# Go through float first: a plain int column cannot hold the NaN placeholders.
df_2024["AC_No"] = df_2024["AC_No"].astype("float")

# Rows without a number fall back to their 1-based row position, which
# assumes the table lists constituencies in official order.
df_2024["AC_No"] = df_2024["AC_No"].fillna(
    pd.Series(df_2024.index + 1, index=df_2024.index)
).astype(int)

# Clean AC name: drop the number together with the hyphen (if any) that
# separates it from the name. Hyphens inside the name itself, as in
# "Thiru-Vi-Ka-Nagar", are kept; removing every "-" would glue such names
# together.
df_2024["AC_Name"] = (
    df_2024["AC_Raw"]
    .str.replace(r"\s*-?\s*\d+\s*-?\s*", " ", regex=True)
    .str.strip()
)

# The raw label is no longer needed once number and name are split out.
df_2024 = df_2024.drop(columns="AC_Raw")
df_2024


In [None]:
# Build the 2025 per-constituency frame from the scraped table. Columns are
# picked by position: 1 = constituency label, 2-5 = male / female /
# third-gender / total counts.
# NOTE(review): the positions assume the page layout is unchanged — confirm
# against raw_2025.head().
df_2025 = pd.DataFrame({
    "AC_Raw": raw_2025.iloc[:, 1],
    "Male_2025": raw_2025.iloc[:, 2],
    "Female_2025": raw_2025.iloc[:, 3],
    "ThirdGender_2025": raw_2025.iloc[:, 4],
    "Total_2025": raw_2025.iloc[:, 5],
})

# The AC number is the first run of digits in the label (NaN when absent);
# float is used as an intermediate dtype because int cannot hold NaN.
df_2025["AC_No"] = df_2025["AC_Raw"].str.extract(r"(\d+)", expand=False)
df_2025["AC_No"] = df_2025["AC_No"].astype("float")

# Rows without a number fall back to their 1-based row position, which
# assumes the table lists constituencies in official order.
df_2025["AC_No"] = df_2025["AC_No"].fillna(
    pd.Series(df_2025.index + 1, index=df_2025.index)
).astype(int)

# Clean AC name: drop the number together with the hyphen (if any) that
# separates it from the name. Hyphens inside the name itself, as in
# "Thiru-Vi-Ka-Nagar", are kept; removing every "-" would glue such names
# together.
df_2025["AC_Name"] = (
    df_2025["AC_Raw"]
    .str.replace(r"\s*-?\s*\d+\s*-?\s*", " ", regex=True)
    .str.strip()
)

# The raw label is no longer needed once number and name are split out.
df_2025 = df_2025.drop(columns="AC_Raw")
df_2025


In [None]:
# Sanity checks: each year must contain exactly 234 rows (one per assembly
# constituency), and no AC number may occur twice within a year.
assert len(df_2024) == 234
assert len(df_2025) == 234
assert not df_2024["AC_No"].duplicated().any()
assert not df_2025["AC_No"].duplicated().any()

# Show (rows, columns) of both frames as the cell output.
(df_2024.shape, df_2025.shape)
