In [1]:
import requests
import os
import json
import pandas as pd
import re
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Download every precinct-level election return reachable from the region
# listing below and store each one as a JSON file under data/.

# Every request gets a timeout: without one, `requests` waits indefinitely on a
# stalled connection and the cell never finishes.
REQUEST_TIMEOUT = 30  # seconds

# Step 1: Get the initial data
response = requests.get(
    "https://2025electionresults.comelec.gov.ph/data/regions/local/5532000.json",
    timeout=REQUEST_TIMEOUT,
)
# Fail here, with the HTTP status, rather than later on a JSON/KeyError.
response.raise_for_status()
data = response.json()

# Step 2: Extract all the codes from the regions
codes = [region['code'] for region in data["regions"]]

# Step 3: Create a folder to store the JSON files (if it doesn't exist)
os.makedirs('data', exist_ok=True)

# File names handed out during THIS run. Name collisions are resolved against
# this set instead of against the disk, so re-running the cell overwrites the
# files of the previous run rather than adding abanon_3.json, abanon_4.json, ...
# (which the aggregation cells would count as additional precincts). On an
# empty data/ folder the resulting names are the same as before.
# NOTE: files left over from an earlier run that this run does not regenerate
# are not deleted; clear data/ manually if the precinct list has changed.
saved_filenames = set()

# Step 4: Loop through the codes and fetch the data
for code in codes:
    url = f"https://2025electionresults.comelec.gov.ph/data/regions/precinct/55/{code}.json"
    response_barangay = requests.get(url, timeout=REQUEST_TIMEOUT)
    response_barangay.raise_for_status()
    data_barangay = response_barangay.json()

    # Extract the 'code' from the new data
    barangay_codes = [barangay['code'] for barangay in data_barangay["regions"]]

    # Step 5: Loop through each barangay code and save the data
    for barangay_code in barangay_codes:
        url = f"https://2025electionresults.comelec.gov.ph/data/er/553/{barangay_code}.json"
        print(f"Fetching: {url}")

        # Fetch the data from the final URL. A network error on one return is
        # reported like a non-200 response so the remaining ones still download.
        try:
            response_final = requests.get(url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            print(f"Failed to fetch data for {barangay_code}: {exc}")
            continue

        if response_final.status_code == 200:
            data_final = response_final.json()

            # Extract voting center and remove "barangay" prefix if it exists
            voting_center = data_final["information"]["votingCenter"].lower().replace(" ", "-")
            voting_center = voting_center.replace("barangay-", "")
            # A path separator inside the name would be interpreted as a
            # sub-directory of data/ and make open() fail.
            voting_center = re.sub(r'[\\/]', '-', voting_center)

            # Step 6: If this run already used the name, append a counter
            filename = f'data/{voting_center}.json'
            counter = 2
            while filename in saved_filenames:
                filename = f'data/{voting_center}_{counter}.json'
                counter += 1
            saved_filenames.add(filename)

            # Step 7: Save the data to the JSON file
            with open(filename, 'w', encoding='utf-8') as file:
                json.dump(data_final, file, ensure_ascii=False, indent=4)

            print(f"Saved: {filename}")
        else:
            print(f"Failed to fetch data for {barangay_code}")


Fetching: https://2025electionresults.comelec.gov.ph/data/er/553/55320020.json
Saved: data/abanon.json
Fetching: https://2025electionresults.comelec.gov.ph/data/er/553/55320021.json
Saved: data/abanon_2.json
Fetching: https://2025electionresults.comelec.gov.ph/data/er/553/55320022.json
Saved: data/agdao.json
Fetching: https://2025electionresults.comelec.gov.ph/data/er/553/55320023.json
Saved: data/agdao_2.json
Fetching: https://2025electionresults.comelec.gov.ph/data/er/553/55320024.json
Saved: data/agdao_3.json
Fetching: https://2025electionresults.comelec.gov.ph/data/er/553/55320025.json
Saved: data/libas.json
Fetching: https://2025electionresults.comelec.gov.ph/data/er/553/55320026.json
Saved: data/ano.json
Fetching: https://2025electionresults.comelec.gov.ph/data/er/553/55320027.json
Saved: data/ano_2.json
Fetching: https://2025electionresults.comelec.gov.ph/data/er/553/55320028.json
Saved: data/antipangol.json
Fetching: https://2025electionresults.comelec.gov.ph/data/er/553/553200

In [3]:
def aggregate_barangay_data(data_folder):
    """
    Load and aggregate barangay JSON files, combining the votes of the same
    barangay from multiple files (like barangay.json, barangay_2.json).

    Parameters
    ----------
    data_folder : str
        Directory containing the ``*.json`` election-return files.

    Returns
    -------
    pandas.DataFrame
        One row per (barangay, contest type, contest name, candidate) with the
        columns ``barangay``, ``location``, ``precinct_id``,
        ``registered_voters``, ``actual_voters``, ``turnout``,
        ``contest_type``, ``contest_name``, ``candidate_name``, ``votes`` and
        ``percentage``. ``percentage`` is the candidate's share of all votes
        summed for that contest in that barangay, times 100, rounded to two
        decimals (NaN when the contest has no votes at all). The frame is
        empty, but still has these columns, when the folder holds no JSON
        files.

    Raises
    ------
    FileNotFoundError
        If ``data_folder`` does not exist (raised by ``os.listdir``).
    """
    output_columns = [
        'barangay', 'location', 'precinct_id', 'registered_voters',
        'actual_voters', 'turnout', 'contest_type', 'contest_name',
        'candidate_name', 'votes',
    ]

    # (barangay, contest type, contest name, candidate) -> summed votes
    aggregated_votes = defaultdict(int)
    barangay_info = {}

    # os.listdir returns names in arbitrary order; sorting makes the "first
    # file of a barangay" (used for the metadata below) the same on every run.
    json_files = sorted(f for f in os.listdir(data_folder) if f.endswith('.json'))

    for file_name in json_files:
        file_path = os.path.join(data_folder, file_name)

        # Extract base barangay name (remove _2, _3 suffixes if present)
        base_barangay = re.sub(r'_\d+\.json$', '', file_name)
        base_barangay = base_barangay.replace('.json', '')

        # The scraper writes UTF-8 with ensure_ascii=False, so read UTF-8
        # explicitly instead of relying on the platform default encoding.
        with open(file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)

        # Store barangay information.
        # NOTE(review): only the first file of a barangay contributes here,
        # while votes are summed over all of its files. For a barangay split
        # across several files the voter counts and turnout therefore describe
        # one file only - confirm whether they should be combined as well.
        if base_barangay not in barangay_info:
            information = data['information']
            barangay_info[base_barangay] = {
                'location': information['location'],
                'precinct_id': information['precinctId'],
                'registered_voters': information['numberOfRegisteredVoters'],
                'actual_voters': information['numberOfActuallyVoters'],
                'turnout': information['turnout'],
            }

        # National and local contests share one layout; only the label differs.
        for section, contest_type in (('national', 'National'), ('local', 'Local')):
            for contest in data[section]:
                contest_name = contest['contestName']
                for candidate in contest['candidates']['candidates']:
                    key = (base_barangay, contest_type, contest_name, candidate['name'])
                    aggregated_votes[key] += candidate['votes']

    # Convert aggregated data to DataFrame format
    results = []
    for (barangay, contest_type, contest_name, candidate_name), votes in aggregated_votes.items():
        info = barangay_info[barangay]
        results.append({
            'barangay': barangay,
            'location': info['location'],
            'precinct_id': info['precinct_id'],
            'registered_voters': info['registered_voters'],
            'actual_voters': info['actual_voters'],
            'turnout': info['turnout'],
            'contest_type': contest_type,
            'contest_name': contest_name,
            'candidate_name': candidate_name,
            'votes': votes,
        })

    df = pd.DataFrame(results, columns=output_columns)

    # Without rows there is nothing to group; return the empty frame with the
    # full schema so callers can still select columns.
    if df.empty:
        df['percentage'] = pd.Series(dtype=float)
        return df

    # Calculate percentage for each candidate within their contest and barangay
    contest_total_votes = df.groupby(['barangay', 'contest_type', 'contest_name'])['votes'].transform('sum')
    df['percentage'] = ((df['votes'] / contest_total_votes) * 100).round(2)

    return df

def main():
    """
    Aggregate the JSON files under ``data``, print a short report (row count,
    number of barangays, top five senators and party lists by total votes)
    and write the full table to ``aggregated_election_results.csv``.
    """
    # Replace with your data folder path
    results = aggregate_barangay_data("data")

    # Basic size information
    print(f"Total candidates across all barangays: {len(results)}")
    print(f"Unique barangays: {results['barangay'].nunique()}")

    # Heading to print -> contest whose votes are totalled across barangays
    leaderboards = (
        ("\nTop 5 candidates for Senator position:", 'SENATOR of PHILIPPINES'),
        ("\nTop 5 Party List results:", 'PARTY LIST of PHILIPPINES'),
    )
    for heading, contest in leaderboards:
        print(heading)
        in_contest = results[results['contest_name'] == contest]
        top_five = (
            in_contest.groupby('candidate_name')['votes']
            .sum()
            .reset_index()
            .sort_values('votes', ascending=False)
            .head(5)
        )
        print(top_five)

    # Persist the complete aggregated table
    results.to_csv('aggregated_election_results.csv', index=False)
    print("\nAggregated data saved to 'aggregated_election_results.csv'")


if __name__ == "__main__":
    main()

Total candidates across all barangays: 15860
Unique barangays: 61

Top 5 candidates for Senator position:
              candidate_name  votes
44      5. AQUINO, BAM (KNP)  51346
54     59. SOTTO, TITO (NPC)  45514
59  63. TULFO, ERWIN (LAKAS)  44365
0    1. ABALOS, BENHUR (PFP)  41272
9     18. CAYETANO, PIA (NP)  40018

Top 5 Party List results:
             candidate_name  votes
79   3 FPJ PANDAY BAYANIHAN  17145
13                110 ABONO   9493
26        122 PINOY WORKERS   6406
6     104 SOLID NORTH PARTY   5479
103              51 AKBAYAN   5337

Aggregated data saved to 'aggregated_election_results.csv'


In [4]:
def create_contest_datasets(data_folder):
    """
    Create separate datasets for each contest with:
    - Candidates in rows (left side)
    - Barangays in columns (headers)
    - Vote counts in cells (aggregated across multiple files for the same barangay)

    Parameters
    ----------
    data_folder : str
        Directory containing the ``*.json`` election-return files.

    Returns
    -------
    tuple
        ``(contest_datasets, all_barangays)``. ``contest_datasets`` maps each
        contest name to a DataFrame with a ``Candidate`` column, one column
        per barangay and a ``Total`` column, sorted by ``Total`` descending.
        Contests appear in the order they are first met while reading the
        files in sorted name order (national before local within a file).
        A contest without candidates maps to an empty DataFrame.
        ``all_barangays`` is the sorted list of barangay names.

    Raises
    ------
    FileNotFoundError
        If ``data_folder`` does not exist (raised by ``os.listdir``).
    """
    # contest name -> candidate name -> barangay -> votes
    contest_data = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))

    # Every barangay encountered, whether or not it has votes in a contest
    all_barangays = set()

    # Sorted so the contest and candidate order below is the same on every
    # run; os.listdir returns names in arbitrary order.
    json_files = sorted(f for f in os.listdir(data_folder) if f.endswith('.json'))

    for file_name in json_files:
        file_path = os.path.join(data_folder, file_name)

        # Extract base barangay name (remove _2, _3 suffixes if present)
        base_barangay = re.sub(r'_\d+\.json$', '', file_name)
        base_barangay = base_barangay.replace('.json', '')
        all_barangays.add(base_barangay)

        # The scraper writes UTF-8 with ensure_ascii=False, so read UTF-8
        # explicitly instead of relying on the platform default encoding.
        with open(file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)

        # National and local contests share one layout and one accumulator.
        for section in ('national', 'local'):
            for contest in data[section]:
                # Looking the contest up registers it even if it lists no
                # candidates, so it still gets an (empty) dataset below.
                candidate_votes = contest_data[contest['contestName']]
                for candidate in contest['candidates']['candidates']:
                    candidate_votes[candidate['name']][base_barangay] += candidate['votes']

    # Convert to DataFrames
    dataframes = {}
    all_barangays_list = sorted(all_barangays)

    # Dicts keep insertion order, so this walks contests in first-seen order
    # rather than in the run-dependent order of a set of strings.
    for contest_name, candidate_votes in contest_data.items():
        rows = []
        for candidate_name, barangay_votes in candidate_votes.items():
            # 'Candidate' is inserted first, so it is already the left-most column
            row = {'Candidate': candidate_name}

            # Barangays where the candidate never appeared count as 0
            for barangay in all_barangays_list:
                row[barangay] = barangay_votes.get(barangay, 0)

            row['Total'] = sum(barangay_votes.values())
            rows.append(row)

        df = pd.DataFrame(rows)
        if not df.empty:
            # A stable sort keeps tied candidates in first-seen order
            df = df.sort_values('Total', ascending=False, kind='mergesort')

        dataframes[contest_name] = df

    # Return both the dataframes and the list of barangays
    return dataframes, all_barangays_list

def main():
    """
    Build one candidates-by-barangay table per contest from the JSON files in
    ``data``, print a preview of each, and write every table plus a
    per-barangay vote summary as CSV files into ``./results-per-position``.
    """
    # Replace with your data folder path
    source_dir = "data"
    results_folder = "./results-per-position"

    # Make sure the output directory is there, announcing it when created
    if not os.path.exists(results_folder):
        os.makedirs(results_folder)
        print(f"Created directory: {results_folder}")

    contest_datasets, all_barangays = create_contest_datasets(source_dir)
    print(f"Created {len(contest_datasets)} contest datasets\n")

    separator = "=" * 50
    for contest_name, table in contest_datasets.items():
        n_rows, n_cols = table.shape
        print(f"Dataset: {contest_name}")
        print(f"Shape: {n_rows} candidates × {n_cols} columns")
        print(table.head(5))  # top five candidates only
        print("\n" + separator + "\n")

        # File name: punctuation stripped, spaces turned into underscores
        stem = re.sub(r'[^\w\s]', '', contest_name).replace(' ', '_')
        csv_path = os.path.join(results_folder, f"{stem}_results.csv")
        table.to_csv(csv_path, index=False)
        print(f"Saved to {csv_path}")
        print("\n")

    # One row per barangay, one column per contest holding the votes summed
    # over that contest's candidates
    barangay_summary = pd.DataFrame(index=all_barangays)
    for contest_name, table in contest_datasets.items():
        if 'Total' not in table.columns:
            # nothing to sum for this contest
            continue
        is_barangay_column = ~table.columns.isin(['Candidate', 'Total'])
        barangay_summary[contest_name] = table.loc[:, is_barangay_column].sum()

    # Row-wise total across all contests
    barangay_summary['Total Votes'] = barangay_summary.sum(axis=1)

    summary_filename = os.path.join(results_folder, 'barangay_vote_summary.csv')
    barangay_summary.to_csv(summary_filename)
    print(f"Saved barangay summary to '{summary_filename}'")


if __name__ == "__main__":
    main()

Created directory: ./results-per-position
Created 9 contest datasets

Dataset: SENATOR of PHILIPPINES
Shape: 66 candidates × 63 columns
                   Candidate  abanon  agdao  ano  antipangol  aponit  bacnar  \
4       5. AQUINO, BAM (KNP)     564   1026  711         400     505    1393   
58     59. SOTTO, TITO (NPC)     441    893  596         434     487    1271   
62  63. TULFO, ERWIN (LAKAS)     449    790  687         455     469    1259   
0    1. ABALOS, BENHUR (PFP)     446    804  569         354     479    1056   
17    18. CAYETANO, PIA (NP)     402    781  581         392     414    1105   

    balaya  balayong  baldog  ...  sapinit  supo  talang  tamayo  tandoc  \
4      459       521     562  ...      388   366     807     586     892   
58     441       475     471  ...      352   323     699     672     794   
62     428       455     452  ...      349   313     729     678     811   
0      385       408     475  ...      337   270     694     609     825   
17 

In [12]:
def _short_candidate_name(name):
    """Return the text between the ballot number and the party, e.g. 'AQUINO, BAM' for '5. AQUINO, BAM (KNP)'; unmatched names are returned unchanged."""
    match = re.search(r'\d+\.\s+(.*?)\s+\(', name)
    return match.group(1) if match else name


def _label_and_save(fig, ax, title, grid_axis, output_path):
    """Apply the shared labels, legend and grid to ``ax``, then save ``fig`` to ``output_path`` and close it."""
    ax.set_title(title, fontsize=16)
    ax.set_xlabel('Barangay', fontsize=14)
    ax.set_ylabel('Number of Votes', fontsize=14)
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right', fontsize=12)
    ax.legend(title='Candidates', fontsize=12)
    ax.grid(True, axis=grid_axis, linestyle='--', alpha=0.7)

    fig.tight_layout()
    fig.savefig(output_path, dpi=300)
    # Close this specific figure so nothing is left open for the notebook to
    # render after the cell finishes.
    plt.close(fig)


def create_mayor_graphs(results_folder="./results-per-position",
                        contest_name="MAYOR_of_PANGASINAN__CITY_OF_SAN_CARLOS",
                        contest_label="Mayoral"):
    """
    Create line and bar graphs showing mayoral votes per barangay

    Reads ``<results_folder>/<contest_name>_results.csv`` (a ``Candidate``
    column, one column per barangay and a ``Total`` column) and writes three
    PNG files into ``<results_folder>/graphs``: a line graph, a bar graph and
    a grouped bar graph drawn through pandas.

    Parameters
    ----------
    results_folder : str
        Folder holding the per-contest CSV files; the ``graphs`` sub-folder
        is created inside it when missing.
    contest_name : str
        File-name stem of the contest to plot. Defaults to the mayoral
        contest this notebook was written for.
    contest_label : str
        Word used at the start of every chart title.

    Returns
    -------
    None
        Progress and problems are reported with ``print``. Nothing is written
        when the CSV is missing or contains no vote data.
    """
    mayor_file = os.path.join(results_folder, f"{contest_name}_results.csv")

    # Check if the file exists
    if not os.path.exists(mayor_file):
        print(f"Error: File does not exist: {mayor_file}")
        # List what is in the folder to help spot a naming mismatch; the
        # folder itself may be missing, in which case there is nothing to list.
        if os.path.isdir(results_folder):
            print("Files in the directory:")
            for file in os.listdir(results_folder):
                print(f"  - {file}")
        return

    print(f"Found mayor file: {mayor_file}")

    # Load the mayor data
    mayor_df = pd.read_csv(mayor_file)

    # Get barangay columns (exclude 'Candidate' and 'Total')
    barangays = [col for col in mayor_df.columns if col not in ['Candidate', 'Total']]

    # With no candidates the bar width below would divide by zero, and with
    # no barangay columns there is nothing to draw.
    if mayor_df.empty or not barangays:
        print(f"Error: No vote data to plot in: {mayor_file}")
        return

    # Simplify candidate names for better display (just get the name part)
    simplified_names = [_short_candidate_name(name) for name in mayor_df['Candidate']]

    # One row of votes per candidate, labelled with the simplified name
    plot_data = mayor_df[barangays].copy()
    plot_data.index = simplified_names
    # Rows are read by position, not by label, so two candidates whose
    # simplified names coincide are still plotted as separate series.
    vote_rows = plot_data.to_numpy()

    # Create directory for graphs if it doesn't exist
    graphs_folder = os.path.join(results_folder, 'graphs')
    if not os.path.exists(graphs_folder):
        os.makedirs(graphs_folder)
        print(f"Created directory: {graphs_folder}")

    # Generate a clean name for filenames
    clean_contest = re.sub(r'[^\w\s]', '', contest_name).replace(' ', '_')

    # ========== LINE GRAPH ==========
    fig, ax = plt.subplots(figsize=(14, 8))
    for candidate, votes in zip(simplified_names, vote_rows):
        ax.plot(barangays, votes, marker='o', linewidth=2,
                label=candidate, markersize=8)

    line_graph_path = os.path.join(graphs_folder, f'{clean_contest}_line_graph.png')
    _label_and_save(fig, ax, f'{contest_label} Votes per Barangay (Line Graph)',
                    'both', line_graph_path)
    print(f"Saved line graph to: {line_graph_path}")

    # ========== BAR GRAPH ==========
    fig, ax = plt.subplots(figsize=(14, 8))

    # Each barangay gets a group 0.8 wide, shared evenly by the candidates
    n_candidates = len(simplified_names)
    bar_width = 0.8 / n_candidates
    positions = np.arange(len(barangays))

    for i, (candidate, votes) in enumerate(zip(simplified_names, vote_rows)):
        # Centre the group of bars on the barangay's tick position
        offset = i * bar_width - (n_candidates * bar_width / 2) + (bar_width / 2)
        ax.bar(positions + offset, votes, width=bar_width, label=candidate)

    ax.set_xticks(positions)
    ax.set_xticklabels(barangays)

    bar_graph_path = os.path.join(graphs_folder, f'{clean_contest}_bar_graph.png')
    _label_and_save(fig, ax, f'{contest_label} Votes per Barangay (Bar Graph)',
                    'y', bar_graph_path)
    print(f"Saved bar graph to: {bar_graph_path}")

    # ========== GROUPED BAR GRAPH (Alternative View) ==========
    # Draw onto an explicit Axes: without ax=, DataFrame.plot opens a figure
    # of its own and a figure created beforehand is left empty and open.
    fig, ax = plt.subplots(figsize=(14, 8))
    plot_data.T.plot(kind='bar', stacked=False, ax=ax)

    grouped_bar_path = os.path.join(graphs_folder, f'{clean_contest}_grouped_bar_graph.png')
    _label_and_save(fig, ax, f'{contest_label} Votes per Barangay (Grouped Bar Graph)',
                    'y', grouped_bar_path)
    print(f"Saved grouped bar graph to: {grouped_bar_path}")

# Render the mayoral charts from the CSV files in the results folder
create_mayor_graphs(results_folder="./results-per-position")

Found mayor file: ./results-per-position/MAYOR_of_PANGASINAN__CITY_OF_SAN_CARLOS_results.csv
Saved line graph to: ./results-per-position/graphs/MAYOR_of_PANGASINAN__CITY_OF_SAN_CARLOS_line_graph.png
Saved bar graph to: ./results-per-position/graphs/MAYOR_of_PANGASINAN__CITY_OF_SAN_CARLOS_bar_graph.png
Saved grouped bar graph to: ./results-per-position/graphs/MAYOR_of_PANGASINAN__CITY_OF_SAN_CARLOS_grouped_bar_graph.png


<Figure size 1400x800 with 0 Axes>