In [2]:
import geopandas as gpd

In [5]:
import json
from collections import Counter
import glob
import os

def count_winner_prefixes(jsonl_paths, prefixes=('W', 'C')):
    """
    Count winners across JSONL files by the prefix their name starts with.

    Args:
        jsonl_paths: Iterable of paths to JSONL files (one JSON object per line).
        prefixes: Prefixes to tally, checked in order. A winner is counted under
            the first prefix it starts with and ignored when none matches.

    Returns:
        Counter: One entry per prefix (0 when nothing matched).

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    counts = Counter({prefix: 0 for prefix in prefixes})
    for jsonl_path in jsonl_paths:
        with open(jsonl_path, 'r') as f:
            for line in f:
                # Blank lines are not records; json.loads would raise on them.
                if not line.strip():
                    continue
                data = json.loads(line)
                for winner in data.get('winners', []):
                    for prefix in prefixes:
                        if winner.startswith(prefix):
                            counts[prefix] += 1
                            break
    return counts


# Folder containing your JSONL files
# NOTE(review): absolute, machine-specific path; the cell only finds data on that machine.
folder_path = '/Users/bryanhuang/Documents/DenverVoteKit_practice/parallel/Parallel_runs/results'  # <-- Change this to your folder

# Find all .jsonl files directly inside the folder (the pattern is not recursive)
jsonl_files = glob.glob(os.path.join(folder_path, '*.jsonl'))

# Tally winners whose name starts with 'W' or 'C'
category_counts = count_winner_prefixes(jsonl_files)

print("Aggregated category counts across all files:")
print(f"W: {category_counts['W']}")
print(f"C: {category_counts['C']}")

Aggregated category counts across all files:
W: 4018
C: 1276


In [18]:
import json
import os
import glob
from collections import defaultdict

def categorize_jsonl_folder(folder_path, file_pattern="*.jsonl"):
    """
    Categorize every record in a folder's JSONL files by its winners' first letters.

    Each non-blank line is parsed as one JSON object. Records whose 'winners'
    entry is missing or empty are ignored. Every other record is filed under:
        both_W: all non-empty winners start with 'W'
        both_C: all non-empty winners start with 'C'
        mixed:  anything else (several different first letters, a single
                letter other than 'W'/'C', or only empty winner names)

    A malformed line (invalid JSON, a JSON value that is not an object, or
    winners that cannot be indexed) is skipped on its own; the rest of the
    file is still processed. Files that cannot be opened or decoded are
    skipped. Whenever anything was skipped, one extra "Skipped: ..." line is
    printed after the summary so the loss is visible.

    Args:
        folder_path: Path to folder containing JSONL files
        file_pattern: Pattern to match files (default: "*.jsonl")

    Returns:
        dict | None: The (empty) category dict when no file matches the
        pattern, in which case nothing is printed. Otherwise None: the summary
        is printed and the categories are not returned.
    """
    categories = {
        'both_W': [],
        'both_C': [],
        'mixed': []
    }

    # Find all JSONL files in the folder (non-recursive unless the pattern says otherwise)
    jsonl_files = glob.glob(os.path.join(folder_path, file_pattern))
    if not jsonl_files:
        return categories

    skipped_lines = 0
    skipped_files = 0

    for file_path in jsonl_files:
        try:
            with open(file_path, 'r') as file:
                for line in file:
                    line = line.strip()
                    if not line:
                        continue

                    try:
                        data = json.loads(line)
                    except json.JSONDecodeError:
                        skipped_lines += 1
                        continue

                    # Only JSON objects can carry a 'winners' key.
                    if not isinstance(data, dict):
                        skipped_lines += 1
                        continue

                    winners = data.get('winners', [])
                    if not winners:
                        continue

                    try:
                        first_letters = {winner[0] for winner in winners if winner}
                    except (TypeError, LookupError):
                        # e.g. numeric winners, or a non-iterable 'winners' value
                        skipped_lines += 1
                        continue

                    if first_letters == {'W'}:
                        categories['both_W'].append(data)
                    elif first_letters == {'C'}:
                        categories['both_C'].append(data)
                    else:
                        categories['mixed'].append(data)
        except (OSError, UnicodeDecodeError):
            # Unreadable file: records read before the failure are kept.
            skipped_files += 1
            continue

    # Print final results
    print(f"Both W: {len(categories['both_W'])}")
    print(f"Both C: {len(categories['both_C'])}")
    print(f"Mixed: {len(categories['mixed'])}")
    print(f"Total: {sum(len(items) for items in categories.values())}")
    if skipped_lines or skipped_files:
        print(f"Skipped: {skipped_lines} malformed line(s), {skipped_files} unreadable file(s)")

    # Deliberately no `return categories` here: the original had it commented out.
    # NOTE(review): presumably to keep the notebook from echoing every record; confirm.

# Example usage (placeholder path: replace it before relying on the result).
# NOTE(review): notebook cells appear to run with __name__ == "__main__" (a later cell's
# guard of the same form printed its output), so this block executes whenever the cell runs.
if __name__ == "__main__":
    # Change this to your folder path.
    # NOTE(review): this rebinds the notebook-level `folder_path` that the first counting cell also assigns.
    folder_path = "path/to/your/jsonl/folder"
    
    # Process folder. The function returns the empty category dict when no file
    # matches and None otherwise, so `results` never holds populated categories.
    results = categorize_jsonl_folder(folder_path)
    
    # Optional: specify custom file pattern
    # results = categorize_jsonl_folder(folder_path, "results_*.jsonl")

In [19]:
# Print the category summary for the *.jsonl files directly inside "practice results" (the glob is not recursive).
categorize_jsonl_folder("/Users/bryanhuang/Documents/DenverVoteKit_practice/parallel/Parallel_runs/practice results")


Both W: 1106
Both C: 104
Mixed: 905
Total: 2115


In [20]:
# Print the category summary for the *.jsonl files directly inside the "borda" results folder.
categorize_jsonl_folder("/Users/bryanhuang/Documents/DenverVoteKit_practice/parallel/Parallel_runs/practice results/borda")


Both W: 144
Both C: 0
Mixed: 156
Total: 300


In [21]:
# Print the category summary for the *.jsonl files directly inside the "plurality" results folder.
categorize_jsonl_folder("/Users/bryanhuang/Documents/DenverVoteKit_practice/parallel/Parallel_runs/practice results/plurality")


Both W: 466
Both C: 47
Mixed: 387
Total: 900


In [22]:
# Print the category summary for the *.jsonl files directly inside the "stv" results folder.
categorize_jsonl_folder("/Users/bryanhuang/Documents/DenverVoteKit_practice/parallel/Parallel_runs/practice results/stv")


Both W: 496
Both C: 57
Mixed: 362
Total: 915


In [29]:
import json
import os
import glob
from collections import defaultdict

def categorize_jsonl(path, file_pattern="*.jsonl"):
    """
    Categorize JSONL records by the first letters of their winners.

    `path` may be a single JSONL file or a folder; for a folder, every file
    matching `file_pattern` directly inside it is read. Each non-blank line is
    parsed as one JSON object. Records whose 'winners' entry is missing or
    empty are ignored. Every other record is filed under:
        both_W: all non-empty winners start with 'W'
        both_C: all non-empty winners start with 'C'
        mixed:  anything else (several different first letters, a single
                letter other than 'W'/'C', or only empty winner names)

    A malformed line (invalid JSON, a JSON value that is not an object, or
    winners that cannot be indexed) is skipped on its own; the rest of the
    file is still processed. Files that cannot be opened or decoded are
    skipped. Whenever anything was skipped, one extra "Skipped: ..." line is
    printed after the summary so the loss is visible.

    Args:
        path: Path to a single JSONL file or folder containing JSONL files
        file_pattern: Pattern to match files if path is a folder (default: "*.jsonl")

    Returns:
        dict | None: The (empty) category dict when `path` is neither a file
        nor a folder, or when no file matches; nothing is printed in that
        case. Otherwise None: the summary is printed and the categories are
        not returned.
    """
    categories = {
        'both_W': [],
        'both_C': [],
        'mixed': []
    }

    # Resolve the list of files to read
    if os.path.isfile(path):
        jsonl_files = [path]
    elif os.path.isdir(path):
        jsonl_files = glob.glob(os.path.join(path, file_pattern))
    else:
        return categories

    if not jsonl_files:
        return categories

    skipped_lines = 0
    skipped_files = 0

    for file_path in jsonl_files:
        try:
            with open(file_path, 'r') as file:
                for line in file:
                    line = line.strip()
                    if not line:
                        continue

                    try:
                        data = json.loads(line)
                    except json.JSONDecodeError:
                        skipped_lines += 1
                        continue

                    # Only JSON objects can carry a 'winners' key.
                    if not isinstance(data, dict):
                        skipped_lines += 1
                        continue

                    winners = data.get('winners', [])
                    if not winners:
                        continue

                    try:
                        first_letters = {winner[0] for winner in winners if winner}
                    except (TypeError, LookupError):
                        # e.g. numeric winners, or a non-iterable 'winners' value
                        skipped_lines += 1
                        continue

                    if first_letters == {'W'}:
                        categories['both_W'].append(data)
                    elif first_letters == {'C'}:
                        categories['both_C'].append(data)
                    else:
                        categories['mixed'].append(data)
        except (OSError, UnicodeDecodeError):
            # Unreadable file: records read before the failure are kept.
            skipped_files += 1
            continue

    # Print final results
    print(f"Both W: {len(categories['both_W'])}")
    print(f"Both C: {len(categories['both_C'])}")
    print(f"Mixed: {len(categories['mixed'])}")
    print(f"Total: {sum(len(items) for items in categories.values())}")
    if skipped_lines or skipped_files:
        print(f"Skipped: {skipped_lines} malformed line(s), {skipped_files} unreadable file(s)")

    # Deliberately no `return categories` here: the original had it commented out.
    # NOTE(review): presumably to keep the notebook from echoing every record; confirm.

# Example usage (placeholder path: replace it before relying on the result).
# NOTE(review): notebook cells appear to run with __name__ == "__main__" (a later cell's
# guard of the same form printed its output), so this block executes whenever the cell runs.
if __name__ == "__main__":
    # Works with both files and folders
    
    # Single file
    # results = categorize_jsonl("path/to/your/file.jsonl")
    
    # Folder with all JSONL files
    # results = categorize_jsonl("path/to/your/folder")
    
    # Folder with custom pattern
    # results = categorize_jsonl("path/to/your/folder", "results_*.jsonl")
    
    # The function returns the empty category dict when the path is neither a file nor a
    # folder (or nothing matches) and None otherwise, so `results` never holds populated categories.
    path = "path/to/your/jsonl/folder"  # Change this to your path
    results = categorize_jsonl(path)

In [30]:
# Print the category summary for the plurality results folder, then the literal text "plural".
# NOTE(review): the recorded output below shows no "plural" line; the cell may have been edited after it last ran.
categorize_jsonl("/Users/bryanhuang/Documents/DenverVoteKit_practice/parallel/Parallel_runs/results/plurality/")
print("plural")

Both W: 5
Both C: 1
Mixed: 24
Total: 30


In [31]:
# Print the category summary for the *.jsonl files directly inside the stv results folder.
categorize_jsonl("/Users/bryanhuang/Documents/DenverVoteKit_practice/parallel/Parallel_runs/results/stv/")


Both W: 13
Both C: 1
Mixed: 16
Total: 30


In [38]:
import json
import os
import glob
from collections import defaultdict

def count_winners_by_letter(path, file_pattern="*.jsonl"):
    """
    Count winners across JSONL file(s), grouped by the first letter of each winner.

    `path` may be a single JSONL file or a folder; for a folder, every file
    matching `file_pattern` directly inside it is read. Each non-blank line is
    parsed as one JSON object, and every non-empty entry of its 'winners' list
    adds one to the count of its first character. The totals are printed in
    sorted key order and also returned.

    A malformed line (invalid JSON, a JSON value that is not an object, or
    winners that cannot be indexed) is skipped on its own and contributes
    nothing; the rest of the file is still processed. Files that cannot be
    opened or decoded are skipped. Whenever anything was skipped, one extra
    "Skipped: ..." line is printed after the totals so the loss is visible.

    Args:
        path: Path to a single JSONL file or folder containing JSONL files
        file_pattern: Pattern to match files if path is a folder (default: "*.jsonl")

    Returns:
        dict: Dictionary with letter totals. It is empty, and nothing is
        printed, when `path` is neither a file nor a folder or no file matches.
    """
    letter_counts = defaultdict(int)

    # Resolve the list of files to read
    if os.path.isfile(path):
        jsonl_files = [path]
    elif os.path.isdir(path):
        jsonl_files = glob.glob(os.path.join(path, file_pattern))
    else:
        return dict(letter_counts)

    if not jsonl_files:
        return dict(letter_counts)

    skipped_lines = 0
    skipped_files = 0

    for file_path in jsonl_files:
        try:
            with open(file_path, 'r') as file:
                for line in file:
                    line = line.strip()
                    if not line:
                        continue

                    try:
                        data = json.loads(line)
                    except json.JSONDecodeError:
                        skipped_lines += 1
                        continue

                    # Only JSON objects can carry a 'winners' key.
                    if not isinstance(data, dict):
                        skipped_lines += 1
                        continue

                    winners = data.get('winners', [])
                    if not winners:
                        continue

                    # Tally the record separately so a bad winner cannot leave
                    # a half-counted record in the totals.
                    record_counts = {}
                    try:
                        for winner in winners:
                            if winner:  # Skip empty winners
                                first_letter = winner[0]
                                record_counts[first_letter] = record_counts.get(first_letter, 0) + 1
                    except (TypeError, LookupError):
                        # e.g. numeric winners, or a non-iterable 'winners' value
                        skipped_lines += 1
                        continue

                    for first_letter, count in record_counts.items():
                        letter_counts[first_letter] += count
        except (OSError, UnicodeDecodeError):
            # Unreadable file: records read before the failure are kept.
            skipped_files += 1
            continue

    # Print results; key=str keeps the sort from failing if a key is not a string.
    for letter in sorted(letter_counts, key=str):
        print(f"{letter}: {letter_counts[letter]}")
    if skipped_lines or skipped_files:
        print(f"Skipped: {skipped_lines} malformed line(s), {skipped_files} unreadable file(s)")

    return dict(letter_counts)

# Example usage (placeholder path: replace it before relying on the result).
# NOTE(review): notebook cells appear to run with __name__ == "__main__" (a later cell's
# guard of the same form printed its output), so this block executes whenever the cell runs.
# Unless the placeholder exists on disk, the call returns {} without printing anything.
if __name__ == "__main__":
    path = "path/to/your/jsonl/file_or_folder"  # Change this to your path
    results = count_winners_by_letter(path)

In [39]:
# First-letter winner totals for the parallel_plan1 plurality results (printed, then echoed as the cell value).
count_winners_by_letter("/Users/bryanhuang/Documents/DenverVoteKit_practice/parallel_plan1/Parallel_runs/results/plurality")


C: 87
W: 213


{'W': 213, 'C': 87}

In [40]:
# First-letter winner totals for the parallel_plan1 stv results (printed, then echoed as the cell value).
count_winners_by_letter("/Users/bryanhuang/Documents/DenverVoteKit_practice/parallel_plan1/Parallel_runs/results/stv")


C: 70
W: 240


{'W': 240, 'C': 70}

In [41]:
# First-letter winner totals for the parallel_plan2 plurality results (printed, then echoed as the cell value).
count_winners_by_letter("/Users/bryanhuang/Documents/DenverVoteKit_practice/parallel_plan2/Parallel_runs/results/plurality")


C: 74
W: 226


{'W': 226, 'C': 74}

In [42]:
# First-letter winner totals for the parallel_plan2 stv results (printed, then echoed as the cell value).
count_winners_by_letter("/Users/bryanhuang/Documents/DenverVoteKit_practice/parallel_plan2/Parallel_runs/results/stv")


C: 69
W: 231


{'W': 231, 'C': 69}

In [48]:
# NOTE(review): extract_numbers_from_filenames is only defined in a later cell (In [51]), so this cell
# fails on a fresh top-to-bottom run. Its docstring asks for a directory_path, but a single .jsonl file
# path is passed here and the recorded output is {}. extract_number_from_filename (singular) is
# presumably what was intended; confirm.
#count_winners_by_letter("/Users/bryanhuang/Documents/DenverVoteKit_practice/parallel_plan3/Parallel_runs/results/plurality_cambridge_voters_13000_seats_1_samples_10_wc_(0p24_0p76)_cohesion_(0p75_0p25_0p3_0p7).jsonl")
print(extract_numbers_from_filenames("/Users/bryanhuang/Documents/DenverVoteKit_practice/parallel_plan3/Parallel_runs/results/plurality_cambridge_voters_13000_seats_1_samples_10_wc_(0p24_0p76)_cohesion_(0p75_0p25_0p3_0p7).jsonl"))


{}


In [51]:
import os
import re

def extract_number_from_filename(file_path):
    """
    Pull the first one- or two-digit number that sits directly before a ')' in a file name.

    Only the last path component is searched. The search takes the leftmost
    place where one or two digits are immediately followed by ')'. When a
    longer digit run precedes the ')', only its last two digits are captured.

    Args:
        file_path: Path to a single file (can be full path or just filename)

    Returns:
        int or None: Extracted number, or None if not found
    """
    # Directory names are ignored; only the file name itself is inspected
    base_name = os.path.basename(file_path)

    # 1-2 digits followed by a literal closing parenthesis
    digits_before_paren = re.compile(r'(\d{1,2})\)')

    found = digits_before_paren.search(base_name)
    return int(found.group(1)) if found else None

def extract_numbers_from_filenames(directory_path):
    """
    Map each file name in a directory to the number extract_number_from_filename finds in it.

    Sub-directories are skipped, and files whose name yields no number are
    left out. An empty dict is returned when the path does not exist or
    cannot be listed (for example when it is a regular file or is not
    readable).

    Args:
        directory_path: Path to directory containing files

    Returns:
        dict: Dictionary mapping filename to extracted number
    """
    numbers_by_name = {}

    if os.path.exists(directory_path):
        try:
            entries = os.listdir(directory_path)
        except OSError:
            # PermissionError is an OSError too; nothing to scan in either case
            entries = []

        for entry in entries:
            # Only plain directory entries are of interest, not sub-directories
            if os.path.isdir(os.path.join(directory_path, entry)):
                continue

            value = extract_number_from_filename(entry)
            if value is not None:
                numbers_by_name[entry] = value

    return numbers_by_name

def extract_numbers_and_print(directory_path):
    """
    Run extract_numbers_from_filenames on a directory and print what it found.

    Prints one "filename: number" line per entry in sorted filename order,
    followed by the number of entries, or a single notice when the mapping
    is empty.

    Args:
        directory_path: Path to directory containing files

    Returns:
        dict: Dictionary mapping filename to extracted number
    """
    found = extract_numbers_from_filenames(directory_path)

    if not found:
        print("No files found with numbers before closing parenthesis")
        return found

    print("Extracted numbers from filenames:")
    # Keys are unique, so sorting the names gives the same order as sorting the items
    for name in sorted(found):
        print(f"{name}: {found[name]}")
    print(f"\nTotal files processed: {len(found)}")

    return found

# Example usage
# NOTE(review): this guard runs when the notebook cell is executed (the recorded output
# below shows "Extracted number: 76"), and it depends on an absolute, machine-specific path.
if __name__ == "__main__":
    # For a single file: only the file name is inspected, so the file need not exist
    file_path = "/Users/bryanhuang/Documents/DenverVoteKit_practice/parallel_plan3/Parallel_runs/results/plurality_cambridge_voters_13000_seats_1_samples_10_wc_(0p24_0p76)_cohesion_(0p75_0p25_0p3_0p7).jsonl"
    number = extract_number_from_filename(file_path)
    print(f"Extracted number: {number}")
    
    # For a directory
    # directory = "path/to/your/directory"
    # results = extract_numbers_and_print(directory)
    
    # Or just extract from directory without printing
    # results = extract_numbers_from_filenames(directory)

Extracted number: 76


In [53]:
# NOTE(review): the value returned by the first call is neither stored nor printed, and the recorded
# output below shows only the letter counts from the second call. Assign or print it if it is needed.
extract_number_from_filename("/Users/bryanhuang/Documents/DenverVoteKit_practice/parallel_plan3/Parallel_runs/results/plurality_cambridge_voters_13000_seats_1_samples_10_wc_(0p24_0p76)_cohesion_(0p75_0p25_0p3_0p7).jsonl")
count_winners_by_letter("/Users/bryanhuang/Documents/DenverVoteKit_practice/parallel_plan3/Parallel_runs/results/plurality_cambridge_voters_13000_seats_1_samples_10_wc_(0p24_0p76)_cohesion_(0p75_0p25_0p3_0p7).jsonl")


C: 10


{'C': 10}