# Project 1

## Introduction

In this notebook, we will be sharing the data on the 2024 U.S. Presidential Election on county-level

## Computing for Mean, Median, and Mode

First, import and the data first.

In [3]:
# --- Import Modules ---
import pandas as pd
import plotly.express as px
from statsmodels import robust

# --- 1. Read CSV ---
df = pd.read_csv("2024_US_County_Level_Presidential_Results.csv")

# --- 2. Inspect Columns ---
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3160 entries, 0 to 3159
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   state_name      3160 non-null   object 
 1   county_fips     3160 non-null   int64  
 2   county_name     3160 non-null   object 
 3   votes_gop       3160 non-null   int64  
 4   votes_dem       3160 non-null   int64  
 5   total_votes     3160 non-null   int64  
 6   diff            3160 non-null   float64
 7   per_gop         3160 non-null   float64
 8   per_dem         3160 non-null   float64
 9   per_point_diff  3160 non-null   float64
dtypes: float64(4), int64(4), object(2)
memory usage: 247.0+ KB


Now we find the Mean, Median, Mode using Pandas. In this case we calculate the Voters for Democratic Presidential Canditate in county-level.

In [None]:
# --- Import Modules ---
import pandas as pd
import plotly.express as px
from statsmodels import robust

# --- 1. Read CSV ---
df = pd.read_csv("2024_US_County_Level_Presidential_Results.csv")

# Example numeric column to analyze (replace with actual column name)
col = "votes_dem"  # adjust if needed

# --- 3. Compute Descriptive Stats ---
mean_val = df[col].mean()
median_val = df[col].median()
mode_val = df[col].mode().iloc[0]

print(f"Mean: {mean_val:.3f}")
print(f"Median: {median_val:.3f}")
print(f"Mode: {mode_val:.3f}")


Mean: 23736.310
Median: 3458.500
Mode: 146.000


This time, we calculate Mean, Median, Mode without Pandas.

In [9]:
import csv

# read the CSV manually

filename = "2024_US_County_Level_Presidential_Results.csv"
column = "votes_dem"
values = []

# Convert entries (rows) in column dem_votes to integer

with open(filename, newline="", encoding="utf-8") as file:
    reader = csv.DictReader(file)
    for row in reader:
        val = row.get(column, "")
        if val.strip() != "":
            try:
                values.append(float(val))
            except:
                pass

# Compute the mean
total = 0
count = 0
for v in values:
    total = total + v
    count = count + 1

mean_val = total / count

# Compute the median
sorted_vals = sorted(values)
n = len(sorted_vals)

if n % 2 == 1:
    median_val = sorted_vals[n // 2]
else:
    mid1 = sorted_vals[n // 2 - 1]
    mid2 = sorted_vals[n // 2]
    median_val = (mid1 + mid2) / 2

# Compute Mode
freq = {}
for v in sorted_vals:
    if v in freq:
        freq[v] = freq[v] + 1
    else:
        freq[v] = 1

max_count = 0
mode_val = None

for value in freq:
    if freq[value] > max_count:
        max_count = freq[value]
        mode_val = value

# Print results
print(f"Mean: {mean_val:.2f}")
print(f"Median: {median_val:.2f}")
print(f"Mode: {mode_val:.2f}")
print(f"Number of data points: {len(values)}")


Mean: 23736.31
Median: 3458.50
Mode: 146.00
Number of data points: 3160


Now we visualize without plotly and other packages (Only Python). We use pound as symbol denoting histogram for Democrats voter in 2024 U.S. Presidential Election.

In [None]:
import csv

filename = "2024_US_County_Level_Presidential_Results.csv"
column = "votes_dem"

values = []

# Read CSV and collect votes_dem values
with open(filename, newline="", encoding="utf-8") as file:
    reader = csv.DictReader(file)
    for row in reader:
        val = row.get(column, "")
        if val.strip() != "":
            try:
                values.append(float(val))
            except:
                pass

    # Print text-based distribution
    bins = [0, 10000, 50000, 100000, 250000, 500000, 1000000, 2000000]
    counts = [0] * (len(bins) - 1)

    # Count values per bin
    for v in values:
        for i in range(len(bins) - 1):
            if bins[i] <= v < bins[i + 1]:
                counts[i] += 1
                break

    print("\nCounty-Level Distribution of Democratic Votes (votes_dem)")
    print("Units: Number of votes per county")
    print("Each # ≈ 50 counties\n")
    print("x-axis: Vote Count Range | y-axis: Number of Counties\n")

    scale = 50

    for i in range(len(counts)):
        label = f"{bins[i]:>7,}–{bins[i + 1]:<7,}"
        bar = "#" * max(1, counts[i] // scale)  # at least one #
        print(f"{label} | {bar}")


County-Level Distribution of Democratic Votes (votes_dem)
Units: Number of votes per county
Each # ≈ 50 counties

x-axis: Vote Count Range | y-axis: Number of Counties

      0–10,000  | #############################################
 10,000–50,000  | ###########
 50,000–100,000 | ##
100,000–250,000 | ##
250,000–500,000 | #
500,000–1,000,000 | #
1,000,000–2,000,000 | #
