![](../additional_materials/logos/darden_rice_logo_SM.png)

### VBM Demographic Analysis
#### 2021 Municipal Primary, Ballots Mailed
---

In [1]:
import pandas as pd
pd.set_option('display.max_columns', None)

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import datetime

In [2]:
# Relative path to the raw 2021 mailed-ballots csv that is loaded below
filepath = '../data/raw/2021_VBM_mailed_ballots.csv'

In [3]:
# Load the raw csv and preview it; row 0 carries the sub-header labels rather than data
df = pd.read_csv(filepath)
df.head(3)

Unnamed: 0,Precinct,Age,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Gender,Unnamed: 8,Unnamed: 9,Ethnicity,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Party,Unnamed: 16,Unnamed: 17,Unnamed: 18,Total People
0,,18 to 24,25 to 34,35 to 49,50 to 64,65+,Unknown,Male,Female,Unknown,African Amer,Hispanic,White,Other,Unknown,Democrats,Independent,Republicans,Unknown,
1,101.0,123,222,374,666,621,0,829,1167,10,1052,42,781,131,0,1395,356,255,0,2006.0
2,102.0,33,93,163,258,418,0,378,583,4,466,17,428,54,0,707,134,124,0,965.0


In [4]:
# Map the raw column labels to descriptive names. The real sub-headers live in the
# first row (index 0) of df: each group label ('Age', 'Gender', 'Ethnicity', 'Party')
# sits over its first sub-column and the remaining sub-columns are 'Unnamed: N'.
column_names = {'Age': 'Age 18-24', 'Unnamed: 2': 'Age 25-34', 'Unnamed: 3': 'Age 35-49', 'Unnamed: 4': 'Age 50-64',
                'Unnamed: 5': 'Age 65+', 'Unnamed: 6': 'Age Unknown', 'Gender': 'Male', 'Unnamed: 8': 'Female',
                'Unnamed: 9': 'Gender Unknown', 'Ethnicity': 'African American', 'Unnamed: 11': 'Hispanic',
                'Unnamed: 12': 'White', 'Unnamed: 13': 'Other Ethnicity', 'Unnamed: 14': 'Ethnicity Unknown',
                # The Party group has four sub-columns (Democrats, Independent, Republicans, Unknown),
                # so 'Unnamed: 18' is the unknown-party count; the file has no 'Unnamed: 19' column.
                'Party': 'Democrat', 'Unnamed: 16': 'Independent', 'Unnamed: 17': 'Republican',
                'Unnamed: 18': 'Party Unknown'}

# Rename columns based on the dictionary above
df = df.rename(columns=column_names)

In [5]:
# Trim the header row (label 0) and the trailing column-totals row,
# then renumber the remaining rows from 0
df = df.drop(index=0).iloc[:-1].reset_index(drop=True)

In [6]:
# Remove thousands separators from the text columns. Columns that pandas already
# parsed as numbers are passed through unchanged, because the .str accessor raises
# AttributeError on non-string data. regex=False makes ',' a literal match.
df = df.apply(
    lambda col: col if pd.api.types.is_numeric_dtype(col) else col.str.replace(',', '', regex=False)
)

# Convert every column to a numeric dtype
df = df.apply(pd.to_numeric)

In [7]:
# Inspect the dtype of each column after the numeric conversion
df.dtypes

Precinct             int64
Age 18-24            int64
Age 25-34            int64
Age 35-49            int64
Age 50-64            int64
Age 65+              int64
Age Unknown          int64
Male                 int64
Female               int64
Gender Unknown       int64
African American     int64
Hispanic             int64
White                int64
Other Ethnicity      int64
Ethnicity Unknown    int64
Democrat             int64
Independent          int64
Republican           int64
Other Party          int64
Total People         int64
dtype: object

In [8]:
# Promote the precinct number to the row index (the column itself is consumed)
df = df.set_index('Precinct')

# df.head(3)

In [9]:
def remove_zero_sum_cols(df):
    """Return `df` restricted to the columns whose values do not sum to zero.

    Columns that sum to 0 people do not represent any voters, so they are left
    out of the result. Column order and the row index are preserved.
    """
    kept = [name for name in df.columns if df[name].sum() != 0]
    return df[kept]

In [10]:
# Keep only the columns whose values do not sum to zero
df = remove_zero_sum_cols(df)

In [11]:
# Preview the cleaned, precinct-indexed frame
df.head(3)

Unnamed: 0_level_0,Age 18-24,Age 25-34,Age 35-49,Age 50-64,Age 65+,Male,Female,Gender Unknown,African American,Hispanic,White,Other Ethnicity,Democrat,Independent,Republican,Total People
Precinct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
101,123,222,374,666,621,829,1167,10,1052,42,781,131,1395,356,255,2006
102,33,93,163,258,418,378,583,4,466,17,428,54,707,134,124,965
103,25,41,103,136,180,210,275,0,33,9,413,30,258,108,119,485


---
---
#### Save clean master csv here

In [11]:
# Persist the cleaned master table; the Precinct index is written as the first csv column
df.to_csv('../data/processed/2021_VBM_mailed_ballots_master.csv')

---
#### Generate and Export Subset Dataframes

In [12]:
# List the remaining column names to pick from when building the subsets below
df.columns

Index(['Age 18-24', 'Age 25-34', 'Age 35-49', 'Age 50-64', 'Age 65+', 'Male',
       'Female', 'Gender Unknown', 'African American', 'Hispanic', 'White',
       'Other Ethnicity', 'Democrat', 'Independent', 'Republican',
       'Total People'],
      dtype='object')

In [13]:
# Independent copies of the age, gender, ethnicity, and party-affiliation columns,
# one frame per category; every frame also carries the 'Total People' column
age_df, gen_df, eth_df, party_df = (
    df[group + ['Total People']].copy()
    for group in (
        ['Age 18-24', 'Age 25-34', 'Age 35-49', 'Age 50-64', 'Age 65+'],
        ['Male', 'Female', 'Gender Unknown'],
        ['African American', 'Hispanic', 'White', 'Other Ethnicity'],
        ['Democrat', 'Independent', 'Republican'],
    )
)

In [14]:
# Write each master-dataframe subset to its own csv file
for frame, path in (
    (age_df, '../data/processed/2021_VBM_age_breakdown.csv'),
    (gen_df, '../data/processed/2021_VBM_gender_breakdown.csv'),
    (eth_df, '../data/processed/2021_VBM_ethnicity_breakdown.csv'),
    (party_df, '../data/processed/2021_VBM_party_breakdown.csv'),
):
    frame.to_csv(path)

---
---

### Analysis

In [15]:
# Reload the cleaned master table saved earlier; Precinct comes back as a regular column
df = pd.read_csv('../data/processed/2021_VBM_mailed_ballots_master.csv')

In [16]:
# Index the rows by precinct number and discard the 'Total People' column
df = df.set_index('Precinct').drop(columns='Total People')

In [17]:
# Preview the analysis frame
df.head(3)

Unnamed: 0_level_0,Age 18-24,Age 25-34,Age 35-49,Age 50-64,Age 65+,Male,Female,Gender Unknown,African American,Hispanic,White,Other Ethnicity,Democrat,Independent,Republican
Precinct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
101,123,222,374,666,621,829,1167,10,1052,42,781,131,1395,356,255
102,33,93,163,258,418,378,583,4,466,17,428,54,707,134,124
103,25,41,103,136,180,210,275,0,33,9,413,30,258,108,119


In [18]:
# (rows, columns) of the analysis frame
df.shape

(91, 15)

In [19]:
# df.isnull().sum()

In [20]:
# df.dtypes

---

In [21]:
# Descriptive statistics for every column, without the 'count' row
summary_df = df.describe().drop(index='count')

In [22]:
# Round the statistics to two decimal places
summary_df = summary_df.round(2)

**BELOW:** This table shows the mean, standard deviation, minimum and maximum values, and quartile values, computed across all precincts, of the per-precinct counts of voters in Pinellas County who requested and were mailed ballots for the 2021 Municipal Primary.

In [23]:
# Display the summary table
summary_df

Unnamed: 0,Age 18-24,Age 25-34,Age 35-49,Age 50-64,Age 65+,Male,Female,Gender Unknown,African American,Hispanic,White,Other Ethnicity,Democrat,Independent,Republican
mean,49.56,135.98,208.1,304.76,363.67,455.26,603.22,3.58,171.73,43.36,774.32,72.66,563.86,231.82,266.38
std,36.36,105.84,140.64,179.64,248.16,275.7,348.78,3.39,250.06,35.67,574.25,49.42,325.32,154.48,226.31
min,0.0,1.0,0.0,0.0,2.0,1.0,1.0,0.0,0.0,0.0,3.0,0.0,2.0,0.0,0.0
25%,23.5,67.5,106.5,183.0,209.5,257.0,365.5,1.0,21.5,20.5,342.5,33.0,351.5,113.5,80.5
50%,41.0,107.0,180.0,266.0,317.0,412.0,542.0,3.0,52.0,37.0,701.0,65.0,544.0,206.0,202.0
75%,59.5,176.5,279.5,403.0,420.0,608.5,826.5,6.0,197.0,51.0,1164.5,93.0,706.0,326.0,378.5
max,191.0,532.0,687.0,836.0,1109.0,1296.0,1604.0,16.0,1106.0,183.0,2489.0,217.0,1449.0,686.0,954.0


In [24]:
# Save the summary statistics to csv
summary_df.to_csv('../data/processed/2021_summary_stats.csv')

---
---

In [25]:
def _load_breakdown(path):
    """Read a breakdown csv, drop its 'Total People' column, and index it by 'Precinct'."""
    frame = pd.read_csv(path)
    frame = frame.drop(columns='Total People')
    return frame.set_index('Precinct')


# Reload the four breakdowns written earlier, keeping only the demographic columns
age_df = _load_breakdown('../data/processed/2021_VBM_age_breakdown.csv')
gen_df = _load_breakdown('../data/processed/2021_VBM_gender_breakdown.csv')
eth_df = _load_breakdown('../data/processed/2021_VBM_ethnicity_breakdown.csv')
party_df = _load_breakdown('../data/processed/2021_VBM_party_breakdown.csv')

In [26]:
# Demographic columns belonging to each breakdown
age_cols = ['Age 18-24', 'Age 25-34', 'Age 35-49', 'Age 50-64', 'Age 65+']
gen_cols = ['Male', 'Female', 'Gender Unknown']
eth_cols = ['African American', 'Hispanic', 'White', 'Other Ethnicity']
party_cols = ['Democrat', 'Independent', 'Republican']

# Replace each precinct's counts with their share of that precinct's row total,
# rounded to three decimal places
for frame, cols in (
    (age_df, age_cols),
    (gen_df, gen_cols),
    (eth_df, eth_cols),
    (party_df, party_cols),
):
    row_totals = frame[cols].sum(axis=1)
    frame[cols] = frame[cols].div(row_totals, axis=0).round(3)

In [27]:
# Display the per-precinct age shares
age_df

Unnamed: 0_level_0,Age 18-24,Age 25-34,Age 35-49,Age 50-64,Age 65+
Precinct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
101,0.061,0.111,0.186,0.332,0.310
102,0.034,0.096,0.169,0.267,0.433
103,0.052,0.085,0.212,0.280,0.371
104,0.052,0.108,0.190,0.315,0.335
105,0.056,0.122,0.190,0.280,0.352
...,...,...,...,...,...
237,0.036,0.100,0.191,0.309,0.364
239,0.052,0.134,0.212,0.277,0.325
240,0.036,0.119,0.200,0.309,0.336
241,0.043,0.081,0.157,0.335,0.383


In [28]:
# Write each table of per-precinct shares to its own csv file
for frame, path in (
    (age_df, '../data/processed/2021_VBM_age_percentages.csv'),
    (gen_df, '../data/processed/2021_VBM_gender_percentages.csv'),
    (eth_df, '../data/processed/2021_VBM_ethnicity_percentages.csv'),
    (party_df, '../data/processed/2021_VBM_party_percentages.csv'),
):
    frame.to_csv(path)

---

---
#### Age

---
#### Gender

---
#### Ethnicity

---
#### Party Affiliation