![](../additional_materials/logos/darden_rice_logo_SM.png)

### VBM Demographic Analysis
#### 2013 & 2017 Municipal Primaries, Ballots Mailed

---

In [1]:
import pandas as pd
pd.set_option('display.max_columns', None)

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns

import datetime

In [2]:
# Load the raw 2017 municipal primary VBM csv. No header or index options are passed,
# so the frame's first row (index 0) still holds the per-column demographic headers
# that the following cells rename from and then drop.
df = pd.read_csv('../data/raw/2017_municipal_primary_VBM.csv')
# df.head(3)

In [3]:
# Map the headers read from the file onto the demographic labels held in the
# first row (index 0) of df. The first column of each group carries the group
# name ('Age', 'Gender', 'Ethnicity', 'Party'); the rest are 'Unnamed: N'.
column_names = {
    # Age brackets
    'Age': 'Age 18-24',
    'Unnamed: 2': 'Age 25-34',
    'Unnamed: 3': 'Age 35-49',
    'Unnamed: 4': 'Age 50-64',
    'Unnamed: 5': 'Age 65+',
    'Unnamed: 6': 'Age Unknown',
    # Gender
    'Gender': 'Male',
    'Unnamed: 8': 'Female',
    'Unnamed: 9': 'Gender Unknown',
    # Ethnicity
    'Ethnicity': 'African American',
    'Unnamed: 11': 'Hispanic',
    'Unnamed: 12': 'White',
    'Unnamed: 13': 'Other Ethnicity',
    'Unnamed: 14': 'Ethnicity Unknown',
    # Party affiliation
    'Party': 'Democrat',
    'Unnamed: 16': 'Independent',
    'Unnamed: 17': 'Republican',
    'Unnamed: 18': 'Other Party',
    'Unnamed: 19': 'Party Unknown',
}

# Apply the mapping to the column labels and rebind df to the relabelled frame
df = df.rename(columns=column_names)

In [4]:
# Keep only the per-precinct rows: remove the row labelled 0 (the leftover header
# row) and the final row (the column totals), then renumber the index from 0.
df = df.drop(index=0).iloc[:-1].reset_index(drop=True)

In [5]:
# Strip the commas out of the text entries and convert every column to a
# numeric dtype, one column at a time.
df = df.apply(lambda column: pd.to_numeric(column.str.replace(',', '')))

In [6]:
# Sanity check on the conversion above: list the dtype of every column
df.dtypes

Precinct             int64
Age 18-24            int64
Age 25-34            int64
Age 35-49            int64
Age 50-64            int64
Age 65+              int64
Age Unknown          int64
Male                 int64
Female               int64
Gender Unknown       int64
African American     int64
Hispanic             int64
White                int64
Other Ethnicity      int64
Ethnicity Unknown    int64
Democrat             int64
Independent          int64
Republican           int64
Other Party          int64
Party Unknown        int64
Total People         int64
dtype: object

In [7]:
# Use the precinct number as the row index. set_index moves the 'Precinct'
# column into the index, so no separate drop of that column is required.
df = df.set_index('Precinct')

In [8]:
# Use this function to remove all columns that sum to 0 people, i.e., columns that don't represent any voters
def remove_zero_sum_cols(df):
    """Return ``df`` restricted to the columns whose values do not sum to zero.

    Surviving columns keep their original order. The frame passed in is not
    modified; a column-subset of it is returned instead.
    """
    # A column survives only when its total is non-zero
    nonzero_cols = [name for name in df.columns.values if df[name].sum() != 0]
    return df[nonzero_cols]

In [9]:
# Drop every column whose values sum to 0, i.e. categories that represent no voters
df = remove_zero_sum_cols(df)

In [10]:
# CAUTION: Only uncomment and run this cell for 2013 data cleaning!!!

# # There were no VBM ballots recorded for anyone in the 18-24 age range in 2013. I felt it was still important to know, 
# # so after removing zero-sum columns above, I re-added the 18-24 column with 0 for all its values.
# df['Age 18-24'] = 0
# df = df[['Age 18-24', 'Age 25-34', 'Age 35-49', 'Age 50-64', 'Age 65+', 'Male', 
#          'Female', 'Gender Unknown', 'African American', 'Hispanic', 'White',
#          'Other Ethnicity', 'Democrat', 'Independent', 'Republican', 'Total People']]

In [11]:
# Preview the first three rows of the cleaned frame
df.head(3)

Unnamed: 0_level_0,Age 18-24,Age 25-34,Age 35-49,Age 50-64,Age 65+,Male,Female,Gender Unknown,African American,Hispanic,White,Other Ethnicity,Democrat,Independent,Republican,Total People
Precinct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
101,9,38,63,199,282,237,352,2,361,10,198,22,459,58,74,591
102,2,7,40,80,204,119,214,0,149,2,173,9,251,27,55,333
103,5,7,37,60,91,86,114,0,11,2,179,8,108,34,58,200


---
---
#### Save clean master csv here

In [12]:
# Write the cleaned master frame to disk. The Precinct index is saved along with the
# data, which is why the Analysis section can read 'Precinct' back as a column.
df.to_csv('../data/processed/2017_VBM_master.csv')

---
#### Generate and Export Subset Dataframes

In [13]:
# List the remaining column labels to choose from when building the subsets below
df.columns

Index(['Age 18-24', 'Age 25-34', 'Age 35-49', 'Age 50-64', 'Age 65+', 'Male',
       'Female', 'Gender Unknown', 'African American', 'Hispanic', 'White',
       'Other Ethnicity', 'Democrat', 'Independent', 'Republican',
       'Total People'],
      dtype='object')

In [14]:
# Build one independent copy of df per demographic breakdown (age, gender,
# ethnicity, party affiliation). Each copy holds that breakdown's columns plus
# 'Total People', and keeps the precinct index.
age_df, gen_df, eth_df, party_df = (
    df[demographic_cols + ['Total People']].copy()
    for demographic_cols in (
        ['Age 18-24', 'Age 25-34', 'Age 35-49', 'Age 50-64', 'Age 65+'],
        ['Male', 'Female', 'Gender Unknown'],
        ['African American', 'Hispanic', 'White', 'Other Ethnicity'],
        ['Democrat', 'Independent', 'Republican'],
    )
)

In [15]:
# Write each breakdown subset of the master dataframe to its own csv file
for subset, csv_path in (
    (age_df, '../data/processed/2017_VBM_age_breakdown.csv'),
    (gen_df, '../data/processed/2017_VBM_gender_breakdown.csv'),
    (eth_df, '../data/processed/2017_VBM_ethnicity_breakdown.csv'),
    (party_df, '../data/processed/2017_VBM_party_breakdown.csv'),
):
    subset.to_csv(csv_path)

---
---

### Analysis

In [16]:
# Reload the cleaned master csv written earlier; 'Precinct' comes back as an
# ordinary column and is turned into the index in the next cell.
df = pd.read_csv('../data/processed/2017_VBM_master.csv')

In [17]:
# Index the rows by precinct number and discard the 'Total People' column,
# leaving only the demographic count columns for analysis.
df = df.set_index('Precinct').drop(columns='Total People')

In [18]:
# Preview the first three rows of the analysis frame (no 'Total People' column)
df.head(3)

Unnamed: 0_level_0,Age 18-24,Age 25-34,Age 35-49,Age 50-64,Age 65+,Male,Female,Gender Unknown,African American,Hispanic,White,Other Ethnicity,Democrat,Independent,Republican
Precinct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
101,9,38,63,199,282,237,352,2,361,10,198,22,459,58,74
102,2,7,40,80,204,119,214,0,149,2,173,9,251,27,55
103,5,7,37,60,91,86,114,0,11,2,179,8,108,34,58


In [19]:
# (rows, columns) of the analysis frame: one row per precinct, one column per demographic group
df.shape

(91, 15)

In [20]:
# df.isnull().sum()

In [21]:
# df.dtypes

---

In [22]:
# Summary statistics for every column, computed across precincts, with the
# 'count' row removed.
summary_df = df.describe().drop(index='count')

In [23]:
# Round every statistic to two decimal places
summary_df = summary_df.round(2)

**BELOW:** This table shows the mean, standard deviation, minimum and maximum values, and quartile values of the per-precinct counts, in each demographic group, of Pinellas County voters who requested and were mailed ballots for the 2017 Municipal Primary. Each statistic is computed across all precincts.

In [24]:
# Display the rounded summary table
summary_df

Unnamed: 0,Age 18-24,Age 25-34,Age 35-49,Age 50-64,Age 65+,Male,Female,Gender Unknown,African American,Hispanic,White,Other Ethnicity,Democrat,Independent,Republican
mean,3.05,14.76,41.55,99.85,182.02,145.6,195.34,0.29,53.22,7.64,267.67,12.7,179.14,51.8,110.29
std,3.27,10.93,31.33,65.47,127.77,97.89,120.99,0.54,83.75,6.6,218.75,8.84,107.41,37.53,102.64
min,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
25%,1.0,6.0,19.0,58.0,96.0,71.5,114.0,0.0,5.0,2.0,100.5,5.5,105.5,23.5,31.5
50%,2.0,13.0,36.0,94.0,159.0,127.0,172.0,0.0,13.0,7.0,221.0,11.0,175.0,43.0,74.0
75%,4.5,20.5,57.0,129.0,225.0,193.5,252.5,0.0,56.5,10.5,394.0,17.5,238.5,72.5,168.5
max,15.0,53.0,169.0,304.0,569.0,416.0,529.0,2.0,374.0,28.0,868.0,40.0,511.0,154.0,429.0


In [25]:
# Save the rounded summary statistics alongside the other processed files
summary_df.to_csv('../data/processed/2017_summary_stats.csv')

---
---

In [26]:
def _load_breakdown(csv_path):
    """Read one saved breakdown csv, drop 'Total People', and index it by 'Precinct'."""
    breakdown = pd.read_csv(csv_path)
    return breakdown.drop(columns='Total People').set_index('Precinct')


# Reload each breakdown written earlier, keeping only its demographic columns
age_df = _load_breakdown('../data/processed/2017_VBM_age_breakdown.csv')
gen_df = _load_breakdown('../data/processed/2017_VBM_gender_breakdown.csv')
eth_df = _load_breakdown('../data/processed/2017_VBM_ethnicity_breakdown.csv')
party_df = _load_breakdown('../data/processed/2017_VBM_party_breakdown.csv')

In [27]:
# Demographic columns that make up each breakdown
age_cols = ['Age 18-24', 'Age 25-34', 'Age 35-49', 'Age 50-64', 'Age 65+']
gen_cols = ['Male', 'Female', 'Gender Unknown']
eth_cols = ['African American', 'Hispanic', 'White', 'Other Ethnicity']
party_cols = ['Democrat', 'Independent', 'Republican']

# Convert raw counts into each group's share of its precinct row: divide every
# column by the row-wise sum of that breakdown's columns, then round to 3 decimals.
for breakdown, share_cols in (
    (age_df, age_cols),
    (gen_df, gen_cols),
    (eth_df, eth_cols),
    (party_df, party_cols),
):
    row_totals = breakdown[share_cols].sum(axis=1)
    breakdown[share_cols] = breakdown[share_cols].div(row_totals, axis=0).round(3)

In [28]:
# Display the per-precinct age shares (each value is a fraction of that precinct's row total)
age_df

Unnamed: 0_level_0,Age 18-24,Age 25-34,Age 35-49,Age 50-64,Age 65+
Precinct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
101,0.015,0.064,0.107,0.337,0.477
102,0.006,0.021,0.120,0.240,0.613
103,0.025,0.035,0.185,0.300,0.455
104,0.015,0.054,0.113,0.330,0.488
105,0.012,0.047,0.126,0.264,0.551
...,...,...,...,...,...
237,0.000,0.059,0.059,0.324,0.559
239,0.008,0.043,0.125,0.283,0.541
240,0.003,0.049,0.091,0.307,0.550
241,0.003,0.031,0.085,0.304,0.577


In [29]:
# Write the per-precinct share tables to disk. The values are fractions
# between 0 and 1, rounded to 3 decimals, despite the "percentages" file names.
for shares, csv_path in (
    (age_df, '../data/processed/2017_VBM_age_percentages.csv'),
    (gen_df, '../data/processed/2017_VBM_gender_percentages.csv'),
    (eth_df, '../data/processed/2017_VBM_ethnicity_percentages.csv'),
    (party_df, '../data/processed/2017_VBM_party_percentages.csv'),
):
    shares.to_csv(csv_path)

---

---
#### Age

---
#### Gender

---
#### Ethnicity

---
#### Party Affiliation