In [1]:
import pandas as pd
# Show every column when a DataFrame is displayed (no "..." truncation).
pd.set_option('display.max_columns', None)
# Display floats with 2 decimal places. The fully-qualified key is required:
# the bare 'precision' pattern also matches 'styler.format.precision' on
# newer pandas versions and raises OptionError ("Pattern matched multiple keys").
pd.set_option('display.precision', 2)

In [2]:
# Load the election data set from the CSV file in the working directory
csv_path = '2020_elections.csv'
df = pd.read_csv(csv_path)

In [3]:
df.head()

Unnamed: 0,State,Total Ballots Counted (Estimate),Vote for Highest Office (President),VEP Turnout Rate,Voting-Eligible Population (VEP),Voting-Age Population (VAP),% Non-citizen,Prison,Probation,Parole,Total Ineligible Felon,Overseas Eligible,State Abv
0,United States,158835004,,66.40%,239247182,257605088,7.80%,1461074,1962811,616440,3294457,4971025.0,
1,Alabama,2306587,2297295.0,62.60%,3683055,3837540,2.30%,25898,50997,10266,67782,,AL
2,Alaska,367000,,69.80%,525568,551117,3.40%,4293,2074,1348,6927,,AK
3,Arizona,3400000,,65.50%,5189000,5798473,8.90%,38520,76844,7536,93699,,AZ
4,Arkansas,1212030,1206697.0,55.50%,2182375,2331171,3.60%,17510,36719,24698,64974,,AR


Dropping rows and columns that are either irrelevant or unusable due to many null-value data entries

In [4]:
# Remove the "United States" summary row (index label 0): the analysis focuses
# on the individual states, not the country as a whole.
df = df.drop(index=[0])

In [5]:
# These two columns are removed because many of their entries are null.
sparse_columns = ['Vote for Highest Office (President)', 'Overseas Eligible']
df = df.drop(columns=sparse_columns)

In [6]:
df.head()

Unnamed: 0,State,Total Ballots Counted (Estimate),VEP Turnout Rate,Voting-Eligible Population (VEP),Voting-Age Population (VAP),% Non-citizen,Prison,Probation,Parole,Total Ineligible Felon,State Abv
1,Alabama,2306587,62.60%,3683055,3837540,2.30%,25898,50997,10266,67782,AL
2,Alaska,367000,69.80%,525568,551117,3.40%,4293,2074,1348,6927,AK
3,Arizona,3400000,65.50%,5189000,5798473,8.90%,38520,76844,7536,93699,AZ
4,Arkansas,1212030,55.50%,2182375,2331171,3.60%,17510,36719,24698,64974,AR
5,California,16800000,64.70%,25962648,30783255,15.00%,104730,0,102586,207316,CA


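The numeric columns are stored as text rather than as integers or floats (counts contain comma separators and rates end with a percent sign), so they cannot be used in calculations until they are converted.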

In [7]:
# To convert regular numbers and remove commas
def convert(column):
    """Convert comma-separated number strings into integers.

    Parameters
    ----------
    column : iterable
        Values such as ``"2,306,587"``, ``"367,000"`` or ``"0"``. Values that
        are not strings are passed through ``str()`` first.

    Returns
    -------
    list of int
        One integer per input value, in the same order.

    Raises
    ------
    ValueError
        If a value is not a valid integer once its commas are removed
        (for example an empty string).
    """
    # Removing every comma handles any number of thousands groups, and also
    # values that carry no separator at all. Fixed-position slicing would
    # misread both of those cases.
    return [int(str(num).replace(',', '')) for num in column]
  
# To convert percents into numbers
def convertPercent(column):
    """Convert percentage strings such as ``"62.60%"`` into floats.

    Parameters
    ----------
    column : iterable
        Percentage values, with or without a trailing ``%`` sign.

    Returns
    -------
    list of float
        A new list with one float per input value (``"62.60%"`` -> ``62.6``).
        The input sequence itself is left unmodified.

    Raises
    ------
    ValueError
        If a value is not a valid number once the ``%`` sign is removed.
    """
    # rstrip('%') only removes an actual percent sign, so a value without one
    # keeps its last digit instead of losing it to a blind [:-1] slice.
    return [float(str(value).strip().rstrip('%')) for value in column]

In [8]:
# Count columns: run each one through convert() to obtain integers
count_columns = [
    'Total Ballots Counted (Estimate)',
    'Voting-Eligible Population (VEP)',
    'Voting-Age Population (VAP)',
    'Prison',
    'Probation',
    'Parole',
    'Total Ineligible Felon',
]
for name in count_columns:
    df[name] = convert(list(df[name]))

# Percentage columns: run each one through convertPercent() to obtain floats
percent_columns = ['VEP Turnout Rate', '% Non-citizen']
for name in percent_columns:
    df[name] = convertPercent(list(df[name]))

In [9]:
df.head()

Unnamed: 0,State,Total Ballots Counted (Estimate),VEP Turnout Rate,Voting-Eligible Population (VEP),Voting-Age Population (VAP),% Non-citizen,Prison,Probation,Parole,Total Ineligible Felon,State Abv
1,Alabama,2306587,62.6,3683055,3837540,2.3,25898,50997,10266,67782,AL
2,Alaska,367000,69.8,525568,551117,3.4,4293,2074,1348,6927,AK
3,Arizona,3400000,65.5,5189000,5798473,8.9,38520,76844,7536,93699,AZ
4,Arkansas,1212030,55.5,2182375,2331171,3.6,17510,36719,24698,64974,AR
5,California,16800000,64.7,25962648,30783255,15.0,104730,0,102586,207316,CA


In [10]:
# Adding another column indicating which party each state voted for in the election.
# The letters are assigned by position, one per row of df in its current row
# order, so the string must contain exactly as many letters as df has rows.
party_by_row = (
    'RRDRDDDDDR'    # rows 1-10
    'DDRDRRRRRD'    # rows 11-20
    'DDDDRRRRDD'    # rows 21-30
    'DDDRRRRDDD'    # rows 31-40
    'RRRRRDDDRDR'   # rows 41-51
)
df['Party'] = list(party_by_row)

In [11]:
df.head()

Unnamed: 0,State,Total Ballots Counted (Estimate),VEP Turnout Rate,Voting-Eligible Population (VEP),Voting-Age Population (VAP),% Non-citizen,Prison,Probation,Parole,Total Ineligible Felon,State Abv,Party
1,Alabama,2306587,62.6,3683055,3837540,2.3,25898,50997,10266,67782,AL,R
2,Alaska,367000,69.8,525568,551117,3.4,4293,2074,1348,6927,AK,R
3,Arizona,3400000,65.5,5189000,5798473,8.9,38520,76844,7536,93699,AZ,D
4,Arkansas,1212030,55.5,2182375,2331171,3.6,17510,36719,24698,64974,AR,R
5,California,16800000,64.7,25962648,30783255,15.0,104730,0,102586,207316,CA,D


What is the average percent of non-citizen residents of each state and the District?

In [12]:
# Mean non-citizen percentage over every row of the frame
average = df['% Non-citizen'].mean()

# Boolean masks selecting the rows of each party
d_states = df['Party'].eq("D")
r_states = df['Party'].eq("R")

# Sub-frames holding only the Democratic-voting / Republican-voting rows
democrats = df.loc[d_states]
republicans = df.loc[r_states]

In [13]:
average

5.443137254901959

In [14]:
democrats['% Non-citizen'].mean()

6.907692307692306

In [15]:
republicans['% Non-citizen'].mean()

3.92

On average, non-citizens made up 5.44% of the residents of each state and the District. In Democratic-voting states the average was 6.91%, and in Republican-voting states it was 3.92%.

In [16]:
import plotly.express as px

In [17]:
# Moving rows in the dataset so that when the graphs are autogenerated the
# Democratic-voting states appear blue and the Republican-voting states appear
# red. The intent is that a Democratic row comes first, so that plotly assigns
# 'D' its first colour.
#
# pd.concat replaces the chained DataFrame.append calls, because
# DataFrame.append was removed in pandas 2.0.
#
# The slices are label-based and inclusive: df.loc[3:5] and df.loc[5:] both
# contain the row labelled 5, so after renumbering the duplicate sits at
# position 2 and is dropped. The resulting index is 0, 1, 3, 4, ...
#
# NOTE: this cell reassigns df from itself, so it is not idempotent. Running it
# a second time reorders the rows again; run it once from a fresh kernel.
df = (
    pd.concat([df.loc[3:5], df.loc[0:2], df.loc[5:]])
    .reset_index(drop=True)   # renumber 0..n-1 without keeping the old labels
    .drop(index=2)            # remove the duplicated row
)
df

Unnamed: 0,State,Total Ballots Counted (Estimate),VEP Turnout Rate,Voting-Eligible Population (VEP),Voting-Age Population (VAP),% Non-citizen,Prison,Probation,Parole,Total Ineligible Felon,State Abv,Party
0,Arizona,3400000,65.5,5189000,5798473,8.9,38520,76844,7536,93699,AZ,D
1,Arkansas,1212030,55.5,2182375,2331171,3.6,17510,36719,24698,64974,AR,R
3,Alabama,2306587,62.6,3683055,3837540,2.3,25898,50997,10266,67782,AL,R
4,Alaska,367000,69.8,525568,551117,3.4,4293,2074,1348,6927,AK,R
5,California,16800000,64.7,25962648,30783255,15.0,104730,0,102586,207316,CA,D
6,Colorado,3295000,76.4,4313054,4595504,5.7,18905,0,0,18905,CO,D
7,Connecticut,1850000,71.1,2603327,2839560,7.7,13268,0,4452,17720,CT,D
8,Delaware,507805,70.5,720531,780822,5.8,5874,14176,350,15013,DE,D
9,District of Columbia,350000,64.7,540685,582065,7.1,0,0,0,0,DC,D
10,Florida,11150000,71.7,15551739,17543341,10.1,91674,205033,4345,223139,FL,R


In [18]:
# Bar chart of the non-citizen percentage per state abbreviation, coloured by party
averageBarChart = px.bar(
    df,
    x='State Abv',
    y='% Non-citizen',
    color='Party',
    title='Percent of Non-Citizen Residents by State (and District) and Party',
)
averageBarChart.show()

In [19]:
# Scatter plot of ineligible-felon totals against the voting-eligible population,
# coloured by party, with an OLS trendline drawn for each colour group
scatter = px.scatter(
    df,
    x="Voting-Eligible Population (VEP)",
    y="Total Ineligible Felon",
    hover_name='State Abv',
    title='Voting Eligible Population vs Total Ineligible Population',
    color='Party',
    trendline='ols',
)
scatter.show()


pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.



How does the ratio of the Voting Eligible Population to the Voting Age Population differ between the red and blue states?

In [20]:
# New column: voting-eligible population divided by voting-age population, row by row
df['VEP/VAP'] = df['Voting-Eligible Population (VEP)'].div(df['Voting-Age Population (VAP)'])
df.head()

Unnamed: 0,State,Total Ballots Counted (Estimate),VEP Turnout Rate,Voting-Eligible Population (VEP),Voting-Age Population (VAP),% Non-citizen,Prison,Probation,Parole,Total Ineligible Felon,State Abv,Party,VEP/VAP
0,Arizona,3400000,65.5,5189000,5798473,8.9,38520,76844,7536,93699,AZ,D,0.89
1,Arkansas,1212030,55.5,2182375,2331171,3.6,17510,36719,24698,64974,AR,R,0.94
3,Alabama,2306587,62.6,3683055,3837540,2.3,25898,50997,10266,67782,AL,R,0.96
4,Alaska,367000,69.8,525568,551117,3.4,4293,2074,1348,6927,AK,R,0.95
5,California,16800000,64.7,25962648,30783255,15.0,104730,0,102586,207316,CA,D,0.84


In [21]:
# Rebuild the per-party masks and sub-frames from the current df so that they
# include the 'VEP/VAP' column
d_states = df['Party'].eq("D")
r_states = df['Party'].eq("R")

democrats = df.loc[d_states]
republicans = df.loc[r_states]

In [22]:
democrats['VEP/VAP'].mean()

0.9228606005459296

In [23]:
republicans['VEP/VAP'].mean()

0.9468045660854331

In [24]:
# Bar chart of the VEP/VAP ratio per state abbreviation, coloured by party
bar = px.bar(
    df,
    x='State Abv',
    y='VEP/VAP',
    color='Party',
    title='Ratio of Voting Eligible Population to Voting Age Population',
)
bar.show()

What is the average turnout rate in the states and District by party?

In [25]:
# Per-party masks and sub-frames, recomputed from the current df
d_states = df['Party'].eq("D")
r_states = df['Party'].eq("R")

democrats = df.loc[d_states]
republicans = df.loc[r_states]

In [26]:
democrats['VEP Turnout Rate'].mean()

70.33846153846154

In [27]:
republicans['VEP Turnout Rate'].mean()

65.104

The average voting-eligible population (VEP) turnout rate in Democratic-voting states was 70.34%, while the average in Republican-voting states was 65.10%.

In [28]:
# Bar chart of the VEP turnout rate per state abbreviation, coloured by party
turnout_bar = px.bar(
    df,
    x='State Abv',
    y='VEP Turnout Rate',
    color='Party',
    title='Voter Eligible Population Turnout Rate by Party (%)',
)
turnout_bar.show()

In [29]:
# Per-party masks and sub-frames, recomputed from the current df
d_states = df['Party'].eq("D")
r_states = df['Party'].eq("R")

democrats = df.loc[d_states]
republicans = df.loc[r_states]

In [30]:
democrats['Voting-Eligible Population (VEP)'].mean()

5144103.5

In [31]:
republicans['Voting-Eligible Population (VEP)'].mean()

4035447.76

In [32]:
df['Voting-Eligible Population (VEP)'].mean()

4600644.803921568

The average size of the Voting-Eligible Population across all 50 states and the District is 4,600,645 people, while the average for the Democratic-voting states is 5,144,104 people and the average for the Republican-voting states is 4,035,448 people.

In [33]:
# Bar chart of the voting-eligible population per state abbreviation, coloured by party
vep_bar = px.bar(
    df,
    x='State Abv',
    y='Voting-Eligible Population (VEP)',
    color='Party',
    title='Voting Eligible Population by State',
)
vep_bar.show()