### Early voting statistics

In [1]:
import json
import os
import glob
import numpy as np
import pandas as pd
import geopandas as gpd
from pandas.io.json import json_normalize
from altair_saver import save
from altair import datum
import altair as alt
import altair_latimes as lat
import re
# Chart styling: register the theme object from altair_latimes under the
# name "latimes", then make it the active Altair theme.
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")

# pandas display limits for notebook output.
pd.set_option("display.max_columns", 50)
pd.set_option("display.max_rows", 34000)
pd.set_option("display.max_colwidth", None)  # None: do not truncate cell text

In [2]:
# Fetch the page and parse every <table class="table"> on it.
# pd.read_html returns a list of DataFrames, one per matching table.
df = pd.read_html(
    'http://electproject.github.io/Early-Vote-2020G/CA.html',
    attrs={'class': 'table'},
)

In [3]:
# read_html produced a list of tables; keep the first one.
# The isinstance guard makes this cell safe to re-run: once `df` is already
# a DataFrame, `df[0]` would be a column lookup and raise KeyError.
df = df[0] if isinstance(df, list) else df

In [4]:
# Map the scraped table's headers onto the short snake_case names used by
# the cells below.
column_names = {
    'Party': 'party',
    'Returned Ballots': 'returned',
    'Freq. Distribution': 'electorate_share',
    'Requested Ballots': 'ballots',
    'Return Rate': 'return_rate',
}
df = df.rename(columns=column_names)

In [5]:
# Show the table with the columns in reading order: party, requested,
# returned, return rate, share of returns.
df.loc[:, ['party', 'ballots', 'returned', 'return_rate', 'electorate_share']]

Unnamed: 0,party,ballots,returned,return_rate,electorate_share
0,Democrats,10167333,5818369,57.2,51.8
1,Republicans,5341515,2668672,50.0,23.8
2,No Party Affiliation/Minor,6524840,2748994,42.1,24.5
3,TOTAL,22033688,11236035,51.0,100.0


In [6]:
# Statewide count of returned ballots, taken from the table's "TOTAL" row.
# Looked up by label rather than position: the previous df.iloc[(3, 1)]
# would silently return a different cell if the source page reordered its
# columns or added/removed a party row.
is_total_row = df['party'].astype(str).str.strip().str.upper() == 'TOTAL'
assert is_total_row.sum() == 1, "expected exactly one TOTAL row in the early-vote table"
mail_ballots_returned20 = df.loc[is_total_row, 'returned'].iloc[0]

--- 

### How did California vote in the 2016 election?

In [7]:
# Provenance (left commented out, so it does not execute when the notebook is
# run): shell command that downloads the 2016 general-election
# voter-participation-by-county PDF from elections.cdn.sos.ca.gov into input/.
# !wget -O 'input/03-voter-participation-stats-by-county.pdf' 'https://elections.cdn.sos.ca.gov/sov/2016-general/sov/03-voter-participation-stats-by-county.pdf'

In [8]:
# Provenance (left commented out, so it does not execute when the notebook is
# run): converts the downloaded PDF to the layout-preserving .txt file that
# the read_fwf cell below loads.
# NOTE(review): the converter binary is usually spelled `pdftotext`; the
# upper-case spelling presumably only resolves on a case-insensitive
# filesystem. Confirm before re-enabling.
# !PDFtoTEXT -layout 'input/03-voter-participation-stats-by-county.pdf' 'input/03-voter-participation-stats-by-county.txt'

In [9]:
# Column labels assigned to the fixed-width text file, in file order.
state16_columns = [
    "county",
    "precincts",
    "eligible",
    "reg_voters",
    "precinct_voters",
    "mail_voters",
    "total_voters",
    "pct_mail",
    "reg_turnout_pct",
    "eligible_turnout_pct",
]

# Load the text rendering of the county participation report. The first 6
# lines and the last 8 lines of the file are skipped, no header row is read
# from the file, and column boundaries are inferred by pandas.
state16 = pd.read_fwf(
    "input/03-voter-participation-stats-by-county.txt",
    names=state16_columns,
    header=None,
    colspecs="infer",
    sep=" ",
    skiprows=6,
    skipfooter=8,
)

In [10]:
# Columns holding whole-number counts; they are cleaned and cast to int in
# the next cell.
int_columns = [
    'precincts',
    'eligible',
    'reg_voters',
    'precinct_voters',
    'mail_voters',
    'total_voters',
]

In [11]:
# Remove thousands separators and any percent signs from the count columns,
# then cast them to integers.
counts_clean = state16[int_columns]
counts_clean = counts_clean.replace(',', '', regex=True)
counts_clean = counts_clean.replace('%', '', regex=True)
state16[int_columns] = counts_clean.astype(int)

In [12]:
state16.tail()

Unnamed: 0,county,precincts,eligible,reg_voters,precinct_voters,mail_voters,total_voters,pct_mail,reg_turnout_pct,eligible_turnout_pct
53,Tulare,252,260926,154000,41001,75105,116106,64.69%,75.39%,44.50%
54,Tuolumne,73,41222,31402,5885,20519,26404,77.71%,84.08%,64.05%
55,Ventura,684,550625,442951,144589,218696,363285,60.20%,82.01%,65.98%
56,Yolo,133,147156,111222,35791,48894,84685,57.74%,76.14%,57.55%
57,Yuba,45,48269,33694,8350,15208,23558,64.56%,69.92%,48.81%


--- 

### Aggregate statewide figures for the 2016 general election

In [13]:
# Turnout: total ballots cast as a percentage of registered voters,
# summed across all counties.
ballots_cast16 = state16['total_voters'].sum()
registered16 = state16['reg_voters'].sum()
state_turnout16 = round(ballots_cast16 / registered16 * 100, 2)
state_turnout16

75.27

In [14]:
# Vote-by-mail share: mail ballots as a percentage of all ballots cast,
# summed across all counties.
mail_cast16 = state16['mail_voters'].sum()
all_cast16 = state16['total_voters'].sum()
state_vbm16 = round(mail_cast16 / all_cast16 * 100, 2)
state_vbm16

57.79

In [15]:
# Total ballots cast statewide in 2016 (all voting methods).
state_voters16 = state16['total_voters'].sum()
state_voters16

14610509

In [16]:
# Total mail ballots cast statewide in 2016.
state_vvbm_total16 = state16['mail_voters'].sum()
state_vvbm_total16

8443594

---

## How does 2020 VBM turnout compare to 2016?

### Mail ballots returned in 2020 (so far) as a percentage of all mail ballots cast in 2016

In [21]:
# 2020 returned ballots relative to the 2016 mail-ballot total, in percent.
share_of_2016_mail = mail_ballots_returned20 / state_vvbm_total16
round(share_of_2016_mail * 100, 2)

133.07

In [22]:
# Denominator of the comparison above: sum of the 2016 `mail_voters` column.
state_vvbm_total16

8443594

In [23]:
# Numerator of the comparison above: the 2020 returned-ballot count read from
# the scraped early-vote table.
mail_ballots_returned20

11236035

### Mail ballots returned in 2020 (so far) as a percentage of all ballots cast in 2016 (mail and in-person)

In [18]:
# 2020 returned ballots relative to every ballot cast in 2016, in percent.
share_of_2016_total = mail_ballots_returned20 / state_voters16
round(share_of_2016_total * 100, 2)

76.9