### Election Date Project

#### Generating General Election Dates

In [840]:
cd projects/election-date-project/archive

[Errno 2] No such file or directory: 'projects/election-date-project/archive'
/Users/dp/projects/election-date-project


In [808]:
import datetime
import pandas as pd

In [809]:
states = pd.read_csv('states.csv')
set_state_name(states)
states.loc[:, 'state_abbr'] = states.state_abbr.str.strip(' ')

In [810]:
def set_date_col(df):
    df.columns = ['state_name', 'date']
    
    clean_dates = df.date.str.extract('(\s{0,1}\d{1,2}\/\d{1,2}\/\d{4})(.*)', expand=True)
    
    df.loc[:, 'date'] = clean_dates.iloc[:, 0]
    df.loc[:, 'date'] = pd.to_datetime(df.date, format='%m/%d/%Y')
    
    df.loc[:, 'etc'] = clean_dates.iloc[:, 1]
    
def set_election_type(df, estr):
    df.loc[:, 'election_type'] = estr
    
def set_other_cols(df):
    df.loc[:, 'party'] = ''
    df.loc[:, 'govt_type'] = ''
    df.loc[:, 'election_key'] = ''
    
def set_state_name(df):
    df.loc[:, 'state_name'] = df.state_name.str.strip().str.lower()
    df.loc[df.state_name == 'd.c.', 'state_name'] = 'district of columbia'
    df.loc[df.state_name == 'n. mariana islands', 'state_name'] = 'northern mariana islands'

In [811]:
current_year = datetime.datetime.today().year - 1
closest_year = current_year + (current_year % 2)

year = datetime.date(closest_year, 1, 1)

general_election_dates = (pd.date_range(end=year, periods=20, freq='2Y') 
                          + (11 * pd.offsets.MonthBegin()) 
                          + (1 * pd.offsets.WeekOfMonth(0, weekday=1)))

gen = pd.DataFrame(general_election_dates, columns=['date'])

set_election_type(gen, 'g')
set_other_cols(gen)
gen.loc[:, 'party'] = 'np'
gen.loc[:, 'govt_type'] = 'congress'
gen.head()



Unnamed: 0,date,election_type,party,govt_type,election_key
0,1982-11-02,g,np,congress,
1,1984-11-06,g,np,congress,
2,1986-11-04,g,np,congress,
3,1988-11-01,g,np,congress,
4,1990-11-06,g,np,congress,


In [812]:
closest_year = current_year + (current_year % 4)
pres_election_dates = (pd.date_range(end=year, periods=10, freq='4Y') 
                          + (11 * pd.offsets.MonthBegin()) 
                          + (1 * pd.offsets.WeekOfMonth(0, weekday=1)))

gen_pres = pd.DataFrame(pres_election_dates, columns=['date'])
set_election_type(gen_pres, 'g')
set_other_cols(gen_pres)
gen_pres.loc[:, 'party'] = 'np'
gen_pres.loc[:, 'govt_type'] = 'president'



In [813]:
congressional_file = 'primary-dates/congressional-primary-dates.csv'
presidential_file = 'primary-dates/presidential-primary-dates.csv'

In [814]:
congr = pd.read_csv(congressional_file, skipinitialspace=True, na_values=['', ' '])
pres = pd.read_csv(presidential_file, skipinitialspace=True, na_values=['', ' '])

In [815]:
congr_primary = congr.iloc[:, [0,1]].dropna()
congr_runoff = congr.iloc[:, [0,2]].dropna()
pres_primary = pres.iloc[:, [0,1]].dropna()
pres_cauc = pres.iloc[:, [0,2]].dropna()

In [816]:
primary_tables = [congr_primary, congr_runoff, pres_primary, pres_cauc]
election_strs = ['p', 'pr', 'pp', 'pc']

for k, v in zip(primary_tables, election_strs):
    set_date_col(k)
    set_election_type(k, v)
    set_other_cols(k)
    set_state_name(k)

In [817]:
# handling of congressional primaries
congr_primary.loc[:, 'govt_type'] = congr_primary.etc.str.lower().str.extract('(house|senate)', expand=False).fillna('congress')
congr_primary.loc[:, 'party'] = 'np'
congr_primary = congr_primary.drop('etc', axis=1)
congr_primary.head()

Unnamed: 0,state_name,date,election_type,party,govt_type,election_key
0,puerto rico,2000-11-09,p,np,congress,
1,california,2000-03-02,p,np,congress,
2,maryland,2000-03-02,p,np,congress,
3,ohio,2000-03-02,p,np,congress,
4,mississippi,2000-03-09,p,np,congress,


In [818]:
# handling of congressional runoff
congr_runoff.loc[:, 'govt_type'] = congr_runoff.etc.str.lower().str.extract('(house|senate)', expand=False).fillna('congress')
congr_runoff.loc[:, 'party'] = 'np'
congr_runoff = congr_runoff.drop('etc', axis=1)
congr_runoff.head()

Unnamed: 0,state_name,date,election_type,party,govt_type,election_key
4,mississippi,2000-03-30,pr,np,congress,
5,texas,2000-04-13,pr,np,congress,
11,arkansas,2000-06-08,pr,np,congress,
15,alabama,2000-06-29,pr,np,congress,
17,south dakota,2000-06-15,pr,np,congress,


In [819]:
# handling of presidential primaries
pres_primary.loc[:, 'party'] = pres_primary.etc.str.lower().str.extract('(r|d)', expand=False).map({'d': 'democratic', 
                                                                                                    'r': 'republican'}).fillna('np')
pres_primary.loc[:, 'govt_type'] = 'president'
pres_primary = pres_primary.drop('etc', axis=1)
pres_primary.head()

Unnamed: 0,state_name,date,election_type,party,govt_type,election_key
0,alabama,2000-06-06,pp,np,president,
5,arizona,2000-02-22,pp,np,president,
6,arizona,2000-03-11,pp,democratic,president,
7,arkansas,2000-05-23,pp,np,president,
8,california,2000-03-07,pp,np,president,


In [820]:
# handling of presidential caucuses 
cauc_party = pres_cauc.etc.str.lower().str.extract('(r|d)(.+)(c)')
pres_cauc.loc[:, 'party'] = cauc_party.iloc[:, 0].map({'d': 'democratic', 'r': 'republican'}).fillna('np')
pres_cauc.loc[:, 'election_type'] = pres_cauc.election_type + cauc_party.iloc[:, 2].fillna('')
pres_cauc.loc[:, 'govt_type'] = 'president'
pres_cauc = pres_cauc.drop('etc', axis=1)
pres_cauc.head()

Unnamed: 0,state_name,date,election_type,party,govt_type,election_key
1,alaska,2000-05-19,pcc,republican,president,
2,alaska,2000-05-20,pcc,democratic,president,
3,american samoa,2000-02-26,pc,np,president,
4,american samoa,2000-03-07,pc,np,president,
16,guam,2000-02-26,pc,np,president,


In [821]:
dfs = []
for r in gen.iterrows():
    row = r[1]
    df = states.copy()
    df.loc[:, 'date'] = row.date
    df.loc[:, 'party'] = row.party
    df.loc[:, 'election_type'] = row.election_type
    df.loc[:, 'govt_type'] = 'congress'
    df.loc[:, 'election_key'] = ''
    dfs.append(df)
 
gen = pd.concat(dfs)

In [822]:
dfs = []
for r in gen_pres.iterrows():
    row = r[1]
    df = states.copy()
    df.loc[:, 'date'] = row.date
    df.loc[:, 'party'] = row.party
    df.loc[:, 'election_type'] = row.election_type
    df.loc[:, 'govt_type'] = row.govt_type
    df.loc[:, 'election_key'] = ''
    dfs.append(df)
    
gen_pres = pd.concat(dfs)

In [823]:
gen = pd.concat([gen, gen_pres], ignore_index=True)

In [824]:
dfs = []
for df in primary_tables:
    df_join = df.join(states.set_index('state_name'), on='state_name', how='left')
    dfs.append(df_join)

In [825]:
prim = pd.concat(dfs).drop('etc', axis=1)

In [826]:
import yaml
with open('metadata.yaml') as f:
    metadata = yaml.load(f, Loader=yaml.FullLoader)

In [827]:
elections = pd.concat([prim, gen])

In [828]:
elections.loc[:, 'election_key'] = elections.date.dt.strftime('%Y-%m-%d-') + elections.election_type + '-' + elections.state_abbr

In [829]:
elections.to_csv('elections.csv', index=False)

In [835]:
elections.loc[elections.election_type.isin(['p', 'pp', 'pc', 'pr', 'pcc']), :].to_csv('primary-dates/primaries.csv')

In [838]:
elections.loc[~elections.election_type.isin(['p', 'pp', 'pc', 'pr', 'pcc']), :
             ].to_csv('general-dates/general.csv', index=False)