Skip to content

Commit

Permalink
Drop year field from election model, hard code more special election …
Browse files Browse the repository at this point in the history
…dates #32, fix loading of prop model
  • Loading branch information
gordonje committed Dec 20, 2016
1 parent aaa257e commit 98bf5b8
Show file tree
Hide file tree
Showing 4 changed files with 146 additions and 223 deletions.
24 changes: 24 additions & 0 deletions calaccess_processed/migrations/0015_auto_20161220_2212.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.10.3 on 2016-12-20 22:12
from __future__ import unicode_literals

from django.db import migrations, models


# Schema migration for the `election` model: removes the `year` field and
# redefines `election_date` without the `null=True` argument.
class Migration(migrations.Migration):

# This migration is ordered after migration 0014 of the same app.
dependencies = [
('calaccess_processed', '0014_auto_20161216_0534'),
]

operations = [
# Drop the `year` field from the `election` model.
migrations.RemoveField(
model_name='election',
name='year',
),
# Redefine `election_date` as a DateField that no longer passes `null=True`.
# NOTE(review): existing rows with a NULL election_date would presumably
# block this change at the database level -- confirm the table is emptied
# or fully populated before this migration is applied.
migrations.AlterField(
model_name='election',
name='election_date',
field=models.DateField(help_text='Date of the election', verbose_name='election date'),
),
]
5 changes: 0 additions & 5 deletions calaccess_processed/models/campaign/elections.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,6 @@ class Election(models.Model):
Derived from distinct year and type combinations in CandidateScrapedElection.
"""
year = models.IntegerField(
verbose_name="election year",
help_text="Election year",
)
ELECTION_TYPE_CHOICES = (
('P', 'Primary'),
('G', 'General'),
Expand Down Expand Up @@ -53,7 +49,6 @@ class Election(models.Model):
)
election_date = models.DateField(
verbose_name="election date",
null=True,
help_text='Date of the election',
)

Expand Down
296 changes: 96 additions & 200 deletions calaccess_processed/sql/load_election_model.sql
Original file line number Diff line number Diff line change
@@ -1,209 +1,105 @@
-- first, insert primary and general elections
-- by joining candidate elections to ballot proposition elections
INSERT INTO calaccess_processed_election (
year,
election_type,
office,
district,
election_date
)
election_type,
office,
district,
election_date
)
SELECT
cand.election_type,
cand.office,
cand.district,
prop.election_date
FROM (
SELECT
-- extract the four-digit year, cast as an int
substring(cand.name from '\d{4}')::INT AS year,
substring(name from '\d{4}')::INT AS year,
CASE
WHEN cand.name LIKE '%PRIMARY%' THEN 'P'
WHEN cand.name LIKE '%GENERAL%' THEN 'G'
WHEN cand.name LIKE '%RECALL%' THEN 'R'
WHEN cand.name LIKE '%SPECIAL ELECTION%' THEN 'SE'
WHEN cand.name LIKE '%SPECIAL RUNOFF%' THEN 'SR'
WHEN name LIKE '%PRIMARY%' THEN 'P'
WHEN name LIKE '%GENERAL%' THEN 'G'
WHEN name LIKE '%RECALL%' THEN 'R'
ELSE NULL
END AS election_type,
CASE
WHEN cand.name LIKE '%ASSEMBLY%' THEN 'ASM'
WHEN cand.name LIKE '%STATE SENATE%' THEN 'SEN'
WHEN cand.name LIKE '%GOVERNOR%' THEN 'GOV'
WHEN name LIKE '%ASSEMBLY%' THEN 'ASM'
WHEN name LIKE '%STATE SENATE%' THEN 'SEN'
WHEN name LIKE '%GOVERNOR%' THEN 'GOV'
ELSE NULL
END AS office,
-- extract the two digit chars found in the parenthesis, if any
substring(cand.name from '^\d{4}\s.+\(.+(\d{2})\)$')::INT AS district,
-- this field is populated by the update statement below
NULL as election_date
FROM calaccess_processed_candidatescrapedelection AS cand;

-- hardcode this election date because for some reason the CAL-ACCESS ballot props
-- page has two "primaries" for 2008
UPDATE calaccess_processed_election
SET election_date = '2008-2-5'
WHERE election_type = 'P'
AND year = 2008;

-- there was only one special election in the years 2003 and 2005
UPDATE calaccess_processed_election AS elec
SET election_date = prop.election_date
FROM (
SELECT substring(name from '\d{4}')::INT AS year,
substring(name from '\d{4}\s([A-Z])') AS election_type,
(regexp_matches(name, '^([A-Z]+\s\d{1,2},\s\d{4})\s.+$'))[1]::DATE AS election_date
FROM calaccess_processed_propositionscrapedelection
WHERE name LIKE '%2003%'
OR name LIKE '%2005%'
) AS prop
WHERE elec.year = prop.year
AND elec.election_type = 'SE';

-- these all come from here: http://www.sos.ca.gov/elections/prior-elections/special-elections/
UPDATE calaccess_processed_election
SET election_date = '2016-4-5'
WHERE year = 2016
AND election_type = 'SE'
AND office = 'ASM'
AND district = 31;

UPDATE calaccess_processed_election
SET election_date = '2015-3-17'
WHERE year = 2015
AND election_type = 'SE'
AND office = 'SEN'
AND district = 37;

UPDATE calaccess_processed_election
SET election_date = '2015-3-17'
WHERE year = 2015
AND election_type = 'SE'
AND office = 'SEN'
AND district = 21;

UPDATE calaccess_processed_election
SET election_date = '2014-3-25'
WHERE year = 2014
AND election_type = 'SE'
AND office = 'SEN'
AND district = 23;

UPDATE calaccess_processed_election
SET election_date = '2014-12-9'
WHERE year = 2014
AND election_type = 'SE'
AND office = 'SEN'
AND district = 35;

UPDATE calaccess_processed_election
SET election_date = '2013-12-3'
WHERE year = 2014
AND election_type = 'SE'
AND office = 'ASM'
AND district = 54;

UPDATE calaccess_processed_election
SET election_date = '2013-9-17'
WHERE year = 2013
AND election_type = 'SE'
AND office = 'SEN'
AND district = 26;

UPDATE calaccess_processed_election
SET election_date = '2013-5-21'
WHERE year = 2013
AND election_type = 'SE'
AND office = 'ASM'
AND district = 80;

UPDATE calaccess_processed_election
SET election_date = '2013-3-12'
WHERE year = 2013
AND election_type = 'SE'
AND office = 'SEN'
AND district = 40;

UPDATE calaccess_processed_election
SET election_date = '2013-1-8'
WHERE year = 2013
AND election_type = 'SE'
AND office = 'SEN'
AND district = 4;

UPDATE calaccess_processed_election
SET election_date = '2012-11-6'
WHERE year = 2012
AND election_type = 'SE'
AND office = 'SEN'
AND district = 4;

UPDATE calaccess_processed_election
SET election_date = '2011-2-15'
WHERE year = 2011
AND election_type = 'SE'
AND office = 'SEN'
AND district = 17;

UPDATE calaccess_processed_election
SET election_date = '2011-2-15'
WHERE year = 2011
AND election_type = 'SE'
AND office = 'SEN'
AND district = 28;

UPDATE calaccess_processed_election
SET election_date = '2011-1-4'
WHERE year = 2011
AND election_type = 'SR'
AND office = 'SEN'
AND district = 1;

UPDATE calaccess_processed_election
SET election_date = '2010-1-12'
WHERE year = 2010
AND election_type = 'SR'
AND office = 'ASM'
AND district = 72;

UPDATE calaccess_processed_election
SET election_date = '2010-11-2'
WHERE year = 2010
AND election_type = 'SE'
AND office = 'SEN'
AND district = 1;

UPDATE calaccess_processed_election
SET election_date = '2009-9-1'
WHERE year = 2009
AND election_type = 'SE'
AND office = 'ASM'
AND district = 51;

UPDATE calaccess_processed_election
SET election_date = '2009-11-17'
WHERE year = 2009
AND election_type = 'SE'
AND office = 'ASM'
AND district = 72;

UPDATE calaccess_processed_election
SET election_date = '2008-2-5'
WHERE year = 2008
AND election_type = 'SR'
AND office = 'ASM'
AND district = 55;

UPDATE calaccess_processed_election
SET election_date = '2007-5-15'
WHERE year = 2007
AND election_type = 'SE'
AND office = 'ASM'
AND district = 39;

UPDATE calaccess_processed_election
SET election_date = '2007-12-11'
WHERE year = 2007
AND election_type = 'SE'
AND office = 'ASM'
AND district = 55;

-- then populate the election_date field for prop elections that match cand elections
UPDATE calaccess_processed_election as elec
SET election_date = (regexp_matches(prop.name, '^([A-Z]+\s\d{1,2},\s\d{4})\s.+$'))[1]::DATE
FROM calaccess_processed_propositionscrapedelection AS prop
WHERE elec.year = substring(name from '\d{4}')::INT
AND elec.election_type = substring(name from '\d{4}\s([A-Z])')
-- exclude special elections from match b/c we can't be sure these all
-- occurred on the same date
AND prop.name NOT LIKE '%SPECIAL%'
AND elec.election_type NOT IN ('SE', 'SR')
AND elec.election_date IS NULL;
substring(name from '^\d{4}\s.+\(.+(\d{2})\)$')::INT AS district
FROM calaccess_processed_candidatescrapedelection
WHERE name NOT LIKE '%SPECIAL%'
) AS cand
JOIN (
SELECT
substring(name from '\d{4}')::INT AS year,
substring(name from '\d{4}\s([A-Z])') AS election_type,
(regexp_matches(name, '^([A-Z]+\s\d{1,2},\s\d{4})\s.+$'))[1]::DATE AS election_date
FROM calaccess_processed_propositionscrapedelection
WHERE name NOT LIKE '%SPECIAL%'
-- for some reason the CAL-ACCESS ballot props page has two "primary"
-- elections for 2008, so exclude this one
AND name <> 'JUNE 3, 2008 PRIMARY'
) AS prop
ON cand.year = prop.year
AND cand.election_type = prop.election_type;

-- Then insert all the special elections ('SE') and special runoffs ('SR').
-- Each row is a hard-coded (election_type, office, district, election_date)
-- tuple; nothing here is derived from the scraped tables.
-- Presumably sourced from the Secretary of State's list of prior special
-- elections (http://www.sos.ca.gov/elections/prior-elections/special-elections/)
-- -- verify against that page when adding or correcting rows.
-- A runoff ('SR') must be dated after the special election ('SE') for the
-- same office and district.
INSERT INTO calaccess_processed_election (
    election_type,
    office,
    district,
    election_date
) VALUES
    ('SE', 'ASM', 31, '2016-4-5'),
    ('SR', 'SEN', 7, '2015-5-19'),
    ('SE', 'SEN', 7, '2015-3-17'),
    ('SE', 'SEN', 21, '2015-3-17'),
    ('SE', 'SEN', 37, '2015-3-17'),
    ('SE', 'SEN', 35, '2014-12-9'),
    ('SE', 'SEN', 23, '2014-3-25'),
    ('SE', 'ASM', 54, '2013-12-3'),
    ('SR', 'ASM', 45, '2013-11-19'),
    ('SE', 'ASM', 45, '2013-9-17'),
    ('SE', 'SEN', 26, '2013-9-17'),
    ('SR', 'ASM', 52, '2013-9-24'),
    ('SE', 'ASM', 52, '2013-7-23'),
    ('SE', 'ASM', 80, '2013-5-21'),
    ('SR', 'SEN', 16, '2013-7-23'),
    ('SE', 'SEN', 16, '2013-5-21'),
    ('SR', 'SEN', 32, '2013-5-14'),
    ('SE', 'SEN', 32, '2013-3-12'),
    ('SE', 'SEN', 40, '2013-3-12'),
    ('SE', 'SEN', 4, '2013-1-8'),
    ('SE', 'SEN', 4, '2012-11-6'),
    ('SR', 'ASM', 4, '2011-5-3'),
    ('SE', 'ASM', 4, '2011-3-8'),
    ('SE', 'SEN', 17, '2011-2-15'),
    ('SE', 'SEN', 28, '2011-2-15'),
    ('SR', 'SEN', 1, '2011-1-4'),
    ('SE', 'SEN', 1, '2010-11-2'),
    ('SR', 'SEN', 15, '2010-8-17'),
    ('SE', 'SEN', 15, '2010-6-22'),
    ('SR', 'ASM', 43, '2010-6-8'),
    ('SE', 'ASM', 43, '2010-4-13'),
    ('SR', 'SEN', 37, '2010-6-8'),
    ('SE', 'SEN', 37, '2010-4-13'),
    ('SR', 'ASM', 72, '2010-1-12'),
    ('SE', 'ASM', 72, '2009-11-17'),
    ('SE', 'ASM', 51, '2009-9-1'),
    ('SR', 'SEN', 26, '2009-5-19'),
    ('SE', 'SEN', 26, '2009-3-24'),
    ('SR', 'ASM', 55, '2008-2-5'),
    ('SE', 'ASM', 55, '2007-12-11'),
    ('SE', 'ASM', 39, '2007-5-15'),
    ('SR', 'SEN', 35, '2006-6-6'),
    ('SE', 'SEN', 35, '2006-4-11'),
    -- no office or district is recorded for this special election
    ('SE', NULL, NULL, '2005-11-8'),
    ('SE', 'ASM', 53, '2005-9-13'),
    ('SE', 'GOV', NULL, '2003-10-7'),
    ('SE', 'ASM', 49, '2001-5-15'),
    -- NOTE(review): 2001-3-26 falls on a Monday; confirm this date against
    -- the Secretary of State's records.
    ('SE', 'SEN', 24, '2001-3-26'),
    -- the special election (Feb. 6) came first, then its runoff (Apr. 3)
    ('SR', 'ASM', 65, '2001-4-3'),
    ('SE', 'ASM', 65, '2001-2-6');
44 changes: 26 additions & 18 deletions calaccess_processed/sql/load_proposition_model.sql
Original file line number Diff line number Diff line change
@@ -1,21 +1,29 @@
INSERT INTO calaccess_processed_proposition (
id,
name,
election_id
)
id,
name,
election_id
)
SELECT
scraped.scraped_id::INTEGER AS id,
scraped.name AS name,
election.id AS election_id
FROM calaccess_processed_scrapedproposition AS scraped
JOIN (
SELECT id,
substring(name from '\d{4}\s([A-Z])') AS election_type,
(regexp_matches(name, '^([A-Z]+\s\d{1,2},\s\d{4})\s.+$'))[1]::DATE as election_date
FROM calaccess_processed_propositionscrapedelection
) AS scrapedelection
ON scraped.election_id = scrapedelection.id
JOIN calaccess_processed_election AS election
ON left(election.election_type, 1) = scrapedelection.election_type
AND election.election_date = scrapedelection.election_date
scraped.scraped_id::INTEGER AS id,
scraped.name AS name,
election.id AS election_id
FROM calaccess_processed_scrapedproposition AS scraped
JOIN (
SELECT
id,
-- Some of the ballot measure elections on June 3, 2008, are labeled
-- "PRIMARY", while the rest are labeled "RECALL" because there was also a
-- recall in State Senate District 12. We treat all the ballot measure
-- elections that happened on June 3, 2008, as the same election.
CASE name
WHEN 'JUNE 3, 2008 PRIMARY' THEN 'R'
ELSE substring(name from '\d{4}\s([A-Z])')
END AS election_type,
(regexp_matches(name, '^([A-Z]+\s\d{1,2},\s\d{4})\s.+$'))[1]::DATE as election_date
FROM calaccess_processed_propositionscrapedelection
) AS scrapedelection
ON scraped.election_id = scrapedelection.id
JOIN calaccess_processed_election AS election
ON left(election.election_type, 1) = scrapedelection.election_type
AND election.election_date = scrapedelection.election_date
ORDER BY id;

0 comments on commit 98bf5b8

Please sign in to comment.