/
load_precinct.sql
176 lines (159 loc) · 6.02 KB
/
load_precinct.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
-- End-to-end script to load precinct data, openaddresses data, and pull voter list for a precinct
-- https://s3.amazonaws.com/dl.ncsbe.gov/data/ncvoter32.zip Durham is district 32.
-- curl https://s3.amazonaws.com/dl.ncsbe.gov/data/ncvoter32.zip > ncvoter32.zip
-- unzip ncvoter32.zip
-- convert the file: iconv -f UTF-8 -t UTF-8 -c ncvoter32.txt > ncvoter32_feb10.tsv
drop table if exists voters_feb_10;
create table voters_feb_10 (
county_id SMALLINT,
county VARCHAR(255),
voter_id VARCHAR(55),
status VARCHAR(10),
status_desc VARCHAR(55),
reason VARCHAR(10),
reason_desc VARCHAR(255),
absentee_indication VARCHAR(10),
name_prefix_code VARCHAR(10),
last_name VARCHAR(55),
first_name VARCHAR(55),
middle_name VARCHAR(55),
name_suffix_label VARCHAR(10),
residential_address VARCHAR(255),
residential_city VARCHAR(55),
residential_state VARCHAR(3),
residential_zip_code VARCHAR(12),
mailing_address_1 VARCHAR(255),
mailing_address_2 VARCHAR(255),
mailing_address_3 VARCHAR(255),
mailing_address_4 VARCHAR(255),
mailing_address_city VARCHAR(255),
mailing_address_state VARCHAR(3),
mailing_address_zip_code VARCHAR(12),
full_phone_number VARCHAR(16),
race_code VARCHAR(10),
ethnic_code VARCHAR(10),
party_code VARCHAR(10),
gender_code VARCHAR(10),
birth_age SMALLINT,
birth_state VARCHAR(3),
drivers_license_id VARCHAR(20),
registration_date DATE,
precinct_abbreviation VARCHAR(10),
precinct_description VARCHAR(55),
municipality_abbreviation VARCHAR(10),
municipality_full VARCHAR(55),
ward_abbreviation VARCHAR(10),
ward_full VARCHAR(55),
congressional_district_abbreviation VARCHAR(10),
supreme_court_district_abbreviation VARCHAR(10),
judicial_district_abbreviation VARCHAR(10),
nc_senate_district_abbreviation VARCHAR(10),
nc_house_district_abbreviation VARCHAR(10),
county_commissioner_abbreviation VARCHAR(10),
county_commissioner_full VARCHAR(55),
township_abbreviation VARCHAR(10),
township_full VARCHAR(55),
school_district_abbrev VARCHAR(10), --- "school_dist_abbrv"
school_district_full VARCHAR(55), --- "school_dist_desc"
fire_district_abbrev VARCHAR(10), --- "fire_dist_abbrv"
fire_district_full VARCHAR(55), --- "fire_dist_desc"
water_district_abbrev VARCHAR(10), --- "water_dist_abbrv"
water_district_full VARCHAR(55), --- "water_dist_desc"
sewer_district_abbrev VARCHAR(10), --- "sewer_dist_abbrv"
sewer_district_full VARCHAR(55), --- "sewer_dist_desc"
sanitation_district_abbrev VARCHAR(10), --- "sanit_dist_abbrv"
sanitation_district_full VARCHAR(55), --- "sanit_dist_desc"
rescue_district_abbrev VARCHAR(10), --- "rescue_dist_abbrv"
rescue_district_full VARCHAR(55), --- "rescue_dist_desc"
municipal_district_abbrev VARCHAR(10), --- "munic_dist_abbrv"
municipal_district_full VARCHAR(55), --- "munic_dist_desc"
district_1_abbrev VARCHAR(10), --- "dist_1_abbrv"
district_1_full VARCHAR(55), --- "dist_1_desc"
district_2_abbrev VARCHAR(10), --- "dist_2_abbrv"
district_2_full VARCHAR(55), --- "dist_2_desc"
confidential_id VARCHAR(20), --- "confidential_ind"
age_string VARCHAR(55), --- "age"
ncid VARCHAR(10), --- "ncid"
vtd_abbrev VARCHAR(10), --- "vtd_abbrv"
vtd_full VARCHAR(55) --- "vtd_desc"
);
COPY voters_feb_10 from '/Users/dave/cfd/ncvoter32_feb10.tsv' with csv delimiter ' ' header;
-- Now we have all the recent voter records. Now get the OpenAddresses data:
-- http://results.openaddresses.io/sources/us/nc/durham
-- https://s3.amazonaws.com/data.openaddresses.io/runs/155716/us/nc/durham.zip
-- unzip durham.zip
-- cp us/nc/durham.csv ./
DROP TABLE IF EXISTS openaddresses_feb_10;
CREATE TABLE openaddresses_feb_10 (
gps_lat FLOAT,
gps_long FLOAT,
address_number VARCHAR(15),
address_street VARCHAR(255),
address_suffix VARCHAR(15),
city_abbreviated VARCHAR(255),
district VARCHAR(15),
region VARCHAR(15),
zip VARCHAR(15),
id VARCHAR(55),
hash VARCHAR(16)
);
COPY openaddresses_feb_10 from '/Users/dave/cfd/durham.csv' with csv header;
-- Modify openaddresses to remove apartments and non-integer nonsense
DROP VIEW IF EXISTS openaddresses_feb_10_no_apartments;
CREATE VIEW openaddresses_feb_10_no_apartments AS
SELECT distinct(address_number || ' ' || address_street) address_full_no_apartment,
regexp_replace(address_number, '\s+.*$', '')::integer address_number_cleaned,
address_street address_street_cleaned
FROM openaddresses_feb_10;
-- Create precinct 39 view
DROP VIEW IF EXISTS voters_p39;
CREATE VIEW voters_p39 AS (
SELECT
voters_feb_10.*,
regexp_replace(
regexp_replace(
regexp_replace(
residential_address, '\s+\#.+$', ''),
'^\d+\s+', ''),
'\s+$', '') street_name_cleaned,
regexp_replace(
left(residential_address, position(' ' in residential_address)),
'[a-zA-Z]+\s*$', '')::integer street_number_cleaned,
regexp_replace(
regexp_replace(residential_address, '\s+\#.+$', ''),
'\s+$', '') residential_address_no_apartment
FROM voters_feb_10
WHERE precinct_abbreviation = '39'
);
-- Find bounding addresses for precinct streets
CREATE VIEW p39_bounds AS (
SELECT min(street_number_cleaned), max(street_number_cleaned), street_name_cleaned
FROM voters_p39
GROUP BY 3 ORDER BY 3 ASC);
-- All OpenAddresses within p39 bounds
DROP TABLE IF EXISTS openaddresses_p39;
CREATE TABLE openaddresses_p39 AS (
SELECT address_full_no_apartment precinct_39_address,
address_number_cleaned,
address_street_cleaned
FROM
openaddresses_feb_10_no_apartments oa
RIGHT JOIN
p39_bounds
ON
oa.address_street_cleaned = p39_bounds.street_name_cleaned
AND address_number_cleaned <= p39_bounds.max
AND address_number_cleaned >= p39_bounds.min
);
-- Now, join to get all addresses with voter count
SELECT
precinct_39_address,
address_number_cleaned,
address_street_cleaned,
count(voters.*)
FROM
openaddresses_p39
LEFT JOIN
(select * from voters_p39 where status_desc = 'ACTIVE') voters
ON openaddresses_p39.precinct_39_address = regexp_replace(voters.p2_address, '\s+$', '')
GROUP BY 1, 2, 3 ORDER BY 3,2;