Permalink
Browse files

Wrote test rig using Delaware data. Run front to back using batch_test.sh.
  • Loading branch information...
1 parent 0962f00 commit 4e14a15252c95ad7120e90f133b09ceda228f6c9 cgroskopf committed May 10, 2011
@@ -5,7 +5,7 @@
./fetch_data.sh
./load_pl_geographies_2000.py data/ilgeo2000.csv
-./load_pl_2000.py data/il000012000.csv
+./load_pl_data_2000.py data/il000012000.csv
# ./load_dpsf_geographies_2010.py data/rigeo2010.csv
# ./load_dpsf_data_2010.py data/ri000012010.csv
@@ -15,7 +15,7 @@
# Note: the crosswalk can be computed before the 2010 data is loaded
./load_crosswalk.py data/us2010trf.csv
-./load_pl_2010.py data/il000012010.csv
+./load_pl_data_2010.py data/il000012010.csv
./load_pl_labels_2010.py data/PL2010_Table.csv
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+# See batch.sh for notes.
+
+# echo "use census; db.dropDatabase();" | mongo
+
+./fetch_test_data.sh
+
+./load_pl_geographies_2000.py data/degeo2000.csv
+./load_pl_data_2000.py data/pl_data_2000_delaware_1.csv
+./load_pl_data_2000.py data/pl_data_2000_delaware_2.csv
+
+./load_pl_geographies_2010.py data/degeo2010.csv
+
+./load_crosswalk.py data/us2010trf.csv
+./load_pl_data_2010.py data/pl_data_2010_delaware_1.csv
+./load_pl_data_2010.py data/pl_data_2010_delaware_2.csv
+
+./load_pl_labels_2010.py data/pl_2010_data_labels.csv
+
+./crosswalk.py
+./compute_deltas.py
@@ -20,6 +20,10 @@
if 'pct_change' not in geography['data']:
geography['data']['pct_change'] = {}
+ # Skip geographies which did not have data in 2000 (e.g. newly established places)
+ if '2000' not in geography['data']:
+ continue
+
for table in geography['data']['2010']:
# Skip tables not in both years
if table not in geography['data']['2000']:
@@ -48,6 +48,8 @@
if not geography_2000:
print 'Couldn\'t find matching 2000 geography for %s (%s)' % (geography['metadata']['NAME'], geography['geoid'])
+ continue
+
geography['data']['2000'] = geography_2000['data']['2000']
collection.save(geography)
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+# NO NATIONAL DATA!!!
+
+rm -r data
+mkdir data
+cd data
+
+# 2000 - DELAWARE - PL1 and PL2
+wget http://www2.census.gov/census_2000/datasets/redistricting_file--pl_94-171/Delaware/de00001.upl.zip
+unzip de00001.upl.zip
+
+wget http://www2.census.gov/census_2000/datasets/redistricting_file--pl_94-171/Delaware/de00002.upl.zip
+unzip de00002.upl.zip
+
+wget http://www2.census.gov/census_2000/datasets/redistricting_file--pl_94-171/Delaware/degeo.upl.zip
+unzip degeo.upl.zip
+
+wget http://www2.census.gov/census_2000/datasets/redistricting_file--pl_94-171/0File_Structure/Access2000/PL2000_Access2000.mdb
+mdb-export PL2000_Access2000.mdb PL_Part1 > pl_data_2000_headers_1.csv
+mdb-export PL2000_Access2000.mdb PL_Part2 > pl_data_2000_headers_2.csv
+mdb-export PL2000_Access2000.mdb tables > pl_2000_data_labels.csv
+
+rm pl_data_2000_delaware_1.csv
+cat pl_data_2000_headers_1.csv > pl_data_2000_delaware_1.csv
+cat de00001.upl >> pl_data_2000_delaware_1.csv
+
+rm pl_data_2000_delaware_2.csv
+cat pl_data_2000_headers_2.csv > pl_data_2000_delaware_2.csv
+cat de00002.upl >> pl_data_2000_delaware_2.csv
+
+in2csv -f fixed -s ../census2000_geo_schema.csv degeo.upl > degeo2000.csv
+
+# 2010 - DELAWARE - PL1 and PL2
+wget http://www2.census.gov/census_2010/redistricting_file--pl_94-171/Delaware/de2010.pl.zip
+unzip de2010.pl.zip
+
+wget http://www2.census.gov/census_2010/redistricting_file--pl_94-171/PL2010_Access2003.mdb
+mdb-export PL2010_Access2003.mdb PL_Part1 >> pl_data_2010_headers_1.csv
+mdb-export PL2010_Access2003.mdb PL_Part2 >> pl_data_2010_headers_2.csv
+mdb-export PL2010_Access2003.mdb Table > pl_2010_data_labels.csv
+
+rm pl_2010_delaware_1.csv
+cat pl_data_2010_headers_1.csv > pl_data_2010_delaware_1.csv
+cat de000012010.pl >> pl_data_2010_delaware_1.csv
+
+rm pl_2010_delaware_2.csv
+cat pl_data_2010_headers_2.csv > pl_data_2010_delaware_2.csv
+cat de000022010.pl >> pl_data_2010_delaware_2.csv
+
+in2csv -f fixed -s ../census2010_geo_schema.csv degeo2010.pl > degeo2010.csv
+
+# Crosswalk
+
+wget http://www.census.gov/geo/www/2010census/tract_rel/trf_txt/us2010trf.txt
+echo "STATE00,COUNTY00,TRACT00,GEOID00,POP00,HU00,PART00,AREA00,AREALAND00,STATE10,COUNTY10,TRACT10,GEOID10,POP10,HU10,PART10,AREA10,AREALAND10,AREAPT,AREALANDPT,AREAPCT00PT,AREALANDPCT00PT,AREAPCT10PT,AREALANDPCT10PT,POP10PT,POPPCT00,POPPCT10,HU10PT,HUPCT00,HUPCT10" > us2010trf.csv
+cat us2010trf.txt >> us2010trf.csv
@@ -43,7 +43,7 @@
tables = {}
for k, v in row_dict.items():
- t = 'PL' + k[3]
+ t = k[0] + k[3]
if t not in tables:
tables[t] = {}
@@ -43,7 +43,7 @@
tables = {}
for k, v in row_dict.items():
- t = 'P' + k[3]
+ t = k[0] + k[3]
if t not in tables:
tables[t] = {}
@@ -99,6 +99,8 @@
'parent': parent
}
+ inserts += 1
+
last_key = key
# Save final table

0 comments on commit 4e14a15

Please sign in to comment.