Added optional support for loading blocks. (Not finished vetting yet.)

1 parent 7a8a34a commit af9775c7eb3ae74876ab782c5425769c53bfc1d9 @onyxfish committed Jul 14, 2011
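
For orientation before the diff: per the hunks below, the 2000 fetch script now receives the state FIPS code as a fourth argument so it can also download the Census 2000-to-2010 tabulation block relationship file, and a new script, load_crosswalk_blocks.py, folds that file into the geography crosswalk. A minimal sketch of the intended per-state invocation once the temporary testing tweaks in this commit (commented-out fetch calls, data loops shortened to {1..1}) are reverted; the script names, argument order, and file paths come from the diff, while the Delaware example values and their formats are assumptions:

    # Illustrative values only; formats assumed (Delaware, FIPS 10).
    STATE_NAME_SPACE_FIXED="delaware"
    STATE_NAME_LOWER="delaware"
    STATE_NAME_ABBR="de"
    STATE_FIPS="10"

    # The fetch script now needs the FIPS code to grab the block relationship file.
    ./fetch_sf_data_2000.sh "$STATE_NAME_SPACE_FIXED" "$STATE_NAME_LOWER" "$STATE_NAME_ABBR" "$STATE_FIPS"

    # Tract crosswalk, then the new optional block crosswalk.
    ./load_crosswalk.py $STATE_FIPS data/us2010trf.csv || exit $?
    ./load_crosswalk_blocks.py $STATE_FIPS data/TAB2000_TAB2010_ST_${STATE_FIPS}_v2.txt || exit $?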
@@ -22,14 +22,14 @@ echo 'Ensuring mongo indexes.'
./ensure_indexes.sh
echo 'Fetching data'
-./fetch_sf_data_2000.sh "$STATE_NAME_SPACE_FIXED" "$STATE_NAME_LOWER" "$STATE_NAME_ABBR"
-./fetch_sf_data_2010.sh "$STATE_NAME_SPACE_FIXED" "$STATE_NAME_LOWER" "$STATE_NAME_ABBR"
+#./fetch_sf_data_2000.sh "$STATE_NAME_SPACE_FIXED" "$STATE_NAME_LOWER" "$STATE_NAME_ABBR" "$STATE_FIPS"
+#./fetch_sf_data_2010.sh "$STATE_NAME_SPACE_FIXED" "$STATE_NAME_LOWER" "$STATE_NAME_ABBR"
echo 'Loading 2000 geographies'
./load_sf_geographies_2000.py data/${STATE_NAME_ABBR}geo2000.csv || exit $?
echo 'Loading 2000 data'
-for i in {1..39}
+for i in {1..1}
do
./load_sf_data_2000.py data/sf_data_2000_${STATE_NAME_LOWER}_$i.csv
done
@@ -52,10 +52,11 @@ if [ "$FAKE" = "FAKE" ]; then
./load_crosswalk.py $STATE_FIPS $FAKE || exit $?
else
./load_crosswalk.py $STATE_FIPS data/us2010trf.csv || exit $?
+ ./load_crosswalk_blocks.py $STATE_FIPS data/TAB2000_TAB2010_ST_${STATE_FIPS}_v2.txt || exit $?
fi
echo 'Loading 2010 data'
-for i in {1..47}
+for i in {1..1}
do
if [ "$FAKE" = "FAKE" ]; then
# Load 2000 data as 2010 for testing
@@ -54,10 +54,12 @@
data = {}
-# TRACTS - require true crosswalk
-if geography['sumlev'] == config.SUMLEV_TRACT:
+# TRACTS & BLOCKS - require true crosswalk
+if geography['sumlev'] in [config.SUMLEV_TRACT, config.SUMLEV_BLOCK]:
    geography_2000s = list(utils.find_geographies_for_xwalk(collection_2000, geography, fields=['data', 'geoid']))
+    print geography['geoid']
+
    # Tract is new
    if not geography_2000s:
        continue
@@ -3,6 +3,7 @@
STATE_NAME=$1
STATE_NAME_LOWER=$2
STATE_NAME_ABBR=$3
+STATE_FIPS=$4
DATAPROCESSING_DIR=`pwd`
@@ -46,3 +47,6 @@ in2csv -e "latin-1" -f fixed -s ${DATAPROCESSING_DIR}/census2000_geo_schema.csv
wget http://www.census.gov/geo/www/2010census/tract_rel/trf_txt/us2010trf.txt
echo "STATE00,COUNTY00,TRACT00,GEOID00,POP00,HU00,PART00,AREA00,AREALAND00,STATE10,COUNTY10,TRACT10,GEOID10,POP10,HU10,PART10,AREA10,AREALAND10,AREAPT,AREALANDPT,AREAPCT00PT,AREALANDPCT00PT,AREAPCT10PT,AREALANDPCT10PT,POP10PT,POPPCT00,POPPCT10,HU10PT,HUPCT00,HUPCT10" > us2010trf.csv
cat us2010trf.txt >> us2010trf.csv
+
+wget http://www.census.gov/geo/www/2010census/t00t10/TAB2000_TAB2010_ST_${STATE_FIPS}_v2.zip
+unzip TAB2000_TAB2010_ST_${STATE_FIPS}_v2.zip
@@ -5,7 +5,6 @@
from csvkit.unicsv import UnicodeCSVReader
from pymongo import objectid
-import config
import utils
if len(sys.argv) < 2:
@@ -0,0 +1,75 @@
+#!/usr/bin/env python
+
+import sys
+
+from csvkit.unicsv import UnicodeCSVReader
+from pymongo import objectid
+
+import config
+import utils
+
+if len(sys.argv) < 3:
+    sys.exit('You must provide a state FIPS code and the filename of a CSV as arguments to this script.')
+
+STATE_FIPS = sys.argv[1]
+FILENAME = sys.argv[2]
+
+collection = utils.get_geography_collection()
+
+inserts = 0
+row_count = 0
+
+if config.SUMLEV_BLOCK not in config.SUMLEVS:
+    print 'Skipping block crosswalk.'
+    sys.exit()
+
+with open(FILENAME) as f:
+    rows = UnicodeCSVReader(f)
+    headers = rows.next()
+
+    for row in rows:
+        row_count += 1
+        row_dict = dict(zip(headers, row))
+
+        geoid00 = ''.join([
+            row_dict['STATE_2000'].rjust(2, '0'),
+            row_dict['COUNTY_2000'].rjust(3, '0'),
+            row_dict['TRACT_2000'].rjust(6, '0'),
+            row_dict['BLK_2000'].rjust(4, '0')
+        ])
+        geoid10 = ''.join([
+            row_dict['STATE_2010'].rjust(2, '0'),
+            row_dict['COUNTY_2010'].rjust(3, '0'),
+            row_dict['TRACT_2010'].rjust(6, '0'),
+            row_dict['BLK_2010'].rjust(4, '0')
+        ])
+
+        geography = collection.find_one({ 'geoid': geoid10 }, fields=['xwalk'])
+
+        if not geography:
+            continue
+
+        if row_dict['AREALAND_INT'] == '0':
+            pct = 0
+        else:
+            pct = float(row_dict['AREALAND_INT']) / float(row_dict['AREALAND_2000'])
+
+        #pop_pct_2000 = float(row_dict['POPPCT00']) / 100
+        #house_pct_2000 = float(row_dict['HUPCT00']) / 100
+
+        if 'xwalk' not in geography:
+            geography['xwalk'] = {}
+
+        geography['xwalk'][geoid00] = {
+            'POPPCT00': pct,
+            'HUPCT00': pct
+        }
+
+        collection.update({ '_id': objectid.ObjectId(geography['_id']) }, { '$set': { 'xwalk': geography['xwalk'] } }, safe=True)
+        inserts += 1
+
+print "State: %s" % STATE_FIPS
+print "File: %s" % FILENAME
+print ' Row count: %i' % row_count
+print ' Inserted: %i' % inserts
+
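
The new loader can also be exercised on its own once the relationship file is on disk; it exits early unless config.SUMLEVS includes config.SUMLEV_BLOCK. A minimal standalone sketch, again using Delaware (FIPS 10) as an illustrative example and assuming the file is unpacked into the working directory alongside config.py and utils.py:

    # Fetch and unpack the Census 2000-to-2010 tabulation block relationship
    # file for one state, as the updated fetch script does.
    STATE_FIPS=10
    wget http://www.census.gov/geo/www/2010census/t00t10/TAB2000_TAB2010_ST_${STATE_FIPS}_v2.zip
    unzip TAB2000_TAB2010_ST_${STATE_FIPS}_v2.zip

    # Store land-area overlap ratios (used as POPPCT00/HUPCT00 weights) in the
    # xwalk field of each matching 2010 block geography in Mongo.
    ./load_crosswalk_blocks.py $STATE_FIPS TAB2000_TAB2010_ST_${STATE_FIPS}_v2.txt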
