Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Testing s3 deployment.

  • Loading branch information...
commit dee8ba6d99a535a8f25cd834ce61be1c846f1f22 1 parent 65d1949
cgroskopf authored
View
25 dataprocessing/batch_sf_2000.sh → dataprocessing/batch_sf.sh
@@ -12,6 +12,9 @@ STATE_NAME_ABBR=`python get_state_abbr.py $1`
STATE_FIPS=`python get_state_fips.py $1`
FAKE=$2
+echo 'Dropping existing data.'
+./__drop_database.sh
+
echo 'Ensuring mongo indexes.'
./ensure_indexes.sh
@@ -46,3 +49,25 @@ if [ "$FAKE" = "FAKE" ]; then
else
./load_crosswalk.py $STATE_FIPS data/us2010trf.csv
fi
+
+echo 'Loading 2010 data'
+for i in {1..39}
+do
+ # Load 2000 data as 2010 for testing
+ if [ "$FAKE" = "FAKE" ]; then
+ ./load_sf_data_2010.py data/sf_data_2000_${STATE_NAME_LOWER}_$i.csv
+ else
+ echo "2010 data not yet available. Specify 'FAKE' as a second command-line argument to use 2000 data."
+ exit
+ fi
+done
+
+echo 'Processing crosswalk'
+./crosswalk.py $STATE_FIPS
+
+echo 'Computing deltas'
+./compute_deltas.py $STATE_FIPS
+
+echo 'Deploying to S3'
+./deploy.py
+
View
39 dataprocessing/batch_sf_2010.sh
@@ -1,39 +0,0 @@
-#!/bin/bash
-
-if [ $# \< 1 ]
-then
- echo "You must specify the proper-case name of a state as an argument, e.g. 'Delaware'."
- exit
-fi
-
-STATE_NAME=$1
-STATE_NAME_LOWER=`echo $1 | tr '[A-Z]' '[a-z]'`
-STATE_NAME_ABBR=`python get_state_abbr.py $1`
-STATE_FIPS=`python get_state_fips.py $1`
-FAKE=$2
-
-echo 'Fetching data'
-# Fetch 2000 data to be used as 2010 data for testing
-if [ "$FAKE" = "FAKE" ]; then
- ./fetch_sf_data_2000.sh $STATE_NAME $STATE_NAME_LOWER $STATE_NAME_ABBR
-else
- ./fetch_sf_data_2010.sh $STATE_NAME $STATE_NAME_LOWER $STATE_NAME_ABBR
-fi
-
-echo 'Loading 2010 data'
-for i in {1..39}
-do
- # Load 2000 data as 2010 for testing
- if [ "$FAKE" = "FAKE" ]; then
- ./load_sf_data_2010.py data/sf_data_2000_${STATE_NAME_LOWER}_$i.csv
- else
- echo "2010 data not yet available. Specify 'FAKE' as a second command-line argument to use 2000 data."
- exit
- fi
-done
-
-echo 'Processing crosswalk'
-./crosswalk.py $STATE_FIPS
-
-echo 'Computing deltas'
-./compute_deltas.py $STATE_FIPS
View
5 dataprocessing/batch_sf_everything.py
@@ -10,8 +10,7 @@
else:
FAKE = ''
-for state in STATE_ABBRS.keys():
+for state in sorted(STATE_ABBRS.keys()):
subprocess.call(['./batch_sf_2000.sh', state, FAKE])
-
-for state in STATE_ABBRS.keys():
subprocess.call(['./batch_sf_2010.sh', state, FAKE])
+
View
3  dataprocessing/batch_test.sh
@@ -23,4 +23,7 @@
./crosswalk.py 10
./compute_deltas.py 10
+#./deploy.py
+#./__drop_database.sh
+
./tests.py
View
9 dataprocessing/compute_deltas.py
@@ -1,16 +1,9 @@
#!/usr/bin/env python
-import sys
-
from pymongo import Connection, objectid
import config
-if len(sys.argv) < 2:
- sys.exit('You must provide a state fips code as an argument to this script.')
-
-STATE_FIPS = sys.argv[1]
-
connection = Connection()
db = connection[config.CENSUS_DB]
collection = db[config.GEOGRAPHIES_COLLECTION]
@@ -18,7 +11,7 @@
row_count = 0
computations = 0
-for geography in collection.find({ 'metadata.STATE': STATE_FIPS }, fields=['data']):
+for geography in collection.find({}, fields=['data']):
row_count += 1
if 'delta' not in geography['data']:
View
2  dataprocessing/config.py
@@ -17,3 +17,5 @@
GEOGRAPHIES_2000_COLLECTION = 'geographies_2000'
LABELS_COLLECTION = 'labels'
+# S3
+S3_BUCKET = 'census-test'
View
9 dataprocessing/crosswalk.py
@@ -1,17 +1,10 @@
#!/usr/bin/env python
-import sys
-
from pymongo import Connection, objectid
import config
import utils
-if len(sys.argv) < 2:
- sys.exit('You must provide a state fips code as an argument to this script.')
-
-STATE_FIPS = sys.argv[1]
-
connection = Connection()
db = connection[config.CENSUS_DB]
collection = db[config.GEOGRAPHIES_COLLECTION]
@@ -20,7 +13,7 @@
row_count = 0
inserts = 0
-for geography in collection.find({ 'metadata.STATE': STATE_FIPS }, fields=['data', 'geoid', 'metadata.NAME', 'sumlev', 'xwalk']):
+for geography in collection.find({}, fields=['data', 'geoid', 'metadata.NAME', 'sumlev', 'xwalk']):
row_count += 1
# TRACTS - require true crosswalk
View
34 dataprocessing/deploy.py
@@ -0,0 +1,34 @@
+#!/usr/bin/env python
+
+import json
+import zlib
+
+from boto.s3.connection import S3Connection
+from boto.s3.key import Key
+from pymongo import Connection
+
+import config
+
+connection = Connection()
+db = connection[config.CENSUS_DB]
+collection = db[config.GEOGRAPHIES_COLLECTION]
+
+row_count = 0
+deployed = 0
+
+c = S3Connection()
+bucket = c.get_bucket(config.S3_BUCKET)
+
+for geography in collection.find():
+ row_count += 1
+
+ del geography['_id']
+
+ k = Key(bucket)
+ k.key = '%(geoid)s.json' % geography
+ k.set_contents_from_string(zlib.compress(json.dumps(geography)))
+
+ deployed += 1
+
+print 'Row count: %i' % row_count
+print 'Deployed: %i' % deployed
View
4 dataprocessing/ensure_indexes.sh
@@ -2,7 +2,5 @@
echo "use census;
db.geographies.ensureIndex({ 'geoid': 1 });
-db.geographies.ensureIndex({ 'metadata.STATE': 1 });
db.geographies.ensureIndex({ 'xrefs': 1 });
-db.geographies_2000.ensureIndex({ 'geoid': 1 });
-db.geographies_2000.ensureIndex({ 'metadata.STATE': 1 });" | mongo
+db.geographies_2000.ensureIndex({ 'geoid': 1 });" | mongo
View
2  dataprocessing/load_crosswalk.py
@@ -22,7 +22,7 @@
# Create dummy 2000->2010 crosswalk
if FILENAME == 'FAKE':
- for geography in collection.find({ 'metadata.STATE': STATE_FIPS }, fields=['geoid', 'xwalk']):
+ for geography in collection.find({}, fields=['geoid', 'xwalk']):
if 'xwalk' not in geography:
geography['xwalk'] = {}
Please sign in to comment.
Something went wrong with that request. Please try again.