Skip to content
This repository
Browse code

Testing S3 deployment.

  • Loading branch information...
commit dee8ba6d99a535a8f25cd834ce61be1c846f1f22 1 parent 65d1949
cgroskopf authored
25 dataprocessing/batch_sf_2000.sh → dataprocessing/batch_sf.sh
@@ -12,6 +12,9 @@ STATE_NAME_ABBR=`python get_state_abbr.py $1`
12 12 STATE_FIPS=`python get_state_fips.py $1`
13 13 FAKE=$2
14 14
  15 +echo 'Dropping existing data.'
  16 +./__drop_database.sh
  17 +
15 18 echo 'Ensuring mongo indexes.'
16 19 ./ensure_indexes.sh
17 20
@@ -46,3 +49,25 @@ if [ "$FAKE" = "FAKE" ]; then
46 49 else
47 50 ./load_crosswalk.py $STATE_FIPS data/us2010trf.csv
48 51 fi
  52 +
  53 +echo 'Loading 2010 data'
  54 +for i in {1..39}
  55 +do
  56 + # Load 2000 data as 2010 for testing
  57 + if [ "$FAKE" = "FAKE" ]; then
  58 + ./load_sf_data_2010.py data/sf_data_2000_${STATE_NAME_LOWER}_$i.csv
  59 + else
  60 + echo "2010 data not yet available. Specify 'FAKE' as a second command-line argument to use 2000 data."
  61 + exit
  62 + fi
  63 +done
  64 +
  65 +echo 'Processing crosswalk'
  66 +./crosswalk.py $STATE_FIPS
  67 +
  68 +echo 'Computing deltas'
  69 +./compute_deltas.py $STATE_FIPS
  70 +
  71 +echo 'Deploying to S3'
  72 +./deploy.py
  73 +
39 dataprocessing/batch_sf_2010.sh
... ... @@ -1,39 +0,0 @@
1   -#!/bin/bash
2   -
3   -if [ $# \< 1 ]
4   -then
5   - echo "You must specify the proper-case name of a state as an argument, e.g. 'Delaware'."
6   - exit
7   -fi
8   -
9   -STATE_NAME=$1
10   -STATE_NAME_LOWER=`echo $1 | tr '[A-Z]' '[a-z]'`
11   -STATE_NAME_ABBR=`python get_state_abbr.py $1`
12   -STATE_FIPS=`python get_state_fips.py $1`
13   -FAKE=$2
14   -
15   -echo 'Fetching data'
16   -# Fetch 2000 data to be used as 2010 data for testing
17   -if [ "$FAKE" = "FAKE" ]; then
18   - ./fetch_sf_data_2000.sh $STATE_NAME $STATE_NAME_LOWER $STATE_NAME_ABBR
19   -else
20   - ./fetch_sf_data_2010.sh $STATE_NAME $STATE_NAME_LOWER $STATE_NAME_ABBR
21   -fi
22   -
23   -echo 'Loading 2010 data'
24   -for i in {1..39}
25   -do
26   - # Load 2000 data as 2010 for testing
27   - if [ "$FAKE" = "FAKE" ]; then
28   - ./load_sf_data_2010.py data/sf_data_2000_${STATE_NAME_LOWER}_$i.csv
29   - else
30   - echo "2010 data not yet available. Specify 'FAKE' as a second command-line argument to use 2000 data."
31   - exit
32   - fi
33   -done
34   -
35   -echo 'Processing crosswalk'
36   -./crosswalk.py $STATE_FIPS
37   -
38   -echo 'Computing deltas'
39   -./compute_deltas.py $STATE_FIPS
5 dataprocessing/batch_sf_everything.py
@@ -10,8 +10,7 @@
10 10 else:
11 11 FAKE = ''
12 12
13   -for state in STATE_ABBRS.keys():
  13 +for state in sorted(STATE_ABBRS.keys()):
14 14 subprocess.call(['./batch_sf_2000.sh', state, FAKE])
15   -
16   -for state in STATE_ABBRS.keys():
17 15 subprocess.call(['./batch_sf_2010.sh', state, FAKE])
  16 +
3  dataprocessing/batch_test.sh
@@ -23,4 +23,7 @@
23 23 ./crosswalk.py 10
24 24 ./compute_deltas.py 10
25 25
  26 +#./deploy.py
  27 +#./__drop_database.sh
  28 +
26 29 ./tests.py
9 dataprocessing/compute_deltas.py
... ... @@ -1,16 +1,9 @@
1 1 #!/usr/bin/env python
2 2
3   -import sys
4   -
5 3 from pymongo import Connection, objectid
6 4
7 5 import config
8 6
9   -if len(sys.argv) < 2:
10   - sys.exit('You must provide a state fips code as an argument to this script.')
11   -
12   -STATE_FIPS = sys.argv[1]
13   -
14 7 connection = Connection()
15 8 db = connection[config.CENSUS_DB]
16 9 collection = db[config.GEOGRAPHIES_COLLECTION]
@@ -18,7 +11,7 @@
18 11 row_count = 0
19 12 computations = 0
20 13
21   -for geography in collection.find({ 'metadata.STATE': STATE_FIPS }, fields=['data']):
  14 +for geography in collection.find({}, fields=['data']):
22 15 row_count += 1
23 16
24 17 if 'delta' not in geography['data']:
2  dataprocessing/config.py
@@ -17,3 +17,5 @@
17 17 GEOGRAPHIES_2000_COLLECTION = 'geographies_2000'
18 18 LABELS_COLLECTION = 'labels'
19 19
  20 +# S3
  21 +S3_BUCKET = 'census-test'
9 dataprocessing/crosswalk.py
... ... @@ -1,17 +1,10 @@
1 1 #!/usr/bin/env python
2 2
3   -import sys
4   -
5 3 from pymongo import Connection, objectid
6 4
7 5 import config
8 6 import utils
9 7
10   -if len(sys.argv) < 2:
11   - sys.exit('You must provide a state fips code as an argument to this script.')
12   -
13   -STATE_FIPS = sys.argv[1]
14   -
15 8 connection = Connection()
16 9 db = connection[config.CENSUS_DB]
17 10 collection = db[config.GEOGRAPHIES_COLLECTION]
@@ -20,7 +13,7 @@
20 13 row_count = 0
21 14 inserts = 0
22 15
23   -for geography in collection.find({ 'metadata.STATE': STATE_FIPS }, fields=['data', 'geoid', 'metadata.NAME', 'sumlev', 'xwalk']):
  16 +for geography in collection.find({}, fields=['data', 'geoid', 'metadata.NAME', 'sumlev', 'xwalk']):
24 17 row_count += 1
25 18
26 19 # TRACTS - require true crosswalk
34 dataprocessing/deploy.py
... ... @@ -0,0 +1,34 @@
  1 +#!/usr/bin/env python
  2 +
  3 +import json
  4 +import zlib
  5 +
  6 +from boto.s3.connection import S3Connection
  7 +from boto.s3.key import Key
  8 +from pymongo import Connection
  9 +
  10 +import config
  11 +
  12 +connection = Connection()
  13 +db = connection[config.CENSUS_DB]
  14 +collection = db[config.GEOGRAPHIES_COLLECTION]
  15 +
  16 +row_count = 0
  17 +deployed = 0
  18 +
  19 +c = S3Connection()
  20 +bucket = c.get_bucket(config.S3_BUCKET)
  21 +
  22 +for geography in collection.find():
  23 + row_count += 1
  24 +
  25 + del geography['_id']
  26 +
  27 + k = Key(bucket)
  28 + k.key = '%(geoid)s.json' % geography
  29 + k.set_contents_from_string(zlib.compress(json.dumps(geography)))
  30 +
  31 + deployed += 1
  32 +
  33 +print 'Row count: %i' % row_count
  34 +print 'Deployed: %i' % deployed
4 dataprocessing/ensure_indexes.sh
@@ -2,7 +2,5 @@
2 2
3 3 echo "use census;
4 4 db.geographies.ensureIndex({ 'geoid': 1 });
5   -db.geographies.ensureIndex({ 'metadata.STATE': 1 });
6 5 db.geographies.ensureIndex({ 'xrefs': 1 });
7   -db.geographies_2000.ensureIndex({ 'geoid': 1 });
8   -db.geographies_2000.ensureIndex({ 'metadata.STATE': 1 });" | mongo
  6 +db.geographies_2000.ensureIndex({ 'geoid': 1 });" | mongo
2  dataprocessing/load_crosswalk.py
@@ -22,7 +22,7 @@
22 22
23 23 # Create dummy 2000->2010 crosswalk
24 24 if FILENAME == 'FAKE':
25   - for geography in collection.find({ 'metadata.STATE': STATE_FIPS }, fields=['geoid', 'xwalk']):
  25 + for geography in collection.find({}, fields=['geoid', 'xwalk']):
26 26 if 'xwalk' not in geography:
27 27 geography['xwalk'] = {}
28 28

0 comments on commit dee8ba6

Please sign in to comment.
Something went wrong with that request. Please try again.