Skip to content

Commit

Permalink
Update the load_programs command
Browse files Browse the repository at this point in the history
The command's s3 loading option is now pointed at a specific bucket,
`validated_program_data`, to ensure that raw school submissions aren't
inadvertently loaded into the cfgov database without being checked.
  • Loading branch information
higs4281 committed Jan 6, 2017
1 parent fb1ef1e commit 108cbd3
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 5 deletions.
15 changes: 11 additions & 4 deletions paying_for_college/disclosures/scripts/load_programs.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,6 @@ def read_in_data(filename):
return data


# http://files.consumerfinance.gov.s3.amazonaws.com/pb/paying_for_college/csv/CFPBDATAFILE713%20(2).CSV
def read_in_s3(url):
data = [{}]
response = requests.get(url)
Expand Down Expand Up @@ -185,14 +184,21 @@ def clean(data):
return cleaned_data


# 'source' should be a CSV file path or, if s3 is True, an s3 URL
def load(source, s3=False):
"""
Loads program data from a local or S3 file.
For a local file, 'source' should be a CSV file path.
For an s3 file, 'source' should be the file name of a CSV
in the 'validated_program_data' folder on s3.
"""
test_program = False
new_programs = 0
updated_programs = 0
FAILED = [] # failed messages
if s3:
raw_data = read_in_s3(source)
s3_url = ('http://files.consumerfinance.gov.s3.amazonaws.com'
'/pb/paying_for_college/csv/validated_program_data/{}')
raw_data = read_in_s3(s3_url.format(source))
else:
raw_data = read_in_data(source)
if not raw_data[0]:
Expand Down Expand Up @@ -255,7 +261,8 @@ def load(source, s3=False):
for key, error_list in serializer.errors.iteritems():

fail_msg = (
'ERROR on row {}: {}: '.format(raw_data.index(row) + 1, key))
'ERROR on row {}: {}: '.format(
raw_data.index(row) + 1, key))
for e in error_list:
fail_msg = '{} {},'.format(fail_msg, e)
FAILED.append(fail_msg)
Expand Down
2 changes: 1 addition & 1 deletion paying_for_college/management/commands/load_programs.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from django.core.management.base import BaseCommand, CommandError
from django.core.management.base import BaseCommand
from paying_for_college.disclosures.scripts import load_programs

COMMAND_HELP = """update_programs will update program data based on
Expand Down

0 comments on commit 108cbd3

Please sign in to comment.