diff --git a/.travis.yml b/.travis.yml index dd362c94..cf59826b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,8 +4,13 @@ python: install: - | echo "DATABASES = {'default': {'ENGINE': 'django.contrib.gis.db.backends.postgis', 'NAME': 'travisdb', 'USER': 'postgres'}}" > mapusaurus/mapusaurus/settings/local_settings.py - - pip install -r requirements.txt --use-mirrors + - pip install -r requirements_test.txt - pip install coverage coveralls +addons: + postgresql: 9.3 + apt: + packages: + - postgresql-9.3-postgis-2.3 before_script: - psql -c 'CREATE DATABASE travisdb;' -U postgres - psql -c 'CREATE EXTENSION postgis;' -U postgres -d travisdb diff --git a/mapusaurus/censusdata/management/commands/load_summary_one.py b/mapusaurus/censusdata/management/commands/load_summary_one.py index 7bde2ddc..bd3e41b2 100644 --- a/mapusaurus/censusdata/management/commands/load_summary_one.py +++ b/mapusaurus/censusdata/management/commands/load_summary_one.py @@ -32,6 +32,7 @@ def handle(self, *args, **options): recordnum = line[18:25] censustract = line[27:32] + line[54:60] censustract = errors.in_2010.get(censustract, censustract) + censustract = errors.change_specific_year(censustract, year) if censustract is not None: geoids_by_record[recordnum] = year + censustract state = line[27:29] diff --git a/mapusaurus/censusdata/tests/test_loader.py b/mapusaurus/censusdata/tests/test_loader.py index 0e51ff4e..e80f1287 100644 --- a/mapusaurus/censusdata/tests/test_loader.py +++ b/mapusaurus/censusdata/tests/test_loader.py @@ -8,6 +8,7 @@ from censusdata import models from censusdata.management.commands.load_summary_one import Command +import geo.errors class LoadSummaryDataTest(TestCase): fixtures = ['mock_geo'] @@ -38,13 +39,14 @@ def test_handle(self, hf3, hf4, hf5): self.assertEqual(positional_args[3]['0007159'], year+'11001000100') self.assertEqual(positional_args[3]['0007211'], year+'11001000902') - @patch('censusdata.management.commands.load_summary_one.errors') 
@patch.object(Command, 'handle_filefive') @patch.object(Command, 'handle_filefour') @patch.object(Command, 'handle_filethree') - def test_handle_errors_dict(self, hf3, hf4, hf5, errors): + def test_handle_errors_dict(self, hf3, hf4, hf5): year = '2001' - errors.in_2010 = {'11001000100': '22002000200', '11001000902': None} + old_geo_errors = geo.errors.in_2010 + geo.errors.in_2010 = {'11001000100': '22002000200', '11001000902': None} + # Create Mock GEO file shutil.copyfile(os.path.join("censusdata", "tests", "mock_geo.txt"), os.path.join(self.tempdir, "ZZgeo2010.sf1")) @@ -57,6 +59,8 @@ def test_handle_errors_dict(self, hf3, hf4, hf5, errors): # This entry was converted self.assertEqual(positional_args[3]['0007159'], year+'22002000200') + geo.errors.in_2010 = old_geo_errors + def test_handle_filethree(self): shutil.copyfile(os.path.join("censusdata", "tests", "mock_file3.txt"), os.path.join(self.tempdir, "ZZ000032010.sf1")) diff --git a/mapusaurus/geo/errors.py b/mapusaurus/geo/errors.py index 709a78f3..9c62c86b 100644 --- a/mapusaurus/geo/errors.py +++ b/mapusaurus/geo/errors.py @@ -2,8 +2,12 @@ # Unfortunately, both HMDA and census population statistics refer to the # original, erroneous census tracts. 
See # http://www.census.gov/geo/reference/pdfs/Geography_Notes.pdf +# https://www.census.gov/programs-surveys/acs/technical-documentation/table-and-geography-changes.201X.html, where X is the last digit of the year in_2010 = { # Original -> Correct + + # 2012 + # https://www.census.gov/programs-surveys/acs/technical-documentation/table-and-geography-changes/2012/geography-changes.html "04019002701": "04019002704", "04019002903": "04019002906", "04019410501": "04019004118", @@ -14,6 +18,8 @@ "06037930401": "06037137000", + # 2011 + # https://www.census.gov/programs-surveys/acs/technical-documentation/table-and-geography-changes/2011/geography-changes.html "36053940101": "36053030101", "36053940102": "36053030102", "36053940103": "36053030103", @@ -32,3 +38,23 @@ # removing it "36085008900": None, } + +changes = { + # https://www.census.gov/geo/maps-data/maps/2010ref/stXX_tract.html, where XX is the two-digit state code + 2014: { + "51515050100": "51019050100", + }, + 2015: { + "02270000100": "02158000100", + "46113940500": "46102940500", + "46113940800": "46102940800", + "46113940900": "46102940900", + # More soon + } +} +def change_specific_year(census_tract, year): + new_census_tract = census_tract + for yr in sorted(changes): + if int(year) > yr: + new_census_tract = changes[yr].get(new_census_tract, new_census_tract) + return new_census_tract diff --git a/mapusaurus/hmda/migrations/0003_auto_20171215_1812.py b/mapusaurus/hmda/migrations/0003_auto_20171215_1812.py new file mode 100644 index 00000000..edb035ab --- /dev/null +++ b/mapusaurus/hmda/migrations/0003_auto_20171215_1812.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.db import models, migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('hmda', '0002_auto_20160219_1957'), + ] + + operations = [ + migrations.AlterField( + model_name='year', + name='census_year', + field=models.PositiveIntegerField(help_text=b'Year of census data.', 
choices=[(1970, 1970), (1971, 1971), (1972, 1972), (1973, 1973), (1974, 1974), (1975, 1975), (1976, 1976), (1977, 1977), (1978, 1978), (1979, 1979), (1980, 1980), (1981, 1981), (1982, 1982), (1983, 1983), (1984, 1984), (1985, 1985), (1986, 1986), (1987, 1987), (1988, 1988), (1989, 1989), (1990, 1990), (1991, 1991), (1992, 1992), (1993, 1993), (1994, 1994), (1995, 1995), (1996, 1996), (1997, 1997), (1998, 1998), (1999, 1999), (2000, 2000), (2001, 2001), (2002, 2002), (2003, 2003), (2004, 2004), (2005, 2005), (2006, 2006), (2007, 2007), (2008, 2008), (2009, 2009), (2010, 2010), (2011, 2011), (2012, 2012), (2013, 2013), (2014, 2014), (2015, 2015), (2016, 2016), (2017, 2017), (2018, 2018), (2019, 2019), (2020, 2020), (2021, 2021), (2022, 2022), (2023, 2023), (2024, 2024), (2025, 2025), (2026, 2026), (2027, 2027), (2028, 2028), (2029, 2029), (2030, 2030), (2031, 2031), (2032, 2032), (2033, 2033), (2034, 2034), (2035, 2035), (2036, 2036), (2037, 2037), (2038, 2038), (2039, 2039), (2040, 2040), (2041, 2041), (2042, 2042), (2043, 2043), (2044, 2044), (2045, 2045), (2046, 2046), (2047, 2047), (2048, 2048), (2049, 2049)]), + ), + migrations.AlterField( + model_name='year', + name='geo_year', + field=models.PositiveIntegerField(help_text=b'Year that geographic boundaries were recorded.', choices=[(1970, 1970), (1971, 1971), (1972, 1972), (1973, 1973), (1974, 1974), (1975, 1975), (1976, 1976), (1977, 1977), (1978, 1978), (1979, 1979), (1980, 1980), (1981, 1981), (1982, 1982), (1983, 1983), (1984, 1984), (1985, 1985), (1986, 1986), (1987, 1987), (1988, 1988), (1989, 1989), (1990, 1990), (1991, 1991), (1992, 1992), (1993, 1993), (1994, 1994), (1995, 1995), (1996, 1996), (1997, 1997), (1998, 1998), (1999, 1999), (2000, 2000), (2001, 2001), (2002, 2002), (2003, 2003), (2004, 2004), (2005, 2005), (2006, 2006), (2007, 2007), (2008, 2008), (2009, 2009), (2010, 2010), (2011, 2011), (2012, 2012), (2013, 2013), (2014, 2014), (2015, 2015), (2016, 2016), (2017, 2017), (2018, 2018), (2019, 
2019), (2020, 2020), (2021, 2021), (2022, 2022), (2023, 2023), (2024, 2024), (2025, 2025), (2026, 2026), (2027, 2027), (2028, 2028), (2029, 2029), (2030, 2030), (2031, 2031), (2032, 2032), (2033, 2033), (2034, 2034), (2035, 2035), (2036, 2036), (2037, 2037), (2038, 2038), (2039, 2039), (2040, 2040), (2041, 2041), (2042, 2042), (2043, 2043), (2044, 2044), (2045, 2045), (2046, 2046), (2047, 2047), (2048, 2048), (2049, 2049)]), + ), + migrations.AlterField( + model_name='year', + name='hmda_year', + field=models.PositiveIntegerField(help_text=b'The reporting year of the HMDA record.', serialize=False, primary_key=True, choices=[(1970, 1970), (1971, 1971), (1972, 1972), (1973, 1973), (1974, 1974), (1975, 1975), (1976, 1976), (1977, 1977), (1978, 1978), (1979, 1979), (1980, 1980), (1981, 1981), (1982, 1982), (1983, 1983), (1984, 1984), (1985, 1985), (1986, 1986), (1987, 1987), (1988, 1988), (1989, 1989), (1990, 1990), (1991, 1991), (1992, 1992), (1993, 1993), (1994, 1994), (1995, 1995), (1996, 1996), (1997, 1997), (1998, 1998), (1999, 1999), (2000, 2000), (2001, 2001), (2002, 2002), (2003, 2003), (2004, 2004), (2005, 2005), (2006, 2006), (2007, 2007), (2008, 2008), (2009, 2009), (2010, 2010), (2011, 2011), (2012, 2012), (2013, 2013), (2014, 2014), (2015, 2015), (2016, 2016), (2017, 2017), (2018, 2018), (2019, 2019), (2020, 2020), (2021, 2021), (2022, 2022), (2023, 2023), (2024, 2024), (2025, 2025), (2026, 2026), (2027, 2027), (2028, 2028), (2029, 2029), (2030, 2030), (2031, 2031), (2032, 2032), (2033, 2033), (2034, 2034), (2035, 2035), (2036, 2036), (2037, 2037), (2038, 2038), (2039, 2039), (2040, 2040), (2041, 2041), (2042, 2042), (2043, 2043), (2044, 2044), (2045, 2045), (2046, 2046), (2047, 2047), (2048, 2048), (2049, 2049)]), + ), + ] \ No newline at end of file diff --git a/mapusaurus/respondents/management/commands/load_branch_locations.py b/mapusaurus/respondents/management/commands/load_branch_locations.py index 302082a8..20908bc0 100644 --- 
a/mapusaurus/respondents/management/commands/load_branch_locations.py +++ b/mapusaurus/respondents/management/commands/load_branch_locations.py @@ -5,6 +5,9 @@ class Command(BaseCommand): args = "" + def normalize(self, s): # Strip surrounding whitespace and upper-case a field value; an instance method so handle() can reach it via self. + return s.strip().upper() + def handle(self, *args, **options): branch_location_filename = args[0] count = 0; @@ -14,20 +17,27 @@ def handle(self, *args, **options): for branch_location_line in branch_location_reader: record = Branch( year = branch_location_line[0].replace("'", ""), - name = branch_location_line[6], - street = branch_location_line[7] if branch_location_line[7] != '0' else '', - city = branch_location_line[8], - state = branch_location_line[10], - zipcode = branch_location_line[11], + name = self.normalize(branch_location_line[6]), + street = self.normalize(branch_location_line[7]) if branch_location_line[7] != '0' else '', + city = self.normalize(branch_location_line[8]), + state = self.normalize(branch_location_line[10]), + zipcode = self.normalize(branch_location_line[11]), lat = branch_location_line[13], lon = branch_location_line[12], ) - record.institution_id = (branch_location_line[0]+branch_location_line[1]+branch_location_line[2]).replace("'", "") + record.institution_id = (branch_location_line[0]+branch_location_line[1]+branch_location_line[2]).replace("'", "").replace(" ", "") if Institution.objects.filter(institution_id=record.institution_id).count() > 0: branch_location.append(record) + else: + print "Can't find institution_id" + print '{}\t{}\t{}'.format(record.institution_id, record.name, record.street) if len(branch_location) > 9999: count += len(branch_location) Branch.objects.bulk_create(branch_location, batch_size=1000) print "Record count: " + str(count) branch_location[:] = [] - + if len(branch_location) > 0: + count += len(branch_location) + Branch.objects.bulk_create(branch_location, batch_size=1000) + print "Record count: " + str(count) + branch_location[:] = [] diff --git
a/mapusaurus/respondents/management/commands/load_transmittal.py b/mapusaurus/respondents/management/commands/load_transmittal.py index cd09ed71..d7e5e70c 100644 --- a/mapusaurus/respondents/management/commands/load_transmittal.py +++ b/mapusaurus/respondents/management/commands/load_transmittal.py @@ -16,6 +16,7 @@ def handle(self, *args, **options): with open(transmittal_filename) as institutioncsv: transmittal_reader = csv.reader(institutioncsv, delimiter='\t') institutions = [] + # count = 1 # debugging aid: row counter for locating a failing item; see the note below where each institution is built for inst_line in transmittal_reader: year = inst_line[0] zip_code = inst_line[8] @@ -37,5 +38,9 @@ def handle(self, *args, **options): assets=int(inst_line[17]), ) + # Debugging aid: to find the exact item that fails, disable the bulk_create below and save each institution individually + # (use inst.save(); Institution.objects.create() takes field keyword arguments, not an instance) + # inst.save() + # count += 1 institutions.append(inst) Institution.objects.bulk_create(institutions) diff --git a/mapusaurus/respondents/migrations/0003_auto_20171215_1812.py b/mapusaurus/respondents/migrations/0003_auto_20171215_1812.py new file mode 100644 index 00000000..03cf9e95 --- /dev/null +++ b/mapusaurus/respondents/migrations/0003_auto_20171215_1812.py @@ -0,0 +1,19 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.db import models, migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('respondents', '0002_auto_20160222_1706'), + ] + + operations = [ + migrations.AlterField( + model_name='institution', + name='assets', + field=models.BigIntegerField(default=0, help_text=b'Prior year reported assets in thousands of dollars'), + ), + ] \ No newline at end of file diff --git a/mapusaurus/respondents/migrations/0004_auto_20171227_2123.py b/mapusaurus/respondents/migrations/0004_auto_20171227_2123.py new file mode 100644 index 00000000..92ed62ad --- /dev/null +++ 
b/mapusaurus/respondents/migrations/0004_auto_20171227_2123.py @@ -0,0 +1,19 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.db import models, migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('respondents', '0003_auto_20171215_1812'), + ] + + operations = [ + migrations.AlterField( + model_name='branch', + name='name', + field=models.CharField(max_length=60), + ), + ] \ No newline at end of file diff --git a/mapusaurus/respondents/migrations/0005_auto_20171227_2322.py b/mapusaurus/respondents/migrations/0005_auto_20171227_2322.py new file mode 100644 index 00000000..b0426bd5 --- /dev/null +++ b/mapusaurus/respondents/migrations/0005_auto_20171227_2322.py @@ -0,0 +1,19 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from django.db import models, migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('respondents', '0004_auto_20171227_2123'), + ] + + operations = [ + migrations.AlterField( + model_name='branch', + name='name', + field=models.CharField(max_length=100), + ), + ] \ No newline at end of file diff --git a/mapusaurus/respondents/models.py b/mapusaurus/respondents/models.py index 5c16ffa6..5bc9e9f5 100644 --- a/mapusaurus/respondents/models.py +++ b/mapusaurus/respondents/models.py @@ -71,7 +71,7 @@ class Institution(models.Model): name = models.CharField(max_length=30) mailing_address = models.CharField(max_length=40) zip_code = models.ForeignKey('ZipCodeCityStateYear', null=False) - assets = models.PositiveIntegerField( + assets = models.BigIntegerField( default=0, help_text='Prior year reported assets in thousands of dollars' ) @@ -145,7 +145,7 @@ class LenderHierarchy(models.Model): class Branch(models.Model): year = models.SmallIntegerField() institution = models.ForeignKey('Institution', to_field='institution_id') - name = models.CharField(max_length=50) + name = models.CharField(max_length=100) street = models.CharField(max_length=100) 
city = models.CharField(max_length=25) state = USStateField() diff --git a/requirements_test.txt b/requirements_test.txt new file mode 100644 index 00000000..5bc9fdce --- /dev/null +++ b/requirements_test.txt @@ -0,0 +1,18 @@ +Django==1.7 +argparse==1.2.1 +django-geojson==2.6.0 +django-haystack==2.3.0 +django-localflavor==1.0 +django-overextends==0.3.2 +djangorestframework==2.3.14 +jsonschema==2.4.0 +mock==1.0.1 +psycopg2 +elasticsearch==1.0 +requests==2.3.0 +simplejson==3.6.3 +six==1.7.3 +sphinx-me==0.3 +wsgiref==0.1.2 +gunicorn==19.1.1 +newrelic==2.60.0.46