Skip to content

Commit

Permalink
Merge pull request #386 from amymok/HMDA-2016
Browse files Browse the repository at this point in the history
Add new county changes
  • Loading branch information
sephcoster committed Jan 4, 2018
2 parents 7b0a266 + 6a152a7 commit b3b7ccc
Show file tree
Hide file tree
Showing 12 changed files with 168 additions and 13 deletions.
7 changes: 6 additions & 1 deletion .travis.yml
Expand Up @@ -4,8 +4,13 @@ python:
install:
- |
echo "DATABASES = {'default': {'ENGINE': 'django.contrib.gis.db.backends.postgis', 'NAME': 'travisdb', 'USER': 'postgres'}}" > mapusaurus/mapusaurus/settings/local_settings.py
- pip install -r requirements.txt --use-mirrors
- pip install -r requirements_test.txt
- pip install coverage coveralls
addons:
postgresql: 9.3
apt:
packages:
- postgresql-9.3-postgis-2.3
before_script:
- psql -c 'CREATE DATABASE travisdb;' -U postgres
- psql -c 'CREATE EXTENSION postgis;' -U postgres -d travisdb
Expand Down
Expand Up @@ -32,6 +32,7 @@ def handle(self, *args, **options):
recordnum = line[18:25]
censustract = line[27:32] + line[54:60]
censustract = errors.in_2010.get(censustract, censustract)
censustract = errors.change_specific_year(censustract, year)
if censustract is not None:
geoids_by_record[recordnum] = year + censustract
state = line[27:29]
Expand Down
10 changes: 7 additions & 3 deletions mapusaurus/censusdata/tests/test_loader.py
Expand Up @@ -8,6 +8,7 @@
from censusdata import models
from censusdata.management.commands.load_summary_one import Command

import geo.errors

class LoadSummaryDataTest(TestCase):
fixtures = ['mock_geo']
Expand Down Expand Up @@ -38,13 +39,14 @@ def test_handle(self, hf3, hf4, hf5):
self.assertEqual(positional_args[3]['0007159'], year+'11001000100')
self.assertEqual(positional_args[3]['0007211'], year+'11001000902')

@patch('censusdata.management.commands.load_summary_one.errors')
@patch.object(Command, 'handle_filefive')
@patch.object(Command, 'handle_filefour')
@patch.object(Command, 'handle_filethree')
def test_handle_errors_dict(self, hf3, hf4, hf5, errors):
def test_handle_errors_dict(self, hf3, hf4, hf5):
year = '2001'
errors.in_2010 = {'11001000100': '22002000200', '11001000902': None}
old_geo_errors = geo.errors.in_2010
geo.errors.in_2010 = {'11001000100': '22002000200', '11001000902': None}

# Create Mock GEO file
shutil.copyfile(os.path.join("censusdata", "tests", "mock_geo.txt"),
os.path.join(self.tempdir, "ZZgeo2010.sf1"))
Expand All @@ -57,6 +59,8 @@ def test_handle_errors_dict(self, hf3, hf4, hf5, errors):
# This entry was converted
self.assertEqual(positional_args[3]['0007159'], year+'22002000200')

geo.errors.in_2010 = old_geo_errors

def test_handle_filethree(self):
shutil.copyfile(os.path.join("censusdata", "tests", "mock_file3.txt"),
os.path.join(self.tempdir, "ZZ000032010.sf1"))
Expand Down
26 changes: 26 additions & 0 deletions mapusaurus/geo/errors.py
Expand Up @@ -2,8 +2,12 @@
# Unfortunately, both HMDA and census population statistics refer to the
# original, erroneous census tracts. See
# http://www.census.gov/geo/reference/pdfs/Geography_Notes.pdf
# https://www.census.gov/programs-surveys/acs/technical-documentation/table-and-geography-changes.201X.html, where X is the last digit of the year
in_2010 = {
# Original -> Correct

# 2012
# https://www.census.gov/programs-surveys/acs/technical-documentation/table-and-geography-changes/2012/geography-changes.html
"04019002701": "04019002704",
"04019002903": "04019002906",
"04019410501": "04019004118",
Expand All @@ -14,6 +18,8 @@

"06037930401": "06037137000",

# 2011
# https://www.census.gov/programs-surveys/acs/technical-documentation/table-and-geography-changes/2011/geography-changes.html
"36053940101": "36053030101",
"36053940102": "36053030102",
"36053940103": "36053030103",
Expand All @@ -32,3 +38,23 @@
# removing it
"36085008900": None,
}

# Tract renumberings grouped by year. Each inner dict maps a census tract
# GEOID to the GEOID that replaces it.
# Reference maps: https://www.census.gov/geo/maps-data/maps/2010ref/stXX_tract.html
# (replace XX with the state code).
changes = {
    2014: {
        "51515050100": "51019050100",
    },
    2015: {
        "02270000100": "02158000100",
        "46113940500": "46102940500",
        "46113940800": "46102940800",
        "46113940900": "46102940900",
        # More soon
    },
}


def change_specific_year(census_tract, year):
    """Return ``census_tract`` after applying the renumberings in ``changes``.

    Every change set whose key year is strictly earlier than ``year`` is
    applied, in ascending year order, so that the output of one year's
    renumbering feeds into the next year's lookup.

    Args:
        census_tract: Tract GEOID string, or ``None``.
        year: The data year, as an int or a numeric string.

    Returns:
        The replacement GEOID if one applies, otherwise ``census_tract``
        unchanged. ``None`` is passed through as ``None``.

    Raises:
        ValueError: If ``year`` cannot be converted to an int.
    """
    # Convert once up front instead of on every loop iteration.
    target_year = int(year)

    # Nothing to translate for a missing tract.
    if census_tract is None:
        return None

    new_census_tract = census_tract
    for yr in sorted(changes):
        if target_year > yr:
            new_census_tract = changes[yr].get(new_census_tract, new_census_tract)
    return new_census_tract
29 changes: 29 additions & 0 deletions mapusaurus/hmda/migrations/0003_auto_20171215_1812.py
@@ -0,0 +1,29 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

from django.db import models, migrations


def _year_choices():
    """Build a fresh list of ``(year, year)`` choice pairs for 1970 through 2049."""
    years = range(1970, 2050)
    return list(zip(years, years))


class Migration(migrations.Migration):
    """Alter the three year fields of the ``year`` model.

    Each field is a ``PositiveIntegerField`` offering 1970-2049 as choices.
    """

    dependencies = [
        ('hmda', '0002_auto_20160219_1957'),
    ]

    operations = [
        migrations.AlterField(
            model_name='year',
            name='census_year',
            field=models.PositiveIntegerField(
                help_text=b'Year of census data.',
                choices=_year_choices(),
            ),
        ),
        migrations.AlterField(
            model_name='year',
            name='geo_year',
            field=models.PositiveIntegerField(
                help_text=b'Year that geographic boundaries were recorded.',
                choices=_year_choices(),
            ),
        ),
        migrations.AlterField(
            model_name='year',
            name='hmda_year',
            field=models.PositiveIntegerField(
                help_text=b'The reporting year of the HMDA record.',
                serialize=False,
                primary_key=True,
                choices=_year_choices(),
            ),
        ),
    ]
Expand Up @@ -5,6 +5,9 @@
class Command(BaseCommand):
args = "<filename>"

@staticmethod
def normalize(s):
    """Return ``s`` with surrounding whitespace removed and upper-cased.

    Declared as a static method because it takes no ``self``; without the
    decorator it could not be called through the class or an instance on
    Python 2.

    NOTE(review): code inside other methods of this class must call this as
    ``self.normalize(...)`` or ``Command.normalize(...)``. A bare
    ``normalize(...)`` does not resolve to a name defined in the class body.
    """
    return s.strip().upper()

def handle(self, *args, **options):
branch_location_filename = args[0]
count = 0;
Expand All @@ -14,20 +17,27 @@ def handle(self, *args, **options):
for branch_location_line in branch_location_reader:
record = Branch(
year = branch_location_line[0].replace("'", ""),
name = branch_location_line[6],
street = branch_location_line[7] if branch_location_line[7] != '0' else '',
city = branch_location_line[8],
state = branch_location_line[10],
zipcode = branch_location_line[11],
name = normalize(branch_location_line[6]),
street = normalize(branch_location_line[7]) if branch_location_line[7] != '0' else '',
city = normalize(branch_location_line[8]),
state = normalize(branch_location_line[10]),
zipcode = normalize(branch_location_line[11]),
lat = branch_location_line[13],
lon = branch_location_line[12],
)
record.institution_id = (branch_location_line[0]+branch_location_line[1]+branch_location_line[2]).replace("'", "")
record.institution_id = (branch_location_line[0]+branch_location_line[1]+branch_location_line[2]).replace("'", "").replace(" ", "")
if Institution.objects.filter(institution_id=record.institution_id).count() > 0:
branch_location.append(record)
else:
print "Can't find institution_id"
print '{}\t{}\t{}'.format(record.institution_id, record.name, record.street)
if len(branch_location) > 9999:
count += len(branch_location)
Branch.objects.bulk_create(branch_location, batch_size=1000)
print "Record count: " + str(count)
branch_location[:] = []

if len(branch_location) > 0:
count += len(branch_location)
Branch.objects.bulk_create(branch_location, batch_size=1000)
print "Record count: " + str(count)
branch_location[:] = []
Expand Up @@ -16,6 +16,7 @@ def handle(self, *args, **options):
with open(transmittal_filename) as institutioncsv:
transmittal_reader = csv.reader(institutioncsv, delimiter='\t')
institutions = []
# count = 1  # uncomment to see which item failed; see the comment below where institutions are created individually
for inst_line in transmittal_reader:
year = inst_line[0]
zip_code = inst_line[8]
Expand All @@ -37,5 +38,9 @@ def handle(self, *args, **options):
assets=int(inst_line[17]),
)

# Use the commented-out lines below to find exactly which item is failing; the bulk_create call further down must be disabled first.
# Institution.objects.create(inst)
# inst.save()
# count += 1
institutions.append(inst)
Institution.objects.bulk_create(institutions)
19 changes: 19 additions & 0 deletions mapusaurus/respondents/migrations/0003_auto_20171215_1812.py
@@ -0,0 +1,19 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

from django.db import models, migrations


class Migration(migrations.Migration):
    """Alter ``institution.assets`` to a ``BigIntegerField`` defaulting to 0."""

    dependencies = [('respondents', '0002_auto_20160222_1706')]

    operations = [
        migrations.AlterField(
            name='assets',
            model_name='institution',
            field=models.BigIntegerField(
                help_text=b'Prior year reported assets in thousands of dollars',
                default=0,
            ),
        ),
    ]
19 changes: 19 additions & 0 deletions mapusaurus/respondents/migrations/0004_auto_20171227_2123.py
@@ -0,0 +1,19 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

from django.db import models, migrations


class Migration(migrations.Migration):
    """Alter ``branch.name`` to a ``CharField`` with ``max_length=60``."""

    dependencies = [('respondents', '0003_auto_20171215_1812')]

    operations = [
        migrations.AlterField(
            name='name',
            model_name='branch',
            field=models.CharField(
                max_length=60,
            ),
        ),
    ]
19 changes: 19 additions & 0 deletions mapusaurus/respondents/migrations/0005_auto_20171227_2322.py
@@ -0,0 +1,19 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

from django.db import models, migrations


class Migration(migrations.Migration):
    """Alter ``branch.name`` to a ``CharField`` with ``max_length=100``."""

    dependencies = [('respondents', '0004_auto_20171227_2123')]

    operations = [
        migrations.AlterField(
            name='name',
            model_name='branch',
            field=models.CharField(
                max_length=100,
            ),
        ),
    ]
4 changes: 2 additions & 2 deletions mapusaurus/respondents/models.py
Expand Up @@ -71,7 +71,7 @@ class Institution(models.Model):
name = models.CharField(max_length=30)
mailing_address = models.CharField(max_length=40)
zip_code = models.ForeignKey('ZipCodeCityStateYear', null=False)
assets = models.PositiveIntegerField(
assets = models.BigIntegerField(
default=0,
help_text='Prior year reported assets in thousands of dollars'
)
Expand Down Expand Up @@ -145,7 +145,7 @@ class LenderHierarchy(models.Model):
class Branch(models.Model):
year = models.SmallIntegerField()
institution = models.ForeignKey('Institution', to_field='institution_id')
name = models.CharField(max_length=50)
name = models.CharField(max_length=100)
street = models.CharField(max_length=100)
city = models.CharField(max_length=25)
state = USStateField()
Expand Down
18 changes: 18 additions & 0 deletions requirements_test.txt
@@ -0,0 +1,18 @@
Django==1.7
argparse==1.2.1
django-geojson==2.6.0
django-haystack==2.3.0
django-localflavor==1.0
django-overextends==0.3.2
djangorestframework==2.3.14
jsonschema==2.4.0
mock==1.0.1
psycopg2
elasticsearch==1.0
requests==2.3.0
simplejson==3.6.3
six==1.7.3
sphinx-me==0.3
wsgiref==0.1.2
gunicorn==19.1.1
newrelic==2.60.0.46

0 comments on commit b3b7ccc

Please sign in to comment.