Skip to content
This repository has been archived by the owner on Feb 1, 2019. It is now read-only.

Commit

Permalink
Merge pull request #77 from porterbot/larAggregate
Browse files Browse the repository at this point in the history
Added LAR aggregate support through CFPB data API
  • Loading branch information
LinuxBozo committed Apr 9, 2015
2 parents 26523cd + d6c633a commit 3ddd71c
Show file tree
Hide file tree
Showing 11 changed files with 298 additions and 171,734 deletions.
171,410 changes: 0 additions & 171,410 deletions data/lar.json

This file was deleted.

106 changes: 106 additions & 0 deletions data/laraggregates.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
'use strict';

var fs = require('fs');
var _ = require('lodash'),
superagent = require('superagent'),
Promise = require('bluebird');

// Promise-returning wrapper around superagent.get, built with bluebird's promisify,
// so HTTP GETs can be chained with .then() (see poll below).
var dataGET = Promise.promisify(superagent.get);

/**
 * GET `url` and keep re-requesting it until the response body contains a
 * non-empty `results` array.
 *
 * @param {string} url - URI to request.
 * @returns {Promise} resolves with the first response whose `body.results`
 *   has at least one entry; rejects if a request (or reading `body`) fails.
 *
 * NOTE(review): there is no upper bound on the number of retries, so a query
 * that legitimately has no results is polled forever.
 */
var poll = function (url) {
  var RETRY_DELAY_MS = 10000;

  // Promise.delay(value, ms) is the bluebird 2.x argument order: it resolves
  // with `url` after the delay, which is then fed straight back into poll.
  var retryLater = function () {
    return Promise.delay(url, RETRY_DELAY_MS).then(poll);
  };

  return dataGET(url).then(function (response) {
    var rows = response.body.results;
    var hasRows = Boolean(rows) && rows.length > 0;
    return hasRows ? response : retryLater();
  });
};

/**
 * Build the data API query URI for one aggregate of one activity year.
 *
 * Each own property of `queryParams` becomes one clause of the `$where`
 * expression, joined with `+AND+`:
 *   - array value          -> `key+IN+(v1,v2,...)`
 *   - object {type, value} -> `key<type><value>` (e.g. `purchaser_type>0`)
 *   - anything else        -> `key=value`
 *
 * @param {string} activityYear - appended directly to the module-level `dataURI`.
 * @param {Object} [queryParams] - extra filters; missing/empty means no extra clauses.
 * @returns {string} the full URI including the fixed select/group/order suffix.
 *
 * NOTE(review): keys and values are concatenated as-is, without URL-encoding.
 */
var buildDataAPIURI = function (activityYear, queryParams) {
  // Object.keys restricts the query to the object's own properties (for...in
  // would also pick up enumerable properties inherited from a prototype).
  var clauses = Object.keys(queryParams || {}).map(function (key) {
    var value = queryParams[key];
    if (Array.isArray(value)) {
      return key + '+IN+(' + value.join() + ')';
    }
    if (value instanceof Object) {
      return key + value.type + value.value;
    }
    return key + '=' + value;
  });

  var finalURI = dataURI + activityYear;
  if (clauses.length > 0) {
    finalURI += '+AND+';
  }
  return finalURI + clauses.join('+AND+')
    + '&$select=respondent_id,agency_code,COUNT(sequence_number)&$group=respondent_id,agency_code&$limit=0&$orderBy=count_sequence_number+DESC';
};

/**
 * Fetch every aggregate in `larAggregates` for one activity year, merge the
 * per-respondent counts into one document per respondent, and append those
 * documents to the module-level `finalLARCollection`.
 *
 * @param {string} activityYear - year passed to buildDataAPIURI and stored on
 *   every document as `activity_year`.
 * @returns {Promise} resolves (with no value) once all documents for this year
 *   have been pushed; rejects if any aggregate request fails.
 */
var getData = function (activityYear) {
  return Promise.map(larAggregates, function (aggregate) {
    return poll(buildDataAPIURI(activityYear, aggregate.queryParams));
  }, { concurrency: 10 })
  .then(function (results) {
    // Per-call accumulator keyed by 'respondent_id|agency_code'. It must not be
    // shared between calls: years are fetched concurrently, and a shared map
    // would re-emit another year's respondents under this year's activity_year
    // and overwrite counts that use the same key.
    var totalsByRespondent = {};

    // results[i] is the response for larAggregates[i] (Promise.map keeps order)
    _.each(results, function (larResult, index) {
      var label = larAggregates[index].dbLabel;
      _.each(larResult.body.results, function (respondentTotal) {
        var key = respondentTotal.respondent_id + '|' + respondentTotal.agency_code;
        if (!_.has(totalsByRespondent, key)) {
          totalsByRespondent[key] = {};
        }
        totalsByRespondent[key][label] = respondentTotal.count_sequence_number.toString();
      });
    });

    // clean up the result a bit for putting into mongo
    _.each(totalsByRespondent, function (respondentTotals, key) {
      var keyVals = key.split('|');
      var doc = _.clone(respondentTotals);
      doc['respondent_id'] = keyVals[0];
      doc['agency_code'] = keyVals[1];
      doc['activity_year'] = activityYear;
      // fill with zeroes for missing entries for convenience of queries
      _.each(larAggregates, function (aggregate) {
        if (!_.has(doc, aggregate.dbLabel)) {
          doc[aggregate.dbLabel] = '0';
        }
      });
      finalLARCollection.push(doc);
    });
  });
};

// Base query URI; buildDataAPIURI appends the activity year directly after
// `as_of_year=` and then any extra `+AND+` clauses.
var dataURI = 'https://api.consumerfinance.gov/data/hmda/slice/hmda_lar.json?&$where=as_of_year=';
// Module-level map of aggregate counts, keyed by 'respondent_id|agency_code'.
// NOTE(review): this object is shared by all activity years and is never reset;
// confirm that per-year data is not mixed through it.
var roughLARData = {};
// Output documents; (re)initialised by retrieveData and appended to by getData.
var finalLARCollection;
// Activity years to fetch; retrieveData calls getData once per entry.
var activityYears = ['2012', '2013'];
// One entry per aggregate count stored on each output document:
//   dbLabel     - property name the count is stored under
//   queryParams - extra filters handed to buildDataAPIURI
//                 (array -> IN (...) list, {type, value} -> `key<type><value>`,
//                  scalar -> `key=value`)
var larAggregates = [
{dbLabel: 'totalLoans', queryParams: {}},
{dbLabel: 'totalHomePurchaseLoans', queryParams: {property_type:['1','2'], loan_purpose: '1', action_taken: ['1','6']}},
{dbLabel: 'soldHomePurchaseLoans', queryParams: {purchaser_type:{type: '>', value:'0'}, property_type:['1','2'], loan_purpose: '1', action_taken: ['1','6']}},
{dbLabel: 'totalRefinanceLoans', queryParams: {property_type:['1','2'], loan_purpose: '3', action_taken: ['1','6']}},
{dbLabel: 'soldRefinanceLoans', queryParams: {purchaser_type:{type: '>', value:'0'}, property_type:['1','2'], loan_purpose: '3', action_taken: ['1','6']}},
{dbLabel: 'totalQ70', queryParams: {property_type:['1','2'], loan_purpose: ['1','3'], loan_type: '1', action_taken: ['1','6']}},
{dbLabel: 'compareQ70', queryParams: {property_type:['1','2'], loan_purpose: ['1','3'], loan_type: '1', action_taken: ['1','6'], purchaser_type: ['1','3']}},
{dbLabel: 'totalQ71', queryParams: {property_type:['1','2'], loan_purpose: ['1','3'], loan_type: '2', action_taken: ['1','6']}},
{dbLabel: 'compareQ71', queryParams: {property_type:['1','2'], loan_purpose: ['1','3'], loan_type: '2', action_taken: ['1','6'], purchaser_type: '2'}},
{dbLabel: 'totalQ72', queryParams: {property_type:['1','2'], loan_purpose: ['1','3'], loan_type: '3', action_taken: ['1','6']}},
{dbLabel: 'compareQ72', queryParams: {property_type:['1','2'], loan_purpose: ['1','3'], loan_type: '3', action_taken: ['1','6'], purchaser_type: '2'}}
];


module.exports = {
// retrieve individual aggregate results for every respondent and then merge into a single mongo collection
retrieveData: function() {
finalLARCollection = [];
return Promise.map(activityYears, function(activityYear) {
return getData(activityYear);
})
.then (function() {
return finalLARCollection;
})
}
};



58 changes: 32 additions & 26 deletions data/reload_mongo.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@

var mongoose = require('mongoose');
var fs = require('fs');
var _ = require('underscore');
var async = require('async');
var _ = require('lodash');
var Promise = require('bluebird');
var larAggregates = require('./laraggregates.js');

var config;

Expand All @@ -18,6 +19,22 @@ if (env == 'test') {
config = require('../config/config.json');
}

/**
 * Batch-insert `docs` into the collection behind the mongoose model `key`,
 * logging the outcome to the console.
 *
 * Built on the Promise constructor rather than the deprecated
 * `Promise.defer()`, so a synchronous failure (for example from
 * `mongoose.model(key)`) also surfaces as a rejection instead of an
 * exception thrown at the caller.
 *
 * @param {string} key - name of a registered mongoose model.
 * @param {Array} docs - documents to insert in one batch.
 * @returns {Promise} resolves (with no value) after a successful insert;
 *   rejects with the error reported by the insert.
 */
var insertData = function (key, docs) {
  return new Promise(function (resolve, reject) {
    // Batch insert
    mongoose.model(key).collection.insert(docs, function (err, result) {
      if (err) {
        console.log('ERROR: could not insert data for ' + key);
        console.log(err);
        reject(err);
        return;
      }
      console.log('inserted '+result.length +' records for ' + key);
      resolve();
    });
  });
};

var uri = 'mongodb://' + config.mongoConfig.host + ':' + config.mongoConfig.port + '/' + (config.mongoConfig.database ? config.mongoConfig.database : 'hmda');
var opts = {};
if (config.mongoConfig.username) {
Expand Down Expand Up @@ -49,17 +66,18 @@ mongoose.connection.once('open', function(callback) {
require(models_path + '/' + file);
});

// Create array for storing our loading anon functions
var asyncFunctions = [];

// Now loop through the loaded models
_.each(mongoose.connections[0].base.modelSchemas, function(schema, key) {
// add an anon function to our array
asyncFunctions.push(function (next) {
// Get the data from the json files for each collection
Promise.map(_.keys(mongoose.connections[0].base.modelSchemas), function(key) {
var docs;
if (key.toLowerCase() === 'lar') {
return larAggregates.retrieveData()
.then(function(data) {
return insertData(key, data);
});
} else {
var filename = key.toLowerCase()+'.json';
var lines = fs.readFileSync(__dirname + '/'+filename,'utf8').split('\n');
var docs = lines.map(function(line) {
var docs = lines.map(function(line) {
if (line) {
var data = JSON.parse(line);
delete data['_id'];
Expand All @@ -73,22 +91,10 @@ mongoose.connection.once('open', function(callback) {
return {};
}
});
// Batch insert
mongoose.model(key).collection.insert(docs, function(err, result) {
if (err) {
console.log('ERROR: could not insert data for ' + key);
console.log(err);
} else {
console.log('inserted '+result.length +' records for ' + key);
}
return next();
});

});
});

// Finally, call our array of loading functions in parallel, and exit when done
async.parallel(asyncFunctions, function(err) {
return insertData(key, docs);
}
}, { concurrency: 10 })
.then(function(result) {
console.log('Done...');
process.exit();
});
Expand Down
2 changes: 1 addition & 1 deletion lib/queryUtil.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
'use strict';

var mongoose = require('mongoose'),
_ = require('underscore');
_ = require('lodash');

module.exports = {
exists: function(domain, query, callback) {
Expand Down
21 changes: 13 additions & 8 deletions models/lar.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,19 @@ var mongoose = require('mongoose');

var larSchema = mongoose.Schema({
'activity_year': String,
'respondent_id': String,
'agency_code': String,
'loan_type': String,
'loan_purpose': String,
'loan_amount': String,
'action_type': String,
'purchaser_type': String,
'property_type': String
'respondent_id': String,
'totalLoans' : String,
'totalHomePurchaseLoans' : String,
'soldHomePurchaseLoans' : String,
'totalRefinanceLoans' : String,
'soldRefinanceLoans' : String,
'totalQ70' : String,
'compareQ70' : String,
'totalQ71' : String,
'compareQ71' : String,
'totalQ72' : String,
'compareQ72' : String
});
larSchema.index({'activity_year': 1, 'respondent_id': 1, 'agency_code': 1, 'loan_purpose': 1, 'action_type': 1, 'property_type': 1});
larSchema.index({'activity_year': 1, 'respondent_id': 1, 'agency_code': 1});
module.exports = mongoose.model('Lar', larSchema, 'lar');
4 changes: 3 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@
"localizr": "^0.1.0",
"moment": "^2.9.0",
"mongoose": "^3.8.20",
"underscore": "^1.7.0"
"lodash": "^3.6.0",
"superagent": "^1.0.0",
"bluebird": "^2.9.12"
},
"devDependencies": {
"coveralls": "^2.11.2",
Expand Down
Loading

0 comments on commit 3ddd71c

Please sign in to comment.