refactored Importer into cli Tool, abstract parent, selectable implementation
commit 034364afe8598ff8d33850b5bc77e382730959be 1 parent ee4b9fb
@akollegger authored
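
In brief, the commit splits the old monolithic Importer three ways: FecBatchImporter, an abstract parent that owns the BatchInserter lifecycle, the batch.properties configuration, progress reporting, and the shared Data/Type line-parsing helpers; AbkImporter, a concrete subclass that implements the per-file import methods; and Tool, a commons-cli driver that parses the command line, selects an implementation by name, and runs the full import. The build gains an "ant tool" target that emits an executable bin/fec2graph wrapper, so a typical run becomes: ant tool, then bin/fec2graph --force --data DATA --graphdb fec.graphdb.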
5 .gitignore
@@ -1 +1,6 @@
.DS_Store
+*.graphdb
+ivy/
+bin/
+batch.properties
+neo4j-community*
297 CODE/fecGraph/src/importer/AbkImporter.java
@@ -0,0 +1,297 @@
+package org.followthedata.importer;
+
+import org.neo4j.graphdb.RelationshipType;
+import org.neo4j.graphdb.index.Index;
+import org.neo4j.graphdb.index.IndexManager;
+import org.neo4j.graphdb.index.RelationshipIndex;
+import org.neo4j.kernel.impl.util.FileUtils;
+import org.neo4j.unsafe.batchinsert.BatchInserter;
+import org.neo4j.unsafe.batchinsert.BatchInserters;
+import org.neo4j.unsafe.batchinsert.BatchInserterIndexProvider;
+import org.neo4j.unsafe.batchinsert.BatchInserterIndex;
+import org.neo4j.unsafe.batchinsert.LuceneBatchInserterIndexProvider;
+
+import java.io.*;
+import java.util.*;
+
+import org.neo4j.helpers.collection.MapUtil;
+
+import static org.neo4j.helpers.collection.MapUtil.map;
+import static org.neo4j.helpers.collection.MapUtil.stringMap;
+import static org.neo4j.index.impl.lucene.LuceneIndexImplementation.EXACT_CONFIG;
+import static org.neo4j.index.impl.lucene.LuceneIndexImplementation.FULLTEXT_CONFIG;
+
+public class AbkImporter extends FecBatchImporter {
+
+ public static final int USERS = 3000000;
+
+    enum MyRelationshipTypes implements RelationshipType { SUPPORTS, FOR, CONTRIBUTES, RECEIVES, GAVE, SUPERPACGIFT, SUPERPACEXPEND, SUPERPACACTION }
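+    // cache maps FEC identifiers from the data files to batch-inserter node IDs,
+    // so later import passes can link rows without querying the store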
+ Map<String,Long> cache = new HashMap<String,Long>(USERS);
+ Map<String,Long> contribCache = new HashMap<String,Long>(USERS);
+
+ public AbkImporter(File graphDb) {
+ super(graphDb);
+ }
+
+ @Override
+ protected void importIndiv(Reader reader, int flag) throws IOException {
+ String[] strTemp;
+ BufferedReader bf = new BufferedReader(reader);
+ final Data data = new Data(bf.readLine(), "\\|", 0);
+ String line;
+ report.reset();
+ LuceneBatchInserterIndexProvider indexProvider = new LuceneBatchInserterIndexProvider(db);
+ BatchInserterIndex idxIndivContrib = indexProvider.nodeIndex( "individuals", MapUtil.stringMap( "type", "exact" ) );
+ idxIndivContrib.setCacheCapacity( "indivName", 2000000 );
+ while ((line = bf.readLine()) != null) {
+ strTemp = line.split("\\|");
+ long caller = db.createNode(data.update(line));
+ //System.out.println(caller);
+ Map<String, Object> properties = MapUtil.map( "indivName", strTemp[1]);
+ properties.put("indivCity", strTemp[2]);
+ properties.put("indivState", strTemp[3]);
+ properties.put("indivZip", strTemp[4]);
+ properties.put("indivOCC", strTemp[6]);
+ idxIndivContrib.add(caller,properties);
+ cache.put(strTemp[0], caller);
+
+ report.dots();
+ }
+ idxIndivContrib.flush();
+ indexProvider.shutdown();
+ report.finishImport("Nodes");
+ }
+
+ @Override
+ protected void importCommittees(Reader reader) throws IOException {
+ String[] strTemp;
+ BufferedReader bf = new BufferedReader(reader);
+ final Data data = new Data(bf.readLine(), "\\|", 0);
+ String line;
+ report.reset();
+ LuceneBatchInserterIndexProvider indexProvider = new LuceneBatchInserterIndexProvider(db);
+ BatchInserterIndex idxCommittees = indexProvider.nodeIndex( "committees", MapUtil.stringMap( "type", "exact" ) );
+ idxCommittees.setCacheCapacity( "commName", 100000 );
+
+ while ((line = bf.readLine()) != null) {
+ strTemp = line.split("\\|");
+ long committee = db.createNode(data.update(line));
+ Map<String, Object> properties = MapUtil.map( "commName", strTemp[1]);
+ properties.put("commID", strTemp[0]);
+ properties.put("commTreas", strTemp[3]);
+ properties.put("commState", strTemp[7]);
+ idxCommittees.add(committee,properties);
+ //System.out.println(caller);
+ cache.put(strTemp[0], committee);
+ report.dots();
+ }
+ idxCommittees.flush();
+ indexProvider.shutdown();
+
+ report.finishImport("Nodes");
+ }
+
+ @Override
+ protected void importSuperPac(Reader reader) throws IOException {
+ String[] strTemp;
+ BufferedReader bf = new BufferedReader(reader);
+ final Data data = new Data(bf.readLine(), "\\|", 0);
+ String line;
+ report.reset();
+ while ((line = bf.readLine()) != null) {
+ strTemp = line.split("\\|");
+ Long lCommId = cache.get(strTemp[1]);
+            if (lCommId==null){
+                // only create a new node when this super PAC's committee ID isn't already cached
+                long caller = db.createNode(data.update(line));
+                cache.put(strTemp[0], caller);
+            }
+ //System.out.println(caller);
+
+ report.dots();
+ }
+ report.finishImport("Nodes");
+ }
+
+ @Override
+ protected void importSuperPacContrib(Reader reader) throws IOException {
+ String[] strTemp;
+ BufferedReader bf = new BufferedReader(reader);
+ final Data data = new Data(bf.readLine(), "\\|", 0);
+ String line;
+ LuceneBatchInserterIndexProvider indexProvider = new LuceneBatchInserterIndexProvider(db);
+ BatchInserterIndex idxSuperPacContribs = indexProvider.nodeIndex( "superPacDonations", MapUtil.stringMap( "type", "fulltext" ) );
+ idxSuperPacContribs.setCacheCapacity( "commID", 200000 );
+
+ report.reset();
+ while ((line = bf.readLine()) != null) {
+ strTemp = line.split("\\|");
+ long pacCont = db.createNode(data.update(line));
+ Long lCommId = cache.get(strTemp[2]);
+ if (lCommId!=null){
+ db.createRelationship(lCommId, pacCont, MyRelationshipTypes.SUPERPACGIFT, null);
+ }
+
+ Map<String, Object> properties = MapUtil.map( "commID", strTemp[2]);
+ properties.put("donatingOrg", strTemp[3]);
+ properties.put("donorLast", strTemp[4]);
+ properties.put("donorFirst", strTemp[5]);
+ properties.put("donorState", strTemp[7]);
+ // properties.put("donorFullName", strTemp[15]);
+ idxSuperPacContribs.add(pacCont,properties);
+ report.dots();
+ }
+ System.out.println("Finished with SUPERPAC Contributions");
+ report.finishImport("Nodes");
+ idxSuperPacContribs.flush();
+ indexProvider.shutdown();
+ }
+
+ @Override
+ protected void importSuperPacExpend(Reader reader) throws IOException {
+ String[] strTemp;
+ BufferedReader bf = new BufferedReader(reader);
+ final Data data = new Data(bf.readLine(), "\\|", 0);
+ String line;
+ LuceneBatchInserterIndexProvider indexProvider = new LuceneBatchInserterIndexProvider(db);
+ BatchInserterIndex idxSuperPacExpend = indexProvider.nodeIndex( "superPacExpend", MapUtil.stringMap( "type", "exact" ) );
+ idxSuperPacExpend.setCacheCapacity( "commID", 200000 );
+
+ report.reset();
+ while ((line = bf.readLine()) != null) {
+ strTemp = line.split("\\|");
+ // System.out.println(line);
+ long pacExpend = db.createNode(data.update(line));
+ Long lCommId = cache.get(strTemp[3]);
+ Long lCandId = cache.get(strTemp[7]);
+ if (lCommId!=null){
+ db.createRelationship(lCommId, pacExpend, MyRelationshipTypes.SUPERPACEXPEND, null);
+ }
+ if (lCandId!=null){
+ db.createRelationship(lCandId, pacExpend, MyRelationshipTypes.SUPERPACACTION, null);
+ }
+
+ Map<String, Object> properties = MapUtil.map( "commID", strTemp[2]);
+ properties.put("isSuperPAC", strTemp[3]);
+ properties.put("candidate", strTemp[5]);
+ properties.put("SUPPORT_OPPOSE", strTemp[6]);
+ properties.put("expendAmt", strTemp[12]);
+ idxSuperPacExpend.add(pacExpend,properties);
+ report.dots();
+ }
+ idxSuperPacExpend.flush();
+ indexProvider.shutdown();
+ System.out.println("Finished with SUPERPAC Expenditures");
+ report.finishImport("Nodes");
+ }
+
+ @Override
+ protected void importCandidates(Reader reader) throws IOException {
+ String[] strTemp;
+ BufferedReader bf = new BufferedReader(reader);
+ final Data data = new Data(bf.readLine(), "\\|", 0);
+ String line;
+ report.reset();
+ LuceneBatchInserterIndexProvider indexProvider = new LuceneBatchInserterIndexProvider(db);
+
+ BatchInserterIndex candidates = indexProvider.nodeIndex( "candidates", MapUtil.stringMap( "type", "exact" ) );
+ candidates.setCacheCapacity( "candidateName", 100000 );
+ while ((line = bf.readLine()) != null) {
+ strTemp = line.split("\\|");
+ long polCand = db.createNode(data.update(line));
+ Map<String, Object> properties = MapUtil.map( "candidateName", strTemp[1]);
+ properties.put("candidateID", strTemp[0]);
+ properties.put("candidateParty", strTemp[3]);
+ properties.put("candidateOfficeState", strTemp[5]);
+ properties.put("candidateElectionYear",strTemp[4]);
+ candidates.add(polCand,properties);
+ candidates.flush();
+ Long lCommId = cache.get(strTemp[10]);
+ if (lCommId!=null){
+ db.createRelationship(lCommId, polCand, MyRelationshipTypes.SUPPORTS, null);
+ }
+ report.dots();
+ }
+ candidates.flush();
+ indexProvider.shutdown();
+ report.finishImport("Nodes");
+ }
+
+ @Override
+ protected void importContrib(Reader reader) throws IOException {
+ String[] strTemp;
+ BufferedReader bf = new BufferedReader(reader);
+ final Data data = new Data(bf.readLine(), "\\|", 0);
+ String line;
+ report.reset();
+ LuceneBatchInserterIndexProvider indexProvider = new LuceneBatchInserterIndexProvider(db);
+ BatchInserterIndex contributors = indexProvider.nodeIndex( "contributions", MapUtil.stringMap( "type", "exact" ) );
+ contributors.setCacheCapacity( "commID", 2500000 );
+
+ while ((line = bf.readLine()) != null) {
+ strTemp = line.split("\\|",-1);
+ //System.out.println(line);
+ long indContr = db.createNode(data.update(line));
+ Long lCommId = cache.get(strTemp[1]);
+ Long lIndivId = cache.get(strTemp[0]);
+ if (lCommId!=null){
+ db.createRelationship(lCommId, indContr, MyRelationshipTypes.RECEIVES, null);
+
+ }
+ if (lIndivId!=null){
+                db.createRelationship(lIndivId, indContr, MyRelationshipTypes.GAVE, null);
+ }
+
+ try{
+ Map<String, Object> properties = MapUtil.map( "commID", strTemp[1]);
+ properties.put("contribDate", strTemp[3]);
+ properties.put("contribAmt", strTemp[4]);
+ contributors.add(indContr,properties);
+ } catch (Exception e){
+ System.out.println(e);
+ }
+ report.dots();
+
+ }
+ contributors.flush();
+ indexProvider.shutdown();
+ report.finishImport("Nodes");
+ }
+
+ @Override
+ protected void importRelationships(Reader reader) throws IOException {
+ BufferedReader bf = new BufferedReader(reader);
+ final Data data = new Data(bf.readLine(), "\\|", 3);
+ Object[] rel = new Object[3];
+ final RelType relType = new RelType();
+ String line;
+ report.reset();
+ while ((line = bf.readLine()) != null) {
+ final Map<String, Object> properties = data.update(line, rel);
+ db.createRelationship(id(rel[0]), id(rel[1]), relType.update(rel[2]), properties);
+ report.dots();
+ }
+ report.finishImport("Relationships");
+ }
+
+ @Override
+ protected void importIndex(String indexName, BatchInserterIndex index, Reader reader) throws IOException {
+
+ BufferedReader bf = new BufferedReader(reader);
+
+ final Data data = new Data(bf.readLine(), "\\|", 1);
+ Object[] node = new Object[1];
+ String line;
+ report.reset();
+ while ((line = bf.readLine()) != null) {
+ final Map<String, Object> properties = data.update(line, node);
+ index.add(id(node[0]), properties);
+ report.dots();
+ }
+
+ report.finishImport("Done inserting into " + indexName + " Index");
+ }
+
+}
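
Each importXxx method above follows the same shape: read the pipe-delimited file line by line, create one node per row, cache the node ID under its FEC identifier so later passes can create relationships, and index a few properties through a Lucene batch index. A minimal, self-contained sketch of that pattern, using only the batch APIs imported above (the file name, index name, and property keys are illustrative, not real FEC fields):

    import java.io.BufferedReader;
    import java.io.FileReader;
    import java.io.IOException;
    import java.util.HashMap;
    import java.util.Map;

    import org.neo4j.helpers.collection.MapUtil;
    import org.neo4j.unsafe.batchinsert.BatchInserter;
    import org.neo4j.unsafe.batchinsert.BatchInserters;
    import org.neo4j.unsafe.batchinsert.BatchInserterIndex;
    import org.neo4j.unsafe.batchinsert.LuceneBatchInserterIndexProvider;

    public class ImportPatternSketch {
        public static void main(String[] args) throws IOException {
            BatchInserter db = BatchInserters.inserter("example.graphdb", new HashMap<String, String>());
            LuceneBatchInserterIndexProvider lucene = new LuceneBatchInserterIndexProvider(db);
            BatchInserterIndex index = lucene.nodeIndex("things", MapUtil.stringMap("type", "exact"));
            Map<String, Long> cache = new HashMap<String, Long>();

            BufferedReader in = new BufferedReader(new FileReader("things.dta"));
            in.readLine(); // skip the header row
            String line;
            while ((line = in.readLine()) != null) {
                String[] cols = line.split("\\|", -1);
                Map<String, Object> props = MapUtil.map("name", cols[1]);
                long node = db.createNode(props);  // one node per data row
                index.add(node, props);            // make it findable by property
                cache.put(cols[0], node);          // remember node ID by file ID for relationship passes
            }
            in.close();
            index.flush();    // flush once, after the loop
            lucene.shutdown();
            db.shutdown();
        }
    }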
313 CODE/fecGraph/src/importer/FecBatchImporter.java
@@ -0,0 +1,313 @@
+package org.followthedata.importer;
+
+import org.neo4j.graphdb.RelationshipType;
+import org.neo4j.graphdb.index.Index;
+import org.neo4j.graphdb.index.IndexManager;
+import org.neo4j.graphdb.index.RelationshipIndex;
+import org.neo4j.kernel.impl.util.FileUtils;
+import org.neo4j.unsafe.batchinsert.BatchInserter;
+import org.neo4j.unsafe.batchinsert.BatchInserters;
+import org.neo4j.unsafe.batchinsert.BatchInserterIndexProvider;
+import org.neo4j.unsafe.batchinsert.BatchInserterIndex;
+import org.neo4j.unsafe.batchinsert.LuceneBatchInserterIndexProvider;
+
+import java.io.*;
+import java.util.*;
+
+import org.neo4j.helpers.collection.MapUtil;
+
+import static org.neo4j.helpers.collection.MapUtil.map;
+import static org.neo4j.helpers.collection.MapUtil.stringMap;
+import static org.neo4j.index.impl.lucene.LuceneIndexImplementation.EXACT_CONFIG;
+import static org.neo4j.index.impl.lucene.LuceneIndexImplementation.FULLTEXT_CONFIG;
+
+public abstract class FecBatchImporter {
+ protected static Report report;
+ protected BatchInserter db;
+ protected BatchInserterIndexProvider lucene;
+
+
+ public FecBatchImporter(File graphDb) {
+ Map<String, String> config = new HashMap<String, String>();
+ try {
+ if (new File("batch.properties").exists()) {
+ System.out.println("Using Existing Configuration File");
+ } else {
+ System.out.println("Writing Configuration File to batch.properties");
+ FileWriter fw = new FileWriter( "batch.properties" );
+ fw.append( "use_memory_mapped_buffers=true\n"
+ + "neostore.nodestore.db.mapped_memory=100M\n"
+ + "neostore.relationshipstore.db.mapped_memory=500M\n"
+ + "neostore.propertystore.db.mapped_memory=1G\n"
+ + "neostore.propertystore.db.strings.mapped_memory=200M\n"
+ + "neostore.propertystore.db.arrays.mapped_memory=0M\n"
+ + "neostore.propertystore.db.index.keys.mapped_memory=15M\n"
+ + "neostore.propertystore.db.index.mapped_memory=15M" );
+ fw.close();
+ }
+
+ config = MapUtil.load( new File(
+ "batch.properties" ) );
+
+ } catch (Exception e) {
+ System.out.println(e.getMessage());
+ }
+
+ db = createBatchInserter(graphDb, config);
+ lucene = createIndexProvider();
+ report = createReport();
+ }
+
+ protected StdOutReport createReport() {
+ return new StdOutReport(10 * 1000 * 1000, 100);
+ }
+
+ protected LuceneBatchInserterIndexProvider createIndexProvider() {
+ return new LuceneBatchInserterIndexProvider(db);
+ }
+
+ protected BatchInserter createBatchInserter(File graphDb, Map<String, String> config) {
+ return BatchInserters.inserter(graphDb.getAbsolutePath(), config);
+ }
+
+ public void finish() {
+ lucene.shutdown();
+ db.shutdown();
+ // report.finish();
+ }
+
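+    // Data parses one pipe-delimited file: the header row names the columns
+    // (optionally typed as "name:type"), and update() turns each data row into
+    // a property map, routing the first offset columns to the caller instead.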
+ public static class Data {
+ private Object[] data;
+ private final int offset;
+ private final String delim;
+ private final String[] fields;
+ private final String[] lineData;
+ private final Type types[];
+ private final int lineSize;
+ private int dataSize;
+
+ public Data(String header, String delim, int offset) {
+ this.offset = offset;
+ this.delim = delim;
+ fields = header.split(delim);
+ lineSize = fields.length;
+ types = parseTypes(fields);
+ lineData = new String[lineSize];
+ createMapData(lineSize, offset);
+ }
+
+ private Object[] createMapData(int lineSize, int offset) {
+ dataSize = lineSize - offset;
+ data = new Object[dataSize*2];
+ for (int i = 0; i < dataSize; i++) {
+ data[i * 2] = fields[i + offset];
+ }
+ return data;
+ }
+
+ private Type[] parseTypes(String[] fields) {
+ Type[] types = new Type[lineSize];
+ Arrays.fill(types, Type.STRING);
+ for (int i = 0; i < lineSize; i++) {
+ String field = fields[i];
+ int idx = field.indexOf(':');
+ if (idx!=-1) {
+ fields[i]=field.substring(0,idx);
+ types[i]= Type.fromString(field.substring(idx + 1));
+ }
+ }
+ return types;
+ }
+
+ private int split(String line) {
+ // final StringTokenizer st = new StringTokenizer(line, delim,true);
+            final String[] values = line.split(delim, -1); // -1 keeps trailing empty fields, which split() drops by default
+
+// System.out.println(line);
+ if (values.length < lineSize) {
+ System.err.println("ERROR: line has fewer than expected fields (" + lineSize + ")");
+ System.err.println(line);
+ System.exit(1); // ABK TODO: manage error codes
+ }
+ int count=0;
+ for (int i = 0; i < lineSize; i++) {
+ // String value = st.nextToken();
+ String value = values[i];
+ lineData[i] = value.trim().isEmpty() ? null : value;
+ if (i >= offset && lineData[i]!=null) {
+ data[count++]=fields[i];
+ data[count++]=types[i].convert(lineData[i]);
+ }
+ }
+ return count;
+ }
+
+ public Map<String,Object> update(String line, Object... header) {
+ int nonNullCount = split(line);
+ if (header.length > 0) {
+ System.arraycopy(lineData, 0, header, 0, header.length);
+ }
+
+ if (nonNullCount == dataSize*2) {
+ return map(data);
+ }
+ Object[] newData=new Object[nonNullCount];
+ System.arraycopy(data,0,newData,0,nonNullCount);
+ return map(newData);
+ }
+
+ }
+
+ static class StdOutReport implements Report {
+ private final long batch;
+ private final long dots;
+ private long count;
+ private long total = System.currentTimeMillis(), time, batchTime;
+
+ public StdOutReport(long batch, int dots) {
+ this.batch = batch;
+ this.dots = batch / dots;
+ }
+
+ @Override
+ public void reset() {
+ count = 0;
+ batchTime = time = System.currentTimeMillis();
+ }
+
+ @Override
+ public void finish() {
+ System.out.println("\nTotal import time: "+ (System.currentTimeMillis() - total) / 1000 + " seconds ");
+ }
+
+ @Override
+ public void dots() {
+ if ((++count % dots) != 0) return;
+ System.out.print(".");
+ if ((count % batch) != 0) return;
+ long now = System.currentTimeMillis();
+ System.out.println(" "+ (now - batchTime) + " ms for "+batch);
+ batchTime = now;
+ }
+
+ @Override
+ public void finishImport(String type) {
+ System.out.println("\nImporting " + count + " " + type + " took " + (System.currentTimeMillis() - time) / 1000 + " seconds ");
+ }
+ }
+
+ protected abstract void importIndiv(Reader reader, int flag) throws IOException;
+
+ protected abstract void importCommittees(Reader reader) throws IOException;
+
+ protected abstract void importSuperPac(Reader reader) throws IOException;
+
+ protected abstract void importSuperPacContrib(Reader reader) throws IOException;
+
+ protected abstract void importSuperPacExpend(Reader reader) throws IOException;
+
+ protected abstract void importCandidates(Reader reader) throws IOException;
+
+ protected abstract void importContrib(Reader reader) throws IOException;
+
+ protected abstract void importRelationships(Reader reader) throws IOException;
+
+ protected abstract void importIndex(String indexName, BatchInserterIndex index, Reader reader) throws IOException;
+
+ protected BatchInserterIndex nodeIndexFor(String indexName, String indexType) {
+ return lucene.nodeIndex(indexName, configFor(indexType));
+ }
+
+ protected BatchInserterIndex relationshipIndexFor(String indexName, String indexType) {
+ return lucene.relationshipIndex(indexName, configFor(indexType));
+ }
+
+ protected Map<String, String> configFor(String indexType) {
+ return indexType.equals("fulltext") ? FULLTEXT_CONFIG : EXACT_CONFIG;
+ }
+
+ static class RelType implements RelationshipType {
+ String name;
+
+ public RelType update(Object value) {
+ this.name = value.toString();
+ return this;
+ }
+
+ public String name() {
+ return name;
+ }
+ }
+
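+    // Type converts a raw string field into a typed property value; the header
+    // suffix after ':' (for example "electionYear:int") selects the variant.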
+ public enum Type {
+ BOOLEAN {
+ @Override
+ public Object convert(String value) {
+ return Boolean.valueOf(value);
+ }
+ },
+ INT {
+ @Override
+ public Object convert(String value) {
+ return Integer.valueOf(value);
+ }
+ },
+ LONG {
+ @Override
+ public Object convert(String value) {
+ return Long.valueOf(value);
+ }
+ },
+ DOUBLE {
+ @Override
+ public Object convert(String value) {
+ return Double.valueOf(value);
+ }
+ },
+ FLOAT {
+ @Override
+ public Object convert(String value) {
+ return Float.valueOf(value);
+ }
+ },
+ BYTE {
+ @Override
+ public Object convert(String value) {
+ return Byte.valueOf(value);
+ }
+ },
+ SHORT {
+ @Override
+ public Object convert(String value) {
+ return Short.valueOf(value);
+ }
+ },
+ CHAR {
+ @Override
+ public Object convert(String value) {
+ return value.charAt(0);
+ }
+ },
+ STRING {
+ @Override
+ public Object convert(String value) {
+ return value;
+ }
+ };
+
+ private static Type fromString(String typeString) {
+ if (typeString==null || typeString.isEmpty()) return Type.STRING;
+ try {
+ return valueOf(typeString.toUpperCase());
+ } catch (Exception e) {
+ throw new IllegalArgumentException("Unknown Type "+typeString);
+ }
+ }
+
+ public abstract Object convert(String value);
+ }
+
+ protected long id(Object id) {
+ return Long.parseLong(id.toString());
+ }
+}
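
One detail of the Data helper worth calling out: a header field may carry a :type suffix (parseTypes strips it from the field name and selects a Type converter), and the first offset columns of each row are copied into the caller's array rather than the property map. A small hypothetical example, with made-up header and row values:

    // Hypothetical usage of FecBatchImporter.Data; the header and row are invented.
    FecBatchImporter.Data data = new FecBatchImporter.Data("candID|candName|electionYear:int", "\\|", 1);
    Object[] key = new Object[1];
    Map<String, Object> props = data.update("H0XY12345|SMITH, JOHN|2012", key);
    // key[0] is "H0XY12345" (the offset column, kept out of the map);
    // props maps candName to "SMITH, JOHN" and electionYear to the Integer 2012.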
22 CODE/fecGraph/src/importer/Importer.java
@@ -197,17 +197,20 @@ public Data(String header, String delim, int offset) {
}
private int split(String line) {
- final StringTokenizer st = new StringTokenizer(line, delim,true);
+ // final StringTokenizer st = new StringTokenizer(line, delim,true);
+        final String[] values = line.split(delim, -1); // -1 keeps trailing empty fields, which split() drops by default
+
// System.out.println(line);
+ if (values.length < lineSize) {
+ System.err.println("ERROR: line has fewer than expected fields (" + lineSize + ")");
+ System.err.println(line);
+ System.exit(1); // ABK TODO: manage error codes
+ }
int count=0;
for (int i = 0; i < lineSize; i++) {
- String value = st.nextToken();
- if (value.equals(delim)) {
- lineData[i] = null;
- } else {
- lineData[i] = value.trim().isEmpty() ? null : value;
- if (i< lineSize -1) st.nextToken();
- }
+ // String value = st.nextToken();
+ String value = values[i];
+ lineData[i] = value.trim().isEmpty() ? null : value;
if (i >= offset && lineData[i]!=null) {
data[count++]=fields[i];
data[count++]=types[i].convert(lineData[i]);
@@ -371,8 +374,9 @@ void importSuperPacContrib(Reader reader) throws IOException {
Map<String, Object> properties = MapUtil.map( "commID", strTemp[2]);
properties.put("donatingOrg", strTemp[3]);
properties.put("donorLast", strTemp[4]);
+ properties.put("donorFirst", strTemp[5]);
properties.put("donorState", strTemp[7]);
- properties.put("donorFullName", strTemp[15]);
+ // properties.put("donorFullName", strTemp[15]);
idxSuperPacContribs.add(pacCont,properties);
report.dots();
}
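
The change above mirrors the split() logic that FecBatchImporter.Data now shares: the old StringTokenizer approach, with returnDelims set to true, needed the token/delimiter bookkeeping visible in the removed lines and was fragile around consecutive delimiters, while String.split yields the fields directly and lets the importer fail fast, printing the offending line, when a row has fewer columns than the header declares.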
184 CODE/fecGraph/src/importer/Tool.java
@@ -0,0 +1,184 @@
+package org.followthedata.importer;
+
+import org.apache.commons.cli.*;
+import java.io.*;
+import java.util.*;
+import org.neo4j.kernel.impl.util.FileUtils;
+
+
+public class Tool {
+
+ enum ImportImplementors {
+ DFAUTH,
+ AKOLLEGGER
+ }
+
+ FecBatchImporter fecBatchImporter;
+
+ public Tool(FecBatchImporter fecBatchImporter)
+ {
+ this.fecBatchImporter = fecBatchImporter;
+ }
+
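+    // Import order below matters: committees, candidates, and individuals are
+    // loaded first so their node IDs are cached before the contribution and
+    // super PAC passes try to create relationships to them.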
+ public void importAll(String[] committeeFilenames,
+ String[] candidateFilenames,
+ String[] individualFilenames,
+ String[] contributionFilenames,
+ String[] superPacFilenames,
+ String[] superPacContributionFilenames,
+ String[] superPacExpenditureFilenames)
+ throws IOException
+ {
+ try {
+ for (String committee : committeeFilenames) {
+ importCommitteesFrom(new File(committee));
+ }
+ for (String candidate : candidateFilenames) {
+ importCandidatesFrom(new File(candidate));
+ }
+ for (String individual : individualFilenames) {
+ importIndividualsFrom(new File(individual));
+ }
+ for (String contribution : contributionFilenames) {
+ importContributionsFrom(new File(contribution));
+ }
+ for (String superPac : superPacFilenames) {
+ importCommitteesFrom(new File(superPac));
+ }
+ for (String superPacContributions : superPacContributionFilenames) {
+ importSuperPacContributionsFrom(new File(superPacContributions));
+ }
+ for (String superPacExpenditure : superPacExpenditureFilenames) {
+ importSuperPacExpendituresFrom(new File(superPacExpenditure));
+ }
+
+ System.out.println("finished");
+ } finally {
+ fecBatchImporter.finish();
+ }
+ }
+
+ private void importCommitteesFrom(File committeeFile) throws IOException {
+ if (committeeFile.exists()) fecBatchImporter.importCommittees(new FileReader(committeeFile));
+ }
+ private void importCandidatesFrom(File candidateFile) throws IOException {
+ if (candidateFile.exists()) fecBatchImporter.importCandidates(new FileReader(candidateFile));
+ }
+ private void importIndividualsFrom(File individualFile) throws IOException {
+ if (individualFile.exists()) fecBatchImporter.importIndiv(new FileReader(individualFile),0); // ABK TODO - what's the flag for?
+ }
+ private void importContributionsFrom(File contributionFile) throws IOException {
+ if (contributionFile.exists()) fecBatchImporter.importContrib(new FileReader(contributionFile));
+ }
+ private void importSuperPacContributionsFrom(File superPacContributionFile) throws IOException {
+ if (superPacContributionFile.exists()) fecBatchImporter.importSuperPacContrib(new FileReader(superPacContributionFile));
+ }
+ private void importSuperPacExpendituresFrom(File superPacExpenditureFile) throws IOException {
+ if (superPacExpenditureFile.exists()) fecBatchImporter.importSuperPacExpend(new FileReader(superPacExpenditureFile));
+ }
+
+ public static void main(String[] args) {
+
+ Option help = new Option( "h", "help", false, "print this message" );
+ Option force = new Option( "f", "force", false, "force overwrite of existing database, if it exists" );
+ Option graphdb = new Option( "g", "graphdb", true, "location of graph database store directory (DEFAULT: fec.graphdb)" );
+ Option datadir = new Option("d", "data", true, "location of FEC data files (DEFAULT: DATA)");
+ Option importer = new Option("i", "importer", true, "name of importer to use for creating graph");
+
+        Options options = new Options();
+        options.addOption( help );
+        options.addOption( force );
+        options.addOption( graphdb );
+        options.addOption( datadir );  // -d and -i are defined above and must be
+        options.addOption( importer ); // registered, or GnuParser rejects them
+
+ CommandLineParser parser = new GnuParser();
+ try {
+ // parse the command line arguments
+ CommandLine line = parser.parse( options, args );
+
+            if (line.hasOption(help.getOpt())) {
+                HelpFormatter formatter = new HelpFormatter();
+                formatter.setWidth(120);
+                formatter.printHelp( "fec2graph", options );
+                System.exit(0); // print usage and stop; don't fall through to the import
+            }
+
+ File graphdbDirectory = new File(line.getOptionValue(graphdb.getOpt(), "fec.graphdb"));
+ if (graphdbDirectory.exists()) {
+ if (line.hasOption("force")) {
+ try {
+ FileUtils.deleteRecursively(graphdbDirectory);
+ } catch (IOException ioe) {
+                        System.err.println("Failed to clear database directory " + graphdbDirectory.getPath() + " because: " + ioe.getMessage());
+ System.exit(1);
+ }
+ } else {
+ // database exists, without force
+ System.err.println("WARNING: Graph database exists at " + graphdbDirectory.getPath());
+ System.err.println("\tUse --force to overwrite. Aborting.");
+ System.exit(2);
+ }
+ }
+
+
+ File dataDir = new File(line.getOptionValue(datadir.getOpt(), "DATA"));
+ if (!dataDir.exists()) {
+ System.err.println("ERROR: FEC data file does not exist at " + dataDir.getPath() + ". Aborting.");
+ System.exit(3);
+ }
+
+ String[] committees = new String[] {
+ dataDir.getPath() + File.separator + "committee.dta"
+ };
+ String[] candidates = new String[] {
+ dataDir.getPath() + File.separator + "candidate.dta"
+ };
+ String[] individuals = new String[] {
+ dataDir.getPath() + File.separator + "indivContrib1.dta",
+ dataDir.getPath() + File.separator + "indivContrib2.dta"
+ };
+ String[] contributions = new String[] {
+ dataDir.getPath() + File.separator + "allIndivContrib1.dta",
+ dataDir.getPath() + File.separator + "allIndivContrib2.dta",
+ dataDir.getPath() + File.separator + "allIndivContrib3.dta",
+ dataDir.getPath() + File.separator + "allIndivContrib4.dta",
+ dataDir.getPath() + File.separator + "allIndivContrib5.dta"
+ };
+ String[] superPacs = new String[] {
+ dataDir.getPath() + File.separator + "superPacList.dta"
+ };
+ String[] superPacContributions = new String[] {
+ dataDir.getPath() + File.separator + "superPacDonors.dta"
+ };
+ String[] superPacExpenditures = new String[] {
+ dataDir.getPath() + File.separator + "superPacExpend.dta"
+ };
+
+ // Pick a batch-importer implementation
+ ImportImplementors selectedImplementor = ImportImplementors.valueOf(line.getOptionValue(importer.getOpt(), "AKOLLEGGER").toUpperCase());
+ FecBatchImporter selectedImporter = null;
+ switch (selectedImplementor) {
+ // case DFAUTH: selectedImporter = new Importer(graphDbDirectory);
+ default: selectedImporter = new AbkImporter(graphdbDirectory);
+ }
+
+ // run the batch import
+ System.out.println("Importing raw data from " + dataDir.getPath() + " to graph at " + graphdbDirectory.getPath() + " using " + selectedImplementor);
+ Tool fec2graph = new Tool(selectedImporter);
+
+ fec2graph.importAll(
+ committees,
+ candidates,
+ individuals,
+ contributions,
+ superPacs,
+ superPacContributions,
+ superPacExpenditures
+ );
+ }
+ catch( ParseException exp ) {
+ System.err.println( "Parsing failed. Reason: " + exp.getMessage() );
+ }
+ catch (IOException ioe) {
+ System.err.println( "Import failed, because: " + ioe.getMessage() );
+ }
+ }
+}
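
The selectable implementation named in the commit title is the enum-plus-switch seam in main: -i picks an ImportImplementors constant via valueOf on the upper-cased option value. Wiring in a third importer would be mechanical; a hypothetical sketch, with MyImporter standing in for a real subclass of FecBatchImporter that overrides its nine abstract importXxx methods:

    enum ImportImplementors { DFAUTH, AKOLLEGGER, MYIMPORTER }  // 1. add a constant

    // 2. add a case to the switch in main
    switch (selectedImplementor) {
        case MYIMPORTER: selectedImporter = new MyImporter(graphdbDirectory); break; // hypothetical
        // case DFAUTH:  selectedImporter = new Importer(graphdbDirectory);   break;
        default:         selectedImporter = new AbkImporter(graphdbDirectory);
    }

After which bin/fec2graph -i myimporter would select it.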
74 build.xml
@@ -1,8 +1,18 @@
-<project name="fecGraph" default="go" xmlns:ivy="antlib:org.apache.ivy.ant">
+<project name="fec-graph" default="go" xmlns:ivy="antlib:org.apache.ivy.ant">
+
+ <property name="project.version" value="1.0"/>
+ <property name="project.main.class" value="org.followthedata.importer.Tool"/>
+ <property name="project.jar" value="${ant.project.name}-${project.version}.jar"/>
<property name="lib.dir" value="CODE/fecGraph/lib" />
<property name="build.dir" value="CODE/fecGraph/bin" />
<property name="src.dir" value="CODE/fecGraph/src" />
+ <property name="bin.dir" value="bin" />
+
+ <property name="neo4j.home.dir" location="neo4j-home" />
+ <property name="neo4j.data.dir" location="fec.graphdb" />
+ <property name="neo4j.version" value="1.8.RC1" />
+ <property name="neo4j.server" value="neo4j-community-${neo4j.version}" />
<!-- paths used for compilation and run -->
<path id="lib.path.id">
@@ -38,7 +48,7 @@
<target name="go" depends="install-ivy"
description="--> resolve dependencies, compile and run the project">
- <ivy:retrieve pattern="${lib.dir}/${ivy.retrieve.pattern}"/>
+ <ivy:retrieve pattern="${lib.dir}/[artifact]-[revision].[ext]" sync="true"/>
<echo message="compiling..."/>
<mkdir dir="${build.dir}" />
@@ -46,6 +56,23 @@
</target>
+ <target name="jar" depends="go">
+ <jar destfile="${lib.dir}/${project.jar}" basedir="${build.dir}">
+ <manifest>
+ <attribute name="Main-Class" value="${project.main.class}"/>
+ </manifest>
+ </jar>
+ </target>
+
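+  <!-- tool writes a small launcher that puts every jar in ${lib.dir} on the
+       classpath: the project jar plus the ivy-retrieved Neo4j and commons-cli jars -->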
+ <target name="tool" depends="jar">
+ <mkdir dir="${bin.dir}" />
+ <echo file="${bin.dir}/fec2graph">#!/usr/bin/env bash
+exec java -cp "${lib.dir}/*" ${project.main.class} "$@"
+ </echo>
+ <chmod file="${bin.dir}/fec2graph" perm="+x"/>
+ </target>
+
+
<!-- =================================
target: clean
@@ -53,6 +80,7 @@
<target name="clean" description="--> clean the project">
<delete includeemptydirs="true" quiet="true">
<fileset dir="${build.dir}" />
+ <fileset dir="${bin.dir}" />
</delete>
</target>
@@ -70,4 +98,46 @@
description="--> clean the ivy cache">
<ivy:cleancache />
</target>
+
+
+ <!-- =================================
+ target: neo4j-install
+ ================================= -->
+ <target name="-check-neo4j-install">
+ <available file="${neo4j.server}/bin/neo4j" property="neo4j.installed"/>
+ </target>
+
+ <target name="neo4j-install" depends="-check-neo4j-install" description="downloads Neo4j server" unless="${neo4j.installed}">
+ <get src="http://dist.neo4j.org/${neo4j.server}-unix.tar.gz" skipexisting="true" dest="."/>
+ <gunzip src="${neo4j.server}-unix.tar.gz"/>
+ <untar src="${neo4j.server}-unix.tar" dest="." />
+ <delete file="${neo4j.server}-unix.tar" />
+ <chmod file="${neo4j.server}/bin/neo4j" perm="+x"/>
+ </target>
+
+ <target name="neo4j-config" depends="neo4j-install" description="updates Neo4j configuration to use imported FEC graph database">
+ <exec executable="sed">
+ <arg value="-e"/>
+ <arg value="/org\.neo4j\.server\.database\.location/d"/>
+ <arg value="-i"/>
+ <arg value=""/>
+ <arg value="${neo4j.server}/conf/neo4j-server.properties"/>
+ </exec>
+ <echo file="${neo4j.server}/conf/neo4j-server.properties" append="true">org.neo4j.server.database.location=${neo4j.data.dir}
+ </echo>
+ </target>
+
+ <target name="neo4j-start" depends="neo4j-config">
+ <exec executable="${neo4j.server}/bin/neo4j">
+ <arg value="start" />
+ </exec>
+ </target>
+
+
+ <target name="neo4j-stop" depends="neo4j-config">
+ <exec executable="${neo4j.server}/bin/neo4j">
+ <arg value="stop" />
+ </exec>
+ </target>
+
</project>
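
Net effect on the build: ant tool chains go, jar, and tool, compiling the sources, packaging fec-graph-1.0.jar with Tool as its Main-Class, and emitting the executable bin/fec2graph wrapper; the neo4j-install, neo4j-config, and neo4j-start targets then download Neo4j 1.8.RC1, point org.neo4j.server.database.location at the imported fec.graphdb, and start the server against it. One portability note: the sed -i '' spelling in neo4j-config is the BSD/macOS form; GNU sed would treat the empty argument as a file name.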
1  ivy.xml
@@ -2,6 +2,7 @@
<info organisation="org.apache" module="hello-ivy"/>
<dependencies>
<dependency org="org.apache.geronimo.specs" name="geronimo-jta_1.1_spec" rev="1.1.1" />
+ <dependency org="commons-cli" name="commons-cli" rev="1.2" />
<dependency org="org.neo4j" name="neo4j" rev="1.8.M07" />
</dependencies>
</ivy-module>