Permalink
Browse files

Merged graph.py and load.py together

In anticipation of resolving other data, graph.py and load.py were
merged together into a single file. Another file, something like
contributions.py, with a similar structure will exist soon enough.
  • Loading branch information...
Zack Maril
Zack Maril committed Jul 3, 2014
1 parent b428192 commit c9c0e3bf8b25dc628f077a1595eb1e6d622b7024
Showing with 65 additions and 69 deletions.
  1. +0 −65 bokonon/graph.py
  2. +65 −4 bokonon/{load.py → lobbying.py}
View
@@ -1,65 +0,0 @@
-from being import countTypes, groupMerge, matchTypeAndHasFields, windowMerge
-from load import loadData
-from pprint import pprint
-import re
-from save import project, steralize, save
-from text import extractNames
-
-def represent(i,v):
- l = ", ".join([v["name"],v["address"],v["city"],v["state"]])
- return l
-
-def main():
- print("Loading universe...")
- universe = loadData()
-
- #Solid matching
- groupMerge(universe,
- matchTypeAndHasFields("client",["name"]),
- lambda i,v: [v["name"]],
- description="Merged clients based on exact name match")
-
- #Surprisingly solid
- groupMerge(universe,
- matchTypeAndHasFields("client",["name"]),
- lambda i,v: [re.sub(" ","",v["name"])],
- description="Merged clients based on exact match without spaces")
-
- #Solid
- groupMerge(universe,
- matchTypeAndHasFields("client",["name"]),
- lambda i,v: [re.sub("'","",v["name"])],
- description="Merged clients based on exact match without 's")
-
- #Most likely solid
- groupMerge(universe,
- matchTypeAndHasFields("client",["name"]),
- lambda i,v: extractNames(v["name"]),
- description="Merged clients based on extracted and cleaned name match"
- )
-
- #Not so bad
- windowMerge(universe,
- matchTypeAndHasFields("client",["name"]),
- lambda v: extractNames(v["name"]),
- 5,
- 1,
- pred=lambda v,w: v["state"] == w["state"] and v["city"] == w["city"] and v["address"] == w["address"],
- description="Merged clients based on windowed extracted name matchs")
-
- project(universe,"clientnames.txt", lambda v: v["type"] == "client", represent)
-
- groupMerge(universe,
- matchTypeAndHasFields("lobbyist",["firstname","lastname"]),
- lambda i,v: [(v["firstname"].lower(),v["lastname"].lower())],
- description="Merged lobbyists based on exact name match")
-
- def repper(i,v):
- fid = filter(lambda x: x[2]['relation'] == "workedfor",universe.edges([i],data=True))[0][1]
- return (v["lastname"]+", "+v["firstname"]+":"+universe.node[fid]["orgname"]).lower()
-
- project(universe,"lobbyistnames.txt", lambda v: v["type"] == "lobbyist",repper)
-
-
-if __name__ == "__main__":
- main()
@@ -1,14 +1,16 @@
import codecs
import json
-from glob import glob
import multiprocessing
import networkx as nx
import os
import pickle
+import re
-
-from text import preProcess
-from being import addRecord
+from being import addRecord, countTypes, groupMerge, matchTypeAndHasFields, windowMerge
+from glob import glob
+from pprint import pprint
+from save import project, steralize, save
+from text import preProcess, extractNames
processed_files = 'processed_files'  # NOTE(review): use is not visible in this diff; presumably a cache path like processed_graph -- confirm
processed_graph = 'processed_graph'  # path that loadData() opens for writing and pickles the universe graph into
@@ -160,3 +162,62 @@ def loadData():
with open(processed_graph,"w") as f:
pickle.dump(universe,f,2)
return universe
+
+def represent(i,v):  # Build a one-line label for record v; i is accepted but never used
+ l = ", ".join([v["name"],v["address"],v["city"],v["state"]])  # name, address, city, state joined by ", " (all four keys must be present)
+ return l
+
+def main():  # Load the graph, run staged merges over client then lobbyist records, and hand each set to project()
+ print("Loading universe...")
+ universe = loadData()  # the graph object that every merge below receives as its first argument
+
+ #Solid matching
+ groupMerge(universe,
+ matchTypeAndHasFields("client",["name"]),
+ lambda i,v: [v["name"]],  # key: the name exactly as stored
+ description="Merged clients based on exact name match")
+
+ #Surprisingly solid
+ groupMerge(universe,
+ matchTypeAndHasFields("client",["name"]),
+ lambda i,v: [re.sub(" ","",v["name"])],  # key: the name with every space character removed
+ description="Merged clients based on exact match without spaces")
+
+ #Solid
+ groupMerge(universe,
+ matchTypeAndHasFields("client",["name"]),
+ lambda i,v: [re.sub("'","",v["name"])],  # key: the name with every apostrophe removed
+ description="Merged clients based on exact match without 's")
+
+ #Most likely solid
+ groupMerge(universe,
+ matchTypeAndHasFields("client",["name"]),
+ lambda i,v: extractNames(v["name"]),  # key(s): whatever extractNames (imported from text, not shown) returns for the name
+ description="Merged clients based on extracted and cleaned name match"
+ )
+
+ #Not so bad
+ windowMerge(universe,
+ matchTypeAndHasFields("client",["name"]),
+ lambda v: extractNames(v["name"]),  # note: this callback takes only v, unlike the (i,v) callbacks given to groupMerge
+ 5,  # NOTE(review): meaning of the positional 5 and 1 is defined by windowMerge (not shown) -- presumably window size/step; confirm
+ 1,
+ pred=lambda v,w: v["state"] == w["state"] and v["city"] == w["city"] and v["address"] == w["address"],  # true only when both records share state, city and address
+ description="Merged clients based on windowed extracted name matchs")
+
+ project(universe,"clientnames.txt", lambda v: v["type"] == "client", represent)  # client records only, each rendered by represent()
+
+ groupMerge(universe,
+ matchTypeAndHasFields("lobbyist",["firstname","lastname"]),
+ lambda i,v: [(v["firstname"].lower(),v["lastname"].lower())],  # key: lowercased (firstname, lastname) pair
+ description="Merged lobbyists based on exact name match")
+
+ def repper(i,v):  # Label a lobbyist record as "lastname, firstname:orgname", lowercased; closes over universe
+ fid = filter(lambda x: x[2]['relation'] == "workedfor",universe.edges([i],data=True))[0][1]  # element 1 of the first edge of node i whose data has relation "workedfor"; NOTE(review): indexing filter() needs a list (Python 2) and fails if no such edge exists
+ return (v["lastname"]+", "+v["firstname"]+":"+universe.node[fid]["orgname"]).lower()  # orgname is read from the node fid found above
+
+ project(universe,"lobbyistnames.txt", lambda v: v["type"] == "lobbyist",repper)  # lobbyist records only, each rendered by repper()
+
+
+if __name__ == "__main__":  # run main() only when this module is executed as a script, not when imported
+ main()

0 comments on commit c9c0e3b

Please sign in to comment.