Browse files

More general project function

save.py now has a more general project function for viewing fields of
nodes: it takes the output file, a predicate, and an extract function as
arguments. That means beings no longer need to carry a "names" set around.
  • Loading branch information...
1 parent b32ff16 commit 2d8fa529191877b95e477366303babe025db0075 Zack Maril committed Jun 26, 2014
Showing with 15 additions and 59 deletions.
  1. +2 −10 dedupe/being.py
  2. +13 −49 dedupe/save.py
View
12 dedupe/being.py
@@ -27,10 +27,6 @@ def mergeTheirBeings(universe,al,bl):
universe.remove_edge(v,b)
av = universe.node[a]
bv = universe.node[b]
- if "names" in av and "names" in bv:
- av["names"] = av["names"].union(bv["names"])
- elif "names" not in av and "names" in bv:
- av["names"] = bv["names"]
return al
def cullHermits(universe):
@@ -59,10 +55,6 @@ def groupMerge(universe, pred, extract,description=None):
for k,v in d.iteritems():
merged = reduce(lambda x,y: mergeTheirBeings(universe,x,y),v)
found = findBeing(universe,merged)
- if "names" in universe.node[found]:
- universe.node[found]["names"].add(k)
- else:
- universe.node[found]["names"] = set([k])
cullHermits(universe)
if description != None:
@@ -73,6 +65,6 @@ def groupMerge(universe, pred, extract,description=None):
print(txt)
print("")
-def matchTypeAndHaveField(t,f):
- return lambda v: v["type"] == t and v[f] != ""
+def matchTypeAndHasFields(t,fs):
+ return lambda v: v["type"] == t and all([v[f] != "" for f in fs])
View
62 dedupe/save.py
@@ -21,58 +21,22 @@ def save(universe):
print("Saved in {}".format(f))
-def project(universe):
- beings = filter(lambda x: x[1]["type"] == "Being", universe.nodes(data=True))x
- clients ={}
- firms = {}
+def project(universe,fo,pred,extract):
+ beings = filter(lambda x: x[1]["type"] == "Being", universe.nodes(data=True))
+ lst = []
for b in beings:
ns = nx.neighbors(universe,b[0])
- if universe.node[ns[0]]["type"] == "client":
- n = universe.node[b[0]]["names"]
- clients[n] = set(map(lambda x: universe.node[x]["name"], ns))
- if universe.node[ns[0]]["type"] == "firm":
- if len(ns) != 1:
- n = universe.node[b[0]]["names"]
- firms[n] = set(map(lambda x: universe.node[x]["orgname"], ns))
-
-
- print("Found {} unique clients".format(len(clients)))
- print("Found {} unique firms".format(len(firms)))
-
- print("Writing clientnames.txt")
- with open("clientnames.txt","w") as f:
- for k in sorted(clients.keys()):
- if len(clients[k]) == 1 and list(clients[k])[0] == k:
- f.write(list(clients[k])[0])
- elif len(clients[k]) == 1 and list(clients[k])[0] != k:
- f.write(list(clients[k])[0]+" ==> "+k)
- else:
- f.write(k)
- f.write('\n')
- f.write("%%%%%%%%%%%%%%%%%%%%")
+ if pred(universe.node[ns[0]]):
+ fs = list(set(map(lambda x: extract(universe.node[x]), ns)))
+ fs = sorted(fs)
+ lst.append(fs)
+
+ print("Writing {} with {} groups".format(fo,len(lst)))
+ with codecs.open(fo,"w",encoding="utf-8") as f:
+ for b in sorted(lst,key=lambda x:x[0].lower()):
+ for el in b:
+ f.write(el)
f.write('\n')
- for n in clients[k]:
- f.write(n)
- f.write('\n')
f.write('\n')
- f.write('\n')
-
- print("Writing firmnames.txt")
- with open("firmnames.txt","w") as f:
- for k in sorted(firms.keys()):
- if len(firms[k]) == 1 and list(firms[k])[0] == k:
- f.write(list(firms[k])[0])
- elif len(firms[k]) == 1 and list(firms[k])[0] != k:
- f.write(list(firms[k])[0]+" ==> "+k)
- else:
- f.write(k)
- f.write('\n')
- f.write("%%%%%%%%%%%%%%%%%%%%")
- f.write('\n')
- for n in firms[k]:
- f.write(n)
- f.write('\n')
f.write('\n')
- f.write('\n')
-

0 comments on commit 2d8fa52

Please sign in to comment.