# This repository has been archived by the owner on Jun 23, 2023. It is now read-only.
# graph.py (55 lines, 45 loc, 1.91 KB)
from being import countTypes, groupMerge, matchTypeAndHasFields, windowMerge
from load import loadData
from pprint import pprint
import re
from save import project, steralize, save
from text import extractNames
def represent(v):
    """Return a one-line summary of record *v*.

    The summary is the record's "name", "address", "city" and "state"
    values, in that order, separated by ", ". All four keys are looked up
    unconditionally, and the values are handed to ``str.join``, so they
    must already be strings.
    """
    fields = ("name", "address", "city", "state")
    return ", ".join(v[field] for field in fields)
def main():
    """Load the universe, run the client merge passes, then project clients.

    Four ``groupMerge`` passes run first, each keyed on a different
    normalisation of the client name, followed by one ``windowMerge`` pass
    restricted to records whose state, city and address are equal. Finally
    ``project`` is called for every record whose type is "client", using
    ``represent`` and the target name "clientnames.txt".

    The short notes next to each pass ("Solid", "Not great", ...) are the
    original author's assessment of how trustworthy that pass is.
    """
    print("Loading universe...")
    universe = loadData()

    # (key function, description) for each groupMerge pass, in run order.
    group_passes = (
        # Solid matching
        (
            lambda v: [v["name"]],
            "Merged clients based on exact name match",
        ),
        # Surprisingly solid
        (
            lambda v: [re.sub(" ", "", v["name"])],
            "Merged clients based on exact match without spaces",
        ),
        # Solid
        (
            lambda v: [re.sub("'", "", v["name"])],
            "Merged clients based on exact match without 's",
        ),
        # Most likely solid
        (
            lambda v: extractNames(v["name"]),
            "Merged clients based on extracted and cleaned name match",
        ),
    )
    for key_of, summary in group_passes:
        # The selector is rebuilt for every pass, exactly as each separate
        # call did before.
        groupMerge(
            universe,
            matchTypeAndHasFields("client", ["name"]),
            key_of,
            description=summary,
        )

    # Not great
    # NOTE(review): the positional 5 and 1 are windowMerge parameters whose
    # meaning is defined in the `being` module — confirm there.
    windowMerge(
        universe,
        matchTypeAndHasFields("client", ["name"]),
        lambda v: extractNames(v["name"]),
        5,
        1,
        pred=lambda v, w: (
            v["state"] == w["state"]
            and v["city"] == w["city"]
            and v["address"] == w["address"]
        ),
        description="Merged clients based on windowed extracted name matchs",
        logging=represent,
    )

    project(
        universe,
        "clientnames.txt",
        lambda v: v["type"] == "client",
        represent,
    )
# Run the merge pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()