/
settools.py
76 lines (46 loc) · 1.91 KB
/
settools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#! /usr/bin/env python3
from typing import *
T = TypeVar('T')
from itertools import filterfalse
from .seqtools import bestsubseqwithgap, filterbyother, enumeratesubseqswithgap
from .mathtools import safediv
def bestsubset(a: Set[T], key: Callable[[Iterable[T]], Any]) -> Set[T]:
return set(bestsubseqwithgap(list(a), key))
def setcover(whole: Iterable[T], covered: Iterable[Set[T]], key: Callable = len) -> Iterable[FrozenSet[T]]:
whole = set(whole)
covers = set(frozenset(x) for x in covered)
while whole and covers:
bestval, best = None, None
for curr in covers:
currtemp = curr & whole
if currtemp:
currval = key(currtemp)
if not bestval or currval > bestval:
bestval, best = currval, curr
if not best:
return
yield best
whole -= best
covers.remove(best)
def addtoset(s: Set[T], x: T) -> bool:
if x in s:
return False
s.add(x)
return True
def weightedjaccard(a: Any, b: Any, key: Callable[[Any], float] = sum) -> float:
x = key(a & b)
return safediv(x, key(a) + key(b) - x)
def jaccard(a: Set[T], b: Set[T]) -> float:
return weightedjaccard(a, b, key=len)
def multisetjaccard(a: Counter[T], b: Counter[T]) -> float:
return weightedjaccard(a, b, key=lambda c: sum(c.values()))
def dropsubsetsof(a: Iterable[Set[T]], b: Set[T]) -> Iterable[Set[T]]:
return filterfalse(lambda x: x <= b, a)
def dropsubsets(a: Iterable[Set[T]]) -> Iterable[Set[T]]:
return filterbyother(lambda x, y: not x <= y, a)
def dropsupersetsof(a: Iterable[Set[T]], b: Set[T]) -> Iterable[Set[T]]:
return filterfalse(lambda x: x >= b, a)
def dropsupersets(a: Iterable[Set[T]]) -> Iterable[Set[T]]:
return filterbyother(lambda x, y: not x >= y, a)
def enumeratesubsets(a: Set[T]) -> Iterable[Set[T]]:
return map(set, enumeratesubseqswithgap(a))