From e9bf10662714ebbc7e49c507d6ecc8b2c88563ba Mon Sep 17 00:00:00 2001
From: Sourcery AI <>
Date: Fri, 2 Sep 2022 15:50:16 +0000
Subject: [PATCH] 'Refactored by Sourcery'

---
 shoten/filters.py | 24 ++++++++++++------------
 shoten/shoten.py  |  5 ++---
 2 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/shoten/filters.py b/shoten/filters.py
index 37a2c57..5d4c6c0 100644
--- a/shoten/filters.py
+++ b/shoten/filters.py
@@ -185,7 +185,7 @@ def frequency_filter(vocab: Dict[str, Entry], max_perc: float=50, min_perc: floa
     return vocab
 
 
-def hyphenated_filter(vocab: Dict[str, Entry], perc: float=50, verbose: bool=False) -> Dict[str, Entry]: # threshold in percent
+def hyphenated_filter(vocab: Dict[str, Entry], perc: float=50, verbose: bool=False) -> Dict[str, Entry]:  # threshold in percent
     '''Reduce dict size by deleting hyphenated tokens when the parts are frequent.'''
     deletions, old_len = [], len(vocab)
     myfreqs = np.array(sum_entries(vocab))
@@ -196,7 +196,7 @@ def hyphenated_filter(vocab: Dict[str, Entry], perc: float=50, verbose: bool=Fal
         if firstpart in vocab and sum_entry(vocab[firstpart]) > threshold or \
            secondpart in vocab and sum_entry(vocab[secondpart]) > threshold:
             deletions.append(word)
-    if verbose is True:
+    if verbose:
         print(sorted(deletions))
     for item in deletions:
         del vocab[item]
@@ -255,12 +255,10 @@ def freshness_filter(vocab: Dict[str, Entry], percentage: float=10) -> Dict[str,
         thresh = len(series)*(percentage/100)
         freshnessindex = sum(series[-ceil(thresh):])
         oldnessindex = sum(series[:ceil(thresh)])
-        if oldnessindex < datethreshold:
-            #if oldnessindex < np.percentile(series, percentage):
-            #    continue
-            if freshnessindex < np.percentile(series, percentage):
-                deletions.append(token)
-            # print(vocab[token], freshnessindex, oldnessindex, token)
+        if oldnessindex < datethreshold and freshnessindex < np.percentile(
+            series, percentage
+        ):
+            deletions.append(token)
     for item in deletions:
         del vocab[item]
     print_changes('freshness', old_len, len(vocab))
@@ -326,10 +324,12 @@ def sources_filter(vocab: Dict[str, Entry], myset: Set[str]) -> Dict[str, Entry]
 def wordlist_filter(vocab: Dict[str, Entry], mylist: List[str], keep_words: bool=False) -> Dict[str, Entry]:
     '''Keep or discard words present in the input list.'''
     intersection = set(vocab) & set(mylist)
-    if keep_words is False:
-        deletions = list(intersection)
-    else:
-        deletions = [w for w in vocab if w not in intersection]
+    deletions = (
+        [w for w in vocab if w not in intersection]
+        if keep_words
+        else list(intersection)
+    )
+
     old_len = len(vocab)
     for word in deletions:
         del vocab[word]
diff --git a/shoten/shoten.py b/shoten/shoten.py
index ac9f592..ce90575 100644
--- a/shoten/shoten.py
+++ b/shoten/shoten.py
@@ -36,8 +36,7 @@ def find_files(dirname: str, maxdiff: int) -> Iterator[str]:
     for thepath, _, files in walk(dirname):
         # check for dates in directory names
         if '-' in thepath:
-            match = DATESEARCH.search(thepath)
-            if match:
+            if match := DATESEARCH.search(thepath):
                 thisdiff = calc_timediff(match[0])
                 if thisdiff is not None and thisdiff > maxdiff:
                     continue
@@ -181,7 +180,7 @@ def read_file(filepath: str, *, maxdiff: int=1000, mindiff: int=0, authorregex:
         source = mytree.findtext('.//tei:publisher', namespaces=NSPACE)
         # headings
         headwords = set()
-        if details is True:
+        if details:
            bow = [' '.join(h.itertext()) for h in mytree.xpath('.//tei:fw|.//tei:head', namespaces=NSPACE)]
            headwords = {t for t in simple_tokenizer(' '.join(bow)) if is_relevant_input(t)}
         # process
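
The two recurring rewrites in this patch are the walrus operator (assign-and-test merged into one `if`) and the conditional expression (an if/else assignment collapsed into one statement). The following minimal, self-contained sketch illustrates both patterns; the regex, function names and sample data below are made up for the example and are not taken from shoten, but the transformations mirror the ones applied to find_files and wordlist_filter and are behaviour-preserving.

    import re

    DATEPATTERN = re.compile(r'\d{4}-\d{2}-\d{2}')  # illustrative pattern, not shoten's DATESEARCH

    def first_date(path):
        # walrus operator: assign and test in one step, equivalent to
        # `match = DATEPATTERN.search(path)` followed by `if match:`
        if match := DATEPATTERN.search(path):
            return match[0]
        return None

    def pick(words, wordlist, keep_words=False):
        # conditional expression: equivalent to the four-line if/else assignment it replaces
        intersection = set(words) & set(wordlist)
        return (
            [w for w in words if w not in intersection]
            if keep_words
            else list(intersection)
        )

    assert first_date('corpus/2022-09-02/file.xml') == '2022-09-02'
    assert pick(['a', 'b', 'c'], ['b'], keep_words=True) == ['a', 'c']
    assert pick(['a', 'b', 'c'], ['b']) == ['b']

Both rewrites require Python 3.8 or later because of the walrus operator; the semantics are otherwise identical to the removed lines.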