Modified mask handling for generating alphabets

dhondta · Apr 28, 2020 · ca02a31 · ca02a31
1 parent f044e4f
commit ca02a31
Show file tree

Hide file tree

Showing 4 changed files with 35 additions and 30 deletions.
diff --git a/codext/__common__.py b/codext/__common__.py
@@ -249,6 +249,9 @@ def _wrapper(param):
             if ignore_case is not None:
                 case_d = ["upper", "lower"][any(c in "".join(smapdict.values()) for c in "abcdefghijklmnopqrstuvwxyz")]
                 case_e = ["upper", "lower"][any(c in "".join(smapdict.keys()) for c in "abcdefghijklmnopqrstuvwxyz")]
+                i = ignore_case
+                smapdict = {getattr(k, case_e)() if i in ["both", "encode"] else k: \
+                            getattr(v, case_d)() if i in ["both", "decode"] else v for k, v in smapdict.items()}
             if decode:
                 tmp = {}
                 # this has a meaning for encoding maps that could have clashes in encoded chars (e.g. Bacon's cipher ;
@@ -456,13 +459,20 @@ def _wrapper(*args, **kwargs):
 # alphabet generation function from a given mask
 def get_alphabet_from_mask(mask):
     """
-    This function generates an alphabet from the given mask.
+    This function generates an alphabet from the given mask. The style used is similar to Hashcat ; group keys are
+     marked with a heading "?".
     """
-    alphabet = ""
-    for m in mask:
-        for c in MASKS.get(m, m):
-            if c not in alphabet:
-                alphabet += c
+    i, alphabet = 0, ""
+    while i < len(mask):
+        c = mask[i]
+        if c == "?" and i < len(mask) - 1 and mask[i+1] in MASKS.keys():
+            for c in MASKS[mask[i+1]]:
+                if c not in alphabet:
+                    alphabet += c
+            i += 1
+        elif c not in alphabet:
+            alphabet += c
+        i += 1
     return alphabet
 
 

diff --git a/codext/crypto/affine.py b/codext/crypto/affine.py
@@ -13,16 +13,10 @@
 
 
 def encmap_factory(mask=None):
-    mask = mask or "lus-1,2"
-    try:
-        mask, key = mask.split("-")
-        a, b = map(int, key.split(","))
-    except ValueError:
-        raise LookupError("Bad parameter for encoding 'affine': '{}'".format(mask))
-    try:
-        alphabet = get_alphabet_from_mask(mask)
-    except KeyError:
-        raise LookupError("Bad parameter for encoding 'affine': '{}'".format(mask))
+    mask = mask or "?l?u?s-1,2"
+    mask, key = mask.split("-")
+    a, b = map(int, key.split(","))
+    alphabet = get_alphabet_from_mask(mask)
     encmap = {c: alphabet[(a * alphabet.index(c) + b) % len(alphabet)] for c in alphabet}
     if len(set(encmap.keys())) != len(set(encmap.values())):
         raise LookupError("Bad parameter for encoding 'affine': {}, {}".format(a, b))

diff --git a/codext/crypto/atbash.py b/codext/crypto/atbash.py
@@ -13,11 +13,7 @@
 
 
 def encmap_factory(mask=None):
-    mask = mask or "lus"
-    try:
-        alphabet = get_alphabet_from_mask(mask)
-    except KeyError:
-        raise LookupError("Bad parameter for encoding 'atbash': '{}'".format(mask))
+    alphabet = get_alphabet_from_mask(mask or "?l?u?s")
     return {k: v for k, v in zip(alphabet, alphabet[::-1])}
 
 

diff --git a/docs/ciphers.md b/docs/ciphers.md
@@ -2,7 +2,7 @@
 
 !!! note "Available masks"
 
-    Some cipher codecs use character masks to generate their alphabets.
+    Some cipher codecs use character masks to generate their alphabets. Groups of characters are indicated using a leading "`?`".
 
     `a`: printable characters
     `b`: all 8-bits chars
@@ -15,6 +15,11 @@
     `u`: uppercase letters
 
     When combining masks, only one occurrence of each character is taken in the final alphabet.
+
+    So, for instance, the following masks yield the following alphabets:
+
+    - `?l?u?d?s`: "`abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 `"
+    - `?s.,?!?u?d`: "` .,?!ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789`"
 
 -----
 
@@ -24,20 +29,20 @@ This codec implements the Affine monoalphabetic substitution cipher. It is param
 
 **Codec** | **Conversions** | **Aliases** | **Comment**
 :---: | :---: | --- | ---
-`affine` | Affine <-> text | `affine` (uses default mask "`lus`" with `a=1` and `b=2`), `affine_cipher-luds-5,8`, `affine- .,?!ud-23,6`, ...
+`affine` | Affine <-> text | `affine` (uses default mask "`?l?u?s`" with `a=1` and `b=2`), `affine_cipher-?l?u?d?s-5,8`, `affine- .,?!?u?d-23,6`, ...
 
 ```python
 >>> codext.encode("this is a test", "affine")
 'vjkubkubcbvguv'
 >>> codext.decode("vjkubkubcbvguv", "affine")
 'this is a test'
->>> codext.encode("this is a test", "affine-luds-5,8")
+>>> codext.encode("this is a test", "affine-?l?u?d?s-5,8")
 'ORWJdWJdidOCJO'
->>> codext.decode("ORWJdWJdidOCJO", "affine-luds-5,8")
+>>> codext.decode("ORWJdWJdidOCJO", "affine-?l?u?d?s-5,8")
 'this is a test'
->>> codext.encode("THIS IS A TEST", "affine- .,?!ud-5,8")
+>>> codext.encode("THIS IS A TEST", "affine-?s.,?!?u?d-5,8")
 'AW1 D1 D2DAH A'
->>> codext.decode("AW1 D1 D2DAH A", "affine- .,?!ud-5,8")
+>>> codext.decode("AW1 D1 D2DAH A", "affine-?s.,?!?u?d-5,8")
 'THIS IS A TEST'
 ```
 
@@ -53,14 +58,14 @@ It implements the monoalphabetic substitution cipher used for the Hebrew alphabe
 
 **Codec** | **Conversions** | **Aliases** | **Comment**
 :---: | :---: | --- | ---
-`atbash` | Atbash <-> text | `atbash` (uses default mask "`lu`"), `atbash_cipher-lds`, ...
+`atbash` | Atbash <-> text | `atbash` (uses default mask "`?l?u?s`"), `atbash_cipher-?l?d?s`, ...
 
 ```python
 >>> codext.encode("this is a test", "atbash")
 'HTSIaSIa aHWIH'
->>> codext.encode("this is a test", "atbash-lups")
+>>> codext.encode("this is a test", "atbash-?l?u?p?s")
 '.^]/a]/a a.{/.'
->>> codext.decode(".^]/a]/a a.{/.", "atbash_cipher_lups")
+>>> codext.decode(".^]/a]/a a.{/.", "atbash_cipher_?l?u?p?s")
 'this is a test'
 ```