Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 279 lines (227 sloc) 10.617 kb
f16b361 Al Sweigart Initial commit.
authored
1 # Simple Substitution Cipher Breaker
2 # http://inventwithpython.com/codebreaker (BSD Licensed)
3
4 """
5 In this program, a "word pattern" is a description of which letters are
24fc561 Al Sweigart Changes from unit testing.
authored
6 repeated in a word. A word pattern is numbers delimited by periods.
7 The first letter to appear in the word is assigned 0, the second letter 1,
8 and so on. So the word pattern for 'cucumber' is '0.1.0.1.2.3.4.5' because the
9 first letter 'c' occurs as the first and third letter in the word
10 'cucumber'. So the pattern has '0' as the first and third number.
f16b361 Al Sweigart Initial commit.
authored
11
12 The pattern for 'abc' or 'cba' is '0.1.2'
13 The pattern for 'aaa' or 'bbb' is '0.0.0'
14 The pattern for 'hello' is '0.1.2.2.3'
24fc561 Al Sweigart Changes from unit testing.
authored
15 The pattern for 'advise' or 'closet' is '0.1.2.3.4.5' (they have only
16 unique letters in the word)
f16b361 Al Sweigart Initial commit.
authored
17
18 In this program, a "candidate" is a possible English word that a ciphertext
19 work can decrypt to.
20 For example, 'cucumber', 'mementos', and 'cocoanut' are candidates for the
21 ciphertext word 'JHJHWDOV' (because all of them have the pattern
22 '0.1.0.1.2.3.4.5')
23
24fc561 Al Sweigart Changes from unit testing.
authored
24 In this program, a "map" or "mapping" is a dictionary where the keys are
25 the letters in LETTERS (e.g. 'A', 'B', 'C', etc) and the values are lists
26 of letters that could possibly be the correct decryption. If the list is
27 blank, this means that it is unknown what this letter could decrypt to.
f16b361 Al Sweigart Initial commit.
authored
28 """
29
30 # The import statement for wordPatterns is further down.
c4fd6ab Al Sweigart Pylint-suggested fixes.
authored
31 import os, simpleSubCipher, re, copy
f16b361 Al Sweigart Initial commit.
authored
32
c4fd6ab Al Sweigart Pylint-suggested fixes.
authored
33 LETTERS = simpleSubCipher.LETTERS
f16b361 Al Sweigart Initial commit.
authored
34
35
36 def main():
37 message = 'SY L NLX SR PYYACAO L YLWJ EISWI UPAR LULSXRJ ISR SXRJSXWJR, IA ESMM RWCTJSXSZA SJ WMPRAMH, LXO TXMARR JIA AQSOAXWA SR PQACEIAMNSXU, IA ESMM CAYTRA JP FAMSAQA SJ. SY, PX JIA PJIAC ILXO, IA SR PYYACAO RPNAJISXU EISWI LYYPCOR L CALRPX YPC LWJSXU SX LWWPCOLXWA JP ISR SXRJSXWJR, IA ESMM LWWABJ SJ AQAX PX JIA RMSUIJARJ AQSOAXWA. JIA PCSUSX PY NHJIR SR AGBMLSXAO SX JISR ELH. -FACJCLXO CTRRAMM'
38
39 nonLettersPattern = re.compile('[^A-Z\s]')
40 ciphertext = nonLettersPattern.sub('', message.upper()).split()
41
42 # allCandidates is a dict with keys of a single ciphertext word, and
43 # values of the possible word patterns
44 # e.g. allCandidates == {'PYYACAO': ['alleged', 'ammeter', ...etc],
45 # 'EISWI': ['aerie', 'aging', 'algol', ...etc],
46 # 'LULSXRJ': ['abalone', 'abashed', ...etc],
47 # ...etc }
48 allCandidates = {}
49 for cipherWord in ciphertext:
50 pattern = getWordPattern(cipherWord)
51 if pattern not in wordPatterns.allPatterns:
52 continue
53 allCandidates[cipherWord] = copy.copy(wordPatterns.allPatterns[pattern])
54
55 # convert candidate words to uppercase
56 for i in range(len(allCandidates[cipherWord])):
57 allCandidates[cipherWord][i] = allCandidates[cipherWord][i].upper()
58
59 # determine the possible valid ciphertext translations
60 print('Breaking...')
91f8abb Al Sweigart First round of personal fixes. This is an unhelpful log message.
authored
61 # Python programs can be stopped at any time by pressing Ctrl-C (on
62 # Windows) or Ctrl-D (on Mac and Linux)
63 print('(Press Ctrl-C or Ctrl-D to quit at any time.)')
f16b361 Al Sweigart Initial commit.
authored
64 theMap = breakSimpleSub(getNewBlankMapping(), allCandidates)
65
66 # display the results to the user.
67 print('Done.')
68 print()
c4fd6ab Al Sweigart Pylint-suggested fixes.
authored
69 printMapping(theMap)
f16b361 Al Sweigart Initial commit.
authored
70 print()
71 print('Original ciphertext:')
72 print(message)
91f8abb Al Sweigart First round of personal fixes. This is an unhelpful log message.
authored
73 print()
f16b361 Al Sweigart Initial commit.
authored
74 print('Broken ciphertext:')
75 print(decryptWithMap(message, theMap))
76 print()
77
78 def getWordPattern(word):
79 # Returns a string of the pattern form of the given word.
24fc561 Al Sweigart Changes from unit testing.
authored
80 # e.g. '0.1.2.3.4.1.2.3.5.6' for 'DUSTBUSTER'
f16b361 Al Sweigart Initial commit.
authored
81 word = word.upper()
82 nextNum = 0
83 letterNums = {}
84 wordPattern = []
85
86 for letter in word:
87 if letter in letterNums:
88 wordPattern.append(str(letterNums[letter]))
89 else:
90 wordPattern.append(str(nextNum))
91 letterNums[letter] = nextNum
92 nextNum += 1
93 return '.'.join(wordPattern)
94
95
96
97 def breakSimpleSub(theMap, allCandidates):
98 # allCandidate's format:
99 # { 'cipherword1': ['candidate1a', 'candidate1b', ...],
100 # 'cipherword2': ['candidate2a', 'candidate2b', ...],
101 # ...}
102
103 for cipherWord in allCandidates.keys():
104 # get a new mapping for each ciphertext word
105 newMap = getNewBlankMapping()
106
107 # create a map that has all the letters' possible candidate
108 # decryptions added to it
109 for candidate in allCandidates[cipherWord]:
110 newMap = addMappings(newMap, cipherWord, candidate)
111
112 # intersect this new map with the existing map
113 theMap = intersectMappings(theMap, newMap)
114
115 # remove any solved letters from the other possible mappings
116 theMap = removeSolvedLettersFromMapping(theMap)
117
118 return theMap
119
120
121 def getNewBlankMapping():
122 # Returns a dict where the keys are single-character strings of the
123 # uppercase letters, and the values are blank lists.
124 # E.g. {'A': [], 'B': [], 'C': [], ...etc}
125 theMap = {}
c4fd6ab Al Sweigart Pylint-suggested fixes.
authored
126 for i in LETTERS:
f16b361 Al Sweigart Initial commit.
authored
127 theMap[i] = []
128 return theMap
129
130
131 def addMappings(theMap, cipherWord, candidate):
24fc561 Al Sweigart Changes from unit testing.
authored
132 # The theMap parameter is a "mapping" data structure that this
133 # function modifies. (See the comments at the top of this file.)
f16b361 Al Sweigart Initial commit.
authored
134 # The cipherWord parameter is a string value of the ciphertext word.
24fc561 Al Sweigart Changes from unit testing.
authored
135 # The candidate parameter is a possible English word that the
136 # cipherWord could decrypt to.
f16b361 Al Sweigart Initial commit.
authored
137
24fc561 Al Sweigart Changes from unit testing.
authored
138 # This function modifies theMap so that the mappings of the
139 # cipherWord's letters to the candidate's letters are added to theMap.
f16b361 Al Sweigart Initial commit.
authored
140
141 for i in range(len(cipherWord)):
142 if candidate[i] not in theMap[cipherWord[i]]:
143 theMap[cipherWord[i]].append(candidate[i])
144 return theMap
145
146
147 def intersectMappings(mapA, mapB):
148 # To intersect two maps, create a blank map, and that add only the
149 # candidate decryption letters if they exist in both maps.
150 result = getNewBlankMapping()
151 for i in mapA.keys():
152
24fc561 Al Sweigart Changes from unit testing.
authored
153 # An empty list means "any letter is possible". So just copy the
154 # other map entirely.
f16b361 Al Sweigart Initial commit.
authored
155 if mapA[i] == []:
156 result[i] = copy.copy(mapB[i])
157 elif mapB[i] == []:
158 result[i] = copy.copy(mapA[i])
159
160 else:
161 for j in mapA[i]:
162 if j in mapB[i]:
163 result[i].append(j)
164 return result
165
166
167 def removeSolvedLettersFromMapping(theMap):
24fc561 Al Sweigart Changes from unit testing.
authored
168 # Letters in the mapping that map to only one letter are consider
169 # "solved" and can be removed from the other letters.
170 # For example, if 'A' maps to possible letters ['M', 'N'], and 'B'
171 # maps to ['N'], then we know that 'B' must map to 'N', so we can
172 # remove 'N' from the list of what 'A' could map to. So 'A' then maps
173 # to ['M']. Note that now that 'A' maps to only one letter, we can
174 # remove 'M' from the list of possible mappings for every other
175 # letter. (This is why there is a loop that keeps reducing the map.)
f16b361 Al Sweigart Initial commit.
authored
176 previousSolvedLetters = []
177 solvedLetters = None
178 while previousSolvedLetters != solvedLetters:
179 # This loop will break when solvedLetters is not changed by the
180 # reduction process (and is the same as previousSolvedLetters).
181 previousSolvedLetters = solvedLetters
182 solvedLetters = []
183
24fc561 Al Sweigart Changes from unit testing.
authored
184 # solvedLetters will be a list of English letters that have one
185 # and only one possible mapping in theMap
f16b361 Al Sweigart Initial commit.
authored
186 for i in theMap:
187 if len(theMap[i]) == 1:
188 solvedLetters.append(theMap[i][0])
189
190 # If a letter is solved, than it cannot possibly be a possible
24fc561 Al Sweigart Changes from unit testing.
authored
191 # decryption letter for a different ciphertext letter, so we
192 # should remove it.
f16b361 Al Sweigart Initial commit.
authored
193 for i in theMap:
194 for s in solvedLetters:
195 if len(theMap[i]) != 1 and s in theMap[i]:
196 theMap[i].remove(s)
197
24fc561 Al Sweigart Changes from unit testing.
authored
198 # With a letter removed, it's possible that we may have reduced
199 # other ciphertext letters to one and only one solution, so keep
200 # looping until previousSolvedLetters == solvedLetters. At that
201 # point, we'll know we can't rmemove any more letters.
f16b361 Al Sweigart Initial commit.
authored
202 return theMap
203
204
c4fd6ab Al Sweigart Pylint-suggested fixes.
authored
205 def printMapping(theMap):
f16b361 Al Sweigart Initial commit.
authored
206 # Display a mapping data structure on the screen.
207 print('Mapping:')
c4fd6ab Al Sweigart Pylint-suggested fixes.
authored
208 print(' ' + ' '.join(list(LETTERS)))
209 print(' ' + ' '.join('=' * len(LETTERS)))
f16b361 Al Sweigart Initial commit.
authored
210
c4fd6ab Al Sweigart Pylint-suggested fixes.
authored
211 for i in range(len(LETTERS)):
f16b361 Al Sweigart Initial commit.
authored
212 print(' ', end='')
213 foundAnyLetters = False
c4fd6ab Al Sweigart Pylint-suggested fixes.
authored
214 for j in LETTERS:
f16b361 Al Sweigart Initial commit.
authored
215 # theMap[j] points to a list of single-character strings that
216 # are potential solutions for the ciphertext letter in j.
217 if len(theMap[j]) > i:
218 foundAnyLetters = True
219 print(theMap[j][i] + ' ', end='')
220 else:
221 print(' ', end='')
222 print()
223 if foundAnyLetters == False:
224 break
225
226
227 def decryptWithMap(ciphertext, theMap):
228 # This function will do a simple sub decryption of ciphertext with the
229 # information in theMap, instead of a simple sub key.
230
231 # First create a simple sub key from the theMap mapping.
c4fd6ab Al Sweigart Pylint-suggested fixes.
authored
232 key = ['x'] * len(LETTERS)
f16b361 Al Sweigart Initial commit.
authored
233 for letter in theMap.keys():
234 if len(theMap[letter]) == 1:
24fc561 Al Sweigart Changes from unit testing.
authored
235 # If only one possible letter mapping, add it to the key.
c4fd6ab Al Sweigart Pylint-suggested fixes.
authored
236 keyIndex = LETTERS.find(theMap[letter][0].upper())
f16b361 Al Sweigart Initial commit.
authored
237 key[keyIndex] = letter.upper()
238 else:
239 ciphertext = ciphertext.replace(letter, '_')
240 key = ''.join(key)
241
242 # Then decrypt the original ciphertext with this key and return the
243 # decryption.
244 return simpleSubCipher.decryptMessage(key, ciphertext)
245
c4fd6ab Al Sweigart Pylint-suggested fixes.
authored
246 def checkForWordPatternsPy():
24fc561 Al Sweigart Changes from unit testing.
authored
247 # If the wordPatterns.py file does not exist, create it based on the
248 # words in our dictionary text file, dictionary.txt.
249 # (Download this file from http://invpy.com/dictionary.txt)
c4fd6ab Al Sweigart Pylint-suggested fixes.
authored
250 if not os.path.exists('wordPatterns.py'):
251 import pprint # import the "pretty print" module
252 allPatterns = {}
f16b361 Al Sweigart Initial commit.
authored
253
c4fd6ab Al Sweigart Pylint-suggested fixes.
authored
254 fp = open('dictionary.txt')
255 wordList = fp.readlines()
256 fp.close()
f16b361 Al Sweigart Initial commit.
authored
257
c4fd6ab Al Sweigart Pylint-suggested fixes.
authored
258 for word in wordList:
24fc561 Al Sweigart Changes from unit testing.
authored
259 word = word.strip() # get rid of newline at the end
c4fd6ab Al Sweigart Pylint-suggested fixes.
authored
260 pattern = getWordPattern(word)
f16b361 Al Sweigart Initial commit.
authored
261
c4fd6ab Al Sweigart Pylint-suggested fixes.
authored
262 if pattern not in allPatterns:
263 allPatterns[pattern] = [word]
264 else:
265 allPatterns[pattern].append(word)
f16b361 Al Sweigart Initial commit.
authored
266
c4fd6ab Al Sweigart Pylint-suggested fixes.
authored
267 # This is code that writes code. The wordPatterns.py file contains
268 # one very, very large assignment statement.
269 fp = open('wordPatterns.py', 'w')
270 fp.write('allPatterns = ')
271 fp.write(pprint.pformat(allPatterns))
272 fp.close()
f16b361 Al Sweigart Initial commit.
authored
273
274 # Import our wordPatterns.py file.
c4fd6ab Al Sweigart Pylint-suggested fixes.
authored
275 checkForWordPatternsPy()
f16b361 Al Sweigart Initial commit.
authored
276 import wordPatterns
277
278 if __name__ == '__main__':
279 main()
Something went wrong with that request. Please try again.