Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 270 lines (214 sloc) 12.724 kb
f16b361 @asweigart Initial commit.
authored
1 # Vigenere Cipher Breaker
2 # http://inventwithpython.com/codebreaker (BSD Licensed)
3
4 import copy, math, itertools, re
5 import vigenereCipher, pyperclip, freqFinder, detectEnglish
6 LETTERS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
7
8 MAX_KEY_LENGTH = 16
9 NUM_MOST_FREQ_LETTERS = 3
10 SILENT_MODE = False
11 FACTOR_CACHE = {} # a dictionary that stores lists of factors
12
13 nonLettersPattern = re.compile('[^A-Z]')
14
15 def main():
16 # Instead of typing this ciphertext out, you can copy & paste it
24fc561 @asweigart Changes from unit testing.
authored
17 # from http://invpy.com/vigenereBreaker.py
f16b361 @asweigart Initial commit.
authored
18 ciphertext = """ADIZ AVTZQECI TMZUBB WSA M PMILQEV HALPQAVTAKUOI, LGOUQDAF, KDMKTSVMZTSL, IZR XOEXGHZR KKUSITAAF. VZ WSA TWBHDG UBALMMZHDAD QZ HCE VMHSGOHUQBO OX KAAKULMD GXIWVOS, KRGDURDNY I RCMMSTUGVTAWZ CA TZM OCICWXFG JF "STSCMILPY" OID "UWYDPTSBUCI" WABT HCE LCDWIG EIOVDNW. BGFDNY QE KDDWTK QJNKQPSMEV BA PZ TZM ROOHWZ AT XOEXGHZR KKUSICW IZR VRLQRWXIST UBOEDTUUZNUM. PIMIFO ICMLV EMF DI, LCDWIG OWDYZD XWD HCE YWHSMNEMZH XOVM MBY CQXTSM SUPACG (GUKE) OO BDMFQCLWG BOMK, TZUHVIF'A OCYETZQOFIFO OSITJM. RCM A LQYS CE OIE VZAV WR VPT 8, LPQ GZCLQAB MEKXABNITTQ TJR YMDAVN FIHOG CJGBHVNSTKGDS. ZM PSQIKMP O IUEJQF JF LMOVIIICQG AOJ JDSVKAVS UZREIZ QDPZMDG, DNUTGRDNY BTS HELPAR JF LPQ PJMTM, MB ZLWKFFJMWKTOIIUIX AVCZQZS OHSB OCPLV NUBY SWBFWIGK NAF OHW MZWBMS UMQCIFM. MTOEJ BTS RAJ PQ KJRCMP OO TZM ZOOIGVMZ KHQAUQVL DINCMALWDM, RHWZQ VZ CJMMHZD GVQ CA TZM RWMSL LQGDGFA RCM A KBAFZD-HZAUMAE KAAKULMD, HCE SKQ. WI 1948 TMZUBB JGQZSY MSF ZSRMSV'E QJMHCFWIG DINCMALWDM VT EIZQCEKBQF PNADQFNILG, IVZRW PQ ONSAAFSY IF BTS YENMXCKMWVF CA TZM YOICZMEHZR UWYDPTWZE OID TMOOHE AVFSMEKBQR DN EIFVZMSBUQVL TQAZJGQ. PQ KMOLM M DVPWZ AB OHW KTSHIUIX PVSAA AT HOJXTCBEFMEWN, AFL BFZDAKFSY OKKUZGALQZU XHWUUQVL JMMQOIGVE GPCZ IE HCE TMXCPSGD-LVVBGBUBNKQ ZQOXTAWZ, KCIUP ISME XQDGO OTAQFQEV QZ HCE 1960K. BGFDNY'A TCHOKMJIVLABK FZSMTFSY IF I OFDMAVMZ KRGAQQPTAWZ WI 1952, WZMZ VJMGAQLPAD IOHN WWZQ GOIDT UZGEYIX WI TZM GBDTWL WWIGVWY. VZ AUKQDOEV BDSVTEMZH RILP RSHADM TCMMGVQG (XHWUUQVL UIEHMALQAB) VS SV MZOEJVMHDVW BA DMIKWZ. HPRAVS RDEV QZ 1954, XPSL WHSM TOW ISZKK JQTJRW PUG 42ID TQDHCDSG, RFJM UGMBDDW XAWNOFQZU. VN AVCIZSL LQHZREQZSY TZIF VDS VMMHC WSA EIDCALQ; VDS EWFVZR SVP GJMW WFVZRK JQZDENMP VDS VMMHC WSA MQXIVMZHVL. GV 10 ESKTWUNSM 2009, FGTXCRIFO MB DNLMDBZT UIYDVIYV, NFDTAAT DMIEM YWIIKBQF BOJLAB WRGEZ AVDW IZ CAFAKUOG PMJXWX AHWXCBY GV NSCADN AT OHW JDWOIKP SCQEJVYSIT XWD "HCE SXBOGLAVS KVY ZM ION TJMMHZD." SA AT HAQ 2012 I BFDVSBQ AZMTMD'G WIDT ION BWNAFZ TZM TCPSW WR ZJRVA IVDCZ EAIGD YZMBO TMZUBB A KBMHPTGZK DVRVWZ WA EFIOHZD."""
91f8abb @asweigart First round of personal fixes. This is an unhelpful log message.
authored
19 brokenCiphertext = breakVigenere(ciphertext)
20
21 if brokenCiphertext != None:
22 print('Copying broken ciphertext to clipboard:')
c4fd6ab @asweigart Pylint-suggested fixes.
authored
23 print(brokenCiphertext)
91f8abb @asweigart First round of personal fixes. This is an unhelpful log message.
authored
24 pyperclip.copy(brokenCiphertext)
25 else:
26 print('Failed to break encryption.')
27
f16b361 @asweigart Initial commit.
authored
28
29
30 def findRepeatSequences(ciphertext):
24fc561 @asweigart Changes from unit testing.
authored
31 # Goes through the ciphertext and finds any 3 to 5 letter sequences
32 # that are repeated. Returns a dict with the keys of the sequence and
33 # value of a list of spacings (number of letters between the repeats.)
f16b361 @asweigart Initial commit.
authored
34
24fc561 @asweigart Changes from unit testing.
authored
35 # Take out all of the non-letter characters from the ciphertext.
f16b361 @asweigart Initial commit.
authored
36 letterList = [] # start with a blank list
37 for letter in ciphertext:
38 if letter.isalpha():
39 letterList.append(letter) # only add letters to the list
40 ciphertext = ''.join(letterList) # create one string from the list
41
42 # Compile a list of seqLen-letter sequences found in the ciphertext.
43 seqSpacings = {}
c4fd6ab @asweigart Pylint-suggested fixes.
authored
44 for seqLen in range(3, 5):
f16b361 @asweigart Initial commit.
authored
45 for seqStart in range(len(ciphertext) - seqLen):
46 # Determine what the sequence is, and store it in seq
47 seq = ciphertext[seqStart:seqStart+seqLen]
48
49 # Look for this sequence in the rest of the ciphertext
50 for i in range(seqStart + seqLen, len(ciphertext) - seqLen):
51 if ciphertext[i:i + seqLen] == seq:
52 # Found a repeated sequence.
53 if seq not in seqSpacings:
24fc561 @asweigart Changes from unit testing.
authored
54 # First time a repeat was found, create a blank
55 # list for it in seqSpacings.
f16b361 @asweigart Initial commit.
authored
56 seqSpacings[seq] = []
57
58 # Append the spacing distance between the repeated
59 # sequence and the original sequence.
60 seqSpacings[seq].append(i - seqStart)
61 return seqSpacings
62
63
64 def getFactors(num):
65 # Returns a list of factors of num.
66 # For example, getFactors(28) returns [2, 14, 4, 7]
67
24fc561 @asweigart Changes from unit testing.
authored
68 # If we've calculated the factors before, they'll be in FACTOR_CACHE.
f16b361 @asweigart Initial commit.
authored
69 # In that case, just return a copy of the list of factors.
70 if num in FACTOR_CACHE:
71 return copy.copy(FACTOR_CACHE[num])
72
73 factors = [] # the list of factors found
74
75 # When finding factors, you only need to check the integers up to the
76 # square root of the number.
77 for i in range(2, int(math.sqrt(num))): # skip the factors 1 and num
78 if num % i == 0:
79 factors.append(i)
80 factors.append(int(num / i))
81
82 FACTOR_CACHE[num] = factors # add thist list to FACTOR_CACHE
83
84 return copy.copy(factors) # return a copy of this list of factors
85
86
87 def getMostCommonFactors(seqFactors):
88 # First, get a count of many times a factor occurs in seqFactors
24fc561 @asweigart Changes from unit testing.
authored
89 factorCounts = {} # key is a factor, value is how often if occurs
f16b361 @asweigart Initial commit.
authored
90 for seq in seqFactors:
91 factorList = seqFactors[seq]
92 for factor in factorList:
93 if factor not in factorCounts:
94 factorCounts[factor] = 0
95 factorCounts[factor] += 1
96
24fc561 @asweigart Changes from unit testing.
authored
97 # Second, put the factor and its count into a tuple, and make a list
98 # of these tuples so we can sort them.
f16b361 @asweigart Initial commit.
authored
99 factorsByCount = []
100 for factor in factorCounts:
24fc561 @asweigart Changes from unit testing.
authored
101 # exclude factors larger than MAX_KEY_LENGTH
102 if factor < MAX_KEY_LENGTH:
f16b361 @asweigart Initial commit.
authored
103 factorsByCount.append( (factor, factorCounts[factor]) )
104
24fc561 @asweigart Changes from unit testing.
authored
105 # sort the list by the factor count
106 factorsByCount.sort(key=lambda x: x[1], reverse=True)
107
108 # Third, go through the factorsByCount list and cut off the list
109 # after you find a factor that is not within 50% of the size of the
110 # previous factor count.
f16b361 @asweigart Initial commit.
authored
111 markCount = factorsByCount[0][1]
112 for i in range(1, len(factorsByCount)):
113 if markCount * 0.5 > factorsByCount[i][1]:
24fc561 @asweigart Changes from unit testing.
authored
114 # set factorsByCount to thelist up to i (and cut the rest)
115 factorsByCount = factorsByCount[:i]
f16b361 @asweigart Initial commit.
authored
116 break
117
118 return factorsByCount
119
120
121 def getNthLetter(nth, keyLength, message):
122 # Returns every Nth letter for each keyLength set of letters in text.
123 # E.g. getNthLetter(1, 3, 'ABCABCABC') returns 'AAA'
124 # getNthLetter(2, 3, 'ABCABCABC') returns 'BBB'
125 # getNthLetter(3, 3, 'ABCABCABC') returns 'CCC'
126 # getNthLetter(1, 5, 'ABCABCABC') returns 'AC'
127
24fc561 @asweigart Changes from unit testing.
authored
128 # Use a "regular expression" remove non-letters from the message.
f16b361 @asweigart Initial commit.
authored
129 message = nonLettersPattern.sub('', message)
130
131 i = nth - 1
132 letters = []
133 while i < len(message):
134 letters.append(message[i])
135 i += keyLength
136 return ''.join(letters)
137
138
139 def breakVigenere(ciphertext):
24fc561 @asweigart Changes from unit testing.
authored
140 # First, we need to do Kasiski Examination to figure out what the
141 # length of the ciphertext's encryption key is.
d95e7da @asweigart Changes made while making unit tests.
authored
142 if not SILENT_MODE:
143 print('Determining most likely key lengths with Kasiski Examination...')
144
f16b361 @asweigart Initial commit.
authored
145 allLikelyKeyLengths = kasiskiExamination(ciphertext)
d95e7da @asweigart Changes made while making unit tests.
authored
146 if not SILENT_MODE:
147 print('Kasiski Examination results say the most likely key lengths are: ', end='')
148 for keyLength in allLikelyKeyLengths:
149 print('%s ' % (keyLength), end='')
150 print()
151 print()
f16b361 @asweigart Initial commit.
authored
152
153 for keyLength in allLikelyKeyLengths:
24fc561 @asweigart Changes from unit testing.
authored
154 print('Attempting break with key length %s (%s possible keys)...' % (keyLength, NUM_MOST_FREQ_LETTERS ** keyLength))
f16b361 @asweigart Initial commit.
authored
155 brokenCiphertext = attemptBreakWithKeyLength(ciphertext, keyLength)
156 if brokenCiphertext != None:
157 break
158
24fc561 @asweigart Changes from unit testing.
authored
159 # If none of the key lengths we found using Kasiski Examination
160 # worked, start brute forcing through key lengths.
f16b361 @asweigart Initial commit.
authored
161 if brokenCiphertext == None:
d95e7da @asweigart Changes made while making unit tests.
authored
162 if not SILENT_MODE:
163 print('Unable to break message with likely key length(s). Brute forcing key length...')
f16b361 @asweigart Initial commit.
authored
164 for keyLength in range(1, MAX_KEY_LENGTH + 1):
24fc561 @asweigart Changes from unit testing.
authored
165 # don't re-check key lengths already tried from Kasiski
166 if keyLength not in allLikelyKeyLengths:
d95e7da @asweigart Changes made while making unit tests.
authored
167 if not SILENT_MODE:
168 print('Attempting break with key length %s (%s possible keys)...' % (keyLength, NUM_MOST_FREQ_LETTERS ** keyLength))
f16b361 @asweigart Initial commit.
authored
169 brokenCiphertext = attemptBreakWithKeyLength(ciphertext, keyLength)
170 if brokenCiphertext != None:
171 break
172
91f8abb @asweigart First round of personal fixes. This is an unhelpful log message.
authored
173 return brokenCiphertext
f16b361 @asweigart Initial commit.
authored
174
175
176 def kasiskiExamination(ciphertext):
24fc561 @asweigart Changes from unit testing.
authored
177 # Find out the sequences of 3 to 5 letters that occurr multiple times
178 # in the ciphertext. repeatedSeqs has a value like:
179 # {'EXG': [192], 'NAF': [339, 972, 633], ... }
f16b361 @asweigart Initial commit.
authored
180 repeatedSeqs = findRepeatSequences(ciphertext)
181
24fc561 @asweigart Changes from unit testing.
authored
182 # seqFactors keys are sequences, values are list of factors of the
183 # spacings. seqFactos has a value like: {'GFD': [2, 3, 4, 6, 9, 12,
184 # 18, 23, 36, 46, 69, 92, 138, 207], 'ALW': [2, 3, 4, 6, ...], ...}
f16b361 @asweigart Initial commit.
authored
185 seqFactors = {}
186 for seq in repeatedSeqs:
187 seqFactors[seq] = []
188 for spacing in repeatedSeqs[seq]:
189 seqFactors[seq].extend(getFactors(spacing))
190
191 # factorsByCount is a list of tuples: (factor, factorCount)
24fc561 @asweigart Changes from unit testing.
authored
192 # factorsByCount has a value like: [(3, 497), (2, 487), (6, 453), ...]
f16b361 @asweigart Initial commit.
authored
193 factorsByCount = getMostCommonFactors(seqFactors)
194
24fc561 @asweigart Changes from unit testing.
authored
195 # Now we extract the factor counts from factorsByCount and put them
196 # in variables named allLikelyKeyLengths and allLikelyKeyLengthsStr
197 # so that they are easier to use later.
f16b361 @asweigart Initial commit.
authored
198 allLikelyKeyLengths = []
199 for i in range(len(factorsByCount)):
200 allLikelyKeyLengths.append(factorsByCount[i][0])
201
202 return allLikelyKeyLengths
203
204
205 def attemptBreakWithKeyLength(ciphertext, mostLikelyKeyLength):
206 # Determine the most likely letters for each letter in the key.
207
208 # allFreqScores is a list of mostLikelyKeyLength number of lists.
209 # These inner lists are the freqScores list.
210 allFreqScores = []
211 for nth in range(1, mostLikelyKeyLength + 1):
212 nthLetters = getNthLetter(nth, mostLikelyKeyLength, ciphertext)
213
24fc561 @asweigart Changes from unit testing.
authored
214 # freqScores is a list of tuples like:
215 # [(<letter>, <Eng. Freq. match score>), ... ]
f16b361 @asweigart Initial commit.
authored
216 # This list is sorted by match score (a lower score means a better
217 # match. See the englishFreqMatch() comments in freqFinder).
218 freqScores = []
219 for possibleKey in LETTERS:
d95e7da @asweigart Changes made while making unit tests.
authored
220 translated = vigenereCipher.decryptMessage(possibleKey, nthLetters)
f16b361 @asweigart Initial commit.
authored
221 freqScores.append((possibleKey, freqFinder.englishFreqMatch(translated)))
222
24fc561 @asweigart Changes from unit testing.
authored
223 # Sort by match score
f16b361 @asweigart Initial commit.
authored
224 freqScores.sort(key=lambda x: x[1], reverse=True)
225
226 allFreqScores.append(freqScores[:NUM_MOST_FREQ_LETTERS])
227
d95e7da @asweigart Changes made while making unit tests.
authored
228 if not SILENT_MODE:
229 for i in range(len(allFreqScores)):
24fc561 @asweigart Changes from unit testing.
authored
230 # use i+1 so the first letter is not called the "0th" letter
d95e7da @asweigart Changes made while making unit tests.
authored
231 print('Possible letters for letter %s of the key: ' % (i + 1), end='')
232 for freqScore in allFreqScores[i]:
233 print('%s ' % freqScore[0], end='')
234 print()
f16b361 @asweigart Initial commit.
authored
235
236 # Try every combination of the most likely letters for each position
237 # in the key.
238 for indexes in itertools.product(range(NUM_MOST_FREQ_LETTERS), repeat=mostLikelyKeyLength):
239 # Create a possible key from the letters in allFreqScores
240 possibleKey = ''
241 for i in range(mostLikelyKeyLength):
242 possibleKey += allFreqScores[i][indexes[i]][0]
243
244 if not SILENT_MODE:
245 print('Attempting with key: %s' % (possibleKey))
246
d95e7da @asweigart Changes made while making unit tests.
authored
247 decryptedText = vigenereCipher.decryptMessage(possibleKey, ciphertext)
f16b361 @asweigart Initial commit.
authored
248
249 if freqFinder.englishTrigramMatch(decryptedText):
250 if detectEnglish.isEnglish(decryptedText):
24fc561 @asweigart Changes from unit testing.
authored
251 # Check with the user to see if the key has been found.
f16b361 @asweigart Initial commit.
authored
252 print()
253 print('Possible encryption break:')
254 print('Key ' + str(possibleKey) + ': ' + decryptedText[:200])
255 print()
256 print('Enter D for done, or just press Enter to continue breaking:')
257 response = input('> ')
258
91f8abb @asweigart First round of personal fixes. This is an unhelpful log message.
authored
259 if response.strip().upper().startswith('D'):
f16b361 @asweigart Initial commit.
authored
260 return decryptedText
261
262 # No English-looking decryption found with any of the possible keys,
263 # so return None.
264 return None
265
266
91f8abb @asweigart First round of personal fixes. This is an unhelpful log message.
authored
267 # If vigenereBreaker.py is run (instead of imported as a module) call
268 # the main() function.
f16b361 @asweigart Initial commit.
authored
269 if __name__ == '__main__':
0a11366 @asweigart Minor changes.
authored
270 main()
Something went wrong with that request. Please try again.