Permalink
Browse files

Hacking code improvements. Reorder some code to make it easier to explain in the book. Now using a smaller (but more sensible) dictionary file.
  • Loading branch information...
1 parent 4a984cd commit 1b3897bfe9ed7b8328a9e35899ca7a4d5d1ae45b @asweigart committed Jan 24, 2013
Showing with 45,799 additions and 86,907 deletions.
  1. +45,402 −86,489 dictionary.txt
  2. +118 −139 simpleSubHacker.py
  3. +279 −279 vigenereHacker.py
View
131,891 dictionary.txt
45,402 additions, 86,489 deletions not shown because the diff is too large. Please use a local Git client to view these changes.
View
257 simpleSubHacker.py
@@ -7,152 +7,115 @@
The first letter to appear in the word is assigned 0, the second letter 1,
and so on. So the word pattern for 'cucumber' is '0.1.0.1.2.3.4.5' because
the first letter 'c' occurs as the first and third letter in the word
-'cucumber'. So the pattern has '0' as the first and third number.
-
-The pattern for 'abc' or 'cba' is '0.1.2'
-The pattern for 'aaa' or 'bbb' is '0.0.0'
-The pattern for 'hello' is '0.1.2.2.3'
-The pattern for 'advise' or 'closet' is '0.1.2.3.4.5' (they have only
+'cucumber'. So the pattern has '0' as the first and third number. The 'u'
+occurs as the second and fourth letter, so '1' is used for the second and
+fourth number.
+The numbers are delimited by periods to separate them.
+
+The word pattern for 'abc' or 'cba' is '0.1.2'
+The word pattern for 'aaa' or 'bbb' is '0.0.0'
+The word pattern for 'hello' is '0.1.2.2.3'
+The word pattern for 'advise' or 'closet' is '0.1.2.3.4.5' (they have only
unique letters in the word)
In this program, a "candidate" is a possible English word that a
ciphertext word can decrypt to.
For example, 'cucumber', 'mementos', and 'cocoanut' are candidates for the
-ciphertext word 'JHJHWDOV' (because all of them have the pattern
+ciphertext word 'JHJHWDOV' (because all of these words have the pattern
'0.1.0.1.2.3.4.5')
-In this program, a "map" or "mapping" is a dictionary where the keys are
-the letters in LETTERS (e.g. 'A', 'B', 'C', etc) and the values are lists
-of letters that could possibly be the correct decryption. If the list is
-blank, this means that it is unknown what this letter could decrypt to.
+In this program, a "map" or "letter mapping" is a dictionary where the
+keys are single-letter strings (e.g. 'A', 'B', 'C', etc) and the values
+are lists of single-letter strings that could possibly be the correct
+decryption for the letter in the key. If the list is blank, this means
+that it is unknown what this letter could decrypt to.
"""
-import os, simpleSubCipher, re, copy, makeWordPatterns
+import os, re, copy, pprint, pyperclip, simpleSubCipher, makeWordPatterns
if not os.path.exists('wordPatterns.py'):
makeWordPatterns.main() # create the wordPatterns.py file
import wordPatterns
-LETTERS = simpleSubCipher.LETTERS
-
+LETTERS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+nonLettersOrSpacePattern = re.compile('[^A-Z\s]')
def main():
message = 'Sy l nlx sr pyyacao l ylwj eiswi upar lulsxrj isr sxrjsxwjr, ia esmm rwctjsxsza sj wmpramh, lxo txmarr jia aqsoaxwa sr pqaceiamnsxu, ia esmm caytra jp famsaqa sj. Sy, px jia pjiac ilxo, ia sr pyyacao rpnajisxu eiswi lyypcor l calrpx ypc lwjsxu sx lwwpcolxwa jp isr sxrjsxwjr, ia esmm lwwabj sj aqax px jia rmsuijarj aqsoaxwa. Jia pcsusx py nhjir sr agbmlsxao sx jisr elh. -Facjclxo Ctrramm'
- NONLETTERSPATTERN = re.compile('[^A-Z\s]')
- ciphertext = NONLETTERSPATTERN.sub('', message.upper()).split()
-
- # allCandidates is a dict with keys of a single ciphertext word, and
- # values of the possible word patterns
- # e.g. allCandidates == {'PYYACAO': ['alleged', 'ammeter', ...etc],
- # 'EISWI': ['aerie', 'aging', 'algol', ...etc],
- # 'LULSXRJ': ['abalone', 'abashed', ...etc],
- # ...etc }
- allCandidates = {}
- for cipherWord in ciphertext:
- pattern = makeWordPatterns.getWordPattern(cipherWord)
- if pattern not in wordPatterns.allPatterns:
- continue
- allCandidates[cipherWord] = copy.copy(wordPatterns.allPatterns[pattern])
-
- # convert candidate words to uppercase
- for i in range(len(allCandidates[cipherWord])):
- allCandidates[cipherWord][i] = allCandidates[cipherWord][i].upper()
-
- # determine the possible valid ciphertext translations
+ # Determine the possible valid ciphertext translations.
print('Hacking...')
- # Python programs can be stopped at any time by pressing Ctrl-C (on
- # Windows) or Ctrl-D (on Mac and Linux)
- print('(Press Ctrl-C or Ctrl-D to quit at any time.)')
- theMap = hackSimpleSub(getBlankMapping(), allCandidates)
+ letterMapping = hackSimpleSub(message)
- # display the results to the user.
- print('Done.')
- print()
- printMapping(theMap)
+ # Display the results to the user.
+ print('Mapping:')
+ pprint.pprint(letterMapping)
print()
print('Original ciphertext:')
print(message)
print()
- print('Hacked message:')
- print(decryptWithMap(message, theMap))
- print()
-
-
-def hackSimpleSub(theMap, allCandidates):
- # allCandidate's format:
- # { 'cipherword1': ['candidate1a', 'candidate1b', ...],
- # 'cipherword2': ['candidate2a', 'candidate2b', ...],
- # ...}
-
- for cipherWord in allCandidates.keys():
- # get a new mapping for each ciphertext word
- newMap = getBlankMapping()
-
- # create a map that has all the letters' possible candidate
- # decryptions added to it
- for candidate in allCandidates[cipherWord]:
- newMap = addLettersToMapping(newMap, cipherWord, candidate)
-
- # intersect this new map with the existing map
- theMap = intersectMappings(theMap, newMap)
-
- # remove any solved letters from the other possible mappings
- theMap = removeSolvedLettersFromMapping(theMap)
-
- return theMap
+ print('Copying hacked message to clipboard:')
+ hackedMessage = decryptWithLetterMapping(message, letterMapping)
+ pyperclip.copy(hackedMessage)
+ print(hackedMessage)
def getBlankMapping():
- # Returns a dict where the keys are single-character strings of the
- # uppercase letters, and the values are blank lists.
+ # Returns a dict where the keys are uppercase single-letter strings
+ # and the values are blank lists.
# E.g. {'A': [], 'B': [], 'C': [], ...etc}
- theMap = {}
+ #
+ # We will call the single-letter strings in the keys "cipher letters"
+ # and the single-letter strings in the value's list "possible
+ # decryption letters".
+ letterMapping = {}
for letter in LETTERS:
- theMap[letter] = []
- return theMap
+ letterMapping[letter] = []
+ return letterMapping
-def addLettersToMapping(theMap, cipherWord, candidate):
- # The theMap parameter is a "mapping" data structure that this
- # function modifies. (See the comments at the top of this file.)
+def addLettersToMapping(letterMapping, cipherWord, candidate):
+ # The letterMapping parameter is a "letter mapping" data structure
+ # that this function modifies.
# The cipherWord parameter is a string value of the ciphertext word.
# The candidate parameter is a possible English word that the
# cipherWord could decrypt to.
- # This function modifies theMap so that the mappings of the
- # cipherWord's letters to the candidate's letters are added to theMap.
+ # This function adds the letters of the candidate as possible new
+ # decryptions for the letters of the cipher word to the letter mapping
+ # data structure.
+ letterMapping = copy.deepcopy(letterMapping)
for i in range(len(cipherWord)):
- if candidate[i] not in theMap[cipherWord[i]]:
- theMap[cipherWord[i]].append(candidate[i])
- return theMap
+ if candidate[i] not in letterMapping[cipherWord[i]]:
+ letterMapping[cipherWord[i]].append(candidate[i])
+ return letterMapping
def intersectMappings(mapA, mapB):
# To intersect two maps, create a blank map, and then add only the
- # candidate decryption letters if they exist in both maps.
- intersectedMap = getBlankMapping()
- for letter in mapA.keys():
+ # possible decryption letters if they exist in BOTH maps.
+ intersectedMapping = getBlankMapping()
+ for letter in LETTERS:
- # An empty list means "any letter is possible". So just copy the
- # other map entirely.
+ # An empty list means "any letter is possible". In this case just
+ # copy the other map entirely.
if mapA[letter] == []:
- intersectedMap[letter] = copy.copy(mapB[letter])
+ intersectedMapping[letter] = copy.copy(mapB[letter])
elif mapB[letter] == []:
- intersectedMap[letter] = copy.copy(mapA[letter])
+ intersectedMapping[letter] = copy.copy(mapA[letter])
else:
# If a letter in mapA[letter] exists in mapB[letter], add
- # that letter to intersectedMap[letter].
+ # that letter to intersectedMapping[letter].
for mappedLetter in mapA[letter]:
if mappedLetter in mapB[letter]:
- intersectedMap[letter].append(mappedLetter)
- return intersectedMap
+ intersectedMapping[letter].append(mappedLetter)
+ return intersectedMapping
-def removeSolvedLettersFromMapping(theMap):
- # Letters in the mapping that map to only one letter are consider
+def removeSolvedLettersFromMapping(letterMapping):
+ # Cipher letters in the mapping that map to only one letter are
# "solved" and can be removed from the other letters.
# For example, if 'A' maps to possible letters ['M', 'N'], and 'B'
# maps to ['N'], then we know that 'B' must map to 'N', so we can
@@ -164,70 +127,86 @@ def removeSolvedLettersFromMapping(theMap):
solvedLetters = None
while previousSolvedLetters != solvedLetters:
# This loop will break when solvedLetters is not changed by the
- # reduction process (and is the same as previousSolvedLetters).
+ # reduction process inside this loop (meaning it is the same
+ # as previousSolvedLetters).
previousSolvedLetters = solvedLetters
solvedLetters = []
# solvedLetters will be a list of English letters that have one
- # and only one possible mapping in theMap
- for i in theMap:
- if len(theMap[i]) == 1:
- solvedLetters.append(theMap[i][0])
+ # and only one possible mapping in letterMapping
+ for letter in LETTERS:
+ if len(letterMapping[letter]) == 1:
+ solvedLetters.append(letterMapping[letter][0])
# If a letter is solved, then it cannot possibly be a possible
# decryption letter for a different ciphertext letter, so we
# should remove it.
- for i in theMap:
+ for letter in LETTERS:
for s in solvedLetters:
- if len(theMap[i]) != 1 and s in theMap[i]:
- theMap[i].remove(s)
+ if len(letterMapping[letter]) != 1 and s in letterMapping[letter]:
+ letterMapping[letter].remove(s)
# With a letter removed, it's possible that we may have reduced
# other ciphertext letters to one and only one solution, so keep
# looping until previousSolvedLetters == solvedLetters. At that
- # point, we'll know we can't rmemove any more letters.
- return theMap
+ # point, we'll know we can't remove any more letters.
+ return letterMapping
-def printMapping(theMap):
- # Display a mapping data structure on the screen.
- print('Mapping:')
- print(' ' + ' '.join(list(LETTERS)))
- print(' ' + ' '.join('=' * len(LETTERS)))
-
- for i in range(len(LETTERS)):
- print(' ', end='')
- foundAnyLetters = False
- for j in LETTERS:
- # theMap[j] points to a list of single-character strings that
- # are potential solutions for the ciphertext letter in j.
- if len(theMap[j]) > i:
- foundAnyLetters = True
- print(theMap[j][i] + ' ', end='')
- else:
- print(' ', end='')
- print()
- if foundAnyLetters == False:
- break
-
-
-def decryptWithMap(ciphertext, theMap):
- # This function will do a simple sub decryption of ciphertext with the
- # information in theMap, instead of a simple sub key.
-
- # First create a simple sub key from the theMap mapping.
+def hackSimpleSub(message):
+ letterMapping = getBlankMapping()
+
+ # allCandidates is a dict with keys of a single ciphertext word, and
+ # values that are lists of the candidate words sharing its word pattern.
+ # e.g. allCandidates == {'PYYACAO': ['alleged', 'ammeter', ...etc],
+ # 'EISWI': ['aerie', 'aging', 'algol', ...etc],
+ # 'LULSXRJ': ['abalone', 'abashed', ...etc],
+ # ...etc }
+ allCandidates = {}
+ message = nonLettersOrSpacePattern.sub('', message.upper()).split()
+ for cipherWord in message:
+ if cipherWord in allCandidates:
+ continue # we've already done this word, so continue
+ pattern = makeWordPatterns.getWordPattern(cipherWord)
+ if pattern not in wordPatterns.allPatterns:
+ continue
+ allCandidates[cipherWord] = copy.copy(wordPatterns.allPatterns[pattern])
+
+ for cipherWord in allCandidates.keys():
+ # Get a new mapping for each ciphertext word.
+ newMap = getBlankMapping()
+
+ # Create a map that has all the letters' possible candidate
+ # decryptions.
+ for candidate in allCandidates[cipherWord]:
+ newMap = addLettersToMapping(newMap, cipherWord, candidate)
+
+ # Intersect this new map with the existing map.
+ letterMapping = intersectMappings(letterMapping, newMap)
+
+ # Remove any solved letters from the other possible mappings.
+ letterMapping = removeSolvedLettersFromMapping(letterMapping)
+
+ return letterMapping
+
+
+def decryptWithLetterMapping(ciphertext, letterMapping):
+ # Return a string of the ciphertext decrypted with the letter mapping,
+ # with any ambiguous decrypted letters replaced with a _ underscore.
+
+ # First create a simple sub key from the letterMapping mapping.
key = ['x'] * len(LETTERS)
- for letter in theMap.keys():
- if len(theMap[letter]) == 1:
+ for letter in LETTERS:
+ if len(letterMapping[letter]) == 1:
# If only one possible letter mapping, add it to the key.
- keyIndex = LETTERS.find(theMap[letter][0].upper())
- key[keyIndex] = letter.upper()
+ keyIndex = LETTERS.find(letterMapping[letter][0])
+ key[keyIndex] = letter
else:
- ciphertext = ciphertext.replace(letter, '_')
+ ciphertext = ciphertext.replace(letter.lower(), '_')
+ ciphertext = ciphertext.replace(letter.upper(), '_')
key = ''.join(key)
- # Then decrypt the original ciphertext with this key and return the
- # decryption.
+ # With the key we've created, decrypt the message.
return simpleSubCipher.decryptMessage(key, ciphertext)
View
558 vigenereHacker.py
@@ -1,280 +1,280 @@
-# Vigenere Cipher Hacker
-# http://inventwithpython.com/hacking (BSD Licensed)
-
-import copy, math, itertools, re
-import vigenereCipher, pyperclip, freqFinder, detectEnglish
-LETTERS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
-
-MAX_KEY_LENGTH = 16
-NUM_MOST_FREQ_LETTERS = 3
-SILENT_MODE = False
-FACTOR_CACHE = {} # a dictionary that stores lists of factors
-
-NONLETTERSPATTERN = re.compile('[^A-Z]')
-
-def main():
- # Instead of typing this ciphertext out, you can copy & paste it
- # from http://invpy.com/vigenereHacker.py
- ciphertext = """Adiz Avtzqeci Tmzubb wsa m Pmilqev halpqavtakuoi, lgouqdaf, kdmktsvmztsl, izr xoexghzr kkusitaaf. Vz wsa twbhdg ubalmmzhdad qz hce vmhsgohuqbo ox kaakulmd gxiwvos, krgdurdny i rcmmstugvtawz ca tzm ocicwxfg jf "stscmilpy" oid "uwydptsbuci" wabt hce Lcdwig eiovdnw. Bgfdny qe kddwtk qjnkqpsmev ba pz tzm roohwz at xoexghzr kkusicw izr vrlqrwxist uboedtuuznum. Pimifo Icmlv Emf DI, Lcdwig owdyzd xwd hce Ywhsmnemzh Xovm mby Cqxtsm Supacg (GUKE) oo Bdmfqclwg Bomk, Tzuhvif'a ocyetzqofifo ositjm. Rcm a lqys ce oie vzav wr Vpt 8, lpq gzclqab mekxabnittq tjr Ymdavn fihog cjgbhvnstkgds. Zm psqikmp o iuejqf jf lmoviiicqg aoj jdsvkavs Uzreiz qdpzmdg, dnutgrdny bts helpar jf lpq pjmtm, mb zlwkffjmwktoiiuix avczqzs ohsb ocplv nuby swbfwigk naf ohw Mzwbms umqcifm. Mtoej bts raj pq kjrcmp oo tzm Zooigvmz Khqauqvl Dincmalwdm, rhwzq vz cjmmhzd gvq ca tzm rwmsl lqgdgfa rcm a kbafzd-hzaumae kaakulmd, hce SKQ. Wi 1948 Tmzubb jgqzsy Msf Zsrmsv'e Qjmhcfwig Dincmalwdm vt Eizqcekbqf Pnadqfnilg, ivzrw pq onsaafsy if bts yenmxckmwvf ca tzm Yoiczmehzr uwydptwze oid tmoohe avfsmekbqr dn eifvzmsbuqvl tqazjgq. Pq kmolm m dvpwz ab ohw ktshiuix pvsaa at hojxtcbefmewn, afl bfzdakfsy okkuzgalqzu xhwuuqvl jmmqoigve gpcz ie hce Tmxcpsgd-Lvvbgbubnkq zqoxtawz, kciup isme xqdgo otaqfqev qz hce 1960k. Bgfdny'a tchokmjivlabk fzsmtfsy if i ofdmavmz krgaqqptawz wi 1952, wzmz vjmgaqlpad iohn wwzq goidt uzgeyix wi tzm Gbdtwl Wwigvwy. Vz aukqdoev bdsvtemzh rilp rshadm tcmmgvqg (xhwuuqvl uiehmalqab) vs sv mzoejvmhdvw ba dmikwz. Hpravs rdev qz 1954, xpsl whsm tow iszkk jqtjrw pug 42id tqdhcdsg, rfjm ugmbddw xawnofqzu. Vn avcizsl lqhzreqzsy tzif vds vmmhc wsa eidcalq; vds ewfvzr svp gjmw wfvzrk jqzdenmp vds vmmhc wsa mqxivmzhvl. Gv 10 Esktwunsm 2009, fgtxcrifo mb Dnlmdbzt uiydviyv, Nfdtaat Dmiem Ywiikbqf Bojlab Wrgez avdw iz cafakuog pmjxwx ahwxcby gv nscadn at ohw Jdwoikp scqejvysit xwd "hce sxboglavs kvy zm ion tjmmhzd." 
Sa at Haq 2012 i bfdvsbq azmtmd'g widt ion bwnafz tzm Tcpsw wr Zjrva ivdcz eaigd yzmbo Tmzubb a kbmhptgzk dvrvwz wa efiohzd."""
- hackedMessage = hackVigenere(ciphertext)
-
- if hackedMessage != None:
- print('Copying hacked message to clipboard:')
- print(hackedMessage)
- pyperclip.copy(hackedMessage)
- else:
- print('Failed to hack encryption.')
-
-
-
-def findRepeatSequences(ciphertext):
- # Goes through the ciphertext and finds any 3 to 5 letter sequences
- # that are repeated. Returns a dict with the keys of the sequence and
- # value of a list of spacings (number of letters between the repeats.)
-
- # Take out all of the non-letter characters from the ciphertext.
- letterList = [] # start with a blank list
- for letter in ciphertext:
- if letter.isalpha():
- letterList.append(letter) # only add letters to the list
- ciphertext = ''.join(letterList) # create one string from the list
-
- # Compile a list of seqLen-letter sequences found in the ciphertext.
- seqSpacings = {}
- for seqLen in range(3, 5):
- for seqStart in range(len(ciphertext) - seqLen):
- # Determine what the sequence is, and store it in seq
- seq = ciphertext[seqStart:seqStart+seqLen]
-
- # Look for this sequence in the rest of the ciphertext
- for i in range(seqStart + seqLen, len(ciphertext) - seqLen):
- if ciphertext[i:i + seqLen] == seq:
- # Found a repeated sequence.
- if seq not in seqSpacings:
- # First time a repeat was found, create a blank
- # list for it in seqSpacings.
- seqSpacings[seq] = []
-
- # Append the spacing distance between the repeated
- # sequence and the original sequence.
- seqSpacings[seq].append(i - seqStart)
- return seqSpacings
-
-
-def getFactors(num):
- # Returns a list of factors of num.
- # For example, getFactors(28) returns [2, 14, 4, 7]
-
- # If we've calculated the factors before, they'll be in FACTOR_CACHE.
- # In that case, just return a copy of the list of factors.
- if num in FACTOR_CACHE:
- return copy.copy(FACTOR_CACHE[num])
-
- factors = [] # the list of factors found
-
- # When finding factors, you only need to check the integers up to the
- # square root of the number.
- for i in range(2, int(math.sqrt(num))): # skip the factors 1 and num
- if num % i == 0:
- factors.append(i)
- factors.append(int(num / i))
-
- FACTOR_CACHE[num] = factors # add thist list to FACTOR_CACHE
-
- return copy.copy(factors) # return a copy of this list of factors
-
-
-def getMostCommonFactors(seqFactors):
- # First, get a count of many times a factor occurs in seqFactors
- factorCounts = {} # key is a factor, value is how often if occurs
- for seq in seqFactors:
- factorList = seqFactors[seq]
- for factor in factorList:
- if factor not in factorCounts:
- factorCounts[factor] = 0
- factorCounts[factor] += 1
-
- # Second, put the factor and its count into a tuple, and make a list
- # of these tuples so we can sort them.
- factorsByCount = []
- for factor in factorCounts:
- # exclude factors larger than MAX_KEY_LENGTH
- if factor < MAX_KEY_LENGTH:
- factorsByCount.append( (factor, factorCounts[factor]) )
-
- # sort the list by the factor count
- factorsByCount.sort(key=lambda x: x[1], reverse=True)
-
- # Third, go through the factorsByCount list and cut off the list
- # after you find a factor that is not within 50% of the size of the
- # previous factor count.
- markCount = factorsByCount[0][1]
- for i in range(1, len(factorsByCount)):
- if markCount * 0.5 > factorsByCount[i][1]:
- # set factorsByCount to thelist up to i (and cut the rest)
- factorsByCount = factorsByCount[:i]
- break
-
- return factorsByCount
-
-
-def getNthLetter(nth, keyLength, message):
- # Returns every Nth letter for each keyLength set of letters in text.
- # E.g. getNthLetter(1, 3, 'ABCABCABC') returns 'AAA'
- # getNthLetter(2, 3, 'ABCABCABC') returns 'BBB'
- # getNthLetter(3, 3, 'ABCABCABC') returns 'CCC'
- # getNthLetter(1, 5, 'ABCABCABC') returns 'AC'
-
- # Use a "regular expression" remove non-letters from the message.
- message = NONLETTERSPATTERN.sub('', message)
-
- i = nth - 1
- letters = []
- while i < len(message):
- letters.append(message[i])
- i += keyLength
- return ''.join(letters)
-
-
-def hackVigenere(ciphertext):
- # First, we need to do Kasiski Examination to figure out what the
- # length of the ciphertext's encryption key is.
- if not SILENT_MODE:
- print('Determining most likely key lengths with Kasiski Examination...')
-
- allLikelyKeyLengths = kasiskiExamination(ciphertext.upper())
- if not SILENT_MODE:
- print('Kasiski Examination results say the most likely key lengths are: ', end='')
- for keyLength in allLikelyKeyLengths:
- print('%s ' % (keyLength), end='')
- print()
- print()
-
- for keyLength in allLikelyKeyLengths:
- print('Attempting hack with key length %s (%s possible keys)...' % (keyLength, NUM_MOST_FREQ_LETTERS ** keyLength))
- hackedMessage = attemptHackWithKeyLength(ciphertext.upper(), keyLength)
- if hackedMessage != None:
- break
-
- # If none of the key lengths we found using Kasiski Examination
- # worked, start brute forcing through key lengths.
- if hackedMessage == None:
- if not SILENT_MODE:
- print('Unable to hack message with likely key length(s). Brute forcing key length...')
- for keyLength in range(1, MAX_KEY_LENGTH + 1):
- # don't re-check key lengths already tried from Kasiski
- if keyLength not in allLikelyKeyLengths:
- if not SILENT_MODE:
- print('Attempting hack with key length %s (%s possible keys)...' % (keyLength, NUM_MOST_FREQ_LETTERS ** keyLength))
- hackedMessage = attemptHackWithKeyLength(ciphertext.upper(), keyLength)
- if hackedMessage != None:
- break
-
- if hackedMessage != None:
- # Set the broken ciphertext to the original casing.
- origCase = []
- for i in range(len(ciphertext)):
- if ciphertext[i].isupper():
- origCase.append(hackedMessage[i].upper())
- else:
- origCase.append(hackedMessage[i].lower())
- hackedMessage = ''.join(origCase)
-
- return hackedMessage
-
-
-def kasiskiExamination(ciphertext):
- # Find out the sequences of 3 to 5 letters that occurr multiple times
- # in the ciphertext. repeatedSeqs has a value like:
- # {'EXG': [192], 'NAF': [339, 972, 633], ... }
- repeatedSeqs = findRepeatSequences(ciphertext)
-
- # seqFactors keys are sequences, values are list of factors of the
- # spacings. seqFactos has a value like: {'GFD': [2, 3, 4, 6, 9, 12,
- # 18, 23, 36, 46, 69, 92, 138, 207], 'ALW': [2, 3, 4, 6, ...], ...}
- seqFactors = {}
- for seq in repeatedSeqs:
- seqFactors[seq] = []
- for spacing in repeatedSeqs[seq]:
- seqFactors[seq].extend(getFactors(spacing))
-
- # factorsByCount is a list of tuples: (factor, factorCount)
- # factorsByCount has a value like: [(3, 497), (2, 487), (6, 453), ...]
- factorsByCount = getMostCommonFactors(seqFactors)
-
- # Now we extract the factor counts from factorsByCount and put them
- # in variables named allLikelyKeyLengths and allLikelyKeyLengthsStr
- # so that they are easier to use later.
- allLikelyKeyLengths = []
- for i in range(len(factorsByCount)):
- allLikelyKeyLengths.append(factorsByCount[i][0])
-
- return allLikelyKeyLengths
-
-
-def attemptHackWithKeyLength(ciphertext, mostLikelyKeyLength):
- # Determine the most likely letters for each letter in the key.
-
- # allFreqScores is a list of mostLikelyKeyLength number of lists.
- # These inner lists are the freqScores list.
- allFreqScores = []
- for nth in range(1, mostLikelyKeyLength + 1):
- nthLetters = getNthLetter(nth, mostLikelyKeyLength, ciphertext)
-
- # freqScores is a list of tuples like:
- # [(<letter>, <Eng. Freq. match score>), ... ]
- # This list is sorted by match score (a lower score means a better
- # match. See the englishFreqMatch() comments in freqFinder).
- freqScores = []
- for possibleKey in LETTERS:
- translated = vigenereCipher.decryptMessage(possibleKey, nthLetters)
- freqScores.append((possibleKey, freqFinder.englishFreqMatch(translated)))
-
- # Sort by match score
- freqScores.sort(key=lambda x: x[1], reverse=True)
-
- allFreqScores.append(freqScores[:NUM_MOST_FREQ_LETTERS])
-
- if not SILENT_MODE:
- for i in range(len(allFreqScores)):
- # use i+1 so the first letter is not called the "0th" letter
- print('Possible letters for letter %s of the key: ' % (i + 1), end='')
- for freqScore in allFreqScores[i]:
- print('%s ' % freqScore[0], end='')
- print()
-
- # Try every combination of the most likely letters for each position
- # in the key.
- for indexes in itertools.product(range(NUM_MOST_FREQ_LETTERS), repeat=mostLikelyKeyLength):
- # Create a possible key from the letters in allFreqScores
- possibleKey = ''
- for i in range(mostLikelyKeyLength):
- possibleKey += allFreqScores[i][indexes[i]][0]
-
- if not SILENT_MODE:
- print('Attempting with key: %s' % (possibleKey))
-
- decryptedText = vigenereCipher.decryptMessage(possibleKey, ciphertext)
-
- if freqFinder.englishTrigramMatch(decryptedText):
- if detectEnglish.isEnglish(decryptedText):
- # Check with the user to see if the key has been found.
- print()
- print('Possible encryption hack:')
- print('Key ' + str(possibleKey) + ': ' + decryptedText[:200])
- print()
- print('Enter D for done, or just press Enter to continue hacking:')
- response = input('> ')
-
- if response.strip().upper().startswith('D'):
- return decryptedText
-
- # No English-looking decryption found with any of the possible keys,
- # so return None.
- return None
-
-
-# If vigenereHacker.py is run (instead of imported as a module) call
-# the main() function.
-if __name__ == '__main__':
+# Vigenere Cipher Hacker
+# http://inventwithpython.com/hacking (BSD Licensed)
+
+import copy, math, itertools, re
+import vigenereCipher, pyperclip, freqAnalysis, detectEnglish
+LETTERS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+
+MAX_KEY_LENGTH = 16
+NUM_MOST_FREQ_LETTERS = 3
+SILENT_MODE = False
+FACTOR_CACHE = {} # a dictionary that stores lists of factors
+
+nonLettersPattern = re.compile('[^A-Z]')
+
+
+def main():
+ # Instead of typing this ciphertext out, you can copy & paste it
+ # from http://invpy.com/vigenereHacker.py
+ ciphertext = """Adiz Avtzqeci Tmzubb wsa m Pmilqev halpqavtakuoi, lgouqdaf, kdmktsvmztsl, izr xoexghzr kkusitaaf. Vz wsa twbhdg ubalmmzhdad qz hce vmhsgohuqbo ox kaakulmd gxiwvos, krgdurdny i rcmmstugvtawz ca tzm ocicwxfg jf "stscmilpy" oid "uwydptsbuci" wabt hce Lcdwig eiovdnw. Bgfdny qe kddwtk qjnkqpsmev ba pz tzm roohwz at xoexghzr kkusicw izr vrlqrwxist uboedtuuznum. Pimifo Icmlv Emf DI, Lcdwig owdyzd xwd hce Ywhsmnemzh Xovm mby Cqxtsm Supacg (GUKE) oo Bdmfqclwg Bomk, Tzuhvif'a ocyetzqofifo ositjm. Rcm a lqys ce oie vzav wr Vpt 8, lpq gzclqab mekxabnittq tjr Ymdavn fihog cjgbhvnstkgds. Zm psqikmp o iuejqf jf lmoviiicqg aoj jdsvkavs Uzreiz qdpzmdg, dnutgrdny bts helpar jf lpq pjmtm, mb zlwkffjmwktoiiuix avczqzs ohsb ocplv nuby swbfwigk naf ohw Mzwbms umqcifm. Mtoej bts raj pq kjrcmp oo tzm Zooigvmz Khqauqvl Dincmalwdm, rhwzq vz cjmmhzd gvq ca tzm rwmsl lqgdgfa rcm a kbafzd-hzaumae kaakulmd, hce SKQ. Wi 1948 Tmzubb jgqzsy Msf Zsrmsv'e Qjmhcfwig Dincmalwdm vt Eizqcekbqf Pnadqfnilg, ivzrw pq onsaafsy if bts yenmxckmwvf ca tzm Yoiczmehzr uwydptwze oid tmoohe avfsmekbqr dn eifvzmsbuqvl tqazjgq. Pq kmolm m dvpwz ab ohw ktshiuix pvsaa at hojxtcbefmewn, afl bfzdakfsy okkuzgalqzu xhwuuqvl jmmqoigve gpcz ie hce Tmxcpsgd-Lvvbgbubnkq zqoxtawz, kciup isme xqdgo otaqfqev qz hce 1960k. Bgfdny'a tchokmjivlabk fzsmtfsy if i ofdmavmz krgaqqptawz wi 1952, wzmz vjmgaqlpad iohn wwzq goidt uzgeyix wi tzm Gbdtwl Wwigvwy. Vz aukqdoev bdsvtemzh rilp rshadm tcmmgvqg (xhwuuqvl uiehmalqab) vs sv mzoejvmhdvw ba dmikwz. Hpravs rdev qz 1954, xpsl whsm tow iszkk jqtjrw pug 42id tqdhcdsg, rfjm ugmbddw xawnofqzu. Vn avcizsl lqhzreqzsy tzif vds vmmhc wsa eidcalq; vds ewfvzr svp gjmw wfvzrk jqzdenmp vds vmmhc wsa mqxivmzhvl. Gv 10 Esktwunsm 2009, fgtxcrifo mb Dnlmdbzt uiydviyv, Nfdtaat Dmiem Ywiikbqf Bojlab Wrgez avdw iz cafakuog pmjxwx ahwxcby gv nscadn at ohw Jdwoikp scqejvysit xwd "hce sxboglavs kvy zm ion tjmmhzd." 
Sa at Haq 2012 i bfdvsbq azmtmd'g widt ion bwnafz tzm Tcpsw wr Zjrva ivdcz eaigd yzmbo Tmzubb a kbmhptgzk dvrvwz wa efiohzd."""
+ hackedMessage = hackVigenere(ciphertext)
+
+ if hackedMessage != None:
+ print('Copying hacked message to clipboard:')
+ print(hackedMessage)
+ pyperclip.copy(hackedMessage)
+ else:
+ print('Failed to hack encryption.')
+
+
+def findRepeatSequences(ciphertext):
+ # Goes through the ciphertext and finds any 3 to 5 letter sequences
+ # that are repeated. Returns a dict with the keys of the sequence and
+ # value of a list of spacings (number of letters between the repeats.)
+
+ # Take out all of the non-letter characters from the ciphertext.
+ letterList = [] # start with a blank list
+ for letter in ciphertext:
+ if letter.isalpha():
+ letterList.append(letter) # only add letters to the list
+ ciphertext = ''.join(letterList) # create one string from the list
+
+ # Compile a list of seqLen-letter sequences found in the ciphertext.
+ seqSpacings = {}
+ for seqLen in range(3, 5):
+ for seqStart in range(len(ciphertext) - seqLen):
+ # Determine what the sequence is, and store it in seq
+ seq = ciphertext[seqStart:seqStart+seqLen]
+
+ # Look for this sequence in the rest of the ciphertext
+ for i in range(seqStart + seqLen, len(ciphertext) - seqLen):
+ if ciphertext[i:i + seqLen] == seq:
+ # Found a repeated sequence.
+ if seq not in seqSpacings:
+ # First time a repeat was found, create a blank
+ # list for it in seqSpacings.
+ seqSpacings[seq] = []
+
+ # Append the spacing distance between the repeated
+ # sequence and the original sequence.
+ seqSpacings[seq].append(i - seqStart)
+ return seqSpacings
+
+
+def getFactors(num):
+ # Returns a list of factors of num.
+ # For example, getFactors(28) returns [2, 14, 4, 7]
+
+ # If we've calculated the factors before, they'll be in FACTOR_CACHE.
+ # In that case, just return a copy of the list of factors.
+ if num in FACTOR_CACHE:
+ return copy.copy(FACTOR_CACHE[num])
+
+ factors = [] # the list of factors found
+
+ # When finding factors, you only need to check the integers up to the
+ # square root of the number.
+ for i in range(2, int(math.sqrt(num))): # skip the factors 1 and num
+ if num % i == 0:
+ factors.append(i)
+ factors.append(int(num / i))
+
+ FACTOR_CACHE[num] = factors # add thist list to FACTOR_CACHE
+
+ return copy.copy(factors) # return a copy of this list of factors
+
+
+def getMostCommonFactors(seqFactors):
+ # First, get a count of how many times a factor occurs in seqFactors
+ factorCounts = {} # key is a factor, value is how often if occurs
+ for seq in seqFactors:
+ factorList = seqFactors[seq]
+ for factor in factorList:
+ if factor not in factorCounts:
+ factorCounts[factor] = 0
+ factorCounts[factor] += 1
+
+ # Second, put the factor and its count into a tuple, and make a list
+ # of these tuples so we can sort them.
+ factorsByCount = []
+ for factor in factorCounts:
+ # exclude factors larger than MAX_KEY_LENGTH
+ if factor < MAX_KEY_LENGTH:
+ factorsByCount.append( (factor, factorCounts[factor]) )
+
+ # sort the list by the factor count
+ factorsByCount.sort(key=freqAnalysis.getItemAtIndexOne, reverse=True)
+
+ # Third, go through the factorsByCount list and cut off the list
+ # after you find a factor that is not within 50% of the size of the
+ # previous factor count.
+ markCount = factorsByCount[0][1]
+ for i in range(1, len(factorsByCount)):
+ if markCount * 0.5 > factorsByCount[i][1]:
+ # set factorsByCount to the list up to i (and cut the rest)
+ factorsByCount = factorsByCount[:i]
+ break
+
+ return factorsByCount
+
+
+def kasiskiExamination(ciphertext):
+ # Find out the sequences of 3 to 5 letters that occur multiple times
+ # in the ciphertext. repeatedSeqs has a value like:
+ # {'EXG': [192], 'NAF': [339, 972, 633], ... }
+ repeatedSeqs = findRepeatSequences(ciphertext)
+
+ # seqFactors keys are sequences, values are list of factors of the
+ # spacings. seqFactos has a value like: {'GFD': [2, 3, 4, 6, 9, 12,
+ # 18, 23, 36, 46, 69, 92, 138, 207], 'ALW': [2, 3, 4, 6, ...], ...}
+ seqFactors = {}
+ for seq in repeatedSeqs:
+ seqFactors[seq] = []
+ for spacing in repeatedSeqs[seq]:
+ seqFactors[seq].extend(getFactors(spacing))
+
+ # factorsByCount is a list of tuples: (factor, factorCount)
+ # factorsByCount has a value like: [(3, 497), (2, 487), (6, 453), ...]
+ factorsByCount = getMostCommonFactors(seqFactors)
+
+ # Now we extract the factor counts from factorsByCount and put them
+ # in variables named allLikelyKeyLengths and allLikelyKeyLengthsStr
+ # so that they are easier to use later.
+ allLikelyKeyLengths = []
+ for i in range(len(factorsByCount)):
+ allLikelyKeyLengths.append(factorsByCount[i][0])
+
+ return allLikelyKeyLengths
+
+
def getNthLetter(nth, keyLength, message):
    # Returns every Nth letter for each keyLength set of letters in message.
    # E.g. getNthLetter(1, 3, 'ABCABCABC') returns 'AAA'
    #      getNthLetter(2, 3, 'ABCABCABC') returns 'BBB'
    #      getNthLetter(3, 3, 'ABCABCABC') returns 'CCC'
    #      getNthLetter(1, 5, 'ABCABCABC') returns 'AC'
    #
    # nth is 1-based (1 means the first letter of each set) and keyLength
    # is the size of each set. A ValueError is raised if either of them is
    # less than 1: a keyLength of 0 would never advance through the
    # message, and a non-positive nth would index from the end of it.
    if nth < 1 or keyLength < 1:
        raise ValueError('nth and keyLength must both be at least 1 (got nth=%s, keyLength=%s)' % (nth, keyLength))

    # Use a "regular expression" to remove non-letters from the message.
    message = nonLettersPattern.sub('', message)

    # Start at index nth - 1 and take every keyLength-th character.
    return message[nth - 1::keyLength]
+
+
def attemptHackWithKeyLength(ciphertext, mostLikelyKeyLength):
    # Tries to decrypt ciphertext on the assumption that the key is
    # mostLikelyKeyLength letters long. Returns the decrypted text once the
    # user confirms a candidate, or None if no candidate is confirmed.

    # allFreqScores gets one entry per letter of the key. Each entry is a
    # list of (<letter>, <Eng. Freq. match score>) tuples holding the
    # NUM_MOST_FREQ_LETTERS best-scoring candidates for that letter.
    allFreqScores = []
    for position in range(1, mostLikelyKeyLength + 1):
        # The letters of the ciphertext that this key letter encrypted.
        subMessage = getNthLetter(position, mostLikelyKeyLength, ciphertext)

        # Decrypt those letters with every possible single-letter key and
        # score how English-like the letter frequencies of the result are.
        scoredLetters = [
            (letter,
             freqAnalysis.englishFreqMatchScore(
                 vigenereCipher.decryptMessage(letter, subMessage)))
            for letter in LETTERS
        ]

        # Highest match score first.
        # NOTE(review): this ordering assumes a higher
        # englishFreqMatchScore() means a closer match to English --
        # confirm against the comments in freqAnalysis.
        scoredLetters.sort(key=freqAnalysis.getItemAtIndexOne, reverse=True)

        allFreqScores.append(scoredLetters[:NUM_MOST_FREQ_LETTERS])

    if not SILENT_MODE:
        # Number the key letters from 1 so the first is not the "0th".
        for letterNumber, candidates in enumerate(allFreqScores, start=1):
            print('Possible letters for letter %s of the key: ' % letterNumber, end='')
            for candidate in candidates:
                print('%s ' % candidate[0], end='')
            print()

    # Try every combination of the most likely letters for each position
    # in the key. Each `indexes` tuple picks one candidate per position.
    for indexes in itertools.product(range(NUM_MOST_FREQ_LETTERS), repeat=mostLikelyKeyLength):
        # Build the key from the chosen candidate letter of each position.
        possibleKey = ''.join(
            allFreqScores[i][indexes[i]][0] for i in range(mostLikelyKeyLength))

        if not SILENT_MODE:
            print('Attempting with key: %s' % (possibleKey))

        decryptedText = vigenereCipher.decryptMessage(possibleKey, ciphertext)

        # Run the trigram check first; isEnglish() is only consulted for
        # text that passes it.
        if not freqAnalysis.englishTrigramMatch(decryptedText):
            continue
        if not detectEnglish.isEnglish(decryptedText):
            continue

        # Check with the user to see if the key has been found.
        print()
        print('Possible encryption hack:')
        print('Key ' + str(possibleKey) + ': ' + decryptedText[:200])
        print()
        print('Enter D for done, or just press Enter to continue hacking:')
        response = input('> ')

        if response.strip().upper().startswith('D'):
            return decryptedText

    # No English-looking decryption was confirmed with any of the possible
    # keys, so return None.
    return None
+
+
def hackVigenere(ciphertext):
    # Tries to break ciphertext without knowing the key. Returns the
    # decrypted message with the letter casing of the original ciphertext
    # restored, or None if no key length produced an accepted decryption.
    #
    # Unless SILENT_MODE is set, progress messages are printed throughout.

    # All of the analysis is done on an uppercase copy; the original is
    # kept so its casing can be restored at the end.
    upperCiphertext = ciphertext.upper()

    # First, we need to do Kasiski Examination to figure out what the
    # length of the ciphertext's encryption key is.
    if not SILENT_MODE:
        print('Determining most likely key lengths with Kasiski Examination...')

    allLikelyKeyLengths = kasiskiExamination(upperCiphertext)
    if not SILENT_MODE:
        print('Kasiski Examination results say the most likely key lengths are: ', end='')
        for keyLength in allLikelyKeyLengths:
            print('%s ' % (keyLength), end='')
        print()
        print()

    # Start from None so the checks below still work when Kasiski
    # Examination found no likely key lengths and the loop never runs.
    hackedMessage = None
    for keyLength in allLikelyKeyLengths:
        if not SILENT_MODE:
            print('Attempting hack with key length %s (%s possible keys)...' % (keyLength, NUM_MOST_FREQ_LETTERS ** keyLength))
        hackedMessage = attemptHackWithKeyLength(upperCiphertext, keyLength)
        if hackedMessage is not None:
            break

    # If none of the key lengths we found using Kasiski Examination
    # worked, start brute forcing through key lengths.
    if hackedMessage is None:
        if not SILENT_MODE:
            print('Unable to hack message with likely key length(s). Brute forcing key length...')
        for keyLength in range(1, MAX_KEY_LENGTH + 1):
            # don't re-check key lengths already tried from Kasiski
            if keyLength in allLikelyKeyLengths:
                continue
            if not SILENT_MODE:
                print('Attempting hack with key length %s (%s possible keys)...' % (keyLength, NUM_MOST_FREQ_LETTERS ** keyLength))
            hackedMessage = attemptHackWithKeyLength(upperCiphertext, keyLength)
            if hackedMessage is not None:
                break

    if hackedMessage is None:
        return None

    # Set the broken ciphertext to the original casing: each character of
    # the result takes the case of the ciphertext character at that index.
    origCase = []
    for i, symbol in enumerate(ciphertext):
        if symbol.isupper():
            origCase.append(hackedMessage[i].upper())
        else:
            origCase.append(hackedMessage[i].lower())

    return ''.join(origCase)
+
+
# Call main() only when vigenereHacker.py is run as a program. When the
# file is imported as a module, nothing is run here, so the functions
# above can be used without starting a hack.
if __name__ == '__main__':
    main()

0 comments on commit 1b3897b

Please sign in to comment.