forked from yanapermana/metadecryptor
-
Notifications
You must be signed in to change notification settings - Fork 0
/
vigenere3.py
280 lines (227 loc) · 11 KB
/
vigenere3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
# Vigenere Cipher Hacker
# http://inventwithpython.com/hacking (BSD Licensed)
import itertools, re, sys
import vigenereCipher, freqAnalysis, detectEnglish
LETTERS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
SILENT_MODE = False # if set to True, program doesn't print attempts
NUM_MOST_FREQ_LETTERS = 4 # attempts this many letters per subkey # default 4
MAX_KEY_LENGTH = 16 # will not attempt keys longer than this # Default 16
NONLETTERS_PATTERN = re.compile('[^A-Z]')
def breakVigenere(cipher):
# Instead of typing this ciphertext out, you can copy & paste it
# from http://invpy.com/vigenereHacker.py
print('\nCipher:')
yp_file = open(cipher)
i = 0
for yp_line in yp_file:
inp = yp_line.rstrip()
print(inp)
hackedMessage = hackVigenere(inp)
i += 1
yp_file.close()
if hackedMessage != None:
print(hackedMessage)
else:
print('Failed :(')
def findRepeatSequencesSpacings(message):
# Goes through the message and finds any 3 to 5 letter sequences
# that are repeated. Returns a dict with the keys of the sequence and
# values of a list of spacings (num of letters between the repeats).
# Use a regular expression to remove non-letters from the message.
message = NONLETTERS_PATTERN.sub('', message.upper())
# Compile a list of seqLen-letter sequences found in the message.
seqSpacings = {} # keys are sequences, values are list of int spacings
for seqLen in range(3, 6):
for seqStart in range(len(message) - seqLen):
# Determine what the sequence is, and store it in seq
seq = message[seqStart:seqStart + seqLen]
# Look for this sequence in the rest of the message
for i in range(seqStart + seqLen, len(message) - seqLen):
if message[i:i + seqLen] == seq:
# Found a repeated sequence.
if seq not in seqSpacings:
seqSpacings[seq] = [] # initialize blank list
# Append the spacing distance between the repeated
# sequence and the original sequence.
seqSpacings[seq].append(i - seqStart)
return seqSpacings
def getUsefulFactors(num):
# Returns a list of useful factors of num. By "useful" we mean factors
# less than MAX_KEY_LENGTH + 1. For example, getUsefulFactors(144)
# returns [2, 72, 3, 48, 4, 36, 6, 24, 8, 18, 9, 16, 12]
if num < 2:
return [] # numbers less than 2 have no useful factors
factors = [] # the list of factors found
# When finding factors, you only need to check the integers up to
# MAX_KEY_LENGTH.
for i in range(2, MAX_KEY_LENGTH + 1): # don't test 1
if num % i == 0:
factors.append(i)
factors.append(int(num / i))
if 1 in factors:
factors.remove(1)
return list(set(factors))
def getItemAtIndexOne(x):
return x[1]
def getMostCommonFactors(seqFactors):
# First, get a count of how many times a factor occurs in seqFactors.
factorCounts = {} # key is a factor, value is how often if occurs
# seqFactors keys are sequences, values are lists of factors of the
# spacings. seqFactors has a value like: {'GFD': [2, 3, 4, 6, 9, 12,
# 18, 23, 36, 46, 69, 92, 138, 207], 'ALW': [2, 3, 4, 6, ...], ...}
for seq in seqFactors:
factorList = seqFactors[seq]
for factor in factorList:
if factor not in factorCounts:
factorCounts[factor] = 0
factorCounts[factor] += 1
# Second, put the factor and its count into a tuple, and make a list
# of these tuples so we can sort them.
factorsByCount = []
for factor in factorCounts:
# exclude factors larger than MAX_KEY_LENGTH
if factor <= MAX_KEY_LENGTH:
# factorsByCount is a list of tuples: (factor, factorCount)
# factorsByCount has a value like: [(3, 497), (2, 487), ...]
factorsByCount.append((factor, factorCounts[factor]))
# Sort the list by the factor count.
factorsByCount.sort(key=getItemAtIndexOne, reverse=True)
return factorsByCount
def kasiskiExamination(ciphertext):
# Find out the sequences of 3 to 5 letters that occur multiple times
# in the ciphertext. repeatedSeqSpacings has a value like:
# {'EXG': [192], 'NAF': [339, 972, 633], ... }
repeatedSeqSpacings = findRepeatSequencesSpacings(ciphertext)
# See getMostCommonFactors() for a description of seqFactors.
seqFactors = {}
for seq in repeatedSeqSpacings:
seqFactors[seq] = []
for spacing in repeatedSeqSpacings[seq]:
seqFactors[seq].extend(getUsefulFactors(spacing))
# See getMostCommonFactors() for a description of factorsByCount.
factorsByCount = getMostCommonFactors(seqFactors)
# Now we extract the factor counts from factorsByCount and
# put them in allLikelyKeyLengths so that they are easier to
# use later.
allLikelyKeyLengths = []
for twoIntTuple in factorsByCount:
allLikelyKeyLengths.append(twoIntTuple[0])
return allLikelyKeyLengths
def getNthSubkeysLetters(n, keyLength, message):
# Returns every Nth letter for each keyLength set of letters in text.
# E.g. getNthSubkeysLetters(1, 3, 'ABCABCABC') returns 'AAA'
# getNthSubkeysLetters(2, 3, 'ABCABCABC') returns 'BBB'
# getNthSubkeysLetters(3, 3, 'ABCABCABC') returns 'CCC'
# getNthSubkeysLetters(1, 5, 'ABCDEFGHI') returns 'AF'
# Use a regular expression to remove non-letters from the message.
message = NONLETTERS_PATTERN.sub('', message)
i = n - 1
letters = []
while i < len(message):
letters.append(message[i])
i += keyLength
return ''.join(letters)
def attemptHackWithKeyLength(ciphertext, mostLikelyKeyLength):
# Determine the most likely letters for each letter in the key.
ciphertextUp = ciphertext.upper()
# allFreqScores is a list of mostLikelyKeyLength number of lists.
# These inner lists are the freqScores lists.
allFreqScores = []
for nth in range(1, mostLikelyKeyLength + 1):
nthLetters = getNthSubkeysLetters(nth, mostLikelyKeyLength, ciphertextUp)
# freqScores is a list of tuples like:
# [(<letter>, <Eng. Freq. match score>), ... ]
# List is sorted by match score. Higher score means better match.
# See the englishFreqMatchScore() comments in freqAnalysis.py.
freqScores = []
for possibleKey in LETTERS:
decryptedText = vigenereCipher.decryptMessage(possibleKey, nthLetters)
keyAndFreqMatchTuple = (possibleKey, freqAnalysis.englishFreqMatchScore(decryptedText))
freqScores.append(keyAndFreqMatchTuple)
# Sort by match score
freqScores.sort(key=getItemAtIndexOne, reverse=True)
allFreqScores.append(freqScores[:NUM_MOST_FREQ_LETTERS])
if not SILENT_MODE:
for i in range(len(allFreqScores)):
# use i + 1 so the first letter is not called the "0th" letter
print('Possible letters for letter %s of the key: ' % (i + 1), end='')
for freqScore in allFreqScores[i]:
print('%s ' % freqScore[0], end='')
print() # print a newline
# Try every combination of the most likely letters for each position
# in the key.
for indexes in itertools.product(range(NUM_MOST_FREQ_LETTERS), repeat=mostLikelyKeyLength):
# Create a possible key from the letters in allFreqScores
possibleKey = ''
for i in range(mostLikelyKeyLength):
possibleKey += allFreqScores[i][indexes[i]][0]
if not SILENT_MODE:
print('Attempting with key: %s' % (possibleKey))
decryptedText = vigenereCipher.decryptMessage(possibleKey, ciphertextUp)
if detectEnglish.isEnglish(decryptedText):
# Set the hacked ciphertext to the original casing.
origCase = []
for i in range(len(ciphertext)):
if ciphertext[i].isupper():
origCase.append(decryptedText[i].upper())
else:
origCase.append(decryptedText[i].lower())
decryptedText = ''.join(origCase)
# Check with user to see if the key has been found.
print('Possible encryption hack with key %s:' % (possibleKey))
print(decryptedText[:200]) # only show first 200 characters
print()
print('Enter D for done, or just press Enter to continue hacking:')
response = input('> ')
if response.strip().upper().startswith('D'):
return decryptedText
# No English-looking decryption found, so return None.
return None
def hackVigenere(ciphertext):
# First, we need to do Kasiski Examination to figure out what the
# length of the ciphertext's encryption key is.
allLikelyKeyLengths = kasiskiExamination(ciphertext)
if not SILENT_MODE:
keyLengthStr = ''
for keyLength in allLikelyKeyLengths:
keyLengthStr += '%s ' % (keyLength)
print('\nKasiski Examination: ' + keyLengthStr + '\n')
for keyLength in allLikelyKeyLengths:
if not SILENT_MODE:
print('\nAttempting hack with key length %s (%s possible keys)...' % (keyLength, NUM_MOST_FREQ_LETTERS ** keyLength))
hackedMessage = attemptHackWithKeyLength(ciphertext, keyLength)
if hackedMessage != None:
break
# If none of the key lengths we found using Kasiski Examination
# worked, start brute-forcing through key lengths.
if hackedMessage == None:
if not SILENT_MODE:
print('Unable to hack message with likely key length(s). Brute forcing key length...')
for keyLength in range(1, MAX_KEY_LENGTH + 1):
# don't re-check key lengths already tried from Kasiski
if keyLength not in allLikelyKeyLengths:
if not SILENT_MODE:
print('Attempting hack with key length %s (%s possible keys)...' % (keyLength, NUM_MOST_FREQ_LETTERS ** keyLength))
hackedMessage = attemptHackWithKeyLength(ciphertext, keyLength)
if hackedMessage != None:
break
return hackedMessage
# If vigenereHacker.py is run (instead of imported as a module) call
# the main() function.
if __name__ == '__main__':
# Instead of typing this ciphertext out, you can copy & paste it
# from http://invpy.com/vigenereHacker.py
cipher = sys.argv[1]
print('\nCipher:')
yp_file = open(cipher)
i = 0
for yp_line in yp_file:
inp = yp_line.rstrip()
print(inp)
hackedMessage = hackVigenere(inp)
i += 1
yp_file.close()
if hackedMessage != None:
print(hackedMessage)
else:
print('Failed :(')