forked from circulosmeos/haveibeenpwned-binary
-
Notifications
You must be signed in to change notification settings - Fork 0
/
pwned-compact-to-binary.py
49 lines (40 loc) · 1.42 KB
/
pwned-compact-to-binary.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/usr/bin/env python
# Compatible with Python 2 & Python 3
#
# compact haveibeenpwned.com password hashes db
# from ASCII to binary form.
#
# v1.0, v1.1 by circulosmeos, Jan 2018
# v1.2 by circulosmeos, Apr 2018
# https://github.com/circulosmeos/haveibeenpwned-binary
# licensed under GPLv3 or higher
#
import sys, re, os
import subprocess
# 7z archives to convert. Other archive names are kept commented out so they
# can be re-enabled by hand.
input_files = [
    # 'pwned-passwords-1.0.txt.7z',
    # 'pwned-passwords-update-1.txt.7z',
    # 'pwned-passwords-update-2.txt.7z',
    'pwned-passwords-ordered-2.0.txt.7z',
]

# Size in bytes of one binary hash record (each record is read from twice as
# many hex characters). The spelling of the name is kept as-is because
# compact_data() reads it.
hash_lenght = 20

# Location of the 7z executable used to stream the archives.
PATH_TO_7z = '/usr/bin/7z'  # for linux
# PATH_TO_7z = r'c:\Program Files\7-Zip\7z.exe'  # for Windows
def compact_data(file):
    """Convert a 7z archive of ASCII hex hashes into a packed binary file.

    The archive is streamed through the external 7z executable
    (``PATH_TO_7z``, invoked as ``7z e -so <file>``). For every line, the
    first ``hash_lenght * 2`` hex characters are converted to ``hash_lenght``
    raw bytes and appended to the output file. Anything after those
    characters on the line is ignored.

    Args:
        file: Name of the input archive. The output name is ``file`` with a
            trailing ``.txt.7z`` replaced by ``.bin``.

    Returns:
        None. If the output file already exists an error is printed and
        nothing is written.

    Processing stops at the first line that is too short to hold a complete
    hex-encoded hash. A non-zero 7z exit status is reported on stderr.
    """
    # Each output byte is encoded as two hex characters in the input.
    hex_length = hash_lenght * 2

    output_name = re.sub(r'\.txt\.7z$', '.bin', file)
    print("\n%s" % output_name)
    if os.path.exists(output_name):
        print("Error: output file already exist: %s\n" % output_name)
        return
    # Progress dots are flushed only when printed, so push the name out now.
    sys.stdout.flush()

    # Launch 7z before creating the output file: if the executable is missing
    # Popen raises and no empty .bin is left behind to block the next run.
    proc = subprocess.Popen(
        [PATH_TO_7z, 'e', '-so', file], stdout=subprocess.PIPE
    )
    try:
        # The context manager guarantees the output is flushed and closed.
        with open(output_name, "w+b") as output_file:
            count = 0
            for line in proc.stdout:
                # A line shorter than a full hex hash cannot be converted
                # into a complete fixed-size record (fromhex would either
                # reject it or yield fewer bytes), so treat it as the end.
                if len(line) < hex_length:
                    break
                count += 1
                if count % 1000000 == 0:
                    # One progress dot per million hashes.
                    sys.stdout.write('.')
                    sys.stdout.flush()
                hex_text = line[:hex_length].decode('utf-8')
                output_file.write(bytearray.fromhex(hex_text))
    finally:
        # Always release the pipe and reap the child process.
        proc.stdout.close()
        returncode = proc.wait()

    if returncode != 0:
        sys.stderr.write(
            "Warning: 7z exited with status %d while extracting %s\n"
            % (returncode, file)
        )
# Convert every configured archive, one after another.
for archive_name in input_files:
    compact_data(archive_name)