## Generate passwords

We use SHA1, SHA256, MD5 and bcrypt algorithms to hash generated passwords.

In [3]:
import random as rand
import string

# Seed the shared generator without an explicit value, so the seed is taken
# from the system (OS randomness or the clock) and every run of the notebook
# produces a different data set.
rand.seed()

In [39]:
def read_from_file(name, size=None):
    """Load whitespace-separated tokens from a text file.

    Args:
        name: Path of the file to read.
        size: Maximum number of leading tokens to keep. ``None`` (the
            default) keeps every token; ``0`` yields an empty list.

    Returns:
        A list of strings in file order.
    """
    with open(name) as f:
        data = f.read().split()
    # Compare against None explicitly: a plain truthiness test would treat
    # size=0 as "no limit" and hand back the whole file.
    if size is not None:
        data = data[:size]
    return data


# Word lists loaded once and shared by the password generators in this file.
# The two password lists are truncated to 100 and 1,000,000 entries.
TOP_100 = read_from_file('top-100-passwords.txt', size=100)
TOP_1M = read_from_file('top-1M-passwords.txt', size=1_000_000)
# No limit here: every word found in the file is kept.
ENGLISH_COMMON_WORDS = read_from_file('english-common-words.txt')


def generate_random():
    """Return a random password of 5 to 10 characters.

    Characters are drawn independently (with replacement) from ASCII
    letters, digits, ``!`` and ``?``.
    """
    alphabet = string.ascii_letters + string.digits + '!?'
    length = rand.randint(5, 10)
    picked = rand.choices(alphabet, k=length)
    return ''.join(picked)

def generate_from(pass_list):
    """Return one entry of ``pass_list`` picked at random."""
    picked = rand.choice(pass_list)
    return picked

def generate_random_readable():
    """Build a password out of common English words.

    Two to four words are sampled without replacement from
    ``ENGLISH_COMMON_WORDS`` and concatenated character by character; no
    further word is added once more than five characters have been
    collected. Three independent coin flips then decide whether to put 3-5
    random digits in front, put 2-4 random digits behind, and run the
    look-alike substitution over every character.

    Returns:
        The generated password as a string.
    """
    # Look-alike substitutions, keyed by the lowercase letter they stand for.
    lookalikes = {
        's': '$S5',
        'i': 'l1!',
        'a': '@A',
        't': '7T',
        'e': '3E',
        'g': '9G6',
        'o': 'O0',
        'b': '8B',
    }

    # Draw the three flags up front, before any other random call.
    with_prefix = rand.random() > 0.5
    with_suffix = rand.random() > 0.5
    with_lookalikes = rand.random() > 0.5

    word_count = rand.randint(2, 4)
    chars = []
    for word in rand.sample(ENGLISH_COMMON_WORDS, word_count):
        # Stop taking words once the base is longer than five characters.
        if len(chars) > 5:
            break
        chars.extend(word)

    if with_prefix:
        prefix_len = rand.randint(3, 5)
        chars = rand.choices(string.digits, k=prefix_len) + chars

    if with_suffix:
        suffix_len = rand.randint(2, 4)
        chars.extend(rand.choices(string.digits, k=suffix_len))

    if with_lookalikes:
        # Each character becomes a random pick among its look-alikes and
        # itself; characters without an entry can only map to themselves.
        chars = [rand.choice(lookalikes.get(ch, '') + ch) for ch in chars]

    return ''.join(chars)
    
def generate_password():
    """Return one password drawn from a weighted mix of sources.

    A roll from 1 to 100 selects the source: 5% from ``TOP_100``, 80% from
    ``TOP_1M``, 5% fully random and 10% random readable.
    """
    top_100_share = 5
    top_1m_share = 80
    random_share = 5
    readable_share = 10

    # Cumulative upper bounds of each source's slice of the 1..100 roll.
    top_100_limit = top_100_share
    top_1m_limit = top_100_limit + top_1m_share
    random_limit = top_1m_limit + random_share
    readable_limit = random_limit + readable_share

    roll = rand.randint(1, 100)

    if roll <= top_100_limit:
        return generate_from(TOP_100)
    if roll <= top_1m_limit:
        return generate_from(TOP_1M)
    if roll <= random_limit:
        return generate_random()

    # The shares add up to 100, so every remaining roll belongs to the
    # readable generator; the assert guards against the shares drifting.
    assert roll <= readable_limit
    return generate_random_readable()

In [40]:
from Crypto.Protocol.KDF import bcrypt
from Crypto.Hash import MD5, SHA1, SHA256
from Crypto.Random import get_random_bytes

In [41]:
def generate_hash(algo, password):
    """Return the hexadecimal digest of ``password`` computed with ``algo``.

    Args:
        algo: Object exposing ``new(data)`` that returns a hash object with
            a ``hexdigest()`` method.
        password: Text to hash; it is converted to bytes with
            ``str.encode()`` using its default encoding.
    """
    payload = password.encode()
    hasher = algo.new(payload)
    return hasher.hexdigest()

def generate_bcrypt_hash(password, cost):
    """Hash ``password`` with bcrypt under a fresh random 16-byte salt.

    Args:
        password: Text to hash; encoded to bytes before hashing.
        cost: Passed through to ``bcrypt`` as its cost argument.

    Returns:
        A ``(hash, salt_hex)`` tuple: the decoded bcrypt output and the
        salt rendered as a hexadecimal string.
    """
    salt = get_random_bytes(16)
    digest = bcrypt(password.encode(), cost, salt)
    return digest.decode(), salt.hex()

def generate_hashes(num_passwords=500_000, bcrypt_cost=4):
    """Write hashed password data sets to CSV files in the working directory.

    For each of MD5, SHA1 and SHA256 a file ``<name>.csv`` is written with
    one hex digest per line. A further file ``bcrypt-<cost>.csv`` holds one
    ``hash,salt`` pair per line. Every line hashes a freshly generated
    password, so the files are not built from one shared password list.

    Args:
        num_passwords: Number of lines written to each file.
        bcrypt_cost: Cost argument handed to bcrypt; it also appears in the
            name of the bcrypt output file.
    """
    for algo in MD5, SHA1, SHA256:
        # The hash modules carry dotted names; keep only the last component
        # (e.g. 'MD5') for the file name.
        name = algo.__name__.split('.')[-1]
        with open(f'{name}.csv', 'w') as f:
            for _ in range(num_passwords):
                print(generate_hash(algo, generate_password()), file=f)

    with open(f'bcrypt-{bcrypt_cost}.csv', 'w') as f:
        for _ in range(num_passwords):
            print(*generate_bcrypt_hash(generate_password(), bcrypt_cost), sep=',', file=f)


In [42]:
# Run the generation; the CSV files are created relative to the current
# working directory and existing files with the same names are overwritten.
generate_hashes()

## Find hash preimages