
# Key Derivation Function (KDF)

## HKDF
HKDF (HMAC-based Key Derivation Function) is used to generate an encryption key based on a password. We can use a range of hashing methods to derive the encryption key. In this case we will start with SHA-256 (other hashing methods are tried in the exercises below), and derive a key of a given size.

<img src='graphics/g_kdf_01.png' width="800px">

In [10]:
# 06_01.py
# https://asecuritysite.com/hazmat/hashnew3
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.hkdf import HKDF
from cryptography.hazmat.backends import default_backend


import binascii
import sys

# Demo inputs -- edit these to try a different key, salt or key size.
st = "00"        # key material (plain text, or hex digits when hex is True)
slt = ""         # salt, decoded the same way as st
hex = False      # True: st and slt are hex strings (NOTE: rebinds the builtin hex())
length = 16      # number of key bytes requested from HKDF
showhex = "No"   # not referenced by the code visible in this cell
k = "00"         # not referenced by the code visible in this cell

def show_hash(name,type,data,length,salt):
    """Derive a key with HKDF and print it in hex and Base64.

    name   -- label printed in front of the result
    type   -- hash algorithm object handed to HKDF (e.g. hashes.SHA256())
    data   -- input key material, as bytes
    length -- number of key bytes to derive
    salt   -- salt bytes handed to HKDF
    """
    derived = HKDF(
        algorithm=type,
        length=length,
        salt=salt,
        info=b"",
        backend=default_backend(),
    ).derive(data)

    as_hex = binascii.hexlify(derived).decode()
    # b2a_base64 appends a newline, so the printed line is followed by a blank one.
    as_b64 = binascii.b2a_base64(derived).decode()
    print(f"HKDF {name}: {as_hex} {as_b64}")
  



try:
    # Key and salt are decoded the same way: from hex digits when the
    # module-level hex flag is set, otherwise by encoding the text.
    if hex == True:
        data = binascii.a2b_hex(st)
        salt = binascii.a2b_hex(slt)
    else:
        data = st.encode()
        salt = slt.encode()

    # Echo each input as typed, then as the hex of the bytes actually used.
    for label, text, raw in (("Key: ", st, data), ("Salt: ", slt, salt)):
        print(label, text)
        print(" Hex: ", binascii.b2a_hex(raw).decode())

    print()

    show_hash("SHA256", hashes.SHA256(), data, length, salt)

except Exception as err:
    # Any failure above is reported as a message rather than a traceback.
    print(err)

Key:  00
 Hex:  3030
Salt:  
 Hex:  

HKDF SHA256: c9f9ff58d94c9d8901f5ed32e36f30af yfn/WNlMnYkB9e0y428wrw==



> Verify the operation of the program.

> Change the SHA-256 method to MD5. Verify the operation.

> Change the SHA-256 method to SHA-1. Verify the operation.

> Change the SHA-256 method to Blake2b, and use 64 bytes of hash.  Verify the operation.

## PBKDF2
PBKDF2 (Password-Based Key Derivation Function 2) is defined in RFC 2898 and generates a salted hash. Often this is used to create an encryption key from a defined password, and where it is not possible to reverse the password from the hashed value. It is used in TrueCrypt to generate the key required to read the header information of the encrypted drive, and which stores the encryption keys. Also, it is used in WPA-2 in order to create a hashed version of the password. With this, WPA-2 uses 4,096 iterations. We can also specify the length of the generated hash. 


<img src='graphics/g_kdf_03.png' width="800px">

In [4]:

# 06_02.py
# https://asecuritysite.com/pbkdf2/hazkdf
import os
import sys
import base64
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
from cryptography.hazmat.backends import default_backend

# Tunable inputs for the demo.
round=1000        # PBKDF2 iteration count (NOTE: this name rebinds the builtin round())
hashtype=0        # key into _HASH_CHOICES below
length=32         # number of key bytes to derive
message="Hello"   # password the key is derived from


salt = os.urandom(16)

# hashtype -> (class name in cryptography's hashes module, constructor args).
# The class is looked up only for the selected entry, so a hash that is
# missing from the installed library version cannot break the other choices.
_HASH_CHOICES = {
    0: ("SHA1", ()),
    1: ("SHA512_224", ()),
    2: ("SHA512_256", ()),
    3: ("SHA224", ()),
    4: ("SHA256", ()),
    5: ("SHA384", ()),
    6: ("SHA512", ()),
    7: ("SHA3_224", ()),
    8: ("SHA3_256", ()),
    9: ("SHA3_384", ()),
    10: ("SHA3_512", ()),
    11: ("MD5", ()),
    12: ("SM3", ()),
    13: ("BLAKE2b", (64,)),
    14: ("BLAKE2s", (32,)),
}

# Fail with a clear message instead of passing algorithm=None to PBKDF2HMAC.
if hashtype not in _HASH_CHOICES:
    raise ValueError(
        f"Unknown hashtype {hashtype}; expected one of {sorted(_HASH_CHOICES)}"
    )
_hash_name, _hash_args = _HASH_CHOICES[hashtype]
h = getattr(hashes, _hash_name)(*_hash_args)


kdf = PBKDF2HMAC(algorithm=h, length=length, salt=salt, iterations=round, backend=default_backend())

key = kdf.derive(message.encode())


# verify
# A second PBKDF2HMAC object with the same parameters re-derives the key and
# compares it; verify() returns None on a match and raises otherwise.

kdf = PBKDF2HMAC(algorithm=h, length=length, salt=salt, iterations=round, backend=default_backend())

rtn = kdf.verify(message.encode(), key)

print(f"Message:\t{message}")
print(f"Salt:\t\t{salt.hex()}")
print(f"Salt:\t\t{base64.b64encode(salt).decode()}")
print(f"Rounds:\t\t{round}")
# h is the algorithm object given to the KDF, so its public name is used
# rather than reaching into the KDF's private attributes.
print(f"Hash:\t\t{h.name}")
print(f"\nKey:\t\t{key.hex()}")
print(f"Key:\t\t{base64.b64encode(key).decode()}")
if rtn is None:
    print("KDF Verified")

Message:	Hello
Salt:		e9ed6ffc41f8350c15ac0b67611ae723
Salt:		6e1v/EH4NQwVrAtnYRrnIw==
Rounds:		1000
Hash:		sha1

Key:		df9f33fc4e8d7ec3655d2369539caa0228e7946bc629076d3381e3616dac1f73
Key:		358z/E6NfsNlXSNpU5yqAijnlGvGKQdtM4HjYW2sH3M=
KDF Verified


We can also use a pure PBKDF2 method with:

In [11]:
# 06_03.py
import binascii
from Crypto.Protocol.KDF import PBKDF2, HKDF
from Crypto.Hash import SHA256
from Crypto.Random import get_random_bytes
import sys

# Inputs for the demo.
password = "qwerty"
s = ""        # salt as a hex string
type = 1      # not referenced by the code visible in this cell
bytes = 16    # number of key bytes to derive (NOTE: rebinds the builtin name bytes)

# NOTE: the random salt drawn here is replaced by the decoded value of s on
# the next statement, so it never reaches PBKDF2.
salt = get_random_bytes(16)
salt = binascii.a2b_hex(s)

# Derive the key with 1000 rounds of HMAC-SHA256.
KEK = PBKDF2(password, salt, bytes, count=1000, hmac_hash_module=SHA256)
print("Using PBKDF2")

print(f"Password: {password}, Salt: {s}")
print("\nHash: ", binascii.b2a_hex(KEK))

Using PBKDF2
Password: qwerty, Salt: 

Hash:  b'b1444b43fe945ff29561e772401db5a0'


## scrypt
scrypt is a password-based key derivation function which produces a hash with a salt and iterations. The iteration count slows down the cracking and the salt makes pre-computation difficult. The main parameters are: passphrase (P); salt (S); Blocksize (r) and CPU/Memory cost parameter (N - a power of 2). 

<img src='graphics/g_kdf_04.png' width="800px">

In [None]:
# 06_04.py
import os
import sys
import base64
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.scrypt import Scrypt


# Defaults; each can be overridden from the command line (see below).
length, message = 32, "Hello"
N, r, p = 14, 8, 1      # N is an exponent: the cost handed to Scrypt is 2**N

# Positional overrides, in this order: message, N, r, length, p.
cli_args = sys.argv[1:]
if len(cli_args) >= 1:
    message = str(cli_args[0])
if len(cli_args) >= 2:
    N = int(cli_args[1])
if len(cli_args) >= 3:
    r = int(cli_args[2])
if len(cli_args) >= 4:
    length = int(cli_args[3])
if len(cli_args) >= 5:
    p = int(cli_args[4])


salt = os.urandom(16)


def _new_scrypt():
    """Build a Scrypt object from the current salt and parameters."""
    return Scrypt(salt=salt, length=length, n=2**N, r=r, p=p)


kdf = _new_scrypt()
key = kdf.derive(message.encode())


# verify
# A second, identically configured object checks the derived key.
kdf = _new_scrypt()
rtn = kdf.verify(message.encode(), key)

print("== scrypt === ")
print(f"Message:\t{message}")
print(f"Salt:\t\t{salt.hex()}")
print(f"Salt:\t\t{base64.b64encode(salt).decode()}")
print(f"scrypt param:\tn=2**{N}, r={r}, p={p}")
print(f"\nKey:\t\t{key.hex()}")
print(f"Key:\t\t{base64.b64encode(key).decode()}")
if rtn is None:
    print("KDF Verified")

> From the Cryptography library, which other KDFs are available?


## bcrypt
MD5 and SHA-1 produce a hash signature, but this can be attacked by rainbow tables. Bcrypt is a more powerful hash generator for passwords and uses salt to create a non-recurrent hash. It was designed by Niels Provos and David Mazières, and is based on the Blowfish cipher. It is used as the default password hashing method for BSD and other systems. 

Overall it uses a 128-bit salt value, which requires 22 Radix-64 characters. It can use a number of iterations, which will slow down any brute-force cracking of the hashed value.

For example, “Hello” with a salt value of “$2a$06$NkYh0RCM8pNWPaYvRLgN9.” gives:

$2a$06$NkYh0RCM8pNWPaYvRLgN9.LbJw4gcnWCOQYIom0P08UEZRQQjbfpy

As illustrated below, the first part is "$2a$" (or "$2b$"), and this is then followed by the number of rounds used, given as a cost factor (in this case it is 6, which is 2^6 = 64 rounds, and where each additional increment doubles the hash time). The 128-bit (22 character) salt value comes after this, and then finally there is a 184-bit hash code (which is 31 characters).

<img src='graphics/bc.png' width="800px">

The slowness of Bcrypt is highlighted with a recent AWS EC2 server benchmark using hashcat [here]:

Hash type: MD5 Speed/sec: 380.02M words
Hash type: SHA1 Speed/sec: 218.86M words
Hash type: SHA256 Speed/sec: 110.37M words
Hash type: bcrypt, Blowfish(OpenBSD) Speed/sec: 25.86k words
Hash type: NTLM. Speed/sec: 370.22M words

You can see that bcrypt (Blowfish) is almost 15,000 times slower than MD5 (380,020,000 words/sec down to only 25,860 words/sec). With John The Ripper:

md5crypt [MD5 32/64 X2] 318237 c/s real, 8881 c/s virtual
bcrypt ("$2a$05", 32 iterations)  25488 c/s real, 708 c/s virtual
LM [DES 128/128 SSE2-16] 88090K c/s real, 2462K c/s virtual


In [3]:
# 06_05.py
# https://asecuritysite.com/bcrypt/kdf
import binascii
from Crypto.Protocol.KDF import PBKDF2, scrypt,HKDF
import bcrypt
from Crypto.Hash import SHA256
from Crypto.Random import get_random_bytes
import sys

# Defaults; each can be overridden from the command line (see below).
password="qwerty"
s=""        # salt as a hex string; empty means "generate a random salt"
type=1      # still read from argv, but not used by the code in this cell
bytes=16    # number of key bytes to derive (NOTE: rebinds the builtin name bytes)

# Positional overrides, in this order: password, salt (hex), type, key length.
if (len(sys.argv)>1):
    password=str(sys.argv[1])
if (len(sys.argv)>2):
    s=str(sys.argv[2])
if (len(sys.argv)>3):
    type=int(sys.argv[3])
if (len(sys.argv)>4):
    bytes=int(sys.argv[4])


# Use the supplied salt when there is one, otherwise a fresh random salt.
# The same value is passed to the KDF and printed, so the reported salt is
# always the one that produced the key.
if s:
    salt = binascii.unhexlify(s)
else:
    salt = get_random_bytes(16)


KEK = bcrypt.kdf(password=password.encode(), salt=salt, desired_key_bytes=bytes, rounds=100)
print("Using bcrypt")


print(f"Password: {password}, Salt: {binascii.hexlify(salt).decode()}")
print("\nHash: ", binascii.hexlify(KEK))

ModuleNotFoundError: No module named 'bcrypt'

> Change the program so that the number of rounds is 500, 1000, 5000 and 10000. What happens to the creation of the hash?

## Argon 2.0
In this case we will use the PyNaCl (Networking and Cryptography) library, which is a Python binding to libsodium. We will hash a password using SHA-256 and SHA-512, and also create a KDF (Key Derivation Function) using scrypt and Argon2. With Argon2, we have a memory robust key derivation function from a password and a salt value. Argon2 was designed by Alex Biryukov, Daniel Dinu, and Dmitry Khovratovich, and is a key derivation function (KDF) where we can create hashed values of passwords, or create encryption keys based on a password. It was the winner of the Password Hashing Competition in July 2015, and is robust against GPU and side channel attacks [paper]. We will also use the salt value as a key for the key-hash method of SIPHash24. 

With many fast hashing methods, such as MD5, we can now get billions or even trillions of hashes per second, where even 9 or 10 character passwords can be cracked for a reasonable financial cost. This includes salting of the password, as the salt is contained with the hashed password, and can be easily cracked with brute force (or dictionaries). The alternative is to use a hashing method which has a cost in memory, and for CPU processing. We also want to create a method which makes it difficult to apply parallel threads (and thus run it on GPUs). So a step forward is Argon2, which was designed by Alex Biryukov, Daniel Dinu, and Dmitry Khovratovich as a key derivation function. It was the winner of the Password Hashing Competition in July 2015. It is resistant to GPU attacks, and also has a memory cost. The costs include: execution time (CPU cost); memory required (memory cost); and degree of parallelism.

The parameters include:

* Password (P): Defines the password bytes to be hashed
* Salt (S): Defines the bytes to be used for salting.
* Parallelism (p): Defines the number of threads that are required for the parallelism.
* TagLength (T): Defines the number of bytes to return for the hash.
* MemorySizeKB (m): Amount of memory (in KB) to use.

$argon2id$v=19$m=8,t=3,p=1$yh3bPPtc2tSnm8xpK1RZbw$CBE1uk3HK23zkotoY9280NWemhMj6FnljoDMSZ8PZ58

Note: You will need to install the library with "pip install PyNaCl".

<img src='graphics/g_hash_06.png' width="800px">

In [1]:
# https://asecuritysite.com/argon2/nacl02
# 06_06.py
import nacl.hash
import nacl.hashlib
import nacl.pwhash
import nacl.encoding
import binascii
import sys

from os import urandom

password = 'hello'
salt = ''

# zfill left-pads with '0' characters, giving a 16-character salt string.
salt = salt.zfill(16)

print("Password: ", password)
print("Salt: ", salt)

# Everything below works on bytes.
salt = salt.encode()
password = password.encode()

mlevel = nacl.pwhash.MEMLIMIT_INTERACTIVE
olevel = nacl.pwhash.OPSLIMIT_INTERACTIVE

# Hashes of the password.
print("\nSHA256: ", nacl.hash.sha256(password))
print("SHA512: ", nacl.hash.sha512(password))
blake2b_hex = nacl.hashlib.blake2b(password, salt=salt).hexdigest()
print("\nBlake2b: ", blake2b_hex)
scrypt_raw = nacl.hashlib.scrypt(password, salt, n=2, r=8, p=1, maxmem=2**25, dklen=64)
print("Scrypt: ", binascii.hexlify(scrypt_raw))

# Both Argon2 variants get the same treatment: a 32-byte key derived with the
# salt above, then a self-describing password-hash string.
for heading, variant in (
    ("\nArgon2id", nacl.pwhash.argon2id),
    ("\nArgon2i", nacl.pwhash.argon2i),
):
    print(heading)
    derived = variant.kdf(size=32, password=password, salt=salt, opslimit=3, memlimit=8192)
    print(binascii.hexlify(derived))
    print(variant.str(password=password, opslimit=3, memlimit=8192))

# The salt is left-padded again, this time to 32 bytes, before the scrypt KDF.
salt = salt.zfill(32)
print("\nScrypt")
derived = nacl.pwhash.scrypt.kdf(size=32, password=password, salt=salt, opslimit=olevel, memlimit=mlevel)
print(binascii.hexlify(derived))
# NOTE(review): opslimit is the literal 3 here but olevel in the kdf call
# above -- confirm whether that difference is intended.
print(nacl.pwhash.scrypt.str(password=password, opslimit=3, memlimit=mlevel))

ModuleNotFoundError: No module named 'nacl'

> What is the value of the salt we have used?
> Run the program for different values of opslimit, and observe the result.