In [1]:
# hashing algorithms
# convert input data into a fixed size string of bytes

import hashlib

In [None]:
# MD5 (Message Digest algorithm) - not secure for cryptographic purposes
#       digest size = 128 bits, i.e. 32 hex characters
#       still handy for checksums and file-integrity checks

text = "batman"

# create an empty MD5 object, then feed it the UTF-8 bytes of `text`
md5_hash = hashlib.md5()
md5_hash.update(text.encode("utf-8"))
md5_hash.hexdigest()

'ec0e2603172c73a8b644bb9456c1ff6e'

In [None]:
# SHA-1 (Secure Hash Algorithm 1)
#         digest size = 160 bits, i.e. 40 hex characters
#         not collision resistant

# create an empty SHA-1 object, then feed it the UTF-8 bytes of `text`
sha1_hash = hashlib.sha1()
sha1_hash.update(text.encode("utf-8"))
sha1_hash.hexdigest()

'5c6d9edc3a951cda763f650235cfc41a3fc23fe8'

In [7]:
# SHA-256
#           digest size = 256 bits, i.e. 64 hex characters
#           widely used in SSL certs, blockchains and other secure systems

# create an empty SHA-256 object, then feed it the UTF-8 bytes of `text`
sha256_hash = hashlib.sha256()
sha256_hash.update(text.encode("utf-8"))
sha256_hash.hexdigest()

'1532e76dbe9d43d0dea98c331ca5ae8a65c5e8e8b99d3e2a42ae989356f6242a'

In [10]:
# SHA-512
#           digest size = 512 bits, i.e. 128 hex characters
#           more secure, at the cost of a longer output

# create an empty SHA-512 object, then feed it the UTF-8 bytes of `text`
sha512_hash = hashlib.sha512()
sha512_hash.update(text.encode("utf-8"))
sha512_hash.hexdigest()

'5e325d89a5fceb1ba257f50d7e7c1a807ae8b19756e252c326c44e84e357749d3e780b7db1fb32ec029e7850d3b0bba032a33611d2a54a1db8097c81f2b23814'

In [14]:
# output size comparison: hex digest length produced by each algorithm

algos = ["md5", "sha1", "sha256", "sha512"]

for algo in algos:
    # hashlib.new() takes the initial data directly, so no separate update() call
    h = hashlib.new(algo, text.encode())
    print(f"{algo.upper()} len: {len(h.hexdigest())} characters")


MD5 len: 32 characters
SHA1 len: 40 characters
SHA256 len: 64 characters
SHA512 len: 128 characters


In [16]:
# avalanche effect - a tiny change in the input (here only the case of the
# first letter) shows up as a completely different digest

txt1 = "i am batman"
txt2 = "I am batman"

# compute both digests first, then print them one under the other for comparison
sha256_txt1 = hashlib.sha256(txt1.encode()).hexdigest()
sha256_txt2 = hashlib.sha256(txt2.encode()).hexdigest()

print("SHA256 txt1: ", sha256_txt1)
print("SHA256 txt2: ", sha256_txt2)

SHA256 txt1:  1090efd8e22b42655a133b812d3ae9c814d445b8661c55a316e2768d00fa1968
SHA256 txt2:  92d985f4b01db6420c2c2e0f992aa20006be8a93ef60b0cb992200735f448dda


In [18]:
# hashing complete files

def hash_file(filename, algorithm: str = "sha256", chunk_size: int = 4096) -> str:
    """Return the hex digest of a file's contents.

    The file is read in binary mode and fed to the hash object in chunks, so
    the whole file never has to be held in memory at once.

    Args:
        filename: Path of the file to hash (anything ``open()`` accepts).
        algorithm: Name of the hash algorithm, passed to ``hashlib.new()``.
            Defaults to ``"sha256"``.
        chunk_size: Number of bytes to read per iteration. Must be positive.
            Defaults to 4096.

    Returns:
        The digest as a lowercase hexadecimal string.

    Raises:
        ValueError: If ``chunk_size`` is not positive, or if ``algorithm`` is
            not a hash name known to ``hashlib``.
        OSError: If the file cannot be opened or read.
    """
    # read(0) returns b"" immediately, which would end the loop on the first
    # iteration and silently produce the digest of empty input; read(-1) would
    # load the whole file at once. Reject both up front.
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    h = hashlib.new(algorithm)

    with open(filename, "rb") as f:
        # an empty bytes object is falsy, so the loop stops at end of file
        while chunk := f.read(chunk_size):
            h.update(chunk)
    return h.hexdigest()

# hash the sample file "text.txt" (resolved relative to the working directory)
file_digest = hash_file("text.txt")
print(file_digest)

85a7b30d78c1d6205f16e0c41a3a881e4a0f6c71aa08d15a1e219314b7cf7863
