In [10]:
import pandas as pd

In [15]:
# Implementation of SHA-2 with 256 bit hashes
def sha256(input_text):
    # Starting values used in algorithm
    h0 = 0x6a09e667
    h1 = 0xbb67ae85
    h2 = 0x3c6ef372
    h3 = 0xa54ff53a
    h4 = 0x510e527f
    h5 = 0x9b05688c
    h6 = 0x1f83d9ab
    h7 = 0x5be0cd19

    k = [
        0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
        0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
        0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
        0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
        0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
        0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
        0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
        0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
    ]
    
    # Prepare data
    m = bytearray(bytes(input_text, encoding='utf-8'))
    l = len(m)
    m.append(128)
    while ((len(m) * 8 + 64) % 512) != 0:
        m.append(0)
    m.extend(int_to_uint64(l * 8))
    
    # Split into chunks of size 512 bit each
    chunks = []
    for i in range(0, len(m), 64):
        chunks.append(m[i:i+64])
        
    # Hash each chunk
    for chunk in chunks:
        w = []
        for i in range(0, 64, 4):
            w.append(int.from_bytes(bytes(chunk[i:i+4]), byteorder='big'))
            
        for i in range(16, 64):
            s0 = rotright(w[i-15],  7) ^ rotright(w[i-15], 18) ^ (w[i-15] >>  3)
            s1 = rotright(w[i- 2], 17) ^ rotright(w[i- 2], 19) ^ (w[i- 2] >> 10)
            w.append((w[i-16] + s0 + w[i-7] + s1) % (2 ** 32))
        
        a = h0
        b = h1
        c = h2
        d = h3
        e = h4
        f = h5
        g = h6
        h = h7
        
        for i in range(0, 64):
            s1 = rotright(e, 6) ^ rotright(e, 11) ^ rotright(e, 25)
            ch = (e & f) ^ ((~e) & g)
            temp1 = (h + s1 + ch + k[i] + w[i])
                
            s0 = rotright(a, 2) ^ rotright(a, 13) ^ rotright(a, 22)
            maj = (a & b) ^ (a & c) ^ (b & c)
            temp2 = (s0 + maj)
            
            h = g
            g = f
            f = e
            e = (d + temp1) % (2 ** 32)
            d = c
            c = b
            b = a
            a = (temp1 + temp2) % (2 ** 32)
            
        h0 = (h0 + a) % (2 ** 32)
        h1 = (h1 + b) % (2 ** 32)
        h2 = (h2 + c) % (2 ** 32)
        h3 = (h3 + d) % (2 ** 32)
        
        h4 = (h4 + e) % (2 ** 32)
        h5 = (h5 + f) % (2 ** 32)
        h6 = (h6 + g) % (2 ** 32)
        h7 = (h7 + h) % (2 ** 32)
            
    
    digest = hex(h0)[2:] + hex(h1)[2:] + hex(h2)[2:] + hex(h3)[2:] + hex(h4)[2:] + hex(h5)[2:] + hex(h6)[2:] + hex(h7)[2:]
    return digest
    
sha256("Anika Martz")

'85bdf6bccd80dc17a3f36154d779c959a14a1463375905657968ce15471116b8'

In [3]:
from binascii import unhexlify

# Converts integer value to 64-bit bytearray (big-endian)
def int_to_uint64(value):
    length = value.bit_length()
    # Get length so that it is a multiple of 8 (byte)
    if length % 8:
        length += 8 - (length % 8)
    # Creates format string for value '%0<length>x' -> prepend with zeroes until length is reached
    # and print as hex
    format_for_value = "%%0%dx" % (length // 4)
    res = unhexlify(format_for_value % value)

    value_ba = bytearray(res)

    # If value is too long return only last eight bytes
    if len(value_ba) > 8:
        return value_ba[-8:]
    # If value is too short prepend null bytes
    elif len(value_ba) < 8:
        while len(value_ba) < 8:
            value_ba.insert(0, 0)
    return value_ba

In [4]:
# Rotates an integer right by num places
def rotright(val, num):
    return (val >> num) | (val << (32 - num))

In [11]:
# Import data
data = pd.read_csv('orders.csv')
data

Unnamed: 0,name,product_id,amount,price
0,Anika Martz,23,1,36.0
1,Anika Martz,45,3,29.5
2,Anika Martz,36,2,50.3
3,Justine Blau,2,6,10.8
4,Justine Blau,45,6,59.0
5,Holger Gant,36,1,25.15
6,Anika Martz,15,7,30.0
7,Anika Martz,4,9,27.99
8,Holger Gant,36,2,50.3
9,Holger Gant,15,14,60.0


In [16]:
import secrets
salt = secrets.token_urlsafe(16)
salt

'hPcH6yQ1LxKhQ10OvnFCgQ'

In [17]:
# Hash column name
data['name'] = data['name'].apply(lambda x: sha256(x + salt))
data

Unnamed: 0,name,product_id,amount,price
0,4afadc85c6e230e21510ccd9d433f93c7ec24f36c7d47a...,23,1,36.0
1,4afadc85c6e230e21510ccd9d433f93c7ec24f36c7d47a...,45,3,29.5
2,4afadc85c6e230e21510ccd9d433f93c7ec24f36c7d47a...,36,2,50.3
3,810a566a1610bd02cf5fd7f62431eb13ad319f416aa70a...,2,6,10.8
4,810a566a1610bd02cf5fd7f62431eb13ad319f416aa70a...,45,6,59.0
5,5e95e3dae0a30da6e6ea657a7c946d43256b40a5d84c30...,36,1,25.15
6,4afadc85c6e230e21510ccd9d433f93c7ec24f36c7d47a...,15,7,30.0
7,4afadc85c6e230e21510ccd9d433f93c7ec24f36c7d47a...,4,9,27.99
8,5e95e3dae0a30da6e6ea657a7c946d43256b40a5d84c30...,36,2,50.3
9,5e95e3dae0a30da6e6ea657a7c946d43256b40a5d84c30...,15,14,60.0


In [13]:
# Export back to csv
data.to_csv("anonymized_order.csv")