In [1]:
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import padding

import os
import base64
import pandas as pd

In [2]:
# Helper: PKCS7-pad a byte string for a 128-bit block size
def pad(data):
    """Return ``data`` with PKCS7 padding appended.

    Args:
        data: The bytes to pad.

    Returns:
        The bytes emitted by a ``padding.PKCS7(128)`` padder after feeding
        it ``data`` and finalizing.
    """
    pkcs7 = padding.PKCS7(128).padder()
    body = pkcs7.update(data)
    tail = pkcs7.finalize()
    return body + tail

# Helper: strip PKCS7 padding (128-bit block size) from a byte string
def unpad(data):
    """Return ``data`` with its PKCS7 padding removed.

    Args:
        data: The padded bytes.

    Returns:
        The bytes emitted by a ``padding.PKCS7(128)`` unpadder after feeding
        it ``data`` and finalizing.
    """
    pkcs7 = padding.PKCS7(128).unpadder()
    body = pkcs7.update(data)
    tail = pkcs7.finalize()
    return body + tail

In [3]:
# Step 1: Generate a 256-bit (32-byte) AES key (run only once to create and save the key)
def generate_key():
    """Create a random 32-byte AES-256 key and store it on disk.

    The key is written to ``./keys/aes-256-secret.key`` (relative to the
    current working directory). The ``keys`` directory is created when it is
    missing, and the key file is restricted to owner read/write.

    WARNING: an existing key file is overwritten. Anything encrypted with the
    previous key can no longer be decrypted once it has been replaced.

    Returns:
        None. The key is only written to disk, never returned.
    """
    key_path = "./keys/aes-256-secret.key"

    # A fresh checkout may not have the directory yet; open() would fail.
    os.makedirs(os.path.dirname(key_path), exist_ok=True)

    key = os.urandom(32)  # AES-256 requires a 256-bit key (32 bytes)

    # Create the file with owner-only permissions instead of the umask default.
    # O_BINARY only exists on Windows, hence the getattr fallback.
    flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC | getattr(os, "O_BINARY", 0)
    fd = os.open(key_path, flags, 0o600)
    with os.fdopen(fd, "wb") as key_file:
        key_file.write(key)

    # The mode passed to os.open() only applies when the file is created, so
    # tighten the permissions of a pre-existing file explicitly.
    os.chmod(key_path, 0o600)

# Generate the key only when none exists yet: re-running this cell with an
# unconditional call would replace the key and make every CSV encrypted with
# the previous one impossible to decrypt.
# To rotate the key on purpose, call generate_key() directly.
if not os.path.exists("./keys/aes-256-secret.key"):
    generate_key()

In [5]:
# Step 2: Load the AES key
def load_key():
    """Read the AES key back from ``./keys/aes-256-secret.key``.

    Returns:
        The full contents of the key file as ``bytes``.
    """
    with open("./keys/aes-256-secret.key", mode="rb") as handle:
        secret = handle.read()
    return secret

# Sanity-check the key without echoing the secret bytes into the saved notebook output
len(load_key())

32

In [10]:
# Step 3: Encrypt a specific column in the CSV
def encrypt_column(input_csv, output_csv, column_name):
    """Encrypt one column of a CSV file with AES (CBC mode) and save the result.

    Every cell of ``column_name`` is converted with ``str()``, PKCS7-padded,
    encrypted under the key returned by ``load_key()`` with a fresh random
    16-byte IV, and stored as ``base64(iv + ciphertext)``. All other columns
    are passed through pandas unchanged.

    Args:
        input_csv: Path of the CSV file to read.
        output_csv: Path the encrypted CSV is written to (without the index).
        column_name: Name of the column whose values are encrypted.

    Returns:
        None. The result is written to ``output_csv`` and a before/after
        preview (``df.head()``) is printed.

    Notes:
        - The "Before" preview prints plaintext values of the column.
        - Only CBC encryption is applied; no authentication tag is computed,
          so tampering with the stored ciphertext is not detected here.
        - Missing cells are encrypted as whatever ``str()`` yields for
          pandas' missing-value marker (presumably the text "nan").
    """
    key = load_key()

    # Read the target column as text. Letting pandas infer a numeric dtype
    # would rewrite values before encryption (e.g. "007" -> 7, "1.50" -> 1.5),
    # so the original text could never be recovered by decrypting.
    df = pd.read_csv(input_csv, dtype={column_name: str})
    print("Before", "\n", df.head())

    # Encrypt the target column
    encrypted_column = []
    for value in df[column_name]:
        # A new IV per cell: AES block size is 16 bytes, so the IV is 16 bytes
        iv = os.urandom(16)
        cipher = Cipher(algorithms.AES(key), modes.CBC(iv), backend=default_backend())
        encryptor = cipher.encryptor()

        padded_data = pad(str(value).encode())
        encrypted_value = encryptor.update(padded_data) + encryptor.finalize()

        # Prefix the IV so decryption can recover it, then base64-encode so
        # the binary data can be stored as text in the CSV
        encrypted_column.append(base64.b64encode(iv + encrypted_value).decode())

    # Replace the column in the DataFrame with the encrypted data
    df[column_name] = encrypted_column

    # Save the encrypted CSV
    df.to_csv(output_csv, index=False)
    print("-" * 100)
    print("After", "\n", df.head())
    print(f"Encrypted column '{column_name}' and saved to {output_csv}")

In [11]:
# Step 4: Decrypt the column from an encrypted CSV
def decrypt_column(input_csv, output_csv, column_name):
    """Decrypt one column of a CSV file and save the result.

    Each cell of ``column_name`` is base64-decoded; the first 16 bytes are
    used as the CBC IV and the remainder is decrypted with the key returned
    by ``load_key()``, unpadded and decoded to a string.

    Args:
        input_csv: Path of the encrypted CSV file to read.
        output_csv: Path the decrypted CSV is written to (without the index).
        column_name: Name of the column whose values are decrypted.

    Returns:
        None. The result is written to ``output_csv`` and a before/after
        preview (``head()``) is printed.
    """
    aes_key = load_key()

    # Load the encrypted CSV
    frame = pd.read_csv(input_csv)
    print("Before", "\n", frame.head())

    def _decrypt_cell(token):
        # One cell: base64 text -> IV (first 16 bytes) + ciphertext -> plaintext string
        raw = base64.b64decode(token)
        iv, ciphertext = raw[:16], raw[16:]
        decryptor = Cipher(
            algorithms.AES(aes_key), modes.CBC(iv), backend=default_backend()
        ).decryptor()
        padded = decryptor.update(ciphertext) + decryptor.finalize()
        return unpad(padded).decode()

    # Decrypt every cell first, then swap the column in one assignment
    plaintext_cells = [_decrypt_cell(token) for token in frame[column_name]]
    frame[column_name] = plaintext_cells

    # Save the decrypted CSV
    frame.to_csv(output_csv, index=False)
    print("-" * 100)
    print("After", "\n", frame.head())
    print(f"Decrypted column '{column_name}' and saved to {output_csv}")

# Usage

#### Encrypt a column

In [12]:
# Encrypt `sensitive_column` of the mock data set into a new CSV
encrypt_column(
    input_csv="./data/mock-data.csv",
    output_csv="./output/aes-256/encrypted.csv",
    column_name="sensitive_column",
)

Before 
    id   name sensitive_column
0   1  Alice          Secret1
1   2    Bob          Secret2
----------------------------------------------------------------------------------------------------
After 
    id   name                              sensitive_column
0   1  Alice  N31AWykg3FOJ9bKQ+VXCJ+oTduAa7pQuPD5DLxwHTQw=
1   2    Bob  KG242YkJ/9y9fAX3qb0Gr5Wum4tdZEshLh9JimvNeLw=
Encrypted column 'sensitive_column' and saved to ./output/aes-256/encrypted.csv


#### Decrypt the column

In [13]:
# Decrypt `sensitive_column` of the encrypted CSV into a new CSV
decrypt_column(
    input_csv="./output/aes-256/encrypted.csv",
    output_csv="./output/aes-256/decrypted.csv",
    column_name="sensitive_column",
)

Before 
    id   name                              sensitive_column
0   1  Alice  N31AWykg3FOJ9bKQ+VXCJ+oTduAa7pQuPD5DLxwHTQw=
1   2    Bob  KG242YkJ/9y9fAX3qb0Gr5Wum4tdZEshLh9JimvNeLw=
----------------------------------------------------------------------------------------------------
After 
    id   name sensitive_column
0   1  Alice          Secret1
1   2    Bob          Secret2
Decrypted column 'sensitive_column' and saved to ./output/aes-256/decrypted.csv
