In [2]:
import base64
import hashlib
import re
import secrets

from faker import Faker


In [3]:
# Shared generator for every synthetic record below; "ko_KR" is the locale.
fake = Faker("ko_KR")

In [None]:
# Two adjacent Hangul words: the first ending in 시/도, the second ending in
# 시/군/구, optionally separated by whitespace.
address_pattern = r"([가-힣]+(시|도))\s*([가-힣]+(시|군|구))"

# Try the pattern against one generated address.
f_address = fake.address()
match = re.search(address_pattern, f_address)

# Nothing is printed when the generated address does not contain the pattern.
if match is not None:
    print(match[0])

In [37]:
def transfer_address(address: str):
    """Reduce an address to its first two whitespace-separated tokens.

    Args:
        address: Free-form address text.

    Returns:
        The first two tokens joined by a single space. Fewer tokens are
        returned as-is, and blank input yields an empty string.
    """
    # split() without a separator already ignores leading/trailing whitespace.
    leading_tokens = address.split()[:2]
    return " ".join(leading_tokens)


def transfer_hash(text: str, salt: str):
    """Pseudonymize ``text`` with two salted SHA-256 rounds.

    The first round hashes ``text + salt`` and keeps the hex digest. The
    second round hashes that hex string with the salt appended again and
    keeps the raw digest.

    Args:
        text: Value to hash.
        salt: Text appended to the input of both rounds.

    Returns:
        The second-round digest, base64-encoded as a str.
    """

    def _salted_sha256(value: str):
        # Both rounds append the same salt and encode as UTF-8.
        return hashlib.sha256((value + salt).encode("utf-8"))

    first_round_hex = _salted_sha256(text).hexdigest()
    second_round_raw = _salted_sha256(first_round_hex).digest()
    return base64.b64encode(second_round_raw).decode("utf-8")

In [None]:
# 16 random bytes, kept as base64 text so the salt can be concatenated with
# the str values passed to transfer_hash. A new salt is drawn on every run.
salt = base64.b64encode(secrets.token_bytes(16)).decode("utf-8")
salt

In [None]:
# Draw one synthetic profile and pseudonymize it in place.
profile = fake.profile()

# Direct identifiers are replaced by their salted hashes.
profile.update(
    {
        key: transfer_hash(profile[key], salt)
        for key in ("ssn", "username", "name", "address", "mail")
    }
)

# The residence is kept, reduced to its first two tokens.
profile["residence"] = transfer_address(profile["residence"])

# These fields are removed from the record entirely.
del profile["current_location"], profile["blood_group"]
del profile["website"], profile["birthdate"]

profile

In [None]:
# credit_card_full() returns one newline-separated text block:
#   <card type>
#   <first name> <last name>
#   <card number> <expiry>
#   <security code label>: <security code>
card_lines = fake.credit_card_full().strip().splitlines()

card_name = card_lines[0]
# Faker writes the owner line as "first_name last_name", so the given name is
# the first token and the family name the second.
first_name, last_name = card_lines[1].strip().split()
card_number, card_expire = card_lines[2].strip().split()
# The label and the code are separated by ": " (e.g. "CVC: 123").
security_type, security_number = card_lines[3].strip().split(": ")

# Parsed card record; key order is the order the fields are displayed in.
card_profile = {
    "card_name": card_name,
    "last_name": last_name,
    "first_name": first_name,
    "card_number": card_number,
    "card_expire": card_expire,
    "security_type": security_type,
    "security_number": security_number,
}
card_profile

In [None]:
# Replace the cardholder's identifying values with salted hashes. All three
# hashes are computed from the current values before the dict is modified.
card_profile.update(
    name=transfer_hash(
        card_profile["first_name"] + card_profile["last_name"], salt
    ),
    card_number=transfer_hash(card_profile["card_number"], salt),
    security_number=transfer_hash(card_profile["security_number"], salt),
)

# The plain name parts are removed once the combined hash exists. Because this
# cell reads the keys it deletes, it needs a freshly built card_profile each
# time it is run.
del card_profile["last_name"], card_profile["first_name"]

card_profile

In [23]:
import polars as pl

# Generate 100,000 synthetic profiles and load them into a polars DataFrame.
profiles = [fake.profile() for _ in range(100_000)]
df_profiles = pl.DataFrame(profiles)

In [None]:
# Columns replaced by their salted hash. `mail` is included so the batch
# output does not keep e-mail addresses in plain text, matching the
# single-profile cell.
hashed_columns = ["ssn", "username", "name", "mail"]

# Columns reduced to their first two address tokens.
# NOTE(review): the single-profile cell hashes `address` instead of
# coarsening it — confirm which treatment is intended.
coarsened_columns = ["residence", "address"]

# Columns removed outright, the same ones the single-profile cell deletes.
dropped_columns = ["current_location", "blood_group", "website", "birthdate"]

exprs = [
    *(
        pl.col(column).map_elements(
            lambda x: transfer_hash(x, salt), return_dtype=pl.String
        )
        for column in hashed_columns
    ),
    *(
        pl.col(column).map_elements(transfer_address, return_dtype=pl.String)
        for column in coarsened_columns
    ),
]

# Bind the result so later cells can use it; df_profiles itself is unchanged.
df_profiles_anonymized = df_profiles.with_columns(*exprs).drop(dropped_columns)
df_profiles_anonymized.head()