In [None]:
from cryptography.fernet import Fernet
from pyspark.sql.functions import col, udf, lit
from pyspark.sql.types import StringType

In [None]:
# Build a small hard-coded sample dataset.
# Row 3 has a null Password and row 6 has a null Company_Name, so the
# null-handling paths of the encryption UDFs are exercised further down.
data = [
    ["1", "john", "company 1", 12345],
    ["2", "peter", "company 2", 23423],
    ["3", "bobby", "company 3", None],
    ["4", "tony", "company 2", 77464],
    ["5", "steve", "company 1", 80804],
    ["6", "anna", None, 32432],
]

columns = ["Employee_ID", "Employee_Name", "Company_Name", "Password"]

dataframe = spark.createDataFrame(data, schema=columns)
display(dataframe)

Employee_ID,Employee_Name,Company_Name,Password
1,john,company 1,12345.0
2,peter,company 2,23423.0
3,bobby,company 3,
4,tony,company 2,77464.0
5,steve,company 1,80804.0
6,anna,,32432.0


In [None]:
# Keep this key somewhere safe: it is the key handed to Fernet in both
# encrypt_data and decrypt_data, so losing it means the encrypted values
# cannot be decrypted again.
# NOTE: a brand-new key is generated every time this cell runs.
key = Fernet.generate_key()

# Encryption function
@udf(returnType=StringType())
def encrypt_data(text, MASTER_KEY):
    """Encrypt a single column value with Fernet and return the token as text.

    Args:
        text: Value to encrypt. Any non-null value is converted with ``str()``
            first, so numbers are encrypted as their string representation.
        MASTER_KEY: Key passed straight to ``Fernet(...)``.

    Returns:
        The Fernet token decoded to an ASCII string, or ``None`` when ``text``
        is null.
    """
    # Only a real null is passed through untouched. A truthiness test here
    # would also discard legitimate values such as 0, 0.0, False or ""
    # instead of encrypting them.
    if text is None:
        return None

    # Fernet works on bytes, so stringify the value and encode it as UTF-8.
    plain_bytes = str(text).encode('utf-8')

    token = Fernet(MASTER_KEY).encrypt(plain_bytes)

    # Decode the token bytes to ASCII so it fits the StringType return column.
    return token.decode('ascii')

# Decryption function
@udf(returnType=StringType())
def decrypt_data(cipher_text, MASTER_KEY):
    """Decrypt a Fernet token and return the clear value as a string.

    Args:
        cipher_text: Token text to decrypt; null or empty values are skipped.
        MASTER_KEY: Key passed straight to ``Fernet(...)``.

    Returns:
        The decrypted value decoded to a string, or ``None`` when
        ``cipher_text`` is null or empty.
    """
    # Guard clause: nothing to decrypt for a null/empty cell.
    if not cipher_text:
        return None

    token_text = str(cipher_text)

    # Build the cipher from the supplied key.
    fernet = Fernet(MASTER_KEY)

    # Fernet expects bytes in and hands bytes back; convert on both sides.
    plain_bytes = fernet.decrypt(token_text.encode())
    return plain_bytes.decode()

In [None]:
# Encrypt a column that already holds strings.
# NOTE(review): lit(key) passes the key into the query as a literal column;
# confirm that is acceptable outside of a demo.
encrypted = dataframe.withColumn(
    "Company_Name",
    encrypt_data(col("Company_Name"), lit(key)),
)
display(encrypted)

Employee_ID,Employee_Name,Company_Name,Password
1,john,gAAAAABg0jo5jpPRlbz6vYP2ZcEgz_phXDbx9Z_-3ug1etVdSpnSL4tJp3m3wm5BZ0u1QOdHfZolnFenrgAs9KsNGoXXC6Xxbg==,12345.0
2,peter,gAAAAABg0jo5YZ_s8feDklpC7jQzek4KzjRjI-ULrYjaHu67I8anFfx8ofDKYN2FVVonIefQE3P25uObXsGi898Fhp2zJcjACg==,23423.0
3,bobby,gAAAAABg0jo5B9jBARP9kn_hU1EQt07DBVcLp_rIj1zv98jXJKeWAdfkiZsY9agrTUpNTzB0uaEsrlanxN-dKraF6cgxA6uZ8A==,
4,tony,gAAAAABg0jo5-NsYwEfzBn3CV6E93NAUwtXrFfrCdZR6oHVkNwu423HSRfm1sVmqvgYhr6ozk-s6iU2sXIZFyNNfRF2mlWxbRg==,77464.0
5,steve,gAAAAABg0jo5LfTKOdpXXpBHYJGYIub7cbW12LuNV3VGRHIPds0rOljOJ4LzVaz3Rqwfp_YM0A32KUIDyXt9euO2UKk8RZASYA==,80804.0
6,anna,,32432.0


In [None]:
# Decrypt the string column again, using the same key it was encrypted with.
decrypted = encrypted.withColumn(
    "Company_Name",
    decrypt_data(col("Company_Name"), lit(key)),
)
display(decrypted)

Employee_ID,Employee_Name,Company_Name,Password
1,john,company 1,12345.0
2,peter,company 2,23423.0
3,bobby,company 3,
4,tony,company 2,77464.0
5,steve,company 1,80804.0
6,anna,,32432.0


In [None]:
# Encrypt a column that holds only numbers; encrypt_data converts each value
# with str() before encrypting it.
encryptedNumber = dataframe.withColumn(
    "Password",
    encrypt_data(col("Password"), lit(key)),
)
display(encryptedNumber)

Employee_ID,Employee_Name,Company_Name,Password
1,john,company 1,gAAAAABg0jo8O64S6mYoXBgK1PCzk6Wc5UJW6FxYFwSDhRCuVJerNeU87cvsqja81DkOJbruerL56j88s5Ul6nsfDYGn1qrl_A==
2,peter,company 2,gAAAAABg0jo8cUTL_Vl5zUE8kizpDP9Z_QVKAYG5F-6A1iXagx-K4Yzy4LzUxJYXNiIc4x0wiqkQjaF7pKw_YHP9-ZPTiLH5Vg==
3,bobby,company 3,
4,tony,company 2,gAAAAABg0jo8j4b99HGPSOxePcQLnazH928nNMbUkErKNRkZFbDZolv32WBIonu6pIFLuUFzb4S1tv4aHheSORszZGV1d6ZHMA==
5,steve,company 1,gAAAAABg0jo8Ufvva-f45Yy96OyZ9y8gIGhiIrf1s2PWqLdgH2KJ5ZEoLcCIVtEx-0LA4nK7P_KcoDmsYOz_HbbHwgNobNSrqg==
6,anna,,gAAAAABg0jo8gd_mGlyPQBHevS7LfcD44nhSJNlWHJkftC_Sh3rM8_7hogs9djgPCQe8usp2ElOCSYl9lF5e7Zee_F6BB_mMWw==


In [None]:
# Decrypt the numeric column. decrypt_data is declared with
# returnType=StringType(), so the restored values come back as strings.
decryptedNumber = encryptedNumber.withColumn(
    "Password",
    decrypt_data(col("Password"), lit(key)),
)
display(decryptedNumber)

Employee_ID,Employee_Name,Company_Name,Password
1,john,company 1,12345.0
2,peter,company 2,23423.0
3,bobby,company 3,
4,tony,company 2,77464.0
5,steve,company 1,80804.0
6,anna,,32432.0
