In [0]:
%pip install -r ../requirements.txt

In [0]:

# --- Notebook parameters -----------------------------------------------------
# Each parameter is exposed as a Databricks text widget and then read back into
# a module-level variable used by the cells below.

# The "metastore_id" default is looked up at run time: the last ':'-separated
# segment of whatever current_metastore() returns.
current_metastore_row = sql("SELECT element_at(split(current_metastore(), ':'), -1) AS metastore").first()
dbutils.widgets.text("metastore_id", defaultValue=current_metastore_row.metastore)

# The remaining widgets have fixed defaults: (widget name, default value).
for widget_name, widget_default in (
    ("catalog", "production"),
    ("new_schema", "finance"),
    ("region", "eu-west-1"),
    ("uc_service_credential", "production-aws-kms"),
):
    dbutils.widgets.text(widget_name, defaultValue=widget_default)

# Read the current widget values (the defaults above unless the user changed them).
metastore_id, catalog, region, new_schema, service_credential = map(
    dbutils.widgets.get,
    ("metastore_id", "catalog", "region", "new_schema", "uc_service_credential"),
)

In [0]:
import boto3

# Helper functions for envelope encryption, loaded from another notebook/module
# in the workspace.
crypto_functions = dbutils.import_notebook("notebooks.envelope_encryption_v2.common.crypto_functions")

# Alias of the per-catalog master key, scoped by metastore and catalog.
# NOTE(review): presumably an AWS KMS key alias — confirm against crypto_functions.
key_alias = f"alias/unity_catalog/{metastore_id}/{catalog}/cmk"

# boto3 session whose credentials are supplied by the service credential named
# in the "uc_service_credential" widget, pinned to the selected region.
credentials_provider = dbutils.credentials.getServiceCredentialsProvider(service_credential)
session = boto3.Session(botocore_session=credentials_provider, region_name=region)

In [0]:
%sql
-- Switch the session's current catalog to the value of the "catalog" parameter
-- (bound through the :catalog parameter marker and resolved as an identifier).
-- Later cells query crypto.keyvault without a catalog prefix.
USE CATALOG IDENTIFIER(:catalog)

In [0]:
from databricks.sdk import WorkspaceClient
from pyspark.sql.types import StructType, StructField, IntegerType, DateType, TimestampType, StringType, BooleanType, BinaryType
from datetime import datetime
from datetime import date

# Build (and display) a candidate crypto.keyvault row holding a freshly generated,
# encrypted data encryption key (DEK) for the schema "<catalog>.<new_schema>".
# Nothing is written to the table here; the resulting DataFrame is only displayed.

# Workspace client, used below to look up the user running this notebook.
ws = WorkspaceClient()

# Ask the crypto helper for a new data key under the catalog's master key and keep
# only the "CiphertextBlob" entry of the response.
# NOTE(review): presumably wraps AWS KMS GenerateDataKey — confirm in crypto_functions.
dek = crypto_functions.generate_data_key(
    session=session,
    key_alias=key_alias,
    encryption_context={"metastore/catalog": f"{metastore_id}/{catalog}"},
).get("CiphertextBlob")

# The "key" column is non-nullable; fail here with a clear message instead of
# letting createDataFrame raise a schema-verification error further down.
if dek is None:
  raise ValueError(f"generate_data_key returned no CiphertextBlob for key alias {key_alias!r}")

keyvault_schema = StructType([
    StructField("id", IntegerType(), False),
    StructField("created_date", DateType(), False),
    StructField("created_time", TimestampType(), False),
    StructField("last_modified_time", TimestampType(), False),
    StructField("created_by", StringType(), False),
    StructField("managed_by", StringType(), False),
    StructField("key_alias", StringType(), False),
    StructField("key_enabled", BooleanType(), False),
    StructField("key_version", IntegerType(), True),
    StructField("key_type", StringType(), False),
    StructField("key", BinaryType(), False)
])

# Next row id. COALESCE makes the first insert into an empty keyvault yield 1;
# a bare MAX(id) + 1 is NULL on an empty table and would violate the non-nullable "id".
# NOTE(review): two concurrent runs would compute the same id — confirm whether
# the eventual write path needs to guard against that.
next_id = spark.sql("SELECT COALESCE(MAX(id), 0) + 1 AS next_id FROM crypto.keyvault").first().next_id

# Keys are registered per schema under the alias "<catalog>.<schema>".
alias = f"{catalog}.{new_schema}"

# Latest existing version for this alias. The alias comes from user-editable widgets,
# so it is bound as a named parameter instead of being spliced into the SQL text.
latest_version = spark.sql(
    "SELECT MAX(key_version) AS max_version FROM crypto.keyvault WHERE key_alias = :alias",
    args={"alias": alias},
).first().max_version

# No previous key for this alias (NULL) -> version 1, otherwise increment.
key_version = (latest_version or 0) + 1

# Resolve the current user once (one API call) and take a single timestamp so that
# created_date, created_time and last_modified_time are mutually consistent.
current_user_name = ws.current_user.me().user_name
now = datetime.now()

keyvault_data = [{
    "id": next_id,
    "created_date": now.date(),
    "created_time": now,
    "last_modified_time": now,
    "created_by": current_user_name,
    "managed_by": current_user_name,
    "key_enabled": True,
    "key_version": key_version,
    "key_type": "ENCRYPTED_DEK",
    "key_alias": alias,
    "key": dek,
}]

df = spark.createDataFrame(keyvault_data, keyvault_schema)
display(df)

### Todo
* Change get_encrypted_dek() to optionally take a key_version or automatically select the latest
* Revise the keyvault creation / new key code to use `MERGE INTO`
* Add row-level filtering (RLF) to keyvault?
* Can any of the functions be optimized?
* Test serverless SQL with SEG - allow pypi
* Test classic with PL
* Automatically provision / add to groups