In [None]:
# Lake Creator Demo notebook.
# List of activities 
## Deregister cms secured bucket location
## Delete and create cms_secured_db database 
## Add Lakeformation policy tags to the Database, tables and respective columns
## Register cms secured bucket location


In [None]:
# Imports
import json 
import boto3
import logging
import pprint
from pandas import DataFrame
# Root logger: INFO level, timestamped "<time> <LEVEL> <message>" lines.
logging.basicConfig(
    level=logging.INFO,
    datefmt='%Y-%m-%d %H:%M:%S',
    format='%(asctime)s %(levelname)-8s %(message)s',
)
logger = logging.getLogger()

In [None]:
# Import orbit helpers
from aws_orbit_sdk.database import get_athena
from aws_orbit_sdk.common import get_workspace,get_scratch_database

In [None]:
# Clients
# One boto3 client per AWS service, created in this order:
# Lake Formation, IAM, SSM, Glue.
lfc, iamc, ssmc, gluec = (
    boto3.client(service_name)
    for service_name in ('lakeformation', 'iam', 'ssm', 'glue')
)

# Step 1 - Orbit configuration details


In [None]:
# Workspace metadata returned by the Orbit SDK for the running team space.
workspace = get_workspace()

orbit_lake_creator_role_arn = workspace['EksPodRoleArn']
# Second-to-last ':'-separated field of the pod role ARN (the account id of an IAM ARN).
catalog_id = orbit_lake_creator_role_arn.split(':')[-2]
env_name = workspace['env_name']
team_space = workspace['team_space']
# Guard: the rest of the notebook expects to run in the lake-creator team space.
assert team_space == 'lake-creator'


In [None]:
# Define parameters
# Target database that this notebook drops, recreates and tags.
secured_glue_db = 'cms_secured_db'
# Source database whose tables are copied into the secured database.
unsecured_glue_db = 'cms_raw_db'

In [None]:
def get_ssm_parameters(ssm_string, ignore_not_found=False):
    """Read an SSM parameter and decode its value as JSON.

    Args:
        ssm_string: Name of the SSM parameter to fetch.
        ignore_not_found: When True, a parameter that does not exist yields
            an empty dict instead of raising.

    Returns:
        The JSON-decoded parameter value, or ``{}`` when the parameter does
        not exist and ``ignore_not_found`` is True.

    Raises:
        ssmc.exceptions.ParameterNotFound: The parameter does not exist and
            ``ignore_not_found`` is False.
        Exception: Any other failure (permissions, throttling, a value that
            is not valid JSON) always propagates, regardless of
            ``ignore_not_found``.
    """
    try:
        parameter = ssmc.get_parameter(Name=ssm_string)
    except ssmc.exceptions.ParameterNotFound:
        # Only "not found" is optional; hiding other errors would make a
        # misconfigured environment look like an empty configuration.
        if ignore_not_found:
            return {}
        raise
    return json.loads(parameter['Parameter']['Value'])

        
def get_demo_configuration():
    """Return the demo settings stored in SSM under ``/orbit/<env_name>/demo``.

    The lookup is made with ``ignore_not_found`` enabled.
    """
    demo_parameter_name = f"/orbit/{env_name}/demo"
    return get_ssm_parameters(demo_parameter_name, True)

demo_config = get_demo_configuration()

# get_demo_configuration() yields {} when the SSM parameter is absent, so check
# the required entries up front instead of failing with an opaque
# "'NoneType' object has no attribute 'split'".
missing_demo_keys = [
    key for key in ("LakeBucket", "SecuredLakeBucket") if not demo_config.get(key)
]
if missing_demo_keys:
    raise ValueError(
        f"Demo configuration /orbit/{env_name}/demo is missing required keys: {missing_demo_keys}"
    )

# Keep only the part after ':::' (the bucket name of an S3 bucket ARN).
lake_bucket = demo_config["LakeBucket"].split(':::')[1]
secured_lake_bucket = demo_config["SecuredLakeBucket"].split(':::')[1]
# S3 prefix that holds the secured database; note the trailing slash.
secured_location = f"s3://{secured_lake_bucket}/{secured_glue_db}/"

(lake_bucket,secured_lake_bucket, secured_location)

# Step 2 - Clean orbit secured bucket and cms_secured_db


## Deregister orbit secured bucket with lakeformation location

In [None]:
# Deregister the secured bucket so it can be cleaned and registered again later.
# Only "not registered" is tolerated; any other failure (permissions, bad ARN,
# service error) propagates instead of being reported as "not yet registered".
try:
    deregister_resource_response = lfc.deregister_resource(ResourceArn=f"arn:aws:s3:::{secured_lake_bucket}")
    print(deregister_resource_response['ResponseMetadata']['HTTPStatusCode'])
except lfc.exceptions.EntityNotFoundException as e:
    print("location was not yet registered")
    print(e)


In [None]:
# (Re)load the SQL magic extension and disable its autocommit.
%reload_ext sql
%config SqlMagic.autocommit=False # for engines that do not support autommit
# Athena helper from the Orbit SDK; later cells run statements through athena.current_engine.
athena = get_athena()
# Connect using the "default" database.
%connect_to_athena -database default

In [None]:
# List every object currently stored under the secured database's S3 prefix.
!aws s3 ls $secured_location --recursive 

## Drop and clean previous created database


In [None]:


# Drop the secured database, including its tables (CASCADE), if an earlier run left it behind.
%sql drop database if exists $secured_glue_db CASCADE


In [None]:
# Delete all objects under the secured prefix; --quiet suppresses the per-object output.
!aws s3 rm --recursive $secured_location --quiet

# Step 3 - Create Database In Glue

## We are all set to start creating our secured database in our secured S3 location by running an Athena SQL query. We will then quickly check our database list to ensure it was created successfully:

In [None]:
# Build the CREATE DATABASE statement that places the secured database at its S3 prefix;
# the bare expression on the last line echoes the statement for inspection.
create_db = "create database {} LOCATION '{}'".format(secured_glue_db, secured_location)
create_db

In [None]:
# Run the CREATE DATABASE statement held in create_db through the Athena engine.
athena.current_engine.execute(create_db)


In [None]:
# List the databases to confirm that the secured database now exists.
%sql show databases

## Create Tables
It's time to create new tables in our secured database from the data in our unsecured database. We will run a load_tables() function, which iterates over all of the tables:

The load_tables() function performs the following steps:

- Retrieves the definitions of all the tables in our unsecured (raw) database as a list of Table objects
- For each table object creates a new Parquet formatted table in our secured database located in our secured s3 location
- Runs a row-count query on each secured table to check that it was created successfully

In [None]:
import time

def load_tables():
    """Copy every table of the unsecured database into the secured database.

    For each table that Glue lists in ``unsecured_glue_db``, runs an Athena
    CREATE TABLE AS SELECT that writes a SNAPPY-compressed Parquet copy under
    ``secured_location`` and registers it in ``secured_glue_db``. The row
    count of each new table is then printed as a sanity check.

    Raises:
        Exception: Whatever the Glue client or the Athena engine raises. A
            failing row-count query is retried once after a short wait
            before its error propagates.
    """
    # Paginate: a single get_tables call only returns the first page of tables.
    table_pages = gluec.get_paginator('get_tables').paginate(DatabaseName=unsecured_glue_db)
    table_names = [table['Name'] for page in table_pages for table in page['TableList']]

    # secured_location already ends with '/'; strip it so the table location
    # does not contain an empty '//' path segment.
    base_location = secured_location.rstrip('/')

    for table_name in table_names:
        create_table = """
                CREATE TABLE {}.{}
                WITH (
                    format = 'Parquet',
                    parquet_compression = 'SNAPPY',
                    external_location = '{}/{}/'
                )
                AS
                (select * from {}.{})
            """.format(secured_glue_db, table_name, base_location, table_name, unsecured_glue_db, table_name)

        print(f'creating table {table_name}...')
        athena.current_engine.execute(create_table)
        print(f'created table {table_name}')

        query = f"select count(*) as {table_name}_count from {secured_glue_db}.{table_name}"
        try:
            res = athena.current_engine.execute(query)
        except Exception as error:
            print("Unexpected error:", error)
            print("Try again to run query...")
            # Retry only the read. Dropping the database or deleting the data
            # here would destroy the tables created so far and guarantee that
            # the retried query fails.
            time.sleep(10)
            # try one more time
            res = athena.current_engine.execute(query)

        df = DataFrame(res.fetchall())
        print(df)


In [None]:
# Copy every table from the raw database into the secured database (one Athena CTAS per table).
load_tables()

# Step 4 - Adding lakeformation policy tags to the resources - Database, Tables and Columns.

## Adding tag to Database

In [None]:
# Tag the whole secured database with security-level = sec-5.
db_add_lf_tags_to_resource_response = lfc.add_lf_tags_to_resource(
    CatalogId=catalog_id,
    Resource={'Database': {'CatalogId': catalog_id, 'Name': secured_glue_db}},
    LFTags=[
        {'CatalogId': catalog_id, 'TagKey': 'security-level', 'TagValues': ['sec-5']},
    ],
)


In [None]:
assert 200 == db_add_lf_tags_to_resource_response['ResponseMetadata']['HTTPStatusCode']
# The call can return HTTP 200 while listing rejected tags under 'Failures';
# treat any reported failure as an error and show it in the assertion message.
assert not db_add_lf_tags_to_resource_response.get('Failures'), db_add_lf_tags_to_resource_response.get('Failures')

## Adding tag to Database Table. Overrides the database inherited tag.
### One way to increase security is to tag an entire table with a higher security level. Here we will give a table a sec-4 security level:

In [None]:
# Tag the inpatient_claims table with security-level = sec-4.
table_add_lf_tags_to_resource_response = lfc.add_lf_tags_to_resource(
    CatalogId=catalog_id,
    Resource={
        'Table': {
            'CatalogId': catalog_id,
            'DatabaseName': secured_glue_db,
            'Name': 'inpatient_claims',
        },
    },
    LFTags=[
        {'CatalogId': catalog_id, 'TagKey': 'security-level', 'TagValues': ['sec-4']},
    ],
)

In [None]:
assert 200 == table_add_lf_tags_to_resource_response['ResponseMetadata']['HTTPStatusCode']
# The call can return HTTP 200 while listing rejected tags under 'Failures';
# treat any reported failure as an error and show it in the assertion message.
assert not table_add_lf_tags_to_resource_response.get('Failures'), table_add_lf_tags_to_resource_response.get('Failures')

## Adding tag to Table column. Overrides the column tag inherited from database.
### Add high secure tag to columns.

In [None]:
# Tag two columns of beneficiary_summary with security-level = sec-2.
table_columns_add_lf_tags_to_resource_response = lfc.add_lf_tags_to_resource(
    CatalogId=catalog_id,
    Resource={
        'TableWithColumns': {
            'CatalogId': catalog_id,
            'DatabaseName': secured_glue_db,
            'Name': 'beneficiary_summary',
            'ColumnNames': ['sp_depressn', 'sp_diabetes'],
        },
    },
    LFTags=[
        {'CatalogId': catalog_id, 'TagKey': 'security-level', 'TagValues': ['sec-2']},
    ],
)

In [None]:
assert 200 == table_columns_add_lf_tags_to_resource_response['ResponseMetadata']['HTTPStatusCode']
# The call can return HTTP 200 while listing rejected tags under 'Failures';
# treat any reported failure as an error and show it in the assertion message.
assert not table_columns_add_lf_tags_to_resource_response.get('Failures'), table_columns_add_lf_tags_to_resource_response.get('Failures')

# Step 5 - Register secured bucket location with lake formation

In [None]:
# Register the secured bucket as a Lake Formation location, using the service-linked role.
reg_s3_location_response = lfc.register_resource(
    ResourceArn=f"arn:aws:s3:::{secured_lake_bucket}",
    UseServiceLinkedRole=True,
)


In [None]:
# Sanity check: the registration call answered with HTTP 200.
registration_status = reg_s3_location_response['ResponseMetadata']['HTTPStatusCode']
assert registration_status == 200

# Step 6 - Quick check on the created tables.


In [None]:
# Reload the SQL magic and fetch a fresh Athena helper for the verification queries.
# NOTE(review): presumably refreshed so the queries run after the Lake Formation registration — confirm.
%reload_ext sql
%config SqlMagic.autocommit=False # for engines that do not support autommit
athena = get_athena()


In [None]:
# Connect using the secured database (name hard-coded; same value as secured_glue_db).
%connect_to_athena -database cms_secured_db


In [None]:
# Sample one row from inpatient_claims, the table tagged sec-4 earlier in this notebook.
%sql select * from cms_secured_db.inpatient_claims limit 1

In [None]:
# Read the two beneficiary_summary columns that were tagged sec-2 earlier in this notebook.
%sql select sp_depressn, sp_diabetes from cms_secured_db.beneficiary_summary limit 1

In [None]:
# Read two columns from outpatient_claims, which has no table- or column-specific tag set in this notebook.
%sql select clm_pmt_amt, nch_prmry_pyr_clm_pd_amt from cms_secured_db.outpatient_claims limit 1

# End of orbit lake creator demo notebook.