# Cleanup
This notebook cleans up (deletes) the resources created by the previous notebooks so that the CloudFormation stack can be safely deleted. Examples include the objects in the S3 buckets and the SageMaker model endpoint.

## Imports

In [1]:
from pathlib import Path
import pandas as pd
import numpy as np
import sagemaker
import logging
import psutil
import boto3
import json
import time
import sys
import os

In [2]:
# Put the shared ``utils`` directory (a sibling of the current working
# directory) at the front of the module search path, then import it.
path = Path(os.path.abspath(os.getcwd()))
package_dir = str(path.parent) + '/utils'
print(package_dir)
sys.path.insert(0, package_dir)
import utils

/home/ec2-user/SageMaker/feature-store-expedia/utils


## Setup Logging

In [3]:
# Notebook-wide logger. Pass the ``__name__`` variable (not the string literal
# '__name__') so the logger is named after the module that is actually running.
logger = logging.getLogger(__name__)
# Root logging configuration: every record is emitted at INFO and above as
# comma-separated fields (timestamp, file, function, line, level, process id, message).
logging.basicConfig(format="%(asctime)s,%(filename)s,%(funcName)s,%(lineno)s,%(levelname)s,p%(process)s,%(message)s", level=logging.INFO)

## Global Constants

In [4]:
# Notebook-wide constants.

# Relative path of the local data directory.
LOCAL_DATA_DIR = "../data"

# Name of the CloudFormation stack whose outputs and parameters are looked up below.
STACK_NAME = "expedia-feature-store-demo-v2"


## Setup Config Variables

In [5]:
# Look up the CloudFormation stack outputs that identify the resources used by
# the rest of this notebook.

# S3 buckets
data_bucket_name = utils.get_cfn_stack_outputs(STACK_NAME, 'DataBucketName')
model_bucket_name = utils.get_cfn_stack_outputs(STACK_NAME, 'MLModelBucketName')
athena_query_results_bucket_name = utils.get_cfn_stack_outputs(STACK_NAME, 'AthenaQueryResultsBucketName')
feature_store_bucket_name = utils.get_cfn_stack_outputs(STACK_NAME, 'FeatureStoreBucketName')

# Prediction Lambda function and the DynamoDB table holding its predictions
hotel_cluster_prediction_fn_arn = utils.get_cfn_stack_outputs(STACK_NAME, 'HotelClusterPredictionFunction')
hotel_cluster_prediction_ddb_table_name = utils.get_cfn_stack_outputs(STACK_NAME, 'HotelClusterPredictionsTableName')

# Record every resolved value, one per line, so the run is auditable.
logger.info(
    f"data_bucket_name={data_bucket_name},\n"
    f"athena_query_results_bucket_name={athena_query_results_bucket_name},\n"
    f"model_bucket_name={model_bucket_name}\n"
    f"feature_store_bucket_name={feature_store_bucket_name},\n"
    f"hotel_cluster_prediction_fn_arn={hotel_cluster_prediction_fn_arn}\n"
    f"hotel_cluster_prediction_ddb_table_name={hotel_cluster_prediction_ddb_table_name}"
)

2022-06-27 19:00:50,066,<ipython-input-5-4992a499e393>,<module>,10,INFO,p10102,data_bucket_name=expedia-customer-behavior-data-2345bbc0,
athena_query_results_bucket_name=athena-query-results-2345bbc0,
model_bucket_name=expedia-ml-models-2345bbc0
feature_store_bucket_name=expedia-feature-store-offline-2345bbc0,
hotel_cluster_prediction_fn_arn=arn:aws:lambda:us-east-1:924873211303:function:PredictHotelCluster
hotel_cluster_prediction_ddb_table_name=ExpediaPerCustomerHotelClusterPredictions


In [6]:
# Values produced by the previous notebooks; they were saved as local files
# and are read back here through utils.read_param. Each name is read exactly once.
customer_inputs_fg_name = utils.read_param("customer_inputs_fg_name")
destinations_fg_name = utils.read_param("destinations_fg_name")
customer_inputs_fg_table = utils.read_param("customer_inputs_fg_table")
destinations_fg_table = utils.read_param("destinations_fg_table")
ml_model_endpoint_name = utils.read_param("endpoint_name")

# Parameters of the CloudFormation stack.
raw_data_dir = utils.get_cfn_stack_parameters(STACK_NAME, 'RawDataDir')
app_name = utils.get_cfn_stack_parameters(STACK_NAME, 'AppName')

training_dataset_fname = utils.get_cfn_stack_parameters(STACK_NAME, 'TrainingDatasetFileName')
test_dataset_fname = utils.get_cfn_stack_parameters(STACK_NAME, 'TestDatasetFileName')
validation_dataset_fname = utils.get_cfn_stack_parameters(STACK_NAME, 'ValidationDatasetFileName')

# Fall back to a default instance type when the stack does not define one.
training_job_instance_type = utils.get_cfn_stack_parameters(STACK_NAME, 'TrainingJobInstanceType')
if training_job_instance_type is None:
    training_job_instance_type = "ml.m5.xlarge"
# NOTE(review): unlike the instance type above, the int() conversions below have
# no None fallback and would raise if the parameter is missing - confirm the
# stack always defines these parameters.
training_job_instance_count = int(utils.get_cfn_stack_parameters(STACK_NAME, 'TrainingJobNodeInstanceCount'))

model_ep_instance_type = utils.get_cfn_stack_parameters(STACK_NAME, 'ModelEndpointInstanceType')
model_ep_instance_count = int(utils.get_cfn_stack_parameters(STACK_NAME, 'ModelEndpointInstanceCount'))

customer_input_stream_name = utils.get_cfn_stack_parameters(STACK_NAME, 'CustomerInputStreamName')

# Log every resolved value so the run is auditable.
logger.info(f"customer_inputs_fg_table={customer_inputs_fg_table},\ndestinations_fg_table={destinations_fg_table},\n"
            f"customer_inputs_fg_name={customer_inputs_fg_name},\ndestinations_fg_name={destinations_fg_name}\n"
            f"raw_data_dir={raw_data_dir},\ntraining_dataset_fname={training_dataset_fname},\n"
            f"test_dataset_fname={test_dataset_fname},\nvalidation_dataset_fname={validation_dataset_fname}\n"
            f"training_job_instance_type={training_job_instance_type},\ntraining_job_instance_count={training_job_instance_count},\n"
            f"model_ep_instance_type={model_ep_instance_type},\nmodel_ep_instance_count={model_ep_instance_count},\ncustomer_input_stream_name={customer_input_stream_name}")

2022-06-27 19:00:50,074,utils.py,read_param,131,INFO,p10102,read_param, fpath=../config/customer_inputs_fg_name, read customer_inputs_fg_name=expedia-customer-inputs-2022-6-24-21-43
2022-06-27 19:00:50,075,utils.py,read_param,131,INFO,p10102,read_param, fpath=../config/destinations_fg_name, read destinations_fg_name=expedia-destinations-2022-6-24-21-43
2022-06-27 19:00:50,076,utils.py,read_param,131,INFO,p10102,read_param, fpath=../config/customer_inputs_fg_table, read customer_inputs_fg_table=expedia-customer-inputs-2022-6-24-21-43-1656107063
2022-06-27 19:00:50,077,utils.py,read_param,131,INFO,p10102,read_param, fpath=../config/destinations_fg_table, read destinations_fg_table=expedia-destinations-2022-6-24-21-43-1656107143
2022-06-27 19:00:50,078,utils.py,read_param,131,INFO,p10102,read_param, fpath=../config/customer_inputs_fg_name, read customer_inputs_fg_name=expedia-customer-inputs-2022-6-24-21-43
2022-06-27 19:00:50,078,utils.py,read_param,131,INFO,p10102,read_param, fpath=../c

## Clean up S3 buckets

In [7]:
# Empty every bucket listed below so that the CloudFormation stack can be
# deleted afterwards. An explicit loop is used because the helper is called only
# for its side effect, and binding the results to `_` would shadow IPython's
# last-output variable.
bucket_list = [data_bucket_name, model_bucket_name, athena_query_results_bucket_name, feature_store_bucket_name]
for bucket_name in bucket_list:
    utils.delete_all_objs_from_s3_bucket(bucket_name)

2022-06-27 19:00:51,192,utils.py,delete_all_objs_from_s3_bucket,188,INFO,p10102,delete_all_objs_from_s3_bucket, going to delete objects from bucket=expedia-customer-behavior-data-2345bbc0
2022-06-27 19:00:51,269,utils.py,delete_all_objs_from_s3_bucket,191,INFO,p10102,delete_all_objs_from_s3_bucket, after deleting objects from bucket=expedia-customer-behavior-data-2345bbc0
2022-06-27 19:00:51,277,utils.py,delete_all_objs_from_s3_bucket,188,INFO,p10102,delete_all_objs_from_s3_bucket, going to delete objects from bucket=expedia-ml-models-2345bbc0
2022-06-27 19:00:51,351,utils.py,delete_all_objs_from_s3_bucket,191,INFO,p10102,delete_all_objs_from_s3_bucket, after deleting objects from bucket=expedia-ml-models-2345bbc0
2022-06-27 19:00:51,358,utils.py,delete_all_objs_from_s3_bucket,188,INFO,p10102,delete_all_objs_from_s3_bucket, going to delete objects from bucket=athena-query-results-2345bbc0
2022-06-27 19:00:51,431,utils.py,delete_all_objs_from_s3_bucket,191,INFO,p10102,delete_all_objs_fr

## Clean up SageMaker resources

In [8]:
# Delete the SageMaker resources behind the model endpoint whose name was read
# from the saved "endpoint_name" parameter.
# NOTE(review): in the recorded run the helper logged a ValidationException from
# DescribeEndpointConfig instead of raising, so this cell finishes even when the
# lookup fails - check the log output to confirm what was actually removed.
utils.delete_sagemaker_model_resources(ml_model_endpoint_name)

2022-06-27 19:00:51,559,utils.py,delete_sagemaker_model_resources,207,INFO,p10102,delete_sagemaker_model_resources, ep_name=hotel-cluster-prediction-ml-model-2022-06-26-15-50-01-705
2022-06-27 19:00:51,669,utils.py,delete_sagemaker_model_resources,229,ERROR,p10102,delete_sagemaker_model_resources, exception=An error occurred (ValidationException) when calling the DescribeEndpointConfig operation: Could not find endpoint configuration "arn:aws:sagemaker:us-east-1:924873211303:endpoint-config/hotel-cluster-prediction-ml-model-2022-06-26-15-50-01-705".
