## Create Database in Athena

In [5]:
import boto3
import sagemaker
import pandas as pd

# SageMaker session, together with the default bucket and execution role it reports.
sess = sagemaker.Session()
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()

# Region of the default boto3 session and the account ID reported by STS for the caller.
region = boto3.Session().region_name
caller_identity = boto3.client("sts").get_caller_identity()
account_id = caller_identity.get("Account")

# SageMaker API client created for that same region.
sm = boto3.Session().client(service_name="sagemaker", region_name=region)

## Install and Import PyAthena

In [2]:
!pip install --disable-pip-version-check -q PyAthena==2.1.0
from pyathena import connect

[0m

Create the Athena database for the hotels data

In [6]:
# Name of the Athena database that the CREATE DATABASE statement below targets.
database_name = "hotels"

In [7]:
# Temporary S3 location, inside the session's bucket, used as the staging directory for Athena queries.
s3_staging_dir = f"s3://{bucket}/athena/staging"

In [8]:
# Open the PyAthena connection using the region and staging directory defined earlier.
conn = connect(
    s3_staging_dir=s3_staging_dir,
    region_name=region,
)

In [9]:
# Build the DDL statement and echo it so the exact SQL sent to Athena is visible in the output.
statement = f"CREATE DATABASE IF NOT EXISTS {database_name}"
print(statement)

CREATE DATABASE IF NOT EXISTS hotels


In [10]:
# Execute the CREATE DATABASE statement directly through a DB-API cursor.
# The statement is DDL whose result rows are not used, so there is no reason to
# route it through pandas (pandas is already imported in the first cell and is
# only needed for queries whose results are loaded into a DataFrame).
cursor = conn.cursor()
try:
    cursor.execute(statement)
finally:
    # Release the cursor even if Athena rejects the statement.
    cursor.close()

In [11]:
# Ask Athena for its list of databases and load the result into a DataFrame.
statement = "SHOW DATABASES"

df_show = pd.read_sql(sql=statement, con=conn)
# Preview at most the first 10 rows.
df_show.head(10)

Unnamed: 0,database_name
0,default
1,disp_income
2,dsoaws
3,dspincome1
4,help
5,hotels
6,income
7,personalincome
8,test
9,travel_airbnb


In [10]:
# Record whether the database shows up in the SHOW DATABASES result.
# The flag is assigned unconditionally so it is always defined and always
# reflects this run: with a bare `if`, a missing database would leave the name
# undefined (or holding a value from an earlier execution of this cell) when it
# is passed to `%store`.
ingest_create_athena_db_passed = database_name in df_show.values

In [11]:
# Persist the flag with IPython's %store magic so it can be restored in another kernel/notebook.
%store ingest_create_athena_db_passed

Stored 'ingest_create_athena_db_passed' (bool)


In [12]:
# With no arguments, %store lists the variables currently held in the store.
%store

Stored variables and their in-db values:
balanced_bias_data_jsonlines_s3_uri                   -> 's3://sagemaker-us-east-1-839757285876/bias-detect
balanced_bias_data_s3_uri                             -> 's3://sagemaker-us-east-1-839757285876/bias-detect
bias_data_s3_uri                                      -> 's3://sagemaker-us-east-1-839757285876/bias-detect
ingest_create_athena_db_passed                        -> True
ingest_create_athena_table_csv_passed                 -> True
ingest_create_athena_table_parquet_passed             -> True
ingest_create_athena_table_tsv_passed                 -> True
s3_private_path_csv                                   -> 's3://sagemaker-us-east-1-839757285876/projectbuck
s3_private_path_tsv                                   -> 's3://sagemaker-us-east-1-839757285876/amazon-revi
s3_public_path_csv                                    -> 's3://projectbucketvsbr22/income/'
s3_public_path_tsv                                    -> 's3://amazon-reviews-p

In [13]:
%%html

<!--
  Best-effort kernel shutdown: the script below programmatically clicks the
  hidden button, whose data-commandlinker-command attribute names the
  "kernelmenu:shutdown" command. Any failure (for example, no matching button
  in the page) is swallowed on purpose so the cell never raises.
-->
<p><b>Shutting down your kernel for this notebook to release resources.</b></p>
<button class="sm-command-button" data-commandlinker-command="kernelmenu:shutdown" style="display:none;">Shutdown Kernel</button>

<script>
try {
    // Declared with const so the lookup result stays block-scoped instead of
    // leaking into the page as an implicit global.
    const els = document.getElementsByClassName("sm-command-button");
    els[0].click();
}
catch(err) {
    // NoOp
}
</script>

In [14]:
%%javascript

try {
    Jupyter.notebook.save_checkpoint();
    Jupyter.notebook.session.delete();
}
catch(err) {
    // NoOp
}

<IPython.core.display.Javascript object>