# Set Up Data in Athena

## Import libraries

In [37]:
import boto3
import os
import pandas as pd
import sagemaker
import warnings

from IPython.core.display import HTML
from pyathena import connect

# Suppress future warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Filter out the specific UserWarning related to DBAPI2 objects
warnings.filterwarnings('ignore', message="pandas only supports SQLAlchemy connectable")

In [38]:
# List the variables currently persisted by the %store magic; bucket_name is
# restored from this store in a later cell, so it should appear in the listing
%store

Stored variables and their in-db values:
bucket_name                           -> 'wizard-of-tasks-dataset-5432'
setup_dependencies_passed             -> True
setup_s3_bucket_passed                -> True


## Set Up Table in Athena

In [39]:
# Gather the AWS context for this notebook: account id, region, execution role,
# SageMaker session and an S3 client
sts_client = boto3.client("sts")
account_id = sts_client.get_caller_identity().get("Account")
region = boto3.Session().region_name
role = sagemaker.get_execution_role()
sagemaker_session = sagemaker.Session()
# The S3 client uses the region reported by the SageMaker session
s3 = boto3.client("s3", region_name=sagemaker_session.boto_region_name)

In [40]:
# Restore bucket_name from the %store database into this kernel's namespace
%store -r bucket_name
# Echo the value so the bucket in use is visible in the notebook output
print(bucket_name)

wizard-of-tasks-dataset-5432


In [41]:
# Athena database that will hold the dataset's table
database_name = "wizard_of_tasks"

# S3 staging directory handed to the Athena connection
s3_staging_dir = f"s3://{bucket_name}/athena/staging"

# Open the PyAthena connection; later cells reuse `conn`
conn = connect(region_name=region, s3_staging_dir=s3_staging_dir)

# Create the database if it doesn't exist. This is a DDL statement that returns
# no rows, so it is executed on a DB-API cursor instead of pd.read_sql, which is
# meant for statements that produce a result set.
statement = f"CREATE DATABASE IF NOT EXISTS {database_name}"
cursor = conn.cursor()
cursor.execute(statement)
cursor.close()

In [42]:
# List the databases visible through the connection to confirm the new one exists
statement = "SHOW DATABASES"

df_show = pd.read_sql(sql=statement, con=conn)
df_show.head(n=5)

Unnamed: 0,database_name
0,awsdata
1,default
2,sagemaker_featurestore
3,wizard_of_tasks


In [93]:
# Table name
table_name = "data"

# S3 path to dataset
s3_path = f"s3://{bucket_name}/data/"

# DROP and CREATE are DDL statements that return no rows, so they are executed
# on a DB-API cursor instead of pd.read_sql (a result-set reader).
cursor = conn.cursor()

# drop the table if it already exists so the definition below is always applied
statement = f"DROP TABLE IF EXISTS {database_name}.{table_name}"
cursor.execute(statement)

# External table over the '^'-delimited files under s3_path. Every column is
# declared as STRING and the first line of each file is skipped as a header.
# The line terminator is escaped as '\\n' so the SQL sent to Athena contains the
# two-character escape sequence rather than a raw line break inside the literal.
statement = f"""CREATE EXTERNAL TABLE IF NOT EXISTS {database_name}.{table_name}(
         question STRING,
         intent_question STRING,
         history STRING,
         conversation_id STRING,
         document_url_question STRING,
         domain_question STRING,
         text_answer STRING,
         intent_answer STRING,
         domain_answer STRING,
         question_id STRING,
         title STRING,
         description STRING,
         ingredients STRING,
         steps STRING
) ROW FORMAT DELIMITED FIELDS TERMINATED BY '^' LINES TERMINATED BY '\\n' LOCATION '{s3_path}'
TBLPROPERTIES ('skip.header.line.count'='1')"""
cursor.execute(statement)
cursor.close()

In [94]:
# Confirm the table is now listed in the database
statement = f"SHOW TABLES in {database_name}"
df_show = pd.read_sql(sql=statement, con=conn)
df_show.head(n=5)

Unnamed: 0,tab_name
0,data


In [95]:
# Read every row of the table into a DataFrame and preview the first ten
statement = f"SELECT * FROM {database_name}.{table_name}"
df = pd.read_sql(sql=statement, con=conn)
df.head(n=10)

Unnamed: 0,question,intent_question,history,conversation_id,document_url_question,domain_question,text_answer,intent_answer,domain_answer,question_id,title,description,ingredients,steps
0,How do we prepare the tree?,ask_question_recipe_steps,student: I'm ready for the first step now plea...,Wizard-of-Task-diy-1,https://www.wikihow.com/Start-a-Bonsai-Tree,diy,Have you selected a pot? This is a very import...,answer_question_recipe_steps,diy,diy-1-1,How to Start a Bonsai Tree,"""\nThe ancient art of growing Bonsai trees is ...",,['Select a suitable species of tree for your c...
1,"Ok, I have a nice dark green pot, perfect. Wha...",ask_question_ingredients_tools,student: I've got an idea of where I want it t...,Wizard-of-Task-diy-1,https://www.wikihow.com/Start-a-Bonsai-Tree,diy,Next we will prepare the tree for potting. The...,answer_question_recipe_steps,diy,diy-1-2,How to Start a Bonsai Tree,"""\nThe ancient art of growing Bonsai trees is ...",,['Select a suitable species of tree for your c...
2,Does that mean basil grows best in the spring ...,ask_question_recipe_steps,"student: Gotcha! Once I have all those tools,...",Wizard-of-Task-diy-2,https://www.wikihow.com/Grow-Basil,diy,"Yes, like most plants, basil likes a temperate...",answer_question_recipe_steps,diy,diy-2-3,How to Grow Basil,"\nBasil is easy to grow, and transforms ordina...",,"['Choose the kind of basil you wish to grow.',..."
3,I don't really have access to those right now ...,ask_question_recipe_steps,"student: Okay, now what should I do after that...",Wizard-of-Task-diy-3,https://www.wikihow.com/Remove-Salt-Build-up-o...,diy,You can just rub it on the main zipper piece,answer_question_recipe_steps,diy,diy-3-1,How to Remove Salt Build up on a Zipper,\nWhether it’s from roads and sidewalks in the...,,"['Open the zipper as much as possible.', 'Use ..."
4,If I could only choose one thing to decoupage ...,ask_question_recipe_steps,student: What is the easiest type of material ...,Wizard-of-Task-diy-5,https://www.wikihow.com/Decoupage,diy,I would highly recommend either decoupaging wo...,answer_question_recipe_steps,diy,diy-5-5,How to Decoupage,\nIf you'd like to give new life to a piece of...,,"""['Cover your workspace with paper to protect ..."
5,Can you tell me what a vacuum thermoflask is u...,ask_question_ingredients_tools,,Wizard-of-Task-diy-7,https://www.wikihow.com/Clean-a-Vacuum-Thermos...,diy,You can fold it and put it in your wardrobe.,answer_question_recipe_steps,diy,diy-7-0,How to Clean a Vacuum Thermosflask That Has St...,\nInsulated flasks are all the rage these days...,,['Put some baking soda and vinegar into the bo...
6,"I have added the salt, what materials do I nee...",ask_question_ingredients_tools,"student: Ok, I've crushed my ice and filled th...",Wizard-of-Task-diy-7,https://www.wikihow.com/Clean-a-Vacuum-Thermos...,diy,Firstly you will want to put some baking soda ...,answer_question_recipe_steps,diy,diy-7-3,How to Clean a Vacuum Thermosflask That Has St...,\nInsulated flasks are all the rage these days...,,['Put some baking soda and vinegar into the bo...
7,What do I do with the flask?,ask_question_recipe_steps,"student: I have added the salt, what materials...",Wizard-of-Task-diy-7,https://www.wikihow.com/Clean-a-Vacuum-Thermos...,diy,Now mix baking soda and vinegar in your flask.,answer_question_recipe_steps,diy,diy-7-4,How to Clean a Vacuum Thermosflask That Has St...,\nInsulated flasks are all the rage these days...,,['Put some baking soda and vinegar into the bo...
8,"I have added salt, what do I need for the next...",ask_question_ingredients_tools,"student: I have made sure the flask is dry, wh...",Wizard-of-Task-diy-7,https://www.wikihow.com/Clean-a-Vacuum-Thermos...,diy,Next lets fill the flask with hot water. I've ...,answer_question_recipe_steps,diy,diy-7-5,How to Clean a Vacuum Thermosflask That Has St...,\nInsulated flasks are all the rage these days...,,['Put some baking soda and vinegar into the bo...
9,Could I substitute baking soda if I don't have...,ask_question_recipe_steps,teacher: Now you need to rinse out the flask |...,Wizard-of-Task-diy-7,https://www.wikihow.com/Clean-a-Vacuum-Thermos...,diy,Now you want to get rid of the mixture and rin...,answer_question_recipe_steps,diy,diy-7-6,How to Clean a Vacuum Thermosflask That Has St...,\nInsulated flasks are all the rage these days...,,['Put some baking soda and vinegar into the bo...


In [96]:
# Render a link to the Glue console (Data Catalog) for the current region so the
# new table can be reviewed there
catalog_link = f'<b>Review <a target="top" href="https://console.aws.amazon.com/glue/home?region={region}#">Data Catalog</a></b>'
display(HTML(catalog_link))

## Shut down notebook resources

In [97]:
%%html

<!-- Shows a notice and then clicks a hidden button whose data-commandlinker-command
     attribute is "kernelmenu:shutdown". NOTE(review): presumably the hosting UI's
     command linker handles that attribute and shuts the kernel down; confirm. -->
<p><b>Shutting down your kernel for this notebook to release resources.</b></p>
<button class="sm-command-button" data-commandlinker-command="kernelmenu:shutdown" style="display:none;">Shutdown Kernel</button>

<script>
try {
    // Declared with const so the lookup result stays local to this block
    // instead of becoming an implicit global on window.
    const shutdownButtons = document.getElementsByClassName("sm-command-button");
    shutdownButtons[0].click();
}
catch(err) {
    // Best effort: if the button is missing or the click fails, do nothing.
}
</script>

In [13]:
%%javascript

// Best-effort cleanup: save a checkpoint of the notebook, then delete its session.
// NOTE(review): relies on a global `Jupyter` object, presumably only present in
// the classic Notebook front end; confirm for the target environment.
try {
    Jupyter.notebook.save_checkpoint();
    Jupyter.notebook.session.delete();
}
catch(err) {
    // NoOp: any error raised above is deliberately ignored
}

<IPython.core.display.Javascript object>