# Explore Data

## Import libraries

In [2]:
import boto3
import os
import pandas as pd
import sagemaker
import warnings

from IPython.core.display import HTML
from pyathena import connect

# Suppress future warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Filter out the specific UserWarning related to DBAPI2 objects
warnings.filterwarnings('ignore', message="pandas only supports SQLAlchemy connectable")

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


In [3]:
# Show all variables currently persisted with %store, along with their stored values
%store

Stored variables and their in-db values:
bucket_name                           -> 'wizard-of-tasks-dataset-5432'
setup_dependencies_passed             -> True
setup_s3_bucket_passed                -> True


## Pull Data from Athena

In [4]:
# Collect the AWS context used by this notebook: account, region, role, session, S3 client
sts_client = boto3.client("sts")
account_id = sts_client.get_caller_identity().get("Account")

boto_session = boto3.Session()
region = boto_session.region_name

role = sagemaker.get_execution_role()
sagemaker_session = sagemaker.Session()

# S3 client created in the region reported by the SageMaker session
s3 = boto3.client(
    's3',
    region_name=sagemaker_session.boto_region_name,
)

In [5]:
# get bucket_name
%store -r bucket_name
print(bucket_name)

# set S3 staging directory
s3_staging_dir = "s3://{0}/athena/staging".format(bucket_name)

# create connection
conn = connect(region_name=region, s3_staging_dir=s3_staging_dir)

wizard-of-tasks-dataset-5432


In [8]:
# Database and table to query
database_name, table_name = "wizard_of_tasks", "data"

# Build the query text, then read the whole result set into a DataFrame
statement = """SELECT * FROM {}.{}""".format(database_name, table_name)
df = pd.read_sql(sql=statement, con=conn)

# Preview the first rows (head() returns 5 rows by default)
df.head()

Unnamed: 0,question,intent_question,history,conversation_id,document_url_question,domain_question,text_answer,intent_answer,domain_answer,question_id,title,description,ingredients,steps
0,How do we prepare the tree?,ask_question_recipe_steps,student: I'm ready for the first step now plea...,Wizard-of-Task-diy-1,https://www.wikihow.com/Start-a-Bonsai-Tree,diy,Have you selected a pot? This is a very import...,answer_question_recipe_steps,diy,diy-1-1,How to Start a Bonsai Tree,"""\nThe ancient art of growing Bonsai trees is ...",,['Select a suitable species of tree for your c...
1,"Ok, I have a nice dark green pot, perfect. Wha...",ask_question_ingredients_tools,student: I've got an idea of where I want it t...,Wizard-of-Task-diy-1,https://www.wikihow.com/Start-a-Bonsai-Tree,diy,Next we will prepare the tree for potting. The...,answer_question_recipe_steps,diy,diy-1-2,How to Start a Bonsai Tree,"""\nThe ancient art of growing Bonsai trees is ...",,['Select a suitable species of tree for your c...
2,Does that mean basil grows best in the spring ...,ask_question_recipe_steps,"student: Gotcha! Once I have all those tools,...",Wizard-of-Task-diy-2,https://www.wikihow.com/Grow-Basil,diy,"Yes, like most plants, basil likes a temperate...",answer_question_recipe_steps,diy,diy-2-3,How to Grow Basil,"\nBasil is easy to grow, and transforms ordina...",,"['Choose the kind of basil you wish to grow.',..."
3,I don't really have access to those right now ...,ask_question_recipe_steps,"student: Okay, now what should I do after that...",Wizard-of-Task-diy-3,https://www.wikihow.com/Remove-Salt-Build-up-o...,diy,You can just rub it on the main zipper piece,answer_question_recipe_steps,diy,diy-3-1,How to Remove Salt Build up on a Zipper,\nWhether it’s from roads and sidewalks in the...,,"['Open the zipper as much as possible.', 'Use ..."
4,If I could only choose one thing to decoupage ...,ask_question_recipe_steps,student: What is the easiest type of material ...,Wizard-of-Task-diy-5,https://www.wikihow.com/Decoupage,diy,I would highly recommend either decoupaging wo...,answer_question_recipe_steps,diy,diy-5-5,How to Decoupage,\nIf you'd like to give new life to a piece of...,,"""['Cover your workspace with paper to protect ..."


## Explore Data

**TODO:** Insert the exploratory analysis of the `wizard_of_tasks.data` table here.

## Shut down notebook resources

In [12]:
%%html

<p><b>Shutting down your kernel for this notebook to release resources.</b></p>
<!-- Hidden button carrying the kernelmenu:shutdown command attribute; the script below clicks it. -->
<button class="sm-command-button" data-commandlinker-command="kernelmenu:shutdown" style="display:none;">Shutdown Kernel</button>

<script>
// Best effort: click the first element with class "sm-command-button".
try {
    // Declared with const so the lookup result does not leak as an implicit global.
    const shutdownButtons = document.getElementsByClassName("sm-command-button");
    // Skip the click when nothing matched instead of relying on a thrown TypeError.
    if (shutdownButtons.length > 0) {
        shutdownButtons[0].click();
    }
}
catch (err) {
    // NoOp: any failure while looking up or clicking the button is deliberately ignored.
}
</script>

In [13]:
%%javascript

try {
    Jupyter.notebook.save_checkpoint();
    Jupyter.notebook.session.delete();
}
catch(err) {
    // NoOp
}

<IPython.core.display.Javascript object>