# AWS Data Wrangler
https://aws-data-wrangler.readthedocs.io/

We are waiting for a fix on this issue:
https://github.com/awslabs/aws-data-wrangler/issues/134

In [3]:
!pip install -q awswrangler==0.3.2

[33mYou are using pip version 10.0.1, however version 20.0.2 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [10]:
import boto3

# Take the region from the default boto3 session rather than hard-coding it
# in the notebook.
session = boto3.Session()
region_name = session.region_name

In [4]:
import awswrangler as wr

In [11]:
# Names of the table and the database that the Athena queries below run against
table_name_tsv = "amazon_reviews_tsv"
database_name = "dsoaws"


## Reading from AWS Athena to Pandas

In [18]:
%%time
sess = wr.Session(region_name=region_name, athena_ctas_approach=False)
df = sess.pandas.read_sql_athena(
    sql='SELECT * FROM {} LIMIT 500000'.format(table_name_tsv),
    database='{}'.format(database_name)
)

CPU times: user 20.7 s, sys: 555 ms, total: 21.2 s
Wall time: 29.9 s


In [15]:
# Preview the first five rows (head() defaults to n=5)
df.head()

Unnamed: 0,marketplace,customer_id,review_id,product_id,product_parent,product_title,product_category,star_rating,helpful_votes,total_votes,vine,verified_purchase,review_headline,review_body,review_date
0,US,22480053,R28HBXXO1UEVJT,0843952016,34858117,The Rising,Books,5,0,0,N,N,Great Twist on Zombie Mythos,"I've known about this one for a long time, but...",2012-05-03
1,US,44244451,RZKRFS2UUMFFU,031088926X,676347131,Sticky Faith Teen Curriculum with DVD: 10 Less...,Books,5,15,15,N,Y,Helpful and Practical,The student curriculum was better than I expec...,2012-05-03
2,US,20357422,R2WAU9MD9K6JQA,0615268102,763837025,Black Passenger Yellow Cabs: Of Exile And Exce...,Books,3,6,8,N,N,Paul,"I found \""Black Passenger / Yellow Cabs\"" to b...",2012-05-03
3,US,13235208,R36SCTKYTVPZPC,1900869225,785539232,Direction and Destiny in the Birth Chart,Books,5,10,11,N,Y,Direction and Destiny in the Birth Chart,"Sasportas is a brilliant Astrologer, a captiva...",2012-05-03
4,US,26301786,R10BM6JUOJX27Q,1565129938,64646125,Until the Next Time,Books,3,0,0,Y,N,This was Okay,"I wanted to love this book, I really did since...",2012-05-03


## Reading from AWS Athena to Pandas with the blazing fast CTAS approach

In [19]:
%%time

sess = wr.Session(region_name=region_name, athena_ctas_approach=True)
df = sess.pandas.read_sql_athena(
    sql='SELECT * FROM {} LIMIT 500000'.format(table_name_tsv),
    database='{}'.format(database_name)
)

CPU times: user 1.06 s, sys: 507 ms, total: 1.56 s
Wall time: 10.5 s


In [13]:
# Show the leading five rows of the frame just loaded (n=5 is head()'s default)
df.head()

Unnamed: 0,marketplace,customer_id,review_id,product_id,product_parent,product_title,product_category,star_rating,helpful_votes,total_votes,vine,verified_purchase,review_headline,review_body,review_date
0,US,24509695,R3VR960AHLFKDV,B004HB5E0E,488241329,Shoal Creek Computer Desk,Furniture,4,0,0,N,Y,... desk is very study and it i has a beautifu...,This desk is very study and it i has a beauti...,2015-08-31
1,US,34731776,R16LGVMFKIUT0G,B0042TNMMS,205864445,Dorel Home Products Delaney Large Rectangular ...,Furniture,5,0,0,N,Y,Five Stars,Great item,2015-08-31
2,US,1272331,R1AIMEEPYHMOE4,B0030MPBZ4,124663823,Bathroom Vanity Table Jewelry Makeup Desk Benc...,Furniture,5,1,1,N,Y,Five Stars,"Perfect fit for my bedroom, been wanting one s...",2015-08-31
3,US,45284262,R1892CCSZWZ9SR,B005G02ESA,382367578,Sleep Master Ultima Comfort Memory Foam 6 Inch...,Furniture,3,0,0,N,Y,Good enough,"We use this on a trundle bed. So, it does not...",2015-08-31
4,US,30003523,R285P679YWVKD1,B005JS8AUA,309497463,"1 1/4"" GashGuards: Deluxe Rubberized Plastic B...",Furniture,3,0,0,N,N,Gash Gards for daybed,The product is fine...just seemed like it took...,2015-08-31


### Reading from AWS Athena to Pandas in chunks (for memory-constrained environments)

In [25]:
%%time

sess = wr.Session(region_name=region_name, athena_ctas_approach=False)

df_iter = sess.pandas.read_sql_athena(
    sql='SELECT * FROM {} LIMIT 100000'.format(table_name_tsv),
    database='{}'.format(database_name),
    max_result_size=512_000  # 512 KB
)

CPU times: user 108 ms, sys: 8.22 ms, total: 116 ms
Wall time: 4.18 s


In [27]:
# Use a dedicated loop variable so the `df` loaded by the earlier cells is not
# silently rebound to the last chunk, and print a compact summary instead of
# dumping every chunk's full repr into the notebook output.
# NOTE(review): assumes each chunk is a pandas DataFrame — confirm.
for chunk_df in df_iter:
    print(chunk_df.shape)  # placeholder: replace with real per-chunk processing

### Athena query to receive the result as Python primitives (Iterable[Dict[str, Any]])

In [None]:
# TODO: Look into this example
# NOTE(review): the "..." arguments are placeholders — supply a real SQL string
# and database name before running this cell.
# Per the section heading, each `row` is presumably a dict of column name ->
# Python value; confirm against the installed awswrangler version.
for row in wr.athena.query(query="...", database="..."):
    print(row)