# Demo: loading data from Amazon into iguazio DB

In [1]:
import pandas as pd
import v3io_frames as v3f
# Frames client, constructed with no arguments; the later cells use it to write to and read from the database
client = v3f.Client()
# Name of the table that the later write and read cells operate on
tablename = 'bank'

## Step 1: Load data from Amazon S3 into Pandas

In [2]:
# Fetch the semicolon-delimited bank CSV from its public S3 URL into a DataFrame,
# then preview the first five rows
bank_csv_url = 'https://s3.amazonaws.com/apache-zeppelin/tutorial/bank/bank.csv'
df = pd.read_csv(bank_csv_url, sep=';')
df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,30,unemployed,married,primary,no,1787,no,no,cellular,19,oct,79,1,-1,0,unknown,no
1,33,services,married,secondary,no,4789,yes,yes,cellular,11,may,220,1,339,4,failure,no
2,35,management,single,tertiary,no,1350,yes,no,cellular,16,apr,185,1,330,1,failure,no
3,30,management,married,tertiary,no,1476,yes,yes,unknown,3,jun,199,4,-1,0,unknown,no
4,59,blue-collar,married,secondary,no,0,yes,no,unknown,5,may,226,1,-1,0,unknown,no


## Step 2: Write Pandas dataframe into the database using a single command
Data is streamed into the database via fast NoSQL APIs.

In [3]:
# Write the DataFrame `df` to the table named by `tablename` through the 'v3io' backend.
# The call's result is kept in `out`; none of the cells shown here read it afterwards.
out = client.write('v3io', tablename, df)

## Step 3: Read from the Database with DB side SQL
Offload data filtering, grouping, joins, etc. to a scale-out, high-speed DB engine.
This demo uses Apache Presto as the ANSI SQL processing layer over the iguazio NoSQL database.

In [4]:
# Line-magic SQL query: select every column of the rows in bigdata.bank whose balance exceeds 10000.
# NOTE(review): the table name is hard-coded here; presumably it must match `tablename` ('bank'), and
# `bigdata` is presumably the schema/container the table was written to -- confirm.
%sql select * from bigdata.bank where balance > 10000

Done.


loan,education,previous,housing,poutcome,duration,marital,default,balance,month,contact,campaign,y,job,day,age,pdays
no,secondary,0,no,unknown,219,married,no,26452,jul,telephone,2,no,retired,15,75,-1
no,secondary,0,yes,unknown,249,married,no,19317,aug,cellular,1,yes,retired,4,68,-1


## Read the data as a structured stream
The frames API returns a DataFrame iterator (a stream) to accelerate performance
and allow concurrent data movement and processing.

In [5]:
# Read the rows matching the filter back from the table as a stream of DataFrames,
# then stitch the pieces into one frame (sort=False: columns are not sorted on concat).
# NOTE(review): this rebinds `df`, replacing the frame loaded from the CSV; re-running the
# write cell after this one would write the filtered frame instead of the original data.
frame_stream = client.read(backend='v3io', table=tablename, filter="balance>20000")
df = pd.concat(frame_stream, sort=False)
df.head(8)

Unnamed: 0_level_0,contact,day,poutcome,default,age,__name,duration,education,loan,balance,housing,month,previous,job,y,campaign,pdays,marital
__name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
75,telephone,15,unknown,no,75,75,219,secondary,no,26452,no,jul,0,retired,no,2,-1,married
