<a id="ingest-from-amazon-s3-to-nosql-table-using-v3io-frames-n-pandas"></a>
#### Ingesting Data 
This example uses the V3IO Frames library; see <https://github.com/v3io/frames>.

In [None]:
# Create the /User/examples directory, including any missing parent directories;
# with -p the command does not fail if the directory already exists.
!mkdir -p /User/examples # <=> /v3io/${V3IO_HOME}/examples or /v3io/users/${V3IO_USERNAME}/examples

In [None]:
%%sh
# Download the sample stocks CSV file from the Iguazio sample-data Amazon S3 bucket.
CSV_PATH="/User/examples/stocks.csv"
# -f: fail on an HTTP error instead of saving the error response body as the CSV file
# -L: follow redirects
# -o: write the downloaded content to the target file
curl -fL "iguazio-sample-data.s3.amazonaws.com/2018-03-26_BINS_XETR08.csv" -o "${CSV_PATH}"

In [None]:
# Use V3IO Frames to convert the CSV file that was ingested in the AWS S3 data-collection example to a NoSQL table.
# NOTE: Make sure to first create a V3IO Frames service from the "Services" page of the platform dashboard, and run the
# "Ingesting Files from Amazon S3 to the Platform File System Using curl" example to create users/$V3IO_USERNAME/examples/stocks.csv.
import pandas as pd
import v3io_frames as v3f
import os

In [None]:
# Instantiate the V3IO Frames client that the following cells use for reading and writing;
# the client targets "framesd:8081" and is bound to the "users" data container.
client = v3f.Client(
    "framesd:8081",
    container="users",
)

In [None]:
# Source: absolute path of the downloaded CSV file
csv_path = os.path.join("/User", "examples", "stocks.csv")

# Target: NoSQL table path, relative to the "users" container that the client is bound to
rel_nosql_table_path = os.path.join(
    os.environ.get('V3IO_USERNAME'),
    "examples",
    "stocks_example_tab",
)

# Load the CSV content into a pandas DataFrame; the header row is inferred from the file
df = pd.read_csv(csv_path, header="infer")

# Write the DataFrame through the Frames "kv" backend to create the NoSQL table
client.write("kv", rel_nosql_table_path, df)

In [None]:
# Use Presto to query the NoSQL table that was created in the previous step
presto_nosql_table_path = os.path.join('v3io.users."' + os.getenv('V3IO_USERNAME'), 'training/data/examples', 'stocks_example_tab"')
%sql select * from $presto_nosql_table_path limit 10

<a id="getting-started-example"></a>
## Data Collection and Exploration Getting-Started Example


In [None]:
%%sh
# Stop at the first failing command, so that a failed mkdir or download is not silently ignored
set -e

DIR_PATH="/User/examples/" # <=> "/v3io/${V3IO_HOME}/examples/" or "/v3io/users/${V3IO_USERNAME}/examples/"
CSV_PATH="${DIR_PATH}stocks.csv"

# Create the examples directory
mkdir -p "${DIR_PATH}"

# Download a sample stocks CSV file from the Iguazio sample data-set Amazon S3 bucket to the examples directory
# -f: fail on an HTTP error instead of saving the error response body as the CSV file
# -L: follow redirects
# -o: write the downloaded content to the target file
curl -fL "iguazio-sample-data.s3.amazonaws.com/2018-03-26_BINS_XETR08.csv" -o "${CSV_PATH}"

<a id="getting-started-example-step-convert-csv-to-nosql-table"></a>
### Convert the Sample CSV File to a NoSQL Table



In [None]:
import os

In [None]:
# Examples directory path on the local file system - the running user's "examples" directory
dir_path = os.path.join('/User/', "examples")
# CSV file path
csv_path = os.path.join(dir_path, "stocks.csv")
# NoSQL table path - a <running user>/examples/stocks_tab table in the "users" container.
# The Frames client is bound to the "users" container, so the table path is given relative to
# the container rather than through the local "/User" mount point.
nosql_table_path = os.path.join(os.getenv('V3IO_USERNAME'), "examples", "stocks_tab")

# Read the sample CSV file into a pandas DataFrame, and let pandas infer the column types
df = pd.read_csv(csv_path)

# Show the first ten rows of the DataFrame
df.head(10)

In [None]:
# Persist the DataFrame as a NoSQL table at nosql_table_path, using the Frames "kv" backend.
# NOTE(review): this call does not name a key column; an earlier version of this comment stated
# that "ISIN" is the table's primary key, but nothing here sets it - confirm which attribute is used as the key.
client.write(
    'kv',
    nosql_table_path,
    dfs=df,
)

<a id="getting-started-example-step-run-sql-queries"></a>
### Step 3: Run Interactive SQL Queries

Use the `%sql` Jupyter magic to run SQL queries on the "stocks_tab" table that was created in the previous step.
(The queries are processed using Presto.)
The example runs a `SELECT` query that reads the first ten table items.

In [None]:
# Build the quoted Presto table identifier: v3io.users."<username>/examples/stocks_example_tab"
# NOTE(review): this points to "stocks_example_tab", whereas the accompanying text mentions "stocks_tab" -
# confirm which table is intended.
presto_nosql_table_path = os.path.join(
    'v3io.users."' + os.environ.get('V3IO_USERNAME'),
    'examples',
    'stocks_example_tab"',
)

In [None]:
# Read up to ten items from the table referenced by presto_nosql_table_path
%sql select * from $presto_nosql_table_path limit 10

### SQL to DataFrame

In [None]:
# Count the items per "securitydesc" value (up to ten groups) and keep the query result in `df`.
# NOTE(review): this rebinds `df`, which previously held the CSV data; whether the result is a pandas
# DataFrame presumably depends on the %sql magic's configuration - confirm.
df = %sql select securitydesc,count(*) as cnt from $presto_nosql_table_path group by securitydesc limit 10

In [None]:
# Display the result of the grouped SQL query
df

In [None]:
# Read the NoSQL table at nosql_table_path back through the Frames "kv" backend (rebinds `df`)
df = client.read(
    'kv',
    nosql_table_path,
)

In [None]:
# Show the first ten rows of the data that was read from the NoSQL table
df.head(10)

In [None]:
# Read the NoSQL table again, this time passing the filter expression 'TradedVolume > 500' (rebinds `df`)
df = client.read(
    'kv',
    nosql_table_path,
    filter='TradedVolume > 500',
)

In [None]:
# Show the first ten rows of the filtered read result
df.head(10)

<a id="getting-started-example-step-convert-data-to-parquet"></a>
### Convert the Data to a Parquet Table

In [None]:
# Write the data of the sample CSV file to a Parquet file in the examples directory.
# `df` was rebound by the preceding SQL-query and NoSQL-read cells, so it no longer holds the CSV data;
# re-read the CSV file so that the Parquet output contains the original, unfiltered data.
prqt_table_path = os.path.join(dir_path, "stocks_prqt")
stocks_csv_df = pd.read_csv(csv_path)
stocks_csv_df.to_parquet(prqt_table_path)