[![AWS Data Wrangler](_static/logo.png "AWS Data Wrangler")](https://github.com/awslabs/aws-data-wrangler)

# 3 - Amazon S3

In [1]:
import awswrangler as wr
import pandas as pd
import boto3

# Two-row sample frame that the write examples below send to S3.
df = pd.DataFrame(
    dict(
        id=[1, 2],
        name=["foo", "boo"],
    )
)

## Enter your bucket name:

In [2]:
import getpass

# getpass hides the typed value, so the bucket name is not echoed into the cell output.
# Pass an explicit prompt: the default one is "Password: ", which is misleading
# when the value being requested is a bucket name.
bucket = getpass.getpass("S3 bucket name: ")

 ··········································


## CSV file(s)

In [3]:
# Round trip through a single CSV object: write the frame, then load it back.
path = f"s3://{bucket}/csv/file.csv"

wr.s3.to_csv(df=df, path=path, index=False)
wr.s3.read_csv(path=path)

Unnamed: 0,id,name
0,1,foo
1,2,boo


In [4]:
# Write the same frame to two CSV objects, then read both back in one call.
path = f"s3://{bucket}/csv/file.csv"
path2 = f"s3://{bucket}/csv/file2.csv"

wr.s3.to_csv(df=df, path=path, index=False)
wr.s3.to_csv(df=df, path=path2, index=False)

# Reading by list: pass the object paths explicitly.
wr.s3.read_csv(path=[path, path2])

Unnamed: 0,id,name
0,1,foo
1,2,boo
2,1,foo
3,2,boo


In [5]:
# Reading by prefix: the path ends at the "csv/" prefix rather than naming a single object.
wr.s3.read_csv(f"s3://{bucket}/csv/")

Unnamed: 0,id,name
0,1,foo
1,2,boo
2,1,foo
3,2,boo


## JSON files

In [6]:
# Round trip through a single JSON object: write the frame, then load it back.
path = f"s3://{bucket}/json/file.json"

wr.s3.to_json(df=df, path=path)
wr.s3.read_json(path=path)

Unnamed: 0,id,name
0,1,foo
1,2,boo


In [7]:
# Write the same frame to two JSON objects, then read both back in one call.
path = f"s3://{bucket}/json/file.json"
path2 = f"s3://{bucket}/json/file2.json"

wr.s3.to_json(df=df, path=path)
wr.s3.to_json(df=df, path=path2)

# Reading by list: pass the object paths explicitly.
wr.s3.read_json(path=[path, path2])

Unnamed: 0,id,name
0,1,foo
1,2,boo
2,1,foo
3,2,boo


In [8]:
# Reading by prefix: the path ends at the "json/" prefix rather than naming a single object.
wr.s3.read_json(f"s3://{bucket}/json/")

Unnamed: 0,id,name
0,1,foo
1,2,boo
2,1,foo
3,2,boo


## Parquet files

> For more complex features related to Parquet Datasets, check tutorial number 4.

In [9]:
# Round trip through a single Parquet object: write the frame, then load it back.
path = f"s3://{bucket}/parquet/file.parquet"

wr.s3.to_parquet(df=df, path=path)
wr.s3.read_parquet(path=path)

path_or_paths: ['s3://aws-data-wrangler-test-bucket-ql9ou148dw6r/parquet/file.parquet']


Unnamed: 0,id,name
0,1,foo
1,2,boo


In [10]:
# Write the same frame to two Parquet objects, then read both back in one call.
path = f"s3://{bucket}/parquet/file.parquet"
path2 = f"s3://{bucket}/parquet/file2.parquet"

wr.s3.to_parquet(df=df, path=path)
wr.s3.to_parquet(df=df, path=path2)

# Reading by list: pass the object paths explicitly.
wr.s3.read_parquet(path=[path, path2])

path_or_paths: ['s3://aws-data-wrangler-test-bucket-ql9ou148dw6r/parquet/file.parquet', 's3://aws-data-wrangler-test-bucket-ql9ou148dw6r/parquet/file2.parquet']


Unnamed: 0,id,name
0,1,foo
1,2,boo
2,1,foo
3,2,boo


In [11]:
# Reading by prefix: the path ends at the "parquet/" prefix rather than naming a single object.
wr.s3.read_parquet(f"s3://{bucket}/parquet/")

path_or_paths: ['s3://aws-data-wrangler-test-bucket-ql9ou148dw6r/parquet/file.parquet', 's3://aws-data-wrangler-test-bucket-ql9ou148dw6r/parquet/file2.parquet']


Unnamed: 0,id,name
0,1,foo
1,2,boo
2,1,foo
3,2,boo


## Fixed-width formatted files (Only Read)

In [12]:
# Three fixed-width records (id, name, date). They are uploaded with boto3 directly,
# since this section only demonstrates reading the format.
content = (
    "1  Herfelingen 27-12-18\n"
    "2    Lambusart 14-06-18\n"
    "3 Spormaggiore 15-04-18"
)
s3_client = boto3.client("s3")
s3_client.put_object(Body=content, Bucket=bucket, Key="file.txt")

# The file has no header row, so the column names are supplied explicitly.
wr.s3.read_fwf(f"s3://{bucket}/file.txt", names=["id", "name", "date"])

Unnamed: 0,id,name,date
0,1,Herfelingen,27-12-18
1,2,Lambusart,14-06-18
2,3,Spormaggiore,15-04-18


## Listing

In [13]:
# List the objects found under the bucket root.
wr.s3.list_objects(path=f"s3://{bucket}/")

['s3://aws-data-wrangler-test-bucket-ql9ou148dw6r/csv/file.csv',
 's3://aws-data-wrangler-test-bucket-ql9ou148dw6r/csv/file2.csv',
 's3://aws-data-wrangler-test-bucket-ql9ou148dw6r/file.txt',
 's3://aws-data-wrangler-test-bucket-ql9ou148dw6r/json/file.json',
 's3://aws-data-wrangler-test-bucket-ql9ou148dw6r/json/file2.json',
 's3://aws-data-wrangler-test-bucket-ql9ou148dw6r/parquet/file.parquet',
 's3://aws-data-wrangler-test-bucket-ql9ou148dw6r/parquet/file2.parquet']

## Deleting

In [14]:
# Clean up. NOTE(review): the target is the bucket root, so this presumably removes
# every object in the bucket, not only the files written above — use a scratch bucket.
wr.s3.delete_objects(path=f"s3://{bucket}/")