# AWS S3

Working example of writing to and reading from an S3 bucket with `pandas`, and of listing the bucket's contents with the `boto3` library.

Following:
https://towardsdatascience.com/reading-and-writing-files-from-to-amazon-s3-with-pandas-ccaf90bfe86c


In [None]:
# Set up to use local modules
%load_ext autoreload
%autoreload 2
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
sys.path.insert(0, module_path)

from dotenv import load_dotenv
import pandas as pd
import pandas as pd

from src import process

# Populate os.environ from a local .env file (if any) before the lookups below.
load_dotenv()

# Bucket name and credentials used by the cells below; each is None when
# the corresponding environment variable is not set.
AWS_S3_BUCKET = os.environ.get("AWS_S3_TEST_BUCKET")
AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")
AWS_SESSION_TOKEN = os.environ.get("AWS_SESSION_TOKEN")


# Load the processed results through the project's `process` module.
data_df = process.load_processed_results()
# Uncomment to preview the last rows:
# display(data_df.tail())

In [None]:
# Build a small demo DataFrame and write it to the S3 bucket as a CSV file.
books_df = pd.DataFrame(
    data={"Title": ["Book I", "Book II", "Book V"], "Price": [56.6, 59.87, 74.54]},
    columns=["Title", "Price"],
)

key = "data/raw/books.csv"
# Build the target URI once so the write and the log message cannot drift apart.
books_uri = f"s3://{AWS_S3_BUCKET}/{key}"

books_df.to_csv(
    books_uri,
    index=False,
    storage_options={
        "key": AWS_ACCESS_KEY_ID,
        "secret": AWS_SECRET_ACCESS_KEY,
        # Temporary (STS) credentials are only valid together with their
        # session token; this is None when AWS_SESSION_TOKEN is unset.
        "token": AWS_SESSION_TOKEN,
    },
)
print(f"Data saved at: {books_uri}")

In [None]:
key = "data/raw/books.csv"

# Read the CSV back from the S3 bucket into a DataFrame.
loaded_df = pd.read_csv(
    f"s3://{AWS_S3_BUCKET}/{key}",
    storage_options={
        "key": AWS_ACCESS_KEY_ID,
        "secret": AWS_SECRET_ACCESS_KEY,
        # Temporary (STS) credentials are only valid together with their
        # session token; this is None when AWS_SESSION_TOKEN is unset.
        "token": AWS_SESSION_TOKEN,
    },
)
display(loaded_df)

# List files in a bucket


In [None]:
import boto3

# Create a boto3 session from the credentials loaded from the environment.
session = boto3.Session(
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
    # Needed for temporary (STS) credentials; None when AWS_SESSION_TOKEN
    # is unset, which leaves long-lived key pairs working as before.
    aws_session_token=AWS_SESSION_TOKEN,
)

s3 = session.resource("s3")
# NOTE(review): the bucket name is hard-coded here, while the pandas cells
# use AWS_S3_BUCKET -- confirm whether both are meant to be the same bucket.
my_bucket = s3.Bucket("monkeytype-analysis")
# List all files in my_bucket/data/raw
for my_bucket_object in my_bucket.objects.filter(Prefix="data/raw/"):
    print(my_bucket_object.key)

In [None]:
from fnmatch import fnmatchcase

# Find all files under my_bucket/data/raw whose file name matches the
# pattern "results-*.csv". The pattern is applied to the last path component
# only, so non-CSV objects and directories that merely contain "results-"
# are excluded. Each collected key keeps its full path inside the bucket.
keys = [
    my_bucket_object.key
    for my_bucket_object in my_bucket.objects.filter(Prefix="data/raw/")
    if fnmatchcase(my_bucket_object.key.rsplit("/", 1)[-1], "results-*.csv")
]
print(keys)

In [None]:
# Prepend the bucket path to the keys to turn them into full S3 URIs.
# The prefix is taken from the bucket the keys were listed from, so the
# URIs always point at the bucket that actually holds the objects.
bucket_path = f"s3://{my_bucket.name}/"
# Skip entries that already carry the prefix so re-running this cell does
# not produce "s3://bucket/s3://bucket/..." values.
keys = [
    key if key.startswith(bucket_path) else bucket_path + key
    for key in keys
]
print(keys)

In [None]:
# Bucket.Object takes a key relative to the bucket, but after the previous
# cell `keys` holds full "s3://<bucket>/..." URIs. Strip that prefix (when
# present) so the handle refers to the real object.
first_key = keys[0]
if first_key.startswith(bucket_path):
    first_key = first_key[len(bucket_path):]
my_bucket_object = my_bucket.Object(first_key)