## Lab 2: Exploring AWS Boto3

#### Load Credentials

In [2]:
# Read the key/value pairs stored in ../.env into the process environment
# (this lab keeps its credentials there); the call's result is shown as the cell output.
from dotenv import load_dotenv
load_dotenv(dotenv_path='../.env')

True

#### S3 Client API

In [3]:
# Build a Boto3 S3 client (the client API, as opposed to the resource API)
import os

import boto3

# This single client object is reused by the S3 calls in the cells below
s3_client = boto3.client(service_name='s3')

#### List Buckets

In [4]:
# Print the name of every bucket whose name contains 'techcatalyst'
buckets = s3_client.list_buckets()
all_names = (entry['Name'] for entry in buckets['Buckets'])
for name in filter(lambda candidate: 'techcatalyst' in candidate, all_names):
    print(name)

capstone-techcatalyst-conformed
capstone-techcatalyst-raw
capstone-techcatalyst-transformed
techcatalyst-public
techcatalyst-raw
techcatalyst-transformed


In [5]:
# List every object key in the "techcatalyst-raw" bucket.
# A single list_objects_v2 call returns at most 1,000 keys, so walk all the
# result pages with a paginator instead of reading only the first response.
bucket_name = 'techcatalyst-raw'
paginator = s3_client.get_paginator('list_objects_v2')

all_contents = []
for page in paginator.paginate(Bucket=bucket_name):
    # A page has no 'Contents' entry when it holds no objects
    all_contents.extend(page.get('Contents', []))

# Keep a response-shaped dict so code that reads objects.get('Contents', [])
# keeps working, now over the complete listing.
objects = {'Contents': all_contents, 'KeyCount': len(all_contents)}

for obj in all_contents:
    print(obj['Key'])

BLAKE/test_export.parquet
BLAKE/upload_file_method_GOOG.csv
BLAKE/upload_fileobj_method.txt
BLAKE_wr/9288e6c1eed4476d98eade7875cbf9c0.snappy.parquet
Ben/Million_Songs/
Ben/bingchilling.txt
Ben/gooooog.csv
Ben/parquetGoogleStock/da5315ad16f94e5488251de78076a594_000000_000000.snappy.parquet
Ben/parquetGoogleStock/da5315ad16f94e5488251de78076a594_000001_000000.snappy.parquet
Ben/parquetGoogleStock/da5315ad16f94e5488251de78076a594_000002_000000.snappy.parquet
Ben/parquetGoogleStock/da5315ad16f94e5488251de78076a594_000003_000000.snappy.parquet
Ben/parquetGoogleStock/da5315ad16f94e5488251de78076a594_000004_000000.snappy.parquet
Ben/parquetGoogleStock/da5315ad16f94e5488251de78076a594_000005_000000.snappy.parquet
Ben/parquetGoogleStock/da5315ad16f94e5488251de78076a594_000006_000000.snappy.parquet
Ben/parquetGoogleStock/da5315ad16f94e5488251de78076a594_000007_000000.snappy.parquet
Ben/parquetGoogleStock/da5315ad16f94e5488251de78076a594_000008_000000.snappy.parquet
Ben/parquetGoogleStock/da5315a

In [6]:
# Print only the CSV objects from the "techcatalyst-raw" listing.
# Match on the '.csv' extension (dot included, case-insensitive) so keys that
# merely end in the letters "csv" are skipped and '.CSV' files are kept.
for obj in objects.get('Contents', []):
    key = obj['Key']
    if key.lower().endswith('.csv'):
        print(key)

BLAKE/upload_file_method_GOOG.csv
Ben/gooooog.csv
accidents/accidents_2017_to_2023_english.csv
fabiola/fabiola_GOOG.csv
jaden/uploads/new_file.csv
shaswat/GOOG_NEW.csv
stage/yellow_tripdata.csv
stocks/GOOG.csv
suchitha/GOOG.csv.csv
tatwan/GOOG.csv
tatwan/GOOG_NEW.csv


#### Download object using `download_file`

In [7]:
# Download one S3 object and save it as a local file
s3_client.download_file(
    Filename='fabiola_GOOG.csv',  # name to give the file once it is downloaded
    Key='stocks/GOOG.csv',        # full object key, including the directory part
    Bucket='techcatalyst-raw',    # bucket the object is read from
)

#### Download object using `download_fileobj`

In [8]:
import io

# download_fileobj writes the object's bytes into the file-like object passed
# as Fileobj. It returns None, so its result is not assigned to anything.
io_temp = io.BytesIO()
s3_client.download_fileobj(
    Bucket='techcatalyst-raw',
    Key='stocks/GOOG.csv',
    Fileobj=io_temp,  # pass the io.BytesIO object that receives the data
)

# Rewind so that subsequent reads start at the beginning of the buffer
io_temp.seek(0)

0

In [9]:
# Peek at the first 100 bytes held in the in-memory buffer
first_bytes = io_temp.getvalue()[:100]
print(first_bytes)

b'Date,Open,High,Low,Close,Volume\r\n1/2/2025 16:00:00,191.49,193.2,188.71,190.63,17545162\r\n1/3/2025 16:'


In [10]:
# Read one line from the buffer's current position (the download cell rewound
# it with seek(0)). Each call consumes a line, so re-running this cell returns
# the following line rather than the header again.
next(io_temp)

b'Date,Open,High,Low,Close,Volume\r\n'

In [11]:
# Save the bytes held by the BytesIO buffer to a local file
with open('google_stock_downloaded.csv', mode='wb') as out_file:
    csv_bytes = io_temp.getvalue()
    out_file.write(csv_bytes)

#### Upload local file using `upload_file`

In [12]:
# Send a file from local disk to S3 with upload_file
s3_client.upload_file(
    Key='fabiola/fabiola_GOOG.csv',  # destination key; make sure it follows YOURNAME/ANY_FILE_NAME.csv
    Bucket='techcatalyst-raw',       # target bucket
    Filename='fabiola_GOOG.csv',     # local file to upload
)

#### Upload file using `upload_fileobj`

In [13]:
# Upload bytes directly from an in-memory file object (nothing is written to disk first)
in_memory_file = io.BytesIO(b"Fabiola says hi :)")
s3_client.upload_fileobj(
    Key='fabiola/in_memory_file.txt',  # destination key; make sure it follows YOURNAME/ANY_FILE_NAME.txt
    Bucket='techcatalyst-raw',         # target bucket
    Fileobj=in_memory_file,            # readable binary file-like object to send
)

In [24]:
# List the objects stored under the "fabiola/" prefix of "techcatalyst-raw".
# The trailing slash limits the match to that folder; a bare 'fabiola' prefix
# would also return keys under sibling prefixes such as 'fabiola2/'.
objects = s3_client.list_objects_v2(Bucket='techcatalyst-raw', Prefix='fabiola/')

# 'Contents' is missing when nothing matches, so fall back to an empty list
objects.get('Contents', [])

[{'Key': 'fabiola/fabiola_GOOG.csv',
  'LastModified': datetime.datetime(2025, 8, 4, 19, 38, 39, tzinfo=tzlocal()),
  'ETag': '"8cbbdc687ee45f1fe58a522e16d423c2"',
  'ChecksumAlgorithm': ['CRC32'],
  'ChecksumType': 'FULL_OBJECT',
  'Size': 7889,
  'StorageClass': 'STANDARD'},
 {'Key': 'fabiola/in_memory_file.txt',
  'LastModified': datetime.datetime(2025, 8, 4, 19, 38, 39, tzinfo=tzlocal()),
  'ETag': '"2a2de95b53a44c2c704ba5d77eef3829"',
  'ChecksumAlgorithm': ['CRC32'],
  'ChecksumType': 'FULL_OBJECT',
  'Size': 18,
  'StorageClass': 'STANDARD'},
 {'Key': 'fabiola/test_export.parquet',
  'LastModified': datetime.datetime(2025, 7, 25, 15, 44, 14, tzinfo=tzlocal()),
  'ETag': '"3c8cff84500ad4c4c85361e38dce05ed-1"',
  'ChecksumAlgorithm': ['CRC64NVME'],
  'ChecksumType': 'FULL_OBJECT',
  'Size': 8393,
  'StorageClass': 'STANDARD'}]