# boto3 S3 operations

In [2]:
import boto3

## List buckets

In [34]:
def list_buckets():
    """Print the name of every S3 bucket returned by ``buckets.all()``, one per line."""
    resource = boto3.resource('s3')
    bucket_names = (entry.name for entry in resource.buckets.all())
    for bucket_name in bucket_names:
        print(bucket_name)

list_buckets()

classgpt
hackathonfiles123
kafka-stock-market-project-ben


## list folders in bucket

In [26]:
def list_folders():
    """Return the set of top-level "folder" names in the ``classgpt`` bucket.

    A folder name is the part of an object key before the first ``/``.
    A key that contains no ``/`` is returned whole, because splitting it
    yields the key itself as the first element.

    Returns:
        set[str]: the distinct first path segments of all object keys.
    """
    # Build the resource locally instead of relying on a global ``s3``;
    # every other helper in this file does the same, and the global name
    # may be unset or rebound to something that is not a boto3 resource.
    s3 = boto3.resource("s3")
    bucket = s3.Bucket("classgpt")
    return {obj.key.split("/")[0] for obj in bucket.objects.all()}

list_folders()

{'COMS472', 'CPRE419', 'STAT474'}

## List files

In [33]:
from collections import defaultdict
from pprint import pprint

def list_files():
    """Group the non-JSON objects of the ``classgpt`` bucket by top-level folder.

    Each key is split at its first ``/`` into a folder name and the rest of
    the key. Keys whose remainder ends with ``.json`` are left out. A folder
    placeholder key such as ``"name/"`` contributes an empty string entry.
    Keys without any ``/`` do not belong to a folder and are skipped.

    Returns:
        defaultdict[str, list[str]]: folder name -> list of file names
        (nested keys keep their sub-path, e.g. ``"sub/file.pdf"``).
    """
    # Build the resource locally instead of relying on a global ``s3``.
    s3 = boto3.resource("s3")
    bucket = s3.Bucket("classgpt")

    classes = defaultdict(list)

    # Iterates every object in the bucket, not just one directory level.
    for obj in bucket.objects.all():
        # partition() never raises, unlike unpacking split("/"), which fails
        # for keys with zero slashes or more than one slash.
        cname, sep, fname = obj.key.partition("/")
        if not sep:
            continue
        if not fname.endswith(".json"):
            classes[cname].append(fname)

    return classes

pprint(list_files())

defaultdict(<class 'list'>,
            {'COMS472': ['lecture01-intro-2up.pdf',
                         'lecture02-agents-2up.pdf',
                         'lecture03-1-search-2up.pdf',
                         'lecture03-2-informedSearch-2up.pdf',
                         'lecture04-localSearch-2up.pdf',
                         'lecture05-CSP-2up.pdf',
                         'lecture06-game-2up.pdf'],
             'CPRE419': [''],
             'STAT474': ['']})


## Check if a folder exists

In [48]:
def folder_exists(folder_name):
    """Tell whether the ``classgpt`` bucket has any key under ``<folder_name>/``.

    Returns:
        bool: True as soon as one object with that prefix is found,
        False when the prefix matches nothing.
    """
    bucket = boto3.resource("s3").Bucket("classgpt")
    matches = bucket.objects.filter(Prefix=f"{folder_name}/")
    # any() stops at the first yielded object, so at most one item is consumed.
    return any(True for _ in matches)

folder_exists("COMS472")

True

In [12]:
folder_exists("test")

False

## Check if a file exists

In [18]:
import botocore

def file_exists(folder_name, file_name):
    """Tell whether ``<folder_name>/<file_name>`` exists in the ``classgpt`` bucket.

    Returns:
        bool: True when loading the object succeeds, False when the service
        answers with error code ``"404"``.

    Raises:
        botocore.exceptions.ClientError: for any error code other than ``"404"``.
    """
    resource = boto3.resource('s3')
    target = resource.Object("classgpt", f"{folder_name}/{file_name}")

    try:
        target.load()
    except botocore.exceptions.ClientError as err:
        # Only a "404" means "missing"; everything else is propagated.
        if e_code(err) != "404":
            raise
        return False
    return True


def e_code(err):
    """Extract the service error code from a ``ClientError``."""
    return err.response['Error']['Code']


folder_name = "COMS472"
file_name = "lecture01-intro-2up.pdf"
file_exists(folder_name, file_name)

True

In [19]:
folder_name = "COMS472"
file_name = "random.pdf"
file_exists(folder_name, file_name)

False

## Create folder

In [35]:
def create_folder(folder_name):
    """Create an empty "folder" in the ``classgpt`` bucket if it is missing.

    The folder is represented by a placeholder object whose key is
    ``<folder_name>/``. Nothing is written when ``folder_exists`` already
    reports objects under that prefix.
    """
    s3 = boto3.resource("s3")
    bucket = s3.Bucket("classgpt")

    # folder_exists() takes only the folder name; passing the bucket as an
    # extra positional argument raised TypeError.
    if not folder_exists(folder_name):
        bucket.put_object(Key=f"{folder_name}/")
    

create_folder("test")
list_folders()

{'COMS472', 'CPRE419', 'STAT474', 'test'}

## Adding files

In [46]:
def upload_files(file_obj, file_path):
    """Upload ``file_obj`` to the ``classgpt`` bucket under the key ``file_path``.

    ``file_obj`` is handed straight to ``Bucket.upload_fileobj``; the caller
    in this file opens it in binary mode.
    """
    target_bucket = boto3.resource("s3").Bucket("classgpt")
    target_bucket.upload_fileobj(file_obj, file_path)

with open("pdfs/lecture01-intro-2up.pdf", "rb") as f:
    upload_files(f, "COMS472/test.pdf")

list_files()

defaultdict(list,
            {'COMS472': ['lecture01-intro-2up.pdf',
              'lecture02-agents-2up.pdf',
              'lecture03-1-search-2up.pdf',
              'lecture03-2-informedSearch-2up.pdf',
              'lecture04-localSearch-2up.pdf',
              'lecture05-CSP-2up.pdf',
              'lecture06-game-2up.pdf',
              'test.pdf'],
             'CPRE419': [''],
             'STAT474': ['']})

## remove folder

In [36]:
def remove_folder(folder_name):
    """Delete every object under ``<folder_name>/`` in the ``classgpt`` bucket.

    This removes the folder placeholder and all files below it. Calling it
    for a folder that does not exist is a no-op.
    """
    s3 = boto3.resource("s3")
    bucket = s3.Bucket("classgpt")

    # The previous guard called folder_exists(bucket, folder_name), which
    # raised TypeError because folder_exists() takes a single argument.
    # No guard is needed: an empty collection issues no delete request, and
    # the collection-level delete() removes matches in batches rather than
    # one request per object.
    bucket.objects.filter(Prefix=f"{folder_name}/").delete()

remove_folder("test")
list_folders()

{'COMS472', 'CPRE419', 'STAT474'}

## remove files

In [47]:
def remove_file(folder_name, file_name):
    """Delete the object ``<folder_name>/<file_name>`` from the ``classgpt`` bucket.

    Nothing is attempted when ``folder_exists`` reports no objects under
    ``<folder_name>/``.
    """
    s3 = boto3.resource("s3")
    bucket = s3.Bucket("classgpt")

    # folder_exists() takes only the folder name; passing the bucket as an
    # extra positional argument raised TypeError.
    if folder_exists(folder_name):
        # Delete exactly this key. Going through a prefix-filtered collection
        # needed an extra listing call and could match sibling keys that
        # merely start with the same name.
        bucket.delete_objects(
            Delete={"Objects": [{"Key": f"{folder_name}/{file_name}"}]}
        )

remove_file("COMS472", "test.pdf")
list_files()

defaultdict(list,
            {'COMS472': ['lecture01-intro-2up.pdf',
              'lecture02-agents-2up.pdf',
              'lecture03-1-search-2up.pdf',
              'lecture03-2-informedSearch-2up.pdf',
              'lecture04-localSearch-2up.pdf',
              'lecture05-CSP-2up.pdf',
              'lecture06-game-2up.pdf'],
             'CPRE419': [''],
             'STAT474': ['']})

## Create a class

In [62]:
# Create a class named S3 that wraps the helper functions above as methods

class S3:
    """Convenience wrapper around one S3 bucket, treating keys as ``folder/file``.

    Attributes are set in ``__init__``:
        bucket_name: the name passed by the caller.
        s3: the boto3 S3 service resource.
        bucket: the ``Bucket`` resource for ``bucket_name``.
    """

    def __init__(self, bucket_name):
        """Bind the wrapper to ``bucket_name`` using a fresh boto3 S3 resource."""
        self.bucket_name = bucket_name
        self.s3 = boto3.resource("s3")
        self.bucket = self.s3.Bucket(bucket_name)

    def list_folders(self):
        """Return the set of first path segments of all object keys.

        A key that contains no ``/`` is returned whole.
        """
        return {obj.key.split("/")[0] for obj in self.bucket.objects.filter()}

    def list_files(self):
        """Group non-JSON objects by top-level folder.

        Each key is split at its first ``/``. Remainders ending in ``.json``
        are left out; a placeholder key such as ``"name/"`` contributes an
        empty string entry. Keys without any ``/`` are skipped.

        Returns:
            defaultdict[str, list[str]]: folder name -> file names (nested
            keys keep their sub-path).
        """
        classes = defaultdict(list)

        # Iterates every object in the bucket, not just one directory level.
        for obj in self.bucket.objects.filter():
            # partition() never raises, unlike unpacking split("/"), which
            # fails for keys with zero slashes or more than one slash.
            cname, sep, fname = obj.key.partition("/")
            if not sep:
                continue
            if not fname.endswith(".json"):
                classes[cname].append(fname)

        return classes

    def folder_exists(self, folder_name):
        """Return True when at least one key starts with ``<folder_name>/``."""
        for _ in self.bucket.objects.filter(Prefix=f"{folder_name}/"):
            return True
        return False

    def file_exists(self, folder_name, file_name):
        """Return True when ``<folder_name>/<file_name>`` can be loaded.

        Returns False for error code ``"404"``; any other
        ``botocore.exceptions.ClientError`` is re-raised.
        """
        try:
            self.s3.Object(self.bucket_name, f"{folder_name}/{file_name}").load()
            return True
        except botocore.exceptions.ClientError as e:
            if e.response['Error']['Code'] == "404":
                return False
            else:
                raise

    def create_folder(self, folder_name):
        """Create the placeholder key ``<folder_name>/`` unless the folder exists."""
        if not self.folder_exists(folder_name):
            self.bucket.put_object(Key=f"{folder_name}/")

    def upload_files(self, file_obj, file_path):
        """Upload ``file_obj`` to the bucket under the key ``file_path``."""
        self.bucket.upload_fileobj(file_obj, file_path)

    def remove_folder(self, folder_name):
        """Delete every object under ``<folder_name>/``; no-op if none exist."""
        # The collection-level delete() removes matches in batches and sends
        # nothing for an empty collection, so no existence check is needed.
        self.bucket.objects.filter(Prefix=f"{folder_name}/").delete()

    def remove_file(self, folder_name, file_name):
        """Delete the object ``<folder_name>/<file_name>`` if its folder exists."""
        if self.folder_exists(folder_name):
            # Delete exactly this key; no prefix listing is required.
            self.bucket.delete_objects(
                Delete={"Objects": [{"Key": f"{folder_name}/{file_name}"}]}
            )

In [64]:
s3 = S3("classgpt")

# test all methods
s3.list_folders()

{'COMS472', 'CPRE419', 'STAT474'}

In [56]:
s3.list_files()

defaultdict(list,
            {'COMS472': ['lecture01-intro-2up.pdf',
              'lecture02-agents-2up.pdf',
              'lecture03-1-search-2up.pdf',
              'lecture03-2-informedSearch-2up.pdf',
              'lecture04-localSearch-2up.pdf',
              'lecture05-CSP-2up.pdf',
              'lecture06-game-2up.pdf'],
             'CPRE419': [''],
             'STAT474': ['']})

In [57]:
s3.folder_exists("COMS472")

True

In [58]:
s3.file_exists("COMS472", "lecture01-intro-2up.pdf")

True

In [59]:
s3.create_folder("test")
s3.list_folders()

{'COMS472', 'CPRE419', 'STAT474', 'test'}

In [60]:
s3.remove_folder("test")
s3.list_folders()

{'COMS472', 'CPRE419', 'STAT474'}

In [65]:
with open("pdfs/lecture01-intro-2up.pdf", "rb") as f:
    s3.upload_files(f, "COMS472/test.pdf")
s3.list_files()

defaultdict(list,
            {'COMS472': ['lecture01-intro-2up.pdf',
              'lecture02-agents-2up.pdf',
              'lecture03-1-search-2up.pdf',
              'lecture03-2-informedSearch-2up.pdf',
              'lecture04-localSearch-2up.pdf',
              'lecture05-CSP-2up.pdf',
              'lecture06-game-2up.pdf',
              'test.pdf'],
             'CPRE419': [''],
             'STAT474': ['']})

In [66]:
s3.remove_file("COMS472", "test.pdf")
s3.list_files()

defaultdict(list,
            {'COMS472': ['lecture01-intro-2up.pdf',
              'lecture02-agents-2up.pdf',
              'lecture03-1-search-2up.pdf',
              'lecture03-2-informedSearch-2up.pdf',
              'lecture04-localSearch-2up.pdf',
              'lecture05-CSP-2up.pdf',
              'lecture06-game-2up.pdf'],
             'CPRE419': [''],
             'STAT474': ['']})