In [1]:
import pandas as pd

import os
from datetime import datetime
import boto3
from botocore import UNSIGNED
from botocore.client import Config

# Anonymous S3 client: requests are sent unsigned, so no AWS credentials are
# required. This uses botocore's supported UNSIGNED signature setting instead
# of passing empty credentials and patching the private request signer.
# NOTE(review): presumably the target bucket allows public reads -- confirm.
client = boto3.client('s3', config=Config(signature_version=UNSIGNED))

def download_dir(prefix, local, bucket, client=client):
    """
    Download every object whose key starts with `prefix` into `local`.

    Each object's full key is appended to `local`, so the key hierarchy is
    reproduced on disk. Keys ending in '/' are treated as folder placeholders:
    the folder is created locally and nothing is downloaded for them.
    A prefix that matches no keys downloads nothing and returns normally.

    params:
    - prefix: pattern to match in s3
    - local: local path to folder in which to place files
    - bucket: s3 bucket with target contents
    - client: initialized s3 client object
    """
    keys = []
    dirs = []
    kwargs = {
        'Bucket': bucket,
        'Prefix': prefix,
    }
    # The listing is paged; keep requesting pages until no continuation
    # token is returned.
    while True:
        results = client.list_objects_v2(**kwargs)
        # 'Contents' is missing from the response when nothing matches,
        # so fall back to an empty page instead of iterating over None.
        for entry in results.get('Contents') or []:
            key = entry.get('Key')
            if not key:
                continue
            if key.endswith('/'):
                dirs.append(key)
            else:
                keys.append(key)
        next_token = results.get('NextContinuationToken')
        if not next_token:
            break
        kwargs['ContinuationToken'] = next_token

    # Recreate placeholder folders, including ones that hold no objects.
    for d in dirs:
        parent = os.path.dirname(os.path.join(local, d))
        if parent:
            os.makedirs(parent, exist_ok=True)

    for k in keys:
        dest_pathname = os.path.join(local, k)
        parent = os.path.dirname(dest_pathname)
        # An empty parent means "current directory"; makedirs('') would raise.
        if parent:
            os.makedirs(parent, exist_ok=True)
        client.download_file(bucket, k, dest_pathname)

In [5]:
bucket = "workshop-leaderboard"
s3_prefix = "deep-learning/"
local_dir = "/tmp/leaderboard/"

#download all files in S3
download_dir(s3_prefix, local_dir, bucket)

#build list of files, keeping the folder each one was found in so that
#files inside sub-folders are read from the right location
file_paths = []
for (dirpath, dirnames, filenames) in os.walk(local_dir + s3_prefix):
    file_paths.extend(os.path.join(dirpath, name) for name in filenames)
files = [os.path.basename(path) for path in file_paths]

#collect the files whose name starts with today's date (YYYYMMDD_...)
today = datetime.today().date()
todays_frames = []
for path in file_paths:
    f = os.path.basename(path)
    #skip files without a parseable date prefix
    try:
        date = datetime.strptime(f.split('_')[0], '%Y%m%d').date()
    except ValueError:
        print(f'Could not parse date from file {f}')
        continue
    #skip older files
    if date != today:
        continue
    #read files from today
    todays_frames.append(pd.read_csv(path))

#combine, sort, and show leaderboard
if todays_frames:
    leaderboard = pd.concat(todays_frames)
else:
    #pd.concat raises on an empty list; show an empty leaderboard instead
    print(f'No leaderboard files found for {today}')
    leaderboard = pd.DataFrame(columns=['accuracy'])
leaderboard.sort_values(by=['accuracy'], ascending=False).reset_index(drop=True).head(5)

Unnamed: 0,user,date,accuracy,learning_rate,batch_size,epochs,training_time
0,melanie-admin,20220823,67.5,0.001,100,10,39.532104
