# 1. Storing data on the cloud

## 1.1. Initialize S3 Bucket

The S3 bucket will host our test_features data set, which the Lambda function can load to make predictions.

In [None]:
# Initialize S3 Bucket:

import boto3

def create_bucket(region:str, bucket_name:str) -> dict:
    """Create an S3 bucket in the requested region.

    Args:
        region: AWS region name (e.g. ``'us-east-1'``, ``'eu-west-1'``).
            If empty or ``None``, the client falls back to the region
            configured in the local AWS settings and no location
            constraint is sent.
        bucket_name: Name of the bucket to create.

    Returns:
        The response dictionary returned by ``s3.create_bucket``.
    """
    # us-east-1 is the S3 default region: it must NOT be sent as a
    # LocationConstraint, otherwise the request is rejected.
    default_region = 'us-east-1'

    # Bind the client to the requested region so the request is routed to
    # the matching endpoint instead of whatever the local default is.
    if region:
        s3 = boto3.client('s3', region_name=region)
    else:
        s3 = boto3.client('s3')

    request = {'Bucket': bucket_name}
    if region and region != default_region:
        request['CreateBucketConfiguration'] = {'LocationConstraint': region}

    return s3.create_bucket(**request)

# Bucket name and AWS region used throughout this walkthrough.
bucket_name = 'joaomj-lambda-buckets-2022'
region = 'us-east-1'

# Create the bucket; the call is the cell's last expression, so its
# response is what the notebook displays.
create_bucket(region=region, bucket_name=bucket_name)

## 1.2. Upload test data to S3 Bucket

In [None]:
from io import BytesIO
import joblib
import boto3
import pandas as pd

def UploadToS3(data, bucket:str, key:str):
    """Serialize ``data`` with joblib and upload it to an S3 bucket.

    The object is dumped into an in-memory ``BytesIO`` buffer, which is
    then handed to the S3 client's ``upload_fileobj``.

    Args:
        data: Object to serialize with ``joblib.dump``.
        bucket: Name of the destination S3 bucket.
        key: Object key under which the serialized data is stored.
    """
    buffer = BytesIO()
    with buffer:
        joblib.dump(data, buffer)
        # Rewind: after the dump the cursor sits at the end of the buffer,
        # and the upload reads from the current position.
        buffer.seek(0)

        s3_client = boto3.client('s3')
        s3_client.upload_fileobj(Fileobj=buffer, Bucket=bucket, Key=key)


# Read the test dataset from the local CSV file.
df_test = pd.read_csv('./app/test_dataset.csv')

# Destination bucket and object key for the serialized dataset.
bucket_name = 'joaomj-lambda-buckets-2022'
key = 'validation/df_test.joblib'

# Serialize the dataset and push it to S3.
UploadToS3(data=df_test, bucket=bucket_name, key=key)

## 1.3. List objects in the S3 Bucket

Check if the data was correctly stored in the S3 Bucket.

In [None]:
import boto3

def listS3objects(bucket:str) -> list:
    """Return the keys of all objects stored in an S3 bucket.

    Args:
        bucket: Name of the S3 bucket to inspect.

    Returns:
        A list with the key of every object in the bucket; an empty list
        when the bucket holds no objects.
    """
    # connect to S3 resource
    s3 = boto3.resource('s3')
    my_bucket = s3.Bucket(bucket)

    # Collect every key. Rebinding the list inside a loop would keep only
    # the last key and leave the name unbound for an empty bucket.
    return [object_summary.key for object_summary in my_bucket.objects.all()]

# Print the object keys to confirm the upload landed in the bucket.
print(listS3objects(bucket='joaomj-lambda-buckets-2022'))