# 1. Storing data on the cloud

## 1.1. Initialize S3 Bucket

The S3 bucket hosts our `test_features` dataset, which the Lambda function reads in order to perform a prediction.

In [None]:
# Initialize S3 Bucket:

import boto3

def create_bucket(region:str, bucket_name:str) -> dict:
    """Create an S3 bucket in the requested region.

    Args:
        region: AWS region for the bucket, e.g. 'us-east-1'. When empty or
            None, the client falls back to the locally configured default.
        bucket_name: Name of the bucket to create.

    Returns:
        The response returned by the S3 client's ``create_bucket`` call.
    """
    # Pin the client to the requested region so the call does not depend on
    # whichever default region happens to be configured on this machine.
    s3 = boto3.client('s3', region_name=region) if region else boto3.client('s3')

    request = {'Bucket': bucket_name}

    # us-east-1 is the S3 default and is not a valid LocationConstraint value;
    # every other region has to be requested explicitly.
    if region and region != 'us-east-1':
        request['CreateBucketConfiguration'] = {'LocationConstraint': region}

    return s3.create_bucket(**request)

# Target region and name of the bucket used throughout this notebook
region = 'us-east-1'
bucket_name = 'joaomj-lambda-buckets-2022'

# Create the bucket; as the last expression of the cell, the response is displayed
create_bucket(region, bucket_name)

## 1.2. Upload test data to S3 Bucket

In [None]:
from io import BytesIO
import joblib
import boto3
import pandas as pd

def UploadToS3(data, bucket:str, key:str):
    """Serialize ``data`` with joblib and upload the bytes to S3.

    Args:
        data: Object to serialize (anything ``joblib.dump`` accepts).
        bucket: Name of the destination bucket.
        key: Object key under which the serialized bytes are stored.
    """
    # The object is serialized into an in-memory buffer, so nothing is
    # written to local disk before the upload.
    buffer = BytesIO()
    try:
        joblib.dump(data, buffer)

        # Rewind: upload_fileobj reads from the current stream position.
        buffer.seek(0)

        s3_client = boto3.client('s3')
        s3_client.upload_fileobj(Fileobj = buffer, Bucket = bucket, Key = key)
    finally:
        buffer.close()


# loading dataset
df_test = pd.read_csv('./app/test_dataset.csv')

# Destination bucket and object key for the serialized DataFrame
bucket_name = 'joaomj-lambda-buckets-2022'
key = 'validation/df_test.joblib'

# Upload df_test to the bucket under `key`
UploadToS3(df_test, bucket_name, key)

## 1.3. List objects in the S3 Bucket

Check if the data was correctly stored in the S3 Bucket.

In [None]:
import boto3

def listS3objects(bucket:str) -> list:
    """Return the keys of all objects stored in an S3 bucket.

    Args:
        bucket: Name of the bucket to inspect.

    Returns:
        A list with one key per object in the bucket; an empty list when the
        bucket contains no objects.
    """
    # connect to S3 resource
    s3 = boto3.resource('s3')
    my_bucket = s3.Bucket(bucket)

    # Collect every key in one pass. Rebinding a one-element list inside a
    # loop would keep only the last key and leave the name unbound when the
    # bucket is empty.
    return [object_summary.key for object_summary in my_bucket.objects.all()]

# Print the object keys found in the bucket to confirm the upload
print(listS3objects('joaomj-lambda-buckets-2022'))

# 2. Deploying Lambda Function

## 2.1. Locally

In [1]:
import requests
import json
import numpy as np

bucket_name = 'joaomj-lambda-buckets-2022'
key =  'validation/df_test.joblib'
# NOTE(review): the meaning of `percentage` is defined by the Lambda handler
# (not visible in this notebook) - confirm before changing it.
percentage = 0.4

# JSON payload sent to the prediction endpoint: where the dataset lives in S3
# plus the `percentage` setting.
data = {
    'bucket':bucket_name,
    'key':key,
    'percentage':percentage,
}

headers = {
    'Content-type': "application/json"
}

# Main code for post HTTP request
url = "http://127.0.0.1:3000/predict"
response = requests.post(
    url,
    headers=headers,
    data=json.dumps(data),
    # Without a timeout the cell would block forever if the local endpoint
    # is not running; the limit is generous to allow for a slow first call.
    timeout=120,
)

# Fail loudly on a 4xx/5xx answer instead of parsing an error body as predictions.
response.raise_for_status()

# Convert the JSON response into a NumPy array and display it
lambda_predictions = np.array(response.json())
lambda_predictions

array([['43790', 'Female', '25', ..., '31559.0', '116', '1.0'],
       ['227205', 'Female', '24', ..., '27473.0', '66', '1.0'],
       ['14595', 'Male', '51', ..., '32497.0', '216', '1.0'],
       ...,
       ['37661', 'Male', '26', ..., '30419.0', '14', '1.0'],
       ['176658', 'Female', '22', ..., '36345.0', '281', '1.0'],
       ['234987', 'Female', '27', ..., '41482.0', '119', '1.0']],
      dtype='<U32')

In [None]:
# viewing the response
import pandas as pd

#resp = pd.read_json(lambda_predictions['body'])

# Column labels applied positionally to the prediction array.
# NOTE(review): assumes the endpoint returns these 12 fields per row in
# exactly this order - confirm against the Lambda handler.
columns = [    
    'id', 
    'gender', 
    'age',
    'region_code',
    'policy_sales_channel',
    'driving_license',
    'vehicle_age',
    'vehicle_damage',
    'previously_insured',
    'annual_premium',
    'vintage',
    'score'
    ]
# The array is a string array (dtype '<U32' in the output above), so every
# column holds strings until it is cast explicitly.
df = pd.DataFrame(lambda_predictions, columns = columns)

In [7]:
# Preview the first rows of the predictions DataFrame
df.head()

Unnamed: 0,id,gender,age,region_code,policy_sales_channel,driving_license,vehicle_age,vehicle_damage,previously_insured,annual_premium,vintage,score
0,43790,Female,25,50,152,1,New,0,0,31559.0,116,1.0
1,227205,Female,24,18,152,1,New,0,1,27473.0,66,1.0
2,14595,Male,51,47,113,1,Average,0,1,32497.0,216,1.0
3,35188,Female,31,18,152,1,New,0,1,24021.0,205,1.0
4,11138,Male,27,46,152,1,New,0,1,27043.0,163,1.0
