In [2]:
%%capture
%pip install awswrangler

In [3]:
import boto3
import pandas as pd
import awswrangler as wr
from datetime import timedelta, timezone, datetime

# Model names to process; each is used as the sub-prefix under
# 'forecast_output/' when loading predictions and is iterated by the main loop.
models = ['schedule', 'retry1']
# Current UTC date formatted as YYYY-MM-DD.
# NOTE(review): not referenced in the visible cells - confirm it is used later.
timetoday = datetime.now(timezone.utc).strftime('%Y-%m-%d')
# S3 bucket that load_pred() reads the forecast CSV files from.
bucket_name = 'sagemaker-us-east-1-123456'

def load_pred(model, lookback_hours=24):
    """Read the recently modified forecast CSV part(s) for ``model`` from S3.

    Objects are looked up under ``s3://<bucket_name>/forecast_output/<model>``,
    restricted to keys ending in ``part0.csv`` whose last-modified time is
    within the look-back window.

    Args:
        model: Model name; used as the path segment after ``forecast_output/``.
        lookback_hours: Size of the look-back window in hours, counted back
            from the current UTC time. Defaults to 24, the value that was
            previously hard-coded.

    Returns:
        Whatever ``wr.s3.read_csv`` returns for the matching objects
        (a pandas DataFrame in the non-chunked mode used here).

    Note:
        The path has no trailing slash, so it presumably acts as a key
        prefix (a model name that is a prefix of another would match both)
        - confirm against the bucket layout. awswrangler is expected to
        raise when no object matches the filters - TODO confirm.
    """
    path = f's3://{bucket_name}/forecast_output/{model}'
    # Timezone-aware cut-off so the comparison with S3 LastModified is valid.
    modified_since = datetime.now(timezone.utc) - timedelta(hours=lookback_hours)
    return wr.s3.read_csv(
        path=path,
        path_suffix='part0.csv',
        last_modified_begin=modified_since,
    )
    
def transform(df):
    """Return a trimmed copy of ``df`` with ``date`` parsed and shifted by 8 hours.

    The ``item_id``, ``p10`` and ``p90`` columns are removed. The ``date``
    column is parsed from strings of the form ``YYYY-MM-DDTHH:MM:SSZ`` and
    then moved forward by eight hours. The input frame is left unmodified.
    """
    unused_columns = ['item_id', 'p10', 'p90']
    offset = timedelta(hours=8)

    def _shifted_dates(frame):
        # Parse the timestamp strings, then apply the fixed offset.
        parsed = pd.to_datetime(frame['date'], format="%Y-%m-%dT%H:%M:%SZ")
        return parsed + offset

    return df.drop(columns=unused_columns).assign(date=_shifted_dates)
    
def put_time(model, dayofweek, best_time, dynamodb=None):
    """Write one best-time record to the ``BestBillTime`` DynamoDB table.

    Args:
        model: Stored under the ``mt_category`` attribute.
        dayofweek: Stored under the ``day_of_week`` attribute.
        best_time: Stored under ``best_time`` after conversion with ``str()``.
        dynamodb: Optional DynamoDB resource-like object exposing
            ``Table(name)``. When ``None``, a default
            ``boto3.resource('dynamodb')`` is created.

    Returns:
        The response returned by ``table.put_item``.
    """
    # Compare against None explicitly: an injected resource or test double
    # that happens to be falsy must not be swapped for a real boto3 resource.
    if dynamodb is None:
        dynamodb = boto3.resource('dynamodb')

    table = dynamodb.Table('BestBillTime')
    item = {
        'mt_category': model,
        'day_of_week': dayofweek,
        'best_time': str(best_time),
    }
    return table.put_item(Item=item)

# Day-of-week labels in the order used by pandas' ``dt.dayofweek``
# (0 = Monday ... 6 = Sunday). Loop-invariant, so defined once up front;
# it is not referenced inside the loop below.
dayofweek = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

# For every model, print the row with the highest p50 for each day of the week.
for model in models:
    df = transform(load_pred(model))
    df['dayofweek'] = df['date'].dt.dayofweek
    # idxmax() yields index *labels*, so the rows must be selected with the
    # label-based .loc; positional .iloc is only right for a default RangeIndex.
    best_idxs = df.groupby('dayofweek')['p50'].idxmax()
    print(df.loc[best_idxs])

                   date       p50  dayofweek
89  2020-10-26 08:00:00  0.249328          0
113 2020-10-27 08:00:00  0.228981          1
137 2020-10-28 08:00:00  0.197706          2
161 2020-10-29 08:00:00  0.165558          3
17  2020-10-23 08:00:00  0.135449          4
41  2020-10-24 08:00:00  0.182038          5
75  2020-10-25 18:00:00  0.122888          6
                  date       p50  dayofweek
24 2020-10-22 19:00:00  0.017628          3
22 2020-10-23 17:00:00  0.018364          4
