In [122]:
import pandas as pd
import s3fs
from sagemaker.estimator import Estimator
from sagemaker import image_uris
import sagemaker
import boto3

# Authenticated (non-anonymous) S3 filesystem handle, used to list the bucket.
fs = s3fs.S3FileSystem(anon=False)
s3_bucket = 'greenhouseprocessed'

# Enumerate every entry at the top level of the bucket.
files = fs.ls(f'{s3_bucket}/')

# Read each listed entry as parquet and stack the frames into one DataFrame
# with a fresh 0..n-1 index.
# NOTE(review): every listed entry is read as parquet; presumably the bucket
# contains nothing but parquet objects -- confirm.
df_list = []
for entry in files:
    df_list.append(pd.read_parquet(f's3://{entry}', storage_options={'anon': False}))
full_df = pd.concat(df_list, ignore_index=True)



In [123]:
# heat_index is a calculated field, so it is dropped from the frame.
full_df = full_df.drop('heat_index', axis=1)

In [124]:
# Keep only rows recorded at or after the start date (2023-11-26 03:02:45).
on_or_after_start = full_df['datetime'] >= '2023-11-26 03:02:45'
full_df = full_df.loc[on_or_after_start]

In [125]:
# Promote the 'datetime' column to the index so the frame can be resampled by time.
full_df = full_df.set_index('datetime')

In [126]:
# Aggregate to one-minute resolution (the finest granularity DeepAR can
# predict at): each 1-minute bin holds the mean of the readings inside it,
# and the result is ordered by timestamp.
# NOTE(review): minutes with no readings presumably come out as NaN rows
# after the mean -- confirm whether gaps exist and how they should be handled.
full_df = (
    full_df
    .resample('1min')
    .mean()
    .sort_values('datetime')
)


In [127]:
# Copy the index into a regular 'start' column.
full_df = full_df.assign(start=full_df.index)

In [128]:
# Render the 'start' timestamps as text such as 2023-11-26T03:02:00.
start_format = '%Y-%m-%dT%H:%M:%S'
full_df['start'] = full_df['start'].dt.strftime(start_format)

In [129]:
# Build the time series records as overlapping 1-hour blocks.
# Each window spans `window_size` consecutive rows (60 one-minute rows = 1 hour)
# and consecutive windows are shifted by a single row.
window_size = 60
window_count = len(full_df) - window_size + 1

# One record per window: the first row's 'start' text, the temperature
# values as the target, and humidity as the single dynamic feature.
sliding_windows = [
    {
        "start": str(chunk['start'].iloc[0]),
        "target": chunk['temperature'].tolist(),
        "dynamic_feat": [chunk['humidity'].tolist()],
    }
    for chunk in (
        full_df.iloc[offset:offset + window_size]
        for offset in range(window_count)
    )
]


In [130]:
# Positional 80/20 split of the window records: the first 80% go to
# training, the rest to testing.
# Despite the *_df names, both results are plain lists of dicts.
# NOTE(review): neighbouring windows overlap, so windows on either side of the
# split boundary share rows -- confirm this is acceptable for evaluation.
train_size = int(len(sliding_windows) * 0.8)
train_df, test_df = sliding_windows[:train_size], sliding_windows[train_size:]

In [131]:
import json

def convert_to_line_delimited_json(list_of_dicts):
    """Serialize an iterable of objects as JSON Lines text.

    Each element is encoded with ``json.dumps`` and the encoded documents are
    joined with a single newline, so the result has one JSON document per
    line and no trailing newline. An empty input yields an empty string.

    Note: ``json.dumps`` writes non-finite floats as the bare tokens ``NaN`` /
    ``Infinity``, which are not strict JSON.
    """
    return '\n'.join(map(json.dumps, list_of_dicts))

# Serialize both splits to line-delimited JSON text.
train_json = convert_to_line_delimited_json(train_df)
test_json = convert_to_line_delimited_json(test_df)

# Write each split to its own file in the working directory
# (train.json first, then test.json).
for out_name, json_text in (('train.json', train_json), ('test.json', test_json)):
    with open(out_name, 'w') as out_file:
        out_file.write(json_text)

In [132]:
# Upload both local files to the train/test bucket, each under its own key
# prefix. The second call is left as the cell's final expression so the
# uploaded test file's S3 URI is displayed.
upload_bucket = 'greenhousetraintest'
sagemaker.Session().upload_data(
    path='train.json',
    bucket=upload_bucket,
    key_prefix='train',
)
sagemaker.Session().upload_data(
    path='test.json',
    bucket=upload_bucket,
    key_prefix='test',
)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


's3://greenhousetraintest/test/test.json'

In [133]:
 #Set your region (e.g., 'us-east-2')
# Use the region the default SageMaker session is bound to.
region = sagemaker.Session().boto_region_name

# Look up the container image URI of the DeepAR forecasting algorithm
# for that region.
image_uri = image_uris.retrieve(
    region=region,
    framework='forecasting-deepar',
)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


INFO:sagemaker.image_uris:Same images used for training and inference. Defaulting to image scope: inference.
INFO:sagemaker.image_uris:Defaulting to the only supported framework/algorithm version: 1.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.


In [134]:
# Define params
# Bucket name interpolated into the estimator's output_path (s3://<bucket>/output).
bucket = 'greenhousemodels'
# S3 URIs of the line-delimited JSON files passed to estimator.fit as the
# 'train' and 'test' channels.
train_path = 's3://greenhousetraintest/train/train.json'
test_path = 's3://greenhousetraintest/test/test.json'

In [135]:
# Estimator configuration: one ml.m5.large instance running the DeepAR image,
# with output written beneath s3://<bucket>/output.
estimator_settings = dict(
    image_uri=image_uri,
    role=sagemaker.get_execution_role(),
    instance_count=1,
    instance_type='ml.m5.large',
    output_path='s3://{}/output'.format(bucket),
    sagemaker_session=sagemaker.Session(),
)
estimator = Estimator(**estimator_settings)

# Hyperparameters for DeepAR.
deepar_hyperparameters = {
    'time_freq': '1min',        # data is at one-minute resolution
    'epochs': 20,
    'num_dynamic_feat': 1,      # one dynamic feature series per record
    'context_length': 30,       # 30 minutes of context; each series holds 60 values
    'prediction_length': 30,    # forecast 30 minutes into the future
}
estimator.set_hyperparameters(**deepar_hyperparameters)

# Start training with the train and test channels.
estimator.fit({'train': train_path, 'test': test_path})

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


INFO:sagemaker:Creating training-job with name: forecasting-deepar-2023-11-28-01-46-12-324


2023-11-28 01:46:12 Starting - Starting the training job...
2023-11-28 01:46:28 Starting - Preparing the instances for training......
2023-11-28 01:47:28 Downloading - Downloading input data...
2023-11-28 01:48:02 Training - Downloading the training image.....................
2023-11-28 01:51:29 Training - Training image download completed. Training in progress.[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
[34mRunning custom environment configuration script[0m
  if num_device is 1 and 'dist' not in kvstore:[0m
[34m[11/28/2023 01:51:36 INFO 139781948995392] Reading default configuration from /opt/amazon/lib/python3.8/site-packages/algorithm/resources/default-input.json: {'_kvstore': 'auto', '_num_gpus': 'auto', '_num_kv_servers': 'auto', '_tuning_objective_metric': '', 'cardinality': 'auto', 'dropout_rate': '0.10', 'early_stopping_patience': '', 'embedding_dimension': '10', 'learning_rate': '0.001', 'likelihood': 

In [140]:
# Fetch the ten most recently created training jobs, newest first.
sagemaker_client = boto3.client('sagemaker')
response = sagemaker_client.list_training_jobs(
    MaxResults=10,
    SortBy='CreationTime',
    SortOrder='Descending'
)

# Walk the jobs newest-first and report the first one whose training image
# mentions DeepAR.
for job in response['TrainingJobSummaries']:
    job_name = job['TrainingJobName']
    job_details = sagemaker_client.describe_training_job(TrainingJobName=job_name)
    # A job created from an algorithm resource carries 'AlgorithmName' rather
    # than 'TrainingImage'; treat a missing image as "not DeepAR" instead of
    # raising KeyError and aborting the search.
    training_image = job_details['AlgorithmSpecification'].get('TrainingImage', '')
    if 'deepar' in training_image:
        print(f"Job Name: {job_name}")
        print(f"Image URI: {training_image}")
        break
else:
    # Reached only when the loop finished without hitting `break`.
    print("No DeepAR training job found among the 10 most recent training jobs.")


Job Name: forecasting-deepar-2023-11-28-01-46-12-324
Image URI: 566113047672.dkr.ecr.us-east-2.amazonaws.com/forecasting-deepar:1


In [5]:
import boto3
# Register a SageMaker model named 'TimeSeries' whose primary container is
# taken from the model package identified by `model_arn`.
sagemaker_client = boto3.client('sagemaker')
model_name = 'TimeSeries'
model_arn = 'arn:aws:sagemaker:us-east-2:516411340133:model-package/TimeSeries/1'

primary_container = {'ModelPackageName': model_arn}
# IAM role passed to create_model as the model's execution role.
execution_role_arn = 'arn:aws:iam::516411340133:role/service-role/AmazonSageMaker-ExecutionRole-20231125T182973'

response = sagemaker_client.create_model(
    ModelName=model_name,
    PrimaryContainer=primary_container,
    ExecutionRoleArn=execution_role_arn,
)


In [8]:
# Endpoint configuration with a single production variant: one ml.t2.medium
# instance serving the model registered as `model_name`.
endpoint_config_name = 'timeseries-endpoint-config'
all_traffic_variant = {
    'VariantName': 'AllTraffic',
    'ModelName': model_name,
    'InstanceType': 'ml.t2.medium',
    'InitialInstanceCount': 1,
}
# Left as the final expression so the API response is displayed.
sagemaker_client.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[all_traffic_variant],
)

{'EndpointConfigArn': 'arn:aws:sagemaker:us-east-2:516411340133:endpoint-config/timeseries-endpoint-config',
 'ResponseMetadata': {'RequestId': 'd07aeb0c-911b-459a-9380-d5c59d7a8b33',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'd07aeb0c-911b-459a-9380-d5c59d7a8b33',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '107',
   'date': 'Tue, 28 Nov 2023 02:47:17 GMT'},
  'RetryAttempts': 0}}

In [10]:
# Create the endpoint from the endpoint configuration defined earlier.
endpoint_name = 'timeseries-endpoint'
endpoint_request = {
    'EndpointName': endpoint_name,
    'EndpointConfigName': endpoint_config_name,
}
# Left as the final expression so the API response is displayed.
sagemaker_client.create_endpoint(**endpoint_request)

{'EndpointArn': 'arn:aws:sagemaker:us-east-2:516411340133:endpoint/timeseries-endpoint',
 'ResponseMetadata': {'RequestId': '0d904f3a-b7ff-4127-a382-d2c28f1a5271',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '0d904f3a-b7ff-4127-a382-d2c28f1a5271',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '87',
   'date': 'Tue, 28 Nov 2023 02:47:24 GMT'},
  'RetryAttempts': 0}}

In [14]:
import boto3

# Initialize the Lambda client
lambda_client = boto3.client('lambda')

# Function name
function_name = 'greenhouse-inference'

# Invoke the Lambda function synchronously and wait for its result.
# The calling role must be allowed lambda:InvokeFunction on this function;
# otherwise the call fails with AccessDeniedException.
response = lambda_client.invoke(
    FunctionName=function_name,
    InvocationType='RequestResponse'  # 'Event' for async
)

# Print the response envelope (status code, metadata, ...)
print(response)

# The function's return value arrives in 'Payload' as a streaming body, whose
# repr in the envelope above says nothing about its content; read and decode
# it so the actual result is shown.
print(response['Payload'].read().decode('utf-8'))

ClientError: An error occurred (AccessDeniedException) when calling the Invoke operation: User: arn:aws:sts::516411340133:assumed-role/AmazonSageMaker-ExecutionRole-20231125T182973/SageMaker is not authorized to perform: lambda:InvokeFunction on resource: arn:aws:lambda:us-east-2:516411340133:function:greenhouse-inference because no identity-based policy allows the lambda:InvokeFunction action