# Install dependencies

In [None]:
# Install the packages listed in requirements.txt, upgrading any already present (-U)
%pip install -U -r requirements.txt

# Import SageMaker Defaults Configurations

The Amazon SageMaker Python SDK supports setting default values for AWS infrastructure primitive types, such as instance types, Amazon S3 folder locations, and IAM roles. You can override the default location of the user-defined configuration file by setting the `SAGEMAKER_USER_CONFIG_OVERRIDE` environment variable.

In [None]:
import os

# Have the SageMaker Python SDK look for its user config in the current working directory
os.environ.update({"SAGEMAKER_USER_CONFIG_OVERRIDE": os.getcwd()})

# Download dataset

Download the dataset from the UCI website.

In [None]:
import os
# Import the submodule explicitly: a bare `import urllib` does not guarantee that
# `urllib.request` is loaded, so `urllib.request.urlretrieve` could raise AttributeError.
import urllib.request

# Local directory and file name for the raw dataset
input_data_dir = 'data/'
# exist_ok=True makes the cell safe to re-run without a separate existence check
os.makedirs(input_data_dir, exist_ok=True)
input_data_path = os.path.join(input_data_dir, 'predictive_maintenance_raw_data_header.csv')

# Fetch the CSV from the UCI repository and store it at input_data_path
dataset_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00601/ai4i2020.csv"
urllib.request.urlretrieve(dataset_url, input_data_path)

In [None]:
import pandas as pd
import warnings
# Suppress every Python warning from this point on
warnings.simplefilter("ignore")

# Read the downloaded CSV into a DataFrame
df = pd.read_csv(input_data_path)

# Report (rows, columns) of the loaded frame
print('The shape of the dataset is:', df.shape)

# Test case 1: Register features in feature store

## Expected result: Features should be registered

In [None]:
import sagemaker
import boto3
import sys
import pandas as pd
import numpy as np
import io
from sagemaker.session import Session
from sagemaker import get_execution_role

# Key prefix under the bucket; combined with the bucket name when the feature group is created
prefix = "sagemaker-featurestore-introduction"

# Build the session first, then read the values that come from it
sagemaker_session = sagemaker.Session()
s3_bucket_name = sagemaker_session.default_bucket()
region = sagemaker_session.boto_region_name

# IAM role of the current execution environment; passed as role_arn at creation time
role = get_execution_role()

In [None]:
# Take a real copy: plain assignment would only alias `df`, so the column renames and the
# EventTime column added in later cells would silently modify the raw frame as well.
df_copy = df.copy()

In [None]:
# Subset of the raw column names (not referenced elsewhere in the visible cells)
columns = [
    'Type',
    'Air temperature [K]',
    'Process temperature [K]',
    'Rotational speed [rpm]',
    'Torque [Nm]',
    'Tool wear [min]',
    'Machine failure',
]

# Normalise the raw headers in one pass: drop '[' and ']', turn spaces into underscores
new_columns = [col.translate(str.maketrans(' ', '_', '[]')) for col in df.columns]
df_copy.columns = new_columns

df_copy.head()

## Test case 1a: Create a Feature Group

### Expected result: A new feature group should be created

In [None]:
from time import gmtime, strftime, sleep

# Feature group name with a UTC day-hour-minute-second suffix taken at execution time
product_id_feature_group_name = f"product-id-feature-group-{strftime('%d-%H-%M-%S', gmtime())}"

In [None]:
from sagemaker.feature_store.feature_group import FeatureGroup

# Build the FeatureGroup object for the generated name; the group itself is only
# created by the later `create(...)` call.
product_id_feature_group = FeatureGroup(
    sagemaker_session=sagemaker_session,
    name=product_id_feature_group_name,
)

## Test case 1b: Create a Feature Group definition

### Expected result: A new feature group definition from the dataset should be created

In [None]:
import time

# Current Unix time rounded to whole seconds; used as the EventTime value for every row
current_time_sec = round(time.time())

# Column name passed as the record identifier when the feature group is created
record_identifier_feature_name = "Product_ID"

Append an `EventTime` feature to your data frame. This feature is required; it timestamps each record.

In [None]:
# Stamp every row with the same event time. The Series is built on df_copy's own index:
# a default-indexed Series is aligned by label on assignment and would leave NaN in any
# row whose index label is not in 0..n-1.
df_copy["EventTime"] = pd.Series(float(current_time_sec), index=df_copy.index, dtype="float64")

df_copy.head()

In [None]:
# Load the feature definitions for this feature group from df_copy
# (presumably inferred from the frame's column names and dtypes — confirm against the SDK docs).
product_id_feature_group.load_feature_definitions(data_frame=df_copy)

Below, we call `create` to create the feature group.

In [None]:
# Create the feature group with the online store enabled. As the cell's last expression,
# the value returned by `create` is shown as the cell output.
product_id_feature_group.create(
    record_identifier_name=record_identifier_feature_name,
    event_time_feature_name="EventTime",
    role_arn=role,
    # S3 location built from the session's default bucket and the notebook prefix
    s3_uri=f"s3://{s3_bucket_name}/{prefix}",
    enable_online_store=True,
)