# Access to collected features

## 1. Setup

In [None]:
%pip install boto3 sagemaker pandas

In [4]:
# For local development, set LOCAL_MODE = True so the named AWS profile below is used to access AWS services.
# In SageMaker Unified Studio or on a SageMaker Notebook Instance, leave it False; the notebook then runs with the SageMaker default role.
LOCAL_MODE = False
if LOCAL_MODE:
    import os
    # Set the profile before any session or client is created further down in the notebook.
    os.environ['AWS_PROFILE'] = 'msf'

In [None]:
import os
import logging
import boto3
import sagemaker
from sagemaker.feature_store.feature_group import FeatureGroup

# Notebook-level logger. Pass the real module name (__name__, unquoted) instead
# of the literal string '__name__'.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
# Attach the stream handler only once: re-running this cell must not stack
# another handler, otherwise every message is emitted once per re-run.
if not logger.handlers:
    logger.addHandler(logging.StreamHandler())

In [None]:
# SageMaker session shared by the rest of the notebook (clients and FeatureGroup are built from it).
sagemaker_session = sagemaker.Session()
# Default S3 bucket of the session; used later to build the Athena query output location.
default_bucket = sagemaker_session.default_bucket()
logger.info(f'Default S3 bucket = {default_bucket}')

## 2. Get feature record from the Online feature store (SageMaker FeatureStore)

In [9]:
# Both clients are created from the boto3 session that backs the SageMaker session.
boto_session = sagemaker_session.boto_session

# Control-plane client: used to search/list feature groups.
sagemaker_client = boto_session.client('sagemaker')
# Runtime client: used to read records from the online feature store.
featurestore_runtime_client = boto_session.client('sagemaker-featurestore-runtime')

### List of feature groups

In [25]:
# Text used to select feature groups by name in the search below.
# NOTE: the search filter applies it with the 'Contains' operator, i.e. as a
# substring match rather than a strict starts-with match.
feature_group_prefix = "proto-coupon"

In [38]:
# Search for feature groups whose name contains `feature_group_prefix`.
# A single Search call returns only one page of results, so walk every page
# with the boto3 paginator to avoid silently truncating the listing.
search_pages = sagemaker_client.get_paginator('search').paginate(
    Resource="FeatureGroup",
    SearchExpression={
        'Filters': [
            {
                'Name': 'FeatureGroupName',
                'Operator': 'Contains',
                'Value': feature_group_prefix
            },
        ]
    }
)

# Keep the shape of a raw Search response so `list_fg['Results']` still works.
list_fg = {
    'Results': [result for page in search_pages for result in page.get('Results', [])]
}

# Print each feature group name followed by its record identifier feature name.
for fg in list_fg['Results']:
    print(f"{fg['FeatureGroup']['FeatureGroupName']} [{fg['FeatureGroup']['RecordIdentifierFeatureName']}]")

proto-coupon-location-daily-count [location_with_date]
proto-coupon-prefix-count [loc_coupon_prefix]
proto-coupon-location-invalid-count [location_code]


### Get an online feature value

In [31]:
# Query parameters
# Feature group to read from (one of the groups listed by the search above).
feature_group_name = 'proto-coupon-prefix-count'
# Value of that group's record identifier feature (`loc_coupon_prefix` per the listing above).
record_identifier = 'loc0000#0000-0000-0000-'

In [44]:
# Query
# Read the record for `record_identifier` from the online feature store.
feature_record = featurestore_runtime_client.get_record(
    FeatureGroupName=feature_group_name,
    RecordIdentifierValueAsString=record_identifier
)

# GetRecord returns an empty result (no 'Record' key) when nothing matches the
# identifier, so fall back to an empty list instead of raising KeyError.
record_features = feature_record.get('Record', [])
if not record_features:
    print(f"No record found for '{record_identifier}' in feature group '{feature_group_name}'")

# One line per feature: "<feature name> :: <value as string>".
for rec in record_features:
    print(f"{rec['FeatureName']} :: {rec['ValueAsString']}")

loc_coupon_prefix :: loc0000#0000-0000-0000-
msg_count :: 1
event_time :: 2025-02-04T21:30:59.587Z


## 3. Query feature records from the Offline feature store (Athena)

### Athena Setup

In [42]:
# Query result location
# S3 prefix, under the session's default bucket, that is passed to Athena as the query output location.
output_location = f's3://{default_bucket}/proto-featurestore/query-results/'
print(f'Athena query output location: \n{output_location}')

Athena query output location: 
s3://sagemaker-us-east-1-528757807778/proto-featurestore/query-results/


In [51]:
# Find Athena table name
# Name of the feature group whose offline store is queried.
coupon_fg_name = 'proto-coupon-prefix-count'

# FeatureGroup handle bound to the notebook's SageMaker session.
coupon_fg = FeatureGroup(name=coupon_fg_name, sagemaker_session=sagemaker_session)
# Athena query helper for this feature group; it exposes the table name used in
# the SQL below and is reused later to run the query and fetch the result.
fg_athena = coupon_fg.athena_query()
fg_table = fg_athena.table_name

print(f'Athena table name: \n{fg_table}')

Athena table name: 
proto_coupon_prefix_count_1737693307


### Get offline feature values

In [52]:
# Build the Athena SQL: every column of every row in the feature group's table,
# ordered by event_time.
# replace() folds each "newline + 4-space indent" into a single space and
# strip() trims the ends, producing a one-line statement. This relies on the
# query lines being indented by exactly four spaces.
query_string = f'''
    SELECT * 
    FROM "{fg_table}" 
    ORDER BY event_time
'''.replace('\n    ', ' ').strip()

# Show the final statement that will be sent to Athena.
print(query_string)

SELECT *  FROM "proto_coupon_prefix_count_1737693307"  ORDER BY event_time


In [None]:
# Submit the query to Athena, with `output_location` as the query output location.
fg_athena.run(query_string=query_string, output_location=output_location)
# Wait for the submitted query before reading its result.
fg_athena.wait()
# Load the query result into a DataFrame for inspection in the next cell.
result_df = fg_athena.as_dataframe()

In [58]:
# Report the total number of rows returned, then preview at most the first 10.
print(f"Num of Results : {len(result_df)}")
result_df.head(10)

Num of Results : 7


Unnamed: 0,write_time,api_invocation_time,is_deleted,loc_coupon_prefix,msg_count,event_time
0,2025-02-04 13:14:32.250000 UTC,2025-02-04 13:09:36.000000 UTC,False,loc0009#9054-9710-7280-,4,2025-02-04T18:41:28.025Z
1,2025-02-04 13:14:32.250000 UTC,2025-02-04 13:09:35.000000 UTC,False,loc0009#9054-9710-7280-,4,2025-02-04T18:41:28.025Z
2,2025-02-04 13:14:28.077000 UTC,2025-02-04 13:09:36.000000 UTC,False,loc0000#0000-0000-0000-,3,2025-02-04T20:29:52.395Z
3,2025-02-04 13:14:28.077000 UTC,2025-02-04 13:09:36.000000 UTC,False,loc0000#0000-0000-0000-,1,2025-02-04T21:18:11.021Z
4,2025-02-04 13:14:28.077000 UTC,2025-02-04 13:09:36.000000 UTC,False,loc0000#0000-0000-0000-,1,2025-02-04T21:28:22.157Z
5,2025-02-04 13:14:28.077000 UTC,2025-02-04 13:09:36.000000 UTC,False,loc0000#0000-0000-0000-,2,2025-02-04T21:30:59.587Z
6,2025-02-04 13:14:28.077000 UTC,2025-02-04 13:09:36.000000 UTC,False,loc0000#0000-0000-0000-,1,2025-02-04T21:30:59.587Z
