# Batch-time and Real-time Prediction

In [1]:
# Importing the necessary library
import boto3
import sagemaker
import pandas as pd
from joblib import dump, load
import s3fs

In [2]:
# --- S3 locations -----------------------------------------------------------
# Bucket holding the project data, and the key prefix used underneath it.
bucket = "sagemaker-studio-009676737623-l4vs7j0o0ib"
prefix = "mlops-level1-data"

# --- SageMaker / AWS context ------------------------------------------------
# SageMaker session object, kept as `sess`.
sess = sagemaker.Session()
# Execution role reported by the SageMaker SDK for this environment.
role = sagemaker.get_execution_role()
# Region name that the default boto3 session is configured with.
region = boto3.Session().region_name

## Test Data

In [3]:
# S3 URI of the test CSV, built from the bucket and prefix configured above.
test_data_path = f"s3://{bucket}/{prefix}/test_data.csv"

In [4]:
# Read the test CSV into a DataFrame and show its (rows, columns) shape.
test_data = pd.read_csv(filepath_or_buffer=test_data_path)
test_data.shape

(610, 562)

In [5]:
# Treating missing values.
# Only the last expression of a cell is displayed, so the per-column missing
# counts are printed explicitly (restricted to columns that have any).
missing_per_column = test_data.isna().sum()
print(missing_per_column[missing_per_column > 0])

# Drop every row that contains at least one missing value. Reassigning the
# name (instead of `inplace=True`) keeps the step a plain expression.
test_data = test_data.dropna()
test_data.shape

(609, 562)

In [6]:
def _load_joblib_from_s3(fs, s3_path):
    """Deserialize a joblib artifact stored at ``s3_path``.

    Parameters
    ----------
    fs : s3fs.S3FileSystem
        Filesystem handle used to open the object.
    s3_path : str
        Full ``s3://bucket/key`` location of the ``.joblib`` file.

    Returns
    -------
    object
        Whatever object was dumped into the file.
    """
    # joblib files are binary, so open with mode "rb"; a text `encoding` has
    # no meaning for a binary stream and is therefore not passed.
    with fs.open(s3_path, mode="rb") as fh:
        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code -- only load artifacts from a trusted location.
        return load(fh)


fs = s3fs.S3FileSystem()

## Get Features
filename = f's3://{bucket}/{prefix}/feature/feature.joblib'
cols = _load_joblib_from_s3(fs, filename)

## Get Encoder object
filename = f's3://{bucket}/{prefix}/feature/encoder.joblib'
encoder = _load_joblib_from_s3(fs, filename)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [7]:
# Display the column names loaded from feature.joblib.
cols

array(['tGravityAcc-energy()-X', 'tGravityAcc-min()-X',
       'tGravityAcc-max()-X', 'tGravityAcc-mean()-X',
       'angle(X,gravityMean)', 'tGravityAcc-min()-Y',
       'tGravityAcc-mean()-Y', 'tGravityAcc-max()-Y',
       'fBodyAccJerk-entropy()-X', 'angle(Y,gravityMean)',
       'tBodyAccMag-max()', 'tBodyAcc-max()-X', 'Activity'], dtype=object)

In [8]:
# Keep only the columns listed in `cols`, then separate the 'Activity'
# column (kept as a one-column DataFrame) from the remaining columns.
processed_test_data = test_data.loc[:, cols]
test_y = processed_test_data.loc[:, ["Activity"]]
test_x = processed_test_data.drop(columns="Activity")
(test_x.shape, test_y.shape)


((609, 12), (609, 1))

## Save Processed Test Data to S3

In [9]:
# Both files are written under the same S3 "directory".
prediction_test_dir = f"s3://{bucket}/{prefix}/prediction/test"
test_x_path = f"{prediction_test_dir}/test_x.csv"
test_y_path = f"{prediction_test_dir}/test_y.csv"

# Write the values only: no index column and no header row.
csv_options = {"index": False, "header": False}
test_x.to_csv(test_x_path, **csv_options)
test_y.to_csv(test_y_path, **csv_options)


# Real-time Prediction

In [10]:
# Low-level boto3 client for the 'sagemaker-runtime' service.
client = boto3.client(service_name="sagemaker-runtime")

In [11]:
# Name of the inference endpoint that the predictor below is bound to.
endpoint_name = "HumanActivity-InferenceEndpoint-2023-09-25-0546"

In [12]:
from sagemaker.predictor import Predictor
from sagemaker.serializers import CSVSerializer, JSONSerializer
from sagemaker.deserializers import CSVDeserializer , JSONDeserializer

# Predictor bound to `endpoint_name`, using the CSV serializer for requests
# and the CSV deserializer for responses.
predictor_csv = Predictor(
    endpoint_name=endpoint_name,
    serializer=CSVSerializer(),
    deserializer=CSVDeserializer(),
)

In [24]:
# Send 10 randomly chosen rows of test_x to the endpoint. `random_state`
# pins the sample so that re-running the cell sends the same rows.
prediction = predictor_csv.predict(test_x.sample(10, random_state=42))

### Check Data Capture

In [25]:
# S3 URI of one data-capture file for this endpoint. The bucket, prefix and
# endpoint name come from the variables defined earlier so the path cannot
# drift from them; the date/hour folders and the object name still identify
# one specific capture file and must be updated for other runs.
data_capture = (
    f"s3://{bucket}/{prefix}/datacapture/{endpoint_name}"
    "/AllTraffic/2023/09/25/05/"
    "52-24-109-c21b0608-1627-419c-b2b4-cb46d0bc9471.jsonl"
)

In [26]:
# Parse the capture file as JSON Lines: one JSON document per line.
json_df = pd.read_json(
    data_capture,
    lines=True,
)

In [27]:
# Display the parsed capture records.
json_df

Unnamed: 0,captureData,eventMetadata,eventVersion
0,{'endpointInput': {'observedContentType': 'tex...,{'eventId': 'e0dab2a7-c708-4383-add4-e3e91839c...,0


In [28]:
# Take the 'captureData' entry of the row labelled 0 in a single lookup.
input_data = json_df.loc[0, "captureData"]

In [29]:
# Display the captured payload taken from the 'captureData' column.
input_data

{'endpointInput': {'observedContentType': 'text/csv',
  'mode': 'INPUT',
  'data': '0.85347998,0.96082418,0.87444833,0.94588224,-0.76618025,-0.23752323,-0.25529462,-0.26764716,0.61088042,0.25860267,-0.23349994,-0.11948537\n0.91274816,0.98775397,0.89477777,0.96841804,-0.84829438,-0.1297641,-0.15630201,-0.17596367,-1.0,0.19031033,-0.98774589,-0.94002751\n0.5801554,0.85049913,0.76602185,0.83695957,-0.57936742,0.16770112,0.15187734,0.14608124,-1.0,-0.0215668929999999,-0.9703291,-0.935389\n0.80532159,0.89002018,0.89029919,0.9268682,-0.66274148,-0.37340565,-0.3408577,-0.30006795,0.34116322,0.31518482,-0.20362057,-0.16829118\n0.66876946,0.89441143,0.80001788,0.87324201,-0.64322033,0.16910986,0.14758583,0.11814495,-1.0,-0.0183603289999999,-0.99454562,-0.94068373\n0.71970232,0.90456433,0.83266707,0.8936393,-0.63506427,-0.33647365,-0.34974781,-0.3483829,0.68488217,0.32473847,-0.13377804,-0.14169753\n-0.92941913,-0.15352959,-0.26546099,-0.19748218,0.39125589,0.87105746,0.85950028,0.81066466,-0.94