# Batch Time and Real-time Prediction

In [4]:
# Importing the necessary library
import boto3
import pandas as pd
import s3fs
import sagemaker
from joblib import dump, load
from sagemaker.transformer import Transformer

In [5]:
# --- SageMaker / AWS handles ---
# Session object through which the SageMaker SDK talks to AWS.
sess = sagemaker.Session()
# Execution role reported by the SageMaker SDK for this environment.
role = sagemaker.get_execution_role()
# Region the default boto3 session is configured to use.
region = boto3.Session().region_name

# --- Storage layout ---
# S3 bucket that holds the project data and artifacts.
bucket = 'sagemaker-studio-009676737623-l4vs7j0o0ib'
# Key prefix ("folder") inside the bucket for this project.
prefix = 'mlops-level1-data'

## Test Data

In [6]:
# S3 URI of the raw test-set CSV, built from the bucket and prefix defined above.
test_data_path = f's3://{bucket}/{prefix}/test_data.csv'

In [7]:
# Load the raw test set from S3 into a DataFrame, then display its (rows, columns) shape.
test_data = pd.read_csv(test_data_path)
test_data.shape

(610, 562)

In [8]:
# Count missing (NaN) values in each column.
test_data.isna().sum()

tBodyAcc-mean()-X                       0
tBodyAcc-mean()-Y                       0
tBodyAcc-mean()-Z                       0
tBodyAcc-std()-X                        0
tBodyAcc-std()-Y                        0
                                       ..
angle(tBodyGyroJerkMean,gravityMean)    1
angle(X,gravityMean)                    1
angle(Y,gravityMean)                    1
angle(Z,gravityMean)                    1
Activity                                1
Length: 562, dtype: int64

In [9]:
# Drop rows containing any missing value. Rebinding (instead of inplace=True)
# keeps the cell free of hidden in-place mutation, and resetting the index
# gives the cleaned frame a contiguous 0..n-1 index so its rows line up
# positionally with the predictions produced later.
test_data = test_data.dropna().reset_index(drop=True)

In [10]:
# Confirm the row count after removing incomplete rows.
test_data.shape

(609, 562)

In [11]:
# Alternative considered but not used: impute missing values with 0 instead of dropping rows -> test_data.fillna(0, inplace=True)

In [12]:
## Get Features
# NOTE(security): joblib.load unpickles arbitrary Python objects, so only load
# artifacts from a bucket you control (see the scikit-learn model-persistence notes).
fs = s3fs.S3FileSystem()
filename = f's3://{bucket}/{prefix}/feature/feature.joblib'
# joblib artifacts are binary: open in 'rb' mode. An `encoding` argument only
# has meaning for text-mode opens and must not be passed here.
with fs.open(filename, 'rb') as fh:
    cols = load(fh)

## Get Encoder object
filename = f's3://{bucket}/{prefix}/feature/encoder.joblib'
with fs.open(filename, 'rb') as fh:
    encoder = load(fh)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [13]:
# Show the column names loaded from feature.joblib (the list also contains the 'Activity' target).
cols

array(['tGravityAcc-min()-X', 'tGravityAcc-mean()-Y',
       'tGravityAcc-energy()-X', 'angle(X,gravityMean)',
       'tGravityAcc-min()-Y', 'tGravityAcc-max()-X',
       'tGravityAcc-mean()-X', 'tGravityAcc-max()-Y',
       'fBodyAccJerk-entropy()-Z', 'tBodyAcc-max()-X',
       'angle(Y,gravityMean)', 'fBodyGyro-entropy()-Z', 'Activity'],
      dtype=object)

In [14]:
# Keep every row, but only the columns listed in `cols`.
processed_test_data = test_data.loc[:, cols]

In [15]:
# Split into model inputs (everything except the target) and the target itself.
# test_y is kept as a one-column DataFrame rather than a Series.
test_x = processed_test_data.drop(columns='Activity')
test_y = processed_test_data.loc[:, ['Activity']]

In [16]:
# Sanity check: features and labels should have the same number of rows.
test_x.shape, test_y.shape


((609, 12), (609, 1))

## Save Processed Test to S3

In [17]:
# Destination URIs for the processed features and labels.
test_x_path = f"s3://{bucket}/{prefix}/prediction/test/test_x.csv"
test_y_path = f"s3://{bucket}/{prefix}/prediction/test/test_y.csv"

# Write both frames as bare CSV (no index column, no header row).
for frame, s3_uri in ((test_x, test_x_path), (test_y, test_y_path)):
    frame.to_csv(s3_uri, index=False, header=False)

## Model Prediction

In [18]:
# Name of the SageMaker model used for the batch transform below (passed as `model_name`).
# Update this value when a newer model should be used.
# Previously used: 'rf-scikit-2023-09-12-12-25-23-946'
final_model = 'rf-scikit-2023-09-15-14-24-17-765'


In [19]:
# Batch transform: run the model named in `final_model` over the saved feature CSV.
# `Transformer` is imported in the notebook's import cell.
# NOTE: every execution of this cell creates a new transform job.
transformer_output_path = f"s3://{bucket}/{prefix}/transformer-output"

sklearn_transformer = Transformer(
    model_name=final_model,
    instance_count=1,
    instance_type='ml.m5.large',
    output_path=transformer_output_path,
)

# The job writes its result next to `transformer_output_path` as
# "<input file name>.out" (here: test_x.csv.out).
sklearn_transformer.transform(
    data=test_x_path,
    data_type='S3Prefix',
    content_type='text/csv',
)


INFO:sagemaker:Creating transform job with name: sagemaker-scikit-learn-2023-09-15-14-43-41-450


..........................[34m2023-09-15 14:48:02,752 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)[0m
[34m2023-09-15 14:48:02,757 INFO - sagemaker-containers - No GPUs detected (normal if no gpus installed)[0m
[34m2023-09-15 14:48:02,758 INFO - sagemaker-containers - nginx config: [0m
[34mworker_processes auto;[0m
[34mdaemon off;[0m
[34mpid /tmp/nginx.pid;[0m
[34merror_log  /dev/stderr;[0m
[34mworker_rlimit_nofile 4096;[0m
[34mevents {
  worker_connections 2048;[0m
[34m}[0m
[34mhttp {
  include /etc/nginx/mime.types;
  default_type application/octet-stream;
  access_log /dev/stdout combined;
  upstream gunicorn {
    server unix:/tmp/gunicorn.sock;
  }
  server {
    listen 8080 deferred;
    client_max_body_size 0;
    keepalive_timeout 3;
    location ~ ^/(ping|invocations|execution-parameters) {
      proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
      proxy_set_header Host $http_host;
      proxy_redirect off;
    

In [20]:
# cell 19
!aws s3 cp $transformer_output_path/test_x.csv.out ./tmp/predictions.csv


download: s3://sagemaker-studio-009676737623-l4vs7j0o0ib/mlops-level1-data/transformer-output/test_x.csv.out to tmp/predictions.csv


In [21]:
# The transform output is a single bracketed, comma-separated list of integer
# class ids, e.g. "[3, 0, 5, ...]".
with open('./tmp/predictions.csv', 'r') as fh:
    raw_output = fh.read()

# Strip surrounding whitespace/newlines before removing the brackets, so a
# trailing newline in the file does not leave a stray "]" on the last token.
# Empty tokens are skipped so an empty result ("[]") yields an empty list
# instead of raising on int('').
tokens = raw_output.strip().strip('[]').split(',')
prediction = [int(tok) for tok in tokens if tok.strip()]
prediction_df = pd.DataFrame({'prediction': prediction})


In [22]:
# Decode the integer class ids back to activity names.
# Pass only the 1-D 'prediction' column: handing the whole DataFrame to the
# encoder triggers a "column vector passed" conversion warning and fails on
# re-run once the 'label' column has been added to the frame.
prediction_df['label'] = encoder.inverse_transform(prediction_df['prediction'])

  y = column_or_1d(y, warn=True)


In [23]:
# Distribution of the predicted activity labels.
prediction_df['label'].value_counts()

label
WALKING               129
STANDING              127
LAYING                111
SITTING                87
WALKING_DOWNSTAIRS     78
WALKING_UPSTAIRS       77
Name: count, dtype: int64

### Prediction Evaluation

In [24]:
# Confusion matrix: rows are the true activities, columns the predicted labels.
# The two series are compared by position (.values), not by index.
actual_labels = test_y['Activity'].values
predicted_labels = prediction_df['label'].values
pd.crosstab(
    index=actual_labels,
    columns=predicted_labels,
    rownames=['actuals'],
    colnames=['predictions'],
)


predictions,LAYING,SITTING,STANDING,WALKING,WALKING_DOWNSTAIRS,WALKING_UPSTAIRS
actuals,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
LAYING,111,0,0,0,0,0
SITTING,0,85,14,0,0,0
STANDING,0,2,112,0,0,0
WALKING,0,0,0,114,9,3
WALKING_DOWNSTAIRS,0,0,0,1,69,2
WALKING_UPSTAIRS,0,0,1,14,0,72
