# MODEL Deployment, Inference on a Dataset

In [1]:
import boto3
import sagemaker
from sagemaker import get_execution_role

# --- AWS / SageMaker handles ---------------------------------------------
# Execution role of this notebook; printed so the ARN shows up in the output.
role = get_execution_role()
print(role)

region = boto3.Session().region_name

# Low-level boto3 clients (S3 and SageMaker) plus the high-level SageMaker session.
s3_client, sm_client = boto3.client("s3"), boto3.client("sagemaker")
sess = sagemaker.Session()

# --- S3 layout -----------------------------------------------------------
bucket_name = "aai-540-data"   # project bucket
images_prefix = "cct_resized"  # prefix for the image source and lst files

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
arn:aws:iam::324183265896:role/service-role/AmazonSageMaker-ExecutionRole-20250604T045982


In [2]:
# Wrap the trained model artifact in a SageMaker Model and deploy it to an endpoint.
from sagemaker.model import Model
from sagemaker import image_uris

# Which trained model to evaluate: the S3 URI is assembled from bucket, model name
# and the output prefix of the training run.
model_name = "sg-ic-transfer-learning"
model_prefix = "output/image-classification-2025-06-19-15-45-03-748"
model_sri = f"s3://{bucket_name}/{model_name}/{model_prefix}/output/model.tar.gz"

# Container image for the built-in image-classification algorithm in the session's region.
training_image = image_uris.retrieve(
    framework="image-classification",
    region=sess.boto_region_name,
    version="latest",
)

# NOTE: image_uri is the URI used for training; check how this applies to
# models that were not trained via SageMaker.
model = Model(image_uri=training_image, model_data=model_sri, role=role)

# TODO: pass an explicit, timestamped endpoint name instead of relying on the generated one.
deploy_settings = {
    "initial_instance_count": 1,
    "instance_type": "ml.g4dn.xlarge",  # or another suitable instance type
}
model.deploy(**deploy_settings)




----------!

In [6]:
import json

# Read the label -> encoding mapping from the JSON file in the working directory
# (replace the path if your file name differs).
with open('./label_to_enc.json', 'r') as mapping_file:
    label_to_enc = json.load(mapping_file)

# Flip the mapping so an encoding looks up its label.
enc_to_label = dict(zip(label_to_enc.values(), label_to_enc.keys()))


In [3]:
from sagemaker.predictor import Predictor
from sagemaker.serializers import IdentitySerializer
from sagemaker.deserializers import JSONDeserializer

# Name of the endpoint to send inference requests to; replace with your actual endpoint name.
# TODO: hard-coded for now -- update the code later so the name is not pasted in by hand.
# Earlier value, kept for reference: 'image-classification-2025-06-19-12-33-26-561'
endpoint_name = 'image-classification-2025-06-19-16-11-58-471'


In [4]:

# Attach a Predictor to the existing endpoint.
# Requests are sent as raw image bytes (.jpg/.png) with the 'application/x-image'
# content type; the response is parsed as JSON (assumes the model returns JSON).
image_serializer = IdentitySerializer(content_type='application/x-image')
json_deserializer = JSONDeserializer()

predictor = Predictor(
    endpoint_name=endpoint_name,
    serializer=image_serializer,
    deserializer=json_deserializer,
)



In [5]:
import numpy as np
import pandas as pd

In [7]:
# Smoke test: run a single local image through the endpoint.
img_path = '5858bf6d-23d2-11e8-a6a3-ec086b02610b_0.jpg'
with open(img_path, 'rb') as image_file:
    payload = image_file.read()

result = predictor.predict(payload)

# The position of the largest value in the response is used as the predicted
# encoding, which is then translated to its label name.
predicted_enc = np.argmax(result)
print(enc_to_label[predicted_enc])

dog


In [11]:
# Preview the time-split validation listing: tab-separated, no header row,
# so the column names are supplied here.
lst_columns = ['time_index', "true_encoded_label", "filename"]
val_df = pd.read_csv(
    "./time_split_lst_files/validation.lst",
    sep='\t',
    header=None,
    names=lst_columns,
)
val_df.head()

Unnamed: 0,time_index,true_encoded_label,filename
0,28049,8,59ec5408-23d2-11e8-a6a3-ec086b02610b_0.jpg
1,28050,8,597831ed-23d2-11e8-a6a3-ec086b02610b_0.jpg
2,28051,8,59ccd33a-23d2-11e8-a6a3-ec086b02610b_0.jpg
3,28052,8,59e918e7-23d2-11e8-a6a3-ec086b02610b_0.jpg
4,28053,8,59bfdd4e-23d2-11e8-a6a3-ec086b02610b_0.jpg


In [9]:
# Fetch the lst file to be evaluated from the project bucket into a local copy.
target_eval_lst = 'validation.lst'
local_filename = 'eval.lst'
s3_client.download_file(
    Bucket=bucket_name,
    Key=f"dev_split/{target_eval_lst}",
    Filename=local_filename,
)



In [10]:
# Load the downloaded lst file (tab-separated, no header row) and name its columns.
# The name is bound directly to the DataFrame: no placeholder list first, and the
# path is passed as-is rather than wrapped in an f-string.
eval_df = pd.read_csv(
    local_filename,
    sep='\t',
    header=None,
    names=['time_index', "true_enc", "filename"],
)
eval_df.head()

Unnamed: 0,time_index,true_enc,filename
0,28049,8,59ec5408-23d2-11e8-a6a3-ec086b02610b_0.jpg
1,28050,8,597831ed-23d2-11e8-a6a3-ec086b02610b_0.jpg
2,28051,8,59ccd33a-23d2-11e8-a6a3-ec086b02610b_0.jpg
3,28052,8,59e918e7-23d2-11e8-a6a3-ec086b02610b_0.jpg
4,28053,8,59bfdd4e-23d2-11e8-a6a3-ec086b02610b_0.jpg


In [11]:
# Load the lst file to evaluate (tab-separated, no header row) and name its columns.
eval_df = pd.read_csv(
    local_filename,
    sep='\t',
    header=None,
    names=['time_index', "true_enc", "filename"],
)

# For every listed image: read its bytes from the S3 bucket, send them to the
# endpoint, and keep the position of the largest value in the response as the
# predicted encoding.
pred_encs = []
for n_done, filename in enumerate(eval_df['filename']):
    # S3 location of the image file
    key = f"{images_prefix}/{filename}"
    data = s3_client.get_object(Bucket=bucket_name, Key=key)['Body'].read()

    # Send to endpoint
    result = predictor.predict(data)

    # display progress every 500 images
    if n_done % 500 == 0:
        print(f"Made predictions on {n_done} images so far, on this dataset")

    pred_encs.append(int(np.argmax(result)))

# Assign the predictions once, as integers: encodings are class indices, so they
# should not live in a NaN-initialised float column filled one cell at a time.
eval_df['pred_enc'] = pred_encs


Made predictions on 0 images so far, on this dataset
Made predictions on 500 images so far, on this dataset
Made predictions on 1000 images so far, on this dataset
Made predictions on 1500 images so far, on this dataset
Made predictions on 2000 images so far, on this dataset
Made predictions on 2500 images so far, on this dataset
Made predictions on 3000 images so far, on this dataset
Made predictions on 3500 images so far, on this dataset
Made predictions on 4000 images so far, on this dataset
Made predictions on 4500 images so far, on this dataset
Made predictions on 5000 images so far, on this dataset
Made predictions on 5500 images so far, on this dataset


In [15]:
# generate classification report
from sklearn.metrics import classification_report

# Translate the integer encodings into label names so the report rows are readable.
eval_df['true_label'] = eval_df['true_enc'].map(enc_to_label)
eval_df['pred_label'] = eval_df['pred_enc'].map(enc_to_label)

# An encoding missing from enc_to_label (or a missing prediction) maps to NaN;
# fail here with a clear message instead of inside the report computation.
assert eval_df['true_label'].notna().all(), "some true encodings have no label in enc_to_label"
assert eval_df['pred_label'].notna().all(), "some predictions are missing or have no label in enc_to_label"

# zero_division=0 reports 0.0 for classes that were never predicted (the same
# numbers as the default) without emitting an UndefinedMetricWarning per metric.
print(classification_report(eval_df['true_label'], eval_df['pred_label'], zero_division=0))

              precision    recall  f1-score   support

        bird       0.31      0.73      0.44        26
      bobcat       0.81      0.87      0.84       438
         car       1.00      1.00      1.00       423
         cat       0.74      0.81      0.77       479
      coyote       0.88      0.84      0.86       659
        deer       1.00      0.80      0.89         5
         dog       0.84      0.73      0.78       342
       empty       0.00      0.00      0.00        13
      lizard       0.00      0.00      0.00         3
     opossum       0.99      0.92      0.95      1815
      rabbit       0.82      0.95      0.88       271
     raccoon       0.90      0.91      0.91      1021
      rodent       0.77      0.88      0.82        58
       skunk       0.97      0.87      0.92       191
    squirrel       0.72      0.94      0.82       169

    accuracy                           0.89      5913
   macro avg       0.72      0.75      0.72      5913
weighted avg       0.90   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [16]:
# try on test set

# download lst file to be evaluated
target_eval_lst = 'test.lst'
local_filename = 'eval.lst'
s3_client.download_file(bucket_name, Key=f"dev_split/{target_eval_lst}", Filename=local_filename)

# Load the target lst file (tab-separated, no header row) and name its columns.
eval_df = pd.read_csv(
    local_filename,
    sep='\t',
    header=None,
    names=['time_index', "true_enc", "filename"],
)
display(eval_df.head())

# For every listed image: read its bytes from the S3 bucket, send them to the
# endpoint, and keep the position of the largest value in the response as the
# predicted encoding.
pred_encs = []
for n_done, filename in enumerate(eval_df['filename']):
    # S3 location of the image file
    key = f"{images_prefix}/{filename}"
    data = s3_client.get_object(Bucket=bucket_name, Key=key)['Body'].read()

    # Send to endpoint
    result = predictor.predict(data)

    # display progress every 500 images
    if n_done % 500 == 0:
        print(f"Made predictions on {n_done} images so far, on this dataset")

    pred_encs.append(int(np.argmax(result)))

# Assign the predictions once, as integers: encodings are class indices, so they
# should not live in a NaN-initialised float column filled one cell at a time.
eval_df['pred_enc'] = pred_encs


Unnamed: 0,time_index,true_enc,filename
0,33962,0,5a0fd077-23d2-11e8-a6a3-ec086b02610b_0.jpg
1,33963,0,59862370-23d2-11e8-a6a3-ec086b02610b_0.jpg
2,33964,0,5a1fe939-23d2-11e8-a6a3-ec086b02610b_0.jpg
3,33965,0,59ccd0f7-23d2-11e8-a6a3-ec086b02610b_0.jpg
4,33966,0,5a17df7a-23d2-11e8-a6a3-ec086b02610b_0.jpg


Made predictions on 0 images so far, on this dataset
Made predictions on 500 images so far, on this dataset
Made predictions on 1000 images so far, on this dataset
Made predictions on 1500 images so far, on this dataset
Made predictions on 2000 images so far, on this dataset
Made predictions on 2500 images so far, on this dataset
Made predictions on 3000 images so far, on this dataset
Made predictions on 3500 images so far, on this dataset
Made predictions on 4000 images so far, on this dataset
Made predictions on 4500 images so far, on this dataset
Made predictions on 5000 images so far, on this dataset
Made predictions on 5500 images so far, on this dataset
Made predictions on 6000 images so far, on this dataset
Made predictions on 6500 images so far, on this dataset
Made predictions on 7000 images so far, on this dataset
Made predictions on 7500 images so far, on this dataset


In [17]:
# Translate the integer encodings into label names so the report rows are readable.
eval_df['true_label'] = eval_df['true_enc'].map(enc_to_label)
eval_df['pred_label'] = eval_df['pred_enc'].map(enc_to_label)

# An encoding missing from enc_to_label (or a missing prediction) maps to NaN;
# fail here with a clear message instead of inside the report computation.
assert eval_df['true_label'].notna().all(), "some true encodings have no label in enc_to_label"
assert eval_df['pred_label'].notna().all(), "some predictions are missing or have no label in enc_to_label"

# zero_division=0 reports 0.0 for classes that were never predicted (the same
# numbers as the default) without emitting an UndefinedMetricWarning per metric.
print(classification_report(eval_df['true_label'], eval_df['pred_label'], zero_division=0))

              precision    recall  f1-score   support

      badger       0.00      0.00      0.00         4
        bird       0.23      0.50      0.32        54
      bobcat       0.75      0.83      0.79       569
         car       1.00      0.99      0.99        85
         cat       0.67      0.72      0.69       828
      coyote       0.82      0.84      0.83      1199
        deer       1.00      0.47      0.64        17
         dog       0.62      0.58      0.60       395
       empty       0.00      0.00      0.00         8
         fox       0.00      0.00      0.00         2
      lizard       0.00      0.00      0.00       126
     opossum       0.99      0.90      0.95      3092
      rabbit       0.76      0.90      0.82       298
     raccoon       0.83      0.91      0.87       737
      rodent       0.67      0.78      0.72        81
       skunk       0.86      0.82      0.84        51
    squirrel       0.69      0.86      0.76       386

    accuracy              

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [18]:
# Display the encoding -> label lookup used to translate the encodings in the reports.
enc_to_label

{0: 'cat',
 1: 'opossum',
 2: 'squirrel',
 3: 'raccoon',
 4: 'bird',
 5: 'rabbit',
 6: 'dog',
 7: 'badger',
 8: 'bobcat',
 9: 'coyote',
 10: 'car',
 11: 'deer',
 12: 'rodent',
 13: 'skunk',
 14: 'empty',
 15: 'fox',
 16: 'lizard'}