In [1]:
import os
import json
import logging
from datetime import datetime

In [2]:
import boto3
import sagemaker
from sagemaker import get_execution_role
from sagemaker.amazon.amazon_estimator import get_image_uri

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/xdg-ubuntu/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/orion/.config/sagemaker/config.yaml


In [3]:
boto3.set_stream_logger(name="botocore.credentials", level=logging.WARNING)

In [4]:
def check_aws_configuration():
    try:
        # Attempt to create an AWS S3 client
        s3 = boto3.client('s3')
        
        # List buckets to verify AWS credentials and region
        buckets = s3.list_buckets()
        
        print("AWS Configuration is successful.")
        print("Available S3 Buckets:")
        for bucket in buckets['Buckets']:
            print(f"- {bucket['Name']}")
        
        return True
    except Exception as e:
        print(f"Error: {e}")
        print("AWS Configuration is not successful.")
        return False

# Check AWS configuration
check_aws_configuration()

AWS Configuration is successful.
Available S3 Buckets:
- airbyte-data-bucket
- apple-maps-rank-data
- aws-cloudtrail-logs-173829655778-1bc62e66
- aws-sam-cli-managed-default-samclisourcebucket-ujqrexj3n4l8
- campaigns-client-files
- cf-templates-1f8yr71fcychj-us-east-1
- chat-widget-dev-serverlessdeploymentbucket-c2tv25twqqud
- chat-widget-local-serverlessdeploymentbucket-5d5l8p5zoshf
- chrone-app-chat-campaign-jsons
- chrone-biz-keyword-jsons
- chrone-biz-react-website-dev
- chrone-biz-website-react
- chrone-community-data
- chrone-email-campaign-jsons
- chrone-global-assets
- chrone-internal-media-generation-reactjs
- chrone-lambda-layers-zip
- chrone-lead-magnet
- chrone-lead-magnet-dev
- chrone-ml-models
- chrone-public-media
- chrone-sales-rank-extraction-jsons
- chrone-sms-campaign-jsons
- chrone-sp-website
- chrone-sp-website-internal
- chrone-sp-website-media-bank
- chrone-sp-website-pages
- chrone-ui-reactjs
- chrone-website
- chrone-website-edit-application
- chrone.ai
- chro

True

In [5]:
# sess = sagemaker.Session()
sess = sagemaker.Session(boto_session=boto3.Session(region_name='us-east-1'))
region = sess.boto_region_name
print(region)

us-east-1


In [6]:
role_arn = "arn:aws:iam::173829655778:role/sage_make_full_access"

In [7]:
bucket_name = "imagetaggingdemo"
data_path = "img_clf_multilabel_lst"

nclasses = 131
nimgs_train = 926
nepochs = 500
mini_batch_size = 16

train_instance_type = "ml.g4dn.4xlarge"
job_name_prefix = "img-clf-multilabel"

In [8]:
train_image_uri = sagemaker.image_uris.retrieve(
    framework="image-classification",
    region=region,
    image_scope="training",
    version="latest",
)
print(train_image_uri)

Defaulting to the only supported framework/algorithm version: 1. Ignoring framework/algorithm version: latest.


811284229777.dkr.ecr.us-east-1.amazonaws.com/image-classification:1


In [9]:
s3_output_path = "s3://{}/{}/{}".format(bucket_name, data_path, "model_output")


In [26]:
clf_estimator = sagemaker.estimator.Estimator(
    image_uri=train_image_uri,
    role=role_arn,
    instance_count=1,
    instance_type=train_instance_type,
    volume_size=100,
    max_run=360000,
    input_mode="File",
    output_path=s3_output_path,
    sagemaker_session=sess,
)

In [27]:
clf_estimator.set_hyperparameters(
    num_classes=nclasses,  # update this
    epochs=nepochs,  # update this
    num_training_samples=nimgs_train,  # update this
    mini_batch_size=mini_batch_size,  # update this
    num_layers=18,
    use_pretrained_model=1,
    image_shape="3,224,224",
    resize=256,
    learning_rate=0.001,
    use_weighted_loss=1,
    augmentation_type="crop_color_transform",
    precision_dtype="float32",
    multi_label=1,
)

In [28]:
s3_train_imgs = "s3://{}/{}/{}".format(bucket_name, data_path, "train_imgs")
s3_valid_imgs = "s3://{}/{}/{}".format(bucket_name, data_path, "valid_imgs")
s3_train_annot = "s3://{}/{}/{}".format(bucket_name, data_path, "train_annots")
s3_valid_annot = "s3://{}/{}/{}".format(bucket_name, data_path, "valid_annots")

train_imgs = sagemaker.inputs.TrainingInput(
    s3_train_imgs,
    distribution="FullyReplicated",
    content_type="application/jpeg",
    s3_data_type="S3Prefix",
)
valid_imgs = sagemaker.inputs.TrainingInput(
    s3_valid_imgs,
    distribution="FullyReplicated",
    content_type="application/jpeg",
    s3_data_type="S3Prefix",
)
train_annot = sagemaker.inputs.TrainingInput(
    s3_train_annot,
    distribution="FullyReplicated",
    content_type="application/jpeg",
    s3_data_type="S3Prefix",
)
valid_annot = sagemaker.inputs.TrainingInput(
    s3_valid_annot,
    distribution="FullyReplicated",
    content_type="application/jpeg",
    s3_data_type="S3Prefix",
)

data_channels = {
    "train": train_imgs,
    "validation": valid_imgs,
    "train_lst": train_annot,
    "validation_lst": valid_annot,
}

In [29]:
print(sagemaker.__version__)


2.203.0


In [30]:
timestamp = (
    str(datetime.now().replace(microsecond=0)).replace(" ", "-").replace(":", "-")
)
# job_name = "Demo-Multi-Label"
job_name = "Multi-label-Image-Classification" + "-" + timestamp
print(job_name)

Multi-label-Image-Classification-2024-01-08-14-10-36


In [15]:
clf_estimator.fit(inputs=data_channels, logs=True, job_name=job_name)

INFO:sagemaker:Creating training-job with name: Multi-label-Image-Classification-2024-01-06-16-21-56


2024-01-06 10:51:59 Starting - Starting the training job...
2024-01-06 10:52:14 Starting - Preparing the instances for training......
2024-01-06 10:53:44 Downloading - Downloading the training image..................
2024-01-06 10:57:00 Training - Training image download completed. Training in progress....Docker entrypoint called with argument(s): train
Running default environment configuration script
Nvidia gpu devices, drivers and cuda toolkit versions (only available on hosts with GPU):
Sat Jan  6 10:57:43 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.12             Driver Version: 535.104.12   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                      

In [10]:
print(sagemaker.__version__)
print(sagemaker.__file__)

2.203.0
/home/orion/.local/lib/python3.10/site-packages/sagemaker/__init__.py


In [11]:
job_name = "Multi-label-Image-Classification"

In [12]:
infer_instance_type = "ml.t2.medium"
model_name = job_name
endpoint_name = job_name

In [13]:
from sagemaker.model import Model

model = Model(
    image_uri=train_image_uri,
    model_data="s3://imagetaggingdemo/img_clf_multilabel_lst/model_output/Multi-label-Image-Classification-2023-12-28-13-52-08/output/model.tar.gz",
    role=role_arn,
    sagemaker_session=sess,
)

In [14]:
predictor = model.deploy(
    initial_instance_count=1,
    instance_type=infer_instance_type,
    endpoint_name=endpoint_name,
)

ClientError: An error occurred (ValidationException) when calling the CreateEndpointConfig operation: Cannot create already existing endpoint configuration "arn:aws:sagemaker:us-east-1:173829655778:endpoint-config/multi-label-image-classification".

In [24]:
clf_predictor = clf_estimator.deploy(
    initial_instance_count=1,
    instance_type=infer_instance_type,
    endpoint_name=endpoint_name,
    model_name=model_name,
)

ValueError: Estimator is not associated with a training job

In [15]:
sgmkr_runt =  boto3.client("sagemaker-runtime", region_name='us-east-1')

In [16]:
# For Validation Data Set

# datafrmae to store the results
data = []

def read_and_encode_image(file_path):
    with open(file_path, "rb") as image:
        payload = image.read()
        payload = bytearray(payload)
    return payload



# Iterate over images
for filename in os.listdir("valid_data"):
    print(filename)
    image_content = read_and_encode_image("valid_data/" + filename)

    # Invoke the endpoint
    response = sgmkr_runt.invoke_endpoint(
        EndpointName=endpoint_name,
        ContentType='image/jpeg',
        Body=image_content
    )
    prediction = json.loads(response['Body'].read().decode())

    print(prediction)

    # Append the data for each image
    data.append({'image_name': filename, 'predicted_scores': prediction})

# Create a DataFrame from the list of dictionaries
    


    

facials 1 (5).jpg
[0.00011386704863980412, 6.696214427392988e-08, 0.0327482596039772, 1.4190081110143637e-08, 0.4356013536453247, 3.8125825085444376e-05, 1.4713998098159209e-05, 8.141036232700571e-06, 0.0010751263471320271, 0.3480430841445923, 7.575663403258659e-07, 1.3241225005344859e-08, 5.104126898913819e-07, 2.8125290555180982e-05, 1.431546365893155e-07, 0.0004785912169609219, 9.993184357881546e-05, 2.1559253582381643e-07, 3.125454028918284e-08, 5.903000783291645e-06, 9.450956213186146e-07, 3.9269924201335016e-08, 1.961200091216142e-09, 6.735738861607388e-05, 1.7183911040774547e-06, 6.769177707610652e-05, 0.000150295440107584, 7.602015728025435e-08, 7.439550972776487e-05, 5.701895489096387e-08, 0.41364753246307373, 1.0893659307953385e-08, 7.500436367990915e-06, 4.4278573341216543e-07, 0.0034604095853865147, 5.248530783319438e-07, 1.2278752459238262e-09, 0.10139429569244385, 5.743426712001565e-08, 0.0001114384358515963, 6.566797594587115e-08, 3.5402538287598873e-06, 0.97144460678100

In [None]:
import os
import boto3
import json

# Create an S3 client
s3 = boto3.client('s3')

# datafrmae to store the results
data = []

def read_and_encode_image_from_s3(bucket, key):
    response = s3.get_object(Bucket=bucket, Key=key)
    payload = response['Body'].read()
    payload = bytearray(payload)
    return payload

# Specify your S3 bucket name
s3_bucket = 'chrone-sp-website'
s3_prefix = '96a013fc-4748-4856-86de-71a8576bd91b/gallery/'

i = 0

# Iterate over images in S3
for filename in s3.list_objects(Bucket=s3_bucket, Prefix=s3_prefix)['Contents']:
    if i == 10:
        break
    i+=1
    image_key = filename['Key']
    print(image_key)

    # Invoke the endpoint
    response = sgmkr_runt.invoke_endpoint(
        EndpointName=endpoint_name,
        ContentType='image/jpeg',
        Body=read_and_encode_image_from_s3(s3_bucket, image_key)
    )
    prediction = json.loads(response['Body'].read().decode())

    print(prediction)

    # Append the data for each image
    data.append({'image_name': image_key, 'predicted_scores': prediction})


In [35]:
import pandas as pd
df = pd.DataFrame(data)

# printing df
print(df)

                       image_name  \
0               facials 1 (5).jpg   
1    extensionservicces_1 (7).jpg   
2           accupunture 1 (2).jpg   
3    extensionservicces_1 (9).jpg   
4               facials 1 (8).jpg   
5                       ht_16.jpg   
6              facials 1 (11).jpg   
7    extensionservicces_1 (5).jpg   
8             accupuncture_15.jpg   
9    extensionservicces_1 (1).jpg   
10          accupunture 1 (3).jpg   
11          accupunture 1 (7).jpg   
12  extensionservicces_1 (10).jpg   
13              facials 1 (1).jpg   
14         accupunture 1 (10).jpg   
15              facials 1 (4).jpg   
16   extensionservicces_1 (3).jpg   
17          accupunture 1 (4).jpg   
18       Untitled design (21).jpg   
19             facials 1 (12).jpg   
20              facials 1 (3).jpg   
21          accupunture 1 (8).jpg   
22   extensionservicces_1 (4).jpg   
23  extensionservicces_1 (11).jpg   
24              facials 1 (2).jpg   
25              facials 1 (6).jpg   
2

In [19]:
print(df['predicted_scores'])

0     [3.908900580995578e-08, 4.483782234387945e-08,...
1     [6.523827051491127e-12, 1.5011881737692079e-09...
2     [6.811636558268219e-05, 2.9997917039281674e-08...
3     [3.8311029015858367e-08, 4.501005079760034e-08...
4     [1.0684098938895659e-08, 2.0712323589577863e-0...
5     [5.665662161646878e-09, 7.794811907047006e-09,...
6     [2.758577579697885e-07, 5.0564572973144095e-08...
7     [3.9218431879817217e-07, 6.326765173980675e-07...
8     [0.0036101459991186857, 1.485780217080901e-06,...
9     [1.0100110747757185e-09, 6.9881309627817245e-0...
10    [6.123455023043789e-06, 1.306078303286995e-07,...
11    [4.014760079940061e-09, 6.7373351342325805e-09...
12    [1.084348721924755e-09, 7.283423997250793e-08,...
13    [2.6232946925830447e-09, 6.038159199306392e-07...
14    [1.445332298288804e-08, 6.96937902944228e-08, ...
15    [2.3558213158025865e-08, 3.696546402309764e-09...
16    [2.85292059074016e-12, 6.36888497496102e-09, 6...
17    [9.181959512716276e-07, 4.300413891655808e

In [26]:
with open("test.jpeg", "rb") as image:
        payload = image.read()
        payload = bytearray(payload)
        
# print(payload)        
response = sgmkr_runt.invoke_endpoint(
    EndpointName = endpoint_name,
    ContentType = 'image/jpeg',
    # Accept = "application/json;verbose",
    Body = payload,
)

prediction = json.loads(response['Body'].read().decode())
print(prediction)

[1.1380696918195099e-07, 7.56756701747463e-09, 0.0001550694287288934, 9.212412237502576e-07, 0.9976673126220703, 7.216151942657234e-08, 3.211607646491643e-10, 7.86391865403857e-06, 1.6014664083741081e-07, 5.5729153245920315e-05, 1.7786018133847392e-06, 7.411630753573206e-10, 4.43545999928574e-08, 9.571197551849764e-06, 1.0716439646785147e-05, 5.6300654250662774e-05, 1.166110976669188e-07, 0.0002544214075896889, 3.0548046314748944e-09, 1.3057114301773254e-05, 2.88339521148373e-07, 4.4426867740199327e-10, 4.594360048654211e-12, 2.882503680190496e-10, 1.154063102148939e-05, 6.0554317315109074e-05, 1.8473640395200164e-08, 0.006899641826748848, 8.450757604805403e-08, 9.05313246590822e-09, 0.011472121812403202, 7.2656334850762505e-06, 4.423169229994528e-05, 1.6440111494375742e-07, 1.0436249795020558e-05, 3.7407431818792247e-07, 1.5315249068947878e-11, 0.007918652147054672, 1.0959934293452989e-08, 0.9803165793418884, 6.064021285112631e-09, 7.574465499260441e-10, 0.0604897066950798, 1.58775932

In [27]:
a = prediction
b = ["anesthesia","Upper Body Wax","restorative dentistry","beauty training","Female","Kids","nail services","cosmetic injectables","mens wax","Square","mens colour","Extension services","Old","beauty treatment","portrait photography","grooming","business photography","wispy","Barber","plasma fibrolast","medical services","vitamin injections","wrinkles","microneedling","alternative therapy","wedding services","reflexology","teeth","tatto service","Hair Highlights","Right","body massage","chemical peels","group fitness classes","sugaring","physical therapy","lip treatments","Male","Bohemian Box Braids","piercing services","Locs","body waxing","Landscape","other","dreadlocs","passion twist","Left","cat eye","microblading","none","dermal fillers","sew ins","eyelash services","Gel Nails","lifestyle photography","weight loss services","waxing services","loc services","stretch mark treatment","group services","hair saloons","vajacial","acupunture","Hard Wax","perm","White","energy healing","fitness programs","Braids","Middle","Manicure","cornrows","crystal healing","detoxification services","crochet","eyebrow waxing","Eyebrows","Lower Body Wax","bridal services","Black","Body Contouring","Bridal","Strip Wax","updos","spa","grocery shopping","kids services","bikini wax","hair treatments","wig services","hair styling","artistic services","awakening programs","Dreads","feed ins","makeup services","Eyelash fills","nail polish","facial treatments","beauty services","weaves","Center","Knotless Braids","Brown","cupping therapy","microdermabrasion","personal training","eyebrow services","Pedicure","permanent makeup","skin care services","meditation","iv therepy","Potrait","Eyelash Lift","Acne treatment","yoga and meditation","tanning services","pilates classes","cosmetic dentistry","Box Braids","cooking classes","mens braids","discount services","bohemian twist","barber services","hard","Soft Wax","Facial waxing","light therepy","kinky twist"]
# link a and b
threshold = 0.6  # Set your threshold value here

# Filter elements from list b based on the threshold in list a
filtered_elements = [category for prob, category in zip(a, b) if prob >= threshold]
print([prob for prob, category in zip(a, b) if category == "Female"])
# Print the filtered elements
print(filtered_elements)

print(len(a))
print(len(filtered_elements))

[0.9976673126220703]
['Female', 'piercing services', 'Middle', 'Black', 'Potrait']
131
5


In [20]:
import numpy as np

b = ["anesthesia","Upper Body Wax","restorative dentistry","beauty training","Female","Kids","nail services","cosmetic injectables","mens wax","Square","mens colour","Extension services","Old","beauty treatment","portrait photography","grooming","business photography","wispy","Barber","plasma fibrolast","medical services","vitamin injections","wrinkles","microneedling","alternative therapy","wedding services","reflexology","teeth","tatto service","Hair Highlights","Right","body massage","chemical peels","group fitness classes","sugaring","physical therapy","lip treatments","Male","Bohemian Box Braids","piercing services","Locs","body waxing","Landscape","other","dreadlocs","passion twist","Left","cat eye","microblading","none","dermal fillers","sew ins","eyelash services","Gel Nails","lifestyle photography","weight loss services","waxing services","loc services","stretch mark treatment","group services","hair saloons","vajacial","acupunture","Hard Wax","perm","White","energy healing","fitness programs","Braids","Middle","Manicure","cornrows","crystal healing","detoxification services","crochet","eyebrow waxing","Eyebrows","Lower Body Wax","bridal services","Black","Body Contouring","Bridal","Strip Wax","updos","spa","grocery shopping","kids services","bikini wax","hair treatments","wig services","hair styling","artistic services","awakening programs","Dreads","feed ins","makeup services","Eyelash fills","nail polish","facial treatments","beauty services","weaves","Center","Knotless Braids","Brown","cupping therapy","microdermabrasion","personal training","eyebrow services","Pedicure","permanent makeup","skin care services","meditation","iv therepy","Potrait","Eyelash Lift","Acne treatment","yoga and meditation","tanning services","pilates classes","cosmetic dentistry","Box Braids","cooking classes","mens braids","discount services","bohemian twist","barber services","hard","Soft Wax","Facial waxing","light therepy","kinky twist"]

threshold = 0.6

selected = []

for image_name , prediction in df.iterrows():
    # print(image_name)
    # print(prediction)
    # Filter elements from list b based on the threshold in list a
    selected_categories = [category for prob, category in zip(prediction['predicted_scores'], b) if prob >= threshold]

    # Print the filtered elements
    # print(selected_categories)

    # Append the data for each image
    selected.append({'image_name': prediction['image_name'], 'selected_categories': selected_categories})

# Create a DataFrame from the list of dictionaries
df_selected = pd.DataFrame(selected)
print(df_selected.head())
# print(df_selected['image_name'])



                      image_name  \
0  extensionservicces_1 (10).jpg   
1              facials 1 (6).jpg   
2       Untitled design (22).jpg   
3   extensionservicces_1 (9).jpg   
4   extensionservicces_1 (5).jpg   

                                 selected_categories  
0   [Female, Male, Middle, Black, feed ins, Potrait]  
1  [Female, Square, White, Middle, facial treatme...  
2  [grooming, medical services, Male, White, Midd...  
3  [Female, White, Middle, Black, weaves, Center,...  
4  [Female, Middle, Black, Center, Potrait, Box B...  


In [28]:
# store the df in csv format
df_selected.to_csv("selected_500epochs.csv", index=False)


In [25]:
clf_predictor.delete_endpoint()


NameError: name 'clf_predictor' is not defined