# 2 Importando sagemaker e outras coisas

In [None]:
import os
import json
import boto3
import random
import shutil

from tqdm import tqdm

import sagemaker
from sagemaker.session import Session
from sagemaker import get_execution_role
from sagemaker.analytics import HyperparameterTuningJobAnalytics
from sagemaker.tuner import CategoricalParameter, ContinuousParameter, HyperparameterTuner, IntegerParameter
from sagemaker.pytorch import PyTorch, PyTorchModel
from sagemaker.predictor import Predictor
from sagemaker.debugger import Rule, rule_configs, ProfilerRule, DebuggerHookConfig, CollectionConfig, ProfilerConfig, FrameworkProfile
from smdebug.trials import create_trial
from smdebug.core.modes import ModeKeys

from IPython.display import Image, display

# Prerequisite: install the Visual C++ Build Tools (https://visualstudio.microsoft.com/visual-cpp-build-tools/) and then run `pip install --upgrade setuptools`.


## 3 Baixando Imagens

In [None]:
def download_and_arrange_data(file_list_path='file_list.json', dest_root='train_data'):
    """Download the images named in a JSON file list into per-label folders.

    The JSON file maps a label (used as the sub-folder name) to a list of
    file paths. For each path, the base name up to its first dot is kept,
    ``.jpg`` is appended, and that object is fetched from the ``bin-images/``
    prefix of the ``aft-vbi-pds`` S3 bucket into ``<dest_root>/<label>/``.

    Args:
        file_list_path: Path of the JSON file list to read.
        dest_root: Local folder under which one sub-folder per label is created.
    """
    s3_client = boto3.client('s3')

    with open(file_list_path, 'r') as f:
        d = json.load(f)

    for k, v in d.items():
        print(f"Downloading Images with {k} objects")
        directory = os.path.join(dest_root, k)
        os.makedirs(directory, exist_ok=True)
        for file_path in tqdm(v):
            file_name = os.path.basename(file_path).split('.')[0] + '.jpg'
            # S3 object keys always use '/'. os.path.join would insert a
            # backslash on Windows and the object would not be found, so the
            # key is built explicitly.
            s3_client.download_file('aft-vbi-pds', f'bin-images/{file_name}',
                                    os.path.join(directory, file_name))


download_and_arrange_data()

# NOTE: file_list.json is not part of this file; it is a separate file that must be supplied.

## 4 Plotando gráfico dos dados

In [None]:
# Using plotly, create a bar plot of the number of images per number of objects
import plotly.graph_objects as go

# The file list is loaded here because the `d` read inside
# download_and_arrange_data() is local to that function and is not visible
# at this level.
with open('file_list.json', 'r') as f:
    d = json.load(f)

# Take labels and counts from the same mapping so each bar always lines up
# with its label.
number_objects = list(d)
number_imgs = [len(d[n]) for n in number_objects]
imgs_total = sum(number_imgs)

fig = go.Figure(data=[go.Bar(x=number_objects, y=number_imgs)])
# Add title and axis labels
fig.update_layout(title_text='Number of Images per Number of Objects in Image',
                  xaxis_title_text='Number of Objects',
                  yaxis_title_text='Number of Images')
# Set width and height of the figure
fig.update_layout(width=600, height=500)
fig.show()


# 5 Organizando Imagens

In [None]:
# Folder layout: images start out in train_folder; a share of every class is
# moved out of it into the test and validation folders.
train_folder = 'train_data'
test_folder = 'test_data'
validation_folder = 'validation_data'

# Fraction of each class's images moved into each hold-out split
test_set_ratio = 0.05
validation_set_ratio = 0.1

# Make sure both hold-out root folders exist
for split_root in (test_folder, validation_folder):
    os.makedirs(split_root, exist_ok=True)

for class_dir in os.listdir(train_folder):
    class_path = os.path.join(train_folder, class_dir)

    # Only class directories are split; any other entry is left alone
    if not os.path.isdir(class_path):
        continue

    image_list = os.listdir(class_path)

    # Random order, then the first slice goes to test and the next to validation
    random.shuffle(image_list)
    test_set_size = int(len(image_list) * test_set_ratio)
    validation_set_size = int(len(image_list) * validation_set_ratio)
    validation_end = test_set_size + validation_set_size
    test_set = image_list[:test_set_size]
    validation_set = image_list[test_set_size:validation_end]

    # Move test images first, then validation images, keeping the class sub-folder
    for split_root, subset in ((test_folder, test_set), (validation_folder, validation_set)):
        for image in subset:
            dst = os.path.join(split_root, class_dir, image)
            os.makedirs(os.path.dirname(dst), exist_ok=True)
            shutil.move(os.path.join(class_path, image), dst)


# 6 Criando Imagem Docker e Definindo Hiperparâmetros

In [None]:
# IAM role ARN passed to the estimators and the model below.
# TODO: this value is only the ARN prefix; copy the full role ARN from the
# reference source before running.
role = "arn:aws:iam::"
# Session is a class, so it is instantiated with () (the braces were a syntax error).
sagemaker_session = sagemaker.Session()

# NOTE(review): later cells use `s3_bucket` and `input_path`, which are not
# defined anywhere in the visible code; confirm they are set before running them.

In [None]:
# Search space explored by the tuning job
hyperparameter_ranges = {
    'learning_rate': ContinuousParameter(0.001, 0.1),
    'batch_size': CategoricalParameter([16, 32, 64]),
}

# The objective is the number captured by the regex from the training logs;
# lower is better.
objective_metric_name = "Valid Loss"
objective_type = "Minimize"
metric_definitions = [{"Name": "Valid Loss", "Regex": "Final Validation Loss: ([0-9\\.]+)"}]

estimator = PyTorch(entry_point="train.py",
                    base_job_name="object-counter",
                    role=role,
                    framework_version="2.0.0",
                    # py310 matches the other PyTorch 2.0.0 estimator and the
                    # model defined in this file.
                    py_version="py310",
                    instance_count=1,
                    instance_type="ml.c5.2xlarge",
                    # f-string so the bucket name is interpolated instead of
                    # leaving a literal "{s3_bucket}" in the URI.
                    output_path=f's3://{s3_bucket}/training_output')

# At most 4 training jobs, run one at a time
tuner = HyperparameterTuner(
    estimator=estimator,
    objective_metric_name=objective_metric_name,
    hyperparameter_ranges=hyperparameter_ranges,
    metric_definitions=metric_definitions,
    max_jobs=4,
    max_parallel_jobs=1,
    objective_type=objective_type,
)


In [None]:
# Start the hyperparameter tuning job, feeding input_path as the 'training' channel.
# NOTE(review): input_path is not defined in the visible code — confirm it is set before this cell runs.
tuner.fit({'training': input_path}, wait=True, logs='All')

In [None]:
# Load the per-training-job results of the named tuning job into a DataFrame
exp = HyperparameterTuningJobAnalytics(
    hyperparameter_tuning_job_name='pytorch-training-230614-1612',
)
training_jobs = exp.dataframe()
# Show the jobs ordered from highest to lowest final objective value
training_jobs.sort_values(by='FinalObjectiveValue', ascending=False)

In [None]:
# Retrieve the estimator for the tuner's best training job and show its hyperparameters
best_estimator = tuner.best_estimator()
best_estimator.hyperparameters()

## 7 Instanciando um pc com placa de vídeo

In [None]:
# Debugger and profiler rules attached to the training job
rules = [
    Rule.sagemaker(rule_configs.loss_not_decreasing()),
    Rule.sagemaker(rule_configs.vanishing_gradient()),
    ProfilerRule.sagemaker(rule_configs.LowGPUUtilization()),
    Rule.sagemaker(rule_configs.poor_weight_initialization()),
    ProfilerRule.sagemaker(rule_configs.ProfilerReport()),
]

# Sample system metrics once per second
profiler_config = ProfilerConfig(system_monitor_interval_millis=1000)

# Fixed hyperparameters for this training run
hyperparameters = {
    'batch_size': 32,
    'epochs': 20,
    'learning_rate': 0.003,
}

# NOTE(review): s3_bucket is not defined in the visible code — confirm it is set before this cell runs.
estimator = PyTorch(entry_point='train.py',
                    base_job_name='object-counter',
                    role=role,
                    framework_version='2.0.0',
                    py_version='py310',
                    instance_count=1,
                    # SageMaker instance types start with "ml."; the original
                    # "al.p3.2xlarge" was a typo.
                    instance_type='ml.p3.2xlarge',
                    hyperparameters=hyperparameters,
                    output_path=f's3://{s3_bucket}/training_output',
                    rules=rules,
                    profiler_config=profiler_config)

In [None]:
# Train the estimator that was just configured with the debugger/profiler rules.
# Calling tuner.fit here would start another tuning job instead and leave this
# estimator untrained, so estimator.model_data (read further down) would
# never be populated.
# NOTE(review): input_path is not defined in the visible code — confirm it is set before this cell runs.
estimator.fit({'training': input_path}, wait=True, logs='All')

In [None]:
# System usage statistics from profiler report
# Displays a local image file. `Image` is IPython.display.Image here; a later
# cell re-imports the name `Image` from PIL, so this cell must run before it.
Image(filename='system-usage-statistics.jpg')

## 8 Model Deploying and Querying

In [None]:
# Keep the estimator's model_data value for the deployment step and echo it
model_location = estimator.model_data
model_location

In [None]:
# Requests carry raw bytes tagged as image/jpeg; responses are parsed as JSON
jpeg_serializer = sagemaker.serializers.IdentitySerializer("image/jpeg")
json_deserializer = sagemaker.deserializers.JSONDeserializer()


class IngPredictor(Predictor):
    """Predictor preconfigured with the JPEG serializer and JSON deserializer."""

    def __init__(self, endpoint_name, sagemaker_session):
        """Bind the predictor to an endpoint.

        Args:
            endpoint_name: Name of the endpoint to send requests to.
            sagemaker_session: Session forwarded to the base Predictor.
        """
        super().__init__(
            endpoint_name,
            sagemaker_session=sagemaker_session,
            serializer=jpeg_serializer,
            deserializer=json_deserializer,
        )

# Wrap the trained artifact as a model; deploy.py is the entry point script
pytorch_model = PyTorchModel(
    model_data=model_location,
    role=role,
    entry_point="deploy.py",
    framework_version="2.0.0",
    py_version="py310",
    predictor_cls=IngPredictor,
)

# Deploy on a single ml.m5.2xlarge instance and keep the returned predictor
predictor = pytorch_model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.2xlarge",
)

In [None]:
from PIL import Image
import io
import numpy as np

# Pick a random image of a known class from the test folder
test_class = '5'
test_class_dir = os.path.join(test_folder, test_class)
test_image = os.path.join(test_class_dir, random.choice(os.listdir(test_class_dir)))

# Read the bytes and close the file before the network call
with open(test_image, 'rb') as f:
    payload = f.read()

print('Sending the following image to AWS SageMaker endpoint:')
display(Image.open(io.BytesIO(payload)))
print(f'Expecting the following class: {test_class}')
response = predictor.predict(payload, initial_args={'ContentType': 'image/jpeg'})
# argmax along axis 1 gives a 0-based index per row; the +1 presumably maps
# it onto class labels that start at '1' — TODO confirm against deploy.py.
prediction = np.argmax(response, 1) + 1
print(f'Predicted class: {prediction[0]}')

In [None]:
# Tear down the endpoint once finished. Predictor has no `delete_point`
# method; the cleanup call is `delete_endpoint`.
predictor.delete_endpoint()