In [14]:
%pip install mlflow 'numpy<2'opencv-python scikit-learn boto3 python-dotenv requests

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [15]:
%pip install opencv-python


Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [16]:
import os 
import mlflow
import mlflow.sklearn
import numpy as np
import cv2
import boto3
import requests
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from dotenv import load_dotenv
import subprocess
import socket 



In [17]:
#Configurando o ambiente
PROFILE = 'local' # Mudar para `cloud`quando for rodar na AWS`

#Variaveis para execucao na nuvem obtidas atraves do terraform 
ALB_DNS_NAME = ''# Obter com terraform output alb_dns_name
DATASOURCE_BUCKET_NAME = '' # Obter com terraform output datasource_bucket_name


In [18]:
#Configurando as conexoes
print('--- Configurando conexoes ---')
print(f'Profile : {PROFILE}')

if PROFILE == 'cloud':
    print('Configurando em modo cloud')
    # Valida√ß√£o das vari√°veis obrigat√≥rias
    if not ALB_DNS_NAME or not DATASOURCE_BUCKET_NAME:
        raise ValueError("Para o profile = cloud, define:")
    # Configurando DNS primeiro (adicionando nomes nos ips)
    try:
        alb_dns = ALB_DNS_NAME
        alb_ip = socket.gethostbyname(alb_dns)
        hosts_entry = f"{alb_ip} mlflow.xrayscope.local"
        subprocess.run(['sudo', 'bash', '-c', f"echo '{hosts_entry}' >> /etc/hosts"], check=True)
        print('‚úÖ DNS configurado com sucesso')
    except Exception as e:
        print(f'‚ö†Ô∏è Erro ao configurar DNS: {e}')

    # Usar o nome DNS configurado
    MLFLOW_TRACKING_URI = 'http://mlflow.xrayscope.local'
    BUCKET_NAME = DATASOURCE_BUCKET_NAME
    MLFLOW_S3_ENDPOINT_URL = None
    os.environ['AWS_DEFAULT_REGION'] = 'us-east-1'  # Ajuste conforme necess√°rio

else:
    print('Configurando em modo local')
    try:
        # Prefer .env.local (one level up), fallback to .env.example if present
        base = os.path.abspath(os.path.join(os.getcwd(), '..'))
        env_local = os.path.join(base, '.env.local')
        env_example = os.path.join(base, '.env.example')
        if os.path.exists(env_local):
            load_dotenv(dotenv_path=env_local)
        elif os.path.exists(env_example):
            load_dotenv(dotenv_path=env_example)
        else:
            print('Aviso: nenhum .env.local ou .env.example encontrado; usando vari√°veis de ambiente atuais')
    except Exception as e:
        print(f'Aviso: nao foi possivel carregar o arquivo de ambiente: {e}')

    EXECUTION_ENV = os.getenv('EXECUTION_ENVIRONMENT', 'local')
    # Fix: when EXECUTION_ENV is 'local' we want localhost endpoints
    if EXECUTION_ENV == 'local':
        MLFLOW_TRACKING_URI = os.getenv('MLFLOW_TRACKING_URI', 'http://localhost:5000')
        BUCKET_NAME = os.getenv('BUCKET_NAME', 'datasource')
        MLFLOW_S3_ENDPOINT_URL = os.getenv('MLFLOW_S3_ENDPOINT_URL', 'http://localhost:9000')
        AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID')
        AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY')
        if MLFLOW_S3_ENDPOINT_URL: os.environ['MLFLOW_S3_ENDPOINT_URL'] = MLFLOW_S3_ENDPOINT_URL
        if AWS_ACCESS_KEY_ID: os.environ['AWS_ACCESS_KEY_ID'] = AWS_ACCESS_KEY_ID
        if AWS_SECRET_ACCESS_KEY: os.environ['AWS_SECRET_ACCESS_KEY'] = AWS_SECRET_ACCESS_KEY
    else:
        # cloud or other execution env: try to read from env variables (or fallbacks)
        MLFLOW_TRACKING_URI = os.getenv('MLFLOW_TRACKING_URI', 'http://mlflow.xrayscope.local')
        BUCKET_NAME = os.getenv('BUCKET_NAME', DATASOURCE_BUCKET_NAME if 'DATASOURCE_BUCKET_NAME' in globals() else 'datasource')

# Ensure var is set for downstream code
os.environ['MLFLOW_TRACKING_URI'] = MLFLOW_TRACKING_URI
print(f'MLFLOW_TRACKING_URI: {MLFLOW_TRACKING_URI}')
print(f'BUCKET_NAME: {BUCKET_NAME}')



print('--------')


--- Configurando conexoes ---
Profile : local
Configurando em modo local
MLFLOW_TRACKING_URI: http://mlflow:5000
BUCKET_NAME: datasource
--------


In [19]:
def load_images_from_s3_folder(s3_client, bucket_name, prefix, label, img_size):
    print(f"Carregando imagens de: {prefix}...")
    images = []
    response = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=prefix)
    if 'Contents' not in response: return images
    for obj in response['Contents']:
        key = obj['Key']
        if key.endswith(('.jpeg', '.jpg', '.png')):
            image_data = s3_client.get_object(Bucket=bucket_name, Key=key)['Body'].read()
            nparr = np.frombuffer(image_data, np.uint8)
            img = cv2.imdecode(nparr, cv2.IMREAD_GRAYSCALE)
            img_resized = cv2.resize(img, img_size)
            images.append((img_resized.flatten(), label))
    print(f"-> {len(images)} imagens carregadas.")
    return images

def load_data_from_minio(s3_client, bucket_name, img_size):
    print("\n--- Etapa de Carregamento de Dados ---")
    data_normal = load_images_from_s3_folder(s3_client, bucket_name, "NORMAL/", 0, img_size)
    data_pneumonia = load_images_from_s3_folder(s3_client, bucket_name, "PNEUMONIA/", 1, img_size)
    all_data = data_normal + data_pneumonia
    if not all_data: raise ValueError("Nenhuma imagem foi carregada.")
    np.random.shuffle(all_data)
    X = np.array([item[0] for item in all_data])
    y = np.array([item[1] for item in all_data])
    print("--- Dados carregados e prontos para o treinamento ---")
    return X, y

In [21]:
try:
    if PROFILE == "cloud" or (PROFILE == "local" and os.getenv("EXECUTION_ENVIRONMENT") == "cloud"):
        s3 = boto3.client("s3")
    else:
        s3 = boto3.client("s3", endpoint_url=os.getenv("MLFLOW_S3_ENDPOINT_URL"))
    
    X_data, y_data = load_data_from_minio(s3, bucket_name=BUCKET_NAME, img_size=(64, 64))
    X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.2, random_state=42, stratify=y_data)
    
    print("\n--- Treinando a pr√≥xima Vers√£o do Modelo ---")
    model_v2 = RandomForestClassifier(n_estimators=150, random_state=42)
    model_v2.fit(X_train, y_train)
    accuracy = accuracy_score(y_test, model_v2.predict(X_test))
    print(f"Modelo v2 treinado. Acur√°cia: {accuracy:.4f}")
    
    print("\n--- Registrando no MLflow ---")
    MODEL_NAME = "RandomForest"
    
    experiment_name = "SageMaker_Experiments" if PROFILE == "cloud" else "Notebook_Experiments"
    mlflow.set_experiment(experiment_name)
    client = mlflow.tracking.MlflowClient()

    with mlflow.start_run(run_name=f"Training_{PROFILE}_RandomForest_v2") as run:
        mlflow.log_param("n_estimators", 150)
        mlflow.log_param("environment", PROFILE)
        mlflow.log_metric("accuracy", accuracy)
        
        model_info = mlflow.sklearn.log_model(
            sk_model=model_v2,
            artifact_path="model-files",
            registered_model_name=MODEL_NAME
        )
        
        print(f"\n‚úÖ SUCESSO! Modelo '{MODEL_NAME}' registrado como vers√£o {model_info.registered_model_version}.")
        
        print(f"Promovendo a vers√£o {model_info.registered_model_version} para 'Production'...")
        client.transition_model_version_stage(
            name=MODEL_NAME,
            version=model_info.registered_model_version,
            stage="Production",
            archive_existing_versions=True
        )
        print("‚úÖ Vers√£o promovida com sucesso!")
        
        if PROFILE == "cloud":
            print("üåê MLflow UI configurado")
            print("üéØ O webapp j√° est√° usando o novo modelo automaticamente!")
        else:
            print("Acesse a UI do MLflow para confirmar as altera√ß√µes: http://localhost:5000")

except Exception as e:
    print(f"\n‚ùå FALHA! Algo deu errado.")
    print(f"   Detalhe do erro: {e}")
    import traceback
    
    traceback.print_exc()


--- Etapa de Carregamento de Dados ---
Carregando imagens de: NORMAL/...

‚ùå FALHA! Algo deu errado.
   Detalhe do erro: Could not connect to the endpoint URL: "http://minio:9000/datasource?list-type=2&prefix=NORMAL%2F&encoding-type=url"


Traceback (most recent call last):
  File "/Users/alvarosamp/Library/Python/3.9/lib/python/site-packages/urllib3/connection.py", line 174, in _new_conn
    conn = connection.create_connection(
  File "/Users/alvarosamp/Library/Python/3.9/lib/python/site-packages/urllib3/util/connection.py", line 72, in create_connection
    for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
  File "/Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/socket.py", line 953, in getaddrinfo
    for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: [Errno 8] nodename nor servname provided, or not known

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/alvarosamp/Library/Python/3.9/lib/python/site-packages/botocore/httpsession.py", line 465, in send
    urllib_response = conn.urlopen(
  File "/Users/alvarosamp/Library/Python/3.9/lib/python