In [None]:
import os
import json
import boto3
import pandas as pd
import redshift_connector
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import mlflow
import mlflow.sklearn
import numpy as np
from datetime import datetime
from mlflow.tracking import MlflowClient


In [23]:


def get_secret(secret_id: str, region_name: str):
    """Fetch a secret from AWS Secrets Manager and return it as parsed JSON.

    NOTE(review): a later cell in this notebook defines ``get_secret`` again
    (second parameter named ``region``); once that cell runs, it replaces
    this definition.

    Args:
        secret_id: Identifier passed to Secrets Manager as ``SecretId``.
        region_name: AWS region used to build the Secrets Manager client.

    Returns:
        The result of ``json.loads`` applied to the ``SecretString`` field
        of the response.
    """
    secrets_manager = boto3.client("secretsmanager", region_name=region_name)
    # Only the string form of the secret is read.
    secret_string = secrets_manager.get_secret_value(SecretId=secret_id)["SecretString"]
    return json.loads(secret_string)




In [24]:
def get_secret(secret_id, region):
    """Read a secret from AWS Secrets Manager and decode its JSON payload.

    Args:
        secret_id: Identifier passed to Secrets Manager as ``SecretId``.
        region: AWS region name used to build the Secrets Manager client.

    Returns:
        The result of ``json.loads`` applied to the ``SecretString`` field
        of the response.
    """
    sm_client = boto3.client("secretsmanager", region_name=region)
    secret_value = sm_client.get_secret_value(SecretId=secret_id)
    raw_payload = secret_value["SecretString"]
    return json.loads(raw_payload)

def get_redshift_connection():
    """Open a connection to the Redshift ``dev`` database.

    Configuration is read from environment variables:

    * ``MLFLOW_SECRET_ID`` - Secrets Manager id of a JSON secret with
      ``username`` and ``password`` keys (required).
    * ``REDSHIFT_HOST`` - host to connect to (required).
    * ``REDSHIFT_PORT`` - TCP port; falls back to 5439 when unset or empty.
    * ``AWS_DEFAULT_REGION`` - forwarded to ``get_secret``; may be unset.

    Returns:
        The connection object returned by ``redshift_connector.connect``.
        The caller is responsible for closing it.

    Raises:
        RuntimeError: if a required environment variable is missing or empty.
        ValueError: if ``REDSHIFT_PORT`` is set but is not an integer.
    """
    secret_id = os.getenv("MLFLOW_SECRET_ID")
    region = os.getenv("AWS_DEFAULT_REGION")
    host = os.getenv("REDSHIFT_HOST")
    port = os.getenv("REDSHIFT_PORT")
    dbname = "dev"

    # Fail early with a readable message instead of letting None travel into
    # boto3 / the driver and surface as an unrelated error there.
    required = (("MLFLOW_SECRET_ID", secret_id), ("REDSHIFT_HOST", host))
    missing = [name for name, value in required if not value]
    if missing:
        raise RuntimeError(
            "Missing required environment variable(s): " + ", ".join(missing)
        )

    # An unset port used to reach int(None) and raise an opaque TypeError;
    # 5439 is the standard Redshift port.
    try:
        port_number = int(port) if port else 5439
    except ValueError:
        raise ValueError(
            f"REDSHIFT_PORT must be an integer, got {port!r}"
        ) from None

    creds = get_secret(secret_id, region)
    conn = redshift_connector.connect(
        host=host,
        database=dbname,
        port=port_number,
        user=creds['username'],
        password=creds['password']
    )
    return conn



In [25]:


# Open the Redshift connection; host/port and the secret id come from
# environment variables read inside get_redshift_connection().
# NOTE(review): `conn` is not closed anywhere in this cell.
conn = get_redshift_connection()

# Weather readings joined with track metadata, capped at 5000 rows.
# NOTE(review): there is no ORDER BY, so the query itself does not pin which
# 5000 rows come back.
query = """
SELECT date, ciudad, pais, temperatura, velocidad_viento, sensacion_termica,
       humedad, "timestamp" AS timestamp, name, artist, album, id
FROM weather_tracks
LIMIT 5000
"""

# NOTE(review): the saved output of this cell echoes this line, which looks
# like a pandas warning - presumably the notice about passing a raw DBAPI
# connection instead of a SQLAlchemy connectable; confirm.
df = pd.read_sql(query, conn)
# Preview the first rows as the cell output.
df.head()



  df = pd.read_sql(query, conn)


Unnamed: 0,date,ciudad,pais,temperatura,velocidad_viento,sensacion_termica,humedad,timestamp,name,artist,album,id
0,2025-08-10,San Miguel de Tucumán,AR,7,3.6,4.54,76,1754818442,Y Volveras,Franco Arroyo,Y Volveras,12zR9AbbezPIT0SUV2GwUC
1,2025-08-10,San Miguel de Tucumán,AR,7,3.6,4.54,76,1754818442,Baila Marimba,Sebastian,Discografía Completa Volumen 4,0QmwmijbMeK5i2SvSruhNI
2,2025-08-10,San Miguel de Tucumán,AR,7,3.6,4.54,76,1754818442,Si Tú Te Vas,Los Puesteros,Tocando al Frente,5DC0XKMEzM4W5C99Qmham1
3,2025-08-10,San Miguel de Tucumán,AR,7,3.6,4.54,76,1754818442,"Dime, Dime",Miguel Alejandro,Un Elegido,365gBtUpZ0NdoRFPRaO3Us
4,2025-08-10,San Miguel de Tucumán,AR,7,3.6,4.54,76,1754818442,Me Parece Que,"Chebere, Pelusa",Pelusa Mi Paso Por Chebere,7j124o3B4oTZeUMpfu453M


In [26]:

# Cell 5: Preprocessing
# Derive the day of the week from the calendar date and the hour of day from
# the epoch timestamp.
df['date'] = pd.to_datetime(df['date'])
df['dayofweek'] = df['date'].dt.dayofweek
# `timestamp` is read as seconds since the Unix epoch.
# NOTE(review): the resulting hour is presumably UTC, not local city time - confirm.
df['hour'] = pd.to_datetime(df['timestamp'], unit='s').dt.hour

# One-hot encode the city. `sparse_output` replaces the older `sparse`
# keyword of OneHotEncoder (scikit-learn >= 1.2).
ohe = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
ciudades_encoded = ohe.fit_transform(df[['ciudad']])
# Reuse df's index: pd.concat(axis=1) aligns on index labels, so a fresh
# RangeIndex here would misalign rows (and introduce NaNs) whenever df has
# been filtered or re-indexed.
ciudades_df = pd.DataFrame(
    ciudades_encoded,
    columns=ohe.get_feature_names_out(['ciudad']),
    index=df.index,
)


# Assemble the final feature matrix: numeric weather/time columns plus the
# one-hot city columns.
X = pd.concat([df[['temperatura','velocidad_viento','sensacion_termica','humedad','dayofweek','hour']], ciudades_df], axis=1)
# NOTE(review): the target is the track `id`. If most ids occur only once in
# the sample, a classifier has nothing to generalize from - the saved
# training output in this notebook reports accuracy 0.0. Confirm this is the
# intended label.
y = df['id']



In [27]:

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Point the client at the tracking server, select the experiment, and set
# the registry URI to a local file store.
mlflow.set_tracking_uri("http://mlflow:5000")
mlflow.set_experiment("musica_clima_experimento")
mlflow.set_registry_uri("file:/tmp/mlruns")

# Hyperparameters live in one place so the value logged to MLflow is always
# the value the model was built with.
rf_params = {"n_estimators": 100, "random_state": 42}

with mlflow.start_run():
    # Train on the training split and score on the held-out split.
    rf = RandomForestClassifier(**rf_params)
    rf.fit(X_train, y_train)

    y_pred = rf.predict(X_test)
    acc = accuracy_score(y_test, y_pred)

    # Log parameters and metrics.
    mlflow.log_params(
        {
            "modelo": "RandomForestClassifier",
            "n_estimators": rf_params["n_estimators"],
        }
    )
    mlflow.log_metric("accuracy", acc)

    # Log the fitted model together with a small batch of example inputs.
    mlflow.sklearn.log_model(
        sk_model=rf,
        artifact_path="mi_modelo",
        input_example=X.iloc[:5],
    )

    print("Accuracy:", acc)
    print("Modelo registrado en MLflow")



Accuracy: 0.0
Modelo registrado en MLflow
🏃 View run upbeat-snake-538 at: http://192.168.100.8:5000/#/experiments/1/runs/3759ff0c467a46dbbf9d7295178c1a97
🧪 View experiment at: http://192.168.100.8:5000/#/experiments/1
