In [112]:
import os
import pickle

import mlflow
import mlflow.pyfunc
import mlflow.sklearn
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [113]:
# Define a simple custom wrapper that uses pickle to load the model.
class PickleWrapper(mlflow.pyfunc.PythonModel):
    """MLflow pyfunc model that serves an object stored as a pickle artifact.

    The pickled object is read from the artifact registered under the key
    ``"model"`` and must expose a ``predict`` method.
    """

    def load_context(self, context):
        """Unpickle the wrapped model from the ``"model"`` artifact.

        Args:
            context: Object whose ``artifacts`` mapping resolves artifact
                names to local file paths.

        Raises:
            KeyError: If no artifact named ``"model"`` is available.
        """
        # Get the local path to the saved model artifact.
        model_file = context.artifacts.get("model")
        if model_file is None:
            # Fail with a clear message instead of the opaque TypeError that
            # open(None) would raise.
            raise KeyError(
                "PickleWrapper requires an artifact named 'model', "
                "but none was found in context.artifacts"
            )
        # NOTE(security): pickle.load can execute arbitrary code contained in
        # the file; only load artifacts that come from a trusted source.
        with open(model_file, "rb") as f:
            self.model = pickle.load(f)

    def predict(self, context, model_input):
        """Return the wrapped model's predictions for ``model_input``.

        Args:
            context: Unused here; accepted to match the pyfunc interface.
            model_input: Passed unchanged to the wrapped model's ``predict``.

        Returns:
            Whatever the wrapped model's ``predict`` returns.
        """
        return self.model.predict(model_input)




In [114]:
# Reset the annotations of both wrapper methods to an empty dict to avoid cleanup errors.
for _hook_name in ("load_context", "predict"):
    getattr(PickleWrapper, _hook_name).__annotations__ = {}
del _hook_name  # keep the notebook namespace free of the loop variable

In [115]:
# %%
# Point MLflow at the tracking server.
# The MLFLOW_TRACKING_URI environment variable overrides the local default, so the
# notebook can run unchanged inside the Docker network
# (e.g. MLFLOW_TRACKING_URI=http://tracking_server:5000).
mlflow.set_tracking_uri(os.environ.get("MLFLOW_TRACKING_URI", "http://localhost:5000"))
# Select the experiment that the runs below are recorded under.
mlflow.set_experiment("Voice_Gender_Classification")

<Experiment: artifact_location='mlflow-artifacts:/906782352771345596', creation_time=1741919406139, experiment_id='906782352771345596', last_update_time=1741919406139, lifecycle_stage='active', name='Voice_Gender_Classification', tags={}>

In [116]:
# Enable MLflow autologging for scikit-learn. The later model.fit() call is then
# recorded in an automatically created run, separate from the explicit "simple_run".
mlflow.sklearn.autolog()

In [117]:
# %%
# Read the voice dataset; voice.csv is resolved relative to the working directory.
data = pd.read_csv(filepath_or_buffer="voice.csv")

In [118]:
# Separate the features from the target and encode the target as 1 (male) / 0 (female).
X = data.drop("label", axis=1)
y = data["label"].map({"male": 1, "female": 0})

# Series.map yields NaN for any value missing from the mapping; fail here with the
# offending labels rather than later during training.
if y.isna().any():
    unexpected = sorted(data.loc[y.isna(), "label"].astype(str).unique())
    raise ValueError(f"Unexpected values in 'label' column: {unexpected}")

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [119]:
# %%
# Fit a random forest on the training split, then score it on the held-out split.
model = RandomForestClassifier(
    n_estimators=100,
    max_depth=10,
    random_state=42,
)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print("Accuracy:", accuracy)


2025/03/14 10:56:14 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '2585644fc58c4fe5966aa03b57e41d6b', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow


🏃 View run crawling-dog-918 at: http://localhost:5000/#/experiments/906782352771345596/runs/2585644fc58c4fe5966aa03b57e41d6b
🧪 View experiment at: http://localhost:5000/#/experiments/906782352771345596
Accuracy: 0.9810725552050473


In [120]:
# Serialize the fitted model to model.pkl in the working directory.
with open("model.pkl", mode="wb") as model_file:
    pickle.dump(model, model_file)

In [121]:
# Log the model with MLflow in an explicit run, using the custom PickleWrapper.
with mlflow.start_run(run_name="simple_run") as run:
    # Read the hyperparameters from the fitted estimator so the logged values
    # cannot drift from what was actually used for training.
    mlflow.log_param("n_estimators", model.n_estimators)
    mlflow.log_param("max_depth", model.max_depth)
    mlflow.log_metric("accuracy", accuracy)

    # Attach model.pkl as the "model" artifact that PickleWrapper.load_context reads.
    mlflow.pyfunc.log_model(
        artifact_path="model",
        python_model=PickleWrapper(),
        artifacts={"model": "model.pkl"},
    )

    # Keep the run id so the logged model can be located after the run closes.
    run_id = run.info.run_id

print("Model logged under run_id:", run_id)

Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00, 2312.19it/s] 


🏃 View run simple_run at: http://localhost:5000/#/experiments/906782352771345596/runs/134278d1da014112acb561d8da3e01cf
🧪 View experiment at: http://localhost:5000/#/experiments/906782352771345596
Model logged under run_id: 134278d1da014112acb561d8da3e01cf
