In [2]:
import os

# Path to the unstructured dataset
dataset_path = r"D:\mlops\Mlflow\mlflow\unstructured_dataset_mlflow\human"

# List all files and directories in the dataset path
files = []
for root, dirs, filenames in os.walk(dataset_path):
    for filename in filenames:
        files.append(os.path.join(root, filename))

print(f"Total files found: {len(files)}")


Total files found: 202


In [10]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from skimage.io import imread
from skimage.transform import resize
import numpy as np
import glob
import os

# Parameters
img_size = (64, 64)
n_estimators = 100
random_state = 42
max_depth = 5

# Load image file paths
files = glob.glob(r"D:\mlops\Mlflow\mlflow\unstructured_dataset_mlflow\human\**\*.jpg", recursive=True)

# Load and process images
X = []
y = []

for file_path in files:
    img = imread(file_path)
    img_resized = resize(img, img_size, anti_aliasing=True)
    X.append(img_resized.flatten())

    # Infer label from file name
    filename = os.path.basename(file_path).lower()
    if filename.startswith("rider"):
        y.append(0)
    elif filename.startswith("walker"):
        y.append(1)
    else:
        print(f"⚠️ Skipping unknown file: {file_path}")
        continue


X = np.array(X)
y = np.array(y)

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=random_state)

# Train model
model = RandomForestClassifier(n_estimators=n_estimators, random_state=random_state, max_depth=max_depth)
model.fit(X_train, y_train)

# Predict and evaluate
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       1.00      1.00      1.00        41

    accuracy                           1.00        41
   macro avg       1.00      1.00      1.00        41
weighted avg       1.00      1.00      1.00        41



In [11]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score
from skimage.io import imread
from skimage.transform import resize
import numpy as np
import glob
import os
import mlflow
import mlflow.sklearn

# Parameters
img_size = (64, 64)
n_estimators = 100
random_state = 42
max_depth = 5

# Load image file paths
files = glob.glob(r"D:\mlops\Mlflow\mlflow\unstructured_dataset_mlflow\human\**\*.jpg", recursive=True)

# Load and process images
X, y = [], []
for file_path in files:
    img = imread(file_path)
    img_resized = resize(img, img_size, anti_aliasing=True)
    X.append(img_resized.flatten())

    # Infer label from filename
    filename = os.path.basename(file_path).lower()
    if filename.startswith("rider"):
        y.append(0)
    elif filename.startswith("walker"):
        y.append(1)
    else:
        print(f"⚠️ Skipping unknown file: {file_path}")

X = np.array(X)
y = np.array(y)

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=random_state)

# MLflow experiment setup
mlflow.set_experiment("unsturctured_human_dataset_rf")

with mlflow.start_run() as run:
    # Train model
    model = RandomForestClassifier(n_estimators=n_estimators, random_state=random_state, max_depth=max_depth)
    model.fit(X_train, y_train)

    # Predict and evaluate
    y_pred = model.predict(X_test)

    # Metrics
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)

    print("🔍 Classification Report:\n", classification_report(y_test, y_pred))
    print(f"✅ Accuracy: {acc:.4f}")
    print(f"✅ F1 Score: {f1:.4f}")
    print(f"✅ Precision: {precision:.4f}")
    print(f"✅ Recall: {recall:.4f}")

    # Log parameters
    mlflow.log_param("n_estimators", n_estimators)
    mlflow.log_param("max_depth", max_depth)
    mlflow.log_param("random_state", random_state)
    mlflow.log_param("img_size", img_size)

    # Log metrics
    mlflow.log_metric("accuracy", acc)
    mlflow.log_metric("f1_score", f1)
    mlflow.log_metric("precision", precision)
    mlflow.log_metric("recall", recall)

    # Log model
    mlflow.sklearn.log_model(model, "model")

    # Print Run ID
    run_id = run.info.run_id
    print(f"\n📌 Run ID: {run_id}")


2025/07/13 17:17:00 INFO mlflow.tracking.fluent: Experiment with name 'unsturctured_human_dataset_rf' does not exist. Creating a new experiment.
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


🔍 Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        41

    accuracy                           1.00        41
   macro avg       1.00      1.00      1.00        41
weighted avg       1.00      1.00      1.00        41

✅ Accuracy: 1.0000
✅ F1 Score: 0.0000
✅ Precision: 0.0000
✅ Recall: 0.0000





📌 Run ID: 92f07815b89946dca83a1335ca56dc7b


In [12]:
import subprocess

# Start the MLflow UI (run this in a terminal, not in a notebook cell)
# !mlflow ui

# If you want to run it from the notebook, use the following:
subprocess.Popen(["mlflow", "ui"])
print("MLflow UI started at http://localhost:5000")

MLflow UI started at http://localhost:5000
