In [1]:
import os
import mlflow
import optuna
import xgboost as xgb
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from langchain_openai import ChatOpenAI
from langgraph.graph import StateGraph
from typing import TypedDict
import networkx as nx
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
# Global Configurations
# Inputs: where the dataset lives and the experiment label.
DATA_PATH = "../ml/data/diabetes_prediction_dataset.csv"  # CSV read by the data-loading cell
EXPERIMENT_NAME = "MLOps_Agent"  # presumably the MLflow experiment name — TODO confirm

# Mutable model bookkeeping, overwritten once a model has been trained.
MODEL_VERSION = "v1.0"  # label printed by the monitor/deploy steps
MODEL_ACCURACY = 0.0  # placeholder until the first evaluation

In [9]:
# Load Dataset
# Read the CSV and one-hot encode the two categorical columns in one step.
df = pd.get_dummies(
    pd.read_csv(DATA_PATH),
    columns=['smoking_history', 'gender'],
)

# Separate the target column from the feature matrix.
y = df["diabetes"]
X = df.drop(columns=["diabetes"])

# Fixed random_state keeps the 80/20 split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [10]:
# Chat model consulted by the retraining-decision step of the workflow.
# NOTE(review): credentials are presumably picked up from the environment — confirm.
llm = ChatOpenAI(
    model="gpt-4o",
    temperature=0.2,
)

In [11]:
# Hyperparameter Optimization Function
def objective(trial):
    """Optuna objective: validation accuracy of an XGBoost classifier.

    Samples ``max_depth``, ``learning_rate`` (log scale) and ``n_estimators``
    from ``trial``, fits a classifier on part of the training data and scores
    it on a hold-out slice of the *training* data.

    The test split is deliberately not used here: tuning against ``X_test``
    would leak it into model selection and inflate the accuracy reported
    on it afterwards.

    Args:
        trial: The ``optuna`` trial supplying hyperparameter suggestions.

    Returns:
        Accuracy on the hold-out slice (higher is better).
    """
    params = {
        "max_depth": trial.suggest_int("max_depth", 3, 10),
        # suggest_float(log=True) replaces the deprecated suggest_loguniform.
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
        "n_estimators": trial.suggest_int("n_estimators", 50, 300),
    }
    # Fixed seed => every trial is scored on the same validation rows,
    # so trial scores are comparable with each other.
    X_fit, X_val, y_fit, y_val = train_test_split(
        X_train, y_train, test_size=0.2, random_state=42
    )
    model = xgb.XGBClassifier(**params, use_label_encoder=False, eval_metric="logloss")
    model.fit(X_fit, y_fit)
    preds = model.predict(X_val)
    return accuracy_score(y_val, preds)

In [None]:
# Train Initial Model
# Seed the sampler so a fresh "Restart & Run All" explores the same trials and
# selects the same hyperparameters. TPE is Optuna's default sampler, so only
# the seed changes, not the search algorithm.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=10)
best_params = study.best_params

# Refit on the full training split with the best hyperparameters found.
model = xgb.XGBClassifier(**best_params, use_label_encoder=False, eval_metric="logloss")
model.fit(X_train, y_train)

# Test-set accuracy; this overwrites the placeholder from the config cell.
MODEL_ACCURACY = accuracy_score(y_test, model.predict(X_test))

In [13]:
# Log Model to MLflow
# Select the configured experiment first; without this the run is filed under
# MLflow's default experiment and EXPERIMENT_NAME is never used.
mlflow.set_experiment(EXPERIMENT_NAME)
with mlflow.start_run():
    mlflow.log_params(best_params)
    mlflow.log_metric("accuracy", MODEL_ACCURACY)
    mlflow.xgboost.log_model(model, "model")

  self.get_booster().save_model(fname)


In [14]:
# MLOps Workflow Functions
def monitor_model(state):
    """Print the current model version and accuracy, then classify its health.

    Returns ``{"status": "drift_detected"}`` when the module-level accuracy is
    below 0.75 and ``{"status": "model_healthy"}`` otherwise. ``state`` is
    accepted for the node signature but not read.
    """
    print(f"Monitoring Model: {MODEL_VERSION}, Accuracy: {MODEL_ACCURACY}")
    if MODEL_ACCURACY < 0.75:
        return {"status": "drift_detected"}
    return {"status": "model_healthy"}

def decide_retraining(state):
    """Ask the LLM whether the model should be retrained.

    The prompt requests a one-word yes/no answer, and only the first word of
    the reply is inspected. A plain substring test for "yes" would also fire
    on replies such as "No, not yet — yesterday's run ...".

    Args:
        state: Current workflow state (not read).

    Returns:
        ``{"status": "retrain"}`` if the reply starts with "yes",
        otherwise ``{"status": "done"}``.
    """
    prompt = (
        "The model accuracy dropped below the threshold. Should I retrain? "
        "Answer with a single word: yes or no."
    )
    # `invoke` is the supported entry point; `predict` is deprecated in LangChain.
    reply = llm.invoke(prompt)
    # Chat models return a message object; fall back to the raw value otherwise.
    text = str(getattr(reply, "content", reply)).lower()
    # Replace punctuation/markdown with spaces so "**Yes.**" still yields "yes".
    words = "".join(ch if ch.isalpha() else " " for ch in text).split()
    return {"status": "retrain"} if words[:1] == ["yes"] else {"status": "done"}

def _next_model_version(version):
    """Return ``version`` with its major number incremented ("v1.0" -> "v2.0", "v3" -> "v4")."""
    major, *rest = version.lstrip("v").split(".")
    # Lower-order components restart at zero after a major bump.
    return "v" + ".".join([str(int(major) + 1)] + ["0"] * len(rest))


def retrain_model(state):
    """Retrain the model with freshly tuned hyperparameters and log the run.

    Runs five more trials on the shared Optuna ``study``, refits an XGBoost
    classifier with the study's best parameters, then updates the module-level
    ``MODEL_ACCURACY`` and ``MODEL_VERSION`` and records the run in MLflow.

    Args:
        state: Current workflow state (not read).

    Returns:
        ``{"status": "deploy"}``.
    """
    global MODEL_ACCURACY, MODEL_VERSION
    study.optimize(objective, n_trials=5)
    best_params = study.best_params
    new_model = xgb.XGBClassifier(**best_params, use_label_encoder=False, eval_metric="logloss")
    new_model.fit(X_train, y_train)
    MODEL_ACCURACY = accuracy_score(y_test, new_model.predict(X_test))
    # int("1.0") raises ValueError, so the version string is parsed per
    # component instead of being converted as a whole.
    MODEL_VERSION = _next_model_version(MODEL_VERSION)
    # Keep retraining runs in the configured experiment rather than the default one.
    mlflow.set_experiment(EXPERIMENT_NAME)
    with mlflow.start_run():
        mlflow.log_params(best_params)
        mlflow.log_metric("new_accuracy", MODEL_ACCURACY)
        mlflow.xgboost.log_model(new_model, "model")
    return {"status": "deploy"}

def deploy_model(state):
    """Announce deployment of the current model and mark the workflow as done.

    Only prints the module-level version and accuracy; ``state`` is not read.
    Returns ``{"status": "done"}``.
    """
    announcement = f"Deploying Model {MODEL_VERSION} with Accuracy: {MODEL_ACCURACY}"
    print(announcement)
    return dict(status="done")

def done(state):
    """Final step: print a completion message and hand the state back unchanged."""
    completion_message = "MLOps Workflow Completed Successfully!"
    print(completion_message)
    return state

In [15]:
# Define Workflow State Schema
class MLOpsState(TypedDict):
    """Typed dictionary used as the state schema of the workflow graph."""

    # Workflow status (e.g., "monitoring", "retraining")
    status: str

In [16]:
# Build Workflow with LangGraph
workflow = StateGraph(MLOpsState)

# Register every step under the name the edges below refer to.
for node_name, node_fn in (
    ("monitor", monitor_model),
    ("decision", decide_retraining),
    ("retrain", retrain_model),
    ("deploy", deploy_model),
    ("done", done),
):
    workflow.add_node(node_name, node_fn)

workflow.set_entry_point("monitor")

# Routers return the name of the next node based on the status just written.
workflow.add_conditional_edges(
    "monitor",
    lambda state: "decision" if state["status"] == "drift_detected" else "done",
)
workflow.add_conditional_edges(
    "decision",
    lambda state: "retrain" if state["status"] == "retrain" else "done",
)

# A retrain is always followed by deploy, which then finishes the workflow.
workflow.add_edge("retrain", "deploy")
workflow.add_edge("deploy", "done")


<langgraph.graph.state.StateGraph at 0x204ca1b6930>

In [17]:
# Compile and Run Workflow
agent = workflow.compile()

# Seed status; the monitor step replaces it with its own verdict.
initial_state = {"status": "monitoring"}
output = agent.invoke(initial_state)
print("MLOps Workflow Output:", output)

Monitoring Model: v1.0, Accuracy: 0.9726
MLOps Workflow Completed Successfully!
MLOps Workflow Output: {'status': 'model_healthy'}


In [None]:
# Visualize Workflow
# Hand-maintained copy of the workflow's edges, used only for this drawing.
edges = [
    ("monitor", "decision"),
    ("monitor", "done"),
    ("decision", "retrain"),
    ("decision", "done"),
    ("retrain", "deploy"),
    ("deploy", "done"),
]
G = nx.DiGraph(edges)

plt.figure(figsize=(8, 5))
pos = nx.spring_layout(G, seed=42)  # seeded so the layout is stable across runs
draw_options = dict(
    with_labels=True,
    node_color="lightblue",
    edge_color="gray",
    node_size=2000,
    font_size=10,
    font_weight="bold",
    arrows=True,
)
nx.draw(G, pos, **draw_options)
plt.title("LangGraph MLOps Workflow")
plt.show()