<a href="https://colab.research.google.com/github/aralzaabi1991-coder/Training-ENEC/blob/main/Copy_of_Copy_of_MF_FLOW_CODE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 1. Install necessary libraries

!pip install mlflow evidently pandas scikit-learn numpy scipy



In [None]:

# 1. Install necessary libraries (same as the previous cell, minus `evidently`, which the imports below do not use)
!pip install mlflow pandas scikit-learn numpy scipy




In [None]:

# 2. Import libraries
import pandas as pd
import numpy as np
import mlflow
import mlflow.sklearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from scipy import stats # New import for statistical testing (replacement for Evidently)

# --- Configuration ---
# Number of rows generated for the baseline (training) data set.
N_SAMPLES = 5000
# Single seed shared by NumPy's global random state and the models' random_state.
SEED = 42
np.random.seed(seed=SEED)

# Record every MLflow run in a local file-based store; this is the experiment's
# "detailed lab notebook" [3].
mlflow.set_tracking_uri(uri="file:///content/mlruns")

In [None]:
# Preview the first rows of the baseline training frame.
# NOTE: `train_df` is only created by the data-generation cell further down, so
# that cell must be executed first; running this cell before it raises NameError.
train_df.head()

NameError: name 'train_df' is not defined

In [None]:
print("Generating Initial Training Data (V1 Baseline)...")

# Baseline ("spring") sensor readings. The three draws stay in this order because
# each call advances NumPy's global random state.
temp_spring = np.random.normal(20, 3, N_SAMPLES)
pressure_stable = np.random.normal(100, 5, N_SAMPLES)
sensor_a_baseline = np.random.normal(50, 4, N_SAMPLES)

# Failure label: a Bernoulli draw whose probability is a logistic function of
# Pressure + Sensor_A. Temperature does not enter the rule.
_baseline_logit = 0.05 * (pressure_stable + sensor_a_baseline - 120)
failure_probability = 1 / (1 + np.exp(-_baseline_logit))
failure_spring = np.random.binomial(n=1, p=failure_probability)

# Assemble the training frame: three feature columns plus the binary target.
train_df = pd.DataFrame(
    data={
        'Temperature': temp_spring,
        'Pressure': pressure_stable,
        'Sensor_A': sensor_a_baseline,
        'Failure': failure_spring,
    }
)

# Separate the feature matrix from the target column.
X_train = train_df.drop(columns='Failure')
y_train = train_df.loc[:, 'Failure']
print(f"Baseline Data Snapshot Size: {X_train.shape[0]}")
#Step 3: Train and Log Model V1 using MLFlow
#We train the initial "Champion" model and log everything with MLFlow, creating a reproducible record of model version one.
# 4. Start MLFlow Run for Model V1
# Hold out part of the baseline data so the champion's reported accuracy is
# measured on rows it never saw while fitting. X_train / y_train themselves are
# left untouched because later steps use them as the drift reference and for
# retraining. A fixed random_state keeps the split reproducible.
VALIDATION_FRACTION_V1 = 0.2
X_fit_v1, X_val_v1, y_fit_v1, y_val_v1 = train_test_split(
    X_train, y_train, test_size=VALIDATION_FRACTION_V1, random_state=SEED
)

with mlflow.start_run(run_name="Model_V1_Initial_Deployment") as run:

    # Log parameters
    mlflow.log_param("model_type", "Logistic Regression")
    mlflow.log_param("validation_fraction", VALIDATION_FRACTION_V1)

    # Train the model on the fitting portion only
    model_v1 = LogisticRegression(solver='liblinear', random_state=SEED)
    model_v1.fit(X_fit_v1, y_fit_v1)

    # Predictions over the whole baseline frame (same length as X_train, as before).
    y_pred_v1 = model_v1.predict(X_train)

    # Training accuracy is optimistic by construction; the held-out validation
    # accuracy is the champion's baseline figure.
    training_accuracy_v1 = accuracy_score(y_fit_v1, model_v1.predict(X_fit_v1))
    accuracy_v1 = accuracy_score(y_val_v1, model_v1.predict(X_val_v1))

    # Log metrics
    mlflow.log_metric("training_accuracy", training_accuracy_v1)
    mlflow.log_metric("validation_accuracy", accuracy_v1)
    print(f"\nModel V1 Initial Accuracy (Champion): {accuracy_v1:.4f}")

    # Log the model artifact
    mlflow.sklearn.log_model(model_v1, "model_v1_artifact")
    mlflow.set_tag("status", "Champion_Deployed")
    print(f"MLFlow Run ID for Model V1: {run.info.run_id}")
#Step 4: Simulate Real-World Data and Data Drift
#We simulate the real world becoming "messy, unpredictable, and everchanging" by generating new live data that reflects a shift—like a heat wave hitting in the summer.
# 5. Generate Synthetic Live Data (Summer Data - with Drift)
N_LIVE_SAMPLES = 1000
print("\nGenerating Live Monitoring Data (Post-Drift)...")

# Live ("summer") readings. The draw order is fixed because every call advances
# NumPy's global random state.
temp_summer = np.random.normal(35, 5, N_LIVE_SAMPLES)        # heat wave: mean moves up
sensor_a_drifted = np.random.normal(75, 6, N_LIVE_SAMPLES)   # sensor under high usage/stress
pressure_live = np.random.normal(101, 5, N_LIVE_SAMPLES)

# The failure rule uses a different slope and offset than the baseline rule, so
# the feature-to-label relationship changes along with the feature distributions.
_live_logit = 0.06 * (pressure_live + sensor_a_drifted - 130)
failure_probability_live = 1 / (1 + np.exp(-_live_logit))
failure_live = np.random.binomial(n=1, p=failure_probability_live)

# Assemble the live frame with the same column layout as the training frame.
live_df = pd.DataFrame(
    data={
        'Temperature': temp_summer,
        'Pressure': pressure_live,
        'Sensor_A': sensor_a_drifted,
        'Failure': failure_live,
    }
)

# Separate the feature matrix from the target column.
X_live = live_df.drop(columns='Failure')
y_live = live_df.loc[:, 'Failure']


#Step 5: Custom Drift Detection (Replacing Evidently)
#Instead of using the dedicated drift detection package, we implement a custom statistical test (K-S test) to "constantly compare the new data coming in to the original data we trained on". We check for the change in distribution for critical sensors.
# 6. Check Model V1 Performance Decay
# Score the deployed champion on the labelled live batch.
y_pred_live_v1 = model_v1.predict(X_live)
accuracy_live_v1 = accuracy_score(y_true=y_live, y_pred=y_pred_live_v1)

# The walkthrough expects accuracy to fall here (from 90% down to 75% [1]).
# NOTE(review): the output captured with this notebook shows the opposite
# (0.8050 on training data vs 0.9470 on live data) -- confirm that the
# simulated drift really produces a performance drop.
print(f"\nPerformance Check: Model V1 Accuracy on Live Data: {accuracy_live_v1:.4f}")

# 7. Custom Statistical Drift Report (The "Early Warning System")
def check_for_drift_ks(reference_df, current_df, features, p_threshold=0.01):
    """Flag features whose distribution differs between two data sets.

    Runs a two-sample Kolmogorov-Smirnov test per feature and prints one report
    line for each, plus an alert line when drift is detected.

    Args:
        reference_df: Column-indexable data (e.g. a DataFrame) holding the
            baseline values.
        current_df: Column-indexable data holding the values to compare.
        features: Iterable of column names to test; each must exist in both
            inputs and hold numeric values.
        p_threshold: Significance level. A feature is reported as drifted when
            the test's p-value is strictly below this value.

    Returns:
        dict mapping each feature name to a plain ``bool`` (True = drift).

    Raises:
        KeyError: Propagated from the column lookup when a feature is missing.
        ValueError: If a feature has no non-missing values in either input.
    """
    drift_alerts = {}
    print("\n--- Running Custom Statistical Drift Check (K-S Test Watchdog) ---")

    for feature in features:
        reference_values = np.asarray(reference_df[feature], dtype=float)
        current_values = np.asarray(current_df[feature], dtype=float)

        # Drop missing readings before testing. A NaN in either sample can make
        # the p-value NaN, and `nan < p_threshold` is False, which would
        # silently report "no drift".
        reference_values = reference_values[~np.isnan(reference_values)]
        current_values = current_values[~np.isnan(current_values)]

        if reference_values.size == 0 or current_values.size == 0:
            raise ValueError(
                f"Feature {feature!r} has no non-missing values to compare."
            )

        # K-S test compares two distributions; Null Hypothesis: they are the same.
        p_value = stats.ks_2samp(reference_values, current_values).pvalue

        # If p_value < threshold, we reject the null hypothesis, meaning drift is detected.
        # Converted to a built-in bool so the result is plain Python data.
        is_drifted = bool(p_value < p_threshold)
        drift_alerts[feature] = is_drifted

        print(f"Feature: {feature} | K-S P-Value: {p_value:.6f} | Drift Detected: {is_drifted}")

        if is_drifted:
            # This functions as the report that is "screaming at us" [1]
            print(f"  *** ALERT: Massive distribution shift detected for {feature} (P-value below {p_threshold}) ***")

    return drift_alerts

# Execute the check for the critical sensor features
features_to_monitor = ['Temperature', 'Pressure', 'Sensor_A']
drift_results = check_for_drift_ks(X_train, X_live, features_to_monitor, p_threshold=0.01)

# Performance decay is judged against the champion's own baseline accuracy, not
# against a fixed number. A fixed cut-off can sit above the baseline itself, in
# which case a model that has not changed at all would be reported as "decayed".
MAX_ACCURACY_DROP = 0.05
drift_detected = any(drift_results.values())
performance_decayed = accuracy_live_v1 < accuracy_v1 - MAX_ACCURACY_DROP

# Performance drop + Drift = Trigger
if drift_detected and performance_decayed:
    print("\nALERT TRIGGERED: Data Drift Detected AND Performance Decay Observed. Time to retrain.")
    TRIGGER_RETRAIN = True
elif drift_detected:
    # Drift alone does not trigger retraining, but it must not be reported as
    # "stable" right after the per-feature drift alerts.
    print("\nData drift detected, but no performance decay observed. Monitoring continues.")
    TRIGGER_RETRAIN = False
else:
    print("\nSystem stable. Monitoring continues.")
    TRIGGER_RETRAIN = False
#Step 6: Retraining and Champion vs. Challenger Deployment
#If the drift alert and performance drop are detected, retraining is triggered. We update the model using the new, recent data that "reflects the plant's current state".
if TRIGGER_RETRAIN:
    # 8. Retrain Model V2 using the new recent data
    print("\n--- Retraining Model V2 (Challenger) ---")

    # Keep part of the live batch out of retraining. Scoring the challenger on
    # rows it was fitted on would inflate its accuracy and bias the comparison
    # against the champion, which never saw any live data.
    X_live_fit, X_live_holdout, y_live_fit, y_live_holdout = train_test_split(
        X_live, y_live, test_size=0.3, random_state=SEED
    )

    # Combine V1 baseline data with the recent live data for comprehensive retraining.
    # ignore_index avoids duplicate row labels in the combined frames.
    X_retrain = pd.concat([X_train, X_live_fit], ignore_index=True)
    y_retrain = pd.concat([y_train, y_live_fit], ignore_index=True)

    # Score the champion on the same held-out live rows the challenger is scored on.
    accuracy_holdout_v1 = accuracy_score(y_live_holdout, model_v1.predict(X_live_holdout))

    with mlflow.start_run(run_name="Model_V2_Challenger") as run:

        mlflow.log_param("model_type", "Logistic Regression")

        # Train the Challenger Model
        model_v2 = LogisticRegression(solver='liblinear', random_state=SEED)
        model_v2.fit(X_retrain, y_retrain)

        # Calculate performance of V2 on live data it has not been trained on
        y_pred_live_v2 = model_v2.predict(X_live_holdout)
        accuracy_live_v2 = accuracy_score(y_live_holdout, y_pred_live_v2)

        mlflow.log_metric("live_data_accuracy", accuracy_live_v2)
        mlflow.log_metric("champion_live_data_accuracy", accuracy_holdout_v1)
        print(f"Model V2 Accuracy on Live Data (Challenger): {accuracy_live_v2:.4f}")

        # Log the challenger model
        mlflow.sklearn.log_model(model_v2, "model_v2_artifact")
        mlflow.set_tag("status", "Challenger_Ready")

    # 9. Champion vs. Challenger Showdown [5]
    # Both figures come from the same held-out live rows.
    print("\n--- Champion vs. Challenger Showdown ---")
    print(f"Champion (V1) Performance: {accuracy_holdout_v1:.4f}")
    print(f"Challenger (V2) Performance: {accuracy_live_v2:.4f}")

    if accuracy_live_v2 > accuracy_holdout_v1:
        print("\nRESULT: The challenger wins! Model V2 is promoted into production.")
        # We promote Model V2 because we found it performs way better on the recent data [5].
        # Model V1 is safely archived as a safety net (rollback plan B) [3, 5].
    else:
        print("\nRESULT: Model V2 did not outperform V1. Sticking with Champion V1.")

else:
    print("\nNo retraining needed. The model continues to be a living, adapting system [5].")

Generating Initial Training Data (V1 Baseline)...
Baseline Data Snapshot Size: 5000


  return FileStore(store_uri, store_uri)



Model V1 Initial Accuracy (Champion): 0.8050




MLFlow Run ID for Model V1: 05b8d3d7d4cb4af1ad8e77c3a8fd2f86

Generating Live Monitoring Data (Post-Drift)...

Performance Check: Model V1 Accuracy on Live Data: 0.9470

--- Running Custom Statistical Drift Check (K-S Test Watchdog) ---
Feature: Temperature | K-S P-Value: 0.000000 | Drift Detected: True
  *** ALERT: Massive distribution shift detected for Temperature (P-value below 0.01) ***
Feature: Pressure | K-S P-Value: 0.000000 | Drift Detected: True
  *** ALERT: Massive distribution shift detected for Pressure (P-value below 0.01) ***
Feature: Sensor_A | K-S P-Value: 0.000000 | Drift Detected: True
  *** ALERT: Massive distribution shift detected for Sensor_A (P-value below 0.01) ***

System stable. Monitoring continues.

No retraining needed. The model continues to be a living, adapting system [5].
