In [25]:
# perfect
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import mlflow
import mlflow.sklearn

# Load your dataset from Excel
df = pd.read_excel("./Employeedata/Employee Sample Data.xlsx")

# Select the column for anomaly detection
column_name = 'Bonus %'
data = df[[column_name]]

# Split the data into training and testing sets
X_train, X_test = train_test_split(data, test_size=0.2, random_state=42)

# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define different parameters for the Isolation Forest model
contamination_values = [0.01, 0.05, 0.1]
random_state_values = [42, 55, 123]

# Iterate over parameter combinations
for contamination in contamination_values:
    for random_state in random_state_values:
        # Train the Isolation Forest model
        model = IsolationForest(contamination=contamination, random_state=random_state)
        model.fit(X_train_scaled)

        # Predict anomalies on the test set
        y_pred = model.predict(X_test_scaled)

        # Add a new column 'Anomaly' to the DataFrame
        X_test['Anomaly'] = y_pred

        # Display the anomalies
        anomalies = X_test[X_test['Anomaly'] == -1]
        print(f"Anomalies for contamination={contamination}, random_state={random_state}:")
        print(anomalies)

        # MLflow tracking for each run
        with mlflow.start_run():
            # Log parameters
            mlflow.log_param("contamination", contamination)
            mlflow.log_param("random_state", random_state)

            # Log the model
            mlflow.sklearn.log_model(model, "model")

            # Save the scaler
            mlflow.sklearn.log_model(scaler, "scaler")


Anomalies for contamination=0.01, random_state=42:
Empty DataFrame
Columns: [Bonus %, Anomaly]
Index: []




Anomalies for contamination=0.01, random_state=55:
Empty DataFrame
Columns: [Bonus %, Anomaly]
Index: []




Anomalies for contamination=0.01, random_state=123:
Empty DataFrame
Columns: [Bonus %, Anomaly]
Index: []




Anomalies for contamination=0.05, random_state=42:
    Bonus %  Anomaly
18     24.0       -1




Anomalies for contamination=0.05, random_state=55:
    Bonus %  Anomaly
19     18.0       -1
68     35.0       -1
18     24.0       -1




Anomalies for contamination=0.05, random_state=123:
     Bonus %  Anomaly
19      18.0       -1
114     37.0       -1
68      35.0       -1
18      24.0       -1




Anomalies for contamination=0.1, random_state=42:
     Bonus %  Anomaly
19      18.0       -1
114     37.0       -1
68      35.0       -1
25      23.0       -1
18      24.0       -1




Anomalies for contamination=0.1, random_state=55:
     Bonus %  Anomaly
19      18.0       -1
114     37.0       -1
68      35.0       -1
25      23.0       -1
18      24.0       -1




Anomalies for contamination=0.1, random_state=123:
     Bonus %  Anomaly
19      18.0       -1
114     37.0       -1
68      35.0       -1
25      23.0       -1
18      24.0       -1




In [16]:
logged_model = 'runs:/e2185fa7dea949719cc6b56dafe59eb9/scaler'

# Load model
loaded_model = mlflow.sklearn.load_model(logged_model)

In [17]:
result = mlflow.register_model(
    "runs:/e2185fa7dea949719cc6b56dafe59eb9/scaler", "anomaly detection"
)

Successfully registered model 'anomaly detection'.
Created version '1' of model 'anomaly detection'.


In [18]:

logged_model = 'runs:/e2185fa7dea949719cc6b56dafe59eb9/model'

logged_model = 'runs:/runid/model'

# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)

# Predict on a Pandas DataFrame.

loaded_model.predict(pd.DataFrame(data))



array([-1,  1, -1, -1,  1,  1, -1,  1, -1,  1, -1,  1, -1, -1,  1, -1, -1,
       -1, -1, -1, -1, -1, -1,  1, -1, -1,  1, -1, -1, -1,  1,  1,  1,  1,
        1,  1,  1, -1,  1,  1,  1, -1, -1,  1, -1,  1, -1, -1,  1, -1,  1,
        1, -1,  1, -1,  1,  1,  1,  1, -1, -1,  1, -1, -1,  1,  1,  1, -1,
       -1,  1,  1, -1, -1,  1, -1,  1,  1, -1,  1, -1,  1, -1,  1, -1, -1,
        1, -1,  1,  1,  1, -1, -1,  1,  1,  1, -1, -1,  1, -1, -1,  1, -1,
        1, -1,  1, -1,  1,  1,  1, -1,  1, -1,  1, -1, -1, -1, -1,  1,  1,
       -1,  1,  1,  1, -1,  1,  1,  1,  1,  1, -1, -1,  1, -1,  1, -1,  1,
       -1, -1,  1,  1,  1, -1, -1,  1,  1, -1, -1,  1,  1,  1,  1,  1,  1,
        1,  1,  1, -1, -1, -1,  1, -1,  1,  1,  1, -1, -1, -1,  1, -1, -1,
        1,  1, -1, -1, -1, -1, -1, -1,  1, -1,  1,  1,  1,  1,  1,  1,  1,
        1, -1,  1, -1,  1, -1, -1,  1, -1,  1, -1,  1, -1,  1,  1, -1,  1,
       -1,  1,  1, -1,  1, -1,  1, -1,  1, -1,  1,  1,  1, -1,  1,  1, -1,
        1,  1,  1,  1,  1