In [None]:
# `pickle` ships with the Python standard library, so there is nothing to
# install here; it is not a pip-installable package, and attempting to install
# it only makes this cell fail on a fresh kernel. The module is imported
# directly in the imports cell.

In [1]:
import pandas as pd
import joblib
from datetime import datetime
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import gcsfs
import mlflow
from mlflow import MlflowClient
from mlflow.models import infer_signature
from pprint import pprint
from evidently import DataDefinition
from evidently import Report
from evidently.presets import DataDriftPreset, DataSummaryPreset
import os
import pickle

In [2]:
from check_drift import check_data_drift

In [3]:
# Run the data-drift check imported from check_drift. Per the output recorded
# for this cell, it compares data/iris.csv (reference) against data/data.csv
# (current) with an Evidently report and writes artifacts/drift_report.html.
# NOTE(review): those paths are not passed here, so they are presumably
# defaults inside check_drift — confirm there.
check_data_drift()

--- Checking for Data Drift ---
Reference data loaded from: data/iris.csv
Current data loaded from: data/data.csv
Running Evidently report...
Drift report saved to artifacts/drift_report.html
Drift analysis completed successfully. ✓


In [4]:
from check_poisoning import find_suspicious_labels

Checking the data for poisoning of labels


In [5]:
# Scan data/iris.csv for rows whose label looks suspicious; the returned list
# of row indices is shown as this cell's output.
# NOTE(review): k and threshold are presumably the neighbour count and the
# disagreement cut-off of a nearest-neighbour style check — confirm in
# check_poisoning.
# NOTE(review): the returned indices are not stored, and the training cell
# reads the same CSV without excluding them — confirm that is intended.
find_suspicious_labels(data_path="data/iris.csv", k=5, threshold=0.5)

--- Checking for Suspicious Labels in: data/iris.csv ---

Report: Found 5 suspicious labels out of 150 total rows.
Suspicious row indices (first 10): [70, 72, 83, 106, 119]
---------------------------------------------------



[70, 72, 83, 106, 119]

In [6]:
# Load the iris table and carve out a stratified 60/40 train/test split.
data = pd.read_csv("data/iris.csv")
feature_columns = ["sepal_length", "sepal_width", "petal_length", "petal_width"]

train, test = train_test_split(
    data,
    test_size=0.4,
    stratify=data["species"],
    random_state=42,  # fixed seed so the split is reproducible
)

# Separate the four measurement columns (features) from the species label.
X_train, y_train = train[feature_columns], train["species"]
X_test, y_test = test[feature_columns], test["species"]

In [7]:
# Point MLflow at the tracking server. The address can be overridden through
# the MLFLOW_TRACKING_URI environment variable; when it is unset or empty, the
# original hard-coded server address is used, so existing behaviour is kept.
tracking_uri = os.environ.get("MLFLOW_TRACKING_URI") or "http://10.128.0.2:8100"
mlflow.set_tracking_uri(tracking_uri)

# Build the client from the URI MLflow reports back, then list the experiments
# known to the server as a quick connectivity check.
client = MlflowClient(mlflow.get_tracking_uri())
all_experiments = client.search_experiments()
print(all_experiments)

[<Experiment: artifact_location='mlflow-artifacts:/381516238085156523', creation_time=1765699297508, experiment_id='381516238085156523', last_update_time=1765699297508, lifecycle_stage='active', name='IRIS Classifier Test: MLFlow', tags={}>, <Experiment: artifact_location='mlflow-artifacts:/0', creation_time=1765697592288, experiment_id='0', last_update_time=1765697592288, lifecycle_stage='active', name='Default', tags={}>]


In [8]:
# Select the experiment that runs are logged under. The call returns the
# Experiment object, which is what this cell displays as its output.
mlflow.set_experiment("IRIS Classifier Test: MLFlow")

<Experiment: artifact_location='mlflow-artifacts:/381516238085156523', creation_time=1765699297508, experiment_id='381516238085156523', last_update_time=1765699297508, lifecycle_stage='active', name='IRIS Classifier Test: MLFlow', tags={}>

In [9]:
# Hyperparameters for the decision tree; this one dict is both unpacked into
# the classifier constructor and logged to MLflow.
params = dict(max_depth=2, random_state=1)

In [12]:
# Train the decision tree on the training split and score it on the held-out
# test split.
mod_dt = DecisionTreeClassifier(**params)
mod_dt.fit(X_train, y_train)
prediction = mod_dt.predict(X_test)

# scikit-learn metrics take (y_true, y_pred) in that order. Accuracy happens
# to be symmetric, so the value is unchanged, but the conventional order keeps
# this line correct if it is ever swapped for an asymmetric metric.
accuracy_score = metrics.accuracy_score(y_test, prediction)
print(accuracy_score)

# Persist the fitted model in both pickle and joblib formats. Create the
# output directory first so a fresh checkout does not fail on open().
filename = 'artifacts/model.pkl'
os.makedirs(os.path.dirname(filename), exist_ok=True)
with open(filename, 'wb') as file:
    pickle.dump(mod_dt, file)
joblib.dump(mod_dt, "artifacts/model.joblib")

0.9166666666666666


['artifacts/model.joblib']

In [None]:
from check_fairness import check_model_fairness

In [None]:
# Run the fairness check imported from check_fairness.
# NOTE(review): called with no arguments, so it presumably locates the model
# and data on its own (e.g. the files saved under artifacts/) — confirm in
# check_fairness. This cell has no recorded execution count or output.
check_model_fairness()

In [None]:
from generate_explanations import generate_shap_explanations

In [None]:
# Run the SHAP explanation step imported from generate_explanations.
# NOTE(review): called with no arguments, so it presumably loads the saved
# model and data itself — confirm in generate_explanations. This cell has no
# recorded execution count or output.
generate_shap_explanations()

In [13]:
# Record this training run in MLflow: hyperparameters, the accuracy metric, a
# descriptive tag, and the fitted model (also registered under the name
# "IRIS-classifier-dt").
with mlflow.start_run():
    mlflow.log_params(params)
    mlflow.log_metric("accuracy", accuracy_score)
    mlflow.set_tag("Training info", "Decision Tree First Run")

    # Infer the model signature from the training features and the model's
    # predictions on them.
    signature = infer_signature(X_train, mod_dt.predict(X_train))

    model_info = mlflow.sklearn.log_model(
        sk_model=mod_dt,
        artifact_path="iris_model",
        signature=signature,
        # An input example only needs a few representative rows; passing the
        # whole training frame would store every training row with the model.
        input_example=X_train.head(5),
        registered_model_name="IRIS-classifier-dt",
    )

Registered model 'IRIS-classifier-dt' already exists. Creating a new version of this model...
2025/12/14 08:09:50 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: IRIS-classifier-dt, version 2


🏃 View run dapper-ant-480 at: http://10.128.0.2:8100/#/experiments/381516238085156523/runs/b92597da9f0b4ecfa6ea6d03436a56d5
🧪 View experiment at: http://10.128.0.2:8100/#/experiments/381516238085156523


Created version '2' of model 'IRIS-classifier-dt'.


In [14]:
# NOTE(review): `a` is not referenced by any other cell visible here; this
# looks like a leftover placeholder edit — confirm and consider removing it.
a = "Test change"