In [1]:
# Parameters cell: default values used when the notebook is run as-is.
# NOTE(review): presumably these are overridden per run by the orchestrator
# (parameter injection) — confirm. Keep every entry a plain `NAME = literal`
# assignment so such tooling can still parse the cell.

# --- Dataset selection ---
# Directory holding the simulated datasets, and the witness whose
# "simulation-<WITNESS_NAME>.npz" file is loaded from it.
SIMULATION_PATH = "./simulated_data"
WITNESS_NAME = "PPT"

# --- Orchestration ---
# Identifier of the triggering DAG run; None for a manual run.
AIRFLOW_DAG_RUN_ID = None  # e.g. "test-dm-chsh"

# --- MLflow tracking server and S3-compatible storage ---
# Local-development defaults only; do not commit real credentials here.
MLFLOW_URL = "http://localhost:5000"
MLFLOW_S3_ENDPOINT_URL = "http://localhost:9990"
AWS_ACCESS_KEY_ID = "minio123"
AWS_SECRET_ACCESS_KEY = "minio123"

In [2]:
from os import environ, path
import mlflow

# Export the storage credentials and the S3 endpoint into this process's
# environment in one step, so libraries that read them from the environment
# (presumably MLflow's S3 artifact client — confirm) can pick them up.
environ.update(
    {
        "AWS_ACCESS_KEY_ID": AWS_ACCESS_KEY_ID,
        "AWS_SECRET_ACCESS_KEY": AWS_SECRET_ACCESS_KEY,
        "MLFLOW_S3_ENDPOINT_URL": MLFLOW_S3_ENDPOINT_URL,
    }
)

# Point the MLflow client at the tracking server.
mlflow.set_tracking_uri(MLFLOW_URL)

In [3]:
import numpy as np
from sklearn.model_selection import train_test_split

# Path of the simulated dataset for the selected witness.
simulation_file_path = path.join(SIMULATION_PATH, f"simulation-{WITNESS_NAME}.npz")

# np.load on an .npz archive returns a lazily-loading NpzFile that keeps the
# underlying file open. Read both arrays inside a `with` block so the handle
# is closed as soon as they are materialised (`.astype` returns new in-memory
# arrays, so nothing references the archive afterwards).
with np.load(simulation_file_path) as data:
    simulated_states = data["states"].astype(np.float64)
    simulated_labels = data["labels"].astype(np.float64)

# Hold out 20% of the samples for validation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_val, y_train, y_val = train_test_split(
    simulated_states,
    simulated_labels,
    test_size=0.2,
    random_state=42,
)

In [4]:
from sklearn.metrics import accuracy_score
from tpot import TPOTClassifier

mlflow.set_experiment('ML Quantum Entanglement')

# Single source of truth for the TPOT search configuration: the same mapping
# is passed to the estimator and logged to MLflow, so the tracked parameters
# cannot drift from what was actually trained.
tpot_search_params = {
    "generations": 5,
    "population_size": 20,
    "cv": 5,
    "random_state": 42,
}

with mlflow.start_run() as run:
    # Record configuration and tags before the (long) search starts, so a run
    # that fails during fitting is still identifiable in the tracking UI.
    mlflow.log_params({**tpot_search_params, "witness": WITNESS_NAME})

    # NOTE(review): when AIRFLOW_DAG_RUN_ID is None (its default in the
    # parameters cell) this tag is the literal string "None-TPOT". Left
    # unchanged because downstream lookups may depend on it — confirm.
    mlflow.set_tag("airflow_dag_run_id", f"{AIRFLOW_DAG_RUN_ID}-TPOT")
    mlflow.set_tag("pipeline_step", "training")

    # Run the TPOT pipeline search on the training split.
    pipeline_optimizer = TPOTClassifier(**tpot_search_params, verbosity=2)
    pipeline_optimizer.fit(X_train, y_train)

    # Score the best pipeline found on the held-out validation split.
    y_pred = pipeline_optimizer.predict(X_val)
    accuracy = accuracy_score(y_val, y_pred)
    mlflow.log_metric("accuracy", accuracy)

    # Log only the winning scikit-learn pipeline (not the TPOT wrapper) as the
    # run's "model" artifact, then register it under a witness-specific name.
    mlflow.sklearn.log_model(pipeline_optimizer.fitted_pipeline_, "model")
    model_name = f"QuantumStateClassifier-{WITNESS_NAME}-TPOT"
    model_uri = f"runs:/{run.info.run_id}/model"
    mlflow.register_model(model_uri, model_name)

    print(f"Model accuracy: {accuracy}")

Optimization Progress:   0%|          | 0/120 [00:00<?, ?pipeline/s]


Generation 1 - Current best internal CV score: 1.0

Generation 2 - Current best internal CV score: 1.0

Generation 3 - Current best internal CV score: 1.0

Generation 4 - Current best internal CV score: 1.0

Generation 5 - Current best internal CV score: 1.0

Best pipeline: ExtraTreesClassifier(input_matrix, bootstrap=False, criterion=entropy, max_features=0.5, min_samples_leaf=2, min_samples_split=10, n_estimators=100)


Successfully registered model 'QuantumStateClassifier-PPT-TPOT'.
2024/07/17 16:14:53 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: QuantumStateClassifier-PPT-TPOT, version 1


Model accuracy: 1.0


Created version '1' of model 'QuantumStateClassifier-PPT-TPOT'.
