In [1]:
import mlflow
import optuna
import lightning.pytorch as pl
import json
from src.datawork import data_module
from src.neural_network import NN
from lightning.pytorch.callbacks import Callback
from lightning.pytorch.loggers import MLFlowLogger

# Silence Optuna's per-trial INFO messages; only errors are reported.
optuna.logging.set_verbosity(optuna.logging.ERROR)

# Run settings are read from config.json, resolved against the working directory.
# JSON is UTF-8 by specification, so the encoding is pinned instead of relying on
# the platform default.
with open("config.json", "r", encoding="utf-8") as f:
    configs = json.load(f)

RANDOM_SEED: int = configs["RANDOM STATE"]  # seed handed to Lightning and the data module
EPOCHS: int = configs["EPOCHS"]             # max_epochs for every trial's Trainer
TRIALS: int = configs["TRIALS"]             # number of Optuna trials to run
EXPERIMENT_NAME: str = "Changing to experiment.log"  # MLflow experiment all runs are filed under

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# class log_losses(Callback):

#     def on_train_epoch_end(self, trainer, pl_module):
#         mlflow.log_metric('train_loss_epochs', trainer.logged_metrics['train_loss'])
#     def on_validation_epoch_end(self, trainer, pl_module):
#         mlflow.log_metric('val_loss_epochs', trainer.logged_metrics['val_loss'])

In [3]:
def objective(trial):
    """Train one network for an Optuna trial and return its validation loss.

    Samples the number of layers, the width of each layer, the dropout rate and
    the learning rate from ``trial``, fits an ``NN`` on ``data_module`` for at
    most ``EPOCHS`` epochs and reports the final ``val_loss``. Every trial is
    tracked as a nested MLflow run; the Lightning logger is bound to that same
    run so the sampled parameters and whatever the model logs during training
    end up together.

    Args:
        trial: The ``optuna.trial.Trial`` that drives this evaluation.

    Returns:
        The ``val_loss`` entry of ``trainer.callback_metrics`` after fitting,
        converted with ``.item()``.

    Raises:
        KeyError: If no metric named ``val_loss`` was logged during ``fit``.
    """
    with mlflow.start_run(nested=True) as run:
        # We optimize the number of layers, hidden units in each layer, dropout and the learning rate.
        n_layers = trial.suggest_int("n_layers", 1, 3)
        dropout = trial.suggest_float("dropout", 0.2, 0.5)
        # The range spans four orders of magnitude; sampling uniformly would put
        # ~90% of the draws in [1e-2, 1e-1], so sample on a log scale instead.
        lr = trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True)

        output_dims = [
            trial.suggest_int(f"n_units_l{i}", 4, 128, log=True) for i in range(n_layers)
        ]

        # Record the sampled values on the nested run before training starts, so
        # a trial that crashes mid-fit still shows which configuration it used.
        mlflow.log_params(trial.params)

        pl.seed_everything(RANDOM_SEED, workers=True)  # same seed for every trial
        data = data_module(batch_size=4, seed=RANDOM_SEED)
        model = NN(dropout, output_dims, lr)

        # Without run_id the logger opens a run of its own and the nested run
        # above stays empty; bind it to the active run and tracking store.
        mlf_logger = MLFlowLogger(
            experiment_name=EXPERIMENT_NAME,
            tracking_uri=mlflow.get_tracking_uri(),
            run_id=run.info.run_id,
        )

        trainer = pl.Trainer(
            logger=mlf_logger,
            deterministic=True,
            enable_checkpointing=False,
            max_epochs=EPOCHS,
            default_root_dir="./"
        )
        trainer.fit(model, data)
        error = trainer.callback_metrics["val_loss"].item()

    return error

In [4]:
def get_or_create_experiment(experiment_name:str):
    """Return the ID of the MLflow experiment named ``experiment_name``.

    Looks the experiment up by name first; when the lookup yields nothing, a
    new experiment with that name is created and its ID is returned instead.
    """
    existing = mlflow.get_experiment_by_name(experiment_name)
    if not existing:
        # Nothing registered under this name yet.
        return mlflow.create_experiment(experiment_name)
    return existing.experiment_id

# Resolve the experiment ID once; it is reused when the parent run is started.
experiment_id = get_or_create_experiment(EXPERIMENT_NAME)

# Set the current active MLflow experiment. As the last expression of the cell,
# the returned Experiment is what gets displayed (it includes the experiment ID).
mlflow.set_experiment(experiment_id=experiment_id)

<Experiment: artifact_location='file:///Users/anupam/Documents/Codebase/Studies/mlFlow-k8s-Fargate/mlruns/417464982641497498', creation_time=1705569961710, experiment_id='417464982641497498', last_update_time=1705569961710, lifecycle_stage='active', name='Changing to experiment.log', tags={}>

In [5]:
# Parent run for the whole search: each call to `objective` opens its own nested
# run beneath it, and the best result is summarised on the parent.
with mlflow.start_run(experiment_id=experiment_id, run_name="Experiment run", nested=True):
    # Seed the sampler explicitly. Training is already seeded with RANDOM_SEED,
    # but an unseeded sampler proposes different hyperparameters on every
    # execution, so the search as a whole would not be reproducible.
    study = optuna.create_study(
        direction="minimize",
        sampler=optuna.samplers.TPESampler(seed=RANDOM_SEED),
    )
    study.optimize(objective, n_trials=TRIALS)

    # Summarise the winning trial on the parent run.
    mlflow.log_params(study.best_params)
    mlflow.log_metric("Lowest val loss", study.best_value)

    # Log tags
    mlflow.set_tags(
        tags={
            "project": "Apple Demand Project",
            "optimizer_engine": "optuna",
            "model_family": "pytorch lightning",
            "feature_set_version": 1,
        }
    )

    # Log a fit model instance
    # model = NN(**study.best_params)
    # artifact_path = "model"

    # d=data_module(batch_size=4,seed=42)
    # d.setup()
    # inp_example=next(iter(d.train_dataset))[0]

    # mlflow.pytorch.log_model(
    #     # xgb_model=model,
    #     artifact_path=artifact_path,
    #     input_example=inp_example,
    #     model_format="pt",
    #     metadata={"model_data_version": 1},
    # )

    # Get the logged model uri so that we can load it from the artifact store
    # model_uri = mlflow.get_artifact_uri(artifact_path)

Seed set to 42
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name   | Type       | Params
--------------------------------------
0 | layers | Sequential | 1.5 K 
--------------------------------------
1.5 K     Trainable params
0         Non-trainable params
1.5 K     Total params
0.006     Total estimated model params size (MB)


data/X.csv
Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/opt/homebrew/Caskroom/miniforge/base/envs/mlflow_k8s/lib/python3.8/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:436: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.


                                                                           

  loss = self.loss(preds, y)
/opt/homebrew/Caskroom/miniforge/base/envs/mlflow_k8s/lib/python3.8/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:436: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.
/opt/homebrew/Caskroom/miniforge/base/envs/mlflow_k8s/lib/python3.8/site-packages/lightning/pytorch/loops/fit_loop.py:293: The number of training batches (20) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |          | 0/? [00:00<?, ?it/s]

AttributeError: 'MlflowClient' object has no attribute 'log_params'