In [1]:
import os
from datetime import datetime, timezone
from pathlib import Path

import yaml

In [2]:
# Location of the MLflow log folder: a sibling of the current working directory.
mlflow_logs = os.path.join(Path.cwd().parents[0], "mlflow_logs")

In [3]:
import os
import yaml

class Experiment:
    """One experiment folder: its ``meta.yaml`` contents plus its traces.

    Everything is read from disk eagerly when the object is constructed.

    Attributes:
        folder_path: Path of the experiment folder, as passed in.
        experiment_id: Name of the experiment folder.
        meta: Mapping parsed from ``meta.yaml`` (empty dict when unavailable).
        traces: ``Trace`` objects, one per sub-folder of ``traces/``.
    """

    def __init__(self, folder_path):
        """Load metadata and traces for the experiment stored at ``folder_path``."""
        self.folder_path = folder_path
        # normpath strips a trailing separator; without it basename() would
        # return an empty string for paths such as "logs/123/".
        self.experiment_id = os.path.basename(os.path.normpath(folder_path))
        self.meta = self.load_meta()
        self.traces = self.load_traces()

    def load_meta(self):
        """Parse ``meta.yaml`` from the experiment folder.

        Returns:
            The parsed mapping, or ``{}`` when the file is missing, empty, or
            does not contain a mapping at the top level.
        """
        meta_path = os.path.join(self.folder_path, "meta.yaml")
        if not os.path.isfile(meta_path):
            return {}
        with open(meta_path, encoding="utf-8") as f:
            loaded = yaml.safe_load(f)
        # safe_load yields None for an empty document (and may yield a list or
        # scalar); callers rely on .get(), so anything but a dict becomes {}.
        return loaded if isinstance(loaded, dict) else {}

    def load_traces(self):
        """Build a ``Trace`` for every sub-folder of ``<folder_path>/traces``.

        Returns:
            A list of ``Trace`` objects ordered by folder name; empty when the
            ``traces`` directory does not exist.
        """
        traces_dir = os.path.join(self.folder_path, "traces")
        if not os.path.isdir(traces_dir):
            return []

        traces = []
        # os.listdir() returns entries in arbitrary order; sort so repeated
        # runs list traces in the same order.
        for trace_id in sorted(os.listdir(traces_dir)):
            trace_folder = os.path.join(traces_dir, trace_id)
            if os.path.isdir(trace_folder):
                traces.append(Trace(trace_folder))
        return traces

    @property
    def metadata(self):
        """Summary dict: id, raw and formatted creation time, and trace count."""
        creation_time = self.meta.get("creation_time")
        return {
            "experiment_id": self.experiment_id,
            "creation_time": creation_time,
            "creation_time_str": self._format_time(creation_time),
            "trace_count": len(self.traces)
        }

    def _format_time(self, timestamp_ms):
        """Format a millisecond epoch timestamp as ``YYYY-MM-DD HH:MM:SS UTC``.

        Returns ``"Unknown"`` for a missing or zero timestamp.
        """
        if not timestamp_ms:
            return "Unknown"
        # Timezone-aware replacement for the deprecated utcfromtimestamp().
        dt = datetime.fromtimestamp(int(timestamp_ms) / 1000, tz=timezone.utc)
        return dt.strftime("%Y-%m-%d %H:%M:%S UTC")

    def __repr__(self):
        # Build the summary once instead of once per field.
        info = self.metadata
        return (f"Experiment(id='{info['experiment_id']}', "
                f"traces={info['trace_count']}, "
                f"created='{info['creation_time_str']}')")


In [4]:
import os
import json

class Trace:
    """One trace folder, backed by ``<trace_folder>/artifacts/traces.json``.

    Attributes:
        trace_folder: Path of the trace folder, as passed in.
        trace_id: Name of the trace folder.
        data: Mapping parsed from ``traces.json`` (empty dict when unavailable).
    """

    def __init__(self, trace_folder):
        """Load the trace stored under ``trace_folder``."""
        self.trace_folder = trace_folder
        # normpath strips a trailing separator so basename() never returns
        # an empty id for paths such as "traces/abc/".
        self.trace_id = os.path.basename(os.path.normpath(trace_folder))
        self.data = self.load_trace()

    def load_trace(self):
        """Parse ``artifacts/traces.json`` from the trace folder.

        Returns:
            The parsed JSON object, or ``{}`` when the file is missing or its
            top-level value is not an object.

        Raises:
            json.JSONDecodeError: If the file exists but is not valid JSON.
        """
        trace_file = os.path.join(self.trace_folder, "artifacts", "traces.json")
        if not os.path.isfile(trace_file):
            return {}
        # Read as UTF-8 explicitly rather than relying on the platform default.
        with open(trace_file, encoding="utf-8") as f:
            payload = json.load(f)
        # Callers use .get(), so a top-level list/scalar is treated as "no data".
        return payload if isinstance(payload, dict) else {}

    @property
    def metadata(self):
        """Summary dict: id, span count, and raw/formatted start time.

        The start time is read from the first entry of ``spans``.
        """
        # "spans": null in the file would otherwise make len() fail.
        spans = self.data.get("spans") or []
        start_time_ns = spans[0].get("start_time_unix_nano", 0) if spans else 0

        return {
            "trace_id": self.trace_id,
            "span_count": len(spans),
            "start_time_unix_nano": start_time_ns,
            "start_time_str": self._format_time(start_time_ns)
        }

    def _format_time(self, timestamp_ns):
        """Format a nanosecond epoch timestamp as ``YYYY-MM-DD HH:MM:SS UTC``.

        Returns ``"Unknown"`` for a missing or zero timestamp.
        """
        if not timestamp_ns:
            return "Unknown"
        # Timezone-aware replacement for the deprecated utcfromtimestamp().
        dt = datetime.fromtimestamp(int(timestamp_ns) / 1e9, tz=timezone.utc)
        return dt.strftime("%Y-%m-%d %H:%M:%S UTC")

    def __repr__(self):
        # Build the summary once instead of once per field.
        info = self.metadata
        return (f"Trace(id='{info['trace_id']}', "
                f"spans={info['span_count']}, "
                f"start='{info['start_time_str']}')")



In [5]:
class Models:
    """Handle for a models folder.

    Only the folder location is recorded for now; listing models, versions,
    etc. can be added later.
    """

    def __init__(self, folder_path):
        # Nothing is read from disk here; just remember where the folder is.
        self.folder_path = folder_path


In [6]:
class MLflowLogs:
    """Index of a log directory: its experiment folders and optional models folder.

    Attributes:
        logs_dir: Directory that was scanned.
        experiments: ``Experiment`` objects, one per experiment folder.
        models: ``Models`` for a ``models`` sub-folder, or ``None`` if absent.
    """

    def __init__(self, logs_dir):
        """Scan ``logs_dir`` immediately and populate the index."""
        self.logs_dir = logs_dir
        self.experiments = []
        self.models = None
        self.load()

    def load(self):
        """(Re)scan ``logs_dir`` and rebuild ``experiments`` and ``models``.

        Safe to call more than once: previous results are discarded first, so
        re-running it refreshes the index instead of appending duplicates.

        Raises:
            OSError: If ``logs_dir`` cannot be listed (e.g. it does not exist).
        """
        experiments = []
        models = None
        # os.listdir() order is arbitrary; sort for a stable experiment order.
        for folder in sorted(os.listdir(self.logs_dir)):
            folder_path = os.path.join(self.logs_dir, folder)
            if self.is_experiment_folder(folder_path):
                experiments.append(Experiment(folder_path))
            elif folder == "models" and os.path.isdir(folder_path):
                models = Models(folder_path)
        # Assign only after a successful scan so a failure midway does not
        # leave a half-populated index behind.
        self.experiments = experiments
        self.models = models

    def is_experiment_folder(self, folder_path):
        """Return True when ``folder_path`` is a directory containing ``meta.yaml``."""
        return (
            os.path.isdir(folder_path) and
            os.path.isfile(os.path.join(folder_path, "meta.yaml"))
        )

    def list_experiments(self):
        """Return the IDs of all loaded experiments, in load order."""
        return [exp.experiment_id for exp in self.experiments]

    def get_experiment_by_id(self, exp_id):
        """Return the experiment whose ID equals ``exp_id``, or ``None``.

        IDs are folder names (strings); ``exp_id`` is compared by its string
        form so an integer ID such as ``123`` matches folder ``"123"``.
        """
        wanted = str(exp_id)
        for exp in self.experiments:
            if exp.experiment_id == wanted:
                return exp
        return None


In [7]:
# Build the index; the constructor scans the log directory right away.
mlflow_log_handler = MLflowLogs(mlflow_logs)

In [8]:
# IDs of the experiments that were loaded from the log directory.
mlflow_log_handler.list_experiments()

['132899082267310288', '815316816171043374']

In [9]:
# Display the loaded Experiment objects (repr shows id, trace count, creation time).
mlflow_log_handler.experiments

[Experiment(id='132899082267310288', traces=0, created='2025-06-27 14:23:30 UTC'),
 Experiment(id='815316816171043374', traces=24, created='2025-06-28 06:36:35 UTC')]

In [12]:
# Print each experiment's summary followed by one line per trace.
for exp in mlflow_log_handler.experiments:
    print(exp.metadata)
    print(10*'=')
    # Use the traces already loaded at construction time. load_traces() would
    # re-read and re-parse every traces.json from disk, and could disagree
    # with the trace_count printed just above.
    for trace in exp.traces:
        print(trace)

{'experiment_id': '132899082267310288', 'creation_time': 1751034210378, 'creation_time_str': '2025-06-27 14:23:30 UTC', 'trace_count': 0}
{'experiment_id': '815316816171043374', 'creation_time': 1751092595089, 'creation_time_str': '2025-06-28 06:36:35 UTC', 'trace_count': 24}
Trace(id='05b2cdf0c02049478b1b37399282d88c', spans=29, start='2025-06-29 14:16:51 UTC')
Trace(id='1eab952cc4694cb4b24322bb598660e3', spans=29, start='2025-07-03 07:26:51 UTC')
Trace(id='1eed59cbad4d48308680c48d5ec0e1aa', spans=8, start='2025-06-29 14:13:52 UTC')
Trace(id='378981db883d47b0877df0fe4b7903c0', spans=29, start='2025-07-01 16:21:58 UTC')
Trace(id='52c0ffea5bb148efb18513566d3554c4', spans=29, start='2025-06-29 14:11:19 UTC')
Trace(id='5fc1a238ed0443efab9dc12653f10075', spans=16, start='2025-06-28 09:25:03 UTC')
Trace(id='6eac28386a9d433283b1fd138a7f6f59', spans=8, start='2025-06-28 08:19:27 UTC')
Trace(id='7603e45cc04a44c7b8531b0ad623f4be', spans=29, start='2025-07-01 16:00:40 UTC')
Trace(id='7afd6f476b4