In [2]:
import datetime
import json
import os
from glob import glob
from typing import List

import mlflow
import numpy as np

In [35]:
# Look up the run ids stored for a single experiment id.
# NOTE(review): `exp_run_mappings` is not defined in any cell visible here —
# confirm this cell still works after Restart Kernel -> Run All.
exp_run_mappings['147575009518632148']

['a0da97aacfa04a329147b9294e047c6e', 'b966edaecd2e40d4b36f3f1c72d48276']

In [43]:
# Module-level MLflow client; the free function get_registered_models() reads it as a global.
client=mlflow.MlflowClient()

In [44]:
def get_registered_models():
    """Summarise every registered model known to the module-level ``client``.

    Returns:
        A list with one ``[name, date_updated, run_id]`` entry per registered
        model, where ``date_updated`` is the local-time ``YYYY-MM-DD`` date of
        the first entry in ``latest_versions`` and ``run_id`` is that
        version's source run.

    Registered models that have no versions yet are skipped, because there is
    no version to read a run id or timestamp from.
    """
    register_model_details=[]
    for model in client.search_registered_models():
        # A model can be registered before any version is created; indexing
        # latest_versions[0] on it would raise IndexError.
        if not model.latest_versions:
            continue
        latest=model.latest_versions[0]
        # Timestamps are epoch milliseconds; fromtimestamp() expects seconds.
        date_updated=datetime.datetime.fromtimestamp(latest.last_updated_timestamp/1e3).date().isoformat()
        register_model_details.append([model.name,date_updated,latest.run_id])
    return register_model_details

In [45]:
# Quick check: one [name, date_updated, run_id] row per registered model.
get_registered_models()

[['Dummy1', '2024-01-22', 'a0da97aacfa04a329147b9294e047c6e']]

In [323]:
class get_past_experiments_details():
    """Index past MLflow experiments by date and read their logged losses.

    On construction the class queries the MLflow client for experiments and
    scans the local file-store directory (``mlruns_dir``) for run folders.
    The following attributes are populated:

    * ``exp_ids``        - list of ``[experiment_id, 'YYYY-MM-DD']`` pairs
    * ``unique_dates``   - list of distinct dates found in ``exp_ids``
    * ``dates_to_exps``  - ``{date: [experiment_id, ...]}``
    * ``experiment_ids`` - flat list of every indexed experiment id
    * ``exps_to_runs``   - ``{experiment_id: [run_id, ...]}``

    Args:
        mlruns_dir: Root folder of the MLflow file store. Defaults to
            ``"mlruns"`` relative to the current working directory.
    """

    # MLflow's auto-created "Default" experiment; it is excluded from the index.
    DEFAULT_EXPERIMENT_ID="0"

    def __init__(self, mlruns_dir="mlruns"):
        self.client=mlflow.MlflowClient()
        self.mlruns_dir=mlruns_dir
        self.generate_dates_to_exps_mappings()
        self.generate_exps_to_runs_mappings()

    @staticmethod
    def _timestamp_to_date(timestamp_ms):
        """Convert an epoch-milliseconds timestamp to a local 'YYYY-MM-DD' string."""
        return datetime.datetime.fromtimestamp(timestamp_ms/1e3).date().isoformat()

    def get_registered_models(self):
        """Return ``[name, version, stage, date_updated, run_id]`` per registered model.

        The values come from the first entry of each model's
        ``latest_versions``. Models without any version are skipped, since
        indexing ``latest_versions[0]`` on them would raise ``IndexError``.
        """
        register_model_details=[]
        for model in self.client.search_registered_models():
            if not model.latest_versions:
                continue
            latest=model.latest_versions[0]
            register_model_details.append([
                model.name,
                latest.version,
                latest.current_stage,
                self._timestamp_to_date(latest.last_updated_timestamp),
                latest.run_id,
            ])
        return register_model_details

    def generate_dates_to_exps_mappings(self):
        """Build ``exp_ids``, ``dates_to_exps``, ``unique_dates`` and ``experiment_ids``.

        The default experiment is excluded by id rather than by position, so
        the result does not depend on the order ``search_experiments()``
        returns or on whether the default experiment still exists.
        """
        self.exp_ids=[
            [exp.experiment_id,self._timestamp_to_date(exp.last_update_time)]
            for exp in self.client.search_experiments()
            if exp.experiment_id!=self.DEFAULT_EXPERIMENT_ID
        ]
        # Single pass grouping; dates keep first-seen order.
        self.dates_to_exps={}
        for exp_id,date in self.exp_ids:
            self.dates_to_exps.setdefault(date,[]).append(exp_id)
        self.unique_dates=list(self.dates_to_exps)
        self.experiment_ids=[e for es in self.dates_to_exps.values() for e in es]

    def generate_exps_to_runs_mappings(self):
        """Build ``exps_to_runs`` by listing run folders under each experiment folder.

        An entry counts as a run only if it is a directory that contains its
        own ``meta.yaml``. This skips the experiment's ``meta.yaml`` file and
        any other experiment-level folders that are not runs.
        """
        self.exps_to_runs={}
        for exp_id in self.experiment_ids:
            run_ids=[]
            for path in glob(os.path.join(self.mlruns_dir,str(exp_id),"*")):
                if os.path.isfile(os.path.join(path,"meta.yaml")):
                    # basename() is separator-agnostic, unlike split('/').
                    run_ids.append(os.path.basename(path))
            self.exps_to_runs[exp_id]=run_ids

    def _collect_losses_for_date(self,date,load_run_losses):
        """Return ``{exp_id: {run_id: load_run_losses(exp_id, run_id)}}`` for ``date``."""
        return {
            exp:{run:load_run_losses(exp,run) for run in self.exps_to_runs[exp]}
            for exp in self.dates_to_exps[date]
        }

    def get_exp_and_run_losses_for_date_detailed(self,date:str):
        """Per-step losses of every run of every experiment indexed under ``date``.

        Returns:
            ``{exp_id: {run_id: {step: [train_loss, val_loss]}}}``.

        Raises:
            KeyError: if ``date`` is not a key of ``dates_to_exps``.
        """
        return self._collect_losses_for_date(date,self.get_run_losses_detailed)

    def get_exp_and_run_losses_for_date_table(self,date:str):
        """Comparison-table values of every run of every experiment indexed under ``date``.

        Returns:
            ``{exp_id: {run_id: [value, value]}}`` as produced by
            ``get_run_losses_table``.

        Raises:
            KeyError: if ``date`` is not a key of ``dates_to_exps``.
        """
        return self._collect_losses_for_date(date,self.get_run_losses_table)

    def _read_metric(self,exp_id,run_id,metric_name):
        """Parse one metric file into ``{step: value}`` (both kept as strings).

        Each non-blank line is split on whitespace; the second field is used
        as the value and the third as the step. Blank or short lines are
        ignored, so a missing trailing newline no longer drops the last record.
        """
        path=os.path.join(self.mlruns_dir,str(exp_id),str(run_id),'metrics',metric_name)
        values_by_step={}
        with open(path) as f:
            for line in f:
                fields=line.split()
                if len(fields)<3:
                    continue
                values_by_step[fields[2]]=fields[1]
        return values_by_step

    def get_run_losses_detailed(self,exp_id,run_id):
        """Return ``{step: [train_loss, val_loss]}`` for one run.

        Values and steps are the raw strings found in the run's
        ``metrics/train_loss`` and ``metrics/val_loss`` files.

        Raises:
            FileNotFoundError: if either metric file is missing.
            KeyError: if ``val_loss`` has no entry for a step in ``train_loss``.
        """
        train_losses=self._read_metric(exp_id,run_id,'train_loss')
        val_losses=self._read_metric(exp_id,run_id,'val_loss')
        return {step:[train_losses[step],val_losses[step]] for step in train_losses}

    def get_run_losses_table(self,exp_id,run_id):
        """Return columns 2 and 3 of the first row of the run's ``comparison_table.json``.

        NOTE(review): these are presumably the train and validation loss;
        confirm against the code that logs the table.

        Raises:
            FileNotFoundError: if the artifact does not exist for the run.
        """
        path=os.path.join(self.mlruns_dir,str(exp_id),str(run_id),'artifacts','comparison_table.json')
        with open(path,'r') as f:
            table=json.load(f)
        first_row=table["data"][0]
        return [first_row[2],first_row[3]]

    def compare_losses(self,date,runs):
        """Return ``{run_id: table_values}`` for the requested ``runs`` on ``date``.

        Raises:
            KeyError: if ``date`` is unknown or a run id is not among the
                runs of the experiments indexed under that date.
        """
        losses_table=self.get_exp_and_run_losses_for_date_table(date)
        # Flatten {exp: {run: values}} into {run: values}.
        run_losses={run:values for per_exp in losses_table.values() for run,values in per_exp.items()}
        return {r:run_losses[r] for r in runs}

    def get_run_ids(self,exps):
        """Return the run ids of all given experiment ids, concatenated in order."""
        return [run for e in exps for run in self.exps_to_runs[e]]

    def get_exp_names(self,exp_ids):
        """Return the experiment name for each experiment id, in the same order."""
        return [self.client.get_experiment(exp_id).name for exp_id in exp_ids]

    def experiment_names_to_ids(self, names):
        """Return the experiment id for each experiment name, in the same order.

        Raises:
            ValueError: if the client finds no experiment with a given name.
        """
        exp_ids=[]
        for name in names:
            experiment=self.client.get_experiment_by_name(name)
            if experiment is None:
                raise ValueError(f"No experiment named {name!r}")
            exp_ids.append(experiment.experiment_id)
        return exp_ids

In [324]:
# Build the experiment index (the constructor queries MLflow and scans the
# run folders), then display the date -> experiment ids mapping.
e=get_past_experiments_details()
e.dates_to_exps

{'2024-01-22': ['147575009518632148',
  '432371529360376795',
  '153729051477138785'],
 '2024-01-18': ['737505758160016794', '765617377597547522']}

In [325]:
# Display the experiment id -> run ids mapping built from the directory listing.
e.exps_to_runs

{'147575009518632148': ['a0da97aacfa04a329147b9294e047c6e',
  'b966edaecd2e40d4b36f3f1c72d48276'],
 '432371529360376795': ['1794fe922c7c45fd853f963d72ef48cf'],
 '153729051477138785': ['f8b2fec17ec247769493788b99d13bd7'],
 '737505758160016794': ['ec09f34cda7a4c26a0bfd3ac32b06d27',
  'ac75b7a4aa7f4e309f12b2d784ff322d',
  'a1d1fa7cc7a5492786015b51c59c86dd',
  '861c51cb34c44d088fac890c29d5d605'],
 '765617377597547522': ['8b03eae6293a4e48b00d714500d3e489',
  '973abe2fa3c945ceb8542bda25a8226f',
  'dc85bb2d59f64f7eb480f743ca166e8e']}

In [326]:
# Values read from each run's artifacts/comparison_table.json, for the experiments indexed under this date.
losses_table=e.get_exp_and_run_losses_for_date_table('2024-01-22')

In [327]:
# Per-step [train_loss, val_loss] pairs read from each run's metrics files, for the same date.
losses=e.get_exp_and_run_losses_for_date_detailed('2024-01-22')

In [328]:
# Compare the comparison-table values of the two runs listed under experiment 147575009518632148.
e.compare_losses('2024-01-22',['a0da97aacfa04a329147b9294e047c6e',
  'b966edaecd2e40d4b36f3f1c72d48276'])

{'a0da97aacfa04a329147b9294e047c6e': [15357.1767578125, 9365.21484375],
 'b966edaecd2e40d4b36f3f1c72d48276': [15258.109375, 9090.5361328125]}

In [3]:
# NOTE(review): rebinds `client`, which an earlier cell already creates the same way;
# one of the two definitions is redundant.
client=mlflow.MlflowClient()

In [31]:
# Run ids to inspect below.
runs=['a0da97aacfa04a329147b9294e047c6e','b966edaecd2e40d4b36f3f1c72d48276']

In [37]:
# Show the first run's end time as a local datetime string; end_time is in
# epoch milliseconds, hence the division by 1e3 before fromtimestamp().
str(datetime.datetime.fromtimestamp(client.get_run(runs[0]).info.end_time/1e3))

'2024-01-22 11:39:44.762000'

1705903784762

In [26]:
# Date part (YYYY-MM-DD) of a model version's last-updated timestamp.
# NOTE(review): `model` is not defined at top level in any cell visible here —
# confirm this cell still works after Restart Kernel -> Run All.
str(datetime.datetime.fromtimestamp(model.latest_versions[0].last_updated_timestamp/1e3)).split(' ')[0]

'2024-01-22'

In [None]:
# Scratch note listing field names of interest; the cell has no execution count.
# NOTE(review): none of these names is defined at top level in the visible cells,
# so running this cell would raise NameError — consider deleting it.
last_updated_timestamp, name, run_id