In [4]:
import datetime
import json
import os
from glob import glob
from typing import List

import mlflow
import numpy as np

In [None]:
# NOTE(review): `exp_run_mappings` is not defined in any cell visible in this notebook,
# so this lookup presumably relies on leftover kernel state - TODO confirm or remove.
exp_run_mappings['147575009518632148']

In [None]:
client=mlflow.MlflowClient()

In [None]:
def get_registered_models(mlflow_client=None):
    """List every registered model with the date and run of its first latest version.

    Args:
        mlflow_client: Object exposing ``search_registered_models()``. When omitted,
            the notebook-level ``client`` is used, as before.

    Returns:
        A list of ``[name, date_updated, run_id]`` entries, where ``date_updated`` is a
        ``YYYY-MM-DD`` string in local time derived from the version's
        ``last_updated_timestamp`` (milliseconds since the epoch).

    Registered models that have no versions are skipped instead of raising
    ``IndexError``.
    """
    if mlflow_client is None:
        mlflow_client = client

    register_model_details = []
    for model in mlflow_client.search_registered_models():
        versions = model.latest_versions
        if not versions:
            # Nothing to report for a model that has no version yet.
            continue
        # NOTE(review): only the first entry of `latest_versions` is inspected, as in the
        # original code - confirm this is the version you want when several are returned.
        latest = versions[0]
        updated_at = datetime.datetime.fromtimestamp(latest.last_updated_timestamp / 1e3)
        register_model_details.append([model.name, updated_at.date().isoformat(), latest.run_id])
    return register_model_details

In [None]:
get_registered_models()

In [None]:
class get_past_experiments_details():
    """Index local MLflow experiments and runs by date and read their logged losses.

    On construction the instance queries the MLflow client for experiments and scans
    the local ``mlruns`` directory, exposing:

    * ``exp_ids``        - list of ``[experiment_id, date]`` pairs (Default experiment excluded)
    * ``unique_dates``   - list of the distinct dates found
    * ``dates_to_exps``  - ``{date: [experiment_id, ...]}``
    * ``experiment_ids`` - flat list of every indexed experiment id
    * ``exps_to_runs``   - ``{experiment_id: [run_id, ...]}`` taken from the directory tree

    Dates are ``YYYY-MM-DD`` strings in local time.
    """

    # Root of the local MLflow file store; can be overridden per instance in ``__init__``.
    mlruns_dir = "mlruns"
    # Experiment id MLflow gives to its built-in "Default" experiment.
    _DEFAULT_EXPERIMENT_ID = "0"

    def __init__(self, client=None, mlruns_dir=None):
        """Build the date -> experiment and experiment -> run indexes.

        Args:
            client: Optional MLflow client (or compatible object). A new
                ``mlflow.MlflowClient()`` is created when omitted.
            mlruns_dir: Optional path of the local ``mlruns`` directory. Defaults to
                the relative path ``"mlruns"``.
        """
        self.client = client if client is not None else mlflow.MlflowClient()
        if mlruns_dir is not None:
            self.mlruns_dir = mlruns_dir
        self.generate_dates_to_exps_mappings()
        self.generate_exps_to_runs_mappings()

    @staticmethod
    def _ms_to_date(timestamp_ms):
        """Convert an epoch timestamp in milliseconds to a local ``YYYY-MM-DD`` string."""
        return datetime.datetime.fromtimestamp(timestamp_ms / 1e3).date().isoformat()

    def _run_path(self, exp_id, run_id, *parts):
        """Return the path of ``parts`` inside the directory of run ``run_id``."""
        return os.path.join(self.mlruns_dir, str(exp_id), str(run_id), *parts)

    def get_registered_models(self):
        """Return ``[name, version, stage, date_updated, run_id]`` per registered model.

        The values come from the first entry of each model's ``latest_versions``.
        Models that have no versions are skipped instead of raising ``IndexError``.
        """
        register_model_details = []
        for model in self.client.search_registered_models():
            versions = model.latest_versions
            if not versions:
                continue
            latest = versions[0]
            register_model_details.append([
                model.name,
                latest.version,
                latest.current_stage,
                self._ms_to_date(latest.last_updated_timestamp),
                latest.run_id,
            ])
        return register_model_details

    def generate_dates_to_exps_mappings(self):
        """Group experiments by their last-update date.

        The Default experiment is excluded by id rather than by its position in the
        search results, so a real experiment is never dropped by accident.
        """
        self.exp_ids = [
            [exp.experiment_id, self._ms_to_date(exp.last_update_time)]
            for exp in self.client.search_experiments()
            if str(exp.experiment_id) != self._DEFAULT_EXPERIMENT_ID
        ]
        # Single pass; dates keep the order in which they are first seen.
        self.dates_to_exps = {}
        for exp_id, date in self.exp_ids:
            self.dates_to_exps.setdefault(date, []).append(exp_id)
        self.unique_dates = list(self.dates_to_exps)
        self.experiment_ids = [e for es in self.dates_to_exps.values() for e in es]

    def generate_exps_to_runs_mappings(self):
        """Map every indexed experiment id to the run ids found on disk.

        A sub-directory counts as a run only when it contains a ``meta.yaml`` file,
        which keeps the experiment's own ``meta.yaml`` and auxiliary folders (such as
        an experiment-level ``tags`` directory) out of the run list. Run ids are sorted
        so the result does not depend on directory listing order.
        """
        self.exps_to_runs = {}
        for exp_id in self.experiment_ids:
            pattern = os.path.join(self.mlruns_dir, str(exp_id), "*")
            self.exps_to_runs[exp_id] = sorted(
                os.path.basename(path)
                for path in glob(pattern)
                if os.path.isfile(os.path.join(path, "meta.yaml"))
            )

    def _collect_losses_for_date(self, date, read_run_losses):
        """Apply ``read_run_losses(exp_id, run_id)`` to every run of every experiment on ``date``."""
        return {
            exp: {run: read_run_losses(exp, run) for run in self.exps_to_runs[exp]}
            for exp in self.dates_to_exps[date]
        }

    def get_exp_and_run_losses_for_date_detailed(self,date:str):
        """Return ``{exp_id: {run_id: per-step losses}}`` for experiments updated on ``date``.

        Raises:
            KeyError: if no indexed experiment was last updated on ``date``.
        """
        return self._collect_losses_for_date(date, self.get_run_losses_detailed)

    def get_exp_and_run_losses_for_date_table(self,date:str):
        """Return ``{exp_id: {run_id: [loss, loss]}}`` read from each run's comparison table.

        Raises:
            KeyError: if no indexed experiment was last updated on ``date``.
        """
        return self._collect_losses_for_date(date, self.get_run_losses_table)

    def _read_metric(self, exp_id, run_id, metric):
        """Parse one metric file into ``{third_field: second_field}`` (values kept as strings).

        Each non-blank line is split on whitespace. Blank lines are ignored, so the
        last record is kept even when the file does not end with a newline.
        """
        values = {}
        with open(self._run_path(exp_id, run_id, "metrics", metric)) as f:
            for line in f:
                fields = line.split()
                if not fields:
                    continue
                values[fields[2]] = fields[1]
        return values

    def get_run_losses_detailed(self,exp_id,run_id):
        """Return ``{step: [train_loss, val_loss]}`` for one run, as strings from the files.

        Raises:
            FileNotFoundError: if the ``train_loss`` or ``val_loss`` metric file is missing.
            KeyError: if ``val_loss`` has no record for a step present in ``train_loss``.
        """
        train_losses = self._read_metric(exp_id, run_id, "train_loss")
        val_losses = self._read_metric(exp_id, run_id, "val_loss")
        return {step: [loss, val_losses[step]] for step, loss in train_losses.items()}

    def get_run_losses_table(self,exp_id,run_id):
        """Return columns 2 and 3 of the first row of the run's ``comparison_table.json``."""
        with open(self._run_path(exp_id, run_id, "artifacts", "comparison_table.json"),'r') as f:
            json_file = json.load(f)
        first_row = json_file["data"][0]
        return [first_row[2], first_row[3]]

    def compare_losses(self,date,runs):
        """Return ``{run_id: table losses}`` for the requested ``runs`` on ``date``.

        Raises:
            KeyError: if ``date`` is unknown or a requested run is not part of it.
        """
        losses_table = self.get_exp_and_run_losses_for_date_table(date)
        run_losses = {}
        for exp_runs in losses_table.values():
            run_losses.update(exp_runs)
        return {r: run_losses[r] for r in runs}

    def get_run_ids(self,exps):
        """Return the run ids of all experiments in ``exps`` as one flat list."""
        return [run for e in exps for run in self.exps_to_runs[e]]

    def get_exp_names(self,exp_ids):
        """Return the experiment name for every id in ``exp_ids``, in order."""
        return [self.client.get_experiment(exp_id).name for exp_id in exp_ids]

    def experiment_names_to_ids(self, names):
        """Return the experiment id for every name in ``names``, in order.

        NOTE(review): presumably raises ``AttributeError`` for an unknown name, since the
        client lookup result is dereferenced without a check - confirm against MLflow.
        """
        return [self.client.get_experiment_by_name(name).experiment_id for name in names]

In [None]:
e=get_past_experiments_details()
e.dates_to_exps

In [None]:
e.exps_to_runs

In [None]:
losses_table=e.get_exp_and_run_losses_for_date_table('2024-01-22')

In [None]:
losses=e.get_exp_and_run_losses_for_date_detailed('2024-01-22')

In [None]:
e.compare_losses('2024-01-22',['a0da97aacfa04a329147b9294e047c6e',
  'b966edaecd2e40d4b36f3f1c72d48276'])

In [None]:
client=mlflow.MlflowClient()

In [None]:
runs=['a0da97aacfa04a329147b9294e047c6e','b966edaecd2e40d4b36f3f1c72d48276']

In [None]:
str(datetime.datetime.fromtimestamp(client.get_run(runs[0]).info.end_time/1e3))

In [None]:
# NOTE(review): `model` is only bound as a loop variable inside the registered-model helpers
# defined earlier; no visible cell defines it at notebook level - TODO confirm where it comes from.
# Converts the millisecond timestamp to a local datetime and keeps only the date part.
str(datetime.datetime.fromtimestamp(model.latest_versions[0].last_updated_timestamp/1e3)).split(' ')[0]

In [2]:
import mlflow

In [None]:
# Launch the "train" entry point of the MLflow project in the current directory.
# synchronous=False makes the call return a submitted-run handle instead of blocking.
mlflow.projects.run(
    uri=".",
    entry_point="train",
    run_name="nn",
    parameters={
        'name': "as",
        'epochs': 1,
        'trials': 1,
    },
    backend='local',
    env_manager='local',
    synchronous=False,
)

2024/01/22 16:54:17 INFO mlflow.projects.backend.local: === Asynchronously launching MLflow run with ID eabc0694a9d0456cbbc1cb69d3606c5e ===


<mlflow.projects.submitted_run.LocalSubmittedRun at 0x161d54be0>

* 'schema_extra' has been renamed to 'json_schema_extra'
2024/01/22 16:54:19 INFO mlflow.projects.utils: === Created directory /var/folders/k5/l531tc5j2070y5w0jlhbhfcw0000gn/T/tmpqtw7wdrw for downloading remote URIs passed to arguments of type 'path' ===
2024/01/22 16:54:19 INFO mlflow.projects.backend.local: === Running command 'python src/train.py as 1 1' in run with ID 'eabc0694a9d0456cbbc1cb69d3606c5e' === 
Seed set to 42
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


data/X.csv
Sanity Checking: |          | 0/? [00:00<?, ?it/s]


  | Name   | Type       | Params
--------------------------------------
0 | layers | Sequential | 295   
--------------------------------------
295       Trainable params
0         Non-trainable params
295       Total params
0.001     Total estimated model params size (MB)
/opt/homebrew/Caskroom/miniforge/base/envs/mlflow_k8s/lib/python3.8/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:436: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.


Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

  loss = self.loss(preds, y)
/opt/homebrew/Caskroom/miniforge/base/envs/mlflow_k8s/lib/python3.8/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:436: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.


Epoch 0: 100%|██████████| 20/20 [00:16<00:00,  1.18it/s]                   
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/3 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/3 [00:00<?, ?it/s][A
Validation DataLoader 0:  33%|███▎      | 1/3 [00:00<00:00, 48.53it/s][A
Validation DataLoader 0:  67%|██████▋   | 2/3 [00:00<00:00, 76.22it/s][A
Validation DataLoader 0: 100%|██████████| 3/3 [00:00<00:00, 44.73it/s][A
Epoch 0: 100%|██████████| 20/20 [00:36<00:00,  0.54it/s]              [A


  loss = self.loss(preds, y)
`Trainer.fit` stopped: `max_epochs=1` reached.




 tensor(15199.9512) 


{'date': '2024-01-22', 'runID': ['2ad12d336b044bfa980b73e2dcd14f39'], 'train_loss': array(15199.951, dtype=float32), 'val_loss': array(9460.56, dtype=float32)}


2024/01/22 16:55:24 INFO mlflow.projects: === Run (ID 'eabc0694a9d0456cbbc1cb69d3606c5e') succeeded ===


In [5]:
client=mlflow.MlflowClient()

In [7]:
# Print the creation time of every experiment returned by the client,
# converting the millisecond epoch timestamp to a local datetime.
for x in client.search_experiments():
    print(datetime.datetime.fromtimestamp(x.creation_time/1e3))

2024-01-22 16:54:24.071000
2024-01-22 11:39:33.706000
2024-01-22 11:39:24.952000
2024-01-22 11:39:03.457000
2024-01-18 19:39:28.311000
2024-01-18 19:32:44.936000
2024-01-18 14:00:21.548000


In [8]:
client.search_experiments()

[<Experiment: artifact_location='file:///Users/anupam/Documents/Codebase/Studies/mlFlow-k8s-Fargate/mlruns/210312274051288785', creation_time=1705922664071, experiment_id='210312274051288785', last_update_time=1705922664071, lifecycle_stage='active', name='as', tags={}>,
 <Experiment: artifact_location='file:///mlflow/tmp/mlruns/147575009518632148', creation_time=1705903773706, experiment_id='147575009518632148', last_update_time=1705903773706, lifecycle_stage='active', name='qwdawd', tags={}>,
 <Experiment: artifact_location='file:///mlflow/tmp/mlruns/432371529360376795', creation_time=1705903764952, experiment_id='432371529360376795', last_update_time=1705903764952, lifecycle_stage='active', name='sd', tags={}>,
 <Experiment: artifact_location='file:///mlflow/tmp/mlruns/153729051477138785', creation_time=1705903743457, experiment_id='153729051477138785', last_update_time=1705903743457, lifecycle_stage='active', name='sdf', tags={}>,
 <Experiment: artifact_location='file:///mlflow/tmp

In [1]:
import boto3

In [2]:
import os
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
AK=os.environ["AK"]
SK=os.environ["SK"]

In [5]:
from utils.upload_to_s3 import upload_recursively_to_s3
upload_recursively_to_s3("data",AK,SK)

Searching "data/X.csv" in "mlops-optuna"
Searching "data/y.csv" in "mlops-optuna"


In [8]:
# Walk the local "data" directory and print, for every file, its local path,
# its path relative to "data", and the S3 path built from it.
for root, dirs, files in os.walk("data"):

    for filename in files:

        print("\n\n")
        # construct the full local path
        local_path = os.path.join(root, filename)
        print("local path",local_path)
        # path of the file relative to the walked root
        relative_path = os.path.relpath(local_path, "data")
        print("relative path",relative_path)
        # construct the S3 path; object keys always use "/" whatever the local separator is
        s3_path = "/".join(["data"] + relative_path.split(os.sep))
        print("s3 path",s3_path)




local path data/X.csv
relative path X.csv
s3 path data/X.csv



local path data/y.csv
relative path y.csv
s3 path data/y.csv


In [9]:
import mlflow
m=mlflow.MlflowClient()

In [15]:
exp=m.get_experiment_by_name(name="s3_trial")

In [34]:
# NOTE: this rebinds the name `datetime` to the class, shadowing the `datetime` module
# imported at the top of the notebook for every cell executed afterwards.
from datetime import datetime

# Minutes elapsed since the experiment was last updated.
# total_seconds() is required: timedelta.seconds only holds the sub-day remainder,
# so any gap longer than 24 hours would silently lose its whole days.
(datetime.now() - datetime.fromtimestamp(exp.last_update_time/1e3)).total_seconds()/60

33.4

In [16]:
import mlflow
c=mlflow.MlflowClient()

run=c.get_run('1e6e3e0d804245b4b17d833b267f8cba').info.run_name

In [13]:
from src.analyze_runs import MLFlow_app_client

e=MLFlow_app_client()

In [17]:
e.experiment_names_to_ids(['s3_trial'])

['422451744140292354']

In [20]:
filtered_runs,run_names=e.get_run_names_in_exp(['422451744140292354'])



 ['730a35a1394b4e77910db4ce65e876e1', '1e6e3e0d804245b4b17d833b267f8cba', '6c92da5031454a0f8a4a56e8e8a845cf', '0860bfa405ed47fd8632043bb0233cd0', '9f4068fd078d4f8992a1621a69f5fd4b', 'ef2389883a754f21b7e3ff89efd90fce'] 




In [24]:
filtered_runs

['730a35a1394b4e77910db4ce65e876e1',
 '1e6e3e0d804245b4b17d833b267f8cba',
 '6c92da5031454a0f8a4a56e8e8a845cf',
 '0860bfa405ed47fd8632043bb0233cd0',
 '9f4068fd078d4f8992a1621a69f5fd4b',
 'ef2389883a754f21b7e3ff89efd90fce']

In [25]:
run_names

['version_0.31_0.08_6_107',
 'version_0.36_0.09_51_21_71_10_17',
 'version_0.56_0.06_6_9_19_5_28',
 'version_0.49_0.03_85_11_11_17_34',
 'version_0.6_0.08_25_4_55_62',
 'version_0.57_0.07_9_28_32_16']

In [21]:
runs_selected=['version_0.36_0.09_51_21_71_10_17','version_0.56_0.06_6_9_19_5_28']

In [22]:
# Collect the ids of the runs whose name was picked in `runs_selected`.
# `run_names` and `filtered_runs` are walked in lockstep, so the order of
# `filtered_runs` is preserved in the result.
selected_run_ids=[]
for name,run_id in zip(run_names,filtered_runs):
    if name not in runs_selected:
        continue
    selected_run_ids.append(run_id)

In [23]:
selected_run_ids

['1e6e3e0d804245b4b17d833b267f8cba', '6c92da5031454a0f8a4a56e8e8a845cf']

In [27]:
loss_table=e.compare_losses('2024-01-25',selected_run_ids)

In [28]:
loss_table

{'1e6e3e0d804245b4b17d833b267f8cba': [15219.341796875, 9746.0107421875],
 '6c92da5031454a0f8a4a56e8e8a845cf': [15195.724609375, 9489.216796875]}

In [11]:
import re

# Load the deployment manifest as plain text.
with open("../k8s-deployment.yaml","r") as f:
    k8s_yaml=f.read()

# Find every image reference of the form mlops-webapp:v<major>.<minor>.<patch>.
# A raw string keeps `\d` and `\.` as regex escapes instead of invalid string escapes.
image_version=re.findall(r'mlops-webapp:v\d+\.\d+\.\d+',k8s_yaml)

In [12]:
image_version

[]