In [3]:
import sys
import os
from typing import Tuple
import pandas as pd 
import mlflow as mf

sys.path.append("/home/jupyter-aamir09/mlops2_with_dagster/")
from mlops2_with_dagster import encoder_pipeline
from pathlib import Path
from joblib import dump, load
from hamilton import driver, base
from mlops2_with_dagster.utils import get_project_dir, printse
from sklearn.preprocessing import (
    StandardScaler,
    LabelEncoder
)

from ortho import BaseKernel
from ortho.utils import Logger
from ortho.ortho.decorators import task
from ortho.ortho.callbacks import MlFlowCallBack, LoggerCallBack


# Filesystem locations used by the cells below (absolute paths on the Jupyter host).
# NOTE(review): "mlflow_artfacts" looks like a misspelling of "artifacts"; it is kept
# as-is because it names an on-disk location — confirm before renaming.
LOGGER_FOLDER_PATH = "/home/jupyter-aamir09/mlops2_with_dagster/notebooks"
ARTIFACT_PATH = "/home/jupyter-aamir09/mlops2_with_dagster/artifacts/mlflow_artfacts"

# Notebook-wide logger handle obtained from ortho's Logger helper.
logger = Logger().logger()
        
        

In [3]:
# Resolve the project root from the project name and print it for the record.
project = "mlops2_with_dagster"
project_dir = get_project_dir(project)
printse(f"project_dir: {project_dir}")

>>>> project_dir: /home/jupyter-aamir09/mlops2_with_dagster


In [8]:
class TargetExtractor(BaseKernel):
    """Kernel that requests the ``target`` node from the Hamilton encoder pipeline.

    The column names handed to the Hamilton driver (index, target and
    categorical columns) are fixed at construction time and kept on
    ``self.config``.
    """

    def __init__(self, callbacks, experiment_name, run_name, load_from_artifact=False):
        """Forward the kernel settings to ``BaseKernel`` and build the driver config.

        Args:
            callbacks: Callback objects, passed straight through to ``BaseKernel``.
            experiment_name: Passed straight through to ``BaseKernel``.
            run_name: Passed straight through to ``BaseKernel``.
            load_from_artifact: Passed straight through to ``BaseKernel``
                (defaults to ``False``).
        """
        super().__init__(callbacks, experiment_name, run_name, load_from_artifact)

        self.index_col = "passengerid"
        self.target_col = "survived"
        self.cat_cols = ["sex", "cabin", "embarked"]
        # Configuration dictionary consumed by the Hamilton driver in get_target().
        self.config = dict(
            index_column=self.index_col,
            target_column=self.target_col,
            categorical_columns=self.cat_cols,
        )

    @task(build_on_previous_run=True, end_mlflow_run=False)
    def get_target(self, df_train):
        """Execute the encoder pipeline for the ``target`` node only.

        Args:
            df_train: Training data supplied to the pipeline as its
                ``df_train`` input.

        Returns:
            A ``(data, payload)`` pair, before any wrapping done by ``@task``:
            ``data`` maps ``"target"`` to the driver's output and ``payload``
            carries the artifact path under ``"artifact_path"``.
        """
        hamilton_driver = driver.Driver(self.config, encoder_pipeline)
        requested_nodes = ["target"]
        pipeline_inputs = {"df_train": df_train}
        target_output = hamilton_driver.execute(requested_nodes, inputs=pipeline_inputs)

        return {"target": target_output}, {"artifact_path": ARTIFACT_PATH}

    def run(self, *args, **kwargs):
        """Kernel entry point: delegate every argument to ``get_target``."""
        return self.get_target(*args, **kwargs)
        
        
    

In [5]:
# Input CSV locations under the project's data/ folder.
train_data: Path = project_dir / "data" / "train.csv"
test_data: Path = project_dir / "data" / "test.csv"

In [6]:
# parameters
# Read the training CSV located at `train_data` into a DataFrame.
# NOTE(review): the "parameters" marker suggests this cell is meant to be
# overridden by dagstermill/papermill at execution time — confirm the cell tag.
df_train = pd.read_csv(train_data)

In [9]:
import dagstermill

# Both callbacks are registered for the "get_target" kernel only.
kernel_callbacks = [
    LoggerCallBack(LOGGER_FOLDER_PATH, kernel_names=["get_target"]),
    MlFlowCallBack(kernel_names=["get_target"]),
]
extractor = TargetExtractor(
    callbacks=kernel_callbacks,
    experiment_name="Mlflow_with_Dagster",
    run_name="BadamBhum",
)

# NOTE(review): get_target is declared with end_mlflow_run=False, so the MLflow run
# is presumably left open after this cell — call mf.end_run() here if it must be closed.
run_result = extractor.run(df_train=df_train)

# The first element of the run result holds the data dict; hand its "target" entry
# to Dagster as this notebook's "target" output.
dagstermill.yield_result(run_result[0]["target"], output_name="target")

2023-09-13 12:57:39,632 - DEBUG - Starting new HTTP connection (1): 127.0.0.1:5002
2023-09-13 12:57:39,881 - DEBUG - http://127.0.0.1:5002 "GET /api/2.0/mlflow/experiments/get-by-name?experiment_name=Mlflow_with_Dagster HTTP/1.1" 200 241
2023-09-13 12:57:39,886 - DEBUG - Resetting dropped connection: 127.0.0.1
2023-09-13 12:57:39,893 - DEBUG - http://127.0.0.1:5002 "GET /api/2.0/mlflow/runs/get?run_uuid=5724c3c9324b4c2cb39ba5b00576a284&run_id=5724c3c9324b4c2cb39ba5b00576a284 HTTP/1.1" 200 1576
2023-09-13 12:57:39,899 - DEBUG - Resetting dropped connection: 127.0.0.1
2023-09-13 12:57:39,914 - DEBUG - http://127.0.0.1:5002 "POST /api/2.0/mlflow/runs/update HTTP/1.1" 200 423
2023-09-13 12:57:39,916 - DEBUG - Resetting dropped connection: 127.0.0.1
2023-09-13 12:57:39,921 - DEBUG - http://127.0.0.1:5002 "GET /api/2.0/mlflow/runs/get?run_uuid=5724c3c9324b4c2cb39ba5b00576a284&run_id=5724c3c9324b4c2cb39ba5b00576a284 HTTP/1.1" 200 1575
2023-09-13 12:57:39,922 - INFO - Successfuly initiated run

2023-09-13 12:57:40,759 - DEBUG - http://127.0.0.1:5002 "PUT /api/2.0/mlflow-artifacts/artifacts/1/5724c3c9324b4c2cb39ba5b00576a284/artifacts/training_outputs.pickle HTTP/1.1" 200 2
2023-09-13 12:57:40,763 - DEBUG - Resetting dropped connection: 127.0.0.1
2023-09-13 12:57:40,764 - DEBUG - http://127.0.0.1:5002 "PUT /api/2.0/mlflow-artifacts/artifacts/1/5724c3c9324b4c2cb39ba5b00576a284/artifacts/encoders.pickle HTTP/1.1" 200 2


Unnamed: 0_level_0,target
passengerid,Unnamed: 1_level_1
0,1
1,0
2,0
3,0
4,1
...,...
99995,1
99996,0
99997,0
99998,0
