In [1]:
%%capture
!pip install -r requirements.txt

In [5]:
import kfp
from kfp.dsl import component, pipeline, Input, Output, Dataset, Model, Metrics, ClassificationMetrics, Artifact
import os
from dotenv import load_dotenv

# Load environment variables (GitHub and MinIO credentials) from the local `env` file
load_dotenv('env')


def _require_env(name):
    """Return the value of environment variable `name`, failing fast when it is missing.

    Args:
        name: Name of the environment variable to read.

    Returns:
        The non-empty string value of the variable.

    Raises:
        RuntimeError: If the variable is unset or empty. Without this check the
            value would silently become None and be passed on as a pipeline argument.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(
            f"Required environment variable {name} is not set; "
            "define it in the 'env' file or in the kernel environment."
        )
    return value


# Github variables
GITHUB_USERNAME = _require_env("GITHUB_USERNAME")
GITHUB_TOKEN = _require_env("GITHUB_TOKEN")
GITHUB_REPO_URL = "https://github.com/danilonicioka/mlops-workflow.git"
GITHUB_CLONED_DIR = "mlops-workflow"
GITHUB_TEST_BRANCH = "tests"

# Kubeflow variables
KUBEFLOW_PIPELINE_NAME = "mlops"
KUBEFLOW_HOST_URL = "http://ml-pipeline.kubeflow:8888"  # KFP host URL
KUBEFLOW_PIPELINE_ID="7451916e-eee8-4c14-ad5f-8dee5aa61e3b"
# The token is read from the file named by KF_PIPELINES_SA_TOKEN_PATH;
# surrounding whitespace is stripped so a trailing newline cannot end up in the auth header.
with open(os.environ['KF_PIPELINES_SA_TOKEN_PATH'], "r") as f:
    KUBEFLOW_TOKEN = f.read().strip()

# DVC variables
REMOTE_NAME = "minio_remote"
REMOTE_URL = "s3://dvc-data"
MINIO_URL = "minio-service.kubeflow:9000"  # host:port only; components add the scheme themselves
ACCESS_KEY = _require_env("ACCESS_KEY")
SECRET_KEY = _require_env("SECRET_KEY")
DVC_FILE_DIR = 'data/external'
DVC_FILE_NAME = 'dataset.csv'
BUCKET_NAME = "model-files"
MODEL_OBJECT_NAME = "model-store/youtubegoes5g/model.pt"

# Trigger handling in model_training: '1' and '2' always upload the trained model,
# '3' uploads only when the accuracy gain over the last stored run exceeds PERFORMANCE_FACTOR.
TRIGGER_TYPE = '1'
PERFORMANCE_FACTOR = 5.0

# Model config
LR = 0.0001
EPOCHS = 3500
PRINT_FREQUENCY = 500

# Model serve config vars
MODEL_NAME = "youtubegoes5g"
FRAMEWORK = "pytorch"
NAMESPACE = "kubeflow-user-example-com"
SVC_ACC = "sa-minio-kserve"

# Temp dir and files to save accuracy for trigger 3
TEMP_DIR = "tmp"
TEMP_FILE_ACC_IN_LAST_RUN = "accuracy_in_last_run.txt"
LAST_ACC_OBJECT_NAME = "accuracy-score/last_acc.txt"

# Define a KFP component factory function for data ingestion
@component(base_image="python:3.11.9",packages_to_install=['gitpython', 'dvc==3.54.1', 'dvc-s3==3.2.0', 'numpy==1.25.2', 'pandas==2.0.3'])
def data_ingestion(
    github_repo_url: str,
    github_cloned_dir: str,
    github_test_branch: str,
    github_username: str,
    github_token: str,
    remote_name: str,
    remote_url: str,
    minio_url: str,
    access_key: str,
    secret_key: str,
    dvc_file_dir: str,
    dvc_file_name: str,
    dataset_artifact: Output[Dataset],
    dataset_info: Output[Metrics]
    ):
    """Clone the Git repository, pull the DVC-tracked CSV and publish it as a Dataset artifact.

    Args:
        github_repo_url: HTTPS URL of the repository to clone.
        github_cloned_dir: Local directory the repository is cloned into.
        github_test_branch: Branch to check out when cloning.
        github_username: GitHub user embedded in the clone URL.
        github_token: GitHub token embedded in the clone URL.
        remote_name: Name given to the DVC remote.
        remote_url: URL of the DVC remote (e.g. an s3:// bucket URL).
        minio_url: MinIO endpoint as host:port; "http://" is prepended here.
        access_key: Access key ID configured on the DVC remote.
        secret_key: Secret access key configured on the DVC remote.
        dvc_file_dir: Directory of the dataset inside the cloned repository.
        dvc_file_name: File name of the CSV dataset.
        dataset_artifact: Output artifact receiving the pickled pandas DataFrame.
        dataset_info: Output metrics; receives the number of rows as "Dataset size".

    Raises:
        FileNotFoundError: If the dataset file is missing after the clone/pull steps.
            The message carries the status of every step so the root cause is visible.
    """
    from git import Repo
    from subprocess import run, CalledProcessError
    import os
    import shutil
    import pandas as pd

    def clone_repository_with_token(github_repo_url, github_cloned_dir, github_test_branch, github_username, github_token):
        """Clone a Git repository using a GitHub token in the URL and specifying the branch.

        Returns a human-readable status string; errors are reported, not raised.
        """
        try:
            # Construct the URL with the GitHub username and token
            url_with_token = f"https://{github_username}:{github_token}@{github_repo_url.split('//')[1]}"

            # Clone the repository from the specified branch
            Repo.clone_from(url_with_token, github_cloned_dir, branch=github_test_branch)
            return "Repository cloned successfully"
        except Exception as e:
            # Never echo the token back into logs or error messages
            message = str(e)
            if github_token:
                message = message.replace(github_token, "***")
            return f"Error occurred during repository cloning: {message}"

    def configure_dvc_remote(github_cloned_dir, remote_name, remote_url, minio_url, access_key, secret_key):
        """Configure the Minio bucket as the DVC remote repository using the `dvc remote` commands.

        Returns a human-readable status string; configuration stops at the first failing command.
        """
        http_minio = f'http://{minio_url}'
        dvc_commands = [
            # Add the remote and make it the default
            ['dvc', 'remote', 'add', '-d', remote_name, remote_url],
            # Configure the endpoint URL
            ['dvc', 'remote', 'modify', remote_name, 'endpointurl', http_minio],
            # Configure access key ID
            ['dvc', 'remote', 'modify', remote_name, 'access_key_id', access_key],
            # Configure secret access key
            ['dvc', 'remote', 'modify', remote_name, 'secret_access_key', secret_key],
        ]
        try:
            for command in dvc_commands:
                run(command, cwd=github_cloned_dir, capture_output=True, text=True, check=True)
            return f'Successfully configured Minio bucket as DVC remote repository: {remote_name}'
        except CalledProcessError as e:
            return f'Failed to configure DVC remote: {e.stderr}'
        except OSError as e:
            # Raised e.g. when the clone directory does not exist or `dvc` is not on PATH
            return f'Failed to configure DVC remote: {e}'

    def perform_dvc_pull(github_cloned_dir, remote_name):
        """Perform a DVC pull to synchronize local data with the remote repository.

        Returns a human-readable status string; errors are reported, not raised.
        """
        try:
            # Run the `dvc pull` command
            result = run(['dvc', 'pull', '-r', remote_name], cwd=github_cloned_dir, capture_output=True, text=True)
        except Exception as e:
            return f"Error occurred during dvc pull: {e}"

        if result.returncode != 0:
            return f"Error occurred during dvc pull: dvc pull failed with error: {result.stderr}"
        return "Successfully pulled data from remote DVC repository"

    # Run the steps and log their status so failures are visible in the component logs
    clone_result = clone_repository_with_token(github_repo_url, github_cloned_dir, github_test_branch, github_username, github_token)
    print(clone_result)
    configure_result = configure_dvc_remote(github_cloned_dir, remote_name, remote_url, minio_url, access_key, secret_key)
    print(configure_result)
    dvc_pull_result = perform_dvc_pull(github_cloned_dir, remote_name)
    print(dvc_pull_result)

    # Fail with the collected step statuses instead of an unexplained read error
    pulled_dataset_path = os.path.join(github_cloned_dir, dvc_file_dir, dvc_file_name)
    if not os.path.isfile(pulled_dataset_path):
        raise FileNotFoundError(
            f"Dataset {pulled_dataset_path} is missing after ingestion. "
            f"clone: {clone_result} | dvc remote: {configure_result} | dvc pull: {dvc_pull_result}"
        )

    # Save dataset with pandas in Dataset artifact
    tmp_dataset_path = "/tmp/" + dvc_file_name
    dataset = pd.read_csv(pulled_dataset_path)
    dataset.to_pickle(tmp_dataset_path)
    # shutil.move also works when /tmp and the artifact path live on different filesystems
    shutil.move(tmp_dataset_path, dataset_artifact.path)

    # save dataset info to see on kubeflow graph
    dataset_info.log_metric("Dataset size", dataset.shape[0])
    
# Component for data preparation
@component(base_image="python:3.11.9", packages_to_install=['pandas==2.0.3', 'numpy==1.25.2', 'torch==2.3.0', 'scikit-learn==1.2.2', 'imblearn'])
def data_preparation(
    dataset_artifact: Input[Dataset],
    X_train_artifact: Output[Dataset], 
    X_test_artifact: Output[Dataset],
    y_train_artifact: Output[Dataset],
    y_test_artifact: Output[Dataset],
    test_size: float = 0.2, 
    random_state: int = 42
    ):
    """Clean, balance, scale and split the dataset, saving the four splits as torch tensors.

    Args:
        dataset_artifact: Input artifact holding the pickled pandas DataFrame.
        X_train_artifact: Output artifact for the training features tensor.
        X_test_artifact: Output artifact for the test features tensor.
        y_train_artifact: Output artifact for the training labels tensor.
        y_test_artifact: Output artifact for the test labels tensor.
        test_size: Fraction of the samples assigned to the test split.
        random_state: Seed used for both SMOTE oversampling and the train/test split.
    """
    import pandas as pd
    import numpy as np
    from sklearn.model_selection import train_test_split
    from imblearn.over_sampling import SMOTE
    from sklearn.preprocessing import StandardScaler
    import torch
    import shutil

    # Load dataset from Dataset artifact
    df = pd.read_pickle(dataset_artifact.path)

    # Treat the ' ' and '-' placeholders as missing values
    df = df.replace([' ', '-', np.nan], np.nan)

    # Feature columns; they are cast to float and mean-imputed below
    columns_to_convert = [
        'CQI1', 'CQI2', 'CQI3', 'cSTD CQI', 'cMajority', 'c25 P', 'c50 P', 'c75 P', 
        'RSRP1', 'RSRP2', 'RSRP3', 'pMajority', 'p25 P', 'p50 P', 'p75 P', 
        'RSRQ1', 'RSRQ2', 'RSRQ3', 'qMajority', 'q25 P', 'q50 P', 'q75 P', 
        'SNR1', 'SNR2', 'SNR3', 'sMajority', 's25 P', 's50 P', 's75 P'
    ]
    df[columns_to_convert] = df[columns_to_convert].astype(float)

    # Replace np.nan with mean values for selective columns
    df[columns_to_convert] = df[columns_to_convert].fillna(df[columns_to_convert].mean())

    # Convert 'Stall' column to numerical values (assignment instead of chained inplace mutation)
    df['Stall'] = df['Stall'].replace({'Yes': 1, 'No': 0})

    X = df[columns_to_convert].values
    y = df['Stall'].values

    # NOTE(review): SMOTE and StandardScaler are fitted on the full dataset before the
    # train/test split, so the test split contains synthetic samples and statistics
    # derived from it - consider splitting first and fitting both on the training split only.

    # Apply SMOTE for balancing the dataset; seeded so repeated runs yield the same samples
    oversample = SMOTE(random_state=random_state)
    X, y = oversample.fit_resample(X, y)

    # Standardize the features
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    # Convert to torch tensors
    X = torch.tensor(X, dtype=torch.float32)
    y = torch.tensor(y, dtype=torch.float32)

    # Split the dataset into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

    # Write each split to a temp file, then move it to its artifact path.
    # shutil.move also works when /tmp and the artifact path live on different filesystems.
    split_outputs = (
        (X_train, "/tmp/X_train.pt", X_train_artifact),
        (X_test, "/tmp/X_test.pt", X_test_artifact),
        (y_train, "/tmp/y_train.pt", y_train_artifact),
        (y_test, "/tmp/y_test.pt", y_test_artifact),
    )
    for tensor, tmp_path, artifact in split_outputs:
        torch.save(tensor, tmp_path)
        shutil.move(tmp_path, artifact.path)

# Component for model training
@component(base_image="python:3.11.9", packages_to_install=['torch==2.3.0', 'scikit-learn==1.2.2', 'numpy==1.25.2','Minio==7.2.5'])
def model_training(
    X_train_artifact: Input[Dataset], 
    X_test_artifact: Input[Dataset],
    y_train_artifact: Input[Dataset],
    y_test_artifact: Input[Dataset],
    metrics: Output[Metrics], 
    classification_metrics: Output[ClassificationMetrics], 
    model_trained_artifact: Output[Model],
    up_model_cond: Output[Artifact],
    lr: float,
    epochs: int,
    print_frequency: int,
    minio_url: str,
    access_key: str,
    secret_key: str,
    bucket_name: str,
    model_object_name: str,
    trigger_type: str,
    performance_factor: float,
    last_accuracy_object_name: str,
    tmp_dir: str,
    tmp_file_last_acc: str
    ):
    """Train the stall classifier, log its metrics and conditionally upload it to MinIO.

    Args:
        X_train_artifact: Training features tensor saved with torch.save.
        X_test_artifact: Test features tensor saved with torch.save.
        y_train_artifact: Training labels tensor saved with torch.save.
        y_test_artifact: Test labels tensor saved with torch.save.
        metrics: Output metrics (accuracy, precision, recall, F1, upload decision).
        classification_metrics: Output artifact receiving the confusion matrix.
        model_trained_artifact: Output artifact receiving the model state_dict.
        up_model_cond: Output artifact containing '1' when the model was uploaded, else '0'.
        lr: Learning rate of the Adam optimizer.
        epochs: Number of full-batch training epochs.
        print_frequency: Print train/test loss and accuracy every this many epochs;
            values <= 0 disable the progress output.
        minio_url: MinIO endpoint as host:port (no scheme).
        access_key: MinIO access key.
        secret_key: MinIO secret key.
        bucket_name: Bucket used for the model and the stored accuracy.
        model_object_name: Object name the model is uploaded to.
        trigger_type: '1' or '2' always upload the model; '3' uploads only when the
            accuracy gain over the stored value exceeds performance_factor; any other
            value never uploads.
        performance_factor: Minimum accuracy gain required for trigger type '3'.
        last_accuracy_object_name: Object name holding the accuracy of the last upload.
        tmp_dir: Local directory for the temporary accuracy file.
        tmp_file_last_acc: File name of the temporary accuracy file.
    """
    import os
    import shutil
    import torch
    from torch import nn
    from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
    from minio import Minio
    
    # Build model with non-linear activation function
    class InterruptionModel(nn.Module):
        """Three-layer fully connected network (29 -> 200 -> 100 -> 1) that outputs raw logits."""

        def __init__(self):
            super().__init__()
            self.layer_1 = nn.Linear(in_features=29, out_features=200)
            self.layer_2 = nn.Linear(in_features=200, out_features=100)
            self.layer_3 = nn.Linear(in_features=100, out_features=1)
            self.relu = nn.ReLU()

        def forward(self, x):
            # Intersperse the ReLU activation function between layers; no sigmoid here
            # because BCEWithLogitsLoss expects logits
            return self.layer_3(self.relu(self.layer_2(self.relu(self.layer_1(x)))))

    # Helper functions
    def accuracy_fn(y_true, y_pred):
        """Return the percentage (0-100) of positions where y_true equals y_pred."""
        correct = torch.eq(y_true, y_pred).sum().item()
        acc = (correct / len(y_pred)) * 100
        return acc

    def minio_setup(minio_url, access_key, secret_key):
        """Create a MinIO client for a plain-HTTP endpoint given as host:port."""
        client = Minio(
            minio_url,  # host:port only - no scheme and no path
            access_key=access_key,
            secret_key=secret_key,
            secure=False  # the endpoint is reached over HTTP
        )
        return client

    def upload_file(client, bucket_name, object_name, filepath):
        """Upload filepath to bucket_name/object_name, creating the bucket when needed.

        Raises:
            Exception: If the upload fails.
        """
        # Create the bucket if it does not exist
        if not client.bucket_exists(bucket_name):
            client.make_bucket(bucket_name)
            print(f"Successfully created bucket: {bucket_name}")
        else:
            print(f"Bucket {bucket_name} already exists")
    
        try:
            # Upload the file to the specified path in the bucket
            client.fput_object(bucket_name, object_name, filepath)
            return (f'Successfully uploaded {filepath} to {bucket_name}/{object_name}')

        except Exception as e:
            raise Exception(f'Failed to upload model to Minio: {e}') from e

    def read_from_minio(client, bucket_name, object_name):
        """
        Function to read a file from a MinIO bucket and convert its single content to a float.
        If the file is not found or is empty, it returns 0.0.

        Args:
            client: minio client
            bucket_name (str): The name of the bucket in MinIO.
            object_name (str): The name of the object (file) in the bucket.

        Returns:
            float: The float value converted from the file content, or 0.0 if the file is not found or empty.
        """
        response = None
        try:
            # Get the file from the MinIO bucket and decode its content
            response = client.get_object(bucket_name, object_name)
            content = response.read().decode('utf-8').strip()

            # If the content is empty, return 0.0
            if not content:
                print(f"File {object_name} is empty.")
                return 0.0

            return float(content)

        except Exception as e:
            # Handle file not found or any other errors
            print(f"Error occurred: {e}")
            return 0.0

        finally:
            # Always hand the HTTP connection back to the pool
            if response is not None:
                response.close()
                response.release_conn()

    def save_float_to_tempfile(float_value, dir_name, file_name):
        """
        Saves a float value to a specified directory and file name.

        Args:
            float_value (float): The float value to save.
            dir_name (str): The name of the directory to save the file in.
            file_name (str): The name of the file.
        
        Returns:
            str: The path to the file.
        """
        # Ensure the directory exists
        os.makedirs(dir_name, exist_ok=True)
        temp_file_path = os.path.join(dir_name, file_name)
        
        with open(temp_file_path, 'w') as temp_file:
            # Convert the float to a string, then write to file
            temp_file.write(str(float_value))
        
        return temp_file_path

    def get_accuracy_in_last_run(client, bucket_name, object_name):
        """Return the accuracy stored in MinIO, or 0.0 when none is available."""
        return read_from_minio(client, bucket_name, object_name)

    def update_accuracy_in_last_run(client, bucket_name, object_name, new_value, tmp_dir, tmp_file):
        """Write new_value to a local file and upload it as the stored accuracy."""
        filepath = save_float_to_tempfile(new_value, tmp_dir, tmp_file)
        upload_file(client, bucket_name, object_name, filepath)

    # Seed before the model is built so the weight initialization is reproducible
    torch.manual_seed(42)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = InterruptionModel().to(device)

    # Setup loss and optimizer
    loss_fn = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Put all data on target device
    X_train = torch.load(X_train_artifact.path).to(device)
    X_test = torch.load(X_test_artifact.path).to(device)
    y_train = torch.load(y_train_artifact.path).to(device)
    y_test = torch.load(y_test_artifact.path).to(device)

    for epoch in range(epochs):
        # Switch back to training mode (the progress report below uses eval mode)
        model.train()

        # 1. Forward pass; squeeze only the feature dim so a single-sample batch keeps its shape
        y_logits = model(X_train).squeeze(dim=1)

        # 2. Calculate loss (BCEWithLogitsLoss works on logits)
        loss = loss_fn(y_logits, y_train)

        # 3. Optimizer zero grad
        optimizer.zero_grad()

        # 4. Loss backward
        loss.backward()

        # 5. Optimizer step
        optimizer.step()

        # Progress report: evaluated only when it is printed, as it does not affect training
        if print_frequency > 0 and epoch % print_frequency == 0:
            model.eval()
            with torch.no_grad():
                # logits -> prediction probabilities -> prediction labels
                y_pred = torch.round(torch.sigmoid(y_logits.detach()))
                acc = accuracy_fn(y_true=y_train, y_pred=y_pred)

                test_logits = model(X_test).squeeze(dim=1)
                test_pred = torch.round(torch.sigmoid(test_logits))
                test_loss = loss_fn(test_logits, y_test)
                test_acc = accuracy_fn(y_true=y_test, y_pred=test_pred)
            print(f"Epoch: {epoch} | Loss: {loss:.5f}, Accuracy: {acc:.2f}% | Test Loss: {test_loss:.5f}, Test Accuracy: {test_acc:.2f}%")

    # Final evaluation on the test split
    model.eval()
    with torch.no_grad():
        y_preds = torch.round(torch.sigmoid(model(X_test))).squeeze(dim=1)

    # .cpu() is a no-op for tensors that already live on the CPU
    predictions = y_preds.cpu().numpy()
    true_labels = y_test.cpu().numpy()
    
    # Confusion Matrix
    cmatrix = confusion_matrix(true_labels, predictions)

    # Metrics (cast to plain floats before logging)
    accuracy = float(accuracy_score(true_labels, predictions))
    metrics.log_metric("Accuracy", accuracy)

    precision = float(precision_score(true_labels,  predictions, average='weighted'))
    metrics.log_metric("Precision", precision)

    recall = float(recall_score(true_labels, predictions, average='weighted'))
    metrics.log_metric("Recall", recall)

    microf1 = float(f1_score(true_labels, predictions, average='micro'))
    metrics.log_metric("Micro F1 score", microf1)

    macrof1 = float(f1_score(true_labels, predictions, average='macro'))
    metrics.log_metric("Macro F1 score", macrof1)

    # Classification Metrics artifact
    target_names = ['No-Stall', 'Stall']
    classification_metrics.log_confusion_matrix(target_names, cmatrix.tolist())
    
    # Save model; shutil.move also works when /tmp and the artifact path are on different filesystems
    model_path = "/tmp/model.pt"
    torch.save(model.state_dict(), model_path)
    shutil.move(model_path, model_trained_artifact.path)

    # Setup minio client to upload and read files
    client = minio_setup(minio_url, access_key, secret_key)

    previous_accuracy = get_accuracy_in_last_run(client, bucket_name, last_accuracy_object_name)

    accuracy_gain = accuracy - previous_accuracy
    print(accuracy_gain)
    metrics.log_metric("current-previous accuracy", accuracy_gain)

    # Decide whether the model is uploaded. The default covers unknown trigger types and
    # trigger '3' without sufficient gain (which previously left `up_model` undefined).
    up_model = False
    if trigger_type == '1' or trigger_type == '2':
        up_model = True
    elif trigger_type == '3':
        # NOTE(review): accuracy_score yields a fraction in [0, 1], so the gain can never
        # exceed a performance_factor such as 5.0 - confirm the intended scale.
        up_model = accuracy_gain > performance_factor

    if up_model:
        upload_file(client, bucket_name, model_object_name, model_trained_artifact.path)
        if trigger_type == '3':
            # Persist the new reference accuracy only after the model upload succeeded
            update_accuracy_in_last_run(client, bucket_name, last_accuracy_object_name, accuracy, tmp_dir, tmp_file_last_acc)

    # Always publish the decision so the serving step can read it
    up_model_flag = '1' if up_model else '0'
    metrics.log_metric("up model", up_model_flag)
    with open(up_model_cond.path, 'w') as f:
        f.write(up_model_flag)

@component(base_image="python:3.11.9", packages_to_install=['kserve==0.13.0','kubernetes==30.1.0'])
def model_serving(
    up_model_cond: Input[Artifact],
    cond_info: Output[Metrics],
    bucket_name: str,
    model_name: str,
    namespace: str,
    svc_acc: str
):
    """Replace the KServe InferenceService when the training step uploaded a new model.

    Args:
        up_model_cond: Input artifact whose content is '1' when a new model was uploaded.
        cond_info: Output metrics; receives the decision as "Up model".
        bucket_name: Bucket whose s3:// URI is used as the predictor storage URI.
        model_name: Name of the InferenceService.
        namespace: Kubernetes namespace of the InferenceService.
        svc_acc: Service account name set on the predictor.
    """
    from kubernetes import client 
    from kserve import KServeClient, constants, V1beta1InferenceService, V1beta1InferenceServiceSpec, V1beta1PredictorSpec, V1beta1TorchServeSpec
    import time
    
    # Read the decision; strip so surrounding whitespace cannot defeat the comparison
    with open(up_model_cond.path) as f:
        up_model = f.read().strip()

    # Nothing to deploy unless a new model was uploaded
    if up_model != '1':
        cond_info.log_metric("Up model", '0')
        return

    cond_info.log_metric("Up model", up_model)

    # Inference server config
    kserve_version='v1beta1'
    api_version = constants.KSERVE_GROUP + '/' + kserve_version
    uri = f's3://{bucket_name}'

    isvc = V1beta1InferenceService(api_version=api_version,
                                   kind=constants.KSERVE_KIND,
                                   metadata=client.V1ObjectMeta(
                                       name=model_name, namespace=namespace, annotations={'sidecar.istio.io/inject':'false'}),
                                   spec=V1beta1InferenceServiceSpec(
                                   predictor=V1beta1PredictorSpec(
                                       service_account_name=svc_acc,
                                       pytorch=(V1beta1TorchServeSpec(
                                           storage_uri=uri))))
    )

    KServe = KServeClient()

    # Replace old inference service with a new one. Deleting is best-effort
    # (the service may not exist yet), but the reason for a failure is logged.
    try:
        KServe.delete(name=model_name, namespace=namespace)
        print("Old model deleted")
    except Exception as e:
        print(f"Couldn't delete old model: {e}")
    # Give the cluster time to remove the old service before re-creating it
    time.sleep(10)

    KServe.create(isvc)

@pipeline
def my_pipeline(
    github_repo_url: str,
    github_cloned_dir: str,
    github_test_branch: str,
    github_username: str,
    github_token: str,
    remote_name: str,
    remote_url: str,
    minio_url: str,
    access_key: str,
    secret_key: str,
    dvc_file_dir: str,
    dvc_file_name: str,
    model_name: str,
    namespace: str,
    lr: float,
    epochs: int,
    print_frequency: int,
    bucket_name: str,
    model_object_name: str,
    svc_acc: str,
    trigger_type: str,
    performance_factor: float,
    last_accuracy_object_name: str,
    tmp_dir: str,
    tmp_file_last_acc: str
):
    # Pipeline wiring: data_ingestion -> data_preparation -> model_training -> model_serving.
    # Every task consumes output artifacts of the task before it.

    # Stage 1: clone the repository and pull the DVC-tracked dataset
    ingestion = data_ingestion(
        github_repo_url=github_repo_url,
        github_cloned_dir=github_cloned_dir,
        github_test_branch=github_test_branch,
        github_username=github_username,
        github_token=github_token,
        remote_name=remote_name,
        remote_url=remote_url,
        minio_url=minio_url,
        access_key=access_key,
        secret_key=secret_key,
        dvc_file_dir=dvc_file_dir,
        dvc_file_name=dvc_file_name,
    )

    # Stage 2: produce the train/test splits (test_size and random_state keep their defaults)
    preparation = data_preparation(
        dataset_artifact=ingestion.outputs["dataset_artifact"],
    )

    # Stage 3: train the model on the four split artifacts
    training = model_training(
        X_train_artifact=preparation.outputs["X_train_artifact"],
        X_test_artifact=preparation.outputs["X_test_artifact"],
        y_train_artifact=preparation.outputs["y_train_artifact"],
        y_test_artifact=preparation.outputs["y_test_artifact"],
        lr=lr,
        epochs=epochs,
        print_frequency=print_frequency,
        minio_url=minio_url,
        access_key=access_key,
        secret_key=secret_key,
        bucket_name=bucket_name,
        model_object_name=model_object_name,
        trigger_type=trigger_type,
        performance_factor=performance_factor,
        last_accuracy_object_name=last_accuracy_object_name,
        tmp_dir=tmp_dir,
        tmp_file_last_acc=tmp_file_last_acc,
    )

    # Stage 4: serve the model; the component itself checks the upload condition artifact
    model_serving(
        up_model_cond=training.outputs["up_model_cond"],
        bucket_name=bucket_name,
        model_name=model_name,
        namespace=namespace,
        svc_acc=svc_acc,
    )

# Compile the pipeline into a YAML package named after the pipeline
pipeline_filename = KUBEFLOW_PIPELINE_NAME + ".yaml"
kfp.compiler.Compiler().compile(pipeline_func=my_pipeline, package_path=pipeline_filename)

# Client for the KFP API, authenticated with the service-account token
client = kfp.Client(host=KUBEFLOW_HOST_URL, existing_token=KUBEFLOW_TOKEN)

# Pipeline parameters for this run, taken from the configuration constants
run_arguments = dict(
    github_repo_url=GITHUB_REPO_URL,
    github_cloned_dir=GITHUB_CLONED_DIR,
    github_test_branch=GITHUB_TEST_BRANCH,
    github_username=GITHUB_USERNAME,
    github_token=GITHUB_TOKEN,
    remote_name=REMOTE_NAME,
    remote_url=REMOTE_URL,
    minio_url=MINIO_URL,
    access_key=ACCESS_KEY,
    secret_key=SECRET_KEY,
    dvc_file_dir=DVC_FILE_DIR,
    dvc_file_name=DVC_FILE_NAME,
    model_name=MODEL_NAME,
    namespace=NAMESPACE,
    lr=LR,
    epochs=EPOCHS,
    print_frequency=PRINT_FREQUENCY,
    bucket_name=BUCKET_NAME,
    model_object_name=MODEL_OBJECT_NAME,
    svc_acc=SVC_ACC,
    trigger_type=TRIGGER_TYPE,
    performance_factor=PERFORMANCE_FACTOR,
    last_accuracy_object_name=LAST_ACC_OBJECT_NAME,
    tmp_dir=TEMP_DIR,
    tmp_file_last_acc=TEMP_FILE_ACC_IN_LAST_RUN,
)

# Submit a run of the pipeline with caching disabled
client.create_run_from_pipeline_func(my_pipeline, enable_caching=False, arguments=run_arguments)

# Upload the compiled package to Kubeflow; kept as the last expression so the cell shows its result
client.upload_pipeline(
    pipeline_package_path=pipeline_filename,
    pipeline_name=KUBEFLOW_PIPELINE_NAME,
    namespace=NAMESPACE,
)

{'created_at': datetime.datetime(2024, 9, 17, 19, 34, 47, tzinfo=tzlocal()),
 'description': None,
 'display_name': 'mlops',
 'error': None,
 'namespace': 'kubeflow-user-example-com',
 'pipeline_id': '9f242c9c-412f-434f-b20b-566fe36a85bd'}