In [1]:
def copy_file_to_docker(file_path, docker_path):
    """Copy a file from local to GCP VM and then into the Docker container."""
    filename = file_path.split("/")[-1] if "/" in file_path else file_path.split("\\")[-1]
    # Copy file to VM
    !gcloud compute scp --zone=us-central1-a {file_path} load-testing-instance:/home/adilm
    # Copy file from VM to Docker container
    copy_command = f"docker cp /home/adilm/{filename} load-testing-container:{docker_path}"
    !gcloud compute ssh load-testing-instance --zone=us-central1-a --command="{copy_command}"

def run_docker_command(inner_command):
    """
    Run a command inside load-testing-container on the GCP VM via SSH using docker exec.
    """
    docker_exec_template = "docker exec load-testing-container bash -c '{cmd}'"
    docker_command = docker_exec_template.format(cmd=inner_command)
    ssh_command = f"gcloud compute ssh load-testing-instance --zone=us-central1-a --command=\"{docker_command}\""
    !{ssh_command}

def copy_dir_to_docker(dir_path, docker_path):
    """Copy a directory from local to GCP VM and then into the Docker container."""
    dir_name = dir_path.split("/")[-1] if "/" in dir_path else dir_path.split("\\")[-1]
    # Clean up the VM directory 
    rm_command = f"rm -rf /home/adilm/{dir_name}"  # Clean up the VM directory after copying
    docker_rm_command = f"docker exec load-testing-container rm -rf {docker_path}{dir_name}"  # Clean up the Docker directory
    !gcloud compute ssh load-testing-instance --zone=us-central1-a --command="{rm_command}"
    !gcloud compute ssh load-testing-instance --zone=us-central1-a --command="{docker_rm_command}"
    # Copy directory to VM
    print(f"gcloud compute scp --zone=us-central1-a --recurse {dir_path} load-testing-instance:/home/adilm/")
    !gcloud compute scp --zone=us-central1-a --recurse {dir_path} load-testing-instance:/home/adilm/
    # Copy directory from VM to Docker container
    copy_command = f"docker cp /home/adilm/{dir_name} load-testing-container:{docker_path}"
    !gcloud compute ssh load-testing-instance --zone=us-central1-a --command="{copy_command}"

In [None]:
# RUN LOAD TESTING VM
# `vm_name` and `zone` are also read by later cells through `{vm_name}` / `{zone}`
# interpolation, so this cell must have run in the current kernel before them.
vm_name = "load-testing-instance"
# NOTE(review): despite its name, this value is passed below as `--disk=name=...`,
# i.e. as the name of an existing disk to boot from — confirm it is a disk and not a snapshot.
snapshot_name = "load-testing-instance-snapshot"
zone = "us-central1-a"
# Create the VM booting from that disk; `auto-delete=yes` deletes the disk together with the instance.
!gcloud compute instances create {vm_name} --zone={zone} --disk=name={snapshot_name},boot=yes,auto-delete=yes --machine-type=e2-standard-8
# SECURITY: this rule allows ingress for all protocols/ports (`--rules=all`) from any
# address (`0.0.0.0/0`) on the `default` network, not only to this VM.
# NOTE(review): restrict `--rules` / `--source-ranges` to what the load test actually needs.
!gcloud compute firewall-rules create allow-all --direction=INGRESS --priority=1000 --network=default --action=ALLOW --rules=all --source-ranges=0.0.0.0/0

In [7]:
# CHECK IF VM IS RUNNING
vm_status = !gcloud compute instances describe {vm_name} --zone={zone} --format="get(status)"
if vm_status[0] == "RUNNING":
    print(f"VM {vm_name} is running.")
else:
    print(f"VM {vm_name} is not running. Current status: {vm_status[0]}")

VM load-testing-instance is running.


In [None]:
# RUN LOAD TESTING CONTAINER
docker_command = "docker run -d --name load-testing-container auliadil/load-testing-rodrigues:v1 tail -f /dev/null"
gcloud_template = "gcloud compute ssh standalone-load-tester --zone=us-central1-a"
!{gcloud_template} --command="{docker_command}"

In [None]:
# CHECK IF CONTAINER IS RUNNING
container_status = !gcloud compute ssh load-testing-instance --zone={zone} --command="docker ps -q --filter 'name=load-testing-container'"
if container_status:
    print("Container is running.")
else:
    print("Container is not running")

In [None]:
# COPY FILE
# Upload the Locust scenario from the local checkout into the container's
# Serialization_Datasets directory (local -> VM -> container).
copy_file_to_docker("../Serialization_Datasets/locust-testing.py", "/app/large-scale-online-learning/MLOps-Architecture/Serialization_Datasets/")
# Optional uploads, kept commented out: the Kafka lag checker script and the local gcloud config directory.
# NOTE(review): the second one uses an absolute, user-specific Windows path.
# copy_file_to_docker("../Serialization_Datasets/check-kafka-lag.py", "/app/large-scale-online-learning/MLOps-Architecture/Serialization_Datasets/")
# copy_dir_to_docker('C:/Users/adilm/AppData/Roaming/gcloud', "/root/.config/gcloud")

In [None]:

# Verify the upload by printing the copied script from inside the container
path = "/app/large-scale-online-learning/MLOps-Architecture/Serialization_Datasets"
run_docker_command(f"cat {path}/locust-testing.py")
# run_docker_command("cat " + path + "/check-kafka-lag.py")

In [None]:
# Check GCloud
# Setup steps kept commented out: read the local gcloud project, set it inside
# the container, and fetch kubectl credentials for `two-node-cluster`.
# projects = !gcloud config get-value project
# set_project_command = f"gcloud config set project {projects[0]}"
# get_kubectl_command = "gcloud container clusters get-credentials two-node-cluster --zone us-central1-a"
# run_docker_command(set_project_command)
# run_docker_command(get_kubectl_command)
# List the pods as seen by kubectl inside the container.
run_docker_command("kubectl get pods")

NAME                              READY   STATUS    RESTARTS   AGE
api-inferencia-6f47cb7df8-92lbg   1/1     Running   0          7h6m
api-inferencia-6f47cb7df8-dxnqj   1/1     Running   0          7h6m
api-inferencia-6f47cb7df8-sp8t6   1/1     Running   0          7h6m
api-inferencia-6f47cb7df8-tn9f8   1/1     Running   0          7h6m
api-update-794765cdd7-wwk5f       1/1     Running   0          7h6m
mlflow-cb7f6b8b-59vjb             1/1     Running   0          7h6m


In [18]:
# Paths inside the container.
locust_path = "/app/large-scale-online-learning/MLOps-Architecture/Serialization_Datasets/locust-testing.py"
check_kafka_lag_dir_path = "/app/large-scale-online-learning/MLOps-Architecture/Serialization_Datasets"
# Headless Locust run: 50 users spawned at 50 users/s, CSV output prefixed `result_testing`.
# Only the commented-out launch line below uses this command.
locust_command = f"locust -f {locust_path} --users 50 --spawn-rate 50 --headless --csv=result_testing"
# Start the Kafka lag checker in the background (`nohup ... &`); its stdout and
# stderr go to kafka_lag.log in the same directory.
run_docker_command(f"cd {check_kafka_lag_dir_path} && nohup python3 check-kafka-lag.py > kafka_lag.log 2>&1 &")
# Locust launch, kept commented out: activates the virtualenv and writes to locust.log.
# run_docker_command(f"cd /app/large-scale-online-learning/ && source ../.python-venv/bin/activate && nohup {locust_command} > locust.log 2>&1 &")

In [19]:
# check whether the script is running
# NOTE: `ps aux | grep <name>` also matches the grep process itself and the
# `bash -c` wrapper that runs this pipeline (both appear in the recorded output),
# so only a `python3 ...` / `locust ...` line indicates a live process.
run_docker_command("ps aux | grep check-kafka-lag.py")
run_docker_command("ps aux | grep locust-testing.py")

root        3820  0.0  0.0   4324   236 ?        S    14:00   0:00 bash -c cd /app/large-scale-online-learning/MLOps-Architecture/Serialization_Datasets && nohup python3 check-kafka-lag.py > kafka_lag.log 2>&1 &
root        3821  0.2  0.0  90664 12144 ?        Sl   14:00   0:00 python3 check-kafka-lag.py
root        3866 75.0  0.0   4324  3528 ?        Ss   14:00   0:00 bash -c ps aux | grep check-kafka-lag.py
root        3873  0.0  0.0   3528  1708 ?        S    14:00   0:00 grep check-kafka-lag.py
root        3794  0.0  0.0   4324  2048 ?        S    13:59   0:00 bash -c cd /app/large-scale-online-learning/ && source ../.python-venv/bin/activate && nohup locust -f /app/large-scale-online-learning/MLOps-Architecture/Serialization_Datasets/locust-testing.py --users 50 --spawn-rate 50 --headless --csv=result_testing > locust.log 2>&1 &
root        3795  100  1.1 1629532 376412 ?      Rl   13:59   0:45 /root/venv/bin/python3 /root/venv/bin/locust -f /app/large-scale-online-learning/MLOps

In [None]:
# kill the process if it is running
# run_docker_command("pkill -f check-kafka-lag.py")
# run_docker_command("pkill -f locust-testing.py")

In [20]:
# CHECK IF KAFKA LAG LOG IS GENERATED
# NOTE(review): this prints kafka_lag_log.csv, not the kafka_lag.log file that
# receives the checker's stdout/stderr; presumably the CSV is written by
# check-kafka-lag.py itself — confirm.
run_docker_command("cat /app/large-scale-online-learning/MLOps-Architecture/Serialization_Datasets/kafka_lag_log.csv")

# clean kafka lag log
# run_docker_command("rm -rf /app/large-scale-online-learning/MLOps-Architecture/Serialization_Datasets/kafka_lag_log.csv")

1754895616.4320204,400


In [None]:
# CHECK IF LOCUST LOGS ARE GENERATED
# Prints locust.log from /app/large-scale-online-learning/, the directory and
# file name used by the commented-out Locust launch command in this notebook.
run_docker_command("cat /app/large-scale-online-learning/locust.log")

# Clean up locust log
# run_docker_command("rm -rf /app/large-scale-online-learning/locust.log")

In [None]:
# Check if Kubernetes pod is working
# update_pods = !kubectl get pods | findstr /R "upd"
# !kubectl exec {update_pods[0].split()[0]} -- ls
# !kubectl exec {update_pods[0].split()[0]} -- cat message_log.csv

# inference_pods = !kubectl get pods | findstr /R "inf"
# if inference_pods:
#     print(f"Inference pod found: {inference_pods[0]}")
#     !kubectl exec {inference_pods[0].split()[0]} -- ls
#     !kubectl exec {inference_pods[0].split()[0]} -- cat message_log.csv

In [None]:
# CHECK Resource Usage of Load Testing VM
# One batch-mode snapshot from `top` (`-b -n 1`), trimmed to its first 20 lines.
# This runs on the VM itself, not inside the container.
!gcloud compute ssh load-testing-instance --zone=us-central1-a --command="top -b -n 1 | head -n 20"

In [None]:
# GET VARIABLES
# Load KEY=VALUE pairs from etc/variables.txt into the notebook's global
# namespace. Blank lines and lines starting with "#" are ignored; every value
# is stored as a stripped string.
with open("etc/variables.txt", "r") as f:
    for line_number, raw_line in enumerate(f, start=1):
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        key, separator, value = line.partition("=")
        key = key.strip()
        if not separator or not key:
            # A line without "=" used to abort the whole cell with an
            # unpacking ValueError; report it and keep loading the rest.
            print(f"Skipping malformed line {line_number} in etc/variables.txt: {line!r}")
            continue
        globals()[key] = value.strip()

if "experiment" not in globals():
    raise NameError("etc/variables.txt does not define 'experiment'")

# NOTE(security): eval() executes whatever expression the file contains. That is
# only acceptable while etc/variables.txt is a trusted, locally maintained file.
# If the value is always a plain literal, ast.literal_eval is the safer choice.
experiment = eval(experiment) if isinstance(experiment, str) else experiment
if isinstance(experiment, (list, tuple)) and len(experiment) >= 2:
    experiment_name = experiment[0]
    experiment_file = experiment[1]
else:
    # Fail here rather than later: with `experiment_name` undefined, the
    # `{experiment_name}` placeholders in the download cells are passed to the
    # shell unexpanded and results land in a literally named directory.
    raise ValueError(
        f"'experiment' must be a list like [name, file], got {experiment!r}"
    )

In [None]:
# GET INFERENCE RESULTS
inference_pods = !kubectl get pods | findstr /R "inf"
if inference_pods:
    filenames = !kubectl exec {inference_pods[0].split()[0]} -- ls | findstr /R "load_model"


for filename in filenames:
    !kubectl exec {inference_pods[0].split()[0]} -- tar -czf /app/{filename}.tar.gz /app/{filename}
    !kubectl cp default/{inference_pods[0].split()[0]}:/app/{filename}.tar.gz experiment-results/{experiment_name}/inference-results/{filename}.tar.gz

In [None]:
# GET UPDATE RESULTS
pods = !kubectl get pods | findstr /R "upd"
!kubectl exec {pods[0].split()[0]} -- tar -czf /app/for_auc.tar.gz /app/for_auc.csv
!kubectl exec {pods[0].split()[0]} -- tar -czf /app/message_log.tar.gz /app/message_log.csv
!kubectl exec {pods[0].split()[0]} -- tar -czf /app/model_upload_latency.tar.gz /app/model_upload_latency.csv
!kubectl cp default/{pods[0].split()[0]}:/app/for_auc.tar.gz experiment-results/{experiment_name}/update-results/for_auc.tar.gz
!kubectl cp default/{pods[0].split()[0]}:/app/message_log.tar.gz experiment-results/{experiment_name}/update-results/message_log.tar.gz
!kubectl cp default/{pods[0].split()[0]}:/app/model_upload_latency.tar.gz experiment-results/{experiment_name}/update-results/model_upload_latency.tar.gz

In [None]:
# CHECK MLFLOW REPO
# Lists /mlartifacts inside the pod selected by the "GET UPDATE RESULTS" cell
# (`pods` holds the `kubectl get pods` lines matching "upd").
# NOTE(review): this inspects the update pod, not the separate mlflow pod —
# confirm /mlartifacts is mounted there.
!kubectl exec {pods[0].split()[0]} -- ls /mlartifacts

In [None]:
# Archive /mlartifacts/1 into /mlartifacts/mlflow-results.tar.gz inside the pod
# selected by the "GET UPDATE RESULTS" cell (`pods`).
# NOTE(review): no visible cell copies this archive out of the pod.
!kubectl exec {pods[0].split()[0]} -- tar -czf /mlartifacts/mlflow-results.tar.gz /mlartifacts/1