# BirdCLEF 2025 Full ETL Pipeline Runner
This notebook launches a VM and runs the full modular ETL pipeline using Docker Compose.

In [None]:
from chi import server, context
import chi, os

# Configure the python-chi session: API version, project, and site.
context.version = "1.0"
context.choose_project()
context.choose_site(default="KVM@TACC")

# Everything that identifies the VM lives in one spec so it is easy to tweak.
server_spec = {
    "name": "node-data-pipeline-project38",
    "image_name": "CC-Ubuntu24.04",
    "flavor_name": "m1.xxlarge",
    "key_name": "id_rsa_chameleon_project_g38",
}
s = server.Server(**server_spec)

# Submit the server request, attach a floating IP, then reload the server
# state before rendering the summary widget.
s.submit(idempotent=True)
s.associate_floating_ip()
s.refresh()
s.show(type="widget")


In [None]:
# Handles used below: an OpenStack connection for security-group management
# and the Nova record of the server launched earlier.
os_conn = chi.clients.connection()
nova_server = chi.nova().servers.get(s.id)

# One security group per TCP port that must be reachable on the VM.
security_groups = [
    {'name': "allow-ssh", 'port': 22},
    {'name': "allow-8888", 'port': 8888},
    {'name': "allow-8000", 'port': 8000},
    {'name': "allow-9000", 'port': 9000},
    {'name': "allow-9001", 'port': 9001},
]

for sg in security_groups:
    sg_name, sg_port = sg['name'], sg['port']
    existing_group = os_conn.get_security_group(sg_name)
    if not existing_group:
        # Group is missing: create it with a single ingress rule that opens
        # exactly this TCP port to any source address.
        os_conn.create_security_group(sg_name, f"Open port {sg_port}")
        os_conn.create_security_group_rule(
            sg_name,
            port_range_min=sg_port,
            port_range_max=sg_port,
            protocol='tcp',
            remote_ip_prefix='0.0.0.0/0',
        )
    # Attach the group to the server whether it was just created or not.
    nova_server.add_security_group(sg_name)

attached_group_names = [group.name for group in nova_server.list_security_group()]
print(f"Updated security groups: {attached_group_names}")
s.check_connectivity()


In [None]:
# Provision the VM: fetch the project repository, install Docker and rclone,
# and enable FUSE's user_allow_other option. The steps are written so the
# cell can be re-run against an already-provisioned server.

# `git clone` refuses to write into an existing non-empty directory, so only
# clone when the checkout is not there yet.
s.execute("[ -d MLOps/.git ] || git clone https://github.com/exploring-curiosity/MLOps.git")
s.execute("curl -sSL https://get.docker.com/ | sudo sh")
# `groupadd -f` succeeds even if the docker group already exists.
s.execute("sudo groupadd -f docker; sudo usermod -aG docker $USER")
# The rclone install script exits non-zero when the latest version is already
# installed, so skip it when an rclone binary is already on PATH.
s.execute("command -v rclone >/dev/null 2>&1 || curl https://rclone.org/install.sh | sudo bash")
# Uncomment `user_allow_other` in /etc/fuse.conf; a no-op once uncommented.
s.execute("sudo sed -i '/^#user_allow_other/s/^#//' /etc/fuse.conf")


In [None]:
# Prerequisite: copy your Kaggle API token into the VM's home directory
# (run once from your local machine):
# !scp -i ~/.ssh/id_rsa_chameleon_project_g38 ~/.kaggle/kaggle.json cc@<FLOATING-IP>:~

# Move the uploaded token into ~/.kaggle and restrict it to owner read/write.
kaggle_setup_steps = (
    "mkdir -p ~/.kaggle",
    "mv ~/kaggle.json ~/.kaggle/kaggle.json",
    "chmod 600 ~/.kaggle/kaggle.json",
)
s.execute(" && ".join(kaggle_setup_steps))


In [None]:
# Run the ETL stages in order, with the cleanup compose file executed
# between consecutive stages (but not after the final one).
cleanup_file = "docker-compose-cleanup.yaml"
stage_files = [
    "docker-compose-etl-download-upload.yaml",
    "docker-compose-etl-denoise.yaml",
    "docker-compose-etl-features.yaml",
    "docker-compose-etl-manifests.yaml",
    "docker-compose-etl-production.yaml",
]

compose_files = []
for stage_file in stage_files:
    compose_files.extend((stage_file, cleanup_file))
compose_files.pop()  # drop the cleanup entry that trails the last stage

for file in compose_files:
    print(f"Running {file}...")
    # Each compose file is brought up from the pipeline directory on the VM.
    command = f"cd MLOps/data_pipeline && docker compose -f {file} up --build --abort-on-container-exit"
    result = s.execute(command)
    print(result)
