In [None]:
%pip install -q chi

In [None]:
import os

import chi
from chi import server, context, lease

# Set the python-chi context version to "1.0" before any other chi call is made.
context.version = "1.0"
# Choose the project and site for this session; "CHI@TACC" is passed as the
# default site.
# NOTE(review): these calls presumably render interactive selectors — confirm.
context.choose_project()
context.choose_site(default="CHI@TACC")

In [None]:
# Fetch the lease named "node-gpu-project51" and display it.
# The name `l` is kept because the server-creation cell reads
# `l.node_reservations`.
l = lease.get_lease("node-gpu-project51")
l.show()

In [None]:
# Login name of the user running this notebook, read from $USER.
# NOTE(review): no visible cell uses `username`; the resource names below are
# hard-coded with the "project51" suffix instead.
username = os.environ.get("USER")

# Define the GPU server on the first node reservation of lease `l`, booting
# the CUDA-enabled Ubuntu 24.04 image.
s = server.Server(
    "node-gpu-project51",
    reservation_id=l.node_reservations[0]["id"],
    image_name="CC-Ubuntu24.04-CUDA",
)

# NOTE(review): idempotent=True presumably reuses an existing server with the
# same name instead of creating a second one — confirm in the python-chi docs.
s.submit(idempotent=True)

In [None]:
# Associate a floating IP with server `s`.
# NOTE(review): presumably needed so the later s.execute() calls can reach the
# server from outside the site network — confirm.
s.associate_floating_ip()

In [None]:
# Refresh the local view of the server, then check connectivity to it.
# NOTE(review): check_connectivity() presumably blocks until the server is
# reachable — confirm.
s.refresh()
s.check_connectivity()

In [None]:
# Refresh the server state again and display it as a widget.
s.refresh()
s.show(type="widget")

In [None]:
# Clone the care-companion repository, including its git submodules,
# via s.execute().
s.execute("git clone --recurse-submodules https://github.com/care-ai-mlops/care-companion.git")

## Docker Setup

In [None]:
# Install Docker by piping the script served at get.docker.com into a root shell.
s.execute("curl -sSL https://get.docker.com/ | sudo sh")
# Ensure the `docker` group exists (-f: succeed even if it already does) and
# add $USER to it. $USER is expanded by the shell that runs the command, not
# by Python.
s.execute("sudo groupadd -f docker; sudo usermod -aG docker $USER")

## Setup NVIDIA GPU

In [None]:
# Register NVIDIA's apt repository for the container toolkit: store the
# de-armored signing key in a keyring file, then write a sources-list entry
# that is rewritten by sed to reference that keyring via `signed-by`.
# `--yes` lets gpg overwrite an existing keyring file, so re-running this cell
# does not stop at gpg's "File exists. Overwrite?" prompt.
s.execute(
    "curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey"
    " | sudo gpg --yes --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg"
    " && curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list"
    " | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g'"
    " | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list"
)

# Refresh the package index so the new repository is visible, then install the
# toolkit. apt-get is used for both steps because it is the script-stable
# front-end.
s.execute("sudo apt-get update")
s.execute("sudo apt-get install -y nvidia-container-toolkit")

# Let nvidia-ctk configure the Docker runtime.
s.execute("sudo nvidia-ctk runtime configure --runtime=docker")

# for https://github.com/NVIDIA/nvidia-container-toolkit/issues/48
# Add an "exec-opts" entry selecting the cgroupfs cgroup driver to
# /etc/docker/daemon.json unless one is already present. The result is written
# to a temp file and moved into place so the file is not truncated while jq is
# still reading it.
s.execute("sudo jq 'if has(\"exec-opts\") then . else . + {\"exec-opts\": [\"native.cgroupdriver=cgroupfs\"]} end' /etc/docker/daemon.json | sudo tee /etc/docker/daemon.json.tmp > /dev/null && sudo mv /etc/docker/daemon.json.tmp /etc/docker/daemon.json")

# Restart Docker so the updated daemon.json takes effect.
s.execute("sudo systemctl restart docker")

In [None]:
# Install nvtop.
# apt-get is used instead of apt, matching the `apt-get install` call used for
# nvidia-container-toolkit; apt's command line is meant for interactive use and
# warns when driven from scripts.
s.execute("sudo apt-get update")
s.execute("sudo apt-get install -y nvtop")

## Mounting Block Storage

In [None]:
# Look up the persistent block-storage volume by name and attach it to server `s`.
# Requires the top-level `chi` module to be imported (not just its submodules).
cinder_client = chi.clients.cinder()
volume = next(
    (v for v in cinder_client.volumes.list() if v.name == "block-persist-project51"),
    None,
)
if volume is None:
    # Same exception type as the original `[...][0]` lookup, but with a message
    # that says what is actually missing.
    raise IndexError(
        "no volume named 'block-persist-project51' is visible in the "
        "currently selected project/site"
    )

# Attach the volume through the Nova volumes manager.
volume_manager = chi.nova().volumes
volume_manager.create_server_volume(server_id=s.id, volume_id=volume.id)

In [None]:
# Create the mount point (-p: no error if it already exists).
s.execute('sudo mkdir -p /mnt/block')
# Mount the first partition of /dev/vdb at /mnt/block.
# NOTE(review): assumes the attached volume shows up as /dev/vdb and already
# has a formatted partition — confirm on the server (e.g. with lsblk).
s.execute('sudo mount /dev/vdb1 /mnt/block')
# List the mounted directory as a quick check.
s.execute('ls -l /mnt/block')

## Mounting Object Storage

In [None]:
# Install rclone by piping the script from rclone.org into a root bash shell.
s.execute('curl https://rclone.org/install.sh | sudo bash')
# Uncomment the `user_allow_other` line in /etc/fuse.conf (strips the leading '#').
s.execute("sudo sed -i '/^#user_allow_other/s/^#//' /etc/fuse.conf")
# Create rclone's per-user config directory.
s.execute("mkdir -p ~/.config/rclone")

In [None]:
# Symlink the rclone config kept under /mnt/block into the per-user config
# directory (-f replaces an existing link, -s makes it symbolic).
# NOTE(review): assumes /mnt/block/rclone/rclone.conf exists and defines the
# `chi_tacc` remote — confirm.
s.execute("ln -sf /mnt/block/rclone/rclone.conf ~/.config/rclone/rclone.conf")
# List the directories in the object-store container via the `chi_tacc` remote.
s.execute("rclone lsd chi_tacc:object-persist-project51")

In [None]:
# Create the mount point for the object store, then recursively set its owner
# and group to `cc`.
s.execute('sudo mkdir -p /mnt/object')
s.execute('sudo chown -R cc /mnt/object')
s.execute('sudo chgrp -R cc /mnt/object')

In [None]:
# Mount the object-store container at /mnt/object with rclone: read-only,
# accessible to other users (--allow-other), and running in the background
# (--daemon).
s.execute('rclone mount chi_tacc:object-persist-project51 /mnt/object --read-only --allow-other --daemon')