Skip to content

Commit

Permalink
GCP VPC (#221)
Browse files Browse the repository at this point in the history
* allow users to define the VPC network to use when launching clusters

* update docstring and yaml config
  • Loading branch information
quasiben committed Dec 16, 2020
1 parent 8fe303d commit 59b142a
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 5 deletions.
1 change: 1 addition & 0 deletions dask_cloudprovider/cloudprovider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ cloudprovider:
gcp:
source_image: "projects/ubuntu-os-cloud/global/images/ubuntu-minimal-1804-bionic-v20201014" # the gcp image to use for all instances
zone: "us-east1-c" # the zone where the instances are launched
network: "default" # the network/subnetwork in GCP to use
projectid: "" # name of the google cloud project
machine_type: "n1-standard-1" # size of the machine type to use
filesystem_size: 50 # amount in GBs of hard drive space to allocate
Expand Down
14 changes: 13 additions & 1 deletion dask_cloudprovider/gcp/instances.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def __init__(
filesystem_size=None,
source_image=None,
docker_image=None,
network=None,
env_vars=None,
ngpus=None,
gpu_type=None,
Expand All @@ -62,6 +63,7 @@ def __init__(
**kwargs,
):
super().__init__(**kwargs)

self.cluster = cluster
self.config = config
self.projectid = projectid or self.config.get("projectid")
Expand All @@ -76,6 +78,7 @@ def __init__(
self.env_vars = env_vars
self.filesystem_size = filesystem_size or self.config.get("filesystem_size")
self.ngpus = ngpus or self.config.get("ngpus")
self.network = network or self.config.get("network")
self.gpu_type = gpu_type or self.config.get("gpu_type")
self.gpu_instance = gpu_instance
self.bootstrap = bootstrap
Expand Down Expand Up @@ -111,7 +114,7 @@ def create_gcp_config(self):
"networkInterfaces": [
{
"kind": "compute#networkInterface",
"subnetwork": f"projects/{self.projectid}/regions/{self.general_zone}/subnetworks/default",
"subnetwork": f"projects/{self.projectid}/regions/{self.general_zone}/subnetworks/{self.network}",
"aliasIpRanges": [],
}
],
Expand Down Expand Up @@ -365,6 +368,13 @@ class GCPCluster(VMCluster):
https://cloudprovider.dask.org/en/latest/gcp.html#project-id
zone: str
The GCP zone to launch your cluster in. A full list can be obtained with ``gcloud compute zones list``.
network: str
The GCP VPC network/subnetwork to use. The default is `default`. If using firewall rules,
please ensure the following accesses are configured:
- egress 0.0.0.0/0 on all ports for downloading docker images and general data access
- ingress 10.0.0.0/8 on all ports for internal communication of workers
- ingress 0.0.0.0/0 on 8786-8787 for external accessibility of the dashboard/scheduler
- (optional) ingress 0.0.0.0/0 on 22 for ssh access
machine_type: str
The VM machine_type. You can get a full list with ``gcloud compute machine-types list``.
The default is ``n1-standard-1`` which is 3.75GB RAM and 1 vCPU
Expand Down Expand Up @@ -500,6 +510,7 @@ def __init__(
self,
projectid=None,
zone=None,
network=None,
machine_type=None,
source_image=None,
docker_image=None,
Expand Down Expand Up @@ -536,6 +547,7 @@ def __init__(
"zone": zone or self.config.get("zone"),
"machine_type": self.machine_type,
"ngpus": ngpus or self.config.get("ngpus"),
"network": network or self.config.get("network"),
"gpu_type": gpu_type or self.config.get("gpu_type"),
"gpu_instance": self.gpu_instance,
"bootstrap": self.bootstrap,
Expand Down
8 changes: 4 additions & 4 deletions dask_cloudprovider/gcp/tests/test_gcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ async def test_create_rapids_cluster():
worker_options={"rmm_pool_size": "15GB"},
asynchronous=True,
auto_shutdown=True,
boostrap=False,
bootstrap=False,
) as cluster:

assert cluster.status == Status.running
Expand Down Expand Up @@ -150,7 +150,8 @@ def gpu_mem():
def test_create_rapids_cluster_sync():
skip_without_credentials()
cluster = GCPCluster(
source_image="projects/nv-ai-infra/global/images/ngc-docker-11-20200916",
source_image="projects/nv-ai-infra/global/images/packer-1607527229",
network="dask-gcp-network-test",
zone="us-east1-c",
machine_type="n1-standard-1",
filesystem_size=50,
Expand All @@ -160,7 +161,7 @@ def test_create_rapids_cluster_sync():
worker_class="dask_cuda.CUDAWorker",
worker_options={"rmm_pool_size": "15GB"},
asynchronous=False,
boostrap=False,
bootstrap=False,
)

cluster.scale(1)
Expand All @@ -178,5 +179,4 @@ def gpu_mem():
for w, res in results.items():
assert "total" in res["gpu"][0]["fb_memory_usage"].keys()
print(res)

cluster.close()

0 comments on commit 59b142a

Please sign in to comment.