Skip to content

Commit

Permalink
Use internal IP whilst in GCE (#232)
Browse files Browse the repository at this point in the history
* Add util to check if running in GCE env

* Use internal IP when running inside GCE

* Add ip/port to the msg to ease debugging
  • Loading branch information
ravwojdyla committed Jan 4, 2021
1 parent 5fba605 commit e93069b
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 7 deletions.
13 changes: 8 additions & 5 deletions dask_cloudprovider/gcp/instances.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@
VMInterface,
SchedulerMixin,
)
from dask_cloudprovider.gcp.utils import build_request

from dask_cloudprovider.gcp.utils import build_request, is_inside_gce

from distributed.core import Status

Expand Down Expand Up @@ -293,11 +292,15 @@ async def start_scheduler(self):
self.cluster._log("Creating scheduler instance")
self.internal_ip, self.external_ip = await self.create_vm()

if self.config.get("public_ingress", True):
# scheduler is publicly available
if self.config.get("public_ingress", True) and not is_inside_gce():
# scheduler must be publicly available, and firewall
# needs to be in place to allow access to 8786 on
# the external IP
self.address = f"tcp://{self.external_ip}:8786"
else:
# scheduler is only accessible within VPC
# if the client is running inside GCE environment
# it's better to use internal IP, which doesn't
# require firewall setup
self.address = f"tcp://{self.internal_ip}:8786"
await self.wait_for_scheduler()

Expand Down
12 changes: 11 additions & 1 deletion dask_cloudprovider/gcp/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
from dask_cloudprovider.gcp.utils import build_request
import pytest

from dask_cloudprovider.gcp.utils import build_request, is_inside_gce


def test_build_request():
assert build_request()(None, lambda x: x, "https://example.com")


@pytest.mark.xfail(
is_inside_gce(), reason="Fails if you run this test on GCE environment"
)
def test_is_gce_env():
# Note: this test isn't super valuable, but at least we run the code
assert is_inside_gce() is False
19 changes: 19 additions & 0 deletions dask_cloudprovider/gcp/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,22 @@ def inner(http, *args, **kwargs):
return googleapiclient.http.HttpRequest(new_http, *args, **kwargs)

return inner


def is_inside_gce() -> bool:
"""
Returns True is the client is running in the GCE environment,
False otherwise.
Doc: https://cloud.google.com/compute/docs/storing-retrieving-metadata
"""
h = httplib2.Http()
try:
resp_headers, _ = h.request(
"http://metadata.google.internal/computeMetadata/v1/",
headers={"metadata-flavor": "Google"},
method="GET",
)
except (httplib2.HttpLib2Error, OSError):
return False
return True
2 changes: 1 addition & 1 deletion dask_cloudprovider/generic/vmcluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ async def wait_for_scheduler(self):
_, address = self.address.split("://")
ip, port = address.split(":")

self.cluster._log("Waiting for scheduler to run")
self.cluster._log(f"Waiting for scheduler to run at {ip}:{port}")
while not is_socket_open(ip, port):
await asyncio.sleep(0.1)
self.cluster._log("Scheduler is running")
Expand Down

0 comments on commit e93069b

Please sign in to comment.