# Diagnostic data for the Dask scheduler connection issue

## DASK k8s resources

In [1]:
# List the pods visible in the current namespace (scheduler, workers, notebook, ...).
!kubectl get pods

NAME                                               READY   STATUS    RESTARTS   AGE
dask-scheduler-5754b49469-tfrxh                    1/1     Running   1          45h
dask-workers-598f8bc9d4-pwbwl                      1/1     Running   2          45h
dask-workers-598f8bc9d4-r8q89                      1/1     Running   2          45h
dask-workers-598f8bc9d4-kcbjf                      1/1     Running   2          45h
dask-workers-598f8bc9d4-qqzbn                      1/1     Running   3          45h
dask-workers-598f8bc9d4-4k9qv                      1/1     Running   2          45h
ml-pipeline-ui-artifact-5dd95d555b-6w4sz           2/2     Running   8          5d
dask-notebook-0                                    2/2     Running   2          45h
dask-workers-598f8bc9d4-k5gt9                      1/1     Running   2          45h
ml-pipeline-visualizationserver-6b44c6759f-fqpd9   2/2     Running   8          5d


## Show the k8s Service definition of dask-scheduler

In [2]:
# Dump the full manifest of the dask-scheduler Service as YAML.
!kubectl get svc dask-scheduler -o yaml

apiVersion: v1
kind: Service
metadata:
  annotations:
    kubectl.kubernetes.io/last-applied-configuration: |
      {"apiVersion":"v1","kind":"Service","metadata":{"annotations":{},"labels":{"app":"dask-scheduler","component":"scheduler"},"name":"dask-scheduler","namespace":"kubeflow-user"},"spec":{"ports":[{"name":"dask-scheduler","port":8786,"protocol":"TCP"}],"selector":{"app":"dask-scheduler","component":"scheduler"}}}
  creationTimestamp: "2021-12-02T03:02:56Z"
  labels:
    app: dask-scheduler
    component: scheduler
  name: dask-scheduler
  namespace: kubeflow-user
  resourceVersion: "282945"
  uid: b4ced791-2325-4299-9c92-2b07b5c21ff4
spec:
  clusterIP: 10.43.193.58
  clusterIPs:
  - 10.43.193.58
  ipFamilies:
  - IPv4
  ipFamilyPolicy: SingleStack
  ports:
  - name: dask-scheduler
    port: 8786
    protocol: TCP
    targetPort: 8786
  selector:
    app: dask-scheduler
    component: scheduler
  sessionAffinity: None
  type: ClusterIP
status:
  loadBalancer: {}


In [3]:
# Human-readable summary of the dask-scheduler Service, including its Endpoints.
!kubectl describe svc dask-scheduler

Name:              dask-scheduler
Namespace:         kubeflow-user
Labels:            app=dask-scheduler
                   component=scheduler
Annotations:       <none>
Selector:          app=dask-scheduler,component=scheduler
Type:              ClusterIP
IP Family Policy:  SingleStack
IP Families:       IPv4
IP:                10.43.193.58
IPs:               10.43.193.58
Port:              dask-scheduler  8786/TCP
TargetPort:        8786/TCP
Endpoints:         10.42.0.149:8786
Session Affinity:  None
Events:            <none>


In [5]:
# Check that the service name resolves to the correct IP address
# (the expected value is the Service's cluster IP reported by kubectl above).
import socket
socket.gethostbyname('dask-scheduler.kubeflow-user.svc.cluster.local')

'10.43.193.58'

In [6]:
# Inspect the Istio EnvoyFilter named add-header.
!kubectl describe envoyfilter add-header

Name:         add-header
Namespace:    kubeflow-user
Labels:       <none>
Annotations:  <none>
API Version:  networking.istio.io/v1alpha3
Kind:         EnvoyFilter
Metadata:
  Creation Timestamp:  2021-12-02T03:02:56Z
  Generation:          1
  Managed Fields:
    API Version:  networking.istio.io/v1alpha3
    Fields Type:  FieldsV1
    fieldsV1:
      f:metadata:
        f:annotations:
          .:
          f:kubectl.kubernetes.io/last-applied-configuration:
      f:spec:
        .:
        f:configPatches:
    Manager:         kubectl-client-side-apply
    Operation:       Update
    Time:            2021-12-02T03:02:56Z
  Resource Version:  282975
  UID:               153988fa-eab1-4a1b-ba27-b0bae82e386f
Spec:
  Config Patches:
    Apply To:  VIRTUAL_HOST
    Match:
      Context:  SIDECAR_OUTBOUND
      Route Configuration:
        Vhost:
          Name:  dask-scheduler.kubeflow-user.svc.cluster.local:8786
          Route:
            Name:  default
    Patch:
      Operation:  ME

## Test connecting to dask-scheduler by service name and IP address

In [7]:
from distributed import Client

## Test connecting with dask scheduler service name

In [8]:
# Try to reach the scheduler through its Kubernetes Service DNS name.
# The context manager closes the client on every exit path, so a failure
# that happens after the connection is established cannot leave a client
# (and its connection to the scheduler) dangling in the kernel.
try:
    with Client('tcp://dask-scheduler.kubeflow-user.svc.cluster.local:8786') as client:
        print(f'sucessfully connected: {client}')
except Exception as e:
    # Broad catch is deliberate: this is a diagnostic cell, so any failure
    # (e.g. a handshake timeout) is reported in a banner instead of raised.
    print(
        '***\n'
        '*\n'
        f'*FAILED TO CONNECT with service name: {e}\n'
        '*\n'
        '***'
    )

***
*
*FAILED TO CONNECT with service name: Timed out during handshake while connecting to tcp://dask-scheduler.kubeflow-user.svc.cluster.local:8786 after 30 s
*
***


## Test connecting with dask scheduler endpoint IP Address

In [9]:
# Retrieve the dask scheduler endpoint address (the IP and port backing the
# Service) so it can be used to connect without going through the service name.
# Only the first address and first port of the first matching Endpoints
# object are read.

# `name = !cmd` is IPython syntax: the command's output lines are captured
# into a list-like object.
ip_address = !kubectl get endpoints -l app=dask-scheduler -o jsonpath='{.items[0].subsets[0].addresses[0].ip}'
port_number = !kubectl get endpoints -l app=dask-scheduler -o jsonpath='{.items[0].subsets[0].ports[0].port}'
# Element 0 is the first output line of each command; join them as "ip:port".
dask_scheduler_endpoint = ip_address[0] + ':' + port_number[0]
# Bare last expression so the notebook displays the resulting value.
dask_scheduler_endpoint

'10.42.0.149:8786'

In [10]:
# Try to reach the scheduler directly at the endpoint "ip:port" held in
# dask_scheduler_endpoint, i.e. without resolving the service name.
# The context manager closes the client on every exit path, so a failure
# that happens after the connection is established cannot leave a client
# (and its connection to the scheduler) dangling in the kernel.
try:
    with Client('tcp://' + dask_scheduler_endpoint) as client:
        print(f'sucessfully connected: {client}')
except Exception as e:
    # Broad catch is deliberate: this is a diagnostic cell, so any failure
    # is reported in a banner instead of raised.
    print(
        '***\n'
        '*\n'
        f'*FAILED TO CONNECT with ip address: {e}\n'
        '*\n'
        '***'
    )

sucessfully connected: <Client: 'tcp://10.42.0.149:8786' processes=6 threads=12, memory=11.18 GiB>
