diff --git a/README.md b/README.md index 4cbc72a426a2..bd6f3a88aafe 100644 --- a/README.md +++ b/README.md @@ -347,7 +347,7 @@ sudo make install ### Building Docker images -GraphScope ships with a [Dockerfile](k8s/graphscope.Dockerfile) that can build docker images for releasing. The images are built on a `builder` image with all dependencies installed and copied to +GraphScope ships with a [Dockerfile](k8s/dockerfiles/graphscope-dev.Dockerfile) that can build docker images for releasing. The images are built on a `builder` image with all dependencies installed and copied to a `runtime-base` image. To build images with latest version of GraphScope, go to the `k8s/internal` directory under root directory and run this command. ```bash diff --git a/coordinator/gscoordinator/cluster_builder.py b/coordinator/gscoordinator/cluster_builder.py index 968a077cd234..dcb47916cc9f 100644 --- a/coordinator/gscoordinator/cluster_builder.py +++ b/coordinator/gscoordinator/cluster_builder.py @@ -163,6 +163,7 @@ def __init__( self._vineyard_requests = {"cpu": vineyard_cpu, "memory": vineyard_mem} self._analytical_requests = {"cpu": engine_cpu, "memory": engine_mem} + # Should give executor a smaller value, since it doesn't need to load the graph self._executor_requests = {"cpu": "2000m", "memory": engine_mem} self._learning_requests = {"cpu": "1000m", "memory": "256Mi"} self._frontend_requests = {"cpu": "200m", "memory": "512Mi"} @@ -265,10 +266,15 @@ def get_engine_container_helper( ) return container + def _get_tail_if_exists_cmd(self, fname: str): + return ( + f"while true; do if [ -e {fname} ]; then tail -F {fname}; fi; sleep 1; done" + ) + def get_analytical_container(self, volume_mounts, with_java=False): name = self.analytical_container_name image = self._analytical_image if not with_java else self._analytical_java_image - args = ["tail", "-f", "/dev/null"] + args = ["bash", "-c", self._get_tail_if_exists_cmd("/tmp/grape_engine.INFO")] container = 
self.get_engine_container_helper( name, image, @@ -292,7 +298,11 @@ def get_analytical_container(self, volume_mounts, with_java=False): def get_interactive_executor_container(self, volume_mounts): name = self.interactive_executor_container_name image = self._interactive_executor_image - args = ["tail", "-f", "/dev/null"] + args = [ + "bash", + "-c", + self._get_tail_if_exists_cmd("/var/log/graphscope/current/executor.log"), + ] container = self.get_engine_container_helper( name, image, @@ -445,7 +455,7 @@ def get_engine_headless_service(self): "ClusterIP", ports, self._engine_labels, None ) - # Necessary, create a headless service for statefulset + # Necessary, create a headless service for statefulsets service_spec.cluster_ip = "None" service = ResourceBuilder.get_service( self._namespace, name, service_spec, self._engine_labels @@ -534,7 +544,11 @@ def get_graphlearn_service_endpoint(self, api_client, object_id, pod_host_ip_lis def get_interactive_frontend_container(self): name = self.interactive_frontend_container_name image = self._interactive_frontend_image - args = ["tail", "-f", "/dev/null"] + args = [ + "bash", + "-c", + self._get_tail_if_exists_cmd("/var/log/graphscope/current/frontend.log"), + ] container = kube_client.V1Container(name=name, image=image, args=args) container.image_pull_policy = self._image_pull_policy container.resources = ResourceBuilder.get_resources( diff --git a/docs/development/how_to_debug.md b/docs/development/how_to_debug.md index 1eb39d42f8c5..cb4849e2d251 100644 --- a/docs/development/how_to_debug.md +++ b/docs/development/how_to_debug.md @@ -2,9 +2,37 @@ This document shows how to debugging GraphScope under various conditions. 
### Debugging on local deployment +## Find the logs + +Most of the logs are streamed through the stdout of the client; you can control the log level with: + +```python +import graphscope +graphscope.set_option(show_log=True) +graphscope.set_option(log_level='DEBUG') # could also be INFO, ERROR +``` + +GraphScope is composed of three engines; the detailed log location of each engine is: + +- Analytical Engine: `/tmp/grape_engine.INFO` +- Interactive Engine: Inside `/var/log/graphscope/`, or `$HOME/.local/log/graphscope` if GraphScope doesn't have write permission for `/var/log`. You may find several folders named with a long number, which is the object id of the graph. There is also a `current` folder that links to the log folder of the most recently created interactive instance. +- Learning Engine: `graphlearn.INFO` in the current directory. + + ### Debugging on Kubernetes deployment +## Find the logs + +In a Kubernetes environment, most of the logs are still output to the console; in addition, you can find detailed logs in each pod's stdout, or in files inside each pod. + +Note: You can use `kubectl logs POD_NAME` to inspect the stdout of a pod. Use `kubectl logs POD_NAME -c CONTAINER_NAME` to inspect a specific container inside the pod. + +- Coordinator: The stdout of the coordinator pod. +- Analytical Engine: The stdout of the engine container in the engine pod. +- Interactive Engine: The stdout of the executor container in the engine pod for the executor log, and the stdout of the interactive-frontend pod for the frontend log. The log files reside in `/var/log/graphscope` of each container, respectively. + + ## Commands for Debugging Here is list with commands usually used for checking the status of the GraphScope deployment on K8s. 
diff --git a/docs/frequently_asked_questions.rst b/docs/frequently_asked_questions.rst index 5adc5f650faa..9635298c5210 100644 --- a/docs/frequently_asked_questions.rst +++ b/docs/frequently_asked_questions.rst @@ -32,7 +32,8 @@ If you don't find an answer to your question here, feel free to file an `Issues` graphscope.set_option(show_log=True) - If you are running GraphScope in k8s, you can use `kubectl describe/logs `_ to check the log/status of the cluster. If the disk storage is accessible(on local or via Pods), you may also find logs in `/tmp/gs/runtime/logs`. + If you are running GraphScope in k8s, you can use `kubectl describe/logs `_ to check the log/status of the pods of GraphScope. + If the disk storage is accessible(on local or via Pods), you may also find logs in `/var/log/graphscope/current` or `$HOME/.local/log/graphscope`. 4. Why I find more Pods than expected with command `kubectl get pod`? diff --git a/interactive_engine/assembly/src/bin/graphscope/giectl b/interactive_engine/assembly/src/bin/graphscope/giectl index 5a7c397f0208..d80b4efe4a74 100755 --- a/interactive_engine/assembly/src/bin/graphscope/giectl +++ b/interactive_engine/assembly/src/bin/graphscope/giectl @@ -80,6 +80,10 @@ start_frontend() { declare -r pid_dir=${GRAPHSCOPE_RUNTIME}/pid/${object_id} mkdir -p ${log_dir} ${config_dir} ${pid_dir} + # make a "current" link + unlink ${GS_LOG}/current || true + ln -s ${log_dir} ${GS_LOG}/current + declare java_opt="-server -verbose:gc -Xloggc:${log_dir}/frontend.gc.log @@ -149,6 +153,10 @@ start_executor() { export LD_LIBRARY_PATH=${GRAPHSCOPE_HOME}/lib:${LD_LIBRARY_PATH} export DYLD_LIBRARY_PATH=${GRAPHSCOPE_HOME}/lib:${DYLD_LIBRARY_PATH} + # make a "current" link + unlink ${GS_LOG}/current || true + ln -s ${log_dir} ${GS_LOG}/current + # set executor config file sed -e "s@GRAPH_NAME@${object_id}@g" \ -e "s@VINEYARD_OBJECT_ID@${object_id}@g" \ @@ -206,9 +214,6 @@ create_gremlin_instance_on_local() { mkdir -p ${GS_LOG} declare -r 
log_dir=${GS_LOG}/${object_id} - # make a "current" link - unlink ${GS_LOG}/current || true - ln -s ${log_dir} ${GS_LOG}/current # Frontend use executor rpc port network_servers=""