From d23d7d6f2d0c79f7c1c3152deb1005a265e8bbf5 Mon Sep 17 00:00:00 2001 From: root Date: Mon, 13 Sep 2021 14:31:48 +0800 Subject: [PATCH 1/3] update init --- ...13\350\257\225\346\234\215\345\212\241.md" | 31 ++++++-- ops/init_env.sh | 78 ++++++++++++++++--- 2 files changed, 94 insertions(+), 15 deletions(-) diff --git "a/docs/zh_CN/install/\347\273\204\345\273\272\346\200\247\350\203\275\350\207\252\345\212\250\345\214\226\346\265\213\350\257\225\346\234\215\345\212\241.md" "b/docs/zh_CN/install/\347\273\204\345\273\272\346\200\247\350\203\275\350\207\252\345\212\250\345\214\226\346\265\213\350\257\225\346\234\215\345\212\241.md" index 8c8d87f..274ea9b 100644 --- "a/docs/zh_CN/install/\347\273\204\345\273\272\346\200\247\350\203\275\350\207\252\345\212\250\345\214\226\346\265\213\350\257\225\346\234\215\345\212\241.md" +++ "b/docs/zh_CN/install/\347\273\204\345\273\272\346\200\247\350\203\275\350\207\252\345\212\250\345\214\226\346\265\213\350\257\225\346\234\215\345\212\241.md" @@ -3,6 +3,27 @@ ## 性能测试服务器初始化 + + + + +## Upgrade GPU Driver +https://en.opensuse.org/SDB:NVIDIA_drivers#:~:text=Procedure%201%20Add%20the%20Nvidia%20Repository.%20The%20NVIDIA,your%20hardware%20information%20into%20Nvidia%27s%20driver%20search%20engine. + +## Enable SSH +https://en.opensuse.org/SDB:OpenSSH_basics#:~:text=OpenSSH%2C%20SSHD%2C%20is%20installed%20in%20openSUSE%20by%20default.,it%20to%20the%20list.%20Save%20configuration%20and%20exit. + + +## Revert network manager to wicked + +https://doc.opensuse.org/documentation/leap/reference/html/book-reference/cha-network.html#sec-network-nm + +## Config network config +https://unix.stackexchange.com/questions/280552/static-ip-configuration-on-opensuse + + +## Install application (font, browser, editor, screenshot) + * 参考 @@ -706,11 +727,11 @@ remote_port = 7002 WantedBy=multi-user.target # 启动 frp 并设置开机启动 - systemctl stop frpc - systemctl disable frpc - systemctl start frpc - systemctl enable frpc - systemctl status frpc +sudo systemctl stop frpc +sudo systemctl disable frpc +sudo systemctl start frpc +sudo systemctl enable frpc +sudo systemctl status frpc ``` * 参考链接: diff --git a/ops/init_env.sh b/ops/init_env.sh index cc4c610..27c2358 100644 --- a/ops/init_env.sh +++ b/ops/init_env.sh @@ -11,21 +11,79 @@ # install GPU driver and toolkit # Install git -zypper install -y git -git clone https://github.com/discourse/discourse.git +tmux, moning brew, timeshift,htop,MCDU,timetrap -# Install docker -sudo zypper install -y docker python3-docker-compose -sudo systemctl enable docker -sudo usermod -G docker -a $USER -sudo systemctl restart docker -docker version +ventoy 多系统 启动盘 +# Uninstall old versions + sudo zypper remove -y docker \ + docker-client \ + docker-client-latest \ + docker-common \ + docker-latest \ + docker-latest-logrotate \ + docker-logrotate \ + docker-engine \ + runc +# Get opensuse version id +# if [ -e /etc/os-release ]; then +# VERSION_ID=$(cat /etc/os-release | grep VERSION_ID | grep -Eo '[0-9]+\.[0-9]+') +# else +# VERSION_ID=$(cat /usr/lib/os-release | grep VERSION_ID | grep -Eo '[0-9]+\.[0-9]+') +# fi + +# Install GPU Driver +zypper addrepo --refresh 'https://download.nvidia.com/opensuse/leap/$releasever' NVIDIA +# Get hardware information +sudo hwinfo --gfxcard | grep Model +sudo hwinfo --arch +# Install +sudo zypper se -s x11-video-nvidiaG0* +sudo zypper se nvidia-glG0* +sudo zypper in x11-video-nvidiaG05 +sudo zypper in nvidia-glG05 + + +# Add the package repositories +# accept the overwrite of /etc/docker/daemon.json + +distribution=$(. /etc/os-release;echo $ID$VERSION_ID) \ + && sudo zypper ar https://nvidia.github.io/nvidia-docker/${distribution}/nvidia-docker.repo \ + && sudo zypper ar https://download.opensuse.org/repositories/Virtualization:/containers/${distribution}/Virtualization:containers.repo \ + && sudo zypper ref \ + && sudo zypper install -y --allow-vendor-change 'docker >= 19.03' python3-docker-compose \ + && docker version \ + && sudo usermod -G docker -a $USER \ + && sudo systemctl --now enable docker \ + && sudo systemctl start docker \ + && sudo zypper install nvidia-docker2 \ + && sudo docker run --rm --gpus all nvidia/cuda:11.0-base nvidia-smi # init k8s and argo and MAMO and kubeflow # deploy discourse # https://github.com/discourse/discourse.git -curl -sSL https://raw.githubusercontent.com/bitnami/bitnami-docker-discourse/master/docker-compose.yml > docker-compose.yml -docker-compose up -d +# curl -sSL https://raw.githubusercontent.com/bitnami/bitnami-docker-discourse/master/docker-compose.yml > docker-compose.yml +# docker-compose up -d + +## config network +/etc/sysconfig/network/ifcfg-eth0 +BOOTPROTO='static' +IPADDR='192.168.2.77' +MTU='1500' +NAME='' +NETMASK='255.255.255.0' +STARTMODE='auto' +USERCONTROL='no' + +/etc/sysconfig/network/routes +default 192.168.2.1 - - + +# Install cuda +wget https://developer.download.nvidia.com/compute/cuda/11.4.1/local_installers/cuda_11.4.1_470.57.02_linux.run +sudo sh cuda_11.4.1_470.57.02_linux.run +# Install OBS +sudo zypper ar -cfp 90 'https://ftp.gwdg.de/pub/linux/misc/packman/suse/openSUSE_Leap_$releasever/' packman +sudo zypper dup --from packman --allow-vendor-chang + sudo zypper in obs-studio From 2d293e5acebbb1fa8e02ff21663817d5dd31d864 Mon Sep 17 00:00:00 2001 From: haiyuan Date: Thu, 23 Sep 2021 16:37:36 +0800 Subject: [PATCH 2/3] add pause codeEnv jobs --- ops/pause_codeEnv_job.py | 128 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 ops/pause_codeEnv_job.py diff --git a/ops/pause_codeEnv_job.py b/ops/pause_codeEnv_job.py new file mode 100644 index 0000000..0f2bfb8 --- /dev/null +++ b/ops/pause_codeEnv_job.py @@ -0,0 +1,128 @@ +#!/usr/bin/python3 +# -*- coding: UTF-8 -*- +""" pause current CODE-DEVELOPMENT jobs +# INFO: 暂停代码开发环境中的任务 +# VERSION: 1.0.0 +# EDITOR: thomas +# UPDATE: 2021-09-22 +""" +import http.client as client +import json +import hashlib +import time +import sys, getopt + +TEST_DATAS = { + "host": "aiarts.apulis.cn", + "https": "http", + "web_admin": {"userName":"admin","password":"4owfQN"}, + "token": "", + "header": {"Content-Type":"application/json;charset=UTF-8", "Accept-Language":"en-US,en;q=0.9,zh-TW;q=0.8,zh;q=0.7", "Accept":"application/json", "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"}, + "cookie": "", + "homepage": "/", + "login": "/custom-user-dashboard-backend/auth/login", + "logout": "/custom-user-dashboard-backend/auth/logout", + "get_codeEnv_summary": "/ai_arts/api/common/job/summary?jobType={job_type}&vcName={vc_name}", + "get_codeEnv_list": "/api/v2/clusters/DLWS/teams/{vc_name}/jobs?user=all&limit=9999", + "pause_job": "/api/clusters/DLWS/jobs/{job_id}/status" +} + +def security_passwd(passwd="DEFAULT"): + Md5Passwd = hashlib.md5() + Md5Passwd.update(passwd.encode("utf-8")) + SecurityPasswd = (Md5Passwd.hexdigest()).lower() + return SecurityPasswd + +def on_start(host="aiarts.apulis.cn", account={"userName":"admin","password":"4owfQN"}): + platform_host = host if host else client.HTTPConnection(TEST_DATAS["host"]) + admin_account = account if account else TEST_DATAS["web_admin"]["password"] + admin_account["password"] = security_passwd(admin_account["password"]) + data = json.dumps(admin_account) + conn = client.HTTPConnection(platform_host) + conn.request("POST", TEST_DATAS["login"], data, headers=TEST_DATAS["header"]) + response = json.load(conn.getresponse()) + try: + if response["success"]: + TEST_DATAS["token"] = response["token"] + TEST_DATAS["currentRole_id"] = response["currentRole"][0]["id"] + TEST_DATAS["header"]["Authorization"] = "Bearer " + TEST_DATAS["token"] + TEST_DATAS["header"]["cookie"] = "language=zh-CN; token={}".format(TEST_DATAS["token"]) + except KeyError: + response.raise_for_status() + return conn + +def on_stop(conn): + conn.request("GET", url=TEST_DATAS["logout"]) + conn.close() + + +def get_codeDev_jobs(conn, vc_name="platform"): + params = json.dumps({"limit":999,"user":"all",}) + headers = TEST_DATAS["header"] + url = TEST_DATAS["get_codeEnv_list"].format(vc_name=vc_name) + conn.request("GET", url, params, headers ) + response = json.load(conn.getresponse()) + return [iEnv["jobId"] for iEnv in response if (iEnv["jobType"] == u"codeEnv") and (iEnv["jobStatus"] != u"paused") and (iEnv["jobStatus"] != u"killed")] + +def pause_job(conn, job_id): + params = json.dumps({"status": "pausing"}) + headers = TEST_DATAS["header"] + url = TEST_DATAS["pause_job"].format(job_id=job_id) + conn.request("PUT", url, params, headers ) + response = json.load(conn.getresponse()) + print("=================================>>> Pause {} {} on time: {}".format(job_id, response["result"], time.asctime( time.localtime(time.time()) ))) + +def parse_opt(argv): + try: + opts, args = getopt.getopt(argv,"?h:u:p:",["host=","username=","passwd="]) + except getopt.GetoptError: + sys.exit(2) + for opt, arg in opts: + if opt == '-?': + print( + """ + 暂停代码开发环境中的任务 + -h : 设置平台访问地址(域名或IP) + -u : 设置平台管理员账号 + -p : 设置平台管理员账号 + -? : 显示帮助 + Example: + python3 ./pause_codeEnv_job.py -h aiarts.apulis.cn -u admin -p 4owfQN + """ + ) + sys.exit(2) + elif opt in ("-h", "--host"): + host = arg + elif opt in ("-u", "--username"): + username = arg + elif opt in ("-p", "--passwd"): + passwd = arg + else: + continue + return host, {"userName":username,"password":passwd} + +def main(argv): + host, account = parse_opt(argv) + conn = on_start(host, account) + for i in range(5): + job_list = get_codeDev_jobs(conn) + if len(job_list): + for iJob in job_list: + pause_job(conn, iJob) + on_stop(conn) + break + else: + time.sleep(3) + continue + +if __name__ == "__main__": + # 在主机系统设置定时任务 + # chmod +w /etc/crontab + ## 测试使用每5min备份一次 + ## echo "*/5 * * * * root python3 /root/ops_env/pause_codeEnv_job.py -h aiarts.apulis.cn -u admin -p 4owfQN>> /var/log/pause-codeEnv.log" >> /etc/crontab + # 每天晚上12点执行 + # echo "59 23 * * * root python3 /root/ops_env/pause_codeEnv_job.py -h aiarts.apulis.cn -u admin -p 4owfQN>> /var/log/pause-codeEnv.log" >> /etc/crontab + # chmod -w /etc/crontab + # service cron restart + ## python3 ./pause_codeEnv_job.py -h aiarts.apulis.cn -u admin -p 4owfQN + main(sys.argv[1:]) \ No newline at end of file From 74777549314a491130fb5fdc943a525f876e9978 Mon Sep 17 00:00:00 2001 From: haiyuan Date: Thu, 23 Sep 2021 16:46:42 +0800 Subject: [PATCH 3/3] update local temps --- .gitignore | 0 CONTRIBUTING.md | 0 LICENSE | 0 RELEASE.md | 0 example/jmeter/trace_user_footprint.jmx | 0 ops/conf/argo_install.yaml | 0 ops/conf/env_info.yaml | 0 ops/conf/kubernetes_dashboard_recommended.yaml | 0 ops/conf/metrics-server-components.yaml | 0 ops/conf/workflow-controller-configmap.yaml | 0 ops/docker_proxy.sh | 0 ops/init_k8s.sh | 0 ops/metrix_dashboard/user_metrix.yaml | 0 ops/sync_public_repo/github_action_sync_backend.yml | 0 ops/sync_public_repo/github_action_sync_dashboard.yml | 0 ops/sync_public_repo/github_action_sync_public.yml | 0 ops/sync_public_repo/sync_release.sh | 0 ops/update_packages.sh | 0 ops/update_packages_reset_harbor.sh | 0 ops/update_platform.sh | 0 requirements.ini | 0 testhub/testlib/csv_client.py | 0 testhub/testlib/fake_users.py | 0 testhub/testlib/postgres_client.py | 0 24 files changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 .gitignore mode change 100755 => 100644 CONTRIBUTING.md mode change 100755 => 100644 LICENSE mode change 100755 => 100644 RELEASE.md mode change 100755 => 100644 example/jmeter/trace_user_footprint.jmx mode change 100755 => 100644 ops/conf/argo_install.yaml mode change 100755 => 100644 ops/conf/env_info.yaml mode change 100755 => 100644 ops/conf/kubernetes_dashboard_recommended.yaml mode change 100755 => 100644 ops/conf/metrics-server-components.yaml mode change 100755 => 100644 ops/conf/workflow-controller-configmap.yaml mode change 100755 => 100644 ops/docker_proxy.sh mode change 100755 => 100644 ops/init_k8s.sh mode change 100755 => 100644 ops/metrix_dashboard/user_metrix.yaml mode change 100755 => 100644 ops/sync_public_repo/github_action_sync_backend.yml mode change 100755 => 100644 ops/sync_public_repo/github_action_sync_dashboard.yml mode change 100755 => 100644 ops/sync_public_repo/github_action_sync_public.yml mode change 100755 => 100644 ops/sync_public_repo/sync_release.sh mode change 100755 => 100644 ops/update_packages.sh mode change 100755 => 100644 ops/update_packages_reset_harbor.sh mode change 100755 => 100644 ops/update_platform.sh mode change 100755 => 100644 requirements.ini mode change 100755 => 100644 testhub/testlib/csv_client.py mode change 100755 => 100644 testhub/testlib/fake_users.py mode change 100755 => 100644 testhub/testlib/postgres_client.py diff --git a/.gitignore b/.gitignore old mode 100755 new mode 100644 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md old mode 100755 new mode 100644 diff --git a/LICENSE b/LICENSE old mode 100755 new mode 100644 diff --git a/RELEASE.md b/RELEASE.md old mode 100755 new mode 100644 diff --git a/example/jmeter/trace_user_footprint.jmx b/example/jmeter/trace_user_footprint.jmx old mode 100755 new mode 100644 diff --git a/ops/conf/argo_install.yaml b/ops/conf/argo_install.yaml old mode 100755 new mode 100644 diff --git a/ops/conf/env_info.yaml b/ops/conf/env_info.yaml old mode 100755 new mode 100644 diff --git a/ops/conf/kubernetes_dashboard_recommended.yaml b/ops/conf/kubernetes_dashboard_recommended.yaml old mode 100755 new mode 100644 diff --git a/ops/conf/metrics-server-components.yaml b/ops/conf/metrics-server-components.yaml old mode 100755 new mode 100644 diff --git a/ops/conf/workflow-controller-configmap.yaml b/ops/conf/workflow-controller-configmap.yaml old mode 100755 new mode 100644 diff --git a/ops/docker_proxy.sh b/ops/docker_proxy.sh old mode 100755 new mode 100644 diff --git a/ops/init_k8s.sh b/ops/init_k8s.sh old mode 100755 new mode 100644 diff --git a/ops/metrix_dashboard/user_metrix.yaml b/ops/metrix_dashboard/user_metrix.yaml old mode 100755 new mode 100644 diff --git a/ops/sync_public_repo/github_action_sync_backend.yml b/ops/sync_public_repo/github_action_sync_backend.yml old mode 100755 new mode 100644 diff --git a/ops/sync_public_repo/github_action_sync_dashboard.yml b/ops/sync_public_repo/github_action_sync_dashboard.yml old mode 100755 new mode 100644 diff --git a/ops/sync_public_repo/github_action_sync_public.yml b/ops/sync_public_repo/github_action_sync_public.yml old mode 100755 new mode 100644 diff --git a/ops/sync_public_repo/sync_release.sh b/ops/sync_public_repo/sync_release.sh old mode 100755 new mode 100644 diff --git a/ops/update_packages.sh b/ops/update_packages.sh old mode 100755 new mode 100644 diff --git a/ops/update_packages_reset_harbor.sh b/ops/update_packages_reset_harbor.sh old mode 100755 new mode 100644 diff --git a/ops/update_platform.sh b/ops/update_platform.sh old mode 100755 new mode 100644 diff --git a/requirements.ini b/requirements.ini old mode 100755 new mode 100644 diff --git a/testhub/testlib/csv_client.py b/testhub/testlib/csv_client.py old mode 100755 new mode 100644 diff --git a/testhub/testlib/fake_users.py b/testhub/testlib/fake_users.py old mode 100755 new mode 100644 diff --git a/testhub/testlib/postgres_client.py b/testhub/testlib/postgres_client.py old mode 100755 new mode 100644