Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions jenkins/helper/launch_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from traceback import print_exc

from dmesg import DmesgWatcher, dmesg_runner
from overload_thread import spawn_overload_watcher_thread, shutdown_overload_watcher_thread
from site_config import SiteConfig, IS_LINUX
from testing_runner import TestingRunner

Expand Down Expand Up @@ -36,6 +37,7 @@ def launch_runner(runner, create_report):
dmesg_thread = Thread(target=dmesg_runner, args=[dmesg], name="dmesg")
dmesg.name = "dmesg"
dmesg_thread.start()
spawn_overload_watcher_thread(runner.cfg)
time.sleep(3)
print(runner.scenarios)
try:
Expand All @@ -59,6 +61,7 @@ def launch_runner(runner, create_report):
runner.create_testruns_file()
if IS_LINUX:
dmesg.end_run()
shutdown_overload_watcher_thread()
print('joining dmesg threads')
dmesg_thread.join()
runner.print_and_exit_closing_stance()
53 changes: 53 additions & 0 deletions jenkins/helper/overload_thread.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/bin/env python3
""" check for resource shortage of the test host """
# pylint: disable=global-statement disable=global-variable-not-assigned
from threading import Thread, Lock
import time
from datetime import datetime
import psutil
# from tools.socket_counter import get_socket_count
from tools.killall import get_all_processes_stats_json

# Guards END_THREAD: shutdown_overload_watcher_thread() sets the flag and
# overload_thread() reads it, each while holding this lock.
END_THREAD_LOCK = Lock()
# Stop request for the watcher loop; set to True on shutdown.
END_THREAD = False
# The watcher Thread object; None until spawn_overload_watcher_thread() ran.
OVERLOAD_THREAD = None


def overload_thread(sitecfg, _):
    """Watch the system load and log process statistics while it is too high.

    The load average is polled until END_THREAD is set. Each time one of the
    configured thresholds is exceeded, a JSON line of the form
    ``["<timestamp>", <process statistics>]`` is written to
    ``overloads.jsonl`` below ``sitecfg.basedir``. The file is truncated when
    the thread starts.

    Args:
        sitecfg: configuration object; ``basedir``, ``max_load``,
            ``max_load1`` and ``overload`` are read from it.
        _: unused; kept so callers can keep passing a second argument.
    """
    print("starting load monitoring thread")
    report_file = sitecfg.basedir / "overloads.jsonl"
    print(f"report file: {report_file}")
    with open(report_file, "w+", encoding="utf-8") as jsonl_file:
        while True:
            # NOTE: socket-count monitoring (tools.socket_counter) is
            # currently disabled.
            load = psutil.getloadavg()
            overloaded = (
                load[0] > sitecfg.max_load
                or load[1] > sitecfg.max_load1
                or load[0] > sitecfg.overload
            )
            if overloaded:
                # Gathering the statistics samples all processes twice, one
                # second apart, so this call blocks for about a second.
                jsonl_file.write(
                    f'["{datetime.now()}", {get_all_processes_stats_json()}]\n'
                )
                # Push the record to disk right away: if the test run gets
                # killed while the host is overloaded, buffered records would
                # otherwise be lost.
                jsonl_file.flush()
            time.sleep(1)
            with END_THREAD_LOCK:
                if END_THREAD:
                    break


def spawn_overload_watcher_thread(siteconfig):
    """Launch the overload watcher thread.

    Args:
        siteconfig: handed to overload_thread() as its site configuration.
    """
    global OVERLOAD_THREAD, END_THREAD
    # Clear a stop request left over from an earlier shutdown; otherwise a
    # re-spawned watcher would leave its loop after the first iteration.
    with END_THREAD_LOCK:
        END_THREAD = False
    OVERLOAD_THREAD = Thread(target=overload_thread, args=(siteconfig, True))
    OVERLOAD_THREAD.start()


def shutdown_overload_watcher_thread():
    """Ask the overload watcher thread to stop and wait for it to finish."""
    global END_THREAD
    with END_THREAD_LOCK:
        END_THREAD = True
    # The lock is released before joining: the watcher has to take it to
    # see the stop request.
    watcher = OVERLOAD_THREAD
    if watcher is None:
        return
    watcher.join()
1 change: 1 addition & 0 deletions jenkins/helper/site_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ def __init__(self, definition_file):
# pylint: disable=too-many-statements disable=too-many-branches
print_env()
init_temp()
self.basedir = Path.cwd()
self.datetime_format = "%Y-%m-%dT%H%M%SZ"
self.trace = False
self.portbase = 7000
Expand Down
53 changes: 53 additions & 0 deletions jenkins/helper/tools/killall.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#!/bin/env python3
""" manipulate processes """
import time
import json
import sys
import psutil

Expand Down Expand Up @@ -41,3 +43,54 @@ def kill_all_arango_processes():
process.kill()
except psutil.NoSuchProcess: # pragma: no cover
pass

def gather_process_thread_statistics(p):
    """Collect a statistics snapshot of one process and its threads.

    Args:
        p: process object to inspect; the callers in this module pass the
            objects yielded by psutil.process_iter().

    Returns:
        A dict whose 'process' key holds a one-element list with the process
        snapshot, plus one entry per thread, keyed by thread id, holding that
        thread's 'user' and 'sys' times.
    """
    snapshot = {
        'time': time.ctime(),
        'pid': p.pid,
        'name': p.name(),
        'percent': p.cpu_percent(),
        'iocounters': p.io_counters(),
        'ctxSwitches': p.num_ctx_switches(),
        'numfds': p.num_fds(),
        'cpu_times': p.cpu_times(),
        'meminfo': p.memory_full_info(),
        'netcons': p.connections()
    }
    # Kept as a list so that a later snapshot can be appended next to it.
    stats = {'process': [snapshot]}
    stats.update(
        (thread.id, {'user': thread.user_time, 'sys': thread.system_time})
        for thread in p.threads()
    )
    return stats

def add_delta(p1, p2):
    """Store per-thread CPU time deltas between two snapshots in the first.

    Args:
        p1: earlier snapshot; modified in place.
        p2: later snapshot of the same process.

    For every thread id present in both snapshots, 'd_user' and 'd_sys'
    (later minus earlier) are added to the thread's entry in p1. The process
    entry of p2 is then appended to p1's 'process' list.
    """
    for tid, earlier in p1.items():
        # The 'process' entry is not a thread; threads missing from the
        # later snapshot get no delta.
        if tid == 'process' or tid not in p2:
            continue
        later = p2[tid]
        earlier['d_user'] = later['user'] - earlier['user']
        earlier['d_sys'] = later['sys'] - earlier['sys']
    p1['process'].append(p2['process'][0])

def get_all_processes_stats_json():
    """Sample all inspectable processes twice and return the result as JSON.

    A first pass records a snapshot per process; after one second a second
    pass adds the per-thread CPU time deltas and the second process snapshot
    to it. Processes that cannot be inspected, that exit in between, or that
    only show up in the second pass are left out or keep only their first
    snapshot.

    Returns:
        A JSON string mapping "p<pid>" to the structure built by
        gather_process_thread_statistics() and add_delta().
    """
    process_full_list = {}
    for first_pass in (True, False):
        for process in psutil.process_iter():
            name = ""
            try:
                name = process.name()
                # Presumably filters init and kernel threads (pid 1, pid 2 and
                # the children of pid 2 on Linux) -- TODO confirm.
                if process.ppid() == 2 or process.pid in (1, 2):
                    continue
                key = f"p{process.pid}"
                if first_pass:
                    process_full_list[key] = gather_process_thread_statistics(process)
                elif key in process_full_list:
                    # Only processes seen in the first pass have a baseline
                    # to compute a delta against; one that was started (or
                    # became readable) in between is skipped instead of
                    # raising a KeyError.
                    add_delta(process_full_list[key], gather_process_thread_statistics(process))
            except (psutil.AccessDenied, psutil.NoSuchProcess):
                # Not permitted to inspect, or the process went away while we
                # were looking at it; both are expected during a scan.
                pass
            except Exception as ex:
                # Best effort: one odd process must not abort the whole scan.
                print(f"while inspecting {name}: {ex} ")
        if first_pass:
            # Interval between the two samples the deltas are computed over.
            time.sleep(1)
    return json.dumps(process_full_list)