Skip to content

Commit

Permalink
Update report sbox option
Browse files Browse the repository at this point in the history
  • Loading branch information
ashki23 committed Dec 9, 2021
1 parent 0f04a19 commit b526bb1
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 39 deletions.
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ Sbox is a toolbox for Slurm that provides information about users' accounts and
- `--whodat2`: Show users' information by name. It uses the `ldapsearch` command and returns empty output if the cluster does not use LDAP.
- `--agent`: Start, stop and list the user's ssh-agents on the current host. It requires one of the start/stop/list options as an argument. Use `ssh -o StrictHostKeyChecking=no` to disable the host key acceptance prompt.
- `--report`: Show current cluster utilization based on the running jobs. It uses the Slurm `sinfo` and `squeue` commands.
- `--report2`: Show cluster utilization based on ended jobs in last day. It uses Slurm `sacct` command.

**Examples**

Expand Down
40 changes: 10 additions & 30 deletions bin/sbox
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ parser.add_argument('--whodat', help = 'show users informations by uid', metavar
parser.add_argument('--whodat2', help = 'show users informations by name', metavar = 'UNAME')
parser.add_argument('--agent', choices = ['start','stop','list'], help = 'start/stop/list ssh-agents on a host')
parser.add_argument('--report', action = 'store_true', help = 'show current cluster utilization')
parser.add_argument('--report2', action = 'store_true', help = 'show cluster utilization in last 24 hours')
args = parser.parse_args()

if len(sys.argv) == 1:
Expand Down Expand Up @@ -383,35 +382,16 @@ if args.report:
if len(part_all) == 0:
print('No CPU/GPU partition found in the config file!')
sys.exit(1)

print('date,partition,type,total,allocation,utilization')
for c in part_cpu:
os.system(f"""
(date +"%F"| tr "\n" " " ; echo "{c} cpu" | tr "\n" " " ; sinfo --partition {c} --Node --format %C | cut --delimiter '/' --fields 1,4 | tr '/' ' ' | awk '{{ sum1 += $1; sum2 += $2 }} END {{ print sum2, sum1, sum1/sum2 }}') | tr " " ","
""")
for g in part_gpu:
os.system(f"""
gpu_util=`(date +"%F"| tr "\n" " " ; echo "{g} gpu" | tr "\n" " " ; sinfo -p {g} -o %n,%G | grep -Po '(?<=:)\d' | awk '{{ sum1 += $1 }} END {{ print sum1 }}' | tr "\n" " " ; squeue -O jobid,partition,gres,state,username | grep RUNNING | grep -i {g} | awk '{{ print $3 }}' | awk 'BEGIN {{ FS=":" }} {{ total+=$2 }} END {{ print total }}')`
(echo $gpu_util | tr "\n" " " ; if [ -z `echo $gpu_util | awk '{{ print $5 }}'` ]; then echo "0 0"; else echo `echo $gpu_util | awk '{{ print $5/$4 }}'`; fi) | tr " " ","
""")

if args.report2:
if len(part_cpu) == 0:
print('No CPU partition found in the config file!')
sys.exit(1)

print('date,partition,total_corehour,allocation_corehour,utilization')
print('date,partition,type,total,allocation,utilization')
for c in part_cpu:
tocpu = os.popen(f"sinfo --partition {c} --Node --format %C | cut --delimiter '/' --fields 4 | awk '{{ sum1 += $1 }} END {{ print sum1 }}'").read().strip()
data_ = os.popen(f"sacct --partition {c} --allusers --parsable2 --noheader --allocation --duplicates --format partition,start,end,ncpus --state CANCELLED,COMPLETED,FAILED,NODE_FAIL,PREEMPTED,TIMEOU --starttime $(date --date='1 day ago' +'%Y-%m-%d-%H:%M') --endtime $(date +'%Y-%m-%d-%H:%M')").read().strip().split("\n")
if len(data_[0]) > 0:
tmcpu = []
for i in data_:
rw_ = i.split('|')
tm1 = time.strptime(rw_[1], '%Y-%m-%dT%H:%M:%S')
tm2 = time.strptime(rw_[2], '%Y-%m-%dT%H:%M:%S')
dif = time.mktime(tm2) - time.mktime(tm1) # running time per second
tmcpu.append(int(rw_[3]) * int(dif))
else:
tmcpu = [0]
print(time.strftime('%Y-%m-%d'), c, round(int(tocpu)*24), round(sum(tmcpu)/3600), round(sum(tmcpu)/(int(tocpu)*86400),2), sep = ',')
alcpu = os.popen(f"sinfo --partition {c} --Node --format %C | cut --delimiter '/' --fields 1 | awk '{{ sum1 += $1 }} END {{ print sum1 }}'").read().strip()
print(time.strftime('%Y-%m-%d'), c, "cpu", tocpu, alcpu, round(int(alcpu)/int(tocpu),2),sep = ',')

for g in part_gpu:
togpu = os.popen(f"sinfo -p {g} -o %n,%G | grep -Po '(?<=:)\d' | awk '{{ sum1 += $1 }} END {{ print sum1 }}'").read().strip()
algpu = os.popen(f"squeue -O partition,gres,state | grep RUNNING | grep -i {g} | awk '{{ print $2 }}' | grep -Po '(?<=:)\d' | awk '{{ sum1 += $1 }} END {{ print sum1 }}'").read().strip()
if len(algpu) == 0:
algpu = 0
print(time.strftime('%Y-%m-%d'), g, "gpu", togpu, algpu, round(int(algpu)/int(togpu),2),sep = ',')
2 changes: 0 additions & 2 deletions docs/sbox.rst
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,6 @@ Command line options
key acceptances.
- ``--report``: Show current cluster utilization based on the running
jobs. It uses the Slurm ``sinfo`` and ``squeue`` commands.
- ``--report2``: Show cluster utilization based on ended jobs in last
day. It uses Slurm ``sacct`` command.

**Examples**

Expand Down
7 changes: 1 addition & 6 deletions share/man/man1/sbox.1
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
.SH NAME
sbox \- a simple toolbox for Slurm
.SH SYNOPSIS
sbox [-h] [-a] [-f] [-g] [-q] [-j JOBID] [-c] [-p] [-u UID] [-v] [--eff JOBID] [--history {day,week,month,year}] [--pending] [--running] [--cancel JOBID] [--qos] [--quota] [--ncpu] [--ngpu] [--gpu] [--license] [--reserve] [--topusage] [--whodat UID] [--whodat2 UNAME] [--agent {start,stop,list}] [--report] [--report2]
sbox [-h] [-a] [-f] [-g] [-q] [-j JOBID] [-c] [-p] [-u UID] [-v] [--eff JOBID] [--history {day,week,month,year}] [--pending] [--running] [--cancel JOBID] [--qos] [--quota] [--ncpu] [--ngpu] [--gpu] [--license] [--reserve] [--topusage] [--whodat UID] [--whodat2 UNAME] [--agent {start,stop,list}] [--report]
.br
.SH DESCRIPTION
.PP
Expand Down Expand Up @@ -148,11 +148,6 @@ key acceptances.
Show current cluster utilization based on the
running jobs.
It uses the Slurm sinfo and squeue commands.
.TP
.B --report2
Show cluster utilization based on ended jobs in
last day.
It uses Slurm sacct command.
.PP
Examples
.PP
Expand Down

0 comments on commit b526bb1

Please sign in to comment.