Skip to content

Commit

Permalink
Add a new report option to sbox
Browse files Browse the repository at this point in the history
  • Loading branch information
ashki23 committed Dec 8, 2021
1 parent bd6bf5b commit 0f04a19
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 14 deletions.
3 changes: 2 additions & 1 deletion README.md
Expand Up @@ -53,7 +53,8 @@ Sbox is a toolbox for Slurm that provides information about users' accounts and
- `--whodat`: Show user information by UID. It uses the `ldapsearch` command and returns empty output if the cluster does not use LDAP.
- `--whodat2`: Show user information by name. It uses the `ldapsearch` command and returns empty output if the cluster does not use LDAP.
- `--agent`: Start, stop and list user's ssh-agents on the current host. It requires one of the start/stop/list options as an argument. Use `ssh -o StrictHostKeyChecking=no` to disable asking for host key acceptances.
- `--report`: Show the cluster utilization report. It uses `sinfo` and `squeue` commands.
- `--report`: Show the current cluster utilization based on the running jobs. It uses the Slurm `sinfo` and `squeue` commands.
- `--report2`: Show the cluster utilization based on jobs that ended in the last day. It uses the Slurm `sacct` command.

**Examples**

Expand Down
31 changes: 27 additions & 4 deletions bin/sbox
Expand Up @@ -11,6 +11,7 @@ import os
import re
import sys
import json
import time
import pathlib
import argparse

Expand Down Expand Up @@ -44,10 +45,10 @@ parser.add_argument('-p', '--partition', action = 'store_true', help = 'show par
parser.add_argument('-u', '--user', default = user, help = 'user id', metavar = 'UID')
parser.add_argument('-v', '--version', action = 'version', version = '%(prog)s 1.2')
parser.add_argument('--eff', type = int, help = 'show efficiency of a job', metavar = 'JOBID')
parser.add_argument('--history', choices = ['day','week','month','year'], help = 'show jobs history for last day/week/month/year')
parser.add_argument('--history', choices = ['day','week','month','year'], help = 'show jobs history')
parser.add_argument('--pending', action = 'store_true', help = 'show pending jobs')
parser.add_argument('--running', action = 'store_true', help = 'show running jobs')
parser.add_argument('--cancel', help = 'cancel jobs by a single id or a comma separated list of ids', metavar = 'JOBID')
parser.add_argument('--cancel', help = 'cancel jobs by job id', metavar = 'JOBID')
parser.add_argument('--qos', action = 'store_true', help = 'show quality of services')
parser.add_argument('--quota', action = 'store_true', help = 'show quotas')
parser.add_argument('--ncpu', action = 'store_true', help = 'show number of available cpus')
Expand All @@ -58,8 +59,9 @@ parser.add_argument('--reserve', action = 'store_true', help = 'show reservation
parser.add_argument('--topusage', action = 'store_true', help = 'show top usage users')
parser.add_argument('--whodat', help = 'show users informations by uid', metavar = 'UID')
parser.add_argument('--whodat2', help = 'show users informations by name', metavar = 'UNAME')
parser.add_argument('--agent', choices = ['start','stop','list'], help = 'start/stop/list ssh-agents on the current host')
parser.add_argument('--report', action = 'store_true', help = 'show the cluster utilization report')
parser.add_argument('--agent', choices = ['start','stop','list'], help = 'start/stop/list ssh-agents on a host')
parser.add_argument('--report', action = 'store_true', help = 'show current cluster utilization')
parser.add_argument('--report2', action = 'store_true', help = 'show cluster utilization in last 24 hours')
args = parser.parse_args()

if len(sys.argv) == 1:
Expand Down Expand Up @@ -392,3 +394,24 @@ if args.report:
gpu_util=`(date +"%F"| tr "\n" " " ; echo "{g} gpu" | tr "\n" " " ; sinfo -p {g} -o %n,%G | grep -Po '(?<=:)\d' | awk '{{ sum1 += $1 }} END {{ print sum1 }}' | tr "\n" " " ; squeue -O jobid,partition,gres,state,username | grep RUNNING | grep -i {g} | awk '{{ print $3 }}' | awk 'BEGIN {{ FS=":" }} {{ total+=$2 }} END {{ print total }}')`
(echo $gpu_util | tr "\n" " " ; if [ -z `echo $gpu_util | awk '{{ print $5 }}'` ]; then echo "0 0"; else echo `echo $gpu_util | awk '{{ print $5/$4 }}'`; fi) | tr " " ","
""")

if args.report2:
    # Report, per CPU partition, how many core-hours were consumed by jobs
    # that ended within the last 24 hours, relative to the partition's total
    # capacity over one day. Output is CSV on stdout, one row per partition.
    if len(part_cpu) == 0:
        print('No CPU partition found in the config file!')
        sys.exit(1)

    print('date,partition,total_corehour,allocation_corehour,utilization')
    for c in part_cpu:
        # Sum the 4th '/'-separated field of sinfo's %C column over all nodes
        # (total CPUs per node, per the sinfo format documentation).
        tocpu = os.popen(f"sinfo --partition {c} --Node --format %C | cut --delimiter '/' --fields 4 | awk '{{ sum1 += $1 }} END {{ print sum1 }}'").read().strip()
        # The pipeline prints nothing usable when sinfo fails or the partition
        # is unknown; treat that as zero capacity instead of crashing in int().
        total_cpus = int(tocpu) if tocpu.isdigit() else 0

        # One '|'-separated row per ended job allocation: partition|start|end|ncpus.
        # The state filter uses the full sacct state names (TIMEOUT, not TIMEOU).
        data_ = os.popen(f"sacct --partition {c} --allusers --parsable2 --noheader --allocation --duplicates --format partition,start,end,ncpus --state CANCELLED,COMPLETED,FAILED,NODE_FAIL,PREEMPTED,TIMEOUT --starttime $(date --date='1 day ago' +'%Y-%m-%d-%H:%M') --endtime $(date +'%Y-%m-%d-%H:%M')").read().strip().split("\n")

        core_seconds = 0
        for i in data_:
            rw_ = i.split('|')
            if len(rw_) < 4:
                # Empty sacct output (no ended jobs) or a malformed row.
                continue
            try:
                tm1 = time.strptime(rw_[1], '%Y-%m-%dT%H:%M:%S')
                tm2 = time.strptime(rw_[2], '%Y-%m-%dT%H:%M:%S')
                ncpus = int(rw_[3])
            except ValueError:
                # A field that is not a timestamp / integer (e.g. a placeholder
                # in place of the start time) must not abort the whole report.
                continue
            dif = time.mktime(tm2) - time.mktime(tm1) # running time per second
            core_seconds += ncpus * int(dif)

        # Guard against division by zero when the partition reports no CPUs.
        if total_cpus > 0:
            utilization = round(core_seconds / (total_cpus * 86400), 2)
        else:
            utilization = 0.0
        print(time.strftime('%Y-%m-%d'), c, round(total_cpus * 24), round(core_seconds / 3600), utilization, sep = ',')
4 changes: 2 additions & 2 deletions docs/_static/docs-generator.sh
Expand Up @@ -4,10 +4,10 @@

## Create an env for Sphinx and Pandoc
#conda create -n sphinx -c conda-forge sphinx pandoc
conda activate sphinx
source activate sphinx

## Build the local html for preview at ../_build/html/
make -C ../ html
#make -C ../ html

## RST Docs
awk "/## Sbox/,/## Quick install/" ../../README.md | head -n -1 > sbox.md
Expand Down
6 changes: 4 additions & 2 deletions docs/sbox.rst
Expand Up @@ -76,8 +76,10 @@ Command line options
host. It requires one of the start/stop/list options as an argument.
Use ``ssh -o StrictHostKeyChecking=no`` to disable asking for host
key acceptances.
- ``--report``: Show the cluster utilization report. It uses ``sinfo``
and ``squeue`` commands.
- ``--report``: Show the current cluster utilization based on the running
jobs. It uses the Slurm ``sinfo`` and ``squeue`` commands.
- ``--report2``: Show the cluster utilization based on jobs that ended in
the last day. It uses the Slurm ``sacct`` command.

**Examples**

Expand Down
3 changes: 2 additions & 1 deletion share/man/man1/interactive.1
@@ -1,7 +1,8 @@
.TH SBOX "1" "November 2021" "SBOX 1.2"
.TH SBOX "1" "December 2021" "SBOX 1.2"
.SH NAME
interactive \- an alias for using cluster interactively
.SH SYNOPSIS
interactive [-h] [-a] [-n] [-N] [-p] [-t] [-k] [-e] [-E] [-l] [-m] [-g] [{jupyter}]
.br
.SH DESCRIPTION
.PP
Expand Down
14 changes: 10 additions & 4 deletions share/man/man1/sbox.1
@@ -1,8 +1,8 @@
.TH SBOX "1" "November 2021" "SBOX 1.2"
.TH SBOX "1" "December 2021" "SBOX 1.2"
.SH NAME
sbox \- a simple toolbox for Slurm
.SH SYNOPSIS
sbox [-h] [-a] [-f] [-g] [-q] [-j JOBID] [-c] [-p] [-u UID] [-v] [--eff JOBID] [--history {day,week,month,year}] [--pending] [--running] [--cancel JOBID] [--qos] [--quota] [--ncpu] [--ngpu] [--gpu] [--license] [--reserve] [--topusage] [--whodat UID] [--whodat2 UNAME] [--agent {start,stop,list}] [--report]
sbox [-h] [-a] [-f] [-g] [-q] [-j JOBID] [-c] [-p] [-u UID] [-v] [--eff JOBID] [--history {day,week,month,year}] [--pending] [--running] [--cancel JOBID] [--qos] [--quota] [--ncpu] [--ngpu] [--gpu] [--license] [--reserve] [--topusage] [--whodat UID] [--whodat2 UNAME] [--agent {start,stop,list}] [--report] [--report2]
.br
.SH DESCRIPTION
.PP
Expand Down Expand Up @@ -145,8 +145,14 @@ Use ssh -o StrictHostKeyChecking=no to disable asking for host
key acceptances.
.TP
.B --report
Show the cluster utilization report.
It uses sinfo and squeue commands.
Show the current cluster utilization based on the
running jobs.
It uses the Slurm sinfo and squeue commands.
.TP
.B --report2
Show the cluster utilization based on jobs that
ended in the last day.
It uses the Slurm sacct command.
.PP
Examples
.PP
Expand Down

0 comments on commit 0f04a19

Please sign in to comment.