# RANDOM FOREST CYTHON MPI

In [1]:
%%writefile rfcm.pyx
#cython: boundscheck=False, wraparound=False, cdivision=True
#cython: initializedcheck=False, language_level=3, infer_types=True
def rfcmf(trainset, testset) :
    """Train a 100-tree random forest on ``trainset`` and score both datasets.

    The forest is fitted inside ``parallel_backend('ipyparallel')``, so a
    joblib backend with that name must already be registered by the caller.
    Missing feature values are replaced by the per-column mean of the
    *training* set; the same fitted imputer is applied to the test set so
    both sets are preprocessed identically.

    Parameters
    ----------
    trainset : path of the ARFF file used for training.
    testset : path of the ARFF file used for evaluation.

    Returns
    -------
    tuple
        ``(trtrsi, trperr, trkapp, tetrsi, teperr, tekapp)``: number of
        samples, classification error in percent and Cohen's kappa, first
        for the training set and then for the test set.
    """
    import numpy as np
    from sklearn.impute import SimpleImputer
    from sklearn.ensemble import RandomForestClassifier
    from joblib import parallel_backend

    # Get & prepare data
    X_train, y_train = _load_arff(trainset)
    X_test, y_test = _load_arff(testset)

    # Fit the imputer on the training features only, then reuse it for the
    # test features instead of re-estimating the means on the test set.
    imp = SimpleImputer(missing_values = np.nan, strategy = 'mean')
    imp.fit(X_train)
    X_train = _impute(imp, X_train)
    X_test = _impute(imp, X_test)

    clf = RandomForestClassifier(n_estimators = 100)
    with parallel_backend('ipyparallel') :
        clf.fit(X_train, y_train)

    trtrsi, trperr, trkapp = _error_stats(y_train, clf.predict(X_train))
    tetrsi, teperr, tekapp = _error_stats(y_test, clf.predict(X_test))

    return trtrsi, trperr, trkapp, tetrsi, teperr, tekapp


def _load_arff(path) :
    """Read an ARFF file and split it into features and the ``class`` target.

    The byte values ``b'N'`` / ``b'Y'`` are replaced by 0 / 1 across the
    frame. The ``class`` column is decoded as UTF-8; entries for which the
    decode yields a missing value fall back to their original value.

    Returns ``(X, y)``: the frame without ``class`` and the ``class`` column.
    """
    import pandas as pd
    from scipy.io import arff

    df = pd.DataFrame(arff.loadarff(path)[0])
    df = df.replace(b'N', 0)
    df = df.replace(b'Y', 1)
    df['class'] = df['class'].str.decode('utf-8').fillna(df['class'])
    return df.drop(columns = ['class']), df['class']


def _impute(imp, X) :
    """Apply the fitted imputer ``imp`` to ``X``, keeping its labels and index."""
    import pandas as pd

    return pd.DataFrame(imp.transform(X), columns = X.columns,
                        index = X.index)


def _error_stats(y_true, y_pred) :
    """Return ``(sample count, error in percent, Cohen's kappa)``."""
    from sklearn import metrics

    # normalize=False yields the number of correct predictions, not a ratio.
    hits = metrics.accuracy_score(y_true, y_pred, normalize = False)
    size = y_true.size
    perr = ((size - hits) / (size)) * 100
    kappa = metrics.cohen_kappa_score(y_true, y_pred)
    return size, perr, kappa

Overwriting rfcm.pyx


In [2]:
%%writefile setups.py
"""Build script that compiles rfcm.pyx into the ``rfcm`` extension module."""
from distutils.core import setup
from distutils.extension import Extension
from Cython.Distutils import build_ext

# The single extension: Cython source rfcm.pyx, compiled with -O3.
rfcm_extension = Extension(
    'rfcm',
    sources=['rfcm.pyx'],
    extra_compile_args=['-O3'],
)

setup(
    name='rfcm',
    ext_modules=[rfcm_extension],
    # Use Cython's build_ext so the .pyx source is translated before compiling.
    cmdclass={'build_ext': build_ext},
)

Overwriting setups.py


In [3]:
%%bash
# Remove any previously built shared object so a stale build is never reused.
rm -f rfcm*.so  # clean
# Compile the extension described in setups.py; --inplace puts the .so in this directory.
python setups.py build_ext --inplace --quiet

running build_ext
cythoning rfcm.pyx to rfcm.c
building 'rfcm' extension
/scratch/ampemi/xxxx.xxxx/env2/bin/x86_64-conda_cos6-linux-gnu-cc -Wno-unused-result -Wsign-compare -DNDEBUG -fwrapv -O2 -Wall -Wstrict-prototypes -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -pipe -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -pipe -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /scratch/ampemi/xxxx.xxxx/env2/include -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /scratch/ampemi/xxxx.xxxx/env2/include -fPIC -I/scratch/ampemi/xxxx.xxxx/env2/include/python3.8 -c rfcm.c -o build/temp.linux-x86_64-3.8/rfcm.o -O3
x86_64-conda_cos6-linux-gnu-gcc -pthread -shared -Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,-rpath,/scratch/ampemi/xxxx.xxxx/env2/lib -L/scratch/ampemi/xxxx.xxxx/env2/lib -Wl,-O2 -Wl,--sort-common -Wl,--as-n

In [14]:
! ls -gGh rfcm*.so

-rwxr-xr-x 1 1,3M Set 28 14:02 rfcm.cpython-38-x86_64-linux-gnu.so


In [15]:
import rfcm
help(rfcm)

Help on module rfcm:

NAME
    rfcm

FUNCTIONS
    rfcmf(...)

DATA
    __test__ = {}

FILE
    /prj/ampemi/xxxx.xxxx/rf/rfcm.cpython-38-x86_64-linux-gnu.so




In [5]:
%%writefile rfcmc6.py
import argparse
from time import time
from rfcm import rfcmf
import ipyparallel as ipp
from ipyparallel.joblib import IPythonParallelBackend
from joblib import Parallel, parallel_backend
from joblib import register_parallel_backend
from joblib import delayed, cpu_count

# Driver script: connect to a running ipyparallel cluster, expose it to
# joblib as the 'ipyparallel' backend, run rfcmf and print error and timing.
t0 = time()

# Command line: the profile is mandatory; the dataset paths are optional
# and default to the asteroid train/test files.
parser = argparse.ArgumentParser()
parser.add_argument("-p", "--profile", required=True,
    help="Name of IPython profile to use")
parser.add_argument("--trainset",
    default="datasets/asteroid-train-66k.arff",
    help="ARFF file used for training")
parser.add_argument("--testset",
    default="datasets/asteroid-test-34k.arff",
    help="ARFF file used for testing")
args = parser.parse_args()
profile = args.profile
trainset = args.trainset
testset  = args.testset

# Prepare the engines
c = ipp.Client(profile = profile)
try:
    ncli = len(c.ids)
    bview = c.load_balanced_view()
    # rfcmf selects this backend by name via parallel_backend('ipyparallel').
    register_parallel_backend('ipyparallel',
        lambda : IPythonParallelBackend(view = bview) )

    ( trtrsi, trperr, trkapp, tetrsi, teperr, tekapp
        ) = rfcmf(trainset, testset)
finally:
    # Shutdown the engines (and the hub) even when training fails, so a
    # crashed run does not leave the cluster processes behind.
    c.shutdown(hub=True, block=False)

# Result
t1 = time() - t0
print(f'Trainset classification error is {trperr:.2f}% ',
      f'of {trtrsi} (kappa: {trkapp:.4f})')
print(f' Testset classification error is {teperr:.2f}% ',
      f'of {tetrsi} (kappa: {tekapp:.4f})')
print(f"T: {t1:.4f}  |  N: {ncli:0g}")

Overwriting rfcmc6.py


## Copy to /scratch

In [6]:
! cp rfcm* /scratch${PWD#/prj}

## SLURM script

In [7]:
%%writefile rfcm6.srm
#!/bin/bash -l
#SBATCH --job-name rfcm6        # Job name
#SBATCH --partition cpu_small  # Select partition
#SBATCH --ntasks=1             # Total tasks(CPUs)
#SBATCH --time=00:10:00        # Limit execution time
#SBATCH --exclusive            # Exclusive access to nodes

# Batch job: start an ipyparallel controller plus one engine per SLURM task,
# then run the Python driver against that cluster.

echo '========================================'
echo '- Job ID:' $SLURM_JOB_ID
echo '- # of nodes in the job:' $SLURM_JOB_NUM_NODES
echo '- # of tasks:' $SLURM_NTASKS
echo '- Dir from which sbatch was invoked:' ${SLURM_SUBMIT_DIR##*/}
# Abort early if the submit directory is not reachable from this node.
cd "$SLURM_SUBMIT_DIR" || exit 1
echo -n '- Nodes allocated to the job: '
nodeset -e $SLURM_JOB_NODELIST

# get path: RF is the current directory with a leading /prj swapped for
# /scratch; SCR is RF without its trailing /rf component.
RF=/scratch${PWD#/prj}
SCR=${RF%/rf}

# path to a directory which IPython will use for user data.
# IPython reads IPYTHONDIR; IPYTHONSCR is still exported for anything that
# relied on the old name.
export IPYTHONSCR=$SCR/.ipython
export IPYTHONDIR=$IPYTHONSCR

# Load Python environment and MPI module
source $SCR/env2/etc/profile.d/conda.sh
conda activate $SCR/env2
module load openmpi/gnu/4.0.1

echo -n '<1. starting ipython>        ' && date
# create a new ipython profile appended with the job id number
PROFILE=job_${SLURM_JOB_ID}
ipython profile create ${PROFILE} --parallel --quiet

echo -n '<2. starting ipcontroller>   ' && date
# run ipcontroller on one core
ipcontroller --ip="*" --profile=${PROFILE} --quiet &
# fixed pause before the engines are launched
sleep 10

echo -n '<3. starting srun ipengine>  ' && date
# run ipengine on each available core
srun --mpi=pmi2 -n $SLURM_NTASKS \
    ipengine --location=$(hostname) --profile=${PROFILE} --quiet &
# fixed pause before the client script connects
sleep 25

# Executable
EXEC='rfcmc6.py'

# run the script
echo -n '<4. starting python script > ' && date
echo '-- output -----------------------------'
python ${EXEC} --profile ${PROFILE}
echo '-- end --------------------------------'
echo -n '<5. quit>                    ' && date
# NOTE(review): the background srun step may still be running when the script
# ends here; presumably that is what produces the "Job step aborted" lines
# SLURM appends to the log - confirm before adding a wait.

Overwriting rfcm6.srm


## Check

In [8]:
! sbatch --partition cpu_dev --ntasks=96 rfcm6.srm

Submitted batch job 1366933


In [9]:
! squeue --name=rfcm6 --partition=cpu_dev --format="%.20S %.8i %.9P %.5j %.2t %.5M %.5D %.4C"

          START_TIME    JOBID PARTITION  NAME ST  TIME NODES CPUS
 2021-09-28T14:35:33  1366933   cpu_dev rfcm6  R  0:00     4   96


In [11]:
! squeue --name=rfcm6 --partition=cpu_dev --format="%.20S %.8i %.9P %.5j %.2t %.5M %.5D %.4C"

          START_TIME    JOBID PARTITION  NAME ST  TIME NODES CPUS


In [12]:
! cat /scratch${PWD#/prj}/slurm-1366933.out

- Job ID: 1366933
- # of nodes in the job: 4
- # of tasks: 96
- Dir from which sbatch was invoked: rf
- Nodes allocated to the job: sdumont1249 sdumont1250 sdumont1251 sdumont1252
<1. starting ipython>        Ter Set 28 14:35:40 -03 2021
<2. starting ipcontroller>   Ter Set 28 14:35:58 -03 2021
<3. starting srun ipengine>  Ter Set 28 14:36:08 -03 2021
<4. starting python script > Ter Set 28 14:36:33 -03 2021
-- output -----------------------------
Trainset classification error is 0.00%  of 66000 (kappa: 1.0000)
 Testset classification error is 0.00%  of 34000 (kappa: 0.9997)
T: 16.0892  |  N: 96
-- end --------------------------------
<5. quit>                    Ter Set 28 14:36:51 -03 2021


<hr style="height:10px;border-width:0;background-color:green">

## Run

### 1 of (1, 4, 16, 24, 48, 72, 96)

In [13]:
%%bash
# Queue the same 1-task job four times.
for run in 1 2 3 4; do
    sbatch --ntasks=1 rfcm6.srm
done

Submitted batch job 1366936
Submitted batch job 1366937
Submitted batch job 1366938
Submitted batch job 1366939


In [8]:
%%bash
# Print the SLURM logs of the four 1-task jobs, in submission order.
for job in 1366936 1366937 1366938 1366939; do
    cat /scratch${PWD#"/prj"}/slurm-${job}.out
done

- Job ID: 1366936
- # of nodes in the job: 1
- # of tasks: 1
- Dir from which sbatch was invoked: rf
- Nodes allocated to the job: sdumont1455
<1. starting ipython>        Ter Set 28 19:10:16 -03 2021
<2. starting ipcontroller>   Ter Set 28 19:10:33 -03 2021
<3. starting srun ipengine>  Ter Set 28 19:10:43 -03 2021
<4. starting python script > Ter Set 28 19:11:08 -03 2021
-- output -----------------------------
Trainset classification error is 0.00%  of 66000 (kappa: 1.0000)
 Testset classification error is 0.00%  of 34000 (kappa: 0.9997)
T: 57.1904  |  N: 1
-- end --------------------------------
<5. quit>                    Ter Set 28 19:12:09 -03 2021
srun: Job step aborted: Waiting up to 302 seconds for job step to finish.
slurmstepd: error: *** STEP 1366936.0 ON sdumont1455 CANCELLED AT 2021-09-28T19:12:09 ***
srun: error: sdumont1455: task 0: Terminated
- Job ID: 1366937
- # of nodes in the job: 1
- # of tasks: 1
- Dir from which sbatch was invoked: rf
- Nodes allocated to the jo

<hr style="height:10px;border-width:0;background-color:green">

### 4 of (1, 4, 16, 24, 48, 72, 96)

In [14]:
%%bash
# Queue the same 4-task job four times.
for run in 1 2 3 4; do
    sbatch --ntasks=4 rfcm6.srm
done

Submitted batch job 1366940
Submitted batch job 1366941
Submitted batch job 1366942
Submitted batch job 1366943


In [7]:
%%bash
# Print the SLURM logs of the four 4-task jobs, in submission order.
for job in 1366940 1366941 1366942 1366943; do
    cat /scratch${PWD#"/prj"}/slurm-${job}.out
done

- Job ID: 1366940
- # of nodes in the job: 1
- # of tasks: 4
- Dir from which sbatch was invoked: rf
- Nodes allocated to the job: sdumont1503
<1. starting ipython>        Ter Set 28 23:52:57 -03 2021
<2. starting ipcontroller>   Ter Set 28 23:53:04 -03 2021
<3. starting srun ipengine>  Ter Set 28 23:53:14 -03 2021
<4. starting python script > Ter Set 28 23:53:39 -03 2021
-- output -----------------------------
Trainset classification error is 0.00%  of 66000 (kappa: 1.0000)
 Testset classification error is 0.00%  of 34000 (kappa: 0.9997)
T: 18.3048  |  N: 4
-- end --------------------------------
<5. quit>                    Ter Set 28 23:53:59 -03 2021
srun: Job step aborted: Waiting up to 302 seconds for job step to finish.
slurmstepd: error: *** STEP 1366940.0 ON sdumont1503 CANCELLED AT 2021-09-28T23:53:59 ***
srun: error: sdumont1503: tasks 0-3: Terminated
- Job ID: 1366941
- # of nodes in the job: 1
- # of tasks: 4
- Dir from which sbatch was invoked: rf
- Nodes allocated to the

<hr style="height:10px;border-width:0;background-color:green">

### 16 of (1, 4, 16, 24, 48, 72, 96)

In [15]:
%%bash
# Queue the same 16-task job four times.
for run in 1 2 3 4; do
    sbatch --ntasks=16 rfcm6.srm
done

Submitted batch job 1366944
Submitted batch job 1366945
Submitted batch job 1366946
Submitted batch job 1366947


In [6]:
%%bash
# Print the SLURM logs of the four 16-task jobs, in submission order.
for job in 1366944 1366945 1366946 1366947; do
    cat /scratch${PWD#"/prj"}/slurm-${job}.out
done

- Job ID: 1366944
- # of nodes in the job: 1
- # of tasks: 16
- Dir from which sbatch was invoked: rf
- Nodes allocated to the job: sdumont1284
<1. starting ipython>        Qua Set 29 00:02:09 -03 2021
<2. starting ipcontroller>   Qua Set 29 00:02:17 -03 2021
<3. starting srun ipengine>  Qua Set 29 00:02:27 -03 2021
<4. starting python script > Qua Set 29 00:02:52 -03 2021
-- output -----------------------------
Trainset classification error is 0.00%  of 66000 (kappa: 1.0000)
 Testset classification error is 0.00%  of 34000 (kappa: 0.9997)
T: 15.1444  |  N: 16
-- end --------------------------------
<5. quit>                    Qua Set 29 00:03:09 -03 2021
srun: Job step aborted: Waiting up to 302 seconds for job step to finish.
slurmstepd: error: *** STEP 1366944.0 ON sdumont1284 CANCELLED AT 2021-09-29T00:03:09 ***
srun: error: sdumont1284: tasks 0-11: Terminated
- Job ID: 1366945
- # of nodes in the job: 1
- # of tasks: 16
- Dir from which sbatch was invoked: rf
- Nodes allocated to

<hr style="height:10px;border-width:0;background-color:green">

### 24 of (1, 4, 16, 24, 48, 72, 96)

In [16]:
%%bash
# Queue the same 24-task job four times.
for run in 1 2 3 4; do
    sbatch --ntasks=24 rfcm6.srm
done

Submitted batch job 1366948
Submitted batch job 1366949
Submitted batch job 1366950
Submitted batch job 1366951


In [5]:
%%bash
# Print the SLURM logs of the four 24-task jobs, in submission order.
for job in 1366948 1366949 1366950 1366951; do
    cat /scratch${PWD#"/prj"}/slurm-${job}.out
done

- Job ID: 1366948
- # of nodes in the job: 1
- # of tasks: 24
- Dir from which sbatch was invoked: rf
- Nodes allocated to the job: sdumont1284
<1. starting ipython>        Qua Set 29 00:07:52 -03 2021
<2. starting ipcontroller>   Qua Set 29 00:07:59 -03 2021
<3. starting srun ipengine>  Qua Set 29 00:08:09 -03 2021
<4. starting python script > Qua Set 29 00:08:34 -03 2021
-- output -----------------------------
Trainset classification error is 0.00%  of 66000 (kappa: 1.0000)
 Testset classification error is 0.00%  of 34000 (kappa: 0.9997)
T: 15.7684  |  N: 24
-- end --------------------------------
<5. quit>                    Qua Set 29 00:08:52 -03 2021
- Job ID: 1366949
- # of nodes in the job: 1
- # of tasks: 24
- Dir from which sbatch was invoked: rf
- Nodes allocated to the job: sdumont1284
<1. starting ipython>        Qua Set 29 00:09:03 -03 2021
<2. starting ipcontroller>   Qua Set 29 00:09:08 -03 2021
<3. starting srun ipengine>  Qua Set 29 00:09:18 -03 2021
<4. starting pyth

<hr style="height:10px;border-width:0;background-color:green">

### 48 of (1, 4, 16, 24, 48, 72, 96)

In [17]:
%%bash
# Queue the same 48-task job four times.
for run in 1 2 3 4; do
    sbatch --ntasks=48 rfcm6.srm
done

Submitted batch job 1366952
Submitted batch job 1366953
Submitted batch job 1366954
Submitted batch job 1366955


In [4]:
%%bash
# Print the SLURM logs of the four 48-task jobs, in submission order.
for job in 1366952 1366953 1366954 1366955; do
    cat /scratch${PWD#"/prj"}/slurm-${job}.out
done

- Job ID: 1366952
- # of nodes in the job: 2
- # of tasks: 48
- Dir from which sbatch was invoked: rf
- Nodes allocated to the job: sdumont1284 sdumont1472
<1. starting ipython>        Qua Set 29 00:12:34 -03 2021
<2. starting ipcontroller>   Qua Set 29 00:12:39 -03 2021
<3. starting srun ipengine>  Qua Set 29 00:12:49 -03 2021
<4. starting python script > Qua Set 29 00:13:14 -03 2021
-- output -----------------------------
Trainset classification error is 0.00%  of 66000 (kappa: 1.0000)
 Testset classification error is 0.00%  of 34000 (kappa: 0.9997)
T: 16.8985  |  N: 48
-- end --------------------------------
<5. quit>                    Qua Set 29 00:13:33 -03 2021
srun: Job step aborted: Waiting up to 302 seconds for job step to finish.
slurmstepd: error: *** STEP 1366952.0 ON sdumont1284 CANCELLED AT 2021-09-29T00:13:33 ***
- Job ID: 1366953
- # of nodes in the job: 2
- # of tasks: 48
- Dir from which sbatch was invoked: rf
- Nodes allocated to the job: sdumont1284 sdumont1472
<1.

<hr style="height:10px;border-width:0;background-color:green">

### 72 of (1, 4, 16, 24, 48, 72, 96)

In [18]:
%%bash
# Queue the same 72-task job four times.
for run in 1 2 3 4; do
    sbatch --ntasks=72 rfcm6.srm
done

Submitted batch job 1366956
Submitted batch job 1366957
Submitted batch job 1366958
Submitted batch job 1366959


In [3]:
%%bash
# Print the SLURM logs of the four 72-task jobs, in submission order.
for job in 1366956 1366957 1366958 1366959; do
    cat /scratch${PWD#"/prj"}/slurm-${job}.out
done

- Job ID: 1366956
- # of nodes in the job: 3
- # of tasks: 72
- Dir from which sbatch was invoked: rf
- Nodes allocated to the job: sdumont1284 sdumont1455 sdumont1466
<1. starting ipython>        Qua Set 29 00:18:25 -03 2021
<2. starting ipcontroller>   Qua Set 29 00:18:29 -03 2021
<3. starting srun ipengine>  Qua Set 29 00:18:39 -03 2021
<4. starting python script > Qua Set 29 00:19:04 -03 2021
-- output -----------------------------
Trainset classification error is 0.00%  of 66000 (kappa: 1.0000)
 Testset classification error is 0.00%  of 34000 (kappa: 0.9997)
T: 15.5817  |  N: 72
-- end --------------------------------
<5. quit>                    Qua Set 29 00:19:21 -03 2021
- Job ID: 1366957
- # of nodes in the job: 3
- # of tasks: 72
- Dir from which sbatch was invoked: rf
- Nodes allocated to the job: sdumont1284 sdumont1455 sdumont1466
<1. starting ipython>        Qua Set 29 00:19:33 -03 2021
<2. starting ipcontroller>   Qua Set 29 00:19:37 -03 2021
<3. starting srun ipengine>

<hr style="height:10px;border-width:0;background-color:green">

### 96 of (1, 4, 16, 24, 48, 72, 96)

In [19]:
%%bash
# Queue the same 96-task job four times.
for run in 1 2 3 4; do
    sbatch --ntasks=96 rfcm6.srm
done

Submitted batch job 1366960
Submitted batch job 1366961
Submitted batch job 1366962
Submitted batch job 1366963


In [2]:
%%bash
# Print the SLURM logs of the four 96-task jobs, in submission order.
for job in 1366960 1366961 1366962 1366963; do
    cat /scratch${PWD#"/prj"}/slurm-${job}.out
done

- Job ID: 1366960
- # of nodes in the job: 4
- # of tasks: 96
- Dir from which sbatch was invoked: rf
- Nodes allocated to the job: sdumont1284 sdumont1455 sdumont1466 sdumont1472
<1. starting ipython>        Qua Set 29 00:24:05 -03 2021
<2. starting ipcontroller>   Qua Set 29 00:24:11 -03 2021
<3. starting srun ipengine>  Qua Set 29 00:24:21 -03 2021
<4. starting python script > Qua Set 29 00:24:46 -03 2021
-- output -----------------------------
Trainset classification error is 0.00%  of 66000 (kappa: 1.0000)
 Testset classification error is 0.00%  of 34000 (kappa: 0.9997)
T: 16.7496  |  N: 96
-- end --------------------------------
<5. quit>                    Qua Set 29 00:25:04 -03 2021
slurmstepd: error: *** STEP 1366960.0 ON sdumont1284 CANCELLED AT 2021-09-29T00:25:04 ***
srun: Job step aborted: Waiting up to 302 seconds for job step to finish.
- Job ID: 1366961
- # of nodes in the job: 4
- # of tasks: 96
- Dir from which sbatch was invoked: rf
- Nodes allocated to the job: sdu

In [26]:
! squeue --start --user=$(whoami) --format="%.20S %.8i %.9P %.5j %.2t %.5M %.5D %.4C"

          START_TIME    JOBID PARTITION  NAME ST  TIME NODES CPUS
                 N/A  1366936 cpu_small rfcm6 PD  0:00     1    1
                 N/A  1366937 cpu_small rfcm6 PD  0:00     1    1
                 N/A  1366938 cpu_small rfcm6 PD  0:00     1    1
                 N/A  1366939 cpu_small rfcm6 PD  0:00     1    1
                 N/A  1366940 cpu_small rfcm6 PD  0:00     1    4
                 N/A  1366941 cpu_small rfcm6 PD  0:00     1    4
                 N/A  1366942 cpu_small rfcm6 PD  0:00     1    4
                 N/A  1366943 cpu_small rfcm6 PD  0:00     1    4
                 N/A  1366944 cpu_small rfcm6 PD  0:00     1   16
                 N/A  1366945 cpu_small rfcm6 PD  0:00     1   16
                 N/A  1366946 cpu_small rfcm6 PD  0:00     1   16
                 N/A  1366947 cpu_small rfcm6 PD  0:00     1   16
                 N/A  1366948 cpu_small rfcm6 PD  0:00     1   24
                 N/A  1366949 cpu_small rfcm6 PD  0:00     1   24
          

In [1]:
! squeue --start --user=$(whoami) --format="%.20S %.8i %.9P %.5j %.2t %.5M %.5D %.4C"

          START_TIME    JOBID PARTITION  NAME ST  TIME NODES CPUS


In [23]:
! squeue -u $(whoami) -h -r | wc -l    # 100 is the maximum per project

28


In [24]:
! squeue --partition=cpu_small -h -r | wc -l

359


<hr style="height:10px;border-width:0;background-color:red">