# Random Forest (F2PY), sequential run, 66k training set

In [3]:
%%writefile rfns6.py
import time as tm, parf003ser

# Wall-clock timestamp taken right before the random-forest call.
t0 = tm.time()

# Run the forest on the two ARFF files (presumably train first, test second,
# going by the file names -- confirm against parf003ser).
resu = parf003ser.random_forest(
    "datasets/asteroid-train-66k.arff",
    "datasets/asteroid-test-34k.arff",
)

# The result is read positionally: the first nine entries, in this order.
(p_error_count, p_oob_count, p_kappa_value,
 p_instance_count, p_error, p_testset_kappa_value,
 p_time, p_rank, p_size) = (resu[i] for i in range(9))

# Wall-clock timestamp taken right after the results are unpacked.
t1 = tm.time()

# Only the process whose reported rank is 0 prints the summary.
if p_rank == 0:
    train_error_pct = p_error_count * 100 / p_oob_count
    print(f'Trainset classification error is {train_error_pct:.2f}% '
          f'of {p_oob_count} (kappa: {p_kappa_value:.4f})')
    print(f' Testset classification error is {p_error * 100:.2f}% '
          f'of {p_instance_count} (kappa: {p_testset_kappa_value:.4f})')
    print(f'T: {p_time:.4f}  |  N: {p_size:0g}')

Overwriting rfns6.py


## Copy files to /scratch

In [4]:
# Copy the driver script to /scratch followed by the current directory with its leading /prj removed.
! cp rfns6.py /scratch${PWD#"/prj"}

## Slurm batch script

In [10]:
%%writefile rfns6.srm
#!/bin/bash
#SBATCH --job-name rfns6       # Job name
#SBATCH --partition cpu_small  # Select partition
#SBATCH --ntasks=1             # Total tasks
#SBATCH --time=00:05:00        # Limit execution time
#SBATCH --exclusive            # Exclusive access to nodes

# Batch job that runs rfns6.py through srun: prints a summary of the
# allocation, sets up the Intel/conda environment, runs the script from the
# scratch copy of the rf directory, and exits with srun's status.

echo '========================================'
echo '- Job ID:' "$SLURM_JOB_ID"
# SLURM_NTASKS_PER_NODE is only set when --ntasks-per-node is requested;
# show "n/a" instead of printing a blank value.
echo '- Tasks per node:' "${SLURM_NTASKS_PER_NODE:-n/a}"
echo '- # of nodes in the job:' "$SLURM_JOB_NUM_NODES"
echo '- # of tasks:' "$SLURM_NTASKS"
echo '- Dir from which sbatch was invoked:' "${SLURM_SUBMIT_DIR##*/}"
# Deliberately non-fatal: the working directory is set explicitly below.
cd "$SLURM_SUBMIT_DIR"
echo -n '- List of nodes allocated to the job: '
nodeset -e "$SLURM_JOB_NODELIST"

# Environment
echo '-- modules ----------------------------'
module load intel_psxe
source /opt/intel/parallel_studio_xe_2020/intelpython3/etc/profile.d/conda.sh
# Go to $HOME so that PWD minus its /prj prefix gives the base directory
# under /scratch (presumably the home directory lives under /prj).
cd
SCR=/scratch${PWD#/prj}
conda activate --stack "$SCR/env4"
export I_MPI_VAR_CHECK_SPELLING=0
# rfns6.py is run by relative path, so stop if its directory is unavailable.
cd "$SCR/rf" || { echo "cannot cd to $SCR/rf" >&2; exit 1; }

# Executable config
EXEC="python rfns6.py"

# Start
echo '-- run --------------------------------'
echo '$ srun -n' "$SLURM_NTASKS" "${EXEC##*/}"
echo '-- output -----------------------------'
# $EXEC is left unquoted on purpose: it must split into command + argument.
srun -n "$SLURM_NTASKS" $EXEC
status=$?
echo '~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
# Propagate the run's exit status so a failed run is not reported as success.
exit "$status"

Overwriting rfns6.srm


## Check

In [11]:
# Trial submission: the command-line options take precedence over the script's #SBATCH lines, so this job goes to cpu_dev.
! sbatch --partition cpu_dev --ntasks=1 rfns6.srm

Submitted batch job 1349198


In [12]:
# List the rfns6 jobs on cpu_dev (columns: job id, partition, name, state, elapsed time, nodes, CPUs).
! squeue --name=rfns6 --partition=cpu_dev --format="%.8i  %.9P %.5j %.2t %.5M %.5D %.4C"

   JOBID  PARTITION  NAME ST  TIME NODES CPUS
 1349198    cpu_dev rfns6 PD  0:00     1    1


In [17]:
# Poll the queue again with the same filter and columns.
! squeue --name=rfns6 --partition=cpu_dev --format="%.8i  %.9P %.5j %.2t %.5M %.5D %.4C"

   JOBID  PARTITION  NAME ST  TIME NODES CPUS


In [18]:
# Print the Slurm output file of job 1349198 from the /scratch counterpart of the current directory.
! cat /scratch${PWD#"/prj"}/slurm-1349198.out

- Job ID: 1349198
- Tasks per node:
- # of nodes in the job: 1
- # of tasks: 1
- Dir from which sbatch was invoked: rf
- List of nodes allocated to the job: sdumont1254
-- modules ----------------------------
-- run --------------------------------
$ srun -n 1 python rfns6.py
-- output -----------------------------
Trainset classification error is 0.05% of 66000 (kappa: 0.9917)
 Testset classification error is 0.47% of 34000 (kappa: 0.9283)
T: 136.7781  |  N: 1
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


## Run

In [19]:
%%bash
# Submit the same batch script three times, one job per submission.
for _ in 1 2 3; do
    sbatch rfns6.srm
done

Submitted batch job 1349210
Submitted batch job 1349211
Submitted batch job 1349212


In [2]:
%%bash
# Print the Slurm output file of each submitted job, in submission order.
for job_id in 1349210 1349211 1349212; do
    cat /scratch${PWD#/prj}/slurm-${job_id}.out
done

- Job ID: 1349210
- Tasks per node:
- # of nodes in the job: 1
- # of tasks: 1
- Dir from which sbatch was invoked: rf
- List of nodes allocated to the job: sdumont1286
-- modules ----------------------------
-- run --------------------------------
$ srun -n 1 python rfns6.py
-- output -----------------------------
Trainset classification error is 0.06% of 66000 (kappa: 0.9908)
 Testset classification error is 0.51% of 34000 (kappa: 0.9234)
T: 135.3215  |  N: 1
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Job ID: 1349211
- Tasks per node:
- # of nodes in the job: 1
- # of tasks: 1
- Dir from which sbatch was invoked: rf
- List of nodes allocated to the job: sdumont1286
-- modules ----------------------------
-- run --------------------------------
$ srun -n 1 python rfns6.py
-- output -----------------------------
Trainset classification error is 0.06% of 66000 (kappa: 0.9906)
 Testset classification error is 0.65% of 34000 (kappa: 0.9020)
T: 134.1588  |  N: 1
~~ end ~~~~~~~~~~~~~~~~~~~~~

In [21]:
# Count the current user's pending and running jobs (-h drops the header so wc counts job lines only).
! squeue -u $(whoami) -h -t pending,running -r | wc -l

48


In [22]:
# Count all pending and running jobs on the cpu_small partition (-h drops the header).
! squeue --partition=cpu_small -h -t pending,running -r | wc -l

315


In [24]:
# List the rfns6 jobs with their expected start time (%S) first, sorted by job id.
! squeue --start --name=rfns6 -o "%S  %.8i  %.9P %.5j %.2t %.5M %.5D %.4C" --sort "i"

START_TIME     JOBID  PARTITION  NAME ST  TIME NODES CPUS
N/A   1349210  cpu_small rfns6 PD  0:00     1    1
N/A   1349211  cpu_small rfns6 PD  0:00     1    1
N/A   1349212  cpu_small rfns6 PD  0:00     1    1


In [1]:
# Re-run the expected-start-time listing for the rfns6 jobs.
! squeue --start --name=rfns6 -o "%S  %.8i  %.9P %.5j %.2t %.5M %.5D %.4C" --sort "i"

START_TIME     JOBID  PARTITION  NAME ST  TIME NODES CPUS
