# RF PYTHON SEQ 66 k

In [1]:
%%writefile rfps6.py
import pandas as pd
import numpy as np
import sys
from scipy.io import arff
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from time import time
def load_dataset(path):
    """Read an ARFF file and split it into features and labels.

    Byte-string cells equal to b'N' / b'Y' are replaced by 0 / 1 across the
    whole frame, and the 'class' column is decoded from bytes to str (entries
    that do not decode keep their original value).

    Parameters
    ----------
    path : str
        Location of the ARFF file; it must contain a 'class' attribute.

    Returns
    -------
    (pandas.DataFrame, pandas.Series)
        The feature columns (everything except 'class') and the 'class' column.
    """
    frame = pd.DataFrame(arff.loadarff(path)[0])
    frame = frame.replace(b'N', 0)
    frame = frame.replace(b'Y', 1)
    frame['class'] = frame['class'].str.decode('utf-8').fillna(frame['class'])
    return frame.drop(columns=['class']), frame['class']


def impute_frame(imputer, features, fit=False):
    """Fill missing values and keep the column labels and index of `features`.

    Parameters
    ----------
    imputer : object with fit_transform / transform
        The imputer to apply.
    features : pandas.DataFrame
        Feature table that may contain NaN.
    fit : bool, default False
        When True the imputer is fitted on `features` before transforming;
        otherwise its already-fitted statistics are applied.

    Returns
    -------
    pandas.DataFrame
        Imputed values with the same columns and index as `features`.
    """
    values = imputer.fit_transform(features) if fit else imputer.transform(features)
    return pd.DataFrame(values, columns=features.columns, index=features.index)


def report(label, y_true, y_pred):
    """Print the classification error (in %) and Cohen's kappa for one data set."""
    hits = metrics.accuracy_score(y_true, y_pred, normalize=False)
    total = y_true.size
    error_pct = ((total - hits) / total) * 100
    kappa = metrics.cohen_kappa_score(y_true, y_pred)
    print(f'{label} classification error is {error_pct:.2f}% ',
          f'of {total} (kappa: {kappa:.4f})')


def main(argv):
    """Train a random forest on argv[1], evaluate on argv[2], print metrics and time.

    Parameters
    ----------
    argv : list of str
        Command line: script name, training ARFF path, test ARFF path.
    """
    if len(argv) < 3:
        sys.exit(f'usage: {argv[0]} TRAIN.arff TEST.arff')

    start = time()

    X_train, y_train = load_dataset(argv[1])
    X_test, y_test = load_dataset(argv[2])

    # Fit the imputer on the training data only and reuse it for the test
    # data, so both sets are filled with the same (training) column means.
    imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
    X_train = impute_frame(imputer, X_train, fit=True)
    X_test = impute_frame(imputer, X_test)

    # No random_state is passed, so the fitted forest can differ between runs.
    clf = RandomForestClassifier(n_estimators=100)
    clf.fit(X_train, y_train)
    y_pred_test = clf.predict(X_test)
    y_pred_train = clf.predict(X_train)

    report('Trainset', y_train, y_pred_train)
    report(' Testset', y_test, y_pred_test)

    elapsed = time() - start
    print(f"T: {elapsed:.4f} s")


if __name__ == '__main__':
    main(sys.argv)

Overwriting rfps6.py


## Check

In [2]:
%%bash
# Map the current directory under /prj to its counterpart under /scratch.
RF="/scratch${PWD#/prj}"
# Strip the trailing /rf to get the directory that holds the env2 conda environment.
SCR="${RF%/rf}"
source "$SCR/env2/etc/profile.d/conda.sh"
conda activate "$SCR/env2"
# Run the script under `time -v` to collect CPU, wall-clock and memory statistics.
/usr/bin/time -v python rfps6.py "$RF/datasets/asteroid-train-66k.arff" "$RF/datasets/asteroid-test-34k.arff"

Trainset classification error is 0.00%  of 66000 (kappa: 1.0000)
 Testset classification error is 0.00%  of 34000 (kappa: 0.9997)
T: 21.1073 s


	Command being timed: "python rfps6.py /scratch/ampemi/eduardo.miranda2/rf/datasets/asteroid-train-66k.arff /scratch/ampemi/eduardo.miranda2/rf/datasets/asteroid-test-34k.arff"
	User time (seconds): 21.55
	System time (seconds): 1.68
	Percent of CPU this job got: 36%
	Elapsed (wall clock) time (h:mm:ss or m:ss): 1:02.99
	Average shared text size (kbytes): 0
	Average unshared data size (kbytes): 0
	Average stack size (kbytes): 0
	Average total size (kbytes): 0
	Maximum resident set size (kbytes): 246668
	Average resident set size (kbytes): 0
	Major (requiring I/O) page faults: 0
	Minor (reclaiming a frame) page faults: 207611
	Voluntary context switches: 13540
	Involuntary context switches: 27
	Swaps: 0
	File system inputs: 421168
	File system outputs: 0
	Socket messages sent: 0
	Socket messages received: 0
	Signals delivered: 0
	Page size (bytes): 4096
	Exit status: 0


## Copy to scratch

In [4]:
# Copy the script into the matching directory under /scratch; the trailing
# slash makes cp fail if that directory does not exist instead of creating a file.
! cp rfps6.py "/scratch${PWD#/prj}/"

## Slurm script

In [5]:
%%writefile rfps6.srm
#!/bin/bash
# Slurm batch script: runs rfps6.py once (single task) on the 66k/34k
# asteroid data sets from the /scratch copy of the submit directory.
#SBATCH --job-name rfps6       # Job name
#SBATCH --partition cpu_small  # Select partition
#SBATCH --ntasks=1             # Total tasks(CPUs)
#SBATCH --time=00:05:00        # Limit execution time
#SBATCH --exclusive            # Exclusive access to nodes

echo '========================================'
echo '- Job ID:' "$SLURM_JOB_ID"
echo '- # of nodes in the job:' "$SLURM_JOB_NUM_NODES"
echo '- # of tasks:' "$SLURM_NTASKS"
echo '- Dir from which sbatch was invoked:' "${SLURM_SUBMIT_DIR##*/}"
# Abort if the submit directory is unreachable; RF below is derived from PWD.
cd "$SLURM_SUBMIT_DIR" || exit 1
echo -n '- List of nodes allocated to the job: '
nodeset -e "$SLURM_JOB_NODELIST"

# Environment
echo '-- modules ----------------------------'
# Map the /prj submit directory to its /scratch counterpart and activate
# the conda environment stored one level above it.
RF="/scratch${PWD#/prj}"
SCR="${RF%/rf}"
source "$SCR/env2/etc/profile.d/conda.sh"
conda activate "$SCR/env2"
# The script and the datasets/ directory are addressed relative to RF.
cd "$RF" || exit 1

# Executable
DT1=asteroid-train-66k.arff
DT2=asteroid-test-34k.arff
# Keep the command as an array so each argument stays a single word.
EXEC=(python rfps6.py "datasets/$DT1" "datasets/$DT2")

# Start
echo '-- run --------------------------------'
echo '$ srun -n' "$SLURM_NTASKS" "${EXEC[@]}"
echo '-- output -----------------------------'
srun -n "$SLURM_NTASKS" "${EXEC[@]}"
echo '~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'

Overwriting rfps6.srm


## Check

In [6]:
# Trial submission: override the script's partition with cpu_dev.
! sbatch -p cpu_dev -t 00:05:00 -n 1 rfps6.srm

Submitted batch job 1348476


In [7]:
# List rfps6 jobs queued or running on cpu_dev.
! squeue -n rfps6 -p cpu_dev -o "%.8i  %.9P %.5j %.2t %.5M %.5D %.4C"

   JOBID  PARTITION  NAME ST  TIME NODES CPUS
 1348476    cpu_dev rfps6 PD  0:00     1    1


In [9]:
# Poll again; an empty listing means the job has left the queue.
! squeue -n rfps6 -p cpu_dev -o "%.8i  %.9P %.5j %.2t %.5M %.5D %.4C"

   JOBID  PARTITION  NAME ST  TIME NODES CPUS


In [10]:
# Show the Slurm output file of the trial job from the /scratch directory.
! cat "/scratch${PWD#/prj}/slurm-1348476.out"

- Job ID: 1348476
- # of nodes in the job: 1
- # of tasks: 1
- Dir from which sbatch was invoked: rf
- List of nodes allocated to the job: sdumont1244
-- modules ----------------------------
-- run --------------------------------
$ srun -n 1 python rfps6.py datasets/asteroid-train-66k.arff datasets/asteroid-test-34k.arff
-- output -----------------------------
Trainset classification error is 0.00%  of 66000 (kappa: 1.0000)
 Testset classification error is 0.00%  of 34000 (kappa: 0.9997)
T: 23.4568 s
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


## Run

In [15]:
%%bash
# Submit the same job three times.
for run in 1 2 3; do
    sbatch rfps6.srm
done

Submitted batch job 1348480
Submitted batch job 1348481
Submitted batch job 1348482


In [2]:
%%bash
# Print the Slurm output file of each of the three submitted jobs, in order.
for job in 1348480 1348481 1348482; do
    cat "/scratch${PWD#/prj}/slurm-${job}.out"
done

- Job ID: 1348480
- # of nodes in the job: 1
- # of tasks: 1
- Dir from which sbatch was invoked: rf
- List of nodes allocated to the job: sdumont1286
-- modules ----------------------------
-- run --------------------------------
$ srun -n 1 python rfps6.py datasets/asteroid-train-66k.arff datasets/asteroid-test-34k.arff
-- output -----------------------------
Trainset classification error is 0.00%  of 66000 (kappa: 1.0000)
 Testset classification error is 0.00%  of 34000 (kappa: 0.9997)
T: 25.8629 s
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Job ID: 1348481
- # of nodes in the job: 1
- # of tasks: 1
- Dir from which sbatch was invoked: rf
- List of nodes allocated to the job: sdumont1286
-- modules ----------------------------
-- run --------------------------------
$ srun -n 1 python rfps6.py datasets/asteroid-train-66k.arff datasets/asteroid-test-34k.arff
-- output -----------------------------
Trainset classification error is 0.00%  of 66000 (kappa: 1.0000)
 Testset classification 

In [16]:
# Count the current user's queue entries (one line per job array element, no header).
! squeue --user="$(whoami)" --noheader --array | wc -l

39


In [17]:
# Count all queue entries on the cpu_small partition.
! squeue -p cpu_small --noheader --array | wc -l

295


In [18]:
# Show the expected start time of the user's rfps6 jobs, sorted by job ID.
! squeue --start -u "$(whoami)" -n rfps6 --format="%S  %.8i  %.9P %.5j %.2t %.5M %.5D %.4C" -S i

START_TIME     JOBID  PARTITION  NAME ST  TIME NODES CPUS
N/A   1348480  cpu_small rfps6 PD  0:00     1    1
N/A   1348481  cpu_small rfps6 PD  0:00     1    1
N/A   1348482  cpu_small rfps6 PD  0:00     1    1


In [1]:
# Poll again; an empty listing means no rfps6 job is left in the queue.
! squeue --start -u "$(whoami)" -n rfps6 --format="%S  %.8i  %.9P %.5j %.2t %.5M %.5D %.4C" -S i

START_TIME     JOBID  PARTITION  NAME ST  TIME NODES CPUS


In [1]:
# Accounting summary (CPUs, nodes, peak RSS, start, elapsed and CPU time) for the first run.
! sacct -j 1348480 -o jobname,ncpus,nnodes,maxrss,maxrssnode%13,start,elapsed,cputime

   JobName      NCPUS   NNodes     MaxRSS    MaxRSSNode               Start    Elapsed    CPUTime 
---------- ---------- -------- ---------- ------------- ------------------- ---------- ---------- 
     rfps6         24        1                          2021-09-15T00:56:05   00:02:09   00:51:36 
     batch         24        1      9304K   sdumont1286 2021-09-15T00:56:05   00:02:09   00:51:36 
    python          1        1    233104K   sdumont1286 2021-09-15T00:56:24   00:01:50   00:01:50 


In [2]:
# Inspect the node named in the MaxRSSNode column of the sacct output for job 1348480.
! scontrol show node sdumont1286

NodeName=sdumont1286 Arch=x86_64 CoresPerSocket=12
   CPUAlloc=24 CPUErr=0 CPUTot=24 CPULoad=24.05
   AvailableFeatures=(null)
   ActiveFeatures=(null)
   Gres=(null)
   NodeAddr=sdumont1286 NodeHostName=sdumont1286 Version=17.02
   OS=Linux RealMemory=64000 AllocMem=64000 FreeMem=55681 Sockets=2 Boards=1
   State=ALLOCATED ThreadsPerCore=1 TmpDisk=0 Weight=1 Owner=N/A MCS_label=N/A
   Partitions=cpu,cpu_small,cpu_dev,cpu_scal,cpu_long 
   BootTime=2021-08-12T11:50:36 SlurmdStartTime=2021-08-12T13:12:37
   CfgTRES=cpu=24,mem=62.50G
   AllocTRES=cpu=24,mem=62.50G
   CapWatts=n/a
   Socket_CapWatts=n/a
   CurrentWatts=74 LowestJoules=830 ConsumedJoules=341341133
   ExtSensorsJoules=n/s ExtSensorsWatts=0 ExtSensorsTemp=n/s
   

