# PARF BUILD F90

PARF is a parallel implementation of the Random Forest (RF) algorithm, written in Fortran 90. It is MPI-enabled, is driven from the command line (CLI), and also provides integration with gnuplot.

Source: https://www.irb.hr/eng/Scientific-Support-Centres/Centre-for-Informatics-and-Computing/Projects2/IT-projects/PARF

Last revision: 2021-05-17

Compiling and running on SDumont, using Intel Fortran, with `module load intel_psxe/2020` already loaded

In [6]:
%%bash
ifort --version
mpiifort --version
mpirun --version
icc --version

ifort (IFORT) 19.1.2.254 20200623
Copyright (C) 1985-2020 Intel Corporation.  All rights reserved.

ifort (IFORT) 19.1.2.254 20200623
Copyright (C) 1985-2020 Intel Corporation.  All rights reserved.

Intel(R) MPI Library for Linux* OS, Version 2019 Update 8 Build 20200624 (id: 4f16ad915)
Copyright 2003-2020, Intel Corporation.
icc (ICC) 19.1.2.254 20200623
Copyright (C) 1985-2020 Intel Corporation.  All rights reserved.



Get PARF

In [1]:
! wget https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/parf/parf_2008-09-30.tgz

--2021-05-10 21:40:33--  https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/parf/parf_2008-09-30.tgz
Resolving storage.googleapis.com (storage.googleapis.com)... 172.217.30.16, 142.250.79.48, 142.250.79.240, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|172.217.30.16|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 49983 (49K) [application/octet-stream]
Saving to: ‘parf_2008-09-30.tgz’


2021-05-10 21:40:33 (3.17 MB/s) - ‘parf_2008-09-30.tgz’ saved [49983/49983]



Unpacking

In [2]:
! tar zxvf parf_2008-09-30.tgz

parf/Makefile
parf/bitvectors.f90
parf/bootstraps.f90
parf/forests.f90
parf/graphics.f90
parf/importances.f90
parf/instancesets.f90
parf/main.f90
parf/options.f90
parf/prototypes.f90
parf/trees.f90
parf/utilities.f90
parf/support.c
parf/farg/
parf/farg/farg.f
parf/parallel/
parf/parallel/none.f90
parf/parallel/mpi.f90
parf/merge.pl
parf/splitrows.pl
parf/splitset.pl
parf/LICENSE


# Adds time measurement

In [4]:
%%writefile parf/main.f90
!> Command-line driver of PARF.
!!
!! Parses the options, then either loads a previously saved forest or
!! loads a training set and grows a new forest (optionally repeating the
!! growth for several missing-value fill passes and once more with the
!! most important variables).  Afterwards it produces every output that
!! was requested through the options and releases all resources.
!!
!! The elapsed wall-clock time of the whole run and the number of
!! processes are printed by rank 0 as the very last line of output.
PROGRAM random_forest
  USE options
  USE instancesets
  USE bootstraps
  USE utilities
  USE forests
  USE prototypes
  USE parallel
  IMPLICIT NONE

  TYPE (datadescription), POINTER :: datadesc
  TYPE (forest), POINTER :: rfptr
  INTEGER :: fill_pass
  LOGICAL :: last_pass

  !=[ added code ]------------------------
  ! Wall-clock timing.  SYSTEM_CLOCK is used instead of CPU_TIME because
  ! CPU_TIME reports processor time, which is not the elapsed time that
  ! matters when runs with different process counts are compared.
  INTEGER, PARAMETER :: wall_kind = SELECTED_INT_KIND(18)
  INTEGER, PARAMETER :: wall_dp = SELECTED_REAL_KIND(15, 307)
  INTEGER (wall_kind) :: wall_start, wall_stop, wall_rate
  REAL (wall_dp) :: wall_seconds

  CALL SYSTEM_CLOCK(wall_start, wall_rate)  ! time measurement
  !---------------------------------------

! MPI ...
  CALL par_init()

  IF (parse_options()) THEN
    CALL init_graphics()
    NULLIFY (testset, trainset, protoset, datadesc, rfptr)
    trainset => new_instanceset(trainset_type)
    IF (LEN_TRIM(opts%load_forest).GT.0) THEN
      ! --- A saved forest was requested: load it instead of training ---
      IF (opts%verbose) WRITE(6, af) "Loading forest"
      CALL load_forest(rfptr, datadesc)
      trainset%classes => trainset%estimated_class
      CALL fix_num_prox(UBOUND(trainset%estimated_class, 1))
      IF (LEN_TRIM(opts%testset).GT.0) THEN
        IF (opts%verbose) WRITE(6, af) "Loading test set"
        testset => new_instanceset(testset_type)
        testset%dd => datadesc
        ! a failed parse skips straight to the clean-up section
        IF (.NOT.parse_arff(testset, opts%testset)) GO TO 9999

        IF (opts%verbose) WRITE(6, af) "Classifying testing set"
        CALL classify_instanceset(testset, rfptr)
        testset%classes => testset%estimated_class
      END IF

      IF (opts%last_prox_required) THEN
        IF (opts%verbose) WRITE(6, af) "Calculating proximities"
        CALL calculate_proximities(rfptr, trainset)
        IF (opts%calc_test_prox) THEN
          CALL calculate_proximities(rfptr, testset)
        END IF
      END IF
    ELSE
      ! --- No saved forest: load the training set and grow a forest ---
      IF (opts%verbose .AND. par_processes.EQ.1) THEN
        WRITE(6, af) "Loading training set"
      ELSE IF (opts%verbose.AND.par_front) THEN
        WRITE(6, af) "Loading and distributing training set"
      END IF
      ! a failed parse skips straight to the clean-up section
      IF (.NOT.parse_arff(trainset, opts%trainset)) GO TO 9999
      CALL fix_num_prox(UBOUND(trainset%catvars, 1))
      datadesc => trainset%dd
      ! the class labels are the column of catvars mapped to the
      ! class attribute
      trainset%classes => trainset%catvars(:, &
        & datadesc%attributes(opts%class_attribute_num)%mapping)
      IF (LEN_TRIM(opts%testset).GT.0) THEN
        IF (opts%verbose) WRITE(6, af) "Loading test set"
        testset => new_instanceset(testset_type)
        testset%dd => datadesc
        IF (.NOT.parse_arff(testset, opts%testset)) GO TO 9999
      END IF
      IF (opts%verbose) THEN
        WRITE(6, "(A26, I6)") "Number of training cases: ", &
          & UBOUND(trainset%catvars, 1)
        WRITE(6, "(A26, I6)") "Number of attributes:     ", &
          & UBOUND(datadesc%attributes, 1)
      END IF

      ! Prelude
      IF (opts%verbose) WRITE(6, af) "Counting classes"
      CALL count_classes(trainset)
      IF (opts%fill_passes.NE.0) THEN
        IF (opts%verbose) WRITE(6, af) "Calculating rough fills"
        CALL calculate_rough_fills(trainset)
        IF (opts%verbose) WRITE(6, af) "Filling missing values"
        CALL fill_missing_rough(trainset)
      END IF
      CALL allocate_importance_arrays(trainset)
      CALL init_bootstraps(trainset)

      ! Variations
      ! The outer loop runs once, or twice when a redo with the most
      ! important variables was requested (see the EXIT logic below).
      DO
        CALL get_num_split_variables(datadesc)
        IF (opts%verbose) THEN
          WRITE(6, "(A26, I6)") "Number of used attributes:", &
            & UBOUND(datadesc%usedvars, 1)
          WRITE(6, "(A26, I6)") "Attributes to split on:   ", &
            & opts%split_variables
        END IF
        CALL zero_importance_arrays()
        fill_pass = 1
        DO WHILE (fill_pass.LE.MAX(1, opts%fill_passes)) ! at least 1 pass
          ! this is a while loop and not a for loop,
          ! to allow early exit in case proximities can't be calculated

          IF (opts%verbose.AND.opts%fill_passes.GT.1) &
            & WRITE(6, "(A6, I2)") "Pass #", fill_pass
          IF (opts%verbose) WRITE(6, af) "Sorting and ranking"
          CALL sort_and_rank(trainset, fill_pass.GT.1)

          IF (opts%verbose) WRITE(6, af) "Growing forest"
          rfptr => new_forest(trainset)

          ! the last pass is the final fill pass with no redo pending
          last_pass = fill_pass.GE.opts%fill_passes &
            & .AND.opts%redo_with_important_vars.EQ.0 &
            & .AND.opts%redo_with_significant_vars.EQ.0

          IF (last_pass) THEN
            CALL calc_training_error(trainset)
            IF (LEN_TRIM(opts%testset).NE.0) THEN
              IF (opts%verbose) WRITE(6, af) "Classifying testing set"
              CALL classify_instanceset(testset, rfptr)
              testset%classes => testset%estimated_class
            END IF
          END IF

          IF (fill_pass.LT.opts%fill_passes.OR.opts%last_prox_required) THEN
            IF (opts%verbose) WRITE(6, af) "Calculating proximities"
            CALL calculate_proximities(rfptr, trainset)
            IF (opts%calc_test_prox.AND.last_pass) THEN
              ! test set proximities only on the very last pass
              CALL calculate_proximities(rfptr, testset)
            END IF
          END IF

          IF (fill_pass.NE.MAX(1, opts%fill_passes)) THEN ! each pass but last
            IF (opts%verbose) WRITE(6, af) "Filling missing values"
            CALL fill_missing_by_prox(trainset)
            CALL free_forest(rfptr)
          END IF

          fill_pass = fill_pass + 1
        END DO

        ! redo with most important variables?
        CALL finalize_importance_arrays(trainset)
        IF (opts%redo_with_important_vars.NE.0) THEN
          opts%redo_with_important_vars = 0 ! redo just once
        ELSE
          EXIT
        END IF
      END DO

      IF (LEN_TRIM(opts%save_forest).NE.0) THEN
        IF (opts%verbose) WRITE(6, af) "Saving forest"
        CALL save_forest(rfptr)
      END IF
    END IF

    ! Finale

    IF (opts%num_prot.NE.0) THEN
      IF (opts%verbose) WRITE(6, af) "Calculating prototypes"
      CALL calculate_prototypes()
    END IF
    IF (opts%num_scale.NE.0) THEN
      IF (opts%verbose) WRITE(6, af) "Calculating scaling coordinates"
      IF (LEN_TRIM(opts%proto_scaling).NE.0) THEN
        CALL classify_instanceset(protoset, rfptr)
        CALL calculate_proximities(rfptr, protoset)
      END IF
      CALL calc_scaling()
    END IF

    ! The reports in this block are written only where par_front is true.
    IF (par_front) THEN
      IF (LEN_TRIM(opts%train_votes).NE.0) THEN
        IF (opts%verbose) WRITE(6, af) "Printing training set votes"
        CALL print_votes(trainset, opts%train_votes)
      END IF
      IF (LEN_TRIM(opts%train_confusion).NE.0 &
          & .OR.LEN_TRIM(opts%positive_category).NE.0) THEN
        IF (opts%verbose) WRITE(6, af) "Processing training set confusion matrix"
        CALL process_confusion_matrix(trainset, opts%train_confusion)
      END IF
      IF (LEN_TRIM(opts%fast_importances).NE.0) THEN
        IF (opts%verbose) WRITE(6, af) "Printing fast variable importances"
        CALL print_fast_importances(rfptr%dgini, datadesc)
      END IF
      IF (LEN_TRIM(opts%importances).NE.0) THEN
        IF (opts%verbose) WRITE(6, af) "Printing variable importances"
        CALL print_importances(datadesc)
      END IF
      IF (LEN_TRIM(opts%case_importances).NE.0) THEN
        IF (opts%verbose) &
          & WRITE(6, af) "Printing case-by-case variable importances"
        CALL print_case_importances(trainset)
      END IF
      IF (LEN_TRIM(opts%interaction).NE.0) THEN
        IF (opts%verbose) WRITE(6, af) "Printing variable interaction"
        CALL print_interaction(rfptr, datadesc)
      END IF
      IF (LEN_TRIM(opts%prototype_analysis).NE.0) THEN
        IF (opts%verbose) WRITE(6, af) "Printing prototype analysis"
        CALL print_prototype_analysis()
      END IF
      IF (LEN_TRIM(opts%prototypes).NE.0) THEN
        IF (opts%verbose) WRITE(6, af) "Printing prototypes"
        CALL print_arff(opts%prototypes, protoset)
      END IF
      IF (LEN_TRIM(opts%train_outliers).NE.0) THEN
        IF (opts%verbose) WRITE(6, af) "Printing training set outliers"
        CALL print_outliers(trainset)
      END IF
      IF (LEN_TRIM(opts%test_outliers).NE.0) THEN
        IF (opts%verbose) WRITE(6, af) "Printing test set outliers"
        CALL print_outliers(testset)
      END IF
      IF (LEN_TRIM(opts%test_votes).NE.0) THEN
        IF (opts%verbose) WRITE(6, af) "Printing test set votes"
        CALL print_votes(testset, opts%test_votes)
      END IF
      IF (LEN_TRIM(opts%test_arff).NE.0) THEN
        IF (opts%verbose) WRITE(6, af) "Printing test set ARFF"
        CALL print_arff(opts%test_arff, testset)
      END IF
      IF (LEN_TRIM(opts%train_test_arff).NE.0) THEN
        IF (opts%verbose) WRITE(6, af) "Printing train+test set ARFF"
        CALL print_arff(opts%train_test_arff, trainset, testset)
      END IF
      IF (LEN_TRIM(opts%test_confusion).NE.0 &
          & .OR.(LEN_TRIM(opts%positive_category).NE.0 &
          & .AND.LEN_TRIM(opts%testset).NE.0)) THEN
        IF (opts%verbose) WRITE(6, af) "Processing test set confusion matrix"
        CALL process_confusion_matrix(testset, opts%test_confusion)
      END IF
    END IF
    ! The scaling and forest dumps below are not guarded by par_front.
    IF (LEN_TRIM(opts%train_scaling).NE.0) THEN
      IF (opts%verbose) WRITE(6, af) "Printing training set scaling coordinates"
      CALL print_scaling(trainset)
    END IF
    IF (LEN_TRIM(opts%test_scaling).NE.0) THEN
      IF (opts%verbose) WRITE(6, af) "Printing test set scaling coordinates"
      CALL print_scaling(testset)
    END IF
    IF (LEN_TRIM(opts%proto_scaling).NE.0) THEN
      IF (opts%verbose) WRITE(6, af) "Printing prototype scaling coordinates"
      CALL print_scaling(protoset)
    END IF
    IF (LEN_TRIM(opts%dump_forest).NE.0) THEN
      IF (opts%verbose) WRITE(6, af) "Printing forest"
      CALL print_forest(rfptr, datadesc)
    END IF

    ! Clean-up; also the jump target when parsing an ARFF file fails.
    9999 CONTINUE
    IF (par_front) CALL free_prototypes(datadesc)
    CALL free_importance_arrays()
    CALL free_forest(rfptr)
    CALL free_instanceset(trainset)
    CALL free_instanceset(testset)
    CALL free_datadescription(datadesc)
    CALL finish_bootstraps()
    CALL finish_graphics()
  END IF

! MPI ...
  IF (opts%verbose.AND.par_processes.GT.1) WRITE(6, af) "Finalizing"
  CALL par_finalize()
  IF (opts%verbose.AND.par_processes.GT.1) WRITE(6, af) "Finished"

  !=[ added code ]------------------------
  CALL SYSTEM_CLOCK(wall_stop)  ! time measurement
  IF (par_rank == 0) THEN
    ! a zero rate means the processor has no clock; report 0 then
    wall_seconds = 0.0_wall_dp
    IF (wall_rate.GT.0) THEN
      wall_seconds = REAL(wall_stop - wall_start, wall_dp) &
        & / REAL(wall_rate, wall_dp)
    END IF
    WRITE(6, "('T: ', F0.4, '  |  N: ', G0)") wall_seconds, par_processes
  END IF
  !---------------------------------------

END PROGRAM random_forest

Overwriting parf/main.f90


## Configure the Makefile

In [8]:
%%writefile parf/Makefile
##### Configuration section
# Build description for the `parf` executable.  Only the variables in
# this section are meant to be edited; each one shows the original
# default as a commented-out line followed by the value used here.

### Choose a Fortran 90 compiler and options
# FC = /opt/intel_fc_80/bin/ifort
# FFLAGS = -g -pg -CB -traceback --static
FC = ifort
FFLAGS = -O3

### Choose a C compiler and options
# CC = cc
# CFLAGS = -Wall -g -pg --static
CC = icc
CFLAGS = -O3

### Choose parallelisation library, comment for no parallelisation
# The value selects which source file under parallel/ is compiled into
# parallel.o (parallel/${PAR}.f90); when the line is commented out the
# `PAR ?= none` fallback below applies.
PAR = mpi

### For MPI: the MPI Fortran compilation command
# MPIFC = mpif90
MPIFC = mpiifort

##### End of configuration section
# 
# No changes should be necessary below this line
# ---------------------------------------------------------------------

# Fall back to the serial stub when PAR was not set above; for an MPI
# build every Fortran compile and the link use ${MPIFC} instead of ${FC}.
PAR ?= none
ifeq (${PAR},mpi)
	FC = ${MPIFC}
endif
# Module sources; their objects are derived by suffix substitution.
# ADDSOURCES is not defined in this file, so ADDOBJECTS is empty unless
# it is supplied from outside.
MODSOURCES=trees.f90 bitvectors.f90 instancesets.f90 options.f90 \
	utilities.f90 bootstraps.f90 forests.f90 importances.f90 \
	prototypes.f90 graphics.f90
CSOURCES=support.c
COBJECTS=${CSOURCES:.c=.o}
MODOBJECTS=${MODSOURCES:.f90=.o}
ADDOBJECTS=${ADDSOURCES:.f=.o}
PROJECT=parf
DIR=$(notdir ${PWD})

# Default goal: compile main.f90 and link it with all objects in one
# step ($+ expands to every prerequisite, main.f90 included).
${PROJECT}: main.f90 parallel.o ${MODOBJECTS} ${ADDOBJECTS} ${COBJECTS}
	${FC} ${FFLAGS} -o ${PROJECT} $+

# parallel.o is always built from the file selected by PAR.
parallel.o: parallel/${PAR}.f90
	${FC} ${FFLAGS} -c -o parallel.o $<

# Pattern rules for the remaining Fortran and C sources.
%.o: %.f90
	${FC} ${FFLAGS} -c $<

%.o: %.c
	${CC} ${CFLAGS} -c $<

# Build-order dependencies between objects.  Every object also lists
# the Makefile itself as a prerequisite, so editing this file (for
# example to switch PAR) causes everything to be rebuilt.
main.o: Makefile options.o instancesets.o utilities.o forests.o \
	importances.o prototypes.o parallel.o
forests.o: Makefile trees.o instancesets.o bootstraps.o bitvectors.o \
	importances.o prototypes.o
trees.o: Makefile bitvectors.o instancesets.o bootstraps.o utilities.o
instancesets.o: Makefile utilities.o bitvectors.o \
	options.o parallel.o support.o
importances.o: Makefile instancesets.o graphics.o
bitvectors.o: Makefile utilities.o
utilities.o: Makefile support.o
options.o: Makefile support.o utilities.o parallel.o
#compatibility.o: Makefile
parallel.o: Makefile
bootstraps.o: Makefile instancesets.o utilities.o
prototypes.o: Makefile instancesets.o utilities.o options.o
graphics.o: Makefile utilities.o options.o
support.o: Makefile

# Remove module files, objects, the executable and profiler output.
clean:
	rm -f *.mod *.o ${PROJECT} gmon.out

# The distribution target is disabled (kept commented out).
#dist:
#	rm -f ${PROJECT}.tgz
#	cd .. && \
#		tar zcf ${DIR}/${PROJECT}.tgz ${DIR}/Makefile \
#		${DIR}/*.f90 ${DIR}/*.c ${DIR}/farg ${DIR}/parallel \
#		${DIR}/*.pl ${DIR}/LICENSE

.PHONY: clean dist

Overwriting parf/Makefile


Compiling the serial version (first comment out the `PAR = mpi` line in the Makefile, so that no parallelisation library is selected)

In [6]:
%%bash
cd parf
make
mv parf parf-s

ifort -O3 -c -o parallel.o parallel/none.f90
icc -O3 -c support.c
ifort -O3 -c utilities.f90
ifort -O3 -c bitvectors.f90
ifort -O3 -c options.f90
ifort -O3 -c instancesets.f90
ifort -O3 -c bootstraps.f90
ifort -O3 -c trees.f90
ifort -O3 -c graphics.f90
ifort -O3 -c importances.f90
ifort -O3 -c prototypes.f90
ifort -O3 -c forests.f90
ifort -O3 -o parf main.f90 parallel.o trees.o bitvectors.o instancesets.o options.o utilities.o bootstraps.o forests.o importances.o prototypes.o graphics.o support.o


forests.f90(994): remark #8291: Recommended relationship between field width 'W' and the number of fractional digits 'D' in this edit descriptor is 'W>=D+7'.
      WRITE(handle, '(E10.4, 1X, E10.4, 1X, A30)'), med, dev, &
-----------------------^
forests.f90(994): remark #8291: Recommended relationship between field width 'W' and the number of fractional digits 'D' in this edit descriptor is 'W>=D+7'.
      WRITE(handle, '(E10.4, 1X, E10.4, 1X, A30)'), med, dev, &
----------------------------------^


In [7]:
! ls parf/parf-s

parf/parf-s


Compiling the MPI version (with `PAR = mpi` set in the Makefile)

In [9]:
%%bash
cd parf
make

mpiifort -O3 -c -o parallel.o parallel/mpi.f90
icc -O3 -c support.c
mpiifort -O3 -c utilities.f90
mpiifort -O3 -c bitvectors.f90
mpiifort -O3 -c options.f90
mpiifort -O3 -c instancesets.f90
mpiifort -O3 -c bootstraps.f90
mpiifort -O3 -c trees.f90
mpiifort -O3 -c graphics.f90
mpiifort -O3 -c importances.f90
mpiifort -O3 -c prototypes.f90
mpiifort -O3 -c forests.f90
mpiifort -O3 -o parf main.f90 parallel.o trees.o bitvectors.o instancesets.o options.o utilities.o bootstraps.o forests.o importances.o prototypes.o graphics.o support.o


forests.f90(994): remark #8291: Recommended relationship between field width 'W' and the number of fractional digits 'D' in this edit descriptor is 'W>=D+7'.
      WRITE(handle, '(E10.4, 1X, E10.4, 1X, A30)'), med, dev, &
-----------------------^
forests.f90(994): remark #8291: Recommended relationship between field width 'W' and the number of fractional digits 'D' in this edit descriptor is 'W>=D+7'.
      WRITE(handle, '(E10.4, 1X, E10.4, 1X, A30)'), med, dev, &
----------------------------------^


In [10]:
! ls parf/parf

parf/parf


In [11]:
! ldd parf/parf

	linux-vdso.so.1 =>  (0x00007ffd439ab000)
	libmpifort.so.12 => /opt/intel/parallel_studio_xe_2020/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/libmpifort.so.12 (0x00007f191437f000)
	libmpi.so.12 => /opt/intel/parallel_studio_xe_2020/compilers_and_libraries_2020.2.254/linux/mpi/intel64/lib/release/libmpi.so.12 (0x00007f1913163000)
	libdl.so.2 => /usr/lib64/libdl.so.2 (0x00007f1912f5f000)
	librt.so.1 => /usr/lib64/librt.so.1 (0x00007f1912d57000)
	libpthread.so.0 => /usr/lib64/libpthread.so.0 (0x00007f1912b3b000)
	libm.so.6 => /usr/lib64/libm.so.6 (0x00007f1912839000)
	libc.so.6 => /usr/lib64/libc.so.6 (0x00007f191246c000)
	libgcc_s.so.1 => /usr/lib64/libgcc_s.so.1 (0x00007f1912256000)
	libfabric.so.1 => /opt/intel/parallel_studio_xe_2020/compilers_and_libraries_2020.2.254/linux/mpi/intel64/libfabric/lib/libfabric.so.1 (0x00007f1912014000)
	/lib64/ld-linux-x86-64.so.2 (0x00007f191473e000)


Check run

In [12]:
! parf/parf

PARF (C) 2005 Rudjer Boskovic Institute
Goran Topic, Tomislav Smuc; algorithm by Leo Breiman and Adele Cutler
Licensed under GNU GPL 2.0
 
Usage: rf [OPTION...]
-h | --help   show this message
-t file       file to use as training set
-a file       file to analyse and classify
-tv [file]    training set votes output file
-tc [file]    training set confusion matrix output file
-av [file]    test set votes output file
-ac [file]    test set confusion matrix output file
-ar [file]    test set classification results output file
-aa [file]    test set ARFF output file
-ta [file]    train + test set ARFF output file
-c class      the class attribute, or NEW, or LAST (default)
-cq [n[%]]    quantity of generated class instances (only with -c NEW)
-cp category  positive category
-n trees      the number of trees to grow
-f n          the fill method: 0=none, 1=rough, 2+=# of passes
-v n          redo the forest with n most important variables
-vs n         redo the forest with variables more s

Get example dataset

In [14]:
%%bash
mkdir datasets
cd datasets
wget https://raw.githubusercontent.com/efurlanm/parf/master/datasets/glass.arff

--2021-05-10 21:56:23--  https://raw.githubusercontent.com/efurlanm/parf/master/datasets/glass.arff
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.109.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 17850 (17K) [text/plain]
Saving to: ‘glass.arff’

     0K .......... .......                                    100% 66.5M=0s

2021-05-10 21:56:23 (66.5 MB/s) - ‘glass.arff’ saved [17850/17850]



In [17]:
! ls datasets/glass.arff

datasets/glass.arff


Train a forest on an example dataset (glass.arff); the `-t file` option names the file to use as the training set:

In [16]:
%%timeit -n 1 -r 1
%%bash
parf/parf-s --verbose -t datasets/glass.arff | head -15

Seed:  -1367879756
Loading training set
Number of training cases:    214
Number of attributes:         10
Counting classes
Number of used attributes:     9
Attributes to split on:        3
Sorting and ranking
Growing forest
        Tree #     1
        Tree #     2
        Tree #     3
        Tree #     4
        Tree #     5
        Tree #     6
25.9 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


Check MPI

From: https://www.osc.edu/supercomputing/batch-processing-at-osc/slurm_migration/slurm_migration_issues
* unset I_MPI_PMI_LIBRARY 
* export I_MPI_JOB_RESPECT_PROCESS_PLACEMENT=0

In [15]:
%%timeit -n 1 -r 1
%%bash
unset I_MPI_PMI_LIBRARY
export I_MPI_JOB_RESPECT_PROCESS_PLACEMENT=0
mpirun -np 1 parf/parf --verbose -t datasets/glass.arff | head -15

Seed:  -1406524348
Loading training set
Number of training cases:    214
Number of attributes:         10
Counting classes
Number of used attributes:     9
Attributes to split on:        3
Sorting and ranking
Growing forest
        Tree #     1
        Tree #     2
        Tree #     3
        Tree #     4
        Tree #     5
        Tree #     6
793 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [17]:
%%timeit -n 1 -r 1
%%bash
unset I_MPI_PMI_LIBRARY
export I_MPI_JOB_RESPECT_PROCESS_PLACEMENT=0
mpirun -np 6 parf/parf --verbose -t datasets/glass.arff | head -15

Seed:  -1274625374
Loading and distributing training set
        Tree #    50 on     3
        Tree #    67 on     4
        Tree #    84 on     5
        Tree #    17 on     1
        Tree #    33 on     2
Number of training cases:    214
Number of attributes:         10
Counting classes
Number of used attributes:     9
Attributes to split on:        3
Sorting and ranking
        Tree #    51 on     3
        Tree #    68 on     4
3.33 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [18]:
%%timeit -n 1 -r 1
%%bash
unset I_MPI_PMI_LIBRARY
export I_MPI_JOB_RESPECT_PROCESS_PLACEMENT=0
mpirun -np 12 parf/parf -t datasets/glass.arff

Trainset classification error is  22.43% of     214 (kappa: 0.6522 )
T: 2.2403  |  N: 12
3.04 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


## Asteroid dataset

Get

In [19]:
%%bash
cd datasets
wget https://raw.githubusercontent.com/efurlanm/parf/master/datasets/asteroid-train.arff
wget https://raw.githubusercontent.com/efurlanm/parf/master/datasets/asteroid-test.arff

--2021-05-17 17:53:00--  https://raw.githubusercontent.com/efurlanm/parf/master/datasets/asteroid-train.arff
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.109.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 18406905 (18M) [text/plain]
Saving to: ‘asteroid-train.arff’

     0K .......... .......... .......... .......... ..........  0% 77.2M 0s
    50K .......... .......... .......... .......... ..........  0% 87.6M 0s
   100K .......... .......... .......... .......... ..........  0% 7.31M 1s
   150K .......... .......... .......... .......... ..........  1% 76.4M 1s
   200K .......... .......... .......... .......... ..........  1% 27.8M 1s
   250K .......... .......... .......... .......... ..........  1% 24.7M 1s
   300K .......... .......... .......... .......... ..........  1% 81.0M 1s
   350K .........

Check

In [20]:
%%timeit -n 1 -r 1
%%bash
unset I_MPI_PMI_LIBRARY
export I_MPI_JOB_RESPECT_PROCESS_PLACEMENT=0
mpirun -np 1 parf/parf -t datasets/asteroid-train.arff -a datasets/asteroid-test.arff

Trainset classification error is   0.07% of   66000 (kappa: 0.9888 )
 Testset classification error is   0.48% of   34000 (kappa: 0.9269 )
T: 113.4175  |  N: 1
1min 54s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [21]:
%%timeit -n 1 -r 1
%%bash
unset I_MPI_PMI_LIBRARY
export I_MPI_JOB_RESPECT_PROCESS_PLACEMENT=0
mpirun -np 12 parf/parf -t datasets/asteroid-train.arff -a datasets/asteroid-test.arff

Trainset classification error is   0.06% of   66000 (kappa: 0.9910 )
 Testset classification error is   0.56% of   34000 (kappa: 0.9149 )
T: 21.8287  |  N: 12
22.6 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [2]:
%%timeit -n 1 -r 1
%%bash
unset I_MPI_PMI_LIBRARY
export I_MPI_JOB_RESPECT_PROCESS_PLACEMENT=0
mpirun -np 24 parf/parf -t datasets/asteroid-train.arff -a datasets/asteroid-test.arff

Trainset classification error is   0.05% of   66000 (kappa: 0.9922 )
 Testset classification error is   0.44% of   34000 (kappa: 0.9332 )
T: 15.0425  |  N: 24
16.6 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [1]:
%%timeit -n 1 -r 1
%%bash
unset I_MPI_PMI_LIBRARY
export I_MPI_JOB_RESPECT_PROCESS_PLACEMENT=0
mpirun -np 12 parf/parf -t datasets/glass.arff

Trainset classification error is  20.56% of     214 (kappa: 0.6812 )
T: 2.2061  |  N: 12
3.04 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
