# PARF F2PY BUILD

Compiling PARF sources using Intel Fortran Compiler, on the SDumont.

Preloaded:

    module load intel_psxe
    source /opt/intel/parallel_studio_xe_2020/intelpython3/etc/profile.d/conda.sh
    conda activate --stack /scratch/xxxx/yyyy/env4
    unset I_MPI_PMI_LIBRARY
    export I_MPI_VAR_CHECK_SPELLING=0
    jupyter-lab --no-browser --port=9000 --ip=0.0.0.0
    
References: 

- https://community.intel.com/t5/Intel-Distribution-for-Python/How-to-install-use-the-Intel-distribution-of-Python-from-my/m-p/1138999
- https://community.intel.com/t5/Intel-oneAPI-HPC-Toolkit/mpiexec-hydra-legacy-suppress-variable-is-not-supported/td-p/1162351

In [1]:
! python --version

Python 3.7.11 :: Intel Corporation


In [12]:
! mpiexec --version

Intel(R) MPI Library for Linux* OS, Version 2021.3 Build 20210601 (id: 6f90181f1)
Copyright 2003-2021, Intel Corporation.


In [2]:
! ls parf003/main.f90

parf003/main.f90


In [7]:
%%writefile parf003/main.f90
!-[ changed ]----------------------------
! PROGRAM random_forest
SUBROUTINE random_forest(p_trainset, p_testset, &
                p_error_count, p_oob_count, p_kappa_value, &
                p_instance_count, p_error, p_testset_kappa_value, &
                p_time, p_rank, p_size)
!---------------------------------------


  USE options
  USE instancesets
  USE bootstraps
  USE utilities
  USE forests
  USE prototypes
  USE parallel
  IMPLICIT NONE

  TYPE (datadescription), POINTER :: datadesc
  TYPE (forest), POINTER :: rfptr
  INTEGER :: fill_pass
  LOGICAL :: last_pass

  
!-[ changed ]----------------------------

! files
character(len=256), intent(in) :: p_trainset, p_testset

! Trainset
integer, intent(out) :: p_error_count, p_oob_count
real,    intent(out) :: p_kappa_value

! Testset
integer, intent(out) :: p_instance_count
real,    intent(out) :: p_error, p_testset_kappa_value

! Proc, time
integer, intent(out) :: p_rank, p_size
real,    intent(out) :: p_time

real :: t0, t1

p_error_count = 0
p_oob_count = 0
p_instance_count = 0
p_kappa_value = 0
p_error = 0 
p_testset_kappa_value = 0

call cpu_time(t0)  ! time measurement
!----------------------------------------


  ! MPI ...
  CALL par_init()

  IF (parse_options(p_trainset, p_testset)) THEN
    CALL init_graphics()
    NULLIFY (testset, trainset, protoset, datadesc, rfptr)
    trainset => new_instanceset(trainset_type)
    IF (LEN_TRIM(opts%load_forest).GT.0) THEN
      IF (opts%verbose) WRITE(6, af) "Loading forest"
      CALL load_forest(rfptr, datadesc)
      trainset%classes => trainset%estimated_class
      CALL fix_num_prox(UBOUND(trainset%estimated_class, 1))
      IF (LEN_TRIM(opts%testset).GT.0) THEN
        IF (opts%verbose) WRITE(6, af) "Loading test set"
        testset => new_instanceset(testset_type)
        testset%dd => datadesc
        IF (.NOT.parse_arff(testset, opts%testset)) GO TO 9999
        IF (opts%verbose) WRITE(6, af) "Classifying testing set"

        
!-[ changed ]----------------------------
        CALL classify_instanceset(testset, rfptr, p_error, &
                    p_instance_count, p_testset_kappa_value)
!---------------------------------------


        testset%classes => testset%estimated_class
      END IF
      IF (opts%last_prox_required) THEN
        IF (opts%verbose) WRITE(6, af) "Calculating proximities"
        CALL calculate_proximities(rfptr, trainset)
        IF (opts%calc_test_prox) THEN
          CALL calculate_proximities(rfptr, testset)
        END IF
      END IF
    ELSE
      IF (opts%verbose .AND. par_processes.EQ.1) THEN
        WRITE(6, af) "Loading training set"
      ELSE IF (opts%verbose.AND.par_front) THEN
        WRITE(6, af) "Loading and distributing training set"
      END IF

      IF (.NOT.parse_arff(trainset, opts%trainset)) GO TO 9999

      CALL fix_num_prox(UBOUND(trainset%catvars, 1))
      datadesc => trainset%dd
      trainset%classes => trainset%catvars(:, &
        & datadesc%attributes(opts%class_attribute_num)%mapping)
      IF (LEN_TRIM(opts%testset).GT.0) THEN
        IF (opts%verbose) WRITE(6, af) "Loading test set"
        testset => new_instanceset(testset_type)
        testset%dd => datadesc
        IF (.NOT.parse_arff(testset, opts%testset)) GO TO 9999
      END IF
      IF (opts%verbose) THEN
        WRITE(6, "(A26, I6)") "Number of training cases: ", &
          & UBOUND(trainset%catvars, 1)
        WRITE(6, "(A26, I6)") "Number of attributes:     ", &
          & UBOUND(datadesc%attributes, 1)
      END IF

      ! Prelude
      IF (opts%verbose) WRITE(6, af) "Counting classes"
      CALL count_classes(trainset)
      IF (opts%fill_passes.NE.0) THEN
        IF (opts%verbose) WRITE(6, af) "Calculating rough fills"
        CALL calculate_rough_fills(trainset)
        IF (opts%verbose) WRITE(6, af) "Filling missing values"
        CALL fill_missing_rough(trainset)
      END IF
      CALL allocate_importance_arrays(trainset)
      CALL init_bootstraps(trainset)

      ! Variations
      DO
        CALL get_num_split_variables(datadesc)
        IF (opts%verbose) THEN
          WRITE(6, "(A26, I6)") "Number of used attributes:", &
            & UBOUND(datadesc%usedvars, 1)
          WRITE(6, "(A26, I6)") "Attributes to split on:   ", &
            & opts%split_variables
        END IF
        CALL zero_importance_arrays()
        fill_pass = 1
        DO WHILE (fill_pass.LE.MAX(1, opts%fill_passes)) ! at least 1 pass
          ! this is a while loop and not a for loop,
          ! to allow early exit in case proximities can't be calculated

          IF (opts%verbose.AND.opts%fill_passes.GT.1) &
            & WRITE(6, "(A6, I2)") "Pass #", fill_pass
          IF (opts%verbose) WRITE(6, af) "Sorting and ranking"
          CALL sort_and_rank(trainset, fill_pass.GT.1)

          IF (opts%verbose) WRITE(6, af) "Growing forest"
          rfptr => new_forest(trainset)

          last_pass = fill_pass.GE.opts%fill_passes &
            & .AND.opts%redo_with_important_vars.EQ.0 &
            & .AND.opts%redo_with_significant_vars.EQ.0

          IF (last_pass) THEN

          
!-[ changed ]----------------------------
            CALL calc_training_error(trainset, p_error_count, &
                        p_oob_count, p_kappa_value)
!----------------------------------------


            IF (LEN_TRIM(opts%testset).NE.0) THEN
              IF (opts%verbose) WRITE(6, af) "Classifying testing set"

              
!-[ changed ]----------------------------
              CALL classify_instanceset(testset, rfptr, p_error, &
                        p_instance_count, p_testset_kappa_value)
!---------------------------------------


              testset%classes => testset%estimated_class
            END IF
          END IF
          IF (fill_pass.LT.opts%fill_passes.OR.opts%last_prox_required) THEN
            IF (opts%verbose) WRITE(6, af) "Calculating proximities"
            CALL calculate_proximities(rfptr, trainset)
            IF (opts%calc_test_prox.AND.last_pass) THEN
              ! test set proximities only on the very last pass
              CALL calculate_proximities(rfptr, testset)
            END IF
          END IF

          IF (fill_pass.NE.MAX(1, opts%fill_passes)) THEN ! each pass but last
            IF (opts%verbose) WRITE(6, af) "Filling missing values"
            CALL fill_missing_by_prox(trainset)
            CALL free_forest(rfptr)
          END IF

          fill_pass = fill_pass + 1
        END DO

        ! redo with most important variables?
        CALL finalize_importance_arrays(trainset)
        IF (opts%redo_with_important_vars.NE.0) THEN
          opts%redo_with_important_vars = 0 ! redo just once
        ELSE
          EXIT
        END IF
      END DO

      IF (LEN_TRIM(opts%save_forest).NE.0) THEN
        IF (opts%verbose) WRITE(6, af) "Saving forest"
        CALL save_forest(rfptr)
      END IF
    END IF

    ! Finale

    IF (opts%num_prot.NE.0) THEN
      IF (opts%verbose) WRITE(6, af) "Calculating prototypes"
      CALL calculate_prototypes()
    END IF
    IF (opts%num_scale.NE.0) THEN
      IF (opts%verbose) WRITE(6, af) "Calculating scaling coordinates"
      IF (LEN_TRIM(opts%proto_scaling).NE.0) THEN

      
!-[ changed ]----------------------------
        CALL classify_instanceset(protoset, rfptr, p_error, &
                    p_instance_count, p_testset_kappa_value)
!---------------------------------------


        CALL calculate_proximities(rfptr, protoset)
      END IF
      CALL calc_scaling()
    END IF

    IF (par_front) THEN
      IF (LEN_TRIM(opts%train_votes).NE.0) THEN
        IF (opts%verbose) WRITE(6, af) "Printing training set votes"
        CALL print_votes(trainset, opts%train_votes)
      END IF
      IF (LEN_TRIM(opts%train_confusion).NE.0 &
          & .OR.LEN_TRIM(opts%positive_category).NE.0) THEN
        IF (opts%verbose) WRITE(6, af) "Processing training set confusion matrix"
        CALL process_confusion_matrix(trainset, opts%train_confusion)
      END IF
      IF (LEN_TRIM(opts%fast_importances).NE.0) THEN
        IF (opts%verbose) WRITE(6, af) "Printing fast variable importances"
        CALL print_fast_importances(rfptr%dgini, datadesc)
      END IF
      IF (LEN_TRIM(opts%importances).NE.0) THEN
        IF (opts%verbose) WRITE(6, af) "Printing variable importances"
        CALL print_importances(datadesc)
      END IF
      IF (LEN_TRIM(opts%case_importances).NE.0) THEN
        IF (opts%verbose) &
          & WRITE(6, af) "Printing case-by-case variable importances"
        CALL print_case_importances(trainset)
      END IF
      IF (LEN_TRIM(opts%interaction).NE.0) THEN
        IF (opts%verbose) WRITE(6, af) "Printing variable interaction"
        CALL print_interaction(rfptr, datadesc)
      END IF
      IF (LEN_TRIM(opts%prototype_analysis).NE.0) THEN
        IF (opts%verbose) WRITE(6, af) "Printing prototype analysis"
        CALL print_prototype_analysis()
      END IF
      IF (LEN_TRIM(opts%prototypes).NE.0) THEN
        IF (opts%verbose) WRITE(6, af) "Printing prototypes"
        CALL print_arff(opts%prototypes, protoset)
      END IF
      IF (LEN_TRIM(opts%train_outliers).NE.0) THEN
        IF (opts%verbose) WRITE(6, af) "Printing training set outliers"
        CALL print_outliers(trainset)
      END IF
      IF (LEN_TRIM(opts%test_outliers).NE.0) THEN
        IF (opts%verbose) WRITE(6, af) "Printing test set outliers"
        CALL print_outliers(testset)
      END IF
      IF (LEN_TRIM(opts%test_votes).NE.0) THEN
        IF (opts%verbose) WRITE(6, af) "Printing test set votes"
        CALL print_votes(testset, opts%test_votes)
      END IF
      IF (LEN_TRIM(opts%test_arff).NE.0) THEN
        IF (opts%verbose) WRITE(6, af) "Printing test set ARFF"
        CALL print_arff(opts%test_arff, testset)
      END IF
      IF (LEN_TRIM(opts%train_test_arff).NE.0) THEN
        IF (opts%verbose) WRITE(6, af) "Printing train+test set ARFF"
        CALL print_arff(opts%train_test_arff, trainset, testset)
      END IF
      IF (LEN_TRIM(opts%test_confusion).NE.0 &
          & .OR.(LEN_TRIM(opts%positive_category).NE.0 &
          & .AND.LEN_TRIM(opts%testset).NE.0)) THEN
        IF (opts%verbose) WRITE(6, af) "Processing test set confusion matrix"
        CALL process_confusion_matrix(testset, opts%test_confusion)
      END IF
    END IF
    IF (LEN_TRIM(opts%train_scaling).NE.0) THEN
      IF (opts%verbose) WRITE(6, af) "Printing training set scaling coordinates"
      CALL print_scaling(trainset)
    END IF
    IF (LEN_TRIM(opts%test_scaling).NE.0) THEN
      IF (opts%verbose) WRITE(6, af) "Printing test set scaling coordinates"
      CALL print_scaling(testset)
    END IF
    IF (LEN_TRIM(opts%proto_scaling).NE.0) THEN
      IF (opts%verbose) WRITE(6, af) "Printing prototype scaling coordinates"
      CALL print_scaling(protoset)
    END IF
    IF (LEN_TRIM(opts%dump_forest).NE.0) THEN
      IF (opts%verbose) WRITE(6, af) "Printing forest"
      CALL print_forest(rfptr, datadesc)
    END IF

    9999 CONTINUE
    IF (par_front) CALL free_prototypes(datadesc)
    CALL free_importance_arrays()
    CALL free_forest(rfptr)
    CALL free_instanceset(trainset)
    CALL free_instanceset(testset)
    CALL free_datadescription(datadesc)
    CALL finish_bootstraps()
    CALL finish_graphics()
  END IF

  ! MPI ...
  IF (opts%verbose.AND.par_processes.GT.1) WRITE(6, af) "Finalizing"
  CALL par_finalize()
  IF (opts%verbose.AND.par_processes.GT.1) WRITE(6, af) "Finished"

  
!-[ changed ]----------------------------
call cpu_time(t1)  ! time measurement
p_size = par_processes
p_rank = par_rank
p_time = t1 - t0
!---------------------------------------


!-[ changed ]----------------------------
! END PROGRAM random_forest
END SUBROUTINE
!---------------------------------------

Overwriting parf003/main.f90


- files changed to transform from command line to subroutines with parameter passing: main.f90, options.f90, forests.f90

In [5]:
%%writefile parf003/Makefile
##### Configuration section

### Choose a Fortran 90 compiler and options
## Intel Fortran 8.0 (PARF original compiler)
##    -g          level of debugging information in the object file
##    -pg         compiles and links for function profiling with gprof
##    -CB         same as the /check:bounds option
##    -traceback  extra information in the object file to provide
##        source file traceback information when a severe error occurs
##    --static    prevents linking with shared libraries
# FC = /opt/intel_fc_80/bin/ifort
# FFLAGS = -g -pg -CB -traceback --static
## GNU
# FC = gfortran
# FFLAGS = -O3
# debug: FFLAGS = -O0 -g -traceback -check all -CB 
## Intel 2021.2.0
FC = ifort
FFLAGS = -fPIC -diag-disable=8291 -O3

### Choose a C compiler and options
## GNU
# CC = cc
# CFLAGS = -Wall -g -pg --static
## Intel
CC = icc
CFLAGS = -fPIC -O3

### Choose parallelisation library, comment for no parallelisation
PAR = mpi

### For MPI: the MPI Fortran compilation command
## GNU + OpenMPI
# MPIFC = mpif90
## Intel MPI
MPIFC = mpiifort

##### End of configuration section
# 
# No changes should be necessary below this line
#---------------------------------------


PAR ?= none
ifeq (${PAR},mpi)
	FC = ${MPIFC}
endif
MODSOURCES=trees.f90 bitvectors.f90 instancesets.f90 options.f90 \
	utilities.f90 bootstraps.f90 forests.f90 importances.f90 \
	prototypes.f90 graphics.f90
CSOURCES=support.c
COBJECTS=${CSOURCES:.c=.o}
MODOBJECTS=${MODSOURCES:.f90=.o}
ADDOBJECTS=${ADDSOURCES:.f=.o}
PROJECT=parf
DIR=$(notdir ${PWD})

${PROJECT}: main.f90 parallel.o ${MODOBJECTS} ${ADDOBJECTS} ${COBJECTS}
	${FC} ${FFLAGS} -o ${PROJECT} $+

parallel.o: parallel/${PAR}.f90
	${FC} ${FFLAGS} -c -o parallel.o $<

%.o: %.f90
	${FC} ${FFLAGS} -c $<

%.o: %.c
	${CC} ${CFLAGS} -c $<

main.o: Makefile options.o instancesets.o utilities.o forests.o \
	importances.o prototypes.o parallel.o
forests.o: Makefile trees.o instancesets.o bootstraps.o bitvectors.o \
	importances.o prototypes.o
trees.o: Makefile bitvectors.o instancesets.o bootstraps.o utilities.o
instancesets.o: Makefile utilities.o bitvectors.o \
	options.o parallel.o support.o
importances.o: Makefile instancesets.o graphics.o
bitvectors.o: Makefile utilities.o
utilities.o: Makefile support.o
options.o: Makefile support.o utilities.o parallel.o
#compatibility.o: Makefile
parallel.o: Makefile
bootstraps.o: Makefile instancesets.o utilities.o
prototypes.o: Makefile instancesets.o utilities.o options.o
graphics.o: Makefile utilities.o options.o
support.o: Makefile

clean:
	rm -f *.mod *.o

#dist:
#	rm -f ${PROJECT}.tgz
#	cd .. && \
#		tar zcf ${DIR}/${PROJECT}.tgz ${DIR}/Makefile \
#		${DIR}/*.f90 ${DIR}/*.c ${DIR}/farg ${DIR}/parallel \
#		${DIR}/*.pl ${DIR}/LICENSE

.PHONY: clean dist

Overwriting parf003/Makefile


## Serial

    # PAR = mpi  (commented)
    compilers: icc, ifort

In [11]:
%%bash
cd parf003
rm *.o *.mod
make --always-make    # create object files (.o)
f2py  -m parf003ser  --overwrite-signature  -h sgnFile.pyf  main.f90
f2py  -c sgnFile.pyf  main.f90  --f90exec=ifort  --quiet  \
-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION  \
--fcompiler=intelem  --opt='-O3 -diag-disable=8291'  \
parallel.o trees.o bitvectors.o instancesets.o options.o utilities.o  \
bootstraps.o forests.o importances.o prototypes.o graphics.o support.o

ifort -fPIC -diag-disable=8291 -O3 -c -o parallel.o parallel/none.f90
icc -fPIC -O3 -c support.c
ifort -fPIC -diag-disable=8291 -O3 -c utilities.f90
ifort -fPIC -diag-disable=8291 -O3 -c bitvectors.f90
ifort -fPIC -diag-disable=8291 -O3 -c options.f90
ifort -fPIC -diag-disable=8291 -O3 -c instancesets.f90
ifort -fPIC -diag-disable=8291 -O3 -c bootstraps.f90
ifort -fPIC -diag-disable=8291 -O3 -c trees.f90
ifort -fPIC -diag-disable=8291 -O3 -c graphics.f90
ifort -fPIC -diag-disable=8291 -O3 -c importances.f90
ifort -fPIC -diag-disable=8291 -O3 -c prototypes.f90
ifort -fPIC -diag-disable=8291 -O3 -c forests.f90
ifort -fPIC -diag-disable=8291 -O3 -o parf main.f90 parallel.o trees.o bitvectors.o instancesets.o options.o utilities.o bootstraps.o forests.o importances.o prototypes.o graphics.o support.o
Reading fortran codes...
	Reading file 'main.f90' (format:free)
Post-processing...
	Block: parf003ser
			Block: random_forest
In: :parf003ser:main.f90:random_forest
get_useparameters: no modul

/opt/intel/parallel_studio_xe_2020/compilers_and_libraries_2020.2.254/linux/compiler/lib/intel64_lin/for_main.o: In function `main':
for_main.c:(.text+0x2e): undefined reference to `MAIN__'
make: ** [parf] Erro 1


In [12]:
! ls -gG parf003/parf003ser*

-rwxr-xr-x 1 444552 Set 13 14:42 parf003/parf003ser.cpython-37m-x86_64-linux-gnu.so


In [13]:
! cp parf003/parf003ser* .

In [14]:
import parf003ser

In [15]:
help(parf003ser)

Help on module parf003ser:

NAME
    parf003ser

DESCRIPTION
    This module 'parf003ser' is auto-generated with f2py (version:1.20.3).
    Functions:
      p_error_count,p_oob_count,p_kappa_value,p_instance_count,p_error,p_testset_kappa_value,p_time,p_rank,p_size = random_forest(p_trainset,p_testset)
    .

DATA
    __f2py_numpy_version__ = '1.20.3'
    random_forest = <fortran object>

VERSION
    1.20.3

FILE
    /prj/ampemi/xxxx.xxxx/rf/parf003ser.cpython-37m-x86_64-linux-gnu.so




In [1]:
%%writefile rfns.py
import time as tm, parf003ser

t0 = tm.time()    # time measurement

resu = parf003ser.random_forest(
    "datasets/asteroid-train-66k.arff",
    "datasets/asteroid-test-34k.arff"
)
p_error_count = resu[0]
p_oob_count = resu[1]
p_kappa_value = resu[2]
p_instance_count = resu[3]
p_error = resu[4]
p_testset_kappa_value = resu[5]
p_time = resu[6]
p_rank = resu[7]
p_size = resu[8]

t1 = tm.time()    # time measurement

if p_rank == 0 :
    print(f'Trainset classification error is',
          f'{p_error_count * 100 / p_oob_count :.2f}%',
          f'of {p_oob_count} (kappa: {p_kappa_value :.4f})')
    print(f' Testset classification error is {p_error * 100 :.2f}%',
          f'of {p_instance_count} (kappa: {p_testset_kappa_value :.4f})')
    print(f'T: {p_time :.4f}  |  N: {p_size :0g}')

Overwriting rfns.py


In [2]:
! time python rfns.py

Trainset classification error is 0.06% of 66000 (kappa: 0.9904)
 Testset classification error is 0.45% of 34000 (kappa: 0.9314)
T: 111.9269  |  N: 1

real	1m52.677s
user	1m49.973s
sys	0m2.216s


---

## MPI

Makefile must be changed to enable mpi option

        PAR = mpi  (uncommented)

In [2]:
! module list

Currently Loaded Modulefiles:
  1) intel_psxe/2020


In [3]:
%%bash
python --version
ifort --version
mpiifort --version
icc --version
mpirun --version

Python 3.7.11 :: Intel Corporation
ifort (IFORT) 19.1.2.254 20200623
Copyright (C) 1985-2020 Intel Corporation.  All rights reserved.

ifort (IFORT) 19.1.2.254 20200623
Copyright (C) 1985-2020 Intel Corporation.  All rights reserved.

icc (ICC) 19.1.2.254 20200623
Copyright (C) 1985-2020 Intel Corporation.  All rights reserved.

Intel(R) MPI Library for Linux* OS, Version 2021.3 Build 20210601 (id: 6f90181f1)
Copyright 2003-2021, Intel Corporation.


In [None]:
%%bash
module load intel_psxe/2020
cd parf003
rm -f *.o *.mod parf003mpi*.*
make --always-make    # create object files (.o)

In [8]:
%%bash
module load intel_psxe/2020
source /opt/intel/parallel_studio_xe_2020/intelpython3/etc/profile.d/conda.sh
SCR=/scratch${PWD#/prj}
conda activate --stack ${SCR%/rf}/env4
cd parf003
f2py  -m parf003mpi  --overwrite-signature  -h sgnFile.pyf  main.f90

Reading fortran codes...
	Reading file 'main.f90' (format:free)
Post-processing...
	Block: parf003mpi
			Block: random_forest
In: :parf003mpi:main.f90:random_forest
get_useparameters: no module options info used by random_forest
In: :parf003mpi:main.f90:random_forest
get_useparameters: no module instancesets info used by random_forest
In: :parf003mpi:main.f90:random_forest
get_useparameters: no module bootstraps info used by random_forest
In: :parf003mpi:main.f90:random_forest
get_useparameters: no module utilities info used by random_forest
In: :parf003mpi:main.f90:random_forest
get_useparameters: no module forests info used by random_forest
In: :parf003mpi:main.f90:random_forest
get_useparameters: no module prototypes info used by random_forest
In: :parf003mpi:main.f90:random_forest
get_useparameters: no module parallel info used by random_forest
Post-processing (stage 2)...
Saving signatures to file "./sgnFile.pyf"


In [None]:
%%bash
module load intel_psxe/2020
source /opt/intel/parallel_studio_xe_2020/intelpython3/etc/profile.d/conda.sh
SCR=/scratch${PWD#/prj}
conda activate --stack ${SCR%/rf}/env4
cd parf003
f2py  -c sgnFile.pyf  main.f90  --f90exec=mpiifort  --quiet  \
-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION  \
--fcompiler=intelem  --opt='-O3 -diag-disable=8291'  \
parallel.o trees.o bitvectors.o instancesets.o options.o utilities.o  \
bootstraps.o forests.o importances.o prototypes.o graphics.o support.o

In [11]:
! ls parf003/parf003mpi*

parf003/parf003mpi.cpython-37m-x86_64-linux-gnu.so


In [12]:
! cp parf003/parf003mpi* .

In [13]:
import parf003mpi

In [14]:
help(parf003mpi)

Help on module parf003mpi:

NAME
    parf003mpi

DESCRIPTION
    This module 'parf003mpi' is auto-generated with f2py (version:1.20.3).
    Functions:
      p_error_count,p_oob_count,p_kappa_value,p_instance_count,p_error,p_testset_kappa_value,p_time,p_rank,p_size = random_forest(p_trainset,p_testset)
    .

DATA
    __f2py_numpy_version__ = '1.20.3'
    random_forest = <fortran object>

VERSION
    1.20.3

FILE
    /prj/ampemi/xxxx.xxxx/rf/parf003mpi.cpython-37m-x86_64-linux-gnu.so




In [17]:
%%writefile rfnm.py
import time as tm, parf003mpi

t0 = tm.time()    # time measurement

resu = parf003mpi.random_forest(
    "datasets/asteroid-train-66k.arff",
    "datasets/asteroid-test-34k.arff"
)
p_error_count = resu[0]
p_oob_count = resu[1]
p_kappa_value = resu[2]
p_instance_count = resu[3]
p_error = resu[4]
p_testset_kappa_value = resu[5]
p_time = resu[6]
p_rank = resu[7]
p_size = resu[8]

t1 = tm.time()    # time measurement

if p_rank == 0 :
    print(f'Trainset classification error is',
          f'{p_error_count * 100 / p_oob_count :.2f}%',
          f'of {p_oob_count} (kappa: {p_kappa_value :.4f})')
    print(f' Testset classification error is {p_error * 100 :.2f}%',
          f'of {p_instance_count} (kappa: {p_testset_kappa_value :.4f})')
    print(f'T: {p_time :.4f}  |  N: {p_size :0g}')

Overwriting rfnm.py


In [18]:
%%bash
unset I_MPI_PMI_LIBRARY
time mpiexec -n 1 python rfnm.py

Trainset classification error is 0.06% of 66000 (kappa: 0.9915)
 Testset classification error is 0.60% of 34000 (kappa: 0.9091)
T: 106.1714  |  N: 1



real	1m48.911s
user	1m44.437s
sys	0m2.066s


In [19]:
%%bash
unset I_MPI_PMI_LIBRARY
time mpiexec -n 2 python rfnm.py

Trainset classification error is 0.05% of 66000 (kappa: 0.9920)
 Testset classification error is 0.53% of 34000 (kappa: 0.9202)
T: 64.6468  |  N: 2



real	1m5.458s
user	2m8.761s
sys	0m1.069s


In [20]:
%%bash
unset I_MPI_PMI_LIBRARY
time mpiexec -n 4 python rfnm.py

Trainset classification error is 0.06% of 66000 (kappa: 0.9904)
 Testset classification error is 0.46% of 34000 (kappa: 0.9305)
T: 40.2618  |  N: 4



real	0m41.133s
user	2m40.354s
sys	0m1.721s


In [21]:
%%bash
unset I_MPI_PMI_LIBRARY
time mpiexec -n 16 python rfnm.py

Trainset classification error is 0.05% of 66000 (kappa: 0.9922)
 Testset classification error is 0.45% of 34000 (kappa: 0.9318)
T: 18.5569  |  N: 16



real	0m20.131s
user	4m53.772s
sys	0m7.123s
