# FFTW C2C F90 MPI 576

- FFTW MPI F90 Interface http://fftw.org/doc/FFTW-MPI-Fortran-Interface.html#FFTW-MPI-Fortran-Interface

In [1]:
#-----------------------------------------------------------------------

In [9]:
%%writefile fc2cp.f90
program main
    use, intrinsic :: iso_c_binding
    use MPI
    implicit none
    include 'fftw3-mpi.f03'
    integer :: mpirank, mpisize, mpierror, i, j, k
    integer(C_INTPTR_T), parameter :: L = 576, M = 576, N = 576
    type(C_PTR) :: plan, cdata
    complex(C_DOUBLE_COMPLEX), pointer :: data(:,:,:)
    integer(C_INTPTR_T) :: alloc_local, local_N, local_start
    complex(C_DOUBLE_COMPLEX) :: s, rs
    double precision :: t0, t1, t2

    call cpu_time(t0)    ! time measurement

    call MPI_Init(mpierror)
    call MPI_Comm_rank(MPI_COMM_WORLD, mpirank, mpierror)
    call MPI_Comm_size(MPI_COMM_WORLD, mpisize, mpierror)

    ! init
    call fftw_mpi_init()    

    ! get local data size and allocate (note dimension reversal)
    alloc_local = fftw_mpi_local_size_3d(N, M, L,  &
                 MPI_COMM_WORLD, local_N, local_start)
    cdata = fftw_alloc_complex(alloc_local)
    call c_f_pointer(cdata, data, [L, M, local_N])

    ! create MPI plan for in-place forward DFT (note dimension reversal)
    plan = fftw_mpi_plan_dft_3d(N, M, L, data, data,  &
                MPI_COMM_WORLD, FFTW_FORWARD, FFTW_ESTIMATE)

    ! Fills the array with complex values
    do k = 1, int(local_N)
        do j = 1, M
            do i = 1, L
                data(i, j, k) = dcmplx( sin( real(i + j + (k + local_start)) ) , 0)
            enddo
        enddo
    enddo

    call cpu_time(t1)    ! time measurement

    ! Compute transform (as many times as desired)
    call fftw_mpi_execute_dft(plan, data, data)

    ! Checksum
    s = sum(data)
    call MPI_Reduce(s,                   &! send data
                    rs,                  &! recv data
                    1,                   &! count
                    MPI_DOUBLE_COMPLEX,  &! data type
                    MPI_SUM,             &! operation
                    0,                   &! rank of root process
                    MPI_COMM_WORLD, mpierror)
    
    ! clean

    call cpu_time(t2)    ! time measurement

    call fftw_destroy_plan(plan)
    call fftw_free(cdata)
    call fftw_mpi_cleanup()
    call mpi_finalize(mpierror)
    
    ! show the result
    if (mpirank == 0) then
        write(*, "('S: 'spf0.0spf0.0'j')", advance="no") rs * 1e-5
        write(*, "(' | L: 'g0)", advance="no") L
        write(*, "(' | N: 'g0)", advance="no") mpisize
        write(*, "(' | T1: 'sf0.4)", advance="no") t1-t0
        write(*, "(' | TF: 'sf0.4)", advance="no") t2-t1
        write(*, "(' | TT: 'sf0.4)") t2-t0
    endif

end

Overwriting fc2cp.f90


In [10]:
%%bash
module load  openmpi/gnu/4.0.4_ucx_1.6
module load  mathlibs/fftw/3.3.8_openmpi-3.1_gnu
dir=/scratch/app/mathlibs/fftw/3.3.8_openmpi-3.1_gnu
mpifort  -O3  -o fc2cp  fc2cp.f90  \
         -L $dir/lib  -l fftw3_mpi  -l fftw3  -l m  -I $dir/include

## Check imag = 0

In [31]:
%%bash
module load  mathlibs/fftw/3.3.8_openmpi-3.1_gnu
time mpiexec -n 4 ./fc2cp

S: +270.-0.j | L: 576 | N: 4 | T1: 2.1405 | TF: 5.3970 | TT: 7.5374



real	0m10.428s
user	0m25.194s
sys	0m10.514s


In [14]:
%%bash
module load  mathlibs/fftw/3.3.8_openmpi-3.1_gnu
time mpiexec -n 16 ./fc2cp

S: +270.-0.j | L: 576 | N: 16 | T1: 2.8404 | TF: 1.5309 | TT: 4.3713



real	0m12.166s
user	0m27.205s
sys	0m42.221s


## Check imag = real

In [36]:
%%bash
module load  mathlibs/fftw/3.3.8_openmpi-3.1_gnu
time mpiexec -n 4 ./fc2cp

S: +270.+270.j | L: 576 | N: 4 | T1: 2.0767 | TF: 5.3662 | TT: 7.4429



real	0m7.730s
user	0m24.625s
sys	0m5.398s


In [35]:
%%bash
module load  mathlibs/fftw/3.3.8_openmpi-3.1_gnu
time mpiexec -n 16 ./fc2cp

S: +270.+270.j | L: 576 | N: 16 | T1: 3.4871 | TF: 1.2839 | TT: 4.7710



real	0m6.615s
user	0m25.867s
sys	0m36.066s


## Copy to /scratch

In [15]:
%%bash
dst=/scratch${PWD#"/prj"}
cp fc2cp $dst

## Configures the batch script 

In [16]:
%%writefile fc2cp.srm
#!/bin/bash
#SBATCH --job-name fc2cp       # Job name
#SBATCH --partition cpu_small  # Select partition
#SBATCH --ntasks=1             # Total tasks
#SBATCH --time=00:05:00        # Limit execution time
#SBATCH --exclusive            # Exclusive acccess to nodes

echo '========================================'
echo '- Job ID:' $SLURM_JOB_ID
echo '- Tasks per node:' $SLURM_NTASKS_PER_NODE
echo '- # of nodes in the job:' $SLURM_JOB_NUM_NODES
echo '- # of tasks:' $SLURM_NTASKS
echo '- Dir from which sbatch was invoked:' ${SLURM_SUBMIT_DIR##*/}
cd $SLURM_SUBMIT_DIR
echo -n '- List of nodes allocated to the job: '
nodeset -e $SLURM_JOB_NODELIST

# Modules config
echo '-- modules ----------------------------'
echo '$ module load mathlibs/fftw/3.3.8_openmpi-3.1_gnu'
module load  mathlibs/fftw/3.3.8_openmpi-3.1_gnu

# Executable config
EXEC=$PWD/fc2cp

# Start
echo '-- run --------------------------------'
echo '$ srun --mpi=pmi2 -n' $SLURM_NTASKS ${EXEC##*/}
echo '-- output -----------------------------'
srun --mpi=pmi2 -n $SLURM_NTASKS $EXEC
echo '~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'

Overwriting fc2cp.srm


## Check

In [17]:
! sbatch --partition=cpu_dev --ntasks=96 fc2cp.srm

Submitted batch job 1337187


In [18]:
! squeue -n fc2cp -o "%.18i  %.9P  %.2t %.5M %.5D %.4C" --partition=cpu_dev

             JOBID  PARTITION  ST  TIME NODES CPUS
           1337187    cpu_dev  PD  0:00     4   96


In [28]:
! squeue -n fc2cp -o "%.18i  %.9P  %.2t %.5M %.5D %.4C" --partition=cpu_dev

             JOBID  PARTITION  ST  TIME NODES CPUS


In [29]:
! cat /scratch${PWD#"/prj"}/slurm-1337187.out

- Job ID: 1337187
- Tasks per node:
- # of nodes in the job: 4
- # of tasks: 96
- Dir from which sbatch was invoked: fft
- List of nodes allocated to the job: sdumont1243 sdumont1244 sdumont1245 sdumont1246
-- modules ----------------------------
$ module load mathlibs/fftw/3.3.8_openmpi-3.1_gnu
-- run --------------------------------
$ srun --mpi=pmi2 -n 96 fc2cp
-- output -----------------------------
S: +270.-0.j | L: 576 | N: 96 | T1: 1.3296 | TF: .9637 | TT: 2.2933
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


## Run

### 1 of (1, 4, 16, 24, 48, 72, 96)

In [19]:
! sbatch --ntasks=1 fc2cp.srm
! sbatch --ntasks=1 fc2cp.srm
! sbatch --ntasks=1 fc2cp.srm

Submitted batch job 1337188
Submitted batch job 1337189
Submitted batch job 1337190


In [8]:
! cat /scratch${PWD#"/prj"}/slurm-1337188.out
! cat /scratch${PWD#"/prj"}/slurm-1337189.out
! cat /scratch${PWD#"/prj"}/slurm-1337190.out

- Job ID: 1337188
- Tasks per node:
- # of nodes in the job: 1
- # of tasks: 1
- Dir from which sbatch was invoked: fft
- List of nodes allocated to the job: sdumont1457
-- modules ----------------------------
$ module load mathlibs/fftw/3.3.8_openmpi-3.1_gnu
-- run --------------------------------
$ srun --mpi=pmi2 -n 1 fc2cp
-- output -----------------------------
S: +270.+0.j | L: 576 | N: 1 | T1: 7.3483 | TF: 16.1846 | TT: 23.5329
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Job ID: 1337189
- Tasks per node:
- # of nodes in the job: 1
- # of tasks: 1
- Dir from which sbatch was invoked: fft
- List of nodes allocated to the job: sdumont1457
-- modules ----------------------------
$ module load mathlibs/fftw/3.3.8_openmpi-3.1_gnu
-- run --------------------------------
$ srun --mpi=pmi2 -n 1 fc2cp
-- output -----------------------------
S: +270.+0.j | L: 576 | N: 1 | T1: 7.3227 | TF: 16.1144 | TT: 23.4371
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Job ID: 1337190
- Tasks per node:
- # of

### 4 of (1, 4, 16, 24, 48, 72, 96)

In [20]:
! sbatch --ntasks=4 fc2cp.srm
! sbatch --ntasks=4 fc2cp.srm
! sbatch --ntasks=4 fc2cp.srm

Submitted batch job 1337191
Submitted batch job 1337192
Submitted batch job 1337193


In [7]:
! cat /scratch${PWD#"/prj"}/slurm-1337191.out
! cat /scratch${PWD#"/prj"}/slurm-1337192.out
! cat /scratch${PWD#"/prj"}/slurm-1337193.out

- Job ID: 1337191
- Tasks per node:
- # of nodes in the job: 1
- # of tasks: 4
- Dir from which sbatch was invoked: fft
- List of nodes allocated to the job: sdumont1490
-- modules ----------------------------
$ module load mathlibs/fftw/3.3.8_openmpi-3.1_gnu
-- run --------------------------------
$ srun --mpi=pmi2 -n 4 fc2cp
-- output -----------------------------
S: +270.-0.j | L: 576 | N: 4 | T1: 2.0473 | TF: 4.3915 | TT: 6.4388
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Job ID: 1337192
- Tasks per node:
- # of nodes in the job: 1
- # of tasks: 4
- Dir from which sbatch was invoked: fft
- List of nodes allocated to the job: sdumont1457
-- modules ----------------------------
$ module load mathlibs/fftw/3.3.8_openmpi-3.1_gnu
-- run --------------------------------
$ srun --mpi=pmi2 -n 4 fc2cp
-- output -----------------------------
S: +270.-0.j | L: 576 | N: 4 | T1: 2.0056 | TF: 4.3714 | TT: 6.3770
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Job ID: 1337193
- Tasks per node:
- # of nod

### 16 of (1, 4, 16, 24, 48, 72, 96)

In [21]:
! sbatch --ntasks=16 fc2cp.srm
! sbatch --ntasks=16 fc2cp.srm
! sbatch --ntasks=16 fc2cp.srm

Submitted batch job 1337194
Submitted batch job 1337195
Submitted batch job 1337196


In [6]:
! cat /scratch${PWD#"/prj"}/slurm-1337194.out
! cat /scratch${PWD#"/prj"}/slurm-1337195.out
! cat /scratch${PWD#"/prj"}/slurm-1337196.out

- Job ID: 1337194
- Tasks per node:
- # of nodes in the job: 1
- # of tasks: 16
- Dir from which sbatch was invoked: fft
- List of nodes allocated to the job: sdumont1457
-- modules ----------------------------
$ module load mathlibs/fftw/3.3.8_openmpi-3.1_gnu
-- run --------------------------------
$ srun --mpi=pmi2 -n 16 fc2cp
-- output -----------------------------
S: +270.-0.j | L: 576 | N: 16 | T1: 1.0949 | TF: 1.5387 | TT: 2.6336
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Job ID: 1337195
- Tasks per node:
- # of nodes in the job: 1
- # of tasks: 16
- Dir from which sbatch was invoked: fft
- List of nodes allocated to the job: sdumont1490
-- modules ----------------------------
$ module load mathlibs/fftw/3.3.8_openmpi-3.1_gnu
-- run --------------------------------
$ srun --mpi=pmi2 -n 16 fc2cp
-- output -----------------------------
S: +270.-0.j | L: 576 | N: 16 | T1: 1.1542 | TF: 1.5381 | TT: 2.6923
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Job ID: 1337196
- Tasks per node:
- # 

### 24 of (1, 4, 16, 24, 48, 72, 96)

In [22]:
! sbatch --ntasks=24 fc2cp.srm
! sbatch --ntasks=24 fc2cp.srm
! sbatch --ntasks=24 fc2cp.srm

Submitted batch job 1337197
Submitted batch job 1337198
Submitted batch job 1337199


In [5]:
! cat /scratch${PWD#"/prj"}/slurm-1337197.out
! cat /scratch${PWD#"/prj"}/slurm-1337198.out
! cat /scratch${PWD#"/prj"}/slurm-1337199.out

- Job ID: 1337197
- Tasks per node:
- # of nodes in the job: 1
- # of tasks: 24
- Dir from which sbatch was invoked: fft
- List of nodes allocated to the job: sdumont1490
-- modules ----------------------------
$ module load mathlibs/fftw/3.3.8_openmpi-3.1_gnu
-- run --------------------------------
$ srun --mpi=pmi2 -n 24 fc2cp
-- output -----------------------------
S: +270.+0.j | L: 576 | N: 24 | T1: 1.3397 | TF: 1.0351 | TT: 2.3748
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Job ID: 1337198
- Tasks per node:
- # of nodes in the job: 1
- # of tasks: 24
- Dir from which sbatch was invoked: fft
- List of nodes allocated to the job: sdumont1457
-- modules ----------------------------
$ module load mathlibs/fftw/3.3.8_openmpi-3.1_gnu
-- run --------------------------------
$ srun --mpi=pmi2 -n 24 fc2cp
-- output -----------------------------
S: +270.+0.j | L: 576 | N: 24 | T1: 1.2595 | TF: 1.0447 | TT: 2.3042
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Job ID: 1337199
- Tasks per node:
- # 

### 48 of (1, 4, 16, 24, 48, 72, 96)

In [23]:
! sbatch --ntasks=48 fc2cp.srm
! sbatch --ntasks=48 fc2cp.srm
! sbatch --ntasks=48 fc2cp.srm

Submitted batch job 1337200
Submitted batch job 1337201
Submitted batch job 1337202


In [4]:
! cat /scratch${PWD#"/prj"}/slurm-1337200.out
! cat /scratch${PWD#"/prj"}/slurm-1337201.out
! cat /scratch${PWD#"/prj"}/slurm-1337202.out

- Job ID: 1337200
- Tasks per node:
- # of nodes in the job: 2
- # of tasks: 48
- Dir from which sbatch was invoked: fft
- List of nodes allocated to the job: sdumont1457 sdumont1490
-- modules ----------------------------
$ module load mathlibs/fftw/3.3.8_openmpi-3.1_gnu
-- run --------------------------------
$ srun --mpi=pmi2 -n 48 fc2cp
-- output -----------------------------
S: +270.+0.j | L: 576 | N: 48 | T1: 1.5118 | TF: .7764 | TT: 2.2883
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Job ID: 1337201
- Tasks per node:
- # of nodes in the job: 2
- # of tasks: 48
- Dir from which sbatch was invoked: fft
- List of nodes allocated to the job: sdumont1457 sdumont1490
-- modules ----------------------------
$ module load mathlibs/fftw/3.3.8_openmpi-3.1_gnu
-- run --------------------------------
$ srun --mpi=pmi2 -n 48 fc2cp
-- output -----------------------------
S: +270.+0.j | L: 576 | N: 48 | T1: 1.3375 | TF: .7932 | TT: 2.1307
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Job ID: 1337202


### 72 of (1, 4, 16, 24, 48, 72, 96)

In [24]:
! sbatch --ntasks=72 fc2cp.srm
! sbatch --ntasks=72 fc2cp.srm
! sbatch --ntasks=72 fc2cp.srm

Submitted batch job 1337203
Submitted batch job 1337204
Submitted batch job 1337205


In [3]:
! cat /scratch${PWD#"/prj"}/slurm-1337203.out
! cat /scratch${PWD#"/prj"}/slurm-1337204.out
! cat /scratch${PWD#"/prj"}/slurm-1337205.out

- Job ID: 1337203
- Tasks per node:
- # of nodes in the job: 3
- # of tasks: 72
- Dir from which sbatch was invoked: fft
- List of nodes allocated to the job: sdumont1453 sdumont1457 sdumont1490
-- modules ----------------------------
$ module load mathlibs/fftw/3.3.8_openmpi-3.1_gnu
-- run --------------------------------
$ srun --mpi=pmi2 -n 72 fc2cp
-- output -----------------------------
S: +270.-0.j | L: 576 | N: 72 | T1: 1.5588 | TF: .8443 | TT: 2.4031
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Job ID: 1337204
- Tasks per node:
- # of nodes in the job: 3
- # of tasks: 72
- Dir from which sbatch was invoked: fft
- List of nodes allocated to the job: sdumont1453 sdumont1457 sdumont1490
-- modules ----------------------------
$ module load mathlibs/fftw/3.3.8_openmpi-3.1_gnu
-- run --------------------------------
$ srun --mpi=pmi2 -n 72 fc2cp
-- output -----------------------------
S: +270.-0.j | L: 576 | N: 72 | T1: 1.3025 | TF: .8825 | TT: 2.1850
~~ end ~~~~~~~~~~~~~~~~~~~~~~~~~~~

### 96 of (1, 4, 16, 24, 48, 72, 96)

In [25]:
! sbatch --ntasks=96 fc2cp.srm
! sbatch --ntasks=96 fc2cp.srm
! sbatch --ntasks=96 fc2cp.srm

Submitted batch job 1337206
Submitted batch job 1337207
Submitted batch job 1337208


In [2]:
! cat /scratch${PWD#"/prj"}/slurm-1337206.out
! cat /scratch${PWD#"/prj"}/slurm-1337207.out
! cat /scratch${PWD#"/prj"}/slurm-1337208.out

- Job ID: 1337206
- Tasks per node:
- # of nodes in the job: 4
- # of tasks: 96
- Dir from which sbatch was invoked: fft
- List of nodes allocated to the job: sdumont1484 sdumont1492 sdumont1502 sdumont1503
-- modules ----------------------------
$ module load mathlibs/fftw/3.3.8_openmpi-3.1_gnu
-- run --------------------------------
$ srun --mpi=pmi2 -n 96 fc2cp
-- output -----------------------------
rank:76, cpu:4
rank:19, cpu:19
rank:35, cpu:11
rank:53, cpu:5
rank:26, cpu:2
rank:59, cpu:11
rank:80, cpu:8
rank:56, cpu:8
rank:39, cpu:15
rank:23, cpu:23
rank:79, cpu:7
rank:14, cpu:14
rank:47, cpu:23
rank:87, cpu:15
rank:85, cpu:13
rank:50, cpu:2
rank:86, cpu:14
rank:63, cpu:15
rank:52, cpu:4
rank:00, cpu:0
rank:27, cpu:3
S: +270.-0.j | L: 576 | N: 96 | T1: 1.3269 | TF: 1.1039 | TT: 2.4308
rank:18, cpu:18
rank:77, cpu:5
rank:07, cpu:7
rank:12, cpu:12
rank:02, cpu:2
rank:74, cpu:2
rank:73, cpu:1
rank:20, cpu:20
rank:51, cpu:3
rank:29, cpu:5
rank:04, cpu:4
rank:46, cpu:22
rank:32, cpu:8

In [26]:
! squeue --partition=cpu_small -h -t pending,running -r | wc -l

270


In [27]:
! squeue -n fc2cp -o "%.18i  %.9P  %.2t %.5M %.5D %.4C"

             JOBID  PARTITION  ST  TIME NODES CPUS
           1337188  cpu_small  PD  0:00     1    1
           1337189  cpu_small  PD  0:00     1    1
           1337190  cpu_small  PD  0:00     1    1
           1337191  cpu_small  PD  0:00     1    4
           1337192  cpu_small  PD  0:00     1    4
           1337193  cpu_small  PD  0:00     1    4
           1337194  cpu_small  PD  0:00     1   16
           1337195  cpu_small  PD  0:00     1   16
           1337196  cpu_small  PD  0:00     1   16
           1337197  cpu_small  PD  0:00     1   24
           1337198  cpu_small  PD  0:00     1   24
           1337199  cpu_small  PD  0:00     1   24
           1337200  cpu_small  PD  0:00     2   48
           1337201  cpu_small  PD  0:00     2   48
           1337202  cpu_small  PD  0:00     2   48
           1337203  cpu_small  PD  0:00     3   72
           1337204  cpu_small  PD  0:00     3   72
           1337205  cpu_small  PD  0:00     3   72
           1337206  cpu_small  

In [1]:
! squeue -n fc2cp -o "%.18i  %.9P  %.2t %.5M %.5D %.4C"

             JOBID  PARTITION  ST  TIME NODES CPUS


## Version

In [11]:
%%bash
module load  mathlibs/fftw/3.3.8_openmpi-3.1_gnu
mpifort --version
ompi_info --version

GNU Fortran (GCC) 4.8.5 20150623 (Red Hat 4.8.5-36)
Copyright (C) 2015 Free Software Foundation, Inc.

GNU Fortran comes with NO WARRANTY, to the extent permitted by law.
You may redistribute copies of GNU Fortran
under the terms of the GNU General Public License.
For more information about these matters, see the file named COPYING

Open MPI v3.1.4

http://www.open-mpi.org/community/help/


In [2]:
! module avail 2>&1 | grep -i fftw

mathlibs/fftw/3.3.8_intel
mathlibs/fftw/3.3.8_openmpi-2.0_gnu
mathlibs/fftw/3.3.8_openmpi-2.0_intel
mathlibs/fftw/3.3.8_openmpi-3.1_gnu


In [16]:
! hostnamectl | grep Operating

  Operating System: Red Hat Enterprise Linux Server 7.6 (Maipo)


In [15]:
! lscpu

Architecture:          x86_64
CPU op-mode(s):        32-bit, 64-bit
Byte Order:            Little Endian
CPU(s):                24
On-line CPU(s) list:   0-23
Thread(s) per core:    1
Core(s) per socket:    12
Socket(s):             2
NUMA node(s):          2
Vendor ID:             GenuineIntel
CPU family:            6
Model:                 62
Model name:            Intel(R) Xeon(R) CPU E5-2695 v2 @ 2.40GHz
Stepping:              4
CPU MHz:               2667.773
CPU max MHz:           3200.0000
CPU min MHz:           1200.0000
BogoMIPS:              4799.91
Virtualization:        VT-x
L1d cache:             32K
L1i cache:             32K
L2 cache:              256K
L3 cache:              30720K
NUMA node0 CPU(s):     0-11
NUMA node1 CPU(s):     12-23
Flags:                 fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonsto