Skip to content

Commit

Permalink
grid: Precompute and cache sphere bounds for ortho kernel
Browse files Browse the repository at this point in the history
  • Loading branch information
oschuett committed Oct 18, 2020
1 parent d11024a commit 965dda1
Show file tree
Hide file tree
Showing 6 changed files with 304 additions and 76 deletions.
1 change: 1 addition & 0 deletions src/grid/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ ALL_OBJECTS := grid_collocate_replay.o \
grid_collocate.o \
common/grid_library.o \
common/grid_basis_set.o \
common/grid_sphere_cache.o \
cpu/grid_cpu_task_list.o \
ref/grid_ref_task_list.o \
ref/grid_ref_collocate.o \
Expand Down
72 changes: 31 additions & 41 deletions src/grid/common/grid_library.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#include "grid_constants.h"
#include "grid_library.h"

static grid_library_stats **per_thread_stats = NULL;
static grid_library_globals **per_thread_globals = NULL;
static bool library_initialized = false;
static grid_library_config config = {.backend = GRID_BACKEND_AUTO,
.validate = false};
Expand All @@ -31,15 +31,15 @@ void grid_library_init() {
abort();
}

per_thread_stats =
malloc(sizeof(grid_library_stats *) * omp_get_max_threads());
per_thread_globals =
malloc(sizeof(grid_library_globals *) * omp_get_max_threads());

// Using parallel regions to ensure memory is allocated near a thread's core.
#pragma omp parallel default(none) shared(per_thread_stats)
#pragma omp parallel default(none) shared(per_thread_globals)
{
const int ithread = omp_get_thread_num();
per_thread_stats[ithread] = malloc(sizeof(grid_library_stats));
memset(per_thread_stats[ithread], 0, sizeof(grid_library_stats));
per_thread_globals[ithread] = malloc(sizeof(grid_library_globals));
memset(per_thread_globals[ithread], 0, sizeof(grid_library_globals));
}

library_initialized = true;
Expand All @@ -56,13 +56,22 @@ void grid_library_finalize() {
}

for (int i = 0; i < omp_get_max_threads(); i++) {
free(per_thread_stats[i]);
grid_sphere_cache_free(&per_thread_globals[i]->sphere_cache);
free(per_thread_globals[i]);
}
free(per_thread_stats);
per_thread_stats = NULL;
free(per_thread_globals);
per_thread_globals = NULL;
library_initialized = false;
}

/*******************************************************************************
* \brief Returns a pointer to the thread local sphere cache.
* \author Ole Schuett
******************************************************************************/
grid_sphere_cache *grid_library_get_sphere_cache() {
return &per_thread_globals[omp_get_thread_num()]->sphere_cache;
}

/*******************************************************************************
* \brief Configures the grid library.
* \author Ole Schuett
Expand All @@ -78,28 +87,13 @@ void grid_library_set_config(const int backend, const bool validate) {
******************************************************************************/
grid_library_config grid_library_get_config() { return config; }

/*******************************************************************************
* \brief Internal helper for summing two sets of counters.
* \author Ole Schuett
******************************************************************************/
static void sum_stats(const grid_library_stats increment,
grid_library_stats *accumulator) {
for (int lp = 0; lp < 20; lp++) {
for (int kern = 0; kern < 2; kern++) {
for (int op = 0; op < 2; op++) {
accumulator->counters[lp][kern][op] += increment.counters[lp][kern][op];
}
}
}
}

/*******************************************************************************
* \brief Increment specified counter, see grid_library.h for details.
* \author Ole Schuett
******************************************************************************/
void grid_library_increment_counter(int lp, int kern, int op) {
lp = imin(lp, 19);
per_thread_stats[omp_get_thread_num()]->counters[lp][kern][op]++;
per_thread_globals[omp_get_thread_num()]->counters[lp][kern][op]++;
}

/*******************************************************************************
Expand All @@ -115,21 +109,18 @@ void grid_library_print_stats(void (*mpi_sum_func)(long *, int),
abort();
}

grid_library_stats totals;
memset(&totals, 0, sizeof(grid_library_stats));

// Sum across threads.
for (int i = 0; i < omp_get_max_threads(); i++) {
sum_stats(*per_thread_stats[i], &totals);
}

// Sum across mpi ranks.
double total_all = 0.0;
// Sum all counters across threads and mpi ranks.
long counters[20][2][2] = {0};
double total = 0.0;
for (int lp = 0; lp < 20; lp++) {
for (int kern = 0; kern < 2; kern++) {
for (int op = 0; op < 2; op++) {
mpi_sum_func(&totals.counters[lp][kern][op], mpi_comm);
total_all += totals.counters[lp][kern][op];
for (int i = 0; i < omp_get_max_threads(); i++) {
counters[lp][kern][op] +=
per_thread_globals[i]->counters[lp][kern][op];
}
mpi_sum_func(&counters[lp][kern][op], mpi_comm);
total += counters[lp][kern][op];
}
}
}
Expand Down Expand Up @@ -158,15 +149,14 @@ void grid_library_print_stats(void (*mpi_sum_func)(long *, int),
for (int lp = 0; lp < 20; lp++) {
for (int kern = 0; kern < 2; kern++) {
for (int op = 0; op < 2; op++) {
long count = totals.counters[lp][kern][op];
if (count == 0)
if (counters[lp][kern][op] == 0)
continue; // skip empty counters
const char *op_str = (op == 1) ? "collocate" : "integrate";
const char *kern_str = (kern == 1) ? "ortho" : "general";
char buffer[100];
double percent = 100.0 * count / total_all;
double percent = 100.0 * counters[lp][kern][op] / total;
snprintf(buffer, sizeof(buffer), " %-5i %-9s %-12s %38li %10.2f%%\n",
lp, kern_str, op_str, count, percent);
lp, kern_str, op_str, counters[lp][kern][op], percent);
print_func(buffer, output_unit);
}
}
Expand Down
10 changes: 9 additions & 1 deletion src/grid/common/grid_library.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#ifndef GRID_LIBRARY_H
#define GRID_LIBRARY_H

#include "grid_sphere_cache.h"
#include <stdbool.h>

/*******************************************************************************
Expand Down Expand Up @@ -55,8 +56,15 @@ void grid_library_print_stats(void (*mpi_sum_func)(long *, int), int mpi_comm,
* \author Ole Schuett
******************************************************************************/
typedef struct {
grid_sphere_cache sphere_cache;
long counters[20][2][2]; // [lp][kernel][operation]
} grid_library_stats;
} grid_library_globals;

/*******************************************************************************
* \brief Returns a pointer to the thread local sphere cache.
* \author Ole Schuett
******************************************************************************/
grid_sphere_cache *grid_library_get_sphere_cache();

/*******************************************************************************
* \brief Increment specified counter.
Expand Down
174 changes: 174 additions & 0 deletions src/grid/common/grid_sphere_cache.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
/*----------------------------------------------------------------------------*/
/* CP2K: A general program to perform molecular dynamics simulations */
/* Copyright 2000-2020 CP2K developers group <https://cp2k.org> */
/* */
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*----------------------------------------------------------------------------*/

#include "grid_sphere_cache.h"
#include "grid_common.h"
#include "grid_library.h"
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*******************************************************************************
* \brief Compute the sphere bounds for a given single radius.
* \author Ole Schuett
******************************************************************************/
static int single_sphere_bounds(const double disr_radius, const double dh[3][3],
const double dh_inv[3][3], int *bounds) {

int ibound = 0;

// The cube contains an even number of grid points in each direction and
// collocation is always performed on a pair of two opposing grid points.
// Hence, the points with index 0 and 1 are both assigned distance zero via
// the formular distance=(2*index-1)/2.
const int kgmin = ceil(-1e-8 - disr_radius * dh_inv[2][2]);
if (bounds != NULL) {
bounds[ibound] = kgmin;
}
ibound++;
for (int kg = kgmin; kg <= 0; kg++) {
const int kd = (2 * kg - 1) / 2; // distance from center in grid points
const double kr = kd * dh[2][2]; // distance from center in a.u.
const double kremain = disr_radius * disr_radius - kr * kr;
const int jgmin = ceil(-1e-8 - sqrt(fmax(0.0, kremain)) * dh_inv[1][1]);
if (bounds != NULL) {
bounds[ibound] = jgmin;
}
ibound++;
for (int jg = jgmin; jg <= 0; jg++) {
const int jd = (2 * jg - 1) / 2; // distance from center in grid points
const double jr = jd * dh[1][1]; // distance from center in a.u.
const double jremain = kremain - jr * jr;
const int igmin = ceil(-1e-8 - sqrt(fmax(0.0, jremain)) * dh_inv[0][0]);
if (bounds != NULL) {
bounds[ibound] = igmin;
}
ibound++;
}
}
return ibound; // Number of bounds - needed to allocate array.
}

/*******************************************************************************
* \brief Rebuild a cache entry for a given cell and max radius.
* \author Ole Schuett
******************************************************************************/
static void rebuild_cache_entry(const int max_imr, const double drmin,
const double dh[3][3],
const double dh_inv[3][3],
grid_sphere_cache_entry *entry) {
if (entry->max_imr > 0) {
free(entry->offsets);
free(entry->storage);
}
entry->max_imr = max_imr;

// Compute required storage size.
entry->offsets = malloc(max_imr * sizeof(int));
int nbounds_total = 0;
for (int imr = 1; imr <= max_imr; imr++) {
const double radius = imr * drmin;
const int nbounds = single_sphere_bounds(radius, dh, dh_inv, NULL);
entry->offsets[imr - 1] = nbounds_total;
nbounds_total += nbounds;
}

// Allocate and fill storage.
entry->storage = malloc(nbounds_total * sizeof(int));
for (int imr = 1; imr <= max_imr; imr++) {
const double radius = imr * drmin;
const int offset = entry->offsets[imr - 1];
single_sphere_bounds(radius, dh, dh_inv, &entry->storage[offset]);
}
}

/*******************************************************************************
* \brief Lookup the sphere bound from cache and compute them as needed.
* See grid_sphere_cache.h for details.
* \author Ole Schuett
******************************************************************************/
void grid_sphere_cache_lookup(const double radius, const double dh[3][3],
const double dh_inv[3][3], int **sphere_bounds,
double *discr_radius) {

// Prepare the cache.
grid_sphere_cache *cache = grid_library_get_sphere_cache();

// Find or create cache entry for given grid.
const double dr0 = dh[0][0], dr1 = dh[1][1], dr2 = dh[2][2];
grid_sphere_cache_entry *entry;
bool found = false;

// Fast path: check prev match.
if (cache->prev_match < cache->size) {
entry = &cache->entries[cache->prev_match];
if (entry->dr[0] == dr0 && entry->dr[1] == dr1 && entry->dr[2] == dr2) {
found = true;
}
}

// Full search.
if (!found) {
for (int i = 0; i < cache->size; i++) {
entry = &cache->entries[i];
if (entry->dr[0] == dr0 && entry->dr[1] == dr1 && entry->dr[2] == dr2) {
cache->prev_match = i;
found = true;
break;
}
}
}

// If no existing cache entry was found then create a new one.
if (!found) {
cache->size++;
grid_sphere_cache_entry *old_entries = cache->entries;
const size_t entry_size = sizeof(grid_sphere_cache_entry);
cache->entries = malloc(cache->size * entry_size);
memcpy(cache->entries, old_entries, (cache->size - 1) * entry_size);
free(old_entries);
cache->prev_match = cache->size - 1;
entry = &cache->entries[cache->size - 1];
// Initialize new cache entry
entry->max_imr = 0;
entry->dr[0] = dr0;
entry->dr[1] = dr1;
entry->dr[2] = dr2;
entry->drmin = fmin(dr0, fmin(dr1, dr2));
entry->drmin_inv = 1.0 / entry->drmin;
}

// Discretize the radius.
const int imr = imax(1, (int)ceil(radius * entry->drmin_inv));
*discr_radius = entry->drmin * imr;

// Rebuild cache entry if requested radius is too large.
if (entry->max_imr < imr) {
rebuild_cache_entry(imr, entry->drmin, dh, dh_inv, entry);
}
const int offset = entry->offsets[imr - 1];
*sphere_bounds = &entry->storage[offset];
}

/*******************************************************************************
* \brief Free the memory of the sphere cache.
* \author Ole Schuett
******************************************************************************/
void grid_sphere_cache_free(grid_sphere_cache *cache) {
for (int i = 0; i < cache->size; i++) {
if (cache->entries[i].max_imr > 0) {
free(cache->entries[i].offsets);
free(cache->entries[i].storage);
}
}
free(cache->entries);
cache->size = 0;
}

// EOF
55 changes: 55 additions & 0 deletions src/grid/common/grid_sphere_cache.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*----------------------------------------------------------------------------*/
/* CP2K: A general program to perform molecular dynamics simulations */
/* Copyright 2000-2020 CP2K developers group <https://cp2k.org> */
/* */
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*----------------------------------------------------------------------------*/

#ifndef GRID_SPHERE_CACHE_H
#define GRID_SPHERE_CACHE_H

/*******************************************************************************
* \brief Struct holding the sphere cache for one grid as specified by dr[3].
* \author Ole Schuett
******************************************************************************/
typedef struct {
double dr[3];
double drmin;
double drmin_inv;
int max_imr;
int *offsets;
int *storage;
} grid_sphere_cache_entry;

/*******************************************************************************
* \brief Struct holding the entire sphere cache, ie. for all grids.
* \author Ole Schuett
******************************************************************************/
typedef struct {
int size;
int prev_match;
grid_sphere_cache_entry *entries;
} grid_sphere_cache;

/*******************************************************************************
* \brief Lookup the sphere bounds from the cache and compute them when missing.
* \param radius Non-discretized radius.
* \param dh Incremental grid matrix.
* \param dh_inv Inverse incremental grid matrix.
* \param sphere_bounds Returned pointer to sphere bounds.
* \param discr_radius Returned discretized radius.
* \author Ole Schuett
******************************************************************************/
void grid_sphere_cache_lookup(const double radius, const double dh[3][3],
const double dh_inv[3][3], int **sphere_bounds,
double *discretized_radius);

/*******************************************************************************
* \brief Free the memory of the sphere cache.
* \author Ole Schuett
******************************************************************************/
void grid_sphere_cache_free(grid_sphere_cache *cache);

#endif

// EOF

0 comments on commit 965dda1

Please sign in to comment.