Skip to content

Commit

Permalink
Merge b293c6f into 3ed58dd
Browse files Browse the repository at this point in the history
  • Loading branch information
jehturner authored May 29, 2018
2 parents 3ed58dd + b293c6f commit 937d765
Show file tree
Hide file tree
Showing 3 changed files with 190 additions and 71 deletions.
4 changes: 0 additions & 4 deletions astroscrappy/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,6 @@ def test_medfilt3():
def test_medfilt5():
a = np.ascontiguousarray(np.random.random((1001, 1001))).astype('f4')
npmed5 = ndimage.filters.median_filter(a, size=(5, 5), mode='nearest')
npmed5[:2, :] = a[:2, :]
npmed5[-2:, :] = a[-2:, :]
npmed5[:, :2] = a[:, :2]
npmed5[:, -2:] = a[:, -2:]

med5 = medfilt5(a)
assert np.all(med5 == npmed5)
Expand Down
246 changes: 179 additions & 67 deletions astroscrappy/utils/medutils.c
Original file line number Diff line number Diff line change
Expand Up @@ -478,76 +478,16 @@ PyMedFilt5(float* data, float* output, int nx, int ny)
"PyMedFilt5(data, output, nx, ny) -> void\n\n"
"Calculate the 5x5 median filter on an array data with dimensions "
"nx x ny. The results are saved in the output array. The output "
"array should already be allocated as we work on it in place. The "
"median filter is not calculated for a 2 pixel border around the "
"image. These pixel values are copied from the input data. Note "
"that the data array needs to be striped in the x direction such "
"that pixel i,j has memory location data[i + nx * j]");
"array should already be allocated as we work on it in place. "
"At each border of the image, the median window is completed by "
"replicating the edge row or column. Note that the data array "
"needs to be striped in the x direction such that pixel i,j has "
"memory location data[i + nx * j]");

/*Total size of the array */
int nxny = nx * ny;

/* Loop indices */
int i, j, nxj;
int k, l, nxk;

/* 25 element array to calculate the median and a counter index. Note that
* these both need to be unique for each thread so they both need to be
* private and we wait to allocate memory until the pragma below. */
float* medarr;
int medcounter;

/* Each thread needs to access the data and the output so we make them
* firstprivate. We make sure that our algorithm doesn't have multiple
* threads read or write the same piece of memory. */
#pragma omp parallel firstprivate(output, data, nx, ny) \
private(i, j, k, l, medarr, nxj, nxk, medcounter)
{
/*Each thread allocates its own array. */
medarr = (float *) malloc(25 * sizeof(float));

/* Go through each pixel excluding the border.*/
#pragma omp for nowait
for (j = 2; j < ny - 2; j++) {
/* Precalculate the multiplication nx * j, minor optimization */
nxj = nx * j;
for (i = 2; i < nx - 2; i++) {
medcounter = 0;
/* The compiler should optimize away these loops */
for (k = -2; k < 3; k++) {
nxk = nx * k;
for (l = -2; l < 3; l++) {
medarr[medcounter] = data[nxj + i + nxk + l];
medcounter++;
}
}
/* Calculate the median in the fastest way possible */
output[nxj + i] = PyOptMed25(medarr);
}
}
/* Each thread needs to free its own copy of medarr */
free(medarr);
}

#pragma omp parallel firstprivate(output, data, nx, nxny) private(i)
/* Copy the border pixels from the original data into the output array */
for (i = 0; i < nx; i++) {
output[i] = data[i];
output[i + nx] = data[i + nx];
output[nxny - nx + i] = data[nxny - nx + i];
output[nxny - nx - nx + i] = data[nxny - nx - nx + i];
}

#pragma omp parallel firstprivate(output, data, nx, ny) private(j, nxj)
for (j = 0; j < ny; j++) {
nxj = nx * j;
output[nxj] = data[nxj];
output[nxj + 1] = data[nxj + 1];
output[nxj + nx - 1] = data[nxj + nx - 1];
output[nxj + nx - 2] = data[nxj + nx - 2];
}
PyMedFiltN(data, output, nx, ny, 5);

return;

}

/* Calculate the 7x7 median filter of an array data that has dimensions
Expand Down Expand Up @@ -1160,3 +1100,175 @@ PySepMedFilt9(float* data, float* output, int nx, int ny)

return;
}

/* NxN median filter, with replicated edges. */
void
PyMedFiltN(float* data, float* output, int nx, int ny, int wsize)
{
PyDoc_STRVAR(PyMedFiltN__doc__,
"PyMedFiltN(data, output, nx, ny, wsize) -> void\n\n"
"Calculate the NxN median filter on an array with dimensions "
"nx x ny, using a window size of N=wsize (which must be an odd "
"number). The results are saved in the output array. The output "
"array should already be allocated, as we work on it in place. At "
"each border of the image, the median window is completed by "
"replicating the edge row or column. Note that the data array "
"needs to be striped in the x direction such that pixel i,j has "
"memory location data[i + nx * j].");

/*Total size of the array */
int nxny = nx * ny;

/* Loop indices */
int i, j, nxj;
int k, l, nxk, ipl, rci;
int wstart, wend;

/* N^2 element array to calculate the median and a counter index. Note that
* these both need to be unique for each thread so they both need to be
* private and we wait to allocate memory until the pragma below. */
float* medarr;
int medcounter;

/* Pointer to function that calculates median for a length N^2 array */
float (*medfunc)();
int medarg2;

/* Rows/columns at each edge where window extends past image */
int nedgerc = wsize-1;
int n1edge = nedgerc / 2;
int *rows, *cols;

rows = (int *) malloc(nedgerc * sizeof(int));
cols = (int *) malloc(nedgerc * sizeof(int));

/* Calculate indices of edge rows/columns: */
for (rci = 0; rci < n1edge; rci++) {
rows[rci] = rci;
cols[rci] = rci;
rows[nedgerc-rci-1] = ny-rci-1;
cols[nedgerc-rci-1] = nx-rci-1;
}

/* Select appropriate function to use for determining median */
switch (wsize) {
case 3:
medfunc = PyOptMed9;
medarg2 = 0;
break;
case 5:
medfunc = PyOptMed25;
medarg2 = 0;
break;
default:
medfunc = PyMedian;
medarg2 = wsize*wsize;
}

/* Start/end offsets for sliding window (is this significantly faster than
using n1edge directly below?)
*/
wstart = -n1edge;
wend = n1edge + 1;

/* Each thread needs to access the data and the output so we make them
* firstprivate. We make sure that our algorithm doesn't have multiple
* threads read or write the same piece of memory. */
#pragma omp parallel firstprivate(output, data, nx, ny, rows) \
private(i, j, k, l, ipl, rci, medarr, nxj, nxk, medcounter)
{
/*Each thread allocates its own array. */
medarr = (float *) malloc(wsize * wsize * sizeof(float));

/* Go through each pixel excluding the border.*/
#pragma omp for nowait
for (j = n1edge; j < ny - n1edge; j++) {
/* Precalculate the multiplication nx * j, minor optimization */
nxj = nx * j;
for (i = n1edge; i < nx - n1edge; i++) {
medcounter = 0;
/* The compiler should optimize away these loops */
for (k = wstart; k < wend; k++) {
nxk = nx * k;
for (l = wstart; l < wend; l++) {
medarr[medcounter] = data[nxj + i + nxk + l];
medcounter++;
}
}
/* Calculate the median in the fastest way possible */
if (medarg2)
output[nxj + i] = medfunc(medarr, medarg2);
else
output[nxj + i] = medfunc(medarr);
}
}

/* Now pad the input array with the nearest value for border pixels
(as in the original IRAF algorithm). I'm not sure how
thread-optimized this is after my changes but it only happens for
a small number of rows/columns - JT.
*/

/* Border rows: */
#pragma omp for nowait
for (rci = 0; rci < nedgerc; rci++) {
j = rows[rci];
nxj = nx * j;
for (i = 0; i < nx; i++) {
medcounter = 0;
for (k = wstart; k < wend; k++) {
if (j+k < 0) nxk = -nxj;
else if (j+k >= ny) nxk = nx * (ny-j-1);
else nxk = nx * k;
for (l = wstart; l < wend; l++) {
ipl = i+l;
if (ipl < 1) ipl = 0;
else if (ipl >= nx) ipl = nx-1;
medarr[medcounter] = data[nxj + nxk + ipl];
medcounter++;
}
}
if (medarg2)
output[nxj + i] = medfunc(medarr, medarg2);
else
output[nxj + i] = medfunc(medarr);
}
}

/* Border columns: */
#pragma omp for nowait
for (j = 0; j < ny; j++) {
nxj = nx * j;

for (rci = 0; rci < nedgerc; rci++) {
i = cols[rci];
medcounter = 0;

for (k = wstart; k < wend; k++) {
if (j+k < 0) nxk = -nxj;
else if (j+k >= ny) nxk = nx * (ny-j-1);
else nxk = nx * k;
for (l = wstart; l < wend; l++) {
ipl = i+l;
if (ipl < 1) ipl = 0;
else if (ipl >= nx) ipl = nx-1;
medarr[medcounter] = data[nxj + nxk + ipl];
medcounter++;
}
}
if (medarg2)
output[nxj + i] = medfunc(medarr, medarg2);
else
output[nxj + i] = medfunc(medarr);
}
}

/* Each thread needs to free its own copy of medarr */
free(medarr);
}

free(rows);
free(cols);

return;
}
11 changes: 11 additions & 0 deletions astroscrappy/utils/medutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,17 @@ PyMedFilt5(float* data, float* output, int nx, int ny);
void
PyMedFilt7(float* data, float* output, int nx, int ny);

/* Calculate the NxN median filter on an array with dimensions nx x ny,
* using a window size of N=wsize (which must be an odd number). The
* results are saved in the output array. The output array should already
* be allocated, as we work on it in place. At each border of the image,
* the median window is completed by replicating the edge row or
* column. Note that the data array needs to be striped in the x direction
* such that pixel i,j has memory location data[i + nx * j].
*/
void
PyMedFiltN(float* data, float* output, int nx, int ny, int wsize);

/* Calculate the 3x3 separable median filter of an array data that has
* dimensions nx x ny. The results are saved in the output array. The output
* array should already be allocated as we work on it in place. The median
Expand Down

0 comments on commit 937d765

Please sign in to comment.