Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
branch: master
file 141 lines (110 sloc) 3.199 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
#include "cache.h"
#include "fast_math.h"

#include <stddef.h>
#include <time.h>
#include <stdlib.h>
#include <assert.h>

/*
This function is basically manually code generated.
DON'T TOUCH UNLESS YOU UNDERSTAND WHAT AN OPCODE IS.

See: http://igoro.com/archive/gallery-of-processor-cache-effects/
*/
static void iterate_through_data(char* data, unsigned int dataSize)
{
static const unsigned int steps = 64*1024*1024; /* Arbitrary. */

unsigned int lengthMod = dataSize - 1;
unsigned int i;

assert(is_power_of_two(dataSize));

/*
NOTE:
If n is "steps", and m is maxAlignment, then this code is proportional
to O(n log m). Therefore, be careful about increasing the step size to
fix this assert. Try decreasing the maxAlignment instead. Or "max", as
external code knows it by.
*/
assert(dataSize < steps);

for(i = 0; i < steps; ++i)
++data[(i * 64) & lengthMod];
}

static clock_t timed_iteration(char* data, unsigned int dataSize)
{
clock_t begin, end;

begin = clock();
iterate_through_data(data, dataSize);
end = clock();

return end - begin;
}

static void fill_timing_data(
clock_t* timingData,
unsigned int timingDataLength,
unsigned int maxAlignment
)
{
unsigned int currentAlignment;
unsigned int i;

char* targetArray = NULL;

for(currentAlignment = 1, i = 0; currentAlignment < maxAlignment; currentAlignment *= 2, ++i)
{
targetArray = realloc(targetArray, currentAlignment);
timingData[i] = timed_iteration(targetArray, currentAlignment);
}

free(targetArray);
}

/* There's probably a magical math formula involving logarithms that should be used here, but I'm too tired to find it. */
static unsigned int determine_size_of_timing_data_required(unsigned int maxAlignment)
{
unsigned int size, currentAlignment;

for(
size = 0, currentAlignment = 1;
currentAlignment < maxAlignment;
++size, currentAlignment *= 2
);

return size;
}

/*
This function essentially finds the biggest "jump" in timings.
A decent heuristic - it worked on my machine!
*/
static unsigned int get_cache_line_size_from_timing_data(
clock_t* timingData,
unsigned int numberOfDataPoints
)
{
unsigned int i;
clock_t delta;

clock_t biggestJumpAmount = -9001; /* It's under(?) 9000! */

unsigned int locationOfBiggestJumpAmount = 0;

/* We ignore the first point. You can't get a delta from just one point! */
for(i = 1; i < numberOfDataPoints; ++i)
{
delta = timingData[i] - timingData[i - 1];

if(delta > biggestJumpAmount)
{
biggestJumpAmount = delta;
locationOfBiggestJumpAmount = i;
}
}

/*
The best timing data is at the point before the biggest jump,
because it's at the magical boundary that's painful to access.
*/
return int_pow(2, locationOfBiggestJumpAmount - 1);
}

unsigned int get_cache_line(unsigned int max)
{
unsigned int startAlignment = 1;

unsigned int timingDataLength = determine_size_of_timing_data_required(max);
clock_t* timingData = malloc(timingDataLength * sizeof(clock_t));

unsigned int result;

fill_timing_data(
timingData,
timingDataLength,
max
);

result = get_cache_line_size_from_timing_data(
timingData,
timingDataLength
);

free(timingData);

return result;
}
Something went wrong with that request. Please try again.