Skip to content

Commit

Permalink
memcmp.c: extend to support timingsafe_{b,mem}cmp()
Browse files Browse the repository at this point in the history
timingsafe_bcmp() and timingsafe_memcmp() are not available in glibc at
the moment, so they are only benchmarked on FreeBSD.  Performance of our
new timingsafe_bcmp() is pretty good:

os: FreeBSD
arch: amd64
cpu: 11th Gen Intel(R) Core(TM) i7-1165G7 @ 2.80GHz
            │ memcmp.pre.out │          memcmp.scalar.out          │         memcmp.baseline.out         │
            │     sec/op     │   sec/op     vs base                │   sec/op     vs base                │
TsBcmpShort     101.65µ ± 1%   56.70µ ± 1%  -44.22% (p=0.000 n=20)   36.65µ ± 0%  -63.95% (p=0.000 n=20)
TsBcmpMid       29.106µ ± 0%   8.412µ ± 0%  -71.10% (p=0.000 n=20)   7.028µ ± 0%  -75.85% (p=0.000 n=20)
TsBcmpLong      13.974µ ± 0%   5.096µ ± 0%  -63.53% (p=0.000 n=20)   3.481µ ± 0%  -75.09% (p=0.000 n=20)
geomean          34.58µ        13.44µ       -61.12%                  9.643µ       -72.11%

            │ memcmp.pre.out │           memcmp.scalar.out            │          memcmp.baseline.out           │
            │      B/s       │      B/s       vs base                 │      B/s       vs base                 │
TsBcmpShort     1.145Gi ± 1%    2.053Gi ± 1%   +79.28% (p=0.000 n=20)    3.177Gi ± 0%  +177.36% (p=0.000 n=20)
TsBcmpMid       4.000Gi ± 0%   13.840Gi ± 0%  +246.02% (p=0.000 n=20)   16.565Gi ± 0%  +314.14% (p=0.000 n=20)
TsBcmpLong      8.331Gi ± 0%   22.845Gi ± 0%  +174.23% (p=0.000 n=20)   33.443Gi ± 0%  +301.44% (p=0.000 n=20)
geomean         3.367Gi         8.659Gi       +157.18%                   12.07Gi       +258.60%

Sponsored by:	The FreeBSD Foundation
  • Loading branch information
clausecker committed Aug 31, 2023
1 parent 3305d4c commit 7021098
Show file tree
Hide file tree
Showing 4 changed files with 216 additions and 4 deletions.
28 changes: 24 additions & 4 deletions memcmp.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@

#include "benchmark.h"

/*
 * Comparison routine currently under test.  main() assigns this before each
 * group of runbenchmark() calls, and domemcmpbench() calls through it.
 */
static int (*mymemcmp)(const void *, const void *, size_t);

struct testparam
shortparam = { .avglen = 16, .buflen = 1 << 17, .charlen = 1, .maxchar = 255, .xseed = { 123, 456, 789 }},
midparam = { .avglen = 64, .buflen = 1 << 17, .charlen = 1, .maxchar = 255, .xseed = { 234, 567, 890 }},
Expand All @@ -46,7 +48,7 @@ domemcmpbench(const char *bufa, const char *bufb, const char **ptrs)
size_t i;

for (i = 0; ptrs[i+1] != NULL; i++)
result += memcmp(ptrs[i], bufb + (ptrs[i] - bufa), (size_t)(ptrs[i+1] - ptrs[i]));
result += mymemcmp(ptrs[i], bufb + (ptrs[i] - bufa), (size_t)(ptrs[i+1] - ptrs[i]));
}

static void
Expand Down Expand Up @@ -85,7 +87,25 @@ main(void)
{
preamble();

runbenchmark("short", memcmpbench, (void *)&shortparam);
runbenchmark("mid", memcmpbench, (void *)&midparam);
runbenchmark("long", memcmpbench, (void *)&longparam);
/*
 * Each group below points mymemcmp at one comparison routine and then runs
 * the short, mid and long parameter sets against it.  Benchmark labels
 * follow the pattern <routine><Size> so that results for one routine can
 * be grouped together.
 */
mymemcmp = memcmp;
runbenchmark("memcmpShort", memcmpbench, (void *)&shortparam);
runbenchmark("memcmpMid", memcmpbench, (void *)&midparam);
runbenchmark("memcmpLong", memcmpbench, (void *)&longparam);

mymemcmp = bcmp;
runbenchmark("bcmpShort", memcmpbench, (void *)&shortparam);
runbenchmark("bcmpMid", memcmpbench, (void *)&midparam);
runbenchmark("bcmpLong", memcmpbench, (void *)&longparam);

/* The timingsafe_* routines are only benchmarked when building on FreeBSD. */
#ifdef __FreeBSD__
mymemcmp = timingsafe_bcmp;
runbenchmark("tsBcmpShort", memcmpbench, (void *)&shortparam);
runbenchmark("tsBcmpMid", memcmpbench, (void *)&midparam);
runbenchmark("tsBcmpLong", memcmpbench, (void *)&longparam);

mymemcmp = timingsafe_memcmp;
runbenchmark("tsMemcmpShort", memcmpbench, (void *)&shortparam);
runbenchmark("tsMemcmpMid", memcmpbench, (void *)&midparam);
runbenchmark("tsMemcmpLong", memcmpbench, (void *)&longparam);
#endif
}
64 changes: 64 additions & 0 deletions results/timingsafe_bcmp/memcmp.baseline.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
os: FreeBSD
arch: amd64
cpu: 11th Gen Intel(R) Core(TM) i7-1165G7 @ 2.80GHz

BenchmarkTsBcmpShort 32732 36630.916 ns/op 3412.418 MB/s
BenchmarkTsBcmpMid 169586 7038.4324 ns/op 17759.636 MB/s
BenchmarkTsBcmpLong 350365 3479.1353 ns/op 35928.467 MB/s
BenchmarkTsBcmpShort 32566 36870.454 ns/op 3390.2485 MB/s
BenchmarkTsBcmpMid 169650 7064.4356 ns/op 17694.266 MB/s
BenchmarkTsBcmpLong 350319 3482.9671 ns/op 35888.94 MB/s
BenchmarkTsBcmpShort 32678 36691.787 ns/op 3406.757 MB/s
BenchmarkTsBcmpMid 169310 7020.7093 ns/op 17804.469 MB/s
BenchmarkTsBcmpLong 350348 3479.7203 ns/op 35922.428 MB/s
BenchmarkTsBcmpShort 32754 36643.034 ns/op 3411.2896 MB/s
BenchmarkTsBcmpMid 169930 7060.0691 ns/op 17705.209 MB/s
BenchmarkTsBcmpLong 350293 3485.901 ns/op 35858.735 MB/s
BenchmarkTsBcmpShort 32815 36420.763 ns/op 3432.1082 MB/s
BenchmarkTsBcmpMid 170262 7040.8528 ns/op 17753.531 MB/s
BenchmarkTsBcmpLong 350313 3481.1819 ns/op 35907.345 MB/s
BenchmarkTsBcmpShort 32601 36792.14 ns/op 3397.4648 MB/s
BenchmarkTsBcmpMid 170712 7016.4061 ns/op 17815.389 MB/s
BenchmarkTsBcmpLong 350343 3479.891 ns/op 35920.665 MB/s
BenchmarkTsBcmpShort 33154 35776.26 ns/op 3493.937 MB/s
BenchmarkTsBcmpMid 169650 7020.8332 ns/op 17804.155 MB/s
BenchmarkTsBcmpLong 350350 3482.6092 ns/op 35892.629 MB/s
BenchmarkTsBcmpShort 32607 36937.83 ns/op 3384.0645 MB/s
BenchmarkTsBcmpMid 170236 7043.6739 ns/op 17746.421 MB/s
BenchmarkTsBcmpLong 350331 3481.487 ns/op 35904.199 MB/s
BenchmarkTsBcmpShort 32382 36580.585 ns/op 3417.1132 MB/s
BenchmarkTsBcmpMid 170475 7023.4799 ns/op 17797.445 MB/s
BenchmarkTsBcmpLong 350343 3480.2382 ns/op 35917.082 MB/s
BenchmarkTsBcmpShort 32676 36663.085 ns/op 3409.4239 MB/s
BenchmarkTsBcmpMid 170107 7012.4264 ns/op 17825.499 MB/s
BenchmarkTsBcmpLong 350298 3481.1731 ns/op 35907.436 MB/s
BenchmarkTsBcmpShort 32758 36313.727 ns/op 3442.2245 MB/s
BenchmarkTsBcmpMid 170970 7011.2953 ns/op 17828.375 MB/s
BenchmarkTsBcmpLong 350328 3482.2512 ns/op 35896.319 MB/s
BenchmarkTsBcmpShort 32756 36605.003 ns/op 3414.8337 MB/s
BenchmarkTsBcmpMid 169760 7046.8907 ns/op 17738.32 MB/s
BenchmarkTsBcmpLong 350343 3480.3513 ns/op 35915.914 MB/s
BenchmarkTsBcmpShort 33138 36689.491 ns/op 3406.9702 MB/s
BenchmarkTsBcmpMid 170944 7027.3543 ns/op 17787.633 MB/s
BenchmarkTsBcmpLong 350299 3482.1262 ns/op 35897.607 MB/s
BenchmarkTsBcmpShort 32589 36816.21 ns/op 3395.2436 MB/s
BenchmarkTsBcmpMid 169843 7025.6774 ns/op 17791.879 MB/s
BenchmarkTsBcmpLong 350322 3480.8216 ns/op 35911.062 MB/s
BenchmarkTsBcmpShort 32682 36647.685 ns/op 3410.8567 MB/s
BenchmarkTsBcmpMid 170584 7020.0064 ns/op 17806.252 MB/s
BenchmarkTsBcmpLong 350313 3482.5089 ns/op 35893.663 MB/s
BenchmarkTsBcmpShort 32810 36781.499 ns/op 3398.4477 MB/s
BenchmarkTsBcmpMid 169706 7074.7391 ns/op 17668.496 MB/s
BenchmarkTsBcmpLong 350338 3479.9255 ns/op 35920.309 MB/s
BenchmarkTsBcmpShort 32709 36151.071 ns/op 3457.7122 MB/s
BenchmarkTsBcmpMid 171123 7017.5127 ns/op 17812.579 MB/s
BenchmarkTsBcmpLong 350353 3480.1053 ns/op 35918.453 MB/s
BenchmarkTsBcmpShort 32661 36273.704 ns/op 3446.0225 MB/s
BenchmarkTsBcmpMid 170546 7036.1463 ns/op 17765.407 MB/s
BenchmarkTsBcmpLong 350325 3478.9341 ns/op 35930.546 MB/s
BenchmarkTsBcmpShort 32725 36749.235 ns/op 3401.4313 MB/s
BenchmarkTsBcmpMid 170276 7038.5716 ns/op 17759.285 MB/s
BenchmarkTsBcmpLong 350348 3479.4164 ns/op 35925.565 MB/s
BenchmarkTsBcmpShort 32538 36648.552 ns/op 3410.776 MB/s
BenchmarkTsBcmpMid 170547 7028.4686 ns/op 17784.813 MB/s
BenchmarkTsBcmpLong 350354 3481.1261 ns/op 35907.92 MB/s
64 changes: 64 additions & 0 deletions results/timingsafe_bcmp/memcmp.pre.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
os: FreeBSD
arch: amd64
cpu: 11th Gen Intel(R) Core(TM) i7-1165G7 @ 2.80GHz

BenchmarkTsBcmpShort 10000 100184.58 ns/op 1247.697 MB/s
BenchmarkTsBcmpMid 41445 28979.188 ns/op 4313.4405 MB/s
BenchmarkTsBcmpLong 85878 13972.874 ns/op 8945.9049 MB/s
BenchmarkTsBcmpShort 10000 102265.63 ns/op 1222.3071 MB/s
BenchmarkTsBcmpMid 41438 28932.945 ns/op 4320.3345 MB/s
BenchmarkTsBcmpLong 85878 13973.508 ns/op 8945.499 MB/s
BenchmarkTsBcmpShort 10000 102309.46 ns/op 1221.7834 MB/s
BenchmarkTsBcmpMid 41235 29102.422 ns/op 4295.1751 MB/s
BenchmarkTsBcmpLong 85857 13975.926 ns/op 8943.9514 MB/s
BenchmarkTsBcmpShort 10000 100765.89 ns/op 1240.4991 MB/s
BenchmarkTsBcmpMid 41554 29382.277 ns/op 4254.2652 MB/s
BenchmarkTsBcmpLong 75663 13973.756 ns/op 8945.3398 MB/s
BenchmarkTsBcmpShort 10000 101504.81 ns/op 1231.4688 MB/s
BenchmarkTsBcmpMid 41167 29087.904 ns/op 4297.319 MB/s
BenchmarkTsBcmpLong 85711 13986.118 ns/op 8937.4337 MB/s
BenchmarkTsBcmpShort 10000 102193.87 ns/op 1223.1653 MB/s
BenchmarkTsBcmpMid 41518 28919.486 ns/op 4322.3452 MB/s
BenchmarkTsBcmpLong 85878 13975.26 ns/op 8944.3775 MB/s
BenchmarkTsBcmpShort 10000 100798 ns/op 1240.104 MB/s
BenchmarkTsBcmpMid 41509 28923.545 ns/op 4321.7386 MB/s
BenchmarkTsBcmpLong 85873 13973.611 ns/op 8945.4329 MB/s
BenchmarkTsBcmpShort 10000 104391.55 ns/op 1197.4149 MB/s
BenchmarkTsBcmpMid 41324 29025.4 ns/op 4306.5728 MB/s
BenchmarkTsBcmpLong 85856 13976.45 ns/op 8943.616 MB/s
BenchmarkTsBcmpShort 10000 101229.65 ns/op 1234.816 MB/s
BenchmarkTsBcmpMid 41264 29108.822 ns/op 4294.2309 MB/s
BenchmarkTsBcmpLong 85873 13974.153 ns/op 8945.0861 MB/s
BenchmarkTsBcmpShort 12009 101593.74 ns/op 1230.3908 MB/s
BenchmarkTsBcmpMid 40987 29257.003 ns/op 4272.4813 MB/s
BenchmarkTsBcmpLong 85874 13980.383 ns/op 8941.1 MB/s
BenchmarkTsBcmpShort 10000 103183.36 ns/op 1211.4357 MB/s
BenchmarkTsBcmpMid 40989 29245.378 ns/op 4274.1797 MB/s
BenchmarkTsBcmpLong 85878 13973.145 ns/op 8945.7311 MB/s
BenchmarkTsBcmpShort 10000 101900.29 ns/op 1226.6894 MB/s
BenchmarkTsBcmpMid 41415 28942.276 ns/op 4318.9416 MB/s
BenchmarkTsBcmpLong 85876 13973.804 ns/op 8945.3094 MB/s
BenchmarkTsBcmpShort 10000 100536.68 ns/op 1243.3273 MB/s
BenchmarkTsBcmpMid 41526 28927.787 ns/op 4321.1049 MB/s
BenchmarkTsBcmpLong 85875 13975.296 ns/op 8944.3544 MB/s
BenchmarkTsBcmpShort 10000 101680.48 ns/op 1229.3411 MB/s
BenchmarkTsBcmpMid 41199 29124.351 ns/op 4291.9411 MB/s
BenchmarkTsBcmpLong 85863 13977.347 ns/op 8943.0418 MB/s
BenchmarkTsBcmpShort 10000 102261.22 ns/op 1222.3597 MB/s
BenchmarkTsBcmpMid 41210 29117.091 ns/op 4293.0113 MB/s
BenchmarkTsBcmpLong 85876 13973.589 ns/op 8945.4469 MB/s
BenchmarkTsBcmpShort 10000 101300.28 ns/op 1233.9551 MB/s
BenchmarkTsBcmpMid 41023 29266.848 ns/op 4271.0441 MB/s
BenchmarkTsBcmpLong 85872 13973.611 ns/op 8945.4332 MB/s
BenchmarkTsBcmpShort 10000 107346.98 ns/op 1164.4483 MB/s
BenchmarkTsBcmpMid 41473 28974.594 ns/op 4314.1243 MB/s
BenchmarkTsBcmpLong 85870 13974.054 ns/op 8945.1492 MB/s
BenchmarkTsBcmpShort 10000 103070.18 ns/op 1212.7659 MB/s
BenchmarkTsBcmpMid 39052 29112.762 ns/op 4293.6496 MB/s
BenchmarkTsBcmpLong 85876 13973.13 ns/op 8945.7412 MB/s
BenchmarkTsBcmpShort 12002 100026.77 ns/op 1249.6655 MB/s
BenchmarkTsBcmpMid 41226 29140.839 ns/op 4289.5128 MB/s
BenchmarkTsBcmpLong 85879 13985.66 ns/op 8937.7265 MB/s
BenchmarkTsBcmpShort 10000 101612.3 ns/op 1230.1661 MB/s
BenchmarkTsBcmpMid 41125 29191.589 ns/op 4282.0554 MB/s
BenchmarkTsBcmpLong 85875 13975.203 ns/op 8944.414 MB/s
64 changes: 64 additions & 0 deletions results/timingsafe_bcmp/memcmp.scalar.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
os: FreeBSD
arch: amd64
cpu: 11th Gen Intel(R) Core(TM) i7-1165G7 @ 2.80GHz

BenchmarkTsBcmpShort 21051 56495.327 ns/op 2212.5724 MB/s
BenchmarkTsBcmpMid 142452 8381.442 ns/op 14913.902 MB/s
BenchmarkTsBcmpLong 243946 5094.9423 ns/op 24534.135 MB/s
BenchmarkTsBcmpShort 19508 57123.824 ns/op 2188.2289 MB/s
BenchmarkTsBcmpMid 141777 8438.3019 ns/op 14813.407 MB/s
BenchmarkTsBcmpLong 243816 4904.5469 ns/op 25486.554 MB/s
BenchmarkTsBcmpShort 20023 56571.071 ns/op 2209.6099 MB/s
BenchmarkTsBcmpMid 142854 8431.5817 ns/op 14825.214 MB/s
BenchmarkTsBcmpLong 243723 5096.9485 ns/op 24524.478 MB/s
BenchmarkTsBcmpShort 21000 56546.139 ns/op 2210.5842 MB/s
BenchmarkTsBcmpMid 139920 8444.054 ns/op 14803.316 MB/s
BenchmarkTsBcmpLong 244089 5095.1861 ns/op 24532.96 MB/s
BenchmarkTsBcmpShort 20900 56774.346 ns/op 2201.6986 MB/s
BenchmarkTsBcmpMid 142054 8422.2322 ns/op 14841.671 MB/s
BenchmarkTsBcmpLong 244119 5095.7675 ns/op 24530.162 MB/s
BenchmarkTsBcmpShort 20988 57151.026 ns/op 2187.1873 MB/s
BenchmarkTsBcmpMid 142416 8428.6281 ns/op 14830.409 MB/s
BenchmarkTsBcmpLong 244026 5095.965 ns/op 24529.211 MB/s
BenchmarkTsBcmpShort 20620 56473.794 ns/op 2213.416 MB/s
BenchmarkTsBcmpMid 142244 8407.6034 ns/op 14867.495 MB/s
BenchmarkTsBcmpLong 243962 5095.0253 ns/op 24533.735 MB/s
BenchmarkTsBcmpShort 20133 55984.653 ns/op 2232.7548 MB/s
BenchmarkTsBcmpMid 142525 8452.7019 ns/op 14788.171 MB/s
BenchmarkTsBcmpLong 243948 5096.0903 ns/op 24528.608 MB/s
BenchmarkTsBcmpShort 20434 56896.251 ns/op 2196.9813 MB/s
BenchmarkTsBcmpMid 142789 8407.0299 ns/op 14868.509 MB/s
BenchmarkTsBcmpLong 244034 5095.4771 ns/op 24531.56 MB/s
BenchmarkTsBcmpShort 19903 56878.232 ns/op 2197.6773 MB/s
BenchmarkTsBcmpMid 142329 8407.9248 ns/op 14866.927 MB/s
BenchmarkTsBcmpLong 243976 5097.0043 ns/op 24524.209 MB/s
BenchmarkTsBcmpShort 21321 56548.614 ns/op 2210.4874 MB/s
BenchmarkTsBcmpMid 142527 8396.9822 ns/op 14886.3 MB/s
BenchmarkTsBcmpLong 244066 5095.4807 ns/op 24531.542 MB/s
BenchmarkTsBcmpShort 20876 57834.697 ns/op 2161.3323 MB/s
BenchmarkTsBcmpMid 142772 8415.2882 ns/op 14853.918 MB/s
BenchmarkTsBcmpLong 243807 5095.8233 ns/op 24529.893 MB/s
BenchmarkTsBcmpShort 20630 56622.685 ns/op 2207.5958 MB/s
BenchmarkTsBcmpMid 142833 8381.5965 ns/op 14913.626 MB/s
BenchmarkTsBcmpLong 244003 5096.0923 ns/op 24528.598 MB/s
BenchmarkTsBcmpShort 20385 56506.168 ns/op 2212.1479 MB/s
BenchmarkTsBcmpMid 142594 8393.2358 ns/op 14892.945 MB/s
BenchmarkTsBcmpLong 243760 5096.728 ns/op 24525.539 MB/s
BenchmarkTsBcmpShort 20556 57020.821 ns/op 2192.1817 MB/s
BenchmarkTsBcmpMid 140608 8598.5741 ns/op 14537.294 MB/s
BenchmarkTsBcmpLong 243945 5412.8382 ns/op 23093.245 MB/s
BenchmarkTsBcmpShort 21048 56970.203 ns/op 2194.1294 MB/s
BenchmarkTsBcmpMid 142428 8378.5911 ns/op 14918.976 MB/s
BenchmarkTsBcmpLong 243999 5097.0629 ns/op 24523.927 MB/s
BenchmarkTsBcmpShort 20989 57748.053 ns/op 2164.5751 MB/s
BenchmarkTsBcmpMid 142376 8423.6149 ns/op 14839.235 MB/s
BenchmarkTsBcmpLong 243950 5095.8595 ns/op 24529.718 MB/s
BenchmarkTsBcmpShort 21000 57134.883 ns/op 2187.8053 MB/s
BenchmarkTsBcmpMid 142357 8432.1696 ns/op 14824.18 MB/s
BenchmarkTsBcmpLong 244180 5095.2453 ns/op 24532.676 MB/s
BenchmarkTsBcmpShort 21036 56074.466 ns/op 2229.1786 MB/s
BenchmarkTsBcmpMid 142600 8377.6514 ns/op 14920.649 MB/s
BenchmarkTsBcmpLong 243880 5093.8888 ns/op 24539.209 MB/s
BenchmarkTsBcmpShort 20842 56358.98 ns/op 2217.9252 MB/s
BenchmarkTsBcmpMid 142772 8394.0042 ns/op 14891.582 MB/s
BenchmarkTsBcmpLong 244359 5095.146 ns/op 24533.154 MB/s

0 comments on commit 7021098

Please sign in to comment.