Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Summary: Move the core of add_points_to_volumes to the new C++/CUDA implementation. Add new flag to let the user stop this happening. Avoids copies. About a 30% speedup on the larger cases, up to 50% on the smaller cases. New timings ``` Benchmark Avg Time(μs) Peak Time(μs) Iterations -------------------------------------------------------------------------------- ADD_POINTS_TO_VOLUMES_cpu_10_trilinear_[25, 25, 25]_1000 4575 12591 110 ADD_POINTS_TO_VOLUMES_cpu_10_trilinear_[25, 25, 25]_10000 25468 29186 20 ADD_POINTS_TO_VOLUMES_cpu_10_trilinear_[25, 25, 25]_100000 202085 209897 3 ADD_POINTS_TO_VOLUMES_cpu_10_trilinear_[101, 111, 121]_1000 46059 48188 11 ADD_POINTS_TO_VOLUMES_cpu_10_trilinear_[101, 111, 121]_10000 83759 95669 7 ADD_POINTS_TO_VOLUMES_cpu_10_trilinear_[101, 111, 121]_100000 326056 339393 2 ADD_POINTS_TO_VOLUMES_cpu_10_nearest_[25, 25, 25]_1000 2379 4738 211 ADD_POINTS_TO_VOLUMES_cpu_10_nearest_[25, 25, 25]_10000 12100 63099 42 ADD_POINTS_TO_VOLUMES_cpu_10_nearest_[25, 25, 25]_100000 63323 63737 8 ADD_POINTS_TO_VOLUMES_cpu_10_nearest_[101, 111, 121]_1000 45216 45479 12 ADD_POINTS_TO_VOLUMES_cpu_10_nearest_[101, 111, 121]_10000 57205 58524 9 ADD_POINTS_TO_VOLUMES_cpu_10_nearest_[101, 111, 121]_100000 139499 139926 4 ADD_POINTS_TO_VOLUMES_cpu_100_trilinear_[25, 25, 25]_1000 40129 40431 13 ADD_POINTS_TO_VOLUMES_cpu_100_trilinear_[25, 25, 25]_10000 204949 239293 3 ADD_POINTS_TO_VOLUMES_cpu_100_trilinear_[25, 25, 25]_100000 1664541 1664541 1 ADD_POINTS_TO_VOLUMES_cpu_100_trilinear_[101, 111, 121]_1000 391573 395108 2 ADD_POINTS_TO_VOLUMES_cpu_100_trilinear_[101, 111, 121]_10000 674869 674869 1 ADD_POINTS_TO_VOLUMES_cpu_100_trilinear_[101, 111, 121]_100000 2713632 2713632 1 ADD_POINTS_TO_VOLUMES_cpu_100_nearest_[25, 25, 25]_1000 12726 13506 40 ADD_POINTS_TO_VOLUMES_cpu_100_nearest_[25, 25, 25]_10000 73103 73299 7 ADD_POINTS_TO_VOLUMES_cpu_100_nearest_[25, 25, 25]_100000 598634 598634 1 ADD_POINTS_TO_VOLUMES_cpu_100_nearest_[101, 111, 121]_1000 398742 399256 2 ADD_POINTS_TO_VOLUMES_cpu_100_nearest_[101, 111, 121]_10000 543129 543129 1 ADD_POINTS_TO_VOLUMES_cpu_100_nearest_[101, 111, 121]_100000 1242956 1242956 1 ADD_POINTS_TO_VOLUMES_cuda:0_10_trilinear_[25, 25, 25]_1000 1814 8884 276 ADD_POINTS_TO_VOLUMES_cuda:0_10_trilinear_[25, 25, 25]_10000 1996 8851 251 ADD_POINTS_TO_VOLUMES_cuda:0_10_trilinear_[25, 25, 25]_100000 4608 11529 109 ADD_POINTS_TO_VOLUMES_cuda:0_10_trilinear_[101, 111, 121]_1000 5183 12508 97 ADD_POINTS_TO_VOLUMES_cuda:0_10_trilinear_[101, 111, 121]_10000 7106 14077 71 ADD_POINTS_TO_VOLUMES_cuda:0_10_trilinear_[101, 111, 121]_100000 25914 31818 20 ADD_POINTS_TO_VOLUMES_cuda:0_10_nearest_[25, 25, 25]_1000 1778 8823 282 ADD_POINTS_TO_VOLUMES_cuda:0_10_nearest_[25, 25, 25]_10000 1825 8613 274 ADD_POINTS_TO_VOLUMES_cuda:0_10_nearest_[25, 25, 25]_100000 3154 10161 159 ADD_POINTS_TO_VOLUMES_cuda:0_10_nearest_[101, 111, 121]_1000 4888 9404 103 ADD_POINTS_TO_VOLUMES_cuda:0_10_nearest_[101, 111, 121]_10000 5194 9963 97 ADD_POINTS_TO_VOLUMES_cuda:0_10_nearest_[101, 111, 121]_100000 8109 14933 62 ADD_POINTS_TO_VOLUMES_cuda:0_100_trilinear_[25, 25, 25]_1000 3320 10306 151 ADD_POINTS_TO_VOLUMES_cuda:0_100_trilinear_[25, 25, 25]_10000 7003 8595 72 ADD_POINTS_TO_VOLUMES_cuda:0_100_trilinear_[25, 25, 25]_100000 49140 52957 11 ADD_POINTS_TO_VOLUMES_cuda:0_100_trilinear_[101, 111, 121]_1000 35890 36918 14 ADD_POINTS_TO_VOLUMES_cuda:0_100_trilinear_[101, 111, 121]_10000 58890 59337 9 ADD_POINTS_TO_VOLUMES_cuda:0_100_trilinear_[101, 111, 121]_100000 286878 287600 2 ADD_POINTS_TO_VOLUMES_cuda:0_100_nearest_[25, 25, 25]_1000 2484 8805 202 ADD_POINTS_TO_VOLUMES_cuda:0_100_nearest_[25, 25, 25]_10000 3967 9090 127 ADD_POINTS_TO_VOLUMES_cuda:0_100_nearest_[25, 25, 25]_100000 19423 19799 26 ADD_POINTS_TO_VOLUMES_cuda:0_100_nearest_[101, 111, 121]_1000 33228 33329 16 ADD_POINTS_TO_VOLUMES_cuda:0_100_nearest_[101, 111, 121]_10000 37292 37370 14 ADD_POINTS_TO_VOLUMES_cuda:0_100_nearest_[101, 111, 121]_100000 73550 74017 7 -------------------------------------------------------------------------------- ``` Previous timings ``` Benchmark Avg Time(μs) Peak Time(μs) Iterations -------------------------------------------------------------------------------- ADD_POINTS_TO_VOLUMES_cpu_10_trilinear_[25, 25, 25]_1000 10100 46422 50 ADD_POINTS_TO_VOLUMES_cpu_10_trilinear_[25, 25, 25]_10000 28442 32100 18 ADD_POINTS_TO_VOLUMES_cpu_10_trilinear_[25, 25, 25]_100000 241127 254269 3 ADD_POINTS_TO_VOLUMES_cpu_10_trilinear_[101, 111, 121]_1000 54149 79480 10 ADD_POINTS_TO_VOLUMES_cpu_10_trilinear_[101, 111, 121]_10000 125459 212734 4 ADD_POINTS_TO_VOLUMES_cpu_10_trilinear_[101, 111, 121]_100000 512739 512739 1 ADD_POINTS_TO_VOLUMES_cpu_10_nearest_[25, 25, 25]_1000 2866 13365 175 ADD_POINTS_TO_VOLUMES_cpu_10_nearest_[25, 25, 25]_10000 7026 12604 72 ADD_POINTS_TO_VOLUMES_cpu_10_nearest_[25, 25, 25]_100000 48822 55607 11 ADD_POINTS_TO_VOLUMES_cpu_10_nearest_[101, 111, 121]_1000 38098 38576 14 ADD_POINTS_TO_VOLUMES_cpu_10_nearest_[101, 111, 121]_10000 48006 54120 11 ADD_POINTS_TO_VOLUMES_cpu_10_nearest_[101, 111, 121]_100000 131563 138536 4 ADD_POINTS_TO_VOLUMES_cpu_100_trilinear_[25, 25, 25]_1000 64615 91735 8 ADD_POINTS_TO_VOLUMES_cpu_100_trilinear_[25, 25, 25]_10000 228815 246095 3 ADD_POINTS_TO_VOLUMES_cpu_100_trilinear_[25, 25, 25]_100000 3086615 3086615 1 ADD_POINTS_TO_VOLUMES_cpu_100_trilinear_[101, 111, 121]_1000 464298 465292 2 ADD_POINTS_TO_VOLUMES_cpu_100_trilinear_[101, 111, 121]_10000 1053440 1053440 1 ADD_POINTS_TO_VOLUMES_cpu_100_trilinear_[101, 111, 121]_100000 6736236 6736236 1 ADD_POINTS_TO_VOLUMES_cpu_100_nearest_[25, 25, 25]_1000 11940 12440 42 ADD_POINTS_TO_VOLUMES_cpu_100_nearest_[25, 25, 25]_10000 56641 58051 9 ADD_POINTS_TO_VOLUMES_cpu_100_nearest_[25, 25, 25]_100000 711492 711492 1 ADD_POINTS_TO_VOLUMES_cpu_100_nearest_[101, 111, 121]_1000 326437 329846 2 ADD_POINTS_TO_VOLUMES_cpu_100_nearest_[101, 111, 121]_10000 418514 427911 2 ADD_POINTS_TO_VOLUMES_cpu_100_nearest_[101, 111, 121]_100000 1524285 1524285 1 ADD_POINTS_TO_VOLUMES_cuda:0_10_trilinear_[25, 25, 25]_1000 5949 13602 85 ADD_POINTS_TO_VOLUMES_cuda:0_10_trilinear_[25, 25, 25]_10000 5817 13001 86 ADD_POINTS_TO_VOLUMES_cuda:0_10_trilinear_[25, 25, 25]_100000 23833 25971 21 ADD_POINTS_TO_VOLUMES_cuda:0_10_trilinear_[101, 111, 121]_1000 9029 16178 56 ADD_POINTS_TO_VOLUMES_cuda:0_10_trilinear_[101, 111, 121]_10000 11595 18601 44 ADD_POINTS_TO_VOLUMES_cuda:0_10_trilinear_[101, 111, 121]_100000 46986 47344 11 ADD_POINTS_TO_VOLUMES_cuda:0_10_nearest_[25, 25, 25]_1000 2554 9747 196 ADD_POINTS_TO_VOLUMES_cuda:0_10_nearest_[25, 25, 25]_10000 2676 9537 187 ADD_POINTS_TO_VOLUMES_cuda:0_10_nearest_[25, 25, 25]_100000 6567 14179 77 ADD_POINTS_TO_VOLUMES_cuda:0_10_nearest_[101, 111, 121]_1000 5840 12811 86 ADD_POINTS_TO_VOLUMES_cuda:0_10_nearest_[101, 111, 121]_10000 6102 13128 82 ADD_POINTS_TO_VOLUMES_cuda:0_10_nearest_[101, 111, 121]_100000 11945 11995 42 ADD_POINTS_TO_VOLUMES_cuda:0_100_trilinear_[25, 25, 25]_1000 7642 13671 66 ADD_POINTS_TO_VOLUMES_cuda:0_100_trilinear_[25, 25, 25]_10000 25190 25260 20 ADD_POINTS_TO_VOLUMES_cuda:0_100_trilinear_[25, 25, 25]_100000 212018 212134 3 ADD_POINTS_TO_VOLUMES_cuda:0_100_trilinear_[101, 111, 121]_1000 40421 45692 13 ADD_POINTS_TO_VOLUMES_cuda:0_100_trilinear_[101, 111, 121]_10000 92078 92132 6 ADD_POINTS_TO_VOLUMES_cuda:0_100_trilinear_[101, 111, 121]_100000 457211 457229 2 ADD_POINTS_TO_VOLUMES_cuda:0_100_nearest_[25, 25, 25]_1000 3574 10377 140 ADD_POINTS_TO_VOLUMES_cuda:0_100_nearest_[25, 25, 25]_10000 7222 13023 70 ADD_POINTS_TO_VOLUMES_cuda:0_100_nearest_[25, 25, 25]_100000 48127 48165 11 ADD_POINTS_TO_VOLUMES_cuda:0_100_nearest_[101, 111, 121]_1000 34732 35295 15 ADD_POINTS_TO_VOLUMES_cuda:0_100_nearest_[101, 111, 121]_10000 43050 51064 12 ADD_POINTS_TO_VOLUMES_cuda:0_100_nearest_[101, 111, 121]_100000 106028 106058 5 -------------------------------------------------------------------------------- ``` Reviewed By: nikhilaravi Differential Revision: D29548609 fbshipit-source-id: 7026e832ea299145c3f6b55687f3c1601294f5c0
- Loading branch information