Skip to content

Commit

Permalink
Distributed Fast Fourier Transforms (#1218)
Browse files Browse the repository at this point in the history
* implement fftn, first draft

* implement general , add

* split fft_op and fftn_op, implement inverse and real fft

* add TODO hermitian fft

* add fft tests first draft

* implement tests first draft

* update fft/__init__.py

* expand tests fft

* expand tests fftn

* expand tests and fix errors

* add Hermitian FFTs

* heat/fft/tests/test_fft.py

* raise IndexError, not ValueError, when axes don't match dimensions

* expand tests

* edit error message for better understanding

* replace == with allclose for 2D FFTs

* fix error

* remove redundant communication

* remove redundant tests

* fix bug in axes handling

* test hermitian FFT

* cast numpy fft2 to complex64

* expand tests

* edit error messages

* remove unnecessary axis check

* test inverse ffts as well

* skip comm-intensive tests on gpu

* introduce helper functions for real fft operations

* fix output shape wrt Nyquist frequency

* double precision tests

* debugging: introduce synchronization

* debugging

* debugging

* debugging

* specify default even size of last fft dim for inverse real ops

* add tests

* fix output shape calc when input is real

* expand tests

* expand tests

* expand tests

* add ihfft2, ihfft

* expand tests

* implement fftfreq, fftshift operations and tests

* fix local output dtype mismatch when local input tensor is empty

* remove print statements

* expand tests

* simplify dealing with multi-axis real FFT

* cannot be a list

* update documentation

* edit docs

---------

Co-authored-by: Fabian Hoppe <112093564+mrfh92@users.noreply.github.com>
  • Loading branch information
ClaudiaComito and mrfh92 committed Nov 27, 2023
1 parent 3302a44 commit 8b87890
Show file tree
Hide file tree
Showing 7 changed files with 1,410 additions and 7 deletions.
1 change: 1 addition & 0 deletions heat/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from . import core
from . import classification
from . import cluster
from . import fft
from . import graph
from . import naive_bayes
from . import nn
Expand Down
11 changes: 8 additions & 3 deletions heat/core/stride_tricks.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,10 @@ def sanitize_axis(
"""
# scalars are handled like unsplit matrices
if len(shape) == 0:
original_axis = axis
ndim = len(shape)

if ndim == 0:
axis = None

if axis is not None and not isinstance(axis, int) and not isinstance(axis, tuple):
Expand All @@ -160,7 +163,9 @@ def sanitize_axis(
axis = tuple(dim + len(shape) if dim < 0 else dim for dim in axis)
for dim in axis:
if dim < 0 or dim >= len(shape):
raise ValueError(f"axis {axis} is out of bounds for shape {shape}")
raise ValueError(
f"axis {original_axis} is out of bounds for {ndim}-dimensional array"
)
return axis

if axis is None or 0 <= axis < len(shape):
Expand All @@ -169,7 +174,7 @@ def sanitize_axis(
axis += len(shape)

if axis < 0 or axis >= len(shape):
raise ValueError(f"axis {axis} is out of bounds for shape {shape}")
raise ValueError(f"axis {original_axis} is out of bounds for {ndim}-dimensional array")

return axis

Expand Down
8 changes: 4 additions & 4 deletions heat/core/tests/test_suites/basic_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,11 +129,11 @@ def assert_array_equal(self, heat_array, expected_array):
f"Local shapes do not match. Got {heat_array.lshape} expected {expected_array[slices].shape}",
)
# compare local tensors to corresponding slice of expected_array
is_allclose = np.allclose(heat_array.larray.cpu(), expected_array[slices])
ht_is_allclose = ht.array(
[is_allclose], dtype=ht.bool, is_split=0, device=heat_array.device
is_allclose = torch.tensor(
np.allclose(heat_array.larray.cpu(), expected_array[slices]), dtype=torch.int32
)
self.assertTrue(ht.all(ht_is_allclose))
heat_array.comm.Allreduce(MPI.IN_PLACE, is_allclose, MPI.SUM)
self.assertTrue(is_allclose == heat_array.comm.size)

def assert_func_equal(
self,
Expand Down
5 changes: 5 additions & 0 deletions heat/fft/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""
import the fft functions into the fft namespace
"""

from .fft import *
Loading

1 comment on commit 8b87890

@github-actions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Performance Alert ⚠️

Possible performance regression was detected for benchmark.
The benchmark result of this commit is worse than the previous benchmark result, exceeding the threshold of 2.

Benchmark suite Current: 8b87890 Previous: 3302a44 Ratio
matmul_split_0_N1_GPU - RUNTIME 0.007017500698566437 s (0.014469628222286701) 0.002603390021249652 s (0.006333751603960991) 2.70
matmul_split_0_N1_GPU - CPU_UTIL 86.41626709372461 % (7.497791345216131) 26.097684747403566 % (3.261074024611978) 3.31
matmul_split_1_N1_GPU - CPU_UTIL 86.16538367201522 % (7.539155916320975) 25.856649768274337 % (3.4118364558357817) 3.33
qr_split_0_N1_GPU - CPU_UTIL 84.38511427607698 % (8.196118409692257) 24.821012981107344 % (4.18130343902579) 3.40
qr_split_1_N1_GPU - CPU_UTIL 81.42570468391419 % (10.28865153902457) 22.965160953705908 % (6.261581862048063) 3.55
lanczos_N1_GPU - CPU_UTIL 79.19429186332415 % (12.407264714250434) 20.336936804483464 % (9.524618075954992) 3.89
hierachical_svd_rank_N1_GPU - CPU_UTIL 78.73179690267051 % (12.925253374186736) 19.19226304383277 % (11.024591515344797) 4.10
hierachical_svd_tol_N1_GPU - CPU_UTIL 78.71230914361438 % (12.945706636861019) 18.981862685590478 % (11.33747446609521) 4.15
kmeans_N1_GPU - CPU_UTIL 79.370602533506 % (12.682345772899973) 18.856300757391526 % (11.715180439146348) 4.21
kmedians_N1_GPU - CPU_UTIL 78.61231208641762 % (10.637750159345384) 18.85433358177458 % (11.852101519451143) 4.17
kmedoids_N1_GPU - CPU_UTIL 75.89407987113074 % (15.46850512535414) 20.783858639013737 % (11.972159856220008) 3.65
reshape_N1_GPU - CPU_UTIL 76.6130541690728 % (20.724061786524725) 22.155914415212216 % (13.53664413597613) 3.46
concatenate_N1_GPU - CPU_UTIL 76.6136691290199 % (20.723875493794324) 22.155092273835585 % (13.53602296426989) 3.46
apply_inplace_standard_scaler_and_inverse_N1_GPU - CPU_UTIL 76.6160218495481 % (20.723072036592235) 22.14764316500045 % (13.530839525169128) 3.46
apply_inplace_min_max_scaler_and_inverse_N1_GPU - CPU_UTIL 76.61732357720598 % (20.722701114144403) 22.144700916487018 % (13.528898709519552) 3.46
apply_inplace_max_abs_scaler_and_inverse_N1_GPU - CPU_UTIL 76.61795914955168 % (20.72255046246034) 22.143611868533533 % (13.52801327438862) 3.46
apply_inplace_robust_scaler_and_inverse_N1_GPU - CPU_UTIL 79.18146261469116 % (11.755070574046954) 20.080137089728744 % (11.171567823872769) 3.94
apply_inplace_normalizer_N1_GPU - CPU_UTIL 73.31179600788793 % (12.895136185108) 19.101003681864807 % (13.064362492410353) 3.84
matmul_split_0_N4_CPU - POWER 39.513828964962876 W (16.193301459079382) 15.890759183255938 W (14.621856580797465) 2.49
matmul_split_0_N4_CPU - CPU_UTIL 100 % 32.754474825272816 % (1.5797409019648219) 3.05
matmul_split_0_N4_CPU - GPU_UTIL 5.75958251953125 % (2.03591032207673) 1.55914306640625 % (0.43572262352865454) 3.69
matmul_split_1_N4_CPU - POWER 39.43630102183744 W (16.122886742094575) 15.818158042476867 W (14.68703414527611) 2.49
matmul_split_1_N4_CPU - CPU_UTIL 100 % 33.74072262170039 % (2.8063102646068585) 2.96
matmul_split_1_N4_CPU - GPU_UTIL 5.75958251953125 % (2.03591032207673) 1.55914306640625 % (0.43572262352865454) 3.69
qr_split_0_N4_CPU - POWER 88.22502684777655 W (19.31471811875712) 41.73167924021085 W (15.78867461095859) 2.11
qr_split_0_N4_CPU - CPU_UTIL 99.99956074969816 % (0.0013177509055054772) 33.514057312688955 % (2.8675977414109264) 2.98
qr_split_0_N4_CPU - GPU_UTIL 5.76263427734375 % (2.038341120567663) 1.56427001953125 % (0.43338805122187496) 3.68
qr_split_1_N4_CPU - CPU_UTIL 99.9962274193314 % (0.007608835268156654) 33.58541331464233 % (3.089009137196712) 2.98
qr_split_1_N4_CPU - GPU_UTIL 5.76690673828125 % (2.041808344018959) 1.56768798828125 % (0.43212890625) 3.68
lanczos_N4_CPU - CPU_UTIL 100 % 33.19942603700165 % (2.9175233553977424) 3.01
lanczos_N4_CPU - GPU_UTIL 5.76690673828125 % (2.041808344018959) 1.53167724609375 % (0.45692509453523056) 3.77
hierachical_svd_rank_N4_CPU - CPU_UTIL 99.99840091269266 % (0.004797261922004736) 33.816329673768266 % (2.7741056130286377) 2.96
hierachical_svd_rank_N4_CPU - GPU_UTIL 5.76690673828125 % (2.041808344018959) 1.49566650390625 % (0.504150390625) 3.86
hierachical_svd_tol_N4_CPU - CPU_UTIL 99.99592862427932 % (0.012214127162044975) 33.92388945427287 % (2.8273263271445535) 2.95
hierachical_svd_tol_N4_CPU - GPU_UTIL 5.76690673828125 % (2.041808344018959) 1.49566650390625 % (0.504150390625) 3.86
kmeans_N4_CPU - CPU_UTIL 99.99118318677127 % (0.026450439686192338) 34.36424967195981 % (3.1386464270968752) 2.91
kmeans_N4_CPU - GPU_UTIL 5.76690673828125 % (2.041808344018959) 1.49566650390625 % (0.504150390625) 3.86
kmedians_N4_CPU - CPU_UTIL 99.99384310152394 % (0.018470695428183602) 34.46150643582016 % (3.1590363240501502) 2.90
kmedians_N4_CPU - GPU_UTIL 5.775529909133911 % (2.0437859679616497) 1.49566650390625 % (0.504150390625) 3.86
kmedoids_N4_CPU - CPU_UTIL 99.98941197987158 % (0.021837611463384168) 34.310239631573765 % (3.63249741201671) 2.91
kmedoids_N4_CPU - GPU_UTIL 5.82692277431488 % (2.0622742096778874) 1.49566650390625 % (0.504150390625) 3.90
reshape_N4_CPU - POWER 37.04503328719161 W (19.002548275318457) 15.892926479802984 W (14.79213118139459) 2.33
reshape_N4_CPU - CPU_UTIL 99.99037999688551 % (0.023033400112747016) 34.38994630539768 % (4.2097493494946425) 2.91
reshape_N4_CPU - GPU_UTIL 5.875543653964996 % (2.089119269594205) 1.49566650390625 % (0.504150390625) 3.93
concatenate_N4_CPU - POWER 38.5991844686702 W (17.550115998233967) 16.76007632006317 W (14.622779430743012) 2.30
concatenate_N4_CPU - CPU_UTIL 99.99857019476626 % (0.004289415701209976) 34.323328842013474 % (3.9248438646835506) 2.91
concatenate_N4_CPU - GPU_UTIL 5.890258967876434 % (2.0808057962735074) 1.49566650390625 % (0.504150390625) 3.94
apply_inplace_standard_scaler_and_inverse_N4_CPU - POWER 39.835286763427625 W (16.890926921839434) 15.884963691623705 W (14.864059240733933) 2.51
apply_inplace_standard_scaler_and_inverse_N4_CPU - CPU_UTIL 100 % 34.12162601436858 % (3.38804990192346) 2.93
apply_inplace_standard_scaler_and_inverse_N4_CPU - GPU_UTIL 5.900794985890388 % (2.0549340188311254) 1.49566650390625 % (0.504150390625) 3.95
apply_inplace_min_max_scaler_and_inverse_N4_CPU - POWER 39.906779391008165 W (16.877574898126948) 15.885234926922427 W (14.866067593030678) 2.51
apply_inplace_min_max_scaler_and_inverse_N4_CPU - CPU_UTIL 100 % 34.10854527270701 % (3.368942565431922) 2.93
apply_inplace_min_max_scaler_and_inverse_N4_CPU - GPU_UTIL 5.91492173075676 % (2.020501115254683) 1.49566650390625 % (0.504150390625) 3.95
apply_inplace_max_abs_scaler_and_inverse_N4_CPU - POWER 39.93025960427133 W (16.874393395959068) 15.885322956536248 W (14.866455222843564) 2.51
apply_inplace_max_abs_scaler_and_inverse_N4_CPU - CPU_UTIL 100 % 34.10585187350874 % (3.3652601759040772) 2.93
apply_inplace_max_abs_scaler_and_inverse_N4_CPU - GPU_UTIL 5.921336236596107 % (2.004966744437327) 1.49566650390625 % (0.504150390625) 3.96
apply_inplace_robust_scaler_and_inverse_N4_CPU - POWER 108.69777172368879 W (14.244552205435237) 53.19984799841639 W (11.716065728087878) 2.04
apply_inplace_robust_scaler_and_inverse_N4_CPU - CPU_UTIL 99.99503037886186 % (0.011194081945285896) 34.520206973119585 % (3.823210925723654) 2.90
apply_inplace_robust_scaler_and_inverse_N4_CPU - GPU_UTIL 6.270264613628387 % (1.4065196396605266) 1.533538669347763 % (0.36941817762108087) 4.09
apply_inplace_normalizer_N4_CPU - POWER 45.12751449218392 W (12.77645816035297) 16.099381209378326 W (14.53557029009554) 2.80
apply_inplace_normalizer_N4_CPU - CPU_UTIL 100 % 35.01080587137886 % (4.182493638896648) 2.86
apply_inplace_normalizer_N4_CPU - GPU_UTIL 6.07330322265625 % (2.0292216958006732) 1.5590690135955811 % (0.43562329387276183) 3.90

This comment was automatically generated by workflow using github-action-benchmark.

CC: @web-flow

Please sign in to comment.