In [1]:
using FFTW, Test
using CuArrays

In [None]:
# Reference transform: apply FFTW's in-place real-to-real transform of kind
# REDFT10 to a random length-10 vector. The later cells compare their FFT-based
# transforms against this routine.
x = rand(10)
FFTW.r2r!(x, FFTW.REDFT10)

# Disabled 3-D variant: same transform kind with a third argument of 3
# (presumably the dimension to transform — confirm against the FFTW.jl docs).
# x = rand(8, 8, 4)
# y = FFTW.r2r!(x, FFTW.REDFT10, 3)

In [None]:
# Preview the index shuffle used by the FFT-based transform on a small integer
# vector: odd positions in ascending order, then even positions in descending order.
t = collect(1:16)
N = length(t)  # defined here so the cell does not depend on a later cell having run
cat(t[1:2:N], t[N:-2:2]; dims=1)

In [None]:
"""
    dct_1d_gpu!(f)

Overwrite the complex vector `f` with a cosine transform computed through a single
complex FFT. The notebook checks the real part of the result against
`FFTW.r2r!(y, FFTW.REDFT10)`.

The steps are:

1. reorder the samples so that the odd (1-based) positions come first in ascending
   order, followed by the even positions in descending order;
2. take an in-place complex FFT of the reordered data;
3. multiply entry `k` (0-based) by `2 * exp(-im * π * k / (2N))`.

`f` must have a complex element type because the FFT is done in place. The function
is written for vectors (it uses linear indexing and transforms all of `f`).

Returns `nothing`; the result is left in `f`.
"""
function dct_1d_gpu!(f)
    N = size(f, 1)

    # The descending run must start at the last *even* position. Starting at `N`
    # when `N` is odd would pick the odd positions a second time.
    last_even = N - isodd(N)
    f .= cat(f[1:2:N], f[last_even:-2:2]; dims=1)

    FFTW.fft!(f)

    f .*= 2 * exp.(-1im * π * (0:N-1) / (2 * N))

    return nothing
end

In [None]:
# Check the FFT-based 1-D transform against FFTW's REDFT10 on random data.
N = 16
x = Complex.(rand(Float64, N))  # complex storage: dct_1d_gpu! runs an in-place complex FFT
y = copy(x)

# Transform x with the FFT-based routine ...
dct_1d_gpu!(x)

# ... and y with the FFTW reference.
FFTW.r2r!(y, FFTW.REDFT10);

# Only the real parts are compared.
@test real.(x) ≈ real.(y)

In [2]:
# Show the dimension-3 shuffle on a small labelled cube: slices at odd k come
# first in ascending order, followed by slices at even k in descending order.
Nx, Ny, Nz = 4, 4, 4
t = collect(reshape(1:(Nx * Ny * Nz), Nx, Ny, Nz))
t[:, :, vcat(1:2:Nz, Nz:-2:2)]

4×4×4 Array{Int64,3}:
[:, :, 1] =
 1  5   9  13
 2  6  10  14
 3  7  11  15
 4  8  12  16

[:, :, 2] =
 33  37  41  45
 34  38  42  46
 35  39  43  47
 36  40  44  48

[:, :, 3] =
 49  53  57  61
 50  54  58  62
 51  55  59  63
 52  56  60  64

[:, :, 4] =
 17  21  25  29
 18  22  26  30
 19  23  27  31
 20  24  28  32

In [None]:
# Complex factors 2·exp(-iπk / 2Nz) for k = 0, …, Nz-1, laid along dimension 3
# and tiled so that every (i, j) column gets its own copy.
factors = 2 .* exp.(-1im * π * (0:Nz-1) / (2 * Nz))
repeat(reshape(factors, 1, 1, Nz); outer=(Nx, Ny, 1))

In [2]:
"""
    dct_dim3_gpu!(f)

Overwrite the complex 3-D array `f` with a cosine transform along its third
dimension, computed through a single complex FFT. The notebook checks the real part
of the result against `FFTW.r2r!(y, FFTW.REDFT10, 3)`.

The steps are:

1. reorder the slices along dimension 3 so that the odd (1-based) slices come first
   in ascending order, followed by the even slices in descending order;
2. run `fft!(f, 3)`;
3. multiply slice `k` (0-based) by `2 * exp(-im * π * k / (2Nz))`.

The factors are built on the host, converted to `eltype(f)`, and copied into an array
obtained from `similar(f, …)`. They therefore use the same array type and precision
as `f` instead of always being sent through `cu`.

`f` must have a complex element type. Returns `nothing`; the result is left in `f`.
"""
function dct_dim3_gpu!(f)
    _, _, Nz = size(f)

    # The descending run must start at the last *even* slice. Starting at `Nz`
    # when `Nz` is odd would pick the odd slices a second time.
    last_even = Nz - isodd(Nz)
    f .= cat(f[:, :, 1:2:Nz], f[:, :, last_even:-2:2]; dims=3)
    fft!(f, 3)

    phase = 2 * exp.(-1im * π * (0:Nz-1) / (2 * Nz))

    # A 1×1×Nz array broadcasts over the first two dimensions, so no `repeat`
    # is needed.
    factors = similar(f, eltype(f), (1, 1, Nz))
    copyto!(factors, convert(Array{eltype(f)}, reshape(phase, 1, 1, Nz)))
    f .*= factors

    return nothing
end

dct_dim3_gpu! (generic function with 1 method)

In [None]:
# Check the FFT-based dimension-3 transform against FFTW's REDFT10 on random data.
Nx, Ny, Nz = 4, 4, 4
# Alternative deterministic input (disabled):
# x = Complex{Float64}.(reshape(collect(1:Nx*Ny*Nz), Nx, Ny, Nz))
x = Complex.(rand(Float64, Nx, Ny, Nz))
y = copy(x)

# Transform x with the FFT-based routine ...
dct_dim3_gpu!(x)

# ... and y with the FFTW reference along dimension 3.
FFTW.r2r!(y, FFTW.REDFT10, 3);

# Only the real parts are compared.
@test real.(x) ≈ real.(y)

In [None]:
# Apply the forward shuffle to 1:N, then undo it: pair the first half with the
# reversed second half and read the pairs off in order.
N = 16
t = vcat(1:2:N, N:-2:2)
reshape(hcat(t[1:N÷2], t[end:-1:N÷2+1])', N)

In [None]:
# Small cube whose entries are their own linear indices, so shuffles are easy to read.
Nx, Ny, Nz = 4, 4, 4
t = collect(reshape(1:(Nx * Ny * Nz), Nx, Ny, Nz))

In [None]:
# Forward shuffle along dimension 3: odd slices ascending, then even slices descending.
t = t[:, :, vcat(1:2:Nz, Nz:-2:2)]
# Undo it: stack the first half and the reversed second half along a new 4th
# dimension, then interleave them by swapping dimensions 3 and 4 and flattening.
t = cat(t[:, :, 1:Int(Nz/2)], reverse(t[:, :, Int(Nz/2)+1:end]; dims=3); dims=4)
reshape(permutedims(t, (1, 2, 4, 3)), Nx, Ny, Nz)

In [3]:
"""
    idct_dim3_gpu!(f)

Undo `dct_dim3_gpu!` in place along the third dimension of the complex 3-D array `f`.

The steps reverse those of the forward routine:

1. multiply slice `k` (0-based) by `0.5 * exp(im * π * k / (2Nz))`, the reciprocal
   of the forward factor;
2. run `ifft!(f, 3)`;
3. de-interleave: the first `cld(Nz, 2)` slices go to the odd (1-based) positions
   and the remaining slices, reversed, go to the even positions.

The factors are built on the host, converted to `eltype(f)`, and copied into an array
obtained from `similar(f, …)`. They therefore use the same array type and precision
as `f` instead of always being sent through `cu`.

Step 3 works for any `Nz`; the earlier `Int(Nz/2)` formulation threw for odd `Nz`
after `f` had already been modified.

NOTE(review): this inverts the *complex* output of `dct_dim3_gpu!`. Applying it to
purely real coefficients does not appear to reproduce `FFTW.REDFT01` (the notebook's
last cells show a complex result) — confirm before relying on it for real input.

`f` must have a complex element type. Returns `nothing`; the result is left in `f`.
"""
function idct_dim3_gpu!(f)
    _, _, Nz = size(f)

    bphase = 0.5 * exp.(1im * π * (0:Nz-1) / (2 * Nz))

    # A 1×1×Nz array broadcasts over the first two dimensions, so no `repeat`
    # is needed.
    bfactors = similar(f, eltype(f), (1, 1, Nz))
    copyto!(bfactors, convert(Array{eltype(f)}, reshape(bphase, 1, 1, Nz)))
    f .*= bfactors

    ifft!(f, 3)

    # Snapshot the transformed data so the two assignments below do not read
    # slices that have already been overwritten.
    half = cld(Nz, 2)
    v = copy(f)
    f[:, :, 1:2:Nz] = v[:, :, 1:half]
    f[:, :, 2:2:Nz] = v[:, :, Nz:-1:half+1]

    return nothing
end

idct_dim3_gpu! (generic function with 1 method)

In [None]:
# Round-trip check on a deterministic input: forward transform, compare with FFTW,
# inverse transform, compare again.
Nx, Ny, Nz = 16, 8, 4
x = Complex{Float64}.(reshape(collect(1:Nx*Ny*Nz), Nx, Ny, Nz))
y = copy(x)

# Forward: FFT-based routine on x, FFTW REDFT10 reference on y.
dct_dim3_gpu!(x)
FFTW.r2r!(y, FFTW.REDFT10, 3);

@show @test real.(x) ≈ real.(y)

# Inverse: FFT-based routine on x, FFTW REDFT01 reference on y.
idct_dim3_gpu!(x)
FFTW.r2r!(y, FFTW.REDFT01, 3);
# Rescale the reference by 1/(2Nz); presumably this removes the scaling left by
# FFTW's unnormalised REDFT10/REDFT01 pair — confirm against the FFTW docs.
@. y /= 2Nz;

@show @test real.(x) ≈ real.(y);

In [None]:
# Inspect the real part of the FFT-based result.
map(real, x)

In [None]:
# Inspect the real part of the FFTW reference result.
map(real, y)

In [None]:
# Element-wise ratio of the two real parts; a constant value would point to a
# missing normalisation factor.
real.(x) ./ real.(y)

In [None]:
# Same round trip as the previous check, with a forward and inverse FFT over
# dimensions 1 and 2 inserted between the dimension-3 transforms.
Nx, Ny, Nz = 16, 8, 4
x = Complex{Float64}.(reshape(collect(1:Nx*Ny*Nz), Nx, Ny, Nz))
y = copy(x)

# Forward along dimension 3: FFT-based routine on x, FFTW REDFT10 reference on y.
dct_dim3_gpu!(x)
FFTW.r2r!(y, FFTW.REDFT10, 3);
@show @test real.(x) ≈ real.(y)

# Forward FFT over dimensions 1 and 2 ...
FFTW.fft!(x, [1, 2])
FFTW.fft!(y, [1, 2])

# ... immediately undone by the inverse FFT over the same dimensions.
FFTW.ifft!(x, [1, 2])
FFTW.ifft!(y, [1, 2])

# Inverse along dimension 3: FFT-based routine on x, FFTW REDFT01 reference on y.
idct_dim3_gpu!(x)
FFTW.r2r!(y, FFTW.REDFT01, 3);
# Rescale the reference by 1/(2Nz); presumably this removes the scaling left by
# FFTW's unnormalised REDFT10/REDFT01 pair — confirm against the FFTW docs.
@. y /= 2Nz;

@show @test real.(x) ≈ real.(y);

In [None]:
# Run the same pipeline on a `cu` array (x, kx) and on host copies (y, ky), and
# compare the two after each stage.
Nx, Ny, Nz = 16, 8, 4

x = cu(rand(Complex{Float32}, Nx, Ny, Nz))
x .= real.(x)  # drop the imaginary part while keeping complex storage

# Host copy of x.
y = copy(Array(x))

# Real divisor used between the forward and inverse transforms, plus its host copy.
kx = cu(rand(Float32, Nx, Ny, Nz))
ky = Array(kx)

# The inputs must agree exactly before anything is transformed.
@show @test x == y
@show @test kx == ky

# Forward along dimension 3: FFT-based routine on x, FFTW REDFT10 reference on y.
dct_dim3_gpu!(x)
FFTW.r2r!(y, FFTW.REDFT10, 3);

@show @test real.(x) ≈ real.(y)

# Forward FFT over dimensions 1 and 2.
FFTW.fft!(x, [1, 2])
FFTW.fft!(y, [1, 2])

# Element-wise divide by the (negated) divisor.
# NOTE(review): looks like a stand-in for a spectral solve step — confirm.
@. x = -x / kx
@. y = -y / ky

# Inverse FFT over dimensions 1 and 2.
FFTW.ifft!(x, [1, 2])
FFTW.ifft!(y, [1, 2])

@show @test real.(x) ≈ real.(y);

# Inverse along dimension 3: FFT-based routine on x, FFTW REDFT01 reference on y.
idct_dim3_gpu!(x)
FFTW.r2r!(y, FFTW.REDFT01, 3);
# Rescale the reference by 1/(2Nz); presumably this removes the scaling left by
# FFTW's unnormalised REDFT10/REDFT01 pair — confirm against the FFTW docs.
@. y /= 2Nz;

@show @test real.(x) ≈ real.(y);

In [17]:
# Debug cell: apply the inverse transform to real-valued data, with the steps of
# idct_dim3_gpu! written out inline on x and FFTW's REDFT01 as the reference on y.
Nx, Ny, Nz = 16, 8, 4

x = cu(rand(Complex{Float32}, Nx, Ny, Nz))
x .= real.(x)  # drop the imaginary part while keeping complex storage

# Host copy of x.
y = copy(Array(x))

# Reference inverse on the host, rescaled by 1/(2Nz).
# idct_dim3_gpu!(x)
FFTW.r2r!(y, FFTW.REDFT01, 3);
@. y /= 2Nz;

# Inline copy of the idct_dim3_gpu! steps: scale, inverse FFT, de-interleave.
bfactors = 0.5 * exp.(collect(1im*π*(0:Nz-1) / (2*Nz)))
x .*= cu(repeat(reshape(bfactors, 1, 1, Nz), Nx, Ny, 1))
ifft!(x, 3)
# Both halves must be read from x. The second slice previously read from `f`,
# which only exists as the argument name inside idct_dim3_gpu! and is undefined here.
x .= cu(reshape(
    permutedims(
        cat(x[:, :, 1:Int(Nz/2)], x[:, :, end:-1:Int(Nz/2)+1]; dims=4),
        (1, 2, 4, 3),
    ),
    Nx, Ny, Nz,
))

In [13]:
y[:, :, 1]

16×8 Array{Complex{Float32},2}:
 0.432557+0.0im  0.552097+0.0im  …  0.324695+0.0im  0.181245+0.0im
 0.341684+0.0im  0.292073+0.0im     0.335509+0.0im  0.379299+0.0im
 0.257901+0.0im  0.303876+0.0im     0.405432+0.0im  0.408953+0.0im
 0.437629+0.0im  0.211984+0.0im     0.279617+0.0im  0.341208+0.0im
   0.4461+0.0im  0.119644+0.0im      0.21366+0.0im  0.170343+0.0im
 0.289897+0.0im  0.339579+0.0im  …  0.239026+0.0im  0.207095+0.0im
 0.264088+0.0im  0.129622+0.0im     0.294096+0.0im  0.507927+0.0im
 0.272716+0.0im  0.198426+0.0im     0.340131+0.0im  0.120046+0.0im
 0.103531+0.0im  0.400599+0.0im     0.377542+0.0im  0.203129+0.0im
 0.245047+0.0im  0.452354+0.0im      0.25429+0.0im  0.385653+0.0im
 0.335605+0.0im  0.238777+0.0im  …  0.279498+0.0im  0.343935+0.0im
 0.280251+0.0im  0.434311+0.0im      0.27354+0.0im  0.148348+0.0im
 0.406978+0.0im  0.351335+0.0im     0.456173+0.0im  0.335022+0.0im
 0.347995+0.0im  0.337335+0.0im      0.27507+0.0im  0.384047+0.0im
 0.229173+0.0im  0.360627+0.0i

In [18]:
x[:, :, 1]

16×8 CuArray{Complex{Float32},2}:
 0.0740363+0.0682404im   0.179421+0.146899im   …   0.180332+0.116598im 
  0.163806+0.126536im    0.283922+0.144633im       0.240142+0.16602im  
  0.172272+0.103766im    0.222357+0.124849im       0.183608+0.0991422im
  0.191064+0.0786579im   0.166621+0.113537im       0.172541+0.143816im 
  0.140553+0.139211im    0.122851+0.0996444im       0.18473+0.114874im 
 0.0847755+0.0293257im  0.0791336+0.0384683im  …   0.156292+0.0953667im
   0.24744+0.153517im    0.262134+0.179852im       0.307522+0.198364im 
  0.301886+0.136331im    0.202356+0.0787567im       0.17534+0.0647393im
  0.203461+0.12276im     0.203163+0.112372im       0.193984+0.130052im 
   0.22002+0.180588im    0.218942+0.165592im       0.245507+0.116678im 
  0.133594+0.0608431im    0.16661+0.0974097im  …   0.204075+0.142037im 
   0.24311+0.143036im    0.150306+0.0359469im      0.283273+0.195291im 
  0.242428+0.113578im    0.121388+0.140933im         0.1888+0.0686948im
  0.169367+0.112091im    0.216