diff --git a/README.md b/README.md index fee5f45..8d717fb 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # [QD](https://github.com/eschnett/QD.jl) -A Julia library wrapping the [QD +A Julia library wrapping David H. Bailey's [QD package](http://crd-legacy.lbl.gov/~dhbailey/mpdist/), providing high precision `Float128` and `Float256` arithmetic. @@ -10,3 +10,73 @@ precision `Float128` and `Float256` arithmetic. (Note: The test coverage is actually much higher. Julia's coverage calculator misses many source code lines that are actually executed.) + +## Example + +This package provides two new types `Float128` and `Float256` that +have a much higher precision than `Float64` -- they have about 30 and +60 digits of precision, respectively. + +```Julia +julia> big(1/3) +3.33333333333333314829616256247390992939472198486328125e-01 + +julia> big(one(Float128)/3) +3.333333333333333333333333333333323061706963268075450368117639547054301130124543e-01 + +julia> big(one(Float256)/3) +3.333333333333333333333333333333333333333333333333333333333333333301681440847467e-01 + +julia> big(Float64(π)) +3.141592653589793115997963468544185161590576171875 + +julia> big(Float128(π)) +3.141592653589793238462643383279505878966979117714660462569212467758006379625613 + +julia> big(Float256(π)) +3.141592653589793238462643383279502884197169399375105820974944592302144174306569 + +julia> big(π) +3.141592653589793238462643383279502884197169399375105820974944592307816406286198 +``` + +There is not much else to say. All basic arithmetic (`+` `-` `*` `/` +`^`) and most elementary function (`sqrt`, `sin`, `cos`, ...) are +supported. + +The main advantage of `Float128` and `Float256` over `BigFloat` is +their speed and their compact representation which doesn't require +memory allocations: + +```Julia +julia> using BenchmarkTools +julia> using LinearAlgebra +julia> n=100; + +julia> A64 = rand(Float64, n, n); +julia> B64 = @btime inv(A64); + 332.547 μs (6 allocations: 129.27 KiB) +julia> norm(B64 * A64 - I) +2.002496069046987e-12 + +julia> A128 = rand(Float128, n, n); +julia> B128 = @btime inv(A128); + 30.359 ms (15 allocations: 474.42 KiB) +julia> norm(B128 * A128 - I) +3.96457501799424717532464111284210e-29 + +julia> A256 = rand(Float256, n, n); +julia> B256 = @btime inv(A256); + 141.966 ms (15 allocations: 946.14 KiB) +julia> norm(B256 * A256 - I) +9.5568080248899741307802777589483931559896810980339187916348988423e-62 + +julia> Abig = rand(BigFloat, n, n); +julia> Bbig = @btime inv(Abig); + 272.190 ms (5334037 allocations: 285.10 MiB) +julia> norm(Bbig * Abig - I) +1.582347286040459990308714227387729179609262775249068116182950508091290236546631e-73 +``` + +(Times measured on a 2.8 GHz Intel Core i7.) + diff --git a/src/QD128.jl b/src/QD128.jl index f16613b..1b0c1a8 100644 --- a/src/QD128.jl +++ b/src/QD128.jl @@ -27,7 +27,7 @@ function Float128(x::Float64) ccall((:c_dd_copy_d, libqd), Cvoid, (Cdouble, Ref{D2}), x, r) Float128(r[]) end -function Float128(x::Union{Int8, Int16, Int32, UInt8, UInt16, UInt32, +function Float128(x::Union{Bool, Int8, Int16, Int32, UInt8, UInt16, UInt32, Float16, Float32}) Float128(Float64(x)) end @@ -36,6 +36,9 @@ function Float128(x::Union{Int64, UInt64}) lo = x - hi Float128(Float64(hi)) + Float128(Float64(lo)) end +function Float128(x::Rational) + Float128(numerator(x)) / Float128(denominator(x)) +end function Float128(x::BigFloat) r1 = Float128(Float64(x)) x -= BigFloat(r1) @@ -396,6 +399,14 @@ function Base.rand(::Type{Float128}) ccall((:c_dd_rand, libqd), Cvoid, (Ref{D2},), r) Float128(r[]) end +function Base.rand(::Type{Float128}, dims::Dims) + r = Array{Float128}(undef, dims) + for i in LinearIndices(r) + r[i] = rand(Float128) + end + r +end +Base.rand(::Type{Float128}, dims::Integer...) = rand(Float128, Dims(dims)) diff --git a/src/QD256.jl b/src/QD256.jl index d0f86e0..6042866 100644 --- a/src/QD256.jl +++ b/src/QD256.jl @@ -32,7 +32,7 @@ function Float256(x::Float128) ccall((:c_qd_copy_dd, libqd), Cvoid, (Ref{D2}, Ref{D4}), x.d2, r) Float256(r[]) end -function Float256(x::Union{Int8, Int16, Int32, UInt8, UInt16, UInt32, +function Float256(x::Union{Bool, Int8, Int16, Int32, UInt8, UInt16, UInt32, Float16, Float32}) Float256(Float64(x)) end @@ -41,6 +41,9 @@ function Float256(x::Union{Int64, UInt64}) lo = x - hi Float256(Float64(hi)) + Float256(Float64(lo)) end +function Float256(x::Rational) + Float256(numerator(x)) / Float256(denominator(x)) +end function Float256(x::BigFloat) r1 = Float256(Float64(x)) x -= BigFloat(r1) @@ -461,6 +464,14 @@ function Base.rand(::Type{Float256}) ccall((:c_qd_rand, libqd), Cvoid, (Ref{D4},), r) Float256(r[]) end +function Base.rand(::Type{Float256}, dims::Dims) + r = Array{Float256}(undef, dims) + for i in LinearIndices(r) + r[i] = rand(Float256) + end + r +end +Base.rand(::Type{Float256}, dims::Integer...) = rand(Float256, Dims(dims))