Review all uses of `int` and switch to `size_t` or `auto` where necessary.

This fixes some hidden 64-bit bugs, like always using the fallback path for matrix solving. Also, add a trivial fix to SVD.
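The convention the commit applies throughout scid/lapack.d is to keep dimensions and indices as `size_t` (or `auto`) inside the D code and narrow them to `int` only at the Fortran-style LAPACK boundary, using the small `toi` helper added in the diff below. The following is a minimal, self-contained sketch of that convention, not code from the repository; `fortranStyleRoutine` is a hypothetical stand-in for the real bindings.

```d
import std.conv : to;

// Narrow a size_t to the int expected by a Fortran-style binding.
// std.conv.to throws ConvOverflowException if the value does not fit in int.
int toi(size_t x) { return to!int(x); }

// Hypothetical stand-in for an extern LAPACK routine that takes int sizes.
void fortranStyleRoutine(int m, int n) { assert(m >= 0 && n >= 0); }

void main()
{
    size_t rows = 3, cols = 4;                 // dimensions stay size_t in D code
    fortranStyleRoutine(toi(rows), toi(cols)); // narrowed only at the boundary

    // Indices derived from size_t values use size_t/auto rather than int,
    // since an int index can silently truncate large sizes on 64-bit targets.
    size_t total = 0;
    foreach (i; 0 .. rows * cols)
        total += i;                            // i is size_t here, not int
    assert(total == rows * cols * (rows * cols - 1) / 2);
}
```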
commit b4a7cde134e470a6b6d8807a93c798db5adfacad 1 parent b7db96f
David Simcha authored
Showing with 1,074 additions and 1,068 deletions.
  1. +495 −495 scid/lapack.d
  2. +112 −106 scid/linalg.d
  3. +467 −467 scid/ops/common.d
990 scid/lapack.d
@@ -1,495 +1,495 @@
-module scid.lapack;
-import scid.blas;
-import scid.common.traits, scid.common.meta;
-import std.algorithm, std.math, std.conv;
-import std.ascii, std.exception;
-
-// debug = lapackCalls;
-//version = nodeps;
-
-debug( lapackCalls ) {
- import std.stdio;
- import scid.internal.assertmessages;
-}
-
-
-
-version( nodeps ) {
- private enum forceNaive = true;
-} else {
- private enum forceNaive = false;
- static import scid.bindings.lapack.dlapack;
- alias scid.bindings.lapack.dlapack lapack_;
-
-}
-
-// Save typing
-int toi(size_t x) { return to!int(x); }
-
-struct lapack {
- static void laswp( T )( size_t n, T *a, size_t lda, size_t k1, size_t k2, int *ipiv, size_t incx ) {
- debug( lapackCalls )
- writef( "laswp( %s, %s, %s, %s ) ", matrixToString( 'n', n, n, a, lda ), k1, k2, stridedToString( ipiv, n, incx ) );
-
- naive_.laswp( n, a, lda, k1, k2, ipiv, incx );
-
- debug( lapackCalls ) {
- writeln( "=> ", matrixToString( 'n', n, n, a, lda ) );
- }
-
- }
-
- static void getrs( char trans, T )( size_t n, size_t nrhs, T *a, size_t lda, int *ipiv, T *b, size_t ldb, ref int info ) {
- debug( lapackCalls )
- writef( "getrs( %s, %s, %s ) ", matrixToString( trans, n, n, a, lda ), ipiv[ 0 .. n ], matrixToString( trans, n, nrhs, b, ldb ) );
-
- static if( isFortranType!T && !forceNaive )
- lapack_.getrs( trans, toi(n), toi(nrhs), a, toi(lda), ipiv, b, toi(ldb), info );
- else
- naive_.xgetrs!(trans, 'L')( n, nrhs, a, lda, ipiv, b, ldb, info );
-
- debug( lapackCalls )
- writeln( "=> ", matrixToString( trans, n, nrhs, b, ldb ) );
- }
-
- static void gesv( T )( size_t n, size_t nrhs, T *a, size_t lda, int* ipiv, T *b, size_t ldb, ref int info ) {
- debug( lapackCalls )
- writef( "gesv( %s, %s, %s ) ", matrixToString( trans, n, n, a, lda ), matrixToString( trans, n, nrhs, b, ldb ) );
-
- static if( isFortranType!T && !forceNaive )
- lapack_.gesv( toi(n), toi(nrhs), a, toi(lda), ipiv, b, toi(ldb), info );
- else
- naive_.gesv( n, nrhs, a, lda, ipiv, b, ldb, info );
-
- debug( lapackCalls )
- writeln( "=> ", matrixToString( trans, n, nrhs, b, ldb ) );
- }
-
- static void getrf( T )( size_t m, size_t n, T* a, size_t lda, int* ipiv, ref int info ) {
- debug( lapackCalls )
- writef( "getrf( %s ) ", matrixToString( 'N', m, n, a, lda ) );
-
- static if( isFortranType!T && !forceNaive )
- lapack_.getrf( toi(m), toi(n), a, toi(lda), ipiv, info );
- else
- naive_.getrf( m, n, a, lda, ipiv, info );
-
- debug( lapackCalls )
- writeln( "=> ", matrixToString( 'N', m, n, a, lda ) );
- }
-
- static void trtri( char uplo, char diag, T )( size_t n, T *a, size_t lda, ref int info ) {
- debug( lapackCalls )
- writef( "trtri( %s, %s ) ", uplo, matrixToString( 'N', n, n, a, lda ) );
-
- static if( isFortranType!T && !forceNaive )
- lapack_.trtri( uplo, diag, toi(n), a, toi(lda), info );
- else
- naive_.trtri!( uplo, diag )( n, a, lda, info );
-
- debug( lapackCalls )
- writeln( "=> ", matrixToString( 'N', n, n, a, lda ) );
- }
-
- static void getri( T )( size_t n, T* a, size_t lda, int* ipiv, T* work, size_t lwork, ref int info ) {
- debug( lapackCalls )
- writef( "getri( %s ) ", matrixToString( 'N', n, n, a, lda ) );
-
- static if( isFortranType!T && !forceNaive )
- lapack_.getri( toi(n), a, toi(lda), ipiv, work, toi(lwork), info );
- else
- naive_.getri( n, a, lda, ipiv, work, lwork, info );
-
- debug( lapackCalls )
- writeln( "=> ", matrixToString( 'N', n, n, a, lda ) );
- }
-
- static void potrf( char uplo, T )( size_t n, T* a, size_t lda, ref int info ) {
- debug( lapackCalls )
- writef( "potrf( %s, %s, %s, %s ) ", uplo_, matrixToString( uplo, n, n, a, lda ), lda, info );
-
- static if( isFortranType!T && !forceNaive )
- lapack_.potrf( uplo, toi(n), a, toi(lda), info );
- else
- naive_.potrf!( uplo )( toi(n), a, toi(lda), info );
- }
-
- // Extended LAPACK
- static void xgetrs( char trans, char side, T )( size_t n, size_t nrhs, T *a, size_t lda, int *ipiv, T *b, size_t ldb, ref int info ) {
- debug( lapackCalls )
- writef( "xgetrs( %s, %s, %s, %s ) ", side, matrixToString( trans, n, n, a, lda ), ipiv[ 0 .. n ], matrixToString( trans, n, nrhs, b, ldb ) );
-
- naive_.xgetrs!( trans, side )( n, nrhs, a, lda, ipiv, b, ldb, info );
-
- debug( lapackCalls )
- writeln( "=> ", matrixToString( trans, n, nrhs, b, ldb ) );
- }
-}
-
-private struct naive_ {
- private static void reportNaive_() {
- debug( lapackCalls )
- write( "<n> " );
- }
-
- private static void reportNaiveln_() {
- debug( lapackCalls )
- writeln( "<n> ..." );
- }
-
- static void laswp( T )( size_t n, T *a, size_t lda, size_t k1, size_t k2, int *ipiv, size_t incx ) {
- reportNaiveln_();
-
- // convert FORTRAN indices
- --k1; --k2;
-
- enforce( n >= 0 );
- enforce( incx != 0 );
- enforce( a );
- enforce( ipiv );
-
- if( n == 0 )
- return;
-
- if( incx > 0 ) {
- for( int i = k1; i <= k2 ; ++ i ) {
- int pivot = ipiv[ i ] - 1; // convert FORTRAN index
- if( pivot != i )
- blas.swap( n, a + i, lda, a + pivot, lda );
- }
- } else {
- for( int i = k2; i >= k1 ; -- i ) {
- int pivot = ipiv[ i ] - 1; // convert FORTRAN index
- if( pivot != i )
- blas.swap( n, a + i, lda, a + pivot, lda );
- }
- }
- }
-
- static void xgetrs( char trans_, char side_, T )( size_t n, size_t nrhs, T *a, size_t lda, int *ipiv, T *b, size_t ldb, ref int info ) {
- enum trans = cast(char) toUpper( trans_ );
- enum side = cast(char) toUpper( side_ );
-
- reportNaiveln_();
-
- enforce( n >= 0 );
- enforce( nrhs >= 0 );
- enforce( lda >= max( 1, n ) );
- enforce( ldb >= max( 1, n ) );
-
- if( n == 0 || nrhs == 0 )
- return;
-
- static if( trans == 'N' ) {
- static if( side == 'R' ) {
- blas.trsm!( 'R', 'U', 'N', 'N' )( n, nrhs, One!T, a, lda, b, ldb );
- blas.trsm!( 'R', 'L', 'N', 'U' )( n, nrhs, One!T, a, lda, b, ldb );
- for( int i = n - 1; i >= 0 ; -- i ) {
- int pivot = ipiv[ i ] - 1;
- if( pivot != i )
- blas.swap( n, b + i * ldb, 1, b + pivot * ldb, 1 );
- }
- } else if( side == 'L' ) {
- lapack.laswp( nrhs, b, ldb, 1, n, ipiv, 1 );
- blas.trsm!( 'L', 'L', 'N', 'U' )( n, nrhs, One!T, a, lda, b, ldb );
- blas.trsm!( 'L', 'U', 'N', 'N' )( n, nrhs, One!T, a, lda, b, ldb );
-
- }
- } else {
- static if( side == 'R' ) {
- for( int i = 0; i < n ; ++ i ) {
- int pivot = ipiv[ i ] - 1;
- if( pivot != i )
- blas.swap( n, b + i * ldb, 1, b + pivot * ldb, 1 );
- }
- blas.trsm!( 'R', 'L', trans, 'U' )( n, nrhs, One!T, a, lda, b, ldb );
- blas.trsm!( 'R', 'U', trans, 'N' )( n, nrhs, One!T, a, lda, b, ldb );
- } else static if( side == 'L' ) {
- blas.trsm!( side, 'U', trans, 'N' )( n, nrhs, One!T, a, lda, b, ldb );
- blas.trsm!( side, 'L', trans, 'U' )( n, nrhs, One!T, a, lda, b, ldb );
- lapack.laswp( nrhs, b, ldb, 1, n, ipiv, -1 );
- }
- }
- }
-
- static void gesv( T )( size_t n, size_t nrhs, T *a, size_t lda, int* ipiv, T *b, size_t ldb, ref int info ) {
- reportNaiveln_();
-
- enforce( n >= 0 );
- enforce( nrhs >= 0 );
- enforce( lda >= max( 1, n ) );
- enforce( ldb >= max( 1, n ) );
-
- lapack.getrf( toi(n),toi(n), a, toi(lda), ipiv, info );
- if( info == 0 )
- lapack.getrs!'N'( toi(n), toi(nrhs), a, toi(lda), ipiv, b, toi(ldb), info );
- }
-
- static void getri( T )( size_t n, T* a, size_t lda, int* ipiv, T* work, size_t lwork, ref int info ) {
- reportNaiveln_();
- info = 0;
-
- work[ 0 ] = n * 2;
- if( lwork == -1 ) {
- info = 0;
- return;
- }
-
- if( n == 0 )
- return;
-
- T get( size_t i, size_t j ) {
- return a[ j * lda + i ];
- }
-
- void set( string op = "" )( T x, size_t i, size_t j ) {
- mixin("a[ j * lda + i ] "~op~"= x;");
- }
-
- lapack.trtri!( 'U', 'N' )( n, a, lda, info );
- if( info > 0 )
- return ;
-
- for( int j = toi(n) - 1; j >= 0 ; -- j ) {
- foreach( i ; j + 1 .. n ) {
- work[ i ] = get( i, j );
- set( Zero!T, i, j );
- }
-
-
- if( j < n-1 ) {
- blas.gemv!'N'( n, n - j - 1, MinusOne!T, a + (j+1)*lda,
- lda, work + j + 1, 1, One!T, a + j * lda, 1 );
- }
- }
-
- for( int j = toi(n) - 1 ; j >= 0 ; -- j ) {
- int pivot = ipiv[ j ] - 1; // convert from FORTRAN index
- if( pivot != j )
- blas.swap( n, a + j * lda, 1, a + pivot * lda, 1 );
- }
-
- }
-
- static void trtri( char uplo_, char diag_, T )( size_t n, T *a, size_t lda, ref int info ) {
- reportNaiveln_();
-
- enum char uplo = toUpper(uplo_);
- enum char diag = toUpper(diag_);
-
- T get( size_t i, size_t j ) {
- return a[ j * lda + i ];
- }
-
- void set( string op = "" )( T x, size_t i, size_t j ) {
- mixin("a[ j * lda + i ] "~op~"= x;");
- }
-
- T ajj;
- if( uplo == 'U' ) {
- for( size_t j = 0; j < n; j++ ) {
- if( diag == 'N' ) {
- // assert( get( j, j ) != Zero!T, "fbti: Singular matrix in inverse." );
- if( get( j, j ) == Zero!T ) {
- info = j;
- return;
- }
-
- set( One!T / get(j,j), j, j );
- ajj = -get( j, j );
- } else {
- ajj = MinusOne!T;
- }
-
- blas.trmv!( 'U', 'N', diag )( j, a, lda, a + j*lda, 1 );
- blas.scal( j, ajj, a + j*lda, 1 );
- }
- } else {
- for( size_t j = n - 1 ; j >= 0 ; -- j ) {
- if( diag == 'N' ) {
- // assert( get( j, j ) != Zero!T, "fbti: Singular matrix in inverse." );
- set( One!T / get(j,j), j, j );
- ajj = -get( j, j );
- } else {
- ajj = MinusOne!T;
- }
-
- if( j < n-1 ) {
- blas.trmv!( 'L', 'N', diag )( n - 1 - j, a + (j+1)*lda + j+1, lda, a + j*lda + j+1, 1 );
- blas.scal( n - j, ajj, a + (j+1) * lda + j, 1 );
- }
- }
- }
- }
-
- static void getrf( T )( size_t m, size_t n, T* a, size_t lda, int* pivot, ref int info ) {
- reportNaiveln_();
-
- n = min( m, n );
-
- T get( size_t i, size_t j ) {
- return a[ j * lda + i ];
- }
-
- void set( string op = "" )( T x, size_t i, size_t j ) {
- mixin("a[ j * lda + i ] "~op~"= x;");
- }
-
- for( size_t k = 0; k < n; k++ ) {
- pivot[ k ] = k;
- T maxSoFar = abs( get( k, k ) );
- for( size_t j = k + 1; j < n; j++ ) {
- T cur = abs( get(j, k) );
- if( maxSoFar <= cur ) {
- maxSoFar = cur;
- pivot[ k ] = j;
- }
- }
-
- if( pivot[ k ] != k ) {
- foreach( j ; 0 .. n ) {
- T aux = get(k, j);
- set( get(pivot[k], j), k, j );
- set( aux, pivot[k], j );
- }
- }
-
- if( get(k,k) != Zero!T ) {
-
- foreach( i ; k + 1 .. n )
- set!"/"( get(k,k), i, k );
-
- foreach( i ; k + 1 .. n ) {
- foreach( j ; k + 1 .. n ) {
- set!"-"( get(i,k) * get(k,j), i, j );
- }
- }
- } else if( info == 0 ) {
- info = k + 1;
- }
- ++ pivot[ k ]; // convert to FORTRAN index
- }
- }
-
- static void potrf( char uplo_, T )( size_t n, T* a, size_t lda, ref int info ) {
- reportNaiveln_();import std.stdio;
-
- // Borrowed from Don Clugston's MathExtra library, which he
- // gave me permission to relicense under Boost.
-
- // This algorithm is designed for row major matrices. Depending
- // on whether we're using the lower or upper column, transpose
- // either before or after doing the decomposition to avoid an
- // impedance mismatch.
-
- static if( uplo_ == 'U' ) {
- transposeSquare( a, n, lda );
- }
-
- ref T get( size_t i, size_t j ) nothrow {
- // This is correct because we're treating the matrix as row-
- // major.
- return a[ i * lda + j ];
- }
-
- foreach( i; 0..n ) {
- T sum = get(i, i);
-
- for( sizediff_t k = i - 1; k >= 0; --k ) {
- immutable ik = get( i, k );
- sum -= ik * ik;
- }
-
- auto arr1 = a[ i * lda..i * lda + i ];
-
- if (sum > 0.0) {
- get(i, i) = sqrt( sum );
-
- foreach( j; i + 1..n ) {
- import std.numeric;
- auto arr2 = a[ j * lda..j * lda + i ];
- immutable dot = dotProduct( arr1, arr2 );
-
- sum = get( i, j ) - dot;
- get( j, i ) = sum / get( i, i );
- }
- } else {
- info = toi( i );
- // not positive definite (could be caused by rounding errors)
- get( i, i ) = 0;
- // make this whole row zero so they have no further effect
- foreach( j; i + 1..n ) get( j, i ) = 0;
- }
- }
-
- static if( uplo_ == 'L' ) {
- transposeSquare( a, n, lda );
- }
- }
-
- unittest {
- import scid.matrix;
- import std.stdio;
-
- auto mat = Matrix!double( [
- [14.0, 20, 31],
- [0.0, 62, 79],
- [0.0, 0, 122],
- [-1.0, -1.0, -1.0] // Dummy row to test slicing.
- ] );
-
- auto sliced = mat.view( 0, 3, 0, 3 );
-
- int info;
- potrf!( 'U' )( 3, sliced.data, 4, info );
- assert( info == 0 );
-
- import std.math;
- alias approxEqual ae;
-
- assert( ae( sliced[ 0, 0 ], 3.74166 ) );
- assert( ae( sliced[ 0, 1 ], 5.34522 ) );
- assert( ae( sliced[ 0, 2 ], 8.28510 ) );
- assert( ae( sliced[ 1, 1 ], 5.78174 ) );
- assert( ae( sliced[ 1, 2 ], 6.00412 ) );
- assert( ae( sliced[ 2, 2 ], 4.16025 ) );
- stderr.writeln(sliced.pretty);
- auto mat2 = Matrix!double( [
- [14.0, 0, 0],
- [20.0, 62, 0],
- [31.0, 79, 122],
- [-1.0, -1.0, -1.0] // Dummy row to test slicing.
- ] );
-
- auto sliced2 = mat2.view( 0, 3, 0, 3 );
- potrf!( 'L' )( 3, sliced2.data, 4, info );
- assert( info == 0 );
- foreach( i; 0..3 ) foreach( j; 0..3 ) {
- assert( ae( sliced[ i, j ], sliced2[ j, i ] ) );
- }
- }
-}
-
-// Transposes a square general matrix in physical memory. This is useful for
-// making some matrix factorizations faster by improving memory locality,
-// and often has negligible overhead since most matrix factorizations are
-// O(N^3) and this is O(N^2) where N is the number of rows/columns.
-private void transposeSquare( T )( T* ptr, size_t n, size_t lda ) pure nothrow {
-
- ref T get( size_t i, size_t j ) nothrow {
- return ptr[ j * lda + i ];
- }
-
- foreach( i; 1..n ) {
- foreach( j; 0..i ) {
- swap( get( i, j ), get( j, i ) );
- }
- }
-}
-
-unittest {
- auto mat = [1.0, 2, 3, 4, 5, 6, 7, 8, 9];
- transposeSquare( mat.ptr, 3, 3 );
- assert( mat == [1.0, 4, 7, 2, 5, 8, 3, 6, 9] );
-}
-
+module scid.lapack;
+import scid.blas;
+import scid.common.traits, scid.common.meta;
+import std.algorithm, std.math, std.conv;
+import std.ascii, std.exception;
+
+// debug = lapackCalls;
+//version = nodeps;
+
+debug( lapackCalls ) {
+ import std.stdio;
+ import scid.internal.assertmessages;
+}
+
+
+
+version( nodeps ) {
+ private enum forceNaive = true;
+} else {
+ private enum forceNaive = false;
+ static import scid.bindings.lapack.dlapack;
+ alias scid.bindings.lapack.dlapack lapack_;
+
+}
+
+// Save typing
+int toi(size_t x) { return to!int(x); }
+
+struct lapack {
+ static void laswp( T )( size_t n, T *a, size_t lda, size_t k1, size_t k2, int *ipiv, size_t incx ) {
+ debug( lapackCalls )
+ writef( "laswp( %s, %s, %s, %s ) ", matrixToString( 'n', n, n, a, lda ), k1, k2, stridedToString( ipiv, n, incx ) );
+
+ naive_.laswp( n, a, lda, k1, k2, ipiv, incx );
+
+ debug( lapackCalls ) {
+ writeln( "=> ", matrixToString( 'n', n, n, a, lda ) );
+ }
+
+ }
+
+ static void getrs( char trans, T )( size_t n, size_t nrhs, T *a, size_t lda, int *ipiv, T *b, size_t ldb, ref int info ) {
+ debug( lapackCalls )
+ writef( "getrs( %s, %s, %s ) ", matrixToString( trans, n, n, a, lda ), ipiv[ 0 .. n ], matrixToString( trans, n, nrhs, b, ldb ) );
+
+ static if( isFortranType!T && !forceNaive )
+ lapack_.getrs( trans, toi(n), toi(nrhs), a, toi(lda), ipiv, b, toi(ldb), info );
+ else
+ naive_.xgetrs!(trans, 'L')( n, nrhs, a, lda, ipiv, b, ldb, info );
+
+ debug( lapackCalls )
+ writeln( "=> ", matrixToString( trans, n, nrhs, b, ldb ) );
+ }
+
+ static void gesv( T )( size_t n, size_t nrhs, T *a, size_t lda, int* ipiv, T *b, size_t ldb, ref int info ) {
+ debug( lapackCalls )
+ writef( "gesv( %s, %s, %s ) ", matrixToString( trans, n, n, a, lda ), matrixToString( trans, n, nrhs, b, ldb ) );
+
+ static if( isFortranType!T && !forceNaive )
+ lapack_.gesv( toi(n), toi(nrhs), a, toi(lda), ipiv, b, toi(ldb), info );
+ else
+ naive_.gesv( n, nrhs, a, lda, ipiv, b, ldb, info );
+
+ debug( lapackCalls )
+ writeln( "=> ", matrixToString( trans, n, nrhs, b, ldb ) );
+ }
+
+ static void getrf( T )( size_t m, size_t n, T* a, size_t lda, int* ipiv, ref int info ) {
+ debug( lapackCalls )
+ writef( "getrf( %s ) ", matrixToString( 'N', m, n, a, lda ) );
+
+ static if( isFortranType!T && !forceNaive )
+ lapack_.getrf( toi(m), toi(n), a, toi(lda), ipiv, info );
+ else
+ naive_.getrf( m, n, a, lda, ipiv, info );
+
+ debug( lapackCalls )
+ writeln( "=> ", matrixToString( 'N', m, n, a, lda ) );
+ }
+
+ static void trtri( char uplo, char diag, T )( size_t n, T *a, size_t lda, ref int info ) {
+ debug( lapackCalls )
+ writef( "trtri( %s, %s ) ", uplo, matrixToString( 'N', n, n, a, lda ) );
+
+ static if( isFortranType!T && !forceNaive )
+ lapack_.trtri( uplo, diag, toi(n), a, toi(lda), info );
+ else
+ naive_.trtri!( uplo, diag )( n, a, lda, info );
+
+ debug( lapackCalls )
+ writeln( "=> ", matrixToString( 'N', n, n, a, lda ) );
+ }
+
+ static void getri( T )( size_t n, T* a, size_t lda, int* ipiv, T* work, size_t lwork, ref int info ) {
+ debug( lapackCalls )
+ writef( "getri( %s ) ", matrixToString( 'N', n, n, a, lda ) );
+
+ static if( isFortranType!T && !forceNaive )
+ lapack_.getri( toi(n), a, toi(lda), ipiv, work, toi(lwork), info );
+ else
+ naive_.getri( n, a, lda, ipiv, work, lwork, info );
+
+ debug( lapackCalls )
+ writeln( "=> ", matrixToString( 'N', n, n, a, lda ) );
+ }
+
+ static void potrf( char uplo, T )( size_t n, T* a, size_t lda, ref int info ) {
+ debug( lapackCalls )
+ writef( "potrf( %s, %s, %s, %s ) ", uplo_, matrixToString( uplo, n, n, a, lda ), lda, info );
+
+ static if( isFortranType!T && !forceNaive )
+ lapack_.potrf( uplo, toi(n), a, toi(lda), info );
+ else
+ naive_.potrf!( uplo )( toi(n), a, toi(lda), info );
+ }
+
+ // Extended LAPACK
+ static void xgetrs( char trans, char side, T )( size_t n, size_t nrhs, T *a, size_t lda, int *ipiv, T *b, size_t ldb, ref int info ) {
+ debug( lapackCalls )
+ writef( "xgetrs( %s, %s, %s, %s ) ", side, matrixToString( trans, n, n, a, lda ), ipiv[ 0 .. n ], matrixToString( trans, n, nrhs, b, ldb ) );
+
+ naive_.xgetrs!( trans, side )( n, nrhs, a, lda, ipiv, b, ldb, info );
+
+ debug( lapackCalls )
+ writeln( "=> ", matrixToString( trans, n, nrhs, b, ldb ) );
+ }
+}
+
+private struct naive_ {
+ private static void reportNaive_() {
+ debug( lapackCalls )
+ write( "<n> " );
+ }
+
+ private static void reportNaiveln_() {
+ debug( lapackCalls )
+ writeln( "<n> ..." );
+ }
+
+ static void laswp( T )( size_t n, T *a, size_t lda, size_t k1, size_t k2, int *ipiv, size_t incx ) {
+ reportNaiveln_();
+
+ // convert FORTRAN indices
+ --k1; --k2;
+
+ enforce( n >= 0 );
+ enforce( incx != 0 );
+ enforce( a );
+ enforce( ipiv );
+
+ if( n == 0 )
+ return;
+
+ if( incx > 0 ) {
+ for( auto i = k1; i <= k2 ; ++ i ) {
+ int pivot = ipiv[ i ] - 1; // convert FORTRAN index
+ if( pivot != i )
+ blas.swap( n, a + i, lda, a + pivot, lda );
+ }
+ } else {
+ for( auto i = k2; i >= k1 ; -- i ) {
+ int pivot = ipiv[ i ] - 1; // convert FORTRAN index
+ if( pivot != i )
+ blas.swap( n, a + i, lda, a + pivot, lda );
+ }
+ }
+ }
+
+ static void xgetrs( char trans_, char side_, T )( size_t n, size_t nrhs, T *a, size_t lda, int *ipiv, T *b, size_t ldb, ref int info ) {
+ enum trans = cast(char) toUpper( trans_ );
+ enum side = cast(char) toUpper( side_ );
+
+ reportNaiveln_();
+
+ enforce( n >= 0 );
+ enforce( nrhs >= 0 );
+ enforce( lda >= max( 1, n ) );
+ enforce( ldb >= max( 1, n ) );
+
+ if( n == 0 || nrhs == 0 )
+ return;
+
+ static if( trans == 'N' ) {
+ static if( side == 'R' ) {
+ blas.trsm!( 'R', 'U', 'N', 'N' )( n, nrhs, One!T, a, lda, b, ldb );
+ blas.trsm!( 'R', 'L', 'N', 'U' )( n, nrhs, One!T, a, lda, b, ldb );
+ for( auto i = n - 1; i >= 0 ; -- i ) {
+ int pivot = ipiv[ i ] - 1;
+ if( pivot != i )
+ blas.swap( n, b + i * ldb, 1, b + pivot * ldb, 1 );
+ }
+ } else if( side == 'L' ) {
+ lapack.laswp( nrhs, b, ldb, 1, n, ipiv, 1 );
+ blas.trsm!( 'L', 'L', 'N', 'U' )( n, nrhs, One!T, a, lda, b, ldb );
+ blas.trsm!( 'L', 'U', 'N', 'N' )( n, nrhs, One!T, a, lda, b, ldb );
+
+ }
+ } else {
+ static if( side == 'R' ) {
+ for( auto i = 0; i < n ; ++ i ) {
+ int pivot = ipiv[ i ] - 1;
+ if( pivot != i )
+ blas.swap( n, b + i * ldb, 1, b + pivot * ldb, 1 );
+ }
+ blas.trsm!( 'R', 'L', trans, 'U' )( n, nrhs, One!T, a, lda, b, ldb );
+ blas.trsm!( 'R', 'U', trans, 'N' )( n, nrhs, One!T, a, lda, b, ldb );
+ } else static if( side == 'L' ) {
+ blas.trsm!( side, 'U', trans, 'N' )( n, nrhs, One!T, a, lda, b, ldb );
+ blas.trsm!( side, 'L', trans, 'U' )( n, nrhs, One!T, a, lda, b, ldb );
+ lapack.laswp( nrhs, b, ldb, 1, n, ipiv, -1 );
+ }
+ }
+ }
+
+ static void gesv( T )( size_t n, size_t nrhs, T *a, size_t lda, int* ipiv, T *b, size_t ldb, ref int info ) {
+ reportNaiveln_();
+
+ enforce( n >= 0 );
+ enforce( nrhs >= 0 );
+ enforce( lda >= max( 1, n ) );
+ enforce( ldb >= max( 1, n ) );
+
+ lapack.getrf( toi(n),toi(n), a, toi(lda), ipiv, info );
+ if( info == 0 )
+ lapack.getrs!'N'( toi(n), toi(nrhs), a, toi(lda), ipiv, b, toi(ldb), info );
+ }
+
+ static void getri( T )( size_t n, T* a, size_t lda, int* ipiv, T* work, size_t lwork, ref int info ) {
+ reportNaiveln_();
+ info = 0;
+
+ work[ 0 ] = n * 2;
+ if( lwork == -1 ) {
+ info = 0;
+ return;
+ }
+
+ if( n == 0 )
+ return;
+
+ T get( size_t i, size_t j ) {
+ return a[ j * lda + i ];
+ }
+
+ void set( string op = "" )( T x, size_t i, size_t j ) {
+ mixin("a[ j * lda + i ] "~op~"= x;");
+ }
+
+ lapack.trtri!( 'U', 'N' )( n, a, lda, info );
+ if( info > 0 )
+ return ;
+
+ for( int j = toi(n) - 1; j >= 0 ; -- j ) {
+ foreach( i ; j + 1 .. n ) {
+ work[ i ] = get( i, j );
+ set( Zero!T, i, j );
+ }
+
+
+ if( j < n-1 ) {
+ blas.gemv!'N'( n, n - j - 1, MinusOne!T, a + (j+1)*lda,
+ lda, work + j + 1, 1, One!T, a + j * lda, 1 );
+ }
+ }
+
+ for( int j = toi(n) - 1 ; j >= 0 ; -- j ) {
+ int pivot = ipiv[ j ] - 1; // convert from FORTRAN index
+ if( pivot != j )
+ blas.swap( n, a + j * lda, 1, a + pivot * lda, 1 );
+ }
+
+ }
+
+ static void trtri( char uplo_, char diag_, T )( size_t n, T *a, size_t lda, ref int info ) {
+ reportNaiveln_();
+
+ enum char uplo = toUpper(uplo_);
+ enum char diag = toUpper(diag_);
+
+ T get( size_t i, size_t j ) {
+ return a[ j * lda + i ];
+ }
+
+ void set( string op = "" )( T x, size_t i, size_t j ) {
+ mixin("a[ j * lda + i ] "~op~"= x;");
+ }
+
+ T ajj;
+ if( uplo == 'U' ) {
+ for( size_t j = 0; j < n; j++ ) {
+ if( diag == 'N' ) {
+ // assert( get( j, j ) != Zero!T, "fbti: Singular matrix in inverse." );
+ if( get( j, j ) == Zero!T ) {
+ info = j;
+ return;
+ }
+
+ set( One!T / get(j,j), j, j );
+ ajj = -get( j, j );
+ } else {
+ ajj = MinusOne!T;
+ }
+
+ blas.trmv!( 'U', 'N', diag )( j, a, lda, a + j*lda, 1 );
+ blas.scal( j, ajj, a + j*lda, 1 );
+ }
+ } else {
+ for( size_t j = n - 1 ; j >= 0 ; -- j ) {
+ if( diag == 'N' ) {
+ // assert( get( j, j ) != Zero!T, "fbti: Singular matrix in inverse." );
+ set( One!T / get(j,j), j, j );
+ ajj = -get( j, j );
+ } else {
+ ajj = MinusOne!T;
+ }
+
+ if( j < n-1 ) {
+ blas.trmv!( 'L', 'N', diag )( n - 1 - j, a + (j+1)*lda + j+1, lda, a + j*lda + j+1, 1 );
+ blas.scal( n - j, ajj, a + (j+1) * lda + j, 1 );
+ }
+ }
+ }
+ }
+
+ static void getrf( T )( size_t m, size_t n, T* a, size_t lda, int* pivot, ref int info ) {
+ reportNaiveln_();
+
+ n = min( m, n );
+
+ T get( size_t i, size_t j ) {
+ return a[ j * lda + i ];
+ }
+
+ void set( string op = "" )( T x, size_t i, size_t j ) {
+ mixin("a[ j * lda + i ] "~op~"= x;");
+ }
+
+ for( size_t k = 0; k < n; k++ ) {
+ pivot[ k ] = k;
+ T maxSoFar = abs( get( k, k ) );
+ for( size_t j = k + 1; j < n; j++ ) {
+ T cur = abs( get(j, k) );
+ if( maxSoFar <= cur ) {
+ maxSoFar = cur;
+ pivot[ k ] = j;
+ }
+ }
+
+ if( pivot[ k ] != k ) {
+ foreach( j ; 0 .. n ) {
+ T aux = get(k, j);
+ set( get(pivot[k], j), k, j );
+ set( aux, pivot[k], j );
+ }
+ }
+
+ if( get(k,k) != Zero!T ) {
+
+ foreach( i ; k + 1 .. n )
+ set!"/"( get(k,k), i, k );
+
+ foreach( i ; k + 1 .. n ) {
+ foreach( j ; k + 1 .. n ) {
+ set!"-"( get(i,k) * get(k,j), i, j );
+ }
+ }
+ } else if( info == 0 ) {
+ info = k + 1;
+ }
+ ++ pivot[ k ]; // convert to FORTRAN index
+ }
+ }
+
+ static void potrf( char uplo_, T )( size_t n, T* a, size_t lda, ref int info ) {
+ reportNaiveln_();import std.stdio;
+
+ // Borrowed from Don Clugston's MathExtra library, which he
+ // gave me permission to relicense under Boost.
+
+ // This algorithm is designed for row major matrices. Depending
+ // on whether we're using the lower or upper column, transpose
+ // either before or after doing the decomposition to avoid an
+ // impedance mismatch.
+
+ static if( uplo_ == 'U' ) {
+ transposeSquare( a, n, lda );
+ }
+
+ ref T get( size_t i, size_t j ) nothrow {
+ // This is correct because we're treating the matrix as row-
+ // major.
+ return a[ i * lda + j ];
+ }
+
+ foreach( i; 0..n ) {
+ T sum = get(i, i);
+
+ for( sizediff_t k = i - 1; k >= 0; --k ) {
+ immutable ik = get( i, k );
+ sum -= ik * ik;
+ }
+
+ auto arr1 = a[ i * lda..i * lda + i ];
+
+ if (sum > 0.0) {
+ get(i, i) = sqrt( sum );
+
+ foreach( j; i + 1..n ) {
+ import std.numeric;
+ auto arr2 = a[ j * lda..j * lda + i ];
+ immutable dot = dotProduct( arr1, arr2 );
+
+ sum = get( i, j ) - dot;
+ get( j, i ) = sum / get( i, i );
+ }
+ } else {
+ info = toi( i );
+ // not positive definite (could be caused by rounding errors)
+ get( i, i ) = 0;
+ // make this whole row zero so they have no further effect
+ foreach( j; i + 1..n ) get( j, i ) = 0;
+ }
+ }
+
+ static if( uplo_ == 'L' ) {
+ transposeSquare( a, n, lda );
+ }
+ }
+
+ unittest {
+ import scid.matrix;
+ import std.stdio;
+
+ auto mat = Matrix!double( [
+ [14.0, 20, 31],
+ [0.0, 62, 79],
+ [0.0, 0, 122],
+ [-1.0, -1.0, -1.0] // Dummy row to test slicing.
+ ] );
+
+ auto sliced = mat.view( 0, 3, 0, 3 );
+
+ int info;
+ potrf!( 'U' )( 3, sliced.data, 4, info );
+ assert( info == 0 );
+
+ import std.math;
+ alias approxEqual ae;
+
+ assert( ae( sliced[ 0, 0 ], 3.74166 ) );
+ assert( ae( sliced[ 0, 1 ], 5.34522 ) );
+ assert( ae( sliced[ 0, 2 ], 8.28510 ) );
+ assert( ae( sliced[ 1, 1 ], 5.78174 ) );
+ assert( ae( sliced[ 1, 2 ], 6.00412 ) );
+ assert( ae( sliced[ 2, 2 ], 4.16025 ) );
+ stderr.writeln(sliced.pretty);
+ auto mat2 = Matrix!double( [
+ [14.0, 0, 0],
+ [20.0, 62, 0],
+ [31.0, 79, 122],
+ [-1.0, -1.0, -1.0] // Dummy row to test slicing.
+ ] );
+
+ auto sliced2 = mat2.view( 0, 3, 0, 3 );
+ potrf!( 'L' )( 3, sliced2.data, 4, info );
+ assert( info == 0 );
+ foreach( i; 0..3 ) foreach( j; 0..3 ) {
+ assert( ae( sliced[ i, j ], sliced2[ j, i ] ) );
+ }
+ }
+}
+
+// Transposes a square general matrix in physical memory. This is useful for
+// making some matrix factorizations faster by improving memory locality,
+// and often has negligible overhead since most matrix factorizations are
+// O(N^3) and this is O(N^2) where N is the number of rows/columns.
+private void transposeSquare( T )( T* ptr, size_t n, size_t lda ) pure nothrow {
+
+ ref T get( size_t i, size_t j ) nothrow {
+ return ptr[ j * lda + i ];
+ }
+
+ foreach( i; 1..n ) {
+ foreach( j; 0..i ) {
+ swap( get( i, j ), get( j, i ) );
+ }
+ }
+}
+
+unittest {
+ auto mat = [1.0, 2, 3, 4, 5, 6, 7, 8, 9];
+ transposeSquare( mat.ptr, 3, 3 );
+ assert( mat == [1.0, 4, 7, 2, 5, 8, 3, 6, 9] );
+}
+
218 scid/linalg.d
@@ -20,8 +20,8 @@ import scid.common.traits;
import scid.ops.eval;
/**
-Performs Cholesky decomposition of a Hermitian, positive definite matrix.
-Assuming the input matrix is named A, Cholesky decomposition has the
+Performs Cholesky decomposition of a Hermitian, positive definite matrix.
+Assuming the input matrix is named A, Cholesky decomposition has the
following forms:
mat = L * L.t if L is a lower triangular matrix.
@@ -35,7 +35,7 @@ matrices. The triangle used to compute the Cholesky
decomposition is the upper if tri == MatrixTriangle.Upper or the lower
if tri == MatrixTriangle.Lower. The other triangle is ignored.
-Returns:
+Returns:
The matrix U that would be multiplied by its conjugate transpose as
shown above to form mat, or the matrix L that would be multiplied by
@@ -62,7 +62,7 @@ auto result2 = cholesky!(MatrixTriangle.Upper)(mat2);
---
*/
auto cholesky( MatrixTriangle tri = MatrixTriangle.Upper, M )( auto ref M mat )
-if( isMatrix!M ) {
+if( isMatrix!M ) {
alias M.ElementType E;
auto ret = TriangularMatrix!( E, tri )( mat.rows, null );
choleskyImpl( mat, ret );
@@ -71,7 +71,7 @@ if( isMatrix!M ) {
/// Ditto
auto cholesky( MatrixTriangle tri = MatrixTriangle.Upper, M, Allocator )
-( auto ref M mat, Allocator alloc )
+( auto ref M mat, Allocator alloc )
if( isMatrix!M && isAllocator!Allocator ) {
alias M.ElementType E;
alias TriangularMatrix!( E, tri ).Temporary Temp;
@@ -79,29 +79,29 @@ if( isMatrix!M && isAllocator!Allocator ) {
choleskyImpl( mat, ret );
return ret;
}
-
+
private void choleskyImpl( M1, M2 )( ref M1 mat, ref M2 ret ) {
alias M1.ElementType E;
-
+
auto alloc = newRegionAllocator();
auto temp = ExternalMatrixView!E( mat.rows, mat.rows, alloc );
-
+
static void copyUpperTriangle( M1, M2 )( ref M1 src, ref M2 dest ) {
foreach( col; 0..src.columns ) {
foreach( row; 0..col + 1 ) {
dest[ row, col ] = src[ row, col ];
}
- }
+ }
}
-
+
static void copyLowerTriangle( M1, M2 )( ref M1 src, ref M2 dest ) {
foreach( col; 0..src.columns ) {
foreach( row; col..src.columns ) {
dest[ row, col ] = src[ row, col ];
}
- }
+ }
}
-
+
static if( ret.triangle == MatrixTriangle.Upper ) {
copyUpperTriangle( mat, temp );
enum uplo = 'U';
@@ -109,35 +109,35 @@ private void choleskyImpl( M1, M2 )( ref M1 mat, ref M2 ret ) {
copyLowerTriangle( mat, temp );
enum uplo = 'L';
}
-
- int info;
+
+ int info;
lapack.potrf!( uplo )
( temp.length, temp.data, temp.rows, info );
-
+
// TODO: Establish a consistent standard for error handling.
// For now, just return a matrix of NaNs.
if( info > 0 ) {
ret.data[ 0..ret.length ] = E.init;
return;
}
-
+
static if( uplo == 'U' ) {
copyUpperTriangle( temp, ret );
} else {
copyLowerTriangle( temp, ret );
}
-
+
return;
}
-
+
/**
Computes the Cholesky decomposition of a matrix without any copying.
Upon exiting, the upper or lower triangle of mat (depending on the value
-of tri) will contain the result and the other triangle will contain zeros.
+of tri) will contain the result and the other triangle will contain zeros.
mat is interpreted as a symmetric matrix and the upper or lower triangle
(depending on the value of tri) is used to compute the Cholesky decomposition
-and the other triangle is ignored, similarly to cholesky(). However, mat must
-use general matrix storage, since one of the two triangles is used as scratch
+and the other triangle is ignored, similarly to cholesky(). However, mat must
+use general matrix storage, since one of the two triangles is used as scratch
space.
*/
void choleskyDestructive
@@ -148,25 +148,25 @@ if( isMatrix!M && isGeneralMatrixStorage!( M.Storage, M.ElementType ) ) {
"%s rows, %s columns.", mat.rows, mat.columns
));
- int info;
-
+ int info;
+
static if( M.storageOrder == StorageOrder.ColumnMajor ) {
enum uplo = ( tri == MatrixTriangle.Upper ) ? 'U' : 'L';
} else {
- // For row-major storage just flip it around.
+ // For row-major storage just flip it around.
enum uplo = ( tri == MatrixTriangle.Upper ) ? 'L' : 'U';
}
-
+
lapack.potrf!( uplo )
( mat.length, mat.data, mat.leading, info );
-
+
// TODO: Establish a consistent standard for error handling.
// For now, just return a matrix of NaNs.
if( info > 0 ) {
mat.data[ 0..mat.length ] = M.ElementType.init;
return;
}
-
+
foreach( col; 0..mat.columns ) {
static if( tri == MatrixTriangle.Upper ) {
@@ -176,24 +176,24 @@ if( isMatrix!M && isGeneralMatrixStorage!( M.Storage, M.ElementType ) ) {
immutable start = 0;
immutable end = col;
}
-
+
foreach( row; start..end ) {
mat[ row, col ] = 0;
}
}
}
-
+
unittest {
import std.math;
import std.stdio;
// Easy way to get a positive definite matrix to test with:
auto mat = SymmetricMatrix!double( [
- [14.0, 20, 31],
- [20.0, 62, 79],
+ [14.0, 20, 31],
+ [20.0, 62, 79],
[31.0, 79, 122]
] );
-
+
auto ch = cholesky( mat );
alias approxEqual ae; // Save typing.
assert( ae( ch[ 0, 0 ], 3.74166 ) );
@@ -202,31 +202,31 @@ unittest {
assert( ae( ch[ 1, 1 ], 5.78174 ) );
assert( ae( ch[ 1, 2 ], 6.00412 ) );
assert( ae( ch[ 2, 2 ], 4.16025 ) );
-
+
auto alloc = newRegionAllocator();
auto ch2 = cholesky( mat, alloc );
foreach( row; 0..3 ) foreach( col; 0..3 ) {
assert( ch[ row, col ] == ch2[ row, col ]);
}
-
+
auto ch3 = cholesky!( MatrixTriangle.Lower )( mat );
foreach( row; 0..3 ) foreach( col; 0..3 ) {
assert( ch3[ col, row ] == ch2[ row, col ]);
}
-
+
Matrix!double gen;
gen[] = mat;
gen[ 2, 0 ] = gen[ 1, 0 ] = gen[ 2, 1 ] = 0;
-
+
choleskyDestructive( gen );
foreach( row; 0..3 ) foreach( col; 0..3 ) {
assert( ch[ row, col ] == gen[ row, col ]);
}
-
+
Matrix!double gen2;
gen2[] = mat;
gen2[ 0, 2 ] = gen2[ 0, 1 ] = gen2[ 1, 2 ] = 0;
-
+
choleskyDestructive!( MatrixTriangle.Lower )( gen2 );
foreach( row; 0..3 ) foreach( col; 0..3 ) {
assert( gen2[ col, row ] == gen[ row, col ]);
@@ -239,7 +239,7 @@ unittest {
foreach( row; 0..3 ) foreach( col; 0..3 ) {
assert( ch[ row, col ] == gen3[ row, col ]);
}
-
+
Matrix!( double, StorageOrder.RowMajor ) gen4;
gen4[] = mat;
gen4[ 0, 2 ] = gen4[ 0, 1 ] = gen4[ 1, 2 ] = 0;
@@ -252,7 +252,7 @@ unittest {
/**
Solve a positive definite system of equations using Cholesky decomposition.
Assume the system to be solved is mat * ans = vector. Either cholesky
-or choleskyDestructive must first be called on mat to obtain
+or choleskyDestructive must first be called on mat to obtain
decomposedMatrix. Then choleskySolve can be called with decomposedMatrix
and vector to obtain ans. The template parameter tri controls which triangle
the results of the Cholesky decomposition are expected to be in. The other
@@ -274,16 +274,16 @@ auto ans = choleskySolve!( MatrixTriangle.Lower )( mat, vector );
*/
auto choleskySolve( MatrixTriangle tri = MatrixTriangle.Upper, M, V )
( auto ref M decomposedMatrix, auto ref V vector ) {
- alias CommonType!(
+ alias CommonType!(
typeof( decomposedMatrix[0, 0] ), typeof( vector[0]) ) F;
-
+
auto ret = Vector!F( vector.length );
choleskySolve!( tri, M, V , typeof(ret) )( decomposedMatrix, vector, ret );
return ret;
}
/**
-This overload allows ans to be pre-allocated. It must have the same length
+This overload allows ans to be pre-allocated. It must have the same length
as vector.
*/
void choleskySolve( MatrixTriangle tri = MatrixTriangle.Upper, M, V, R )
@@ -291,50 +291,50 @@ void choleskySolve( MatrixTriangle tri = MatrixTriangle.Upper, M, V, R )
assert( decomposedMatrix.rows == vector.length, format(
"decomposedMatrix.rows must be equal to vector.length for " ~
"choleskySolve. (Got %s, %s)", decomposedMatrix.rows, vector.length ) );
-
+
auto alloc = newRegionAllocator();
-
+
// Assume a system A * ans = vector where A is some matrix. A is decomposed
// such that A = decomposedMatrix * decomposedMatrix.t. According to
// Wikipedia (http://en.wikipedia.org/wiki/Cholesky_decomposition),
 // we can solve this system by solving decomposedMatrix * y = vector for
- // y and then solving decomposedMatrix.t * ans = vector for ans.
- alias CommonType!(
+ // y and then solving decomposedMatrix.t * ans = vector for ans.
+ alias CommonType!(
typeof( decomposedMatrix[0, 0] ), typeof( vector[0]) ) F;
auto y = ExternalVectorView!F( vector.length, alloc );
auto transposed = eval( decomposedMatrix.t );
-
+
static if( tri == MatrixTriangle.Lower ) {
solveLower( decomposedMatrix, vector, y );
solveUpper( transposed, y, ans );
} else {
static assert( tri == MatrixTriangle.Upper );
solveLower( transposed, vector, y );
- solveUpper( decomposedMatrix, y, ans );
+ solveUpper( decomposedMatrix, y, ans );
}
}
unittest {
import std.math;
import scid.matvec;
-
+
auto raw = Matrix!double([
[8.0, 6, 7],
[5.0, 3, 0],
[9.0, 3, 1]
]);
-
+
auto cov = eval( raw.t * raw );
auto upper = cov;
auto lower = cov;
choleskyDestructive( upper );
choleskyDestructive!( MatrixTriangle.Lower )( lower );
auto b = Vector!double( [3.0, 6, 2] );
-
+
auto res1 = choleskySolve(upper, b);
auto res2 = choleskySolve!( MatrixTriangle.Lower )( lower, b );
auto res3 = eval( inv( cov ) * b );
-
+
assert( approxEqual( res1, res2 ));
assert( approxEqual( res3, res2 ));
}
@@ -346,14 +346,14 @@ unittest {
private void solveLower( M, V, R )( auto ref M mat, auto ref V vec, ref R result ) {
assert( result.length == vec.length );
assert( mat.rows == vec.length );
-
+
foreach( i; 0..mat.rows ) {
auto ans = vec[i];
-
+
foreach( j; 0..i ) {
ans -= result[j] * mat[i, j];
}
-
+
result[i] = ans / mat[i, i];
}
}
@@ -362,14 +362,14 @@ private void solveLower( M, V, R )( auto ref M mat, auto ref V vec, ref R result
private void solveUpper( M, V, R )( auto ref M mat, auto ref V vec, ref R result ) {
assert( result.length == vec.length );
assert( mat.rows == vec.length );
-
+
for( size_t i = mat.rows - 1; i != size_t.max; i-- ) {
auto ans = vec[i];
-
+
foreach( j; i + 1..mat.rows ) {
ans -= result[j] * mat[i, j];
}
-
+
result[i] = ans / mat[i, i];
}
}
@@ -383,13 +383,13 @@ enum SvdType {
SvdResult struct will be empty.
*/
valuesOnly,
-
+
/**
Compute the singular values and the first min(m, n) singular vectors,
where m and n are the number of rows and columns in the input matrix x.
*/
economy,
-
+
/**
Compute the singular values and all singular vectors of x.
*/
@@ -403,12 +403,12 @@ as documented below.
struct SvdResult( U, S, VT ) {
/// The left singular vectors of x.
U u;
-
+
/// The singular values of x.
S s;
-
+
/// The conjugate transpose of the right singular values of x.
- VT vt;
+ VT vt;
}
/**
@@ -417,11 +417,11 @@ is a matrix factorization that factors an m by m matrix x into three matrices:
u = An m by m unitary matrix. (The left singular vectors.)
s = A min(m, n) by min(m, n) diagonal matrix. (The singular values.)
-vt = An n by n unitary matrix. (The conjugate transpose of the
+vt = An n by n unitary matrix. (The conjugate transpose of the
right singular vectors.)
This is done such that u * s * vt == x.
-*/
+*/
auto svd( Matrix )( auto ref Matrix x, SvdType type ) {
auto alloc = newRegionAllocator();
auto ext = copyToExternal( x, alloc );
@@ -443,15 +443,15 @@ private auto copyToExternal( M, Allocator )( ref M x, Allocator alloc ) {
foreach( i; 0..x.rows ) foreach( j; 0..x.columns ) {
xCopied[i, j] = x[i, j];
}
-
+
return xCopied;
}
private auto allocateSvdResult( M, A... )( auto ref M x, SvdType type, A alloc ) {
- alias Unqual!( typeof( x[0, 0] ) ) E;
- uint m = to!uint( x.rows );
+ alias Unqual!( typeof( x[0, 0] ) ) E;
+ uint m = to!uint( x.rows );
uint n = to!uint( x.columns );
-
+
char jobType;
static if( A.length ) {
SvdResult!( Matrix!(E).Temporary, DiagonalMatrix!(E).Temporary,
@@ -459,9 +459,9 @@ private auto allocateSvdResult( M, A... )( auto ref M x, SvdType type, A alloc )
} else {
SvdResult!( Matrix!E, DiagonalMatrix!E, Matrix!E ) ret;
}
-
+
ret.s = typeof( ret.s )( min( m, n ) , alloc );
-
+
final switch( type ) {
case SvdType.valuesOnly:
break;
@@ -474,7 +474,7 @@ private auto allocateSvdResult( M, A... )( auto ref M x, SvdType type, A alloc )
ret.vt = typeof( ret.vt )( n, n, alloc );
break;
}
-
+
return ret;
}
@@ -482,15 +482,15 @@ private auto allocateSvdResult( M, A... )( auto ref M x, SvdType type, A alloc )
Perform SVD, but destroy the contents of the input matrix instead of copying it.
x must be column major and use general storage.
*/
-auto svdDestructive( Matrix )( ref Matrix x, SvdType type )
-if( isMatrix!Matrix && Matrix.storageOrder == StorageOrder.ColumnMajor ) {
+auto svdDestructive( Matrix )( ref Matrix x, SvdType type )
+if( isMatrix!Matrix && Matrix.storageOrder == StorageOrder.ColumnMajor ) {
auto ret = allocateSvdResult( x, type );
svdDestructive( x, type, ret.tupleof );
return ret;
}
/// Ditto
-auto svdDestructive( Matrix, Allocator )( ref Matrix x, SvdType type, Allocator alloc )
+auto svdDestructive( Matrix, Allocator )( ref Matrix x, SvdType type, Allocator alloc )
if( isMatrix!Matrix && Matrix.storageOrder == StorageOrder.ColumnMajor ) {
auto ret = allocateSvdResult( x, type, alloc );
svdDestructive( x, type, ret.tupleof );
@@ -508,32 +508,32 @@ type = The type of SVD to be performed.
u = A matrix of left singular vectors. Must be m by m for full SVD, or m by
min(m, n) for economy SVD. Ignored for value-only SVD.
-
+
s = A diagonal matrix of singular values. Must be min(m, n) by min(m, n).
vt = A matrix of right singular values. Must be n by n for full SVD, or
min(m, n) by n for economy SVD. Ignored for value-only SVD.
*/
-void svdDestructive( M, U, S, VT )
+void svdDestructive( M, U, S, VT )
( ref M x, SvdType type, ref U u, ref S s, ref VT vt ) {
// TODO: Lock this function down w/ constraints and stuff.
version( nodeps ) {
static assert( 0, "No naive SVD implementation yet." );
}
-
+
char jobType;
-
+
int m = to!int( x.rows );
int n = to!int( x.columns );
-
+
void checkSize( M )( ref M m, int rows, int cols, string name ) {
enforce( m.rows == rows && m.columns == cols, format(
"Need %s to be of size %s by %s, not %s by %s for SvdType.%s",
name, rows,cols, m.rows, m.columns, type
));
- }
-
- checkSize( s, min(m, n), min(m, n), "s" );
+ }
+
+ checkSize( s, min(m, n), min(m, n), "s" );
final switch( type ) {
case SvdType.valuesOnly:
jobType = 'N';
@@ -541,40 +541,46 @@ void svdDestructive( M, U, S, VT )
case SvdType.economy:
jobType = 'S';
checkSize( u, m, min( m, n ), "u" );
- checkSize( vt, min( m, n ), n, "vt" );
+ checkSize( vt, min( m, n ), n, "vt" );
break;
case SvdType.full:
jobType = 'A';
checkSize( u, m, m, "u" );
checkSize( vt, n, n, "vt" );
break;
- }
+ }
alias typeof( x[0, 0] ) E;
int lwork = -1;
int info;
E nWork;
- E* work;
-
+ E* work;
+
// TODO: Add this to the Lapack struct and make a naive impl.
import scid.bindings.lapack.dlapack;
-
+
// Find out how big a work array we need.
int xlead = to!int(x.leading);
int ulead = to!int(u.leading);
int vlead = to!int(vt.leading);
- gesvd( jobType, jobType, m, n, x.data, xlead, s.data, u.data,
+
+ // This works around some Lapack implementations requiring leading
+ // dimensions to be at least 1 even for unreferenced matrices.
+ if(ulead == 0) ulead = 1;
+ if(vlead == 0) vlead = 1;
+
+ gesvd( jobType, jobType, m, n, x.data, xlead, s.data, u.data,
ulead, vt.data, vlead, &nWork, lwork, info
);
lwork = to!int( nWork );
auto alloc = newRegionAllocator();
work = alloc.uninitializedArray!( E[] )( lwork ).ptr;
- gesvd( jobType, jobType, m, n, x.data, xlead, s.data, u.data,
+ gesvd( jobType, jobType, m, n, x.data, xlead, s.data, u.data,
ulead, vt.data, vlead, work, lwork, info
);
-
- enforce(info == 0, "Lapack error in computing SVD: " ~ to!string(info));
+
+ enforce(info == 0, "Lapack error in computing SVD: " ~ to!string(info));
}
unittest {
@@ -584,41 +590,41 @@ unittest {
auto econ = svd( x, SvdType.economy, alloc );
auto xx = copyToExternal( x, alloc );
auto vals = svdDestructive( xx, SvdType.valuesOnly, alloc );
-
+
bool matApproxEqual( M1, M2 )( ref M1 lhs, ref M2 rhs ) {
if( lhs.rows != rhs.rows ) return false;
if( lhs.columns != rhs.columns ) return false;
-
+
import std.math;
foreach( i; 0..lhs.rows ) foreach( j; 0..lhs.columns ) {
if( !approxEqual( lhs[i, j], rhs[i, j] ) ) return false;
}
-
+
return true;
}
-
+
// Values from octave.
auto s = DiagonalMatrix!double( [11.5525, 1.5938] );
assert( matApproxEqual( s, full.s ) );
assert( matApproxEqual( s, econ.s ) );
assert( matApproxEqual( s, vals.s ) );
-
+
auto u = Matrix!double( [[-0.29586, -0.95523], [-0.95523, 0.29586]] );
assert( matApproxEqual( u, full.u ) );
assert( matApproxEqual( u, econ.u ) );
import std.stdio;
-
- auto vt = Matrix!double(
+
+ auto vt = Matrix!double(
[[-0.60441, -0.29928, -0.73832],
[ 0.70010, -0.64180, -0.31297],
[-0.38019, -0.70606, 0.59744]]
);
-
+
auto vtSliced = vt[0..2][];
assert( matApproxEqual( vt, full.vt ) , full.vt.pretty);
assert( matApproxEqual( vtSliced, econ.vt ) );
-
+
auto xTrans = eval(x.t);
auto fullTrans = svd( xTrans, SvdType.full );
auto econTrans = svd( xTrans, SvdType.economy );
@@ -627,17 +633,17 @@ unittest {
assert( matApproxEqual( s, fullTrans.s ) );
assert( matApproxEqual( s, econTrans.s ) );
assert( matApproxEqual( s, valsTrans.s ) );
-
- auto uTrans = Matrix!double(
+
+ auto uTrans = Matrix!double(
[[ 0.60441, -0.70010, -0.38019],
[ 0.29928, 0.64180, -0.70606],
[ 0.73832, 0.31297, 0.59744]]
);
-
+
auto uTransSliced = uTrans[][0..2];
assert( matApproxEqual( uTrans, fullTrans.u ) );
assert( matApproxEqual( uTransSliced, econTrans.u ) );
-
+
auto vtTrans = eval( -u );
assert( matApproxEqual( vtTrans, fullTrans.vt ) );
assert( matApproxEqual( vtTrans, econTrans.vt ) );
934 scid/ops/common.d
@@ -1,467 +1,467 @@
-/** Common functions used to compute matrix and vector operations.
-
- Authors: Cristian Cobzarenco
- Copyright: Copyright (c) 2011, Cristian Cobzarenco. All rights reserved.
- License: Boost License 1.0
-*/
-
-module scid.ops.common;
-
-public import scid.ops.eval;
-
-import std.complex;
+/** Common functions used to compute matrix and vector operations.
+
+ Authors: Cristian Cobzarenco
+ Copyright: Copyright (c) 2011, Cristian Cobzarenco. All rights reserved.
+ License: Boost License 1.0
+*/
+
+module scid.ops.common;
+
+public import scid.ops.eval;
+
+import std.complex;
import std.math;
-import std.traits;
-import scid.ops.expression;
-import scid.vector;
-import scid.matrix;
-
-import std.typecons;
-import std.exception;
-
-import scid.common.traits;
-
-import scid.blas;
-import scid.lapack;
-
-/** Generic complex conjugate that works on both builtin complex numbers and on std.Complex */
-auto gconj( T )( T z ) if( is( T : cdouble ) || is( T : cfloat ) ) {
- return conj( z );
-}
-/// ditto
-auto gconj( T )( T z ) if( is( T : Complex!double ) || is( T : Complex!float ) ) {
- return z.conj;
-}
-
-/** Inverses the value of a Transpose value. */
-template transNot( Transpose tr ) { enum transNot = cast(Transpose) !tr; }
-
-/** Performs exclusive or on Transpose values. */
-template transXor( Transpose a, Transpose b ) { enum transXor = cast(Transpose) (a ^ b); }
-
-/** Transposes a closure (only affects RowVector <-> ColumnVector) with a given Transpose value. */
-template transposeClosure( Closure A, Transpose transposed = Transpose.yes ) {
- static if( transposed ) {
- static if( A == Closure.RowVector ) enum transposeClosure = Closure.ColumnVector;
- else static if( A == Closure.ColumnVector ) enum transposeClosure = Closure.RowVector;
- else enum transposeClosure = A;
- } else
- enum transposeClosure = A;
-}
-
-/** Transposes a vector type (Row <-> Column) based on the given Transpose value. */
-template transposeVectorType( VectorType v, Transpose transposed = Transpose.yes ) {
- static if( transposed ) {
- static if( v == VectorType.Row ) enum transposeVectorType = VectorType.Column;
- else static if( v == VectorType.Column ) enum transposeVectorType = VectorType.Row;
- else enum transposeVectorType = v;
- } else
- enum transposeVectorType = v;
-}
-
-/** Transposes a storage order (RowMajor <-> ColumnMajor) based on the given Transpose value. */
-template transposeStorageOrder( StorageOrder S, Transpose transposed = Transpose.yes ) {
- static if( transposed ) {
- static if( S == StorageOrder.RowMajor ) enum transposeStorageOrder = StorageOrder.ColumnMajor;
- else static if( S == StorageOrder.ColumnMajor ) enum transposeStorageOrder = StorageOrder.RowMajor;
- else enum transposeStorageOrder = S;
- } else
- enum transposeStorageOrder = S;
-}
-
-/** Transposes a vector if the given parameter is Transpose.yes. This simply means conjugate its elements if the
- element type is complex.
-*/
-void vectorTranspose( Transpose trans, V )( ref V vec ) {
- static if( trans && isComplexScalar!(BaseElementType!V) ) {
- auto n = vec.length;
- foreach( i ; 0 .. n )
- vec[ i ] = gconj( vec[ i ] );
- }
-}
-
-/** Mixin template that provides scaledAddition(), scale() and dot() specializations for strided vector storage
- types.
-*/
-mixin template StridedScalingAdditionDot() {
- import scid.common.traits : isStridedVectorStorage;
- void scaledAddition( Transpose tr = Transpose.no, S )( ElementType alpha, auto ref S rhs ) if( isStridedVectorStorage!(S, ElementType) ) {
- stridedScaledAddition!tr( alpha, rhs, this );
- }
-
- void scale( ElementType alpha ) {
- stridedScaling( alpha, this );
- }
-
- auto dot( Transpose tr = Transpose.no, Other )( auto ref Other other ) if( isStridedVectorStorage!(Other, ElementType) ) {
- return stridedDot!tr( this, other );
- }
-}
-
-/** Mixin template that provides scaledAddition() and scale() for general matrix storage types. */
-mixin template GeneralMatrixScalingAndAddition() {
- private import scid.lapack;
- private import scid.internal.regionallocator;
-
- void scaledAddition( Transpose tr = Transpose.no, S )( ElementType alpha, auto ref S rhs ) if( isGeneralMatrixStorage!(S, ElementType) ) {
- assert( rhs.rows == this.rows && rhs.columns == this.columns, "Matrix size mismatch in addition." );
- static assert( isGeneralMatrixStorage!(typeof(this), ElementType) );
- generalMatrixScaledAddition!tr( alpha, rhs, this );
- }
-
- void scale( ElementType alpha ) {
- static assert( isGeneralMatrixStorage!(typeof(this), ElementType) );
- generalMatrixScaling!storageOrder( this.rows, this.columns, alpha, this.data, this.leading );
- }
-
- void invert() {
- import scid.lapack;
- import std.exception;
-
- if( this.empty )
- return;
-
- int info;
- size_t n = this.rows;
- assert( n == this.columns, "Inversion of non-square matrix." );
-
- auto alloc = newRegionAllocator();
- auto ipiv = alloc.uninitializedArray!( int[] )( n );
- auto work = alloc.uninitializedArray!( ElementType[] )( n );
-
- lapack.getrf( n, n, this.data, this.leading, ipiv.ptr, info );
- lapack.getri( n, this.data, this.leading, ipiv.ptr, work.ptr , work.length, info );
-
- enforce( info == 0, "Inversion of singular matrix." );
- }
-
- void solveRight( Transpose transM = Transpose.no, Side side, Dest )( auto ref Dest dest ) if( isStridedVectorStorage!Dest || isGeneralMatrixStorage!Dest ) {
- import scid.blas;
- import scid.lapack;
- import std.exception;
-
- if( this.empty && dest.empty )
- return;
-
- size_t n = this.rows; // check that the matrix is square
- assert( n == this.columns, "Inversion of non-square matrix." );
-
- enum vectorRhs = isStridedVectorStorage!Dest; // if we're solving for a vector
- auto alloc = newRegionAllocator(); // allocator for all the copies we need to perform
- auto ipiv = alloc.uninitializedArray!(int[])( n ).ptr; // array to store the permutation matrix of the LU decomposition
- auto a = alloc.uninitializedArray!(ElementType[])( n * n ).ptr; // array to copy this matrix's data
- int info = 0; // error number of the LAPACK calls
-
- // If the storage orders are different and we need to perform a hermitian transpose then the result is
- // the same as taking the conjugate without transposing.
- static if( !vectorRhs )
- enum bool conjugateNoTrans = isComplexScalar!ElementType && transM && storageOrder != storageOrderOf!Dest;
- else
- enum bool conjugateNoTrans = false;
-
- // copy this matrix's data (don't transpose, we'll use getrs's transpose for performance)
- static if( conjugateNoTrans )
- blas.xgecopyc!'n'( n, n, this.cdata, this.leading, a, n );
- else
- blas.xgecopy!'n'( n, n, this.cdata, this.leading, a, n );
-
- static if( vectorRhs ) {
- ElementType *b; // if the rhs is a vector, then make it look like a matrix, by defining
- enum nrhs = 1; // the no. of columns (nrhs) and
- int ldb = dest.length; // the leading dimension
-
- if( dest.stride != 1 ) {
- // if the destination's stride is != 1, we need to copy it to a temp array.
- b = alloc.uninitializedArray!(ElementType[])( ldb ).ptr;
- blas.copy( ldb, dest.cdata, dest.stride, b, 1 );
- } else {
- // if the stride is 1, we can simply use dest's data for b
- b = dest.data;
- }
- } else {
- ElementType *b = dest.data; // if the rhs is a matrix simply grab the parameters from it
- int nrhs = dest.columns;
- int ldb = dest.leading;
- }
-
- // Figure out what kind of transposition we've got (transM == true means it could be hermitian)
- static if( vectorRhs ) {
- enum chTrans = transM ? (isComplexScalar!ElementType ? 'C' : 'T') : 'N';
- } else static if( transposeStorageOrder!(storageOrder, transM) != storageOrderOf!Dest ) {
- static if( !isComplexScalar!ElementType || conjugateNoTrans )
- enum chTrans = 'T';
- else
- enum chTrans = 'C';
- } else {
- enum chTrans = 'N';
- }
-
- enum chSide = (side == Side.Left) ? 'L' : 'R';
-
- lapack.getrf( n, n, a, n, ipiv, info ); // perform LU decomposition
- enforce( info == 0, "Inversion of singular matrix." );
- lapack.xgetrs!(chTrans, chSide)( n, nrhs, a, n, ipiv, b, ldb, info ); // perform inv-mult
-
- static if( vectorRhs ) {
- // copy the data back to dest if needed
- if( dest.stride != 1 )
- blas.copy( ldb, b, 1, dest.data, dest.stride );
- }
- }
-
- void matrixProduct( Transpose transA = Transpose.no, Transpose transB = Transpose.no, A, B )
- ( ElementType alpha, auto ref A a, auto ref B b, ElementType beta ) if( isGeneralMatrixStorage!A && isGeneralMatrixStorage!B ) {
- import scid.blas;
-
- // Are the matrices row major?
- enum aRowMajor = storageOrderOf!A == StorageOrder.RowMajor;
- enum bRowMajor = storageOrderOf!B == StorageOrder.RowMajor;
- enum cRowMajor = storageOrder == StorageOrder.RowMajor;
-
- // Should a and be read minor-first or major-first
- enum aByMinor = transA ^ aRowMajor ^ cRowMajor;
- enum bByMinor = transB ^ bRowMajor ^ cRowMajor;
-
- // Do the matrices have complex elements?
- enum complexElements = isComplexScalar!ElementType;
-
- static if( complexElements ) {
- enum aConj = transA && !aByMinor, aHerm = transA && aByMinor;
- enum bConj = transB && !bByMinor, bHerm = transB && bByMinor;
-
- static if( aByMinor )
- enum aTransChar = bConj || !transA ? 'T' : 'C';
- else
- enum aTransChar = 'N';
-
- static if( bByMinor )
- enum bTransChar = aConj || !transB ? 'T' : 'C';
- else
- enum bTransChar = 'N';
-
- enum conjugateResult = aConj && bHerm || bConj && aHerm || aConj && bConj;
-
- enum aTemp = aConj && !transB;
- enum bTemp = bConj && !transA;
-
- static if( aTemp || bTemp )
- auto alloc = newRegionAllocator();
-
- static if( aTemp ) {
- auto aData = cast(ElementType*)alloc.allocate( a.rows * a.columns );
- auto aLeading = a.minor;
-
- blas.xgecopyc!'N'( a.minor, a.major, a.cdata, a.leading, aData, aLeading );
- } else {
- auto aData = a.cdata, aLeading = a.leading;
- }
-
- static if( bTemp ) {
- auto bData = cast(ElementType*)alloc.allocate( b.rows * b.columns );
- auto bLeading = b.minor;
-
- blas.xgecopyc!'N'( b.minor, b.major, b.cdata, b.leading, bData, bLeading );
- } else {
- auto bData = b.cdata, bLeading = b.leading;
- }
- } else {
- enum conjugateResult = false;
- auto aData = a.cdata, aLeading = a.leading;
- auto bData = b.cdata, bLeading = b.leading;
-
- enum aTransChar = aByMinor ? 'T' : 'N';
- enum bTransChar = bByMinor ? 'T' : 'N';
- }
-
- static if( !cRowMajor ) {
- static if( !aByMinor )
- size_t m = a.minor, k = a.major;
- else
- size_t m = a.major, k = a.minor;
-
- static if( !bByMinor )
- size_t n = b.major;
- else
- size_t n = b.minor;
-
- if( !beta )
- this.resize( m, n, null );
-
- blas.gemm!( aTransChar, bTransChar )( m, n, k, alpha, aData, aLeading, bData, bLeading, beta, this.data, this.leading );
- } else {
- static if( !bByMinor )
- size_t m = b.minor, k = b.major;
- else
- size_t m = b.major, k = b.minor;
-
- static if( !aByMinor )
- size_t n = a.major;
- else
- size_t n = a.minor;
-
- if( !beta )
- this.resize( m, n, null );
-
- blas.gemm!( bTransChar, aTransChar )( m, n, k, alpha, bData, bLeading, aData, aLeading, beta, this.data, this.leading );
- }
-
- static if( conjugateResult )
- blas.xgecopyc!'N'(m,n,this.data,this.leading,this.data,this.leading);
- }
-}
-
-/** Compute the dot product of a row and a column of possibly transposed matrices. */
-auto rowColumnDot( Transpose transA = Transpose.no, Transpose transB = Transpose.no, A, B )( auto ref A a, size_t i, auto ref B b, size_t j ) {
- static if( !transA && !transB ) {
- return evalDot!( transA, transB )( a.row( i ), b.column( j ) );
- } else static if( !transA && transB ) {
- return evalDot!( transA, transB )( a.row( i ), b.row( j ) );
- } else static if( transA && !transB ) {
- return evalDot!( transA, transB )( a.column( i ), b.column( j ) );
- } else {
- return evalDot!( transA, transB )( b.column( i ), b.row( j ) );
- }
-}
-
-/** Specialized copy for strided vector storage types. */
-void stridedCopy( Transpose tr, Source, Dest )( auto ref Source source, auto ref Dest dest ) if( isStridedVectorStorage!(Source,BaseElementType!Dest) && isStridedVectorStorage!Dest ) {
- dest.resize( source.length, null );
- static if( isComplexScalar!(BaseElementType!Source) && tr ) {
- blas.xcopyc( dest.length, source.cdata, source.stride, dest.data, dest.stride );
- } else
- blas.copy( dest.length, source.cdata, source.stride, dest.data, dest.stride );
-
-}
-
-/** Specialized scaled addition (axpy) for strided vector storage types. */
-void stridedScaledAddition( Transpose tr, Scalar, Source, Dest )( Scalar alpha, ref Source source, auto ref Dest dest ) if( isStridedVectorStorage!(Source,Scalar) && is( Scalar : BaseElementType!Dest ) ) {
- assert( source.length == dest.length, "Length mismatch in strided addition." );
- static if( isComplexScalar!(BaseElementType!Source) && tr )
- blas.xaxpyc( dest.length, alpha, source.cdata, source.stride, dest.data, dest.stride );
- else
- blas.axpy( dest.length, alpha, source.cdata, source.stride, dest.data, dest.stride );
-}
-
-/** Specialized scaling for strided vector storage types. */
-void stridedScaling( Scalar, Dest )( Scalar alpha, auto ref Dest dest ) if( isStridedVectorStorage!Dest && is( Scalar : BaseElementType!Dest ) ) {
- blas.scal( dest.length, alpha, dest.data, dest.stride );
-}
-
-/** Specialized dot for strided vector storage types. */
-auto stridedDot( Transpose transA, A, B )( auto ref A a, auto ref B b ) if( isStridedVectorStorage!(A,BaseElementType!B) && isStridedVectorStorage!B ) {
- assert( a.length == b.length, "Length mismatch in strided dot." );
- static if( !isComplexScalar!(BaseElementType!A) )
- auto r = blas.dot( a.length, a.cdata, a.stride, b.cdata, b.stride );
- else static if( transA )
- auto r = blas.dotc( a.length, a.cdata, a.stride, b.cdata, b.stride );
- else
- auto r = blas.dotu( a.length, a.cdata, a.stride, b.cdata, b.stride );
- return r;
-}
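-
-// Note: with Transpose.yes and complex elements this is the conjugated (hermitian) inner product
-// sum( conj(a[i]) * b[i] ), computed via dotc; otherwise it is the plain sum( a[i] * b[i] ) via dot/dotu.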
-
-/** Specialized scaling for general matrix storage types. */
-void generalMatrixScaling( StorageOrder order, E )( size_t rows, size_t cols, E alpha, E* data, size_t leading ) {
- static if( order == StorageOrder.RowMajor ) {
- alias rows major;
- alias cols minor;
- } else {
- alias rows minor;
- alias cols major;
- }
-
- if( leading == minor ) {
- // scale everything
- blas.scal( rows * cols, alpha, data, 1 );
- } else {
- // scale by-major
- auto e = data + major * leading;
- for( ; data < e ; data += leading )
- blas.scal( minor, alpha, data, 1 );
- }
-}
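-
-// The single-call fast path above applies whenever the matrix is tightly packed (leading == minor, i.e. no
-// padding between major slices), e.g. a 3x4 RowMajor matrix with leading == 4; padded matrices fall back
-// to one scal call per major slice.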
-
-/** Specialized copying for general matrix storage types. */
-void generalMatrixCopy( Transpose tr, S, D )( auto ref S source, auto ref D dest ) {
- alias BaseElementType!S T;
- alias transposeStorageOrder!(storageOrderOf!S, tr) srcOrder;
- alias storageOrderOf!D dstOrder;
-
- static if( !isComplexScalar!T ) {
- blas.xgecopy!( (srcOrder == dstOrder) ? 'N' : 'T')(
- dest.rows,
- dest.columns,
- source.cdata,
- source.leading,
- dest.data,
- dest.leading
- );
- } else static if( isComplexScalar!T ) {
- static if( srcOrder == dstOrder ) {
- static if( tr ) {
- blas.xgecopyc!'N'( dest.rows, dest.columns,
- source.cdata, source.leading,
- dest.data, dest.leading
- );
- } else {
- blas.xgecopy!'N'( dest.rows, dest.columns,
- source.cdata, source.leading,
- dest.data, dest.leading
- );
- }
- } else {
- static if( tr ) {
- blas.xgecopy!'C'( dest.rows, dest.columns,
- source.cdata, source.leading,
- dest.data, dest.leading
- );
- } else {
- blas.xgecopy!'T'( dest.rows, dest.columns,
- source.cdata, source.leading,
- dest.data, dest.leading
- );
- }
- }
- }
-}
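-
-// Transposition character summary for the copy above: matching storage orders need no transpose ('N'),
-// differing orders need 'T'; for complex elements a requested conjugation turns these into a conjugated
-// copy (xgecopyc with 'N') or a hermitian transpose ('C'), respectively.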
-
-/** Specialized scaled addition (axpy) for general matrix storage types. */
-void generalMatrixScaledAddition( Transpose tr, S, D, E )( E alpha, auto ref S source, auto ref D dest ) {
-	alias BaseElementType!S T;
-	alias transposeStorageOrder!(storageOrderOf!S, tr) srcOrder;
-	alias storageOrderOf!D dstOrder;
-
-	static if( !isComplexScalar!T ) {
-		blas.xgeaxpy( (srcOrder == dstOrder) ? 'n' : 't',
- dest.rows, dest.columns, alpha,
- source.cdata, source.leading,
- dest.data, dest.leading
- );
- } else static if( isComplexScalar!T ) {
- static if( srcOrder == dstOrder ) {
- static if( tr ) {
- blas.xgeaxpyc( 'n', dest.rows, dest.columns, alpha,
- source.cdata, source.leading,
- dest.data, dest.leading
- );
- } else {
- blas.xgeaxpy( 'n', dest.rows, dest.columns, alpha,
- source.cdata, source.leading,
- dest.data, dest.leading
- );
- }
- } else {
- static if( tr ) {
- blas.xgeaxpy( 'c', dest.rows, dest.columns, alpha,
- source.cdata, source.leading,
- dest.data, dest.leading
- );
- } else {
- blas.xgeaxpy( 't', dest.rows, dest.columns, alpha,
- source.cdata, source.leading,
- dest.data, dest.leading
- );
- }
- }
-	}
-}
+import std.traits;
+import scid.ops.expression;
+import scid.vector;
+import scid.matrix;
+
+import std.typecons;
+import std.exception;
+
+import scid.common.traits;
+
+import scid.blas;
+import scid.lapack;
+
+/** Generic complex conjugate that works both on built-in complex numbers and on std.complex.Complex. */
+auto gconj( T )( T z ) if( is( T : cdouble ) || is( T : cfloat ) ) {
+ return conj( z );
+}
+/// ditto
+auto gconj( T )( T z ) if( is( T : Complex!double ) || is( T : Complex!float ) ) {
+ return z.conj;
+}
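+
+// A minimal usage sketch of gconj (placement and values are illustrative, not part of the library's tests):
+unittest {
+	import std.complex;
+	auto z = Complex!double( 1.0, 2.0 );
+	assert( gconj( z ) == Complex!double( 1.0, -2.0 ) ); // conjugation flips the sign of the imaginary part
+	assert( gconj( gconj( z ) ) == z );                  // conjugating twice gives back the original value
+}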
+
+/** Inverts a Transpose value. */
+template transNot( Transpose tr ) { enum transNot = cast(Transpose) !tr; }
+
+/** Performs exclusive or on Transpose values. */
+template transXor( Transpose a, Transpose b ) { enum transXor = cast(Transpose) (a ^ b); }
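+
+// Compile-time sanity checks illustrating the two helpers above (a sketch; they cost nothing at run time):
+static assert( transNot!(Transpose.yes) == Transpose.no );
+static assert( transNot!(Transpose.no) == Transpose.yes );
+static assert( transXor!(Transpose.yes, Transpose.yes) == Transpose.no );
+static assert( transXor!(Transpose.no, Transpose.yes) == Transpose.yes );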
+
+/** Transposes a closure (only affects RowVector <-> ColumnVector) with a given Transpose value. */
+template transposeClosure( Closure A, Transpose transposed = Transpose.yes ) {
+ static if( transposed ) {
+ static if( A == Closure.RowVector ) enum transposeClosure = Closure.ColumnVector;
+ else static if( A == Closure.ColumnVector ) enum transposeClosure = Closure.RowVector;
+ else enum transposeClosure = A;
+ } else
+ enum transposeClosure = A;
+}
+
+/** Transposes a vector type (Row <-> Column) based on the given Transpose value. */
+template transposeVectorType( VectorType v, Transpose transposed = Transpose.yes ) {
+ static if( transposed ) {
+ static if( v == VectorType.Row ) enum transposeVectorType = VectorType.Column;
+ else static if( v == VectorType.Column ) enum transposeVectorType = VectorType.Row;
+ else enum transposeVectorType = v;
+ } else
+ enum transposeVectorType = v;
+}
+
+/** Transposes a storage order (RowMajor <-> ColumnMajor) based on the given Transpose value. */
+template transposeStorageOrder( StorageOrder S, Transpose transposed = Transpose.yes ) {
+ static if( transposed ) {
+ static if( S == StorageOrder.RowMajor ) enum transposeStorageOrder = StorageOrder.ColumnMajor;
+ else static if( S == StorageOrder.ColumnMajor ) enum transposeStorageOrder = StorageOrder.RowMajor;
+ else enum transposeStorageOrder = S;
+ } else
+ enum transposeStorageOrder = S;
+}
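+
+// Illustrative compile-time checks for the three transposition helpers above: a transposed view of a
+// RowMajor matrix is addressed as ColumnMajor, a row vector becomes a column vector, and so on.
+static assert( transposeStorageOrder!(StorageOrder.RowMajor) == StorageOrder.ColumnMajor );
+static assert( transposeStorageOrder!(StorageOrder.RowMajor, Transpose.no) == StorageOrder.RowMajor );
+static assert( transposeClosure!(Closure.RowVector) == Closure.ColumnVector );
+static assert( transposeVectorType!(VectorType.Column) == VectorType.Row );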
+
+/** Transposes a vector in place if the given parameter is Transpose.yes. For a vector this simply means
+    conjugating its elements when the element type is complex; otherwise it is a no-op.
+*/
+void vectorTranspose( Transpose trans, V )( ref V vec ) {
+ static if( trans && isComplexScalar!(BaseElementType!V) ) {
+ auto n = vec.length;
+ foreach( i ; 0 .. n )
+ vec[ i ] = gconj( vec[ i ] );
+ }
+}
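+
+// Effect sketch (the vector construction below is hypothetical; any storage with indexing and length works):
+//   auto v = Vector!cdouble([ 1.0 + 2.0i, 3.0 - 4.0i ]);
+//   vectorTranspose!(Transpose.yes)( v );   // v becomes [ 1.0 - 2.0i, 3.0 + 4.0i ]
+// For real element types, or when trans is Transpose.no, the call compiles to nothing.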
+
+/** Mixin template that provides scaledAddition(), scale() and dot() specializations for strided vector storage
+ types.
+*/
+mixin template StridedScalingAdditionDot() {
+ import scid.common.traits : isStridedVectorStorage;
+ void scaledAddition( Transpose tr = Transpose.no, S )( ElementType alpha, auto ref S rhs ) if( isStridedVectorStorage!(S, ElementType) ) {
+ stridedScaledAddition!tr( alpha, rhs, this );
+ }
+
+ void scale( ElementType alpha ) {
+ stridedScaling( alpha, this );
+ }
+
+ auto dot( Transpose tr = Transpose.no, Other )( auto ref Other other ) if( isStridedVectorStorage!(Other, ElementType) ) {
+ return stridedDot!tr( this, other );
+ }
+}
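+
+// Usage sketch: a strided storage type opts in by mixing this in once it exposes ElementType, data/cdata,
+// stride and length. The struct below is illustrative only, not an actual scid type:
+//   struct MyStridedStorage( T ) {
+//       alias T ElementType;
+//       T* data; const(T)* cdata;
+//       size_t stride, length;
+//       mixin StridedScalingAdditionDot;
+//   }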
+
+/** Mixin template that provides scaledAddition() and scale() for general matrix storage types. */
+mixin template GeneralMatrixScalingAndAddition() {
+ private import scid.lapack;
+ private import scid.internal.regionallocator;
+
+ void scaledAddition( Transpose tr = Transpose.no, S )( ElementType alpha, auto ref S rhs ) if( isGeneralMatrixStorage!(S, ElementType) ) {
+ assert( rhs.rows == this.rows && rhs.columns == this.columns, "Matrix size mismatch in addition." );
+ static assert( isGeneralMatrixStorage!(typeof(this), ElementType) );
+ generalMatrixScaledAddition!tr( alpha, rhs, this );
+ }
+
+ void scale( ElementType alpha ) {
+ static assert( isGeneralMatrixStorage!(typeof(this), ElementType) );
+ generalMatrixScaling!storageOrder( this.rows, this.columns, alpha, this.data, this.leading );
+ }
+
+ void invert() {
+ import scid.lapack;
+ import std.exception;
+
+ if( this.empty )
+ return;
+
+ int info;
+ size_t n = this.rows;
+ assert( n == this.columns, "Inversion of non-square matrix." );
+
+ auto alloc = newRegionAllocator();
+ auto ipiv = alloc.uninitializedArray!( int[] )( n );
+ auto work = alloc.uninitializedArray!( ElementType[] )( n );
+
+ lapack.getrf( n, n, this.data, this.leading, ipiv.ptr, info );
+		lapack.getri( n, this.data, this.leading, ipiv.ptr, work.ptr, work.length, info );
+
+ enforce( info == 0, "Inversion of singular matrix." );
+ }
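+
+	// Worked 2-by-2 example of what invert() computes: getrf factorizes the matrix as P*L*U in place and
+	// getri then assembles the inverse from those factors.
+	//   A = [ 4  7 ]    det(A) = 4*6 - 7*2 = 10    A^-1 = (1/10) * [  6 -7 ] = [  0.6 -0.7 ]
+	//       [ 2  6 ]                                               [ -2  4 ]   [ -0.2  0.4 ]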
+
+ void solveRight( Transpose transM = Transpose.no, Side side, Dest )( auto ref Dest dest ) if( isStridedVectorStorage!Dest || isGeneralMatrixStorage!Dest ) {
+ import scid.blas;
+ import scid.lapack;
+ import std.exception;
+
+ if( this.empty && dest.empty )
+ return;
+
+		size_t n = this.rows;
+		assert( n == this.columns, "solveRight() called with a non-square coefficient matrix." );
+
+ enum vectorRhs = isStridedVectorStorage!Dest; // if we're solving for a vector
+ auto alloc = newRegionAllocator(); // allocator for all the copies we need to perform
+		auto ipiv = alloc.uninitializedArray!(int[])( n ).ptr; // pivot indices from the LU decomposition
+		auto a = alloc.uninitializedArray!(ElementType[])( n * n ).ptr; // working copy of this matrix's data
+		int info = 0; // status code returned by the LAPACK calls
+
+		// If the storage orders differ and we need to perform a hermitian transpose, then the result is
+		// the same as taking the conjugate without transposing.
+ static if( !vectorRhs )
+ enum bool conjugateNoTrans = isComplexScalar!ElementType && transM && storageOrder != storageOrderOf!Dest;
+ else
+ enum bool conjugateNoTrans = false;
+
+ // copy this matrix's data (don't transpose, we'll use getrs's transpose for performance)
+ static if( conjugateNoTrans )
+ blas.xgecopyc!'n'( n, n, this.cdata, this.leading, a, n );
+ else
+ blas.xgecopy!'n'( n, n, this.cdata, this.leading, a, n );
+
+ static if( vectorRhs ) {
+ ElementType *b; // if the rhs is a vector, then make it look like a matrix, by defining
+ enum nrhs = 1; // the no. of columns (nrhs) and
+ auto ldb = dest.length; // the leading dimension
+
+ if( dest.stride != 1 ) {
+ // if the destination's stride is != 1, we need to copy it to a temp array.
+ b = alloc.uninitializedArray!(ElementType[])( ldb ).ptr;
+ blas.copy( ldb, dest.cdata, dest.stride, b, 1 );
+ } else {
+ // if the stride is 1, we can simply use dest's data for b
+ b = dest.data;
+ }
+ } else {
+ ElementType *b = dest.data; // if the rhs is a matrix simply grab the parameters from it
+ auto nrhs = dest.columns;
+ auto ldb = dest.leading;
+ }
+
+ // Figure out what kind of transposition we've got (transM == true means it could be hermitian)
+ static if( vectorRhs ) {
+ enum chTrans = transM ? (isComplexScalar!ElementType ? 'C' : 'T') : 'N';
+ } else static if( transposeStorageOrder!(storageOrder, transM) != storageOrderOf!Dest ) {
+ static if( !isComplexScalar!ElementType || conjugateNoTrans )
+ enum chTrans = 'T';
+ else
+ enum chTrans = 'C';
+ } else {
+ enum chTrans = 'N';
+ }
+
+ enum chSide = (side == Side.Left) ? 'L' : 'R';
+
+ lapack.getrf( n, n, a, n, ipiv, info ); // perform LU decomposition
+		enforce( info == 0, "solveRight() called with a singular matrix." );
+		lapack.xgetrs!(chTrans, chSide)( n, nrhs, a, n, ipiv, b, ldb, info ); // solve using the LU factors
+
+ static if( vectorRhs ) {
+ // copy the data back to dest if needed
+ if( dest.stride != 1 )
+ blas.copy( ldb, b, 1, dest.data, dest.stride );
+ }
+ }
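+
+	// Usage sketch (names are illustrative): with the coefficient matrix A held in this storage and a
+	// column vector rhs, solveRight!(Transpose.no, Side.Left)( rhs ) overwrites rhs with the solution x of
+	// A * x = rhs, while Side.Right solves x * A = rhs instead. The LU factorization is done on a temporary
+	// copy, so this storage itself is left unmodified.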
+
+ void matrixProduct( Transpose transA = Transpose.no, Transpose transB = Transpose.no, A, B )
+ ( ElementType alpha, auto ref A a, auto ref B b, ElementType beta ) if( isGeneralMatrixStorage!A && isGeneralMatrixStorage!B ) {
+ import scid.blas;
+
+ // Are the matrices row major?
+ enum aRowMajor = storageOrderOf!A == StorageOrder.RowMajor;
+ enum bRowMajor = storageOrderOf!B == StorageOrder.RowMajor;
+ enum cRowMajor = storageOrder == StorageOrder.RowMajor;
+
+		// Should a and b be read minor-first or major-first?
+ enum aByMinor = transA ^ aRowMajor ^ cRowMajor;
+ enum bByMinor = transB ^ bRowMajor ^ cRowMajor;
+
+ // Do the matrices have complex elements?
+ enum complexElements = isComplexScalar!ElementType;
+
+ static if( complexElements ) {
+ enum aConj = transA && !aByMinor, aHerm = transA && aByMinor;
+ enum bConj = transB && !bByMinor, bHerm = transB && bByMinor;
+
+ static if( aByMinor )
+ enum aTransChar = bConj || !transA ? 'T' : 'C';
+ else
+ enum aTransChar = 'N';
+
+ static if( bByMinor )
+ enum bTransChar = aConj || !transB ? 'T' : 'C';
+ else
+ enum bTransChar = 'N';
+
+ enum conjugateResult = aConj && bHerm || bConj && aHerm || aConj && bConj;
+
+ enum aTemp = aConj && !transB;
+ enum bTemp = bConj && !transA;
+
+ static if( aTemp || bTemp )
+ auto alloc = newRegionAllocator();
+
+ static if( aTemp ) {
+			auto aData = cast(ElementType*)alloc.allocate( a.rows * a.columns * ElementType.sizeof );
+ auto aLeading = a.minor;
+
+ blas.xgecopyc!'N'( a.minor, a.major, a.cdata, a.leading, aData, aLeading );
+ } else {
+ auto aData = a.cdata, aLeading = a.leading;
+ }