Skip to content

Commit

Permalink
fix eigensolver
Browse files Browse the repository at this point in the history
fix omp pragmas with debug line info
  • Loading branch information
jcosborn committed Feb 14, 2024
1 parent 756b49f commit 947147b
Show file tree
Hide file tree
Showing 10 changed files with 174 additions and 90 deletions.
6 changes: 4 additions & 2 deletions src/base/omp.nim
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,13 @@ else:
#proc forceOmpOn() {.omp.}
template ompPragma(p:string):untyped =
#forceOmpOn()
{. emit:["#pragma omp ", p] .}
#{. emit:["#pragma omp ", p] .}
{. emit:["_Pragma(\"omp ", p, "\");"] .}
template ompBlock*(p:string; body:untyped):untyped =
#{. emit:"#pragma omp " & p .}
#{. emit:"{ /* Inserted by ompBlock " & p & " */".}
{. emit:["#pragma omp ", p] .}
#{. emit:["#pragma omp ", p] .}
ompPragma(p)
block:
body
#{. emit:"} /* End ompBlock " & p & " */".}
Expand Down
6 changes: 6 additions & 0 deletions src/bench/benchLinalg.nim
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,12 @@ proc test(lat:auto) =
for e in v2:
v2[e] += m1[e] * row(m2[e],0)

bench(mvf, mb+2*vb, mb+2*vb):
for e in v2:
var t {.noInit.}: evalType(v1[e])
perm(t, 1, v1[e])
v2[e] := m1[e] * t

bench(nc*mvf, 3*mb, 3*mb):
m3 := m1 * m2

Expand Down
40 changes: 20 additions & 20 deletions src/eigens/hisqev.nim
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ proc sort[T](t: var seq[EigTable[T]], a=0, bb= -1) =
jmin = j
if jmin!=i: swap(t[i], t[jmin])

proc sett[T](t: var EigTable[T], op: any) =
proc sett[T](t: var EigTable[T], op: auto) =
mixin `:=`
t.vn = t.v.even.norm2
if t.vn==0:
Expand All @@ -58,7 +58,7 @@ proc sett[T](t: var EigTable[T], op: any) =
t.converged = false
t.locked = false

proc maketable[T,U](t: var seq[EigTable[T]], v: var seq[U], op: any) =
proc maketable[T,U](t: var seq[EigTable[T]], v: var seq[U], op: auto) =
for i in 0..<v.len:
let tv = t[i].v
t[i].v = v[i]
Expand All @@ -67,7 +67,7 @@ proc maketable[T,U](t: var seq[EigTable[T]], v: var seq[U], op: any) =
#echo i, " ", t[i].sv
sort(t)

proc setterr(t: var EigTable, op: any) =
proc setterr(t: var EigTable, op: auto) =
mixin `:=`
let Dv = op.newVector
op.apply(Dv.odd, t.v.even)
Expand All @@ -78,16 +78,16 @@ proc setterr(t: var EigTable, op: any) =
let r2 = Dv.even.norm2/t.vn
t.err = abs(t.sv - sqrt(abs(t.sv*t.sv-sqrt(r2))))

proc geterr(v: var seq[EigTable], op: any) =
proc geterr(v: var seq[EigTable], op: auto) =
for i in 0..<v.len:
setterr(v[i], op)

# x = x - (<y.x>/<y.y>) y
proc projectOutV(x: any, y: EigTable) =
proc projectOutV(x: auto, y: EigTable) =
let c = y.v.even.dot(x)/y.vn
x.even -= c*y.v

proc projectLocked(t: any, v: any) =
proc projectLocked(t: auto, v: auto) =
for i in 0..<t.len:
if not t[i].locked: break
for j in 0..<v.len:
Expand All @@ -97,7 +97,7 @@ proc projectOut(x,y: EigTable) =
let c = y.v.even.dot(x.v)/y.vn
x.v.even -= c*y.v

proc ortho1(t: var any; imn,imx: int, op: var LinOp) =
proc ortho1(t: var auto; imn,imx: int, op: var LinOp) =
threads(t):
for j in 0..<imn:
for i in imn..imx:
Expand All @@ -111,7 +111,7 @@ proc ortho1(t: var any; imn,imx: int, op: var LinOp) =
t[i].projectOut(t[j])
sett(t[i], op)

proc ortho2(t: var any; imn,imx: int, op: LinOp) =
proc ortho2(t: var auto; imn,imx: int, op: LinOp) =
for i in imn..imx:
var i0 = i
for j in (i+1)..imx:
Expand All @@ -130,7 +130,7 @@ proc ortho2(t: var any; imn,imx: int, op: LinOp) =
sett(t[j], op)

# proc orthonormalize(v, vg, "even")
proc rayleighRitz2(t: var any, a,b: int) =
proc rayleighRitz2(t: var auto, a,b: int) =
mixin dot
let n = b-a+1
let n2 = n*n
Expand Down Expand Up @@ -159,7 +159,7 @@ proc rayleighRitz2(t: var any, a,b: int) =
#echo t[i].vn
t[a+i].sv = sqrt(ev[i])

proc rayleighRitz(t: var any, a,b: int, op: any) =
proc rayleighRitz(t: var auto, a,b: int, op: auto) =
mixin adj
tic()
mixin dot, simdSum, rankSum, `:=`
Expand Down Expand Up @@ -261,7 +261,7 @@ proc sortdown(t: var seq[EigTable], n0,n: int): int =
imin
template sortdown(t: var seq[EigTable], n: int): int = sortdown(t, 0, n)

proc merge2(t1: var seq[EigTable], t2: seq[EigTable], rrbs: int, op: any) =
proc merge2(t1: var seq[EigTable], t2: seq[EigTable], rrbs: int, op: auto) =
for i in range(t2.len):
t1.add t2[i]

Expand All @@ -286,7 +286,7 @@ proc merget(vt1: var seq[EigTable]; vt2: var seq[EigTable];
ng,nmx,rrbs: int, op: var LinOp) =
tic()
var nt = vt1.len + vt2.len
var ngd = min(ng,nt)
#var ngd = min(ng,nt)
var nmax = min(nmx,nt)
var t = newSeq[type(vt1[0])](nt)
var i1,i2: int
Expand Down Expand Up @@ -318,7 +318,7 @@ proc merget(vt1: var seq[EigTable]; vt2: var seq[EigTable];
while rrMin < ng:
tic()
rrMax = rrMin + rrLen - 1
var rrMinNext = (rrMin + rrMax + 1) div 2
#var rrMinNext = (rrMin + rrMax + 1) div 2
if rrMax >= nt: rrMax = nt-1

echo "rrMin: ", rrMin, " rrMax: ", rrMax
Expand Down Expand Up @@ -362,7 +362,7 @@ proc merget(vt1: var seq[EigTable]; vt2: var seq[EigTable];
for i in 0..<vt1.len: vt1[i] = t[i]
toc("merge end")

proc svd(op: any, src: any, v: var any, sits: int, emin,emax: float): int =
proc svd(op: auto, src: auto, v: var auto, sits: int, emin,emax: float): int =
let n = v.len
var sv = newSeq[float](n)
var qv = newSeq[type(v[0].even)](n)
Expand Down Expand Up @@ -403,7 +403,7 @@ proc initOpts*(eo: var EigOpts) =
## op: operator object
## opts: options onject
## vv: seq of vectors
proc hisqev*(op: var LinOp, opts: any, vv: any): auto =
proc hisqev*(op: var LinOp, opts: auto, vv: auto): auto =
let ng = opts.nev
let nvt = opts.nvecs
let relerr = opts.relerr
Expand Down Expand Up @@ -542,7 +542,7 @@ proc hisqev*(op: var LinOp, opts: any, vv: any): auto =
echo "total time = $1 seconds"%[t1|-6]
vt1

proc hisqev*(op: var LinOp, opts: any): auto =
proc hisqev*(op: var LinOp, opts: auto): auto =
var v = newSeq[type(op.newVector)](0)
result = hisqev(op, opts, v)

Expand Down Expand Up @@ -591,7 +591,7 @@ when isMainModule:
r: type(rs)
lo: type(lo)
var op = MyOp(r:rs,s:s,lo:lo)
template rand(op: MyOp, v: any) =
template rand(op: MyOp, v: auto) =
gaussian(v, op.r)
template newVector(op: MyOp): untyped =
op.lo.ColorVector()
Expand Down Expand Up @@ -643,11 +643,11 @@ when isMainModule:
if myRank==0:
src{0}[0] := 1

proc getResid(r: any, d,s: any) =
proc getResid(r: auto, d,s: auto) =
apply(op, r.odd, d.even)
applyAdj(op, t.even, r.odd)
r.even := s - 4.0*(t + m2*d)
proc rsolve(dt: any, sc: any, m: float, sp: var SolverParams) =
proc rsolve(dt: auto, sc: auto, m: float, sp: var SolverParams) =
getResid(t2, dt, sc)
let s1 = sc.even.norm2
let s2 = t2.even.norm2
Expand Down Expand Up @@ -703,7 +703,7 @@ when isMainModule:

#[
var tv = op.newVector
proc unc(vs: any) =
proc unc(vs: auto) =
let n = vs.len
var vt = newSeq[EigTable[type(vs[0])]](n)
for i in 0..<n:
Expand Down
74 changes: 47 additions & 27 deletions src/eigens/linalgFuncs.nim
Original file line number Diff line number Diff line change
Expand Up @@ -33,21 +33,21 @@ proc newDvec*(n: int): dvec =
result.dvec_alloc(n)
template `&`*(x: dvec): auto = cast[ptr carray[float]](addr x[0])
template `&`*(x: ptr carray[float]): auto = addr x[][0]
template vec_size(x: dvec): int = x.len
#template vec_size(x: dvec): int = x.len
template `[]`*(x: dvec, i: int): untyped = x.dat[i]
template `[]=`*(x: dvec, i: int, y: untyped): untyped = x.dat[i] = y
template dvec_get*(x: dvec, i: int): float64 = x[i]
template dvec_set*(x: dvec, i: int, y: float64) = x[i] = y
template dsubvec(x: dvec, y: dvec, i,n: int) =
x.new
x.dat = cast[type(x.dat)](addr(y[i]))
x.`len` = n
x.stride = 1
x.isSub = true
template v_eq_zero(x: dvec) =
for i in 0..<x.len: x[i] = 0.0
template v_eq_v(x,y: dvec) =
for i in 0..<x.len: x[i] = y[i]
#template dsubvec(x: dvec, y: dvec, i,n: int) =
# x.new
# x.dat = cast[type(x.dat)](addr(y[i]))
# x.`len` = n
# x.stride = 1
# x.isSub = true
#template v_eq_zero(x: dvec) =
# for i in 0..<x.len: x[i] = 0.0
#template v_eq_v(x,y: dvec) =
# for i in 0..<x.len: x[i] = y[i]
template norm2_v(x: dvec): float =
var r = 0.0
for i in 0..<x.len: r += x[i]*x[i]
Expand All @@ -56,12 +56,12 @@ template dot*(x,y: dvec): float =
var r = 0.0
for i in 0..<x.len: r += x[i]*y[i]
r
template v_eq_r_times_v(x: dvec, r: float, y: dvec) =
for i in 0..<x.len: x[i] = r*y[i]
#template v_eq_r_times_v(x: dvec, r: float, y: dvec) =
# for i in 0..<x.len: x[i] = r*y[i]
template v_peq_r_times_v(x: dvec, r: float, y: dvec) =
for i in 0..<x.len: x[i] += r*y[i]
template v_meq_r_times_v(x: dvec, r: float, y: dvec) =
for i in 0..<x.len: x[i] -= r*y[i]
#template v_meq_r_times_v(x: dvec, r: float, y: dvec) =
# for i in 0..<x.len: x[i] -= r*y[i]
template daxpy*(a: float, x: dvec, y: dvec) = v_peq_r_times_v(y, a, x)
proc normalize*(x: dvec) =
let n2 = norm2_v(x)
Expand All @@ -81,6 +81,7 @@ template dmat_alloc*(x: dmat, nr,nc: int) =
x.isSub = false
proc newDmat*(nr,nc: int): dmat =
result.dmat_alloc(nr, nc)
#template `&`(x: dmat): auto = cast[ptr float](addr(x.dat[0]))

proc zmat_free(x: zmat) =
if not x.isSub:
Expand All @@ -103,7 +104,7 @@ template `[]`*(x: dmat|zmat, i,j: int): untyped = x.dat[i+j*x.nrows]
template `[]=`*(x: dmat|zmat, i,j: int, y: untyped): untyped =
x.dat[i+j*x.nrows] = y
template dmat_get*(x: dmat, i,j: int): float64 = x[i,j]
template dmat_set(x: dmat, i,j: int, y: float64) = x[i,j] = y
#template dmat_set(x: dmat, i,j: int, y: float64) = x[i,j] = y
template norm2*(x: dmat): float =
var r = 0.0
for i in 0..<x.nrows:
Expand Down Expand Up @@ -215,8 +216,8 @@ proc zgeigsv*(m: ptr zmat; d: ptr zvec; vl: ptr zmat; vr: ptr zmat; n: cint) =
# A x = lambda B x
proc zeigsgv*(a: ptr float64; b: ptr float64; e: ptr float64; n: int) =
var itype = 1.fint
var jobz = "V"
var uplo = "L"
var jobz = cstring "V"
var uplo = cstring "L"
var nn = fint(n)
var an = addr nn
#var aa = cast[ptr dcomplex](a)
Expand Down Expand Up @@ -285,8 +286,8 @@ proc svdbi*(ev: ptr carray[float64], a: ptr carray[float64],

proc svdbi*(ev: ptr carray[float64], a: ptr carray[float64],
b: ptr carray[float64], n,nv: int) =
var rnge = "I"
var order = "E"
var rnge = cstring "I"
var order = cstring "E"
var nn = fint(2*n)
var vl = 0.0
var vu = 9e99
Expand Down Expand Up @@ -327,7 +328,7 @@ proc svdbi*(ev: ptr carray[float64], a: ptr carray[float64],

proc svdBidiag1*(d: ptr carray[float64], e: ptr carray[float64],
v: ptr carray[float64], u: ptr carray[float64], n,k: int) =
let uplo = "U"
let uplo = cstring "U"
var nn = fint(n)
var an = addr nn
#var nv = ffint(k)
Expand Down Expand Up @@ -366,8 +367,8 @@ proc svdBidiag1*(d: ptr carray[float64], e: ptr carray[float64],

proc svdBidiag*(d: ptr carray[float64], e: ptr carray[float64],
v: ptr carray[float64], u: ptr carray[float64], n,k: int) =
let uplo = "U"
let compq = "I"
let uplo = cstring "U"
let compq = cstring "I"
var nn = fint(n)
var work = cast[ptr float64](alloc((3*n*n+4*n)*sizeof(float64)))
var iwork = cast[ptr fint](alloc(2*8*n*sizeof(fint)))
Expand Down Expand Up @@ -401,9 +402,9 @@ proc svdBidiag*(d: ptr carray[float64], e: ptr carray[float64],
proc svdBidiag3*(d: ptr carray[float64], e: ptr carray[float64],
v: ptr carray[float64], u: ptr carray[float64], n,k: int) =
let nv = fint(max(k,1))
let uplo = "U"
let jobz = if k>0: "V" else: "N"
let rnge = if k>0: "I" else: "A"
let uplo = cstring "U"
let jobz = cstring(if k>0: "V" else: "N")
let rnge = cstring(if k>0: "I" else: "A")
var nn = fint(n)
var vl = 0.0
var vu = 1e99
Expand Down Expand Up @@ -437,7 +438,7 @@ proc svdBidiag3*(d: ptr carray[float64], e: ptr carray[float64],
if not u.isNil:
for i in 0..<k:
#let ii = 2*n*(k-1-i)
let ii = 2*n*i
let ii = 2*n*(k-1-i)
for j in 0..<n:
u[i*n+j] = z[ii+j]
v[i*n+j] = z[ii+n+j]
Expand All @@ -453,6 +454,25 @@ proc svdBidiag3*(d: ptr carray[float64], e: ptr carray[float64],
dealloc iwork
#echo "here2"

## svd of bidiagonal matrix
## ev: singular values
## m: A ma = e m
## k: number of singular values wanted
proc svdBi3*(ev: auto; m: dmat; ma: dmat; a: auto; b: auto; k: int;
nx: int; nax: int; emin: float64; emax: float64): int =
let n = a.len;
var d = cast[ptr carray[float64]](alloc(n*sizeof(float64)))
var e = cast[ptr carray[float64]](alloc(n*sizeof(float64)))
for i in 0..<n:
d[i] = a[i]
if i<n-1: e[i] = b[i]
svdBidiag3(d, e, m.dat, ma.dat, n, k)
for i in 0..<k:
ev[i] = d[i]
dealloc(d)
dealloc(e)
result = k

proc dsvdbi2*(a: dvec, b: dvec, n: int) =
var d = cast[ptr carray[float64]](alloc(n*sizeof(float64)))
var e = cast[ptr carray[float64]](alloc(n*sizeof(float64)))
Expand Down
Loading

0 comments on commit 947147b

Please sign in to comment.