Skip to content

Commit

Permalink
fix eigensolver
Browse files Browse the repository at this point in the history
  • Loading branch information
jcosborn committed Sep 5, 2023
1 parent bc25510 commit 7af929d
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 9 deletions.
5 changes: 5 additions & 0 deletions src/base/stdUtils.nim
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,11 @@ proc `-`*[T1,T2](x: seq[T1], y: openArray[T2]): auto {.inline,noInit.} =
r[i] = x[i] - y[i]
r

proc `:=`*[T](r: var openArray[T], x: openArray[T]) {.inline.} =
let n = r.len
for i in 0..<n:
r[i] := x[i]

proc `+=`*[T](r: var openArray[T], x: openArray[T]) {.inline.} =
let n = r.len
for i in 0..<n:
Expand Down
25 changes: 18 additions & 7 deletions src/eigens/linalgFuncs.nim
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ template dvec_alloc*(x: var dvec, n: int) =
x.isSub = false
proc newDvec*(n: int): dvec =
result.dvec_alloc(n)
template `&`*(x: dvec): untyped = cast[ptr carray[float]](addr x[0])
template `&`*(x: dvec): auto = cast[ptr carray[float]](addr x[0])
template `&`*(x: ptr carray[float]): auto = addr x[][0]
template vec_size(x: dvec): int = x.len
template `[]`*(x: dvec, i: int): untyped = x.dat[i]
template `[]=`*(x: dvec, i: int, y: untyped): untyped = x.dat[i] = y
Expand Down Expand Up @@ -147,6 +148,7 @@ proc zewrk*(n: int): ptr zheevTmp =
w.rwork = cast[ptr cdouble](alloc(3*n*sizeof(cdouble)))
return addr(w)

template `&`(x: zmat): auto = cast[ptr float](addr(x.dat[0]))
proc zeigs*(m: ptr float64; d: ptr float64; n: int) =
var info: fint
var t = zewrk(n)
Expand All @@ -155,6 +157,8 @@ proc zeigs*(m: ptr float64; d: ptr float64; n: int) =
var mm = cast[ptr dcomplex](m)
zheev("V", "L", an, mm, an, d, t.work, addr(t.lwork), t.rwork, addr(info))

proc zeigs*(m: zmat; d: dvec; n: int) = zeigs(&m, & &d, n)

type
zgeev_tmp* = object
work*: ptr dcomplex
Expand Down Expand Up @@ -520,17 +524,24 @@ when isMainModule:
]#

proc testSvdbd(n: int) =
var v = newSeq[float](n)
var d = newSeq[float](n)
var e = newSeq[float](n)
#var v = newSeq[float](n)
#var d = newSeq[float](n)
#var e = newSeq[float](n)
var v = newDvec(n)
var d = newDvec(n)
var e = newDvec(n)
template `&`(x: seq): untyped = cast[ptr carray[float]](addr x[0])
for i in 0..<n:
d[i] = (i+1).float
e[i] = -(i+1).float
svdbi(&v, &d, &e, n)
svdbi(&v, &d, &e, n)
template svdb(ev,a,b,k:auto) = svdbi(&ev,&a,&b,k)
#template svdb(ev,a,b,k:auto) = svdbi1(&ev,a,b,k)
#template svdb(ev,a,b,k:auto) = svdbi(&ev,a,b,k,k)
#template svdb(ev,a,b,k:auto) = dsvdbi2(a,b,k)
svdb(v, d, e, n)
svdb(v, d, e, n)
let t0 = epochTime()
svdbi(&v, &d, &e, n)
svdb(v, d, e, n)
let t1 = epochTime()
#for i in 0..<n:
# echo i, ": ", v[i]
Expand Down
4 changes: 2 additions & 2 deletions src/eigens/svdbi4.nim
Original file line number Diff line number Diff line change
Expand Up @@ -165,12 +165,12 @@ proc svdBi4*(e: any; m: dmat; ma: dmat; a: any; b: any; k: int;
getu(a, b, v[i], va[i])
var rsq = checksv(v[i], va[i], a, b, ev)
#echo i, ": ", e[i], " ", rsq
if ig > 0:
if ig > -1:
#printf0("svdvec %3i %3i %5i %13g %13g %13g %13g\x0A",
# i, ib, ig, ev, x, rsq, dvec_get(tg, i0))
echo "svdvec $# $# $# $# $# $# $#" %
[$i, $ib, $ig, $ev, $x, $rsq, $tg[i0]]
if ig > 10 or rsq < rsqstop:
if ig >= k-1 or rsq < rsqstop:
if ig == 0 and x < dotstop: ib = i
break
if ig == 0:
Expand Down
58 changes: 58 additions & 0 deletions src/physics/stagD.nim
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,39 @@ template stagDM*(sd:StaggeredD; r:Field; g:openArray[Field2];
#threadBarrier()
toc("boundaryB")

# modified D: Df + Db
template stagDFBX*(sd:StaggeredD; r:Field; g:openArray[Field2];
x:Field3; expFlops:int; exp:untyped) =
tic()
for mu in 0..<g.len:
startSB(sd.sf[mu], x[ix])
toc("startShiftF")
for mu in 0..<g.len:
startSB(sd.sb[mu], g[mu][ix].adj*x[ix])
toc("startShiftB")
block:
for irr in r[sd.subset]:
var ir{.inject.} = irr
var rir{.inject,noInit.}:evalType(load1(r[ir]))
exp
for mu in 0..<g.len:
localSB(sd.sf[mu], ir, imadd(rir, g[mu][ir], it), load1(x[ix]))
for mu in 0..<g.len:
localSB(sd.sb[mu], ir, iadd(rir, it), g[mu][ix].adj*x[ix])
assign(r[ir], rir)
toc("local", flops=(expFlops+g.len*(72+66+6))*sd.subset.len)
for mu in 0..<g.len:
template f(ir2,it: untyped): untyped =
imadd(r[ir2], g[mu][ir2], it)
boundarySB2(sd.sf[mu], f)
toc("boundaryF")
for mu in 0..<g.len:
template f(ir2,it: untyped): untyped =
iadd(r[ir2], it)
boundarySB2(sd.sb[mu], f)
#threadBarrier()
toc("boundaryB")

# r = a*r + b*x + (2D)*x
proc stagD2*(sd:StaggeredD; r:SomeField; g:openArray[Field2];
x:Field3; a:SomeNumber; b:SomeNumber2) =
Expand Down Expand Up @@ -393,6 +426,31 @@ proc stagD2ee*(sde,sdo:StaggeredD; r:Field; g:openArray[Field2];
# msubVSVV(r[ir], m2, x[ir], r[ir])
#r[sde.sub] := 0.25*r

# r = m2 + Deo * Doe
# modified D: Df + Db
proc stagD2eeFB*(sde,sdo:StaggeredD; r:Field; g:openArray[Field2];
x:Field; m2:SomeNumber) =
tic()
var t{.global.}:evalType(x)
if t==nil:
threadBarrier()
if threadNum==0:
t = newOneOf(x)
threadBarrier()
toc("stagD2ee init")
block:
stagDFBX(sdo, t, g, x, 0):
rir := 0
toc("stagD2ee DP")
threadBarrier()
#t := -t
threadBarrier()
toc("stagD2ee barrier")
block:
stagDFBX(sde, r, g, t, 6):
rir := (4.0*m2)*x[ir]
toc("stagD2ee DM")

#[
# r = m2 - Deo * Doe
proc stagD2eeN*(sde,sdo:StaggeredD; r:Field; g:openArray[Field2];
Expand Down

0 comments on commit 7af929d

Please sign in to comment.