Skip to content
This repository has been archived by the owner on Nov 24, 2018. It is now read-only.

Commit

Permalink
native: cleanup Dlasy2 for 2x2 case
Browse files Browse the repository at this point in the history
  • Loading branch information
vladimir-ch committed Jun 9, 2016
1 parent 4fc6a17 commit 18eb973
Showing 1 changed file with 59 additions and 55 deletions.
114 changes: 59 additions & 55 deletions native/dlasy2.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,86 +174,90 @@ func (impl Implementation) Dlasy2(tranl, tranr bool, isgn, n1, n2 int, tl []floa
smin = math.Max(smin, math.Max(math.Abs(tl[ldtl]), math.Abs(tl[ldtl+1])))
smin = math.Max(eps*smin, smlnum)

var t16 [16]float64 // t16 is used as a 4×4 row-major matrix.
t16[0*4+0] = tl[0] + sgn*tr[0]
t16[1*4+1] = tl[ldtl+1] + sgn*tr[0]
t16[2*4+2] = tl[0] + sgn*tr[ldtr+1]
t16[3*4+3] = tl[ldtl+1] + sgn*tr[ldtr+1]
var t [4][4]float64
t[0][0] = tl[0] + sgn*tr[0]
t[1][1] = tl[0] + sgn*tr[ldtr+1]
t[2][2] = tl[ldtl+1] + sgn*tr[0]
t[3][3] = tl[ldtl+1] + sgn*tr[ldtr+1]
if tranl {
t16[0*4+1] = tl[ldtl]
t16[1*4+0] = tl[1]
t16[2*4+3] = tl[ldtl]
t16[3*4+2] = tl[1]
t[0][2] = tl[ldtl]
t[1][3] = tl[ldtl]
t[2][0] = tl[1]
t[3][1] = tl[1]
} else {
t16[0*4+1] = tl[1]
t16[1*4+0] = tl[ldtl]
t16[2*4+3] = tl[1]
t16[3*4+2] = tl[ldtl]
t[0][2] = tl[1]
t[1][3] = tl[1]
t[2][0] = tl[ldtl]
t[3][1] = tl[ldtl]
}
if tranr {
t16[0*4+2] = sgn * tr[1]
t16[1*4+3] = sgn * tr[1]
t16[2*4+0] = sgn * tr[ldtr]
t16[3*4+1] = sgn * tr[ldtr]
t[0][1] = sgn * tr[1]
t[1][0] = sgn * tr[ldtr]
t[2][3] = sgn * tr[1]
t[3][2] = sgn * tr[ldtr]
} else {
t16[0*4+2] = sgn * tr[ldtr]
t16[1*4+3] = sgn * tr[ldtr]
t16[2*4+0] = sgn * tr[1]
t16[3*4+1] = sgn * tr[1]
t[0][1] = sgn * tr[ldtr]
t[1][0] = sgn * tr[1]
t[2][3] = sgn * tr[ldtr]
t[3][2] = sgn * tr[1]
}

var btmp [4]float64
btmp[0] = b[0]
btmp[1] = b[ldb]
btmp[2] = b[1]
btmp[1] = b[1]
btmp[2] = b[ldb]
btmp[3] = b[ldb+1]

// Perform elimination.
var jpiv [4]int
var jpiv [4]int // jpiv records any column swaps for pivoting.
for i := 0; i < 3; i++ {
var (
xmax float64
ipsv, jpsv int
)
for ip := i; ip < 4; ip++ {
for jp := i; jp < 4; jp++ {
if math.Abs(t16[ip*4+jp]) >= xmax {
xmax = math.Abs(t16[ip*4+jp])
if math.Abs(t[ip][jp]) >= xmax {
xmax = math.Abs(t[ip][jp])
ipsv = ip
jpsv = jp
}
}
}
if ipsv != i {
// Swap rows ipsv and i.
bi.Dswap(4, t16[ipsv*4:], 1, t16[i*4:], 1)
// The pivot is not in the top row of the unprocessed
// block, swap rows ipsv and i of t and btmp.
t[ipsv], t[i] = t[i], t[ipsv]
btmp[ipsv], btmp[i] = btmp[i], btmp[ipsv]
}
if jpsv != i {
// Swap columns jpsv and i.
bi.Dswap(4, t16[jpsv:], 4, t16[i:], 4)
// The pivot is not in the left column of the
// unprocessed block, swap columns jpsv and i of t.
for k := 0; k < 4; k++ {
t[k][jpsv], t[k][i] = t[k][i], t[k][jpsv]
}
}
jpiv[i] = jpsv
if math.Abs(t16[i*4+i]) < smin {
if math.Abs(t[i][i]) < smin {
ok = false
t16[i*4+i] = smin
t[i][i] = smin
}
for j := i + 1; j < 4; j++ {
t16[j*4+i] /= t16[i*4+i]
btmp[j] -= t16[j*4+i] * btmp[i]
for k := i + 1; k < 4; k++ {
t16[j*4+k] -= t16[j*4+i] * t16[i*4+k]
for k := i + 1; k < 4; k++ {
t[k][i] /= t[i][i]
btmp[k] -= t[k][i] * btmp[i]
for j := i + 1; j < 4; j++ {
t[k][j] -= t[k][i] * t[i][j]
}
}
}
if math.Abs(t16[3*4+3]) < smin {
t16[3*4+3] = smin
if math.Abs(t[3][3]) < smin {
t[3][3] = smin
}
scale = 1
if 8*smlnum*math.Abs(btmp[0]) > math.Abs(t16[0*4+0]) ||
8*smlnum*math.Abs(btmp[1]) > math.Abs(t16[1*4+1]) ||
8*smlnum*math.Abs(btmp[2]) > math.Abs(t16[2*4+2]) ||
8*smlnum*math.Abs(btmp[3]) > math.Abs(t16[3*4+3]) {
if 8*smlnum*math.Abs(btmp[0]) > math.Abs(t[0][0]) ||
8*smlnum*math.Abs(btmp[1]) > math.Abs(t[1][1]) ||
8*smlnum*math.Abs(btmp[2]) > math.Abs(t[2][2]) ||
8*smlnum*math.Abs(btmp[3]) > math.Abs(t[3][3]) {

maxbtmp := math.Max(math.Abs(btmp[0]), math.Abs(btmp[1]))
maxbtmp = math.Max(maxbtmp, math.Max(math.Abs(btmp[2]), math.Abs(btmp[3])))
Expand All @@ -263,24 +267,24 @@ func (impl Implementation) Dlasy2(tranl, tranr bool, isgn, n1, n2 int, tl []floa
btmp[2] *= scale
btmp[3] *= scale
}
// Compute the solution of the upper triangular system t * tmp = btmp.
var tmp [4]float64
for i := 0; i < 4; i++ {
k := 3 - i
temp := 1 / t16[k*4+k]
tmp[k] = btmp[k] * temp
for j := k + 1; j < 4; j++ {
tmp[k] -= temp * t16[k*4+j] * tmp[j]
for i := 3; i >= 0; i-- {
temp := 1 / t[i][i]
tmp[i] = btmp[i] * temp
for j := i + 1; j < 4; j++ {
tmp[i] -= temp * t[i][j] * tmp[j]
}
}
for i := 0; i < 3; i++ {
if jpiv[2-i] != 2-i {
tmp[2-i], tmp[jpiv[2-i]] = tmp[jpiv[2-i]], tmp[2-i]
for i := 2; i >= 0; i-- {
if jpiv[i] != i {
tmp[i], tmp[jpiv[i]] = tmp[jpiv[i]], tmp[i]
}
}
x[0] = tmp[0]
x[ldx] = tmp[1]
x[1] = tmp[2]
x[1] = tmp[1]
x[ldx] = tmp[2]
x[ldx+1] = tmp[3]
xnorm = math.Max(math.Abs(tmp[0])+math.Abs(tmp[2]), math.Abs(tmp[1])+math.Abs(tmp[3]))
xnorm = math.Max(math.Abs(tmp[0])+math.Abs(tmp[1]), math.Abs(tmp[2])+math.Abs(tmp[3]))
return scale, xnorm, ok
}

0 comments on commit 18eb973

Please sign in to comment.