substitutions from PR:
    rov -> rv
    Au -> a_u
    Ad -> a_d
    nomw -> ww
    taut -> tau_t
    Au_new -> a_u_new

| rv = r0^T v                           <- dot
| alpha = rho / rv
| u_mp1 = u_m - alpha * v
sigma = (theta^2 / alpha) * eta;
w = w - alpha * a_u
d = pu_m + sigma * d
a_d = a_u + sigma * a_d
ww = w^T w                              <- dot
theta = ww / tau_t
c_mp1 = 1 / (1 + theta)
tau_t = tau_t * sqrt(theta) * c_mp1
eta = c_mp1^2 * alpha
x = x + eta * d
r = r - eta * a_d
tau = r^T r                             <- dot
***                                     <- convergence check
| rho = r0^T w                          <- dot
| beta = rho / rho_old
| u_mp1 = w + beta * u_m
| rho_old = rho
pu_m = m^-1 * u_mp1                     <- apply
a_u_new = a * pu_m                      <- apply
| v = a_u_new + beta * (a_u + beta * v)
a_u = a_u_new
u_m = u_mp1


(lines starting with | are executed only on even iterations)


----------------- end original implementation --------------------------

Transformations done:
    -   unroll 2 iterations
    -   resolve if statements
    -   annotate variables
    -   point out obvious problems with the code

Variable annotations:
[name] - "local" variable (not passed through iterations)
{name} - "private" variable (used only within a single merged kernel)
$name$ - only needed for convergence check
NAME   - constant variable

Comment types:
OPT    - possible optimization
ERROR  - something looks wrong
NOTE   - something seems odd

[rv] = R0^T v                           <- dot
[alpha] = rho / [rv]                                            ! OPT  : here rho = rho_old, s/rho/rho_old makes rho local
[u_mp1] = u_m - [alpha] * v                                     ! ERROR: the result is never used
{sigma} = (theta^2 / [alpha]) * eta;                            ! OPT  : sigma can be stored in registers only
w = w - [alpha] * a_u
d = pu_m + {sigma} * d
a_d = a_u + {sigma} * a_d
[ww] = w^T w                            <- dot
theta = [ww] / tau_t                                            ! NOTE : cyclic loop body rotation: [theta], [eta], alpha?
{c_mp1} = 1 / (1 + theta)
tau_t = tau_t * sqrt(theta) * {c_mp1}
eta = {c_mp1}^2 * [alpha]
x = x + eta * d
$r$ = $r$ - eta * a_d                                           ! NOTE  : no need to compute r?
[$tau$] = $r$^T $r$                     <- dot                  ! NOTE  : only needed for convergence check
***                                     <- convergence check
rho = R0^T w                            <- dot
[beta] = rho / rho_old
[u_mp1] = w + [beta] * u_m
rho_old = rho
pu_m = M^-1 [u_mp1]                     <- apply
[a_u_new] = A pu_m                      <- apply
v = [a_u_new] + [beta] * (a_u + [beta] * v)
a_u = [a_u_new]                                                 ! OPT  : unnecessary, use a_u_new instead
u_m = [u_mp1]

{sigma} = (theta^2 / [alpha]) * eta;
w = w - [alpha] * a_u
d = pu_m + {sigma} * d
a_d = a_u + {sigma} * a_d
[ww] = w^T w                            <- dot
theta = [ww] / tau_t
{c_mp1} = 1 / (1 + theta)
tau_t = tau_t * sqrt(theta) * {c_mp1}
eta = {c_mp1}^2 * [alpha]
x = x + eta * d
$r$ = $r$ - eta * a_d                                           ! NOTE  : no need to compute r?
[$tau$] = $r$^T $r$                     <- dot                  ! NOTE  : only needed for convergence check
***                                     <- convergence check
pu_m = M^-1 [u_mp1]                     <- apply
[a_u_new] = A pu_m                      <- apply
a_u = [a_u_new]                                                 ! OPT  : unnecessary, assign to a_u directly
u_m = [u_mp1]                                                   ! OPT  : u_m is already equal to u_mp1