In [4]:
class tree:
    def __str__(self):
        return self.recursive_str()
    
    def recursive_str(self, depth=0):
        depth += 1
        string = self.string_id
        for child in self.children:
            string += "\n" + depth*4*" " + "{}".format(child.recursive_str(depth=depth))
        return string
       
    def __repr__(self):
        return self.recursive_repr()
    
    def recursive_repr(self):
        string = self.string_id + "("
        for child in self.children:
            string += "{},".format(child.recursive_repr())
        string += ")"
        return string
  

In [7]:
     
class Operation(tree):
    def __mul__(self, other):
        return Mult(self, other)
    def __sub__(self, other):
        return Subtract(self, other)


class reduction(tree):
    def __init__(self, formula, tagIJ):
        self.formula = formula
        self.children = [formula]
        self.tagIJ = tagIJ
        
        
    
class ZeroaryOp(Operation):
    string_id = "ZeroaryOp"
    def __init__(self):
        self.children = []
        self.vars = {}

class UnaryOp(Operation):
    string_id = "UnaryOp"
    def __init__(self, arg):
        self.children = [arg]
        self.vars = arg.vars
    def __call__(self, dtype):
        string  = "{TYPE} out[{DIM}];".format(TYPE=dtype, DIM=self.dim)
        string += self.children[0](dtype)
        string += self.children[0](dtype)
        
class BinaryOp(Operation):
    string_id = "BinaryOp"
    num_children = 1
    def __init__(self, arg0, arg1):
        self.children = [arg0, arg1]
        self.vars = arg0.vars.union(arg1.vars)
        


In [8]:
class Var(ZeroaryOp):
    string_id = "Var"
    def __init__(self, ind, dim, cat):
        super().__init__()
        self.ind = ind
        self.dim = dim
        self.cat = cat
        self.vars = {self}
    def recursive_str(self, depth=0):
        return "Var({},{},{})".format(self.ind, self.dim, self.cat)
    def recursive_repr(self):
        return self.recursive_str()
    def __eq__(self, other):
        return self.ind == other.ind and self.dim == other.dim and self.cat == other.cat 
    def __hash__(self):
        return hash((self.ind,self.dim,self.cat))
    
class VectorizedScalarUnaryOp(UnaryOp):
    string_id = "VectorizedScalarUnaryOp"
    @property
    def dim(self):
        return self.children[0].dim
    def Op(self):
        return VectApply()
        

class VectorizedScalarBinaryOp(BinaryOp):
    string_id = "VectorizedScalarBinaryOp"
    @property
    def dim(self):
        return max(self.children[0].dim, self.children[1].dim)
        

class Exp(VectorizedScalarUnaryOp):
    string_id = "Exp"
    
class Minus(VectorizedScalarUnaryOp):
    string_id = "Minus"

class Square(VectorizedScalarUnaryOp):
    string_id = "Square"

class Sum(UnaryOp):
    string_id = "Sum"
    dim = 1

class Mult(VectorizedScalarBinaryOp):
    string_id = "Mult"

class Subtract(VectorizedScalarBinaryOp):
    string_id = "Subtract"


In [9]:
def VectAssign(out, dim, val):
    return f"#pragma unroll for(int k=0; k<{dim}; k++) {out}[k] = cast_to<{dtypeacc}>({val});"
        
class Sum_Reduction(reduction):
    string_id = "Sum_Reduction"
    def __init__(self, formula, tagIJ):
        super().__init__(formula, tagIJ)
        self.dim = formula.dim
    def InitializeReduction(self, tmp):
        return VectAssign(tmp, self.dim, "0.0f")
    def ReducePairScalar(self, tmp, xi):
        return f"{tmp} += cast_to<{dtypeacc}>({xi});"        
    

In [12]:
    
    
redformula_string = "Sum_Reduction((Exp(Minus(Sum(Square((Var(0,3,0) - Var(1,3,1))*Var(1,3,1))))) * Var(2,1,1)),0)"

redformula = eval(redformula_string)
print(redformula)
print(redformula.formula.vars)
print(list(redformula.formula.vars)[0])

dtypeacc = "float"
dtype = "float"

print(redformula.InitializeReduction("tmp"))
print(redformula.ReducePairScalar("tmp", "xi"))

Sum_Reduction
    Mult
        Exp
            Minus
                Sum
                    Square
                        Mult
                            Subtract
                                Var(0,3,0)
                                Var(1,3,1)
                            Var(1,3,1)
        Var(2,1,1)
{Var(2,1,1), Var(1,3,1), Var(0,3,0)}
Var(2,1,1)
#pragma unroll for(int k=0; k<1; k++) tmp[k] = cast_to<float>(0.0f);
tmp += cast_to<float>(xi);


In [27]:
def load_vars(dims, inds, i, xi, px):
    string = ""
    k = 0
    for u in range(len(dims)):
        for v in range(dims[u]):
            string += f"{xi}[{k}] = {px}[{inds[u]}][{i}*{dims[u]}+{v}];\n"
            k+=1
    return string

string = load_vars((2,2,3),(7,8,9),5,"xi","px")
print(string)

xi[0] = px[7][5*2+0];
xi[1] = px[7][5*2+1];
xi[2] = px[8][5*2+0];
xi[3] = px[8][5*2+1];
xi[4] = px[9][5*3+0];
xi[5] = px[9][5*3+1];
xi[6] = px[9][5*3+2];



In [29]:
f = open("link_autodiff.cpp")
string = f.read()
mydict = { 
    "TYPE" : "float",
    "TYPEACC" : "float",
    "DIMRED" : 3,
    "DIMX" : 2,
    "DIMY" : 2,
    "DIMOUT" : 3,
    "DIMFOUT" : 3,
    "InitializeReduction" : redformula.InitializeReduction("tmp"),
    "ReducePairShort" : "ReducePairShort",
    "FinalizeOutput" : "FinalizeOutput",
    "loadx" : "loadx",
    "loady" : "loady",
    "call" : "call",
    }
print(string.format(**mydict))

extern "C" int CpuReduc(int nx, int ny, float* gamma, float** args) {
#pragma omp parallel for
    for (int i = 0; i < nx; i++) {
    float fout[3], xi[2], yj[2];
    float acc[DIMRED];
    loadx // load< DIMSX, INDSI >(i, xi, args);
      #pragma unroll for(int k=0; k<1; k++) tmp[k] = cast_to<float>(0.0f); // typename FUN::template InitializeReduction< TYPEACC, TYPE >()(acc);   // acc = 0
      for (int j = 0; j < ny; j++) {
        loady // load< DIMSY, INDSJ >(j, yj, args);
        call // call< DIMSX, DIMSY, DIMSP >(fun, fout, xi, yj, pp);
        ReducePairShort // typename FUN::template ReducePairShort< TYPEACC, TYPE >()(acc, fout, j); // acc += fout
      }
        FinalizeOutput // typename FUN::template FinalizeOutput< TYPEACC, TYPE >()(acc, out + i * DIMOUT, i);
    }
    return 0;
}


