implement OP_DIV

dibyendumajumdar · Mar 15, 2015 · 4101af2 · 4101af2
1 parent ef74e15
commit 4101af2
Show file tree

Hide file tree

Showing 10 changed files with 170 additions and 50 deletions.
diff --git a/include/ravi_llvmcodegen.h b/include/ravi_llvmcodegen.h
@@ -538,7 +538,7 @@ class RaviCodeGenerator {
                      llvm::Value *proto, int A, int B, int C, int j);
 
   void emit_ARITH(RaviFunctionDef *def, llvm::Value *L_ci, llvm::Value *proto,
-                int A, int B, int C, OpCode op, TMS tms);
+                  int A, int B, int C, OpCode op, TMS tms);
 
   void emit_UNMF(RaviFunctionDef *def, llvm::Value *L_ci, llvm::Value *proto,
                  int A, int B);

diff --git a/ravi-tests/README.rst b/ravi-tests/README.rst
@@ -19,6 +19,7 @@ Following performance tests were obtained from the `The Computer Programming Lan
 * mandel.lua
 * mandel.ravi - with optional types
 * mandel1.ravi - modified so that it can be JIT compiled
+* mandel1.lua - can be JIT compiled but also compatible with Lua
 
 Example LLVM and Ravi bytecode listings
 ---------------------------------------

diff --git a/ravi-tests/mandel1.lua b/ravi-tests/mandel1.lua
@@ -0,0 +1,57 @@
+-- The Computer Language Benchmarks Game
+-- http://benchmarksgame.alioth.debian.org/
+-- contributed by Mike Pall
+
+
+local function domandel(pfunc)
+  local width = 4000
+  local height, wscale = width, 2.0/width
+  local m, limit2 = 50, 4.0
+  --local write, char = io.write, string.char
+
+  for y=0,height-1 do
+    local Ci = 2.0*y / height - 1
+    for xb=0,width-1,8 do
+      local bits = 0
+      local xbb = xb+7
+      local xblimit 
+      if xbb < width then
+        xblimit = xbb
+      else
+        xblimit = width-1
+      end
+      for x=xb,xblimit do
+        bits = bits + bits
+        local Zr, Zi, Zrq, Ziq = 0.0, 0.0, 0.0, 0.0
+        local Cr = x * wscale - 1.5
+        for i=1,m do
+          local Zri = Zr*Zi
+          Zr = Zrq - Ziq + Cr
+          Zi = Zri + Zri + Ci
+          Zrq = Zr*Zr
+          Ziq = Zi*Zi
+          if Zrq + Ziq > limit2 then
+            bits = bits + 1
+            break
+          end
+        end
+      end
+      if xbb >= width then
+        for x=width,xbb do bits = bits + bits + 1 end
+     end
+     --pfunc(bits)
+    end
+  end
+end
+
+local function dummy(x) 
+end
+
+if ravi then
+  assert(ravi.compile(domandel))
+end
+
+t1 = os.clock()
+domandel(print)
+t2 = os.clock()
+print(t2-t1)
diff --git a/ravi-tests/mandel1.ravi b/ravi-tests/mandel1.ravi
@@ -47,7 +47,7 @@ end
 local function dummy(x) 
 end
 
-domandel(dummy)
+assert(ravi.compile(domandel))
 
 t1 = os.clock()
 domandel(print)

diff --git a/ravi-tests/ravi_tests1.ravi b/ravi-tests/ravi_tests1.ravi
@@ -329,3 +329,15 @@ assert(math.abs(testmul(1.5,1.6)-2.4) < 1e-12)
 assert(math.abs(testmul("1.5",1.6)-2.4) < 1e-12)
 assert(math.abs(testmul("1.5","1.6")-2.4) < 1e-12)
 print("test 25 OK")
+
+
+-- test 26
+function testdiv(a,b)
+  return a/b
+end
+assert(ravi.compile(testdiv))
+assert(testdiv(2,2) == 1.0)
+assert(math.abs(testdiv(1.5,1.6)-0.9375) < 1e-12)
+assert(math.abs(testdiv("1.5",1.6)-0.9375) < 1e-12)
+assert(math.abs(testdiv("1.5","1.6")-0.9375) < 1e-12)
+print("test 26 OK")
diff --git a/readthedocs/ravi-benchmarks.rst b/readthedocs/ravi-benchmarks.rst
@@ -32,3 +32,39 @@ There are a number of reasons why Ravi's performance is not as good as Luajit.
    from being useful in constrained devices - although ahead of time
    compilation could be used in such cases.
 
+Ideas
+-----
+There are a number of improvements possible. Below are some of my thoughts.
+
+Optimizing Fornum loops
+-----------------------
+The Lua fornum loops create an `extra "external" variable <http://www.lua.org/manual/5.3/manual.html#3.3.5>`_ that has the name given by the user. 
+However an internal variable is actually used as the loop index. The external variable is updated at every iteration - this entails several IR 
+instructions. The obvious optimization is to eliminate this variable by making the loop index available as a readonly value. If for backward 
+compatibility it is necessary to allow updates to the external variable then a compromise would be to analyse the Lua program and only create the
+external variable if necessary.
+
+The Value Storage
+-----------------
+In Lua the type of the value and the data associated with a value are stored in separate fields. Luajit however overlays the storage by utilizing
+the `technique known as NaN tagging <http://lua-users.org/lists/lua-l/2009-11/msg00089.html>`_. The Luajit model is not suited for Lua 5.3 as in this version 64-bit integers are natively supported by Lua. 
+
+There is however still a possibility that NaN tagging can be used to improve performance of values that hold doubles. The following scheme should work.
+
+Let the first 8 bytes hold a double value. And let the other values be held in the second 8 bytes.
+Then the NaN tagging technique can be used to overlay the type information with the double part.
+This would allow operations involving doubles to be faster as an extra step to set the type can be avoided. This would mean greater
+performance in floating point operations which are important in many domains.
+
+The above scheme has the additional advantage that it can be extended to support complex numbers.
+
+* First 8 bytes could be a double representing the real part.
+* Second 8 bytes could be a double representing the imaginary part.
+
+If a value is not a complex number then the real part will either be
+NaN, or if the real part is a double then the imaginary part will be a
+NaN.
+
+The problem of course is that NaN tagging may not be viable in mainstream Lua as it is probably a non-portable technique. It could also 
+introduce incompatibility between Lua and Ravi especially if Ravi supported complex numbers.
+
diff --git a/readthedocs/ravi-jit-status.rst b/readthedocs/ravi-jit-status.rst
@@ -47,7 +47,7 @@ Note that if a Lua functions contains a bytecode that cannot be be JITed then th
 +-------------------------+----------+--------------------------------------------------+
 | OP_POW                  | NO       | R(A) := RK(B) ^ RK(C)                            |
 +-------------------------+----------+--------------------------------------------------+
-| OP_DIV                  | NO       | R(A) := RK(B) / RK(C)                            |
+| OP_DIV                  | YES      | R(A) := RK(B) / RK(C)                            |
 +-------------------------+----------+--------------------------------------------------+
 | OP_IDIV                 | NO       | R(A) := RK(B) // RK(C)                           |
 +-------------------------+----------+--------------------------------------------------+

diff --git a/src/README.rst b/src/README.rst
@@ -18,10 +18,11 @@ The LLVM JIT implementation is in following sources:
 * ravi_llvmtypes.cpp - contains LLVM type definitions for Lua objects 
 * ravi_llvmcodegen.cpp - LLVM JIT compiler - main driver for compiling Lua bytecodes into LLVM IR, also contains implementations of opcodes like OP_JMP
 * ravi_llvmload.cpp - implements OP_LOADK and OP_MOVE, and related operations
-* ravi_llvmcomp.cpp - implements OP_EQ, OP_LT and OP_LE.
+* ravi_llvmcomp.cpp - implements OP_EQ, OP_LT, OP_LE, OP_TEST and OP_TESTSET.
 * ravi_llvmreturn.cpp - implements OP_RETURN
 * ravi_llvmforprep.cpp - implements OP_FORPREP
 * ravi_llvmforloop.cpp - implements OP_FORLOOP
-* ravi_llvmarith1.cpp - implements various arithmetic operations
+* ravi_llvmarith1.cpp - implements various type specialized arithmetic operations - these are Ravi extensions
+* ravi_llvmarith2.cpp - implements Lua opcodes such as OP_ADD, OP_SUB, OP_MUL, OP_DIV
 * ravi_llvmcall.cpp - implements OP_CALL
 * ravi_llvmtable.cpp - implements OP_GETTABLE, OP_SETTABLE etc. table operations
diff --git a/src/ravi_llvmarith2.cpp b/src/ravi_llvmarith2.cpp
@@ -26,7 +26,7 @@
 
 namespace ravi {
 
-// OP_ADD, OP_SUB and OP_MUL
+// OP_ADD, OP_SUB, OP_MUL and OP_DIV
 void RaviCodeGenerator::emit_ARITH(RaviFunctionDef *def, llvm::Value *L_ci,
                                    llvm::Value *proto, int A, int B, int C,
                                    OpCode op, TMS tms) {
@@ -55,58 +55,61 @@ void RaviCodeGenerator::emit_ARITH(RaviFunctionDef *def, llvm::Value *L_ci,
   llvm::Value *rb_type = emit_load_type(def, rb);
   llvm::Value *rc_type = emit_load_type(def, rc);
 
-  llvm::Value *cmp1 = def->builder->CreateICmpEQ(
-      rb_type, def->types->kInt[LUA_TNUMINT], "rb.is.integer");
-  llvm::Value *cmp2 = def->builder->CreateICmpEQ(
-      rc_type, def->types->kInt[LUA_TNUMINT], "rc.is.integer");
-
-  llvm::Value *andvalue = def->builder->CreateAnd(cmp1, cmp2);
-
-  // Check if both RB and RC are integers
-  llvm::BasicBlock *then_block =
-      llvm::BasicBlock::Create(def->jitState->context(), "if.integer", def->f);
-  llvm::BasicBlock *else_block =
-      llvm::BasicBlock::Create(def->jitState->context(), "if.not.integer");
   llvm::BasicBlock *float_op =
       llvm::BasicBlock::Create(def->jitState->context(), "float.op");
   llvm::BasicBlock *try_meta =
       llvm::BasicBlock::Create(def->jitState->context(), "try_meta");
   llvm::BasicBlock *done_block =
       llvm::BasicBlock::Create(def->jitState->context(), "done");
-  def->builder->CreateCondBr(andvalue, then_block, else_block);
-  def->builder->SetInsertPoint(then_block);
 
-  // Both are integers
-  llvm::Instruction *lhs = emit_load_reg_i(def, rb);
-  llvm::Instruction *rhs = emit_load_reg_i(def, rc);
-
-  llvm::Value *result = nullptr;
-  switch (op) {
-  case OP_ADD:
-    result = def->builder->CreateAdd(lhs, rhs, "", false, true);
-    break;
-  case OP_SUB:
-    result = def->builder->CreateSub(lhs, rhs, "", false, true);
-    break;
-  case OP_MUL:
-    result = def->builder->CreateMul(lhs, rhs, "", false, true);
-    break;
-  default:
-    lua_assert(0);
+  if (op != OP_DIV) {
+    llvm::Value *cmp1 = def->builder->CreateICmpEQ(
+        rb_type, def->types->kInt[LUA_TNUMINT], "rb.is.integer");
+    llvm::Value *cmp2 = def->builder->CreateICmpEQ(
+        rc_type, def->types->kInt[LUA_TNUMINT], "rc.is.integer");
+
+    llvm::Value *andvalue = def->builder->CreateAnd(cmp1, cmp2);
+
+    // Check if both RB and RC are integers
+    llvm::BasicBlock *then_block = llvm::BasicBlock::Create(
+        def->jitState->context(), "if.integer", def->f);
+    llvm::BasicBlock *else_block =
+        llvm::BasicBlock::Create(def->jitState->context(), "if.not.integer");
+    def->builder->CreateCondBr(andvalue, then_block, else_block);
+    def->builder->SetInsertPoint(then_block);
+
+    // Both are integers
+    llvm::Instruction *lhs = emit_load_reg_i(def, rb);
+    llvm::Instruction *rhs = emit_load_reg_i(def, rc);
+
+    llvm::Value *result = nullptr;
+    switch (op) {
+    case OP_ADD:
+      result = def->builder->CreateAdd(lhs, rhs, "", false, true);
+      break;
+    case OP_SUB:
+      result = def->builder->CreateSub(lhs, rhs, "", false, true);
+      break;
+    case OP_MUL:
+      result = def->builder->CreateMul(lhs, rhs, "", false, true);
+      break;
+    default:
+      lua_assert(0);
+    }
+
+    emit_store_reg_i(def, result, ra);
+    emit_store_type(def, ra, LUA_TNUMINT);
+
+    def->builder->CreateBr(done_block);
+
+    // Not integer
+    def->f->getBasicBlockList().push_back(else_block);
+    def->builder->SetInsertPoint(else_block);
   }
 
-  emit_store_reg_i(def, result, ra);
-  emit_store_type(def, ra, LUA_TNUMINT);
-
-  def->builder->CreateBr(done_block);
-
-  // Not integer
-  def->f->getBasicBlockList().push_back(else_block);
-  def->builder->SetInsertPoint(else_block);
-
   // Is RB a float?
-  cmp1 = def->builder->CreateICmpEQ(rb_type, def->types->kInt[LUA_TNUMFLT],
-                                    "rb.is.float");
+  llvm::Value *cmp1 = def->builder->CreateICmpEQ(
+      rb_type, def->types->kInt[LUA_TNUMFLT], "rb.is.float");
 
   llvm::BasicBlock *convert_rb =
       llvm::BasicBlock::Create(def->jitState->context(), "convert.rb");
@@ -184,12 +187,13 @@ void RaviCodeGenerator::emit_ARITH(RaviFunctionDef *def, llvm::Value *L_ci,
   def->f->getBasicBlockList().push_back(float_op);
   def->builder->SetInsertPoint(float_op);
 
-  lhs = def->builder->CreateLoad(nb);
+  llvm::Instruction *lhs = def->builder->CreateLoad(nb);
   lhs->setMetadata(llvm::LLVMContext::MD_tbaa, def->types->tbaa_longlongT);
 
-  rhs = def->builder->CreateLoad(nc);
+  llvm::Instruction *rhs = def->builder->CreateLoad(nc);
   rhs->setMetadata(llvm::LLVMContext::MD_tbaa, def->types->tbaa_longlongT);
 
+  llvm::Value *result = nullptr;
   // Add and set RA
   switch (op) {
   case OP_ADD:
@@ -201,6 +205,9 @@ void RaviCodeGenerator::emit_ARITH(RaviFunctionDef *def, llvm::Value *L_ci,
   case OP_MUL:
     result = def->builder->CreateFMul(lhs, rhs);
     break;
+  case OP_DIV:
+    result = def->builder->CreateFDiv(lhs, rhs);
+    break;
   default:
     lua_assert(0);
   }

diff --git a/src/ravi_llvmcodegen.cpp b/src/ravi_llvmcodegen.cpp
@@ -241,6 +241,7 @@ bool RaviCodeGenerator::canCompile(Proto *p) {
     case OP_ADD:
     case OP_SUB:
     case OP_MUL:
+    case OP_DIV:
     case OP_SETTABLE:
     case OP_GETTABLE:
     case OP_RAVI_MOVEI:
@@ -737,6 +738,11 @@ void RaviCodeGenerator::compile(lua_State *L, Proto *p) {
       emit_MULII(&def, L_ci, proto, A, B, C);
     } break;
 
+    case OP_DIV: {
+      int B = GETARG_B(i);
+      int C = GETARG_C(i);
+      emit_ARITH(&def, L_ci, proto, A, B, C, OP_DIV, TM_DIV);
+    } break;
     case OP_RAVI_DIVFF: {
       int B = GETARG_B(i);
       int C = GETARG_C(i);