Permalink
Browse files

Parallelfilter (#128)

* parallel filterseq
* parallel DMA reader
* add fifo checking
* added reset checking
* lots of extra correctness checking
  • Loading branch information...
jameshegarty committed Oct 18, 2018
1 parent f6d19f9 commit e5b8196a350f6d66a486f346071eea86faf5602b
Showing with 1,286 additions and 390 deletions.
  1. +1 −2 examples/descriptor_core.lua
  2. +182 −11 examples/examplescommon.lua
  3. +8 −4 examples/examplescommonTerra.t
  4. BIN examples/gold/soc_filterseq.bmp
  5. +1 −0 examples/gold/soc_filterseq.regout.lua
  6. +1 −0 examples/gold/soc_filterseq.terra.cycles.txt
  7. +1 −0 examples/gold/soc_filterseq8.bmp
  8. +1 −0 examples/gold/soc_filterseq8.regout.lua
  9. +1 −0 examples/gold/soc_filterseq8.terra.cycles.txt
  10. +1 −0 examples/gold/soc_parread.bmp
  11. +1 −0 examples/gold/soc_parread.regout.lua
  12. +1 −0 examples/gold/soc_parread.terra.cycles.txt
  13. BIN examples/gold/soc_sort.bmp
  14. +1 −0 examples/gold/soc_sort.regout.lua
  15. +1 −0 examples/gold/soc_sort.terra.cycles.txt
  16. BIN examples/gold/soc_unaligned.bmp
  17. +1 −0 examples/gold/soc_unaligned.regout.lua
  18. +1 −0 examples/gold/soc_unaligned.terra.cycles.txt
  19. BIN examples/gold/soc_underflow.bmp
  20. +1 −0 examples/gold/soc_underflow.regout.lua
  21. +1 −0 examples/gold/soc_underflow.terra.cycles.txt
  22. +2 −1 examples/harnessSOC.lua
  23. +1 −1 examples/harnessTerra.t
  24. +19 −4 examples/harnessTerraSOC.t
  25. +2 −2 examples/lk_tr_core.lua
  26. +1 −1 examples/makefile
  27. +2 −5 examples/soc_2in.lua
  28. +31 −0 examples/soc_filterseq.lua
  29. +70 −0 examples/soc_filterseq8.lua
  30. +25 −0 examples/soc_parread.lua
  31. +24 −0 examples/soc_sort.lua
  32. +27 −0 examples/soc_unaligned.lua
  33. +36 −0 examples/soc_underflow.lua
  34. +0 −1 examples/tmux_wide_handshake.lua
  35. +2 −5 misc/fixed_new.lua
  36. +10 −3 misc/fixed_new_terra.t
  37. +9 −12 modules/soc.lua
  38. +1 −1 platform/verilatorSOC/compile
  39. +18 −5 platform/verilatorSOC/harness.cpp
  40. +13 −7 platform/verilatorSOC/harness.h
  41. BIN platform/zu9vivadoSOC/processimage_mpsoc_SOC
  42. +48 −46 platform/zu9vivadoSOC/processimage_mpsoc_SOC.c
  43. +84 −204 rigel.lua
  44. +1 −0 src/common.lua
  45. +2 −2 src/fpgamodules.lua
  46. +100 −11 src/generators.lua
  47. +271 −49 src/modules.lua
  48. +97 −5 src/modulesTerra.t
  49. +1 −1 src/systolic.lua
  50. +13 −5 src/systolicTerra.t
  51. +1 −1 src/typecheck.lua
  52. +169 −0 src/types.lua
  53. +1 −1 src/typesTerra.t
@@ -12,7 +12,6 @@ local C = require "examplescommon"
local descriptor = {}
function descriptor.addPos()
local descType = types.float(32)
local PTYPE = R.tuple{types.array2d(descType,TILES_X*TILES_Y*8),R.tuple{R.uint16,R.uint16}}
@@ -48,7 +47,7 @@ function norm()
local desc_sum = R.index{input=R.index{input=inp0, key=1 }, key=0}
local desc0 = rigel.apply("d0lift",RM.makeHandshake(sift.fixedLift(R.int32)), R.index{input=R.index{input=inp1,key=0 },key=0} )
local desc = rigel.apply("pt",RM.packTuple{R.float,R.float},rigel.concat("PTT",{desc0,desc_sum}))
local desc = rigel.apply("pt",RM.packTuple({R.float,R.float},true),rigel.concat("PTT",{desc0,desc_sum}))
local desc = rigel.apply("ptt",RM.makeHandshake(sift.fixedDiv(R.float)),desc)
return R.defineModule{input=inp,output=desc}
end
@@ -215,16 +215,52 @@ C.multiplyConst = memoize(function(A,constValue)
return partial
end)
------------------------------
C.GT = memoize(function(A,B)
err( types.isType(A), "C.GT: A must be type")
err( types.isType(B), "C.GT: B must be type")
C.tokenCounter = memoize(function(A)
err( types.isType(A), "C.multiply: A must be type")
local partial = RM.lift( J.sanitize("GT_A"..tostring(A).."_B"..tostring(B)), types.tuple {A,B}, types.bool(), 1,
function(sinp) return S.gt(S.index(sinp,0),S.index(sinp,1)) end )
return partial
end)
------------
-- Builds a lifted comparison of an input against a compile-time constant:
-- the result is S.gt(input, constValue), i.e. "input > constValue".
-- A: type of the input (the constant is materialized as an S.constant of type A)
-- constValue: the constant to compare against
-- returns a module of type A -> bool. The definition is wrapped in memoize.
C.GTConst = memoize(function(A,constValue)
  err( types.isType(A), "C.GTConst: A must be type")

  -- the module name encodes both the type and the constant
  local moduleName = J.sanitize("GT_const_A"..tostring(A).."_value"..tostring(constValue))

  local function greaterThanConst(x)
    return S.gt(x,S.constant(constValue,A))
  end

  return RM.lift( moduleName, A, types.bool(), 1, greaterThanConst )
end)
-- Boolean negation lifted to a module of type bool -> bool.
C.Not = RM.lift(
  "Not",
  types.bool(),
  types.bool(),
  0,
  function(b) return S.__not(b) end )

-- Boolean conjunction lifted to a module of type {bool,bool} -> bool.
C.And = RM.lift(
  "And",
  types.tuple{types.bool(),types.bool()},
  types.bool(),
  0,
  function(pair)
    local lhs, rhs = S.index(pair,0), S.index(pair,1)
    return S.__and(lhs,rhs)
  end )
C.tokenCounter = memoize(function(A,str,X)
err( types.isType(A), "C.tokenCounter: A must be type")
err( types.isHandshake(A),"C.tokenCounter: A must be handshake")
assert(X==nil)
local partial = RM.lift( J.sanitize("tokencounter_A"..tostring(A)), A, A, 1,
if str==nil then str="" end
assert(type(str)=="string")
-- print("TC",str)
-- assert(false)
local partial = RM.lift( J.sanitize("tokencounter_A"..tostring(A)).."_"..str, A, A, 1,
function(sinp) assert(false) end,
function() return CT.tokenCounter(A) end,
function() return CT.tokenCounter(A,str) end,
"C.tokenCounter" )
partial.terraModule = CT.tokenCounter(A)
if terralib~=nil then
partial.terraModule = CT.tokenCounter(A,str)
end
return partial
end)
@@ -1198,7 +1234,7 @@ C.cropHelperSeq = memoize(function( A, W, H, T, L, R, B, Top, X )
err(type(T)=="number","T must be number")
if L%T==0 and R%T==0 then return modules.cropSeq( A, W, H, T, L, R, B, Top ) end
err( (W-L-R)%T==0, "cropSeqHelper, (W-L-R)%T~=0")
err( (W-L-R)%T==0, "cropSeqHelper, (W-L-R)%T~=0, W="..tostring(W)..", L="..tostring(L)..", R="..tostring(R)..", T="..tostring(T))
local RResidual = R%T
local inp = rigel.input( types.array2d( A, T ) )
@@ -1225,7 +1261,7 @@ C.stencilLinebuffer = memoize(function( A, w, h, T, xmin, xmax, ymin, ymax, fram
err(w>0,"stencilLinebuffer: w must be >0");
err(h>0,"stencilLinebuffer: h must be >0");
err(xmin<=xmax,"stencilLinebuffer: xmin("..tostring(xmin)..")>xmax("..tostring(xmax)..")")
err(ymin<=ymax,"stencilLinebuffer: ymin>ymax")
err(ymin<=ymax,"stencilLinebuffer: ymin("..tostring(ymin)..") must be <= ymax("..tostring(ymax)..")")
err(xmax==0,"stencilLinebuffer: xmax must be 0")
err(ymax==0,"stencilLinebuffer: ymax must be 0")
@@ -1298,9 +1334,9 @@ end)
C.unpackStencil = memoize(function( A, stencilW, stencilH, T, arrHeight, framed, framedW, framedH, X )
assert(types.isType(A))
assert(type(stencilW)=="number")
assert(stencilW>0)
err(stencilW>0,"unpackStencil: stencilW must be >0, but is:"..tostring(stencilW))
assert(type(stencilH)=="number")
assert(stencilH>0)
err(stencilH>0,"unpackStencil: stencilH must be >0, but is:"..tostring(stencilH))
assert(type(T)=="number")
assert(T>=1)
err(arrHeight==nil, "Error: NYI - unpackStencil on non-height-1 arrays")
@@ -1347,6 +1383,7 @@ end)
-- if index==true, then we return a value, not an array
-- indices are inclusive
C.slice = memoize(function( inputType, idxLow, idxHigh, idyLow, idyHigh, index, X )
err( types.isType(inputType),"slice first argument must be type" )
err( type(idxLow)=="number", "slice idxLow must be number")
@@ -1616,7 +1653,7 @@ C.handshakeToHandshakeFramed = memoize(
sm:addFunction( S.lambda("ready", r, r, "ready") )
local I = S.parameter("process_input", R.lower(A) )
sm:addFunction( S.lambda("process",I,I,"process_output") )
sm:addFunction( S.lambda("reset", S.parameter("r",types.null()), nil, "reset_out") )
--sm:addFunction( S.lambda("reset", S.parameter("r",types.null()), nil, "reset_out") )
return sm
end
function res.makeTerra()
@@ -1645,6 +1682,140 @@ C.changeRateFramed = memoize(function(A, W, H, ratio, ser, X)
assert(false)
end)
-- Compare-exchange element used by the sorting networks below.
-- A: element type
-- op: comparison operator (op:{A,A}->bool); op.name becomes part of the module name
-- returns a module of type A[2]->A[2]. G.Sel chooses between the input as given
-- and the input with its two elements swapped, based on op(inp[0],inp[1]).
-- NOTE(review): presumably G.Sel yields its 2nd argument when the condition is
-- true (i.e. the pair is kept when op holds) - confirm against generators.
C.sortCompare = memoize(
  function(A,op)
    local G = require "generators"
    local moduleName = "SortCompare_"..tostring(A).."_op"..tostring(op.name)

    local function compareExchange(pair)
      local inOrder = op(pair[0],pair[1])
      local swapped = G.TupleToArray(pair[1],pair[0])
      return G.Sel(inOrder,pair,swapped)
    end

    return G.Module{moduleName, types.array2d(A,2), compareExchange}
  end)
-- Odd-even merge: takes in an array whose two halves are sorted and returns
-- the full array sorted.
-- see http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/networks/oemen.htm
-- A: element type
-- N: array length, must be a power of two and >=2
-- op: comparison operator, handed to C.sortCompare
-- returns a module of type A[N]->A[N]
C.oddEvenMerge = memoize(
  function(A,N,op)
    local G = require "generators"
    assert(N>=2)
    assert(J.isPowerOf2(N))

    -- base case: two elements are merged by a single compare-exchange
    if N==2 then
      return C.sortCompare(A,op)
    end

    local half = N/2
    local moduleName = "OddEvenMerge_"..tostring(A).."_N"..tostring(N).."_op"..tostring(op.name)

    local function merge(inp)
      -- split the input into its even-indexed and odd-indexed elements
      local evens, odds = {}, {}
      for k=0,half-1 do
        evens[#evens+1] = inp[k*2]
        odds[#odds+1] = inp[k*2+1]
      end
      local evenArr = G.TupleToArray(R.concat(evens))
      local oddArr = G.TupleToArray(R.concat(odds))

      -- merge each subsequence recursively
      local mergeHalf = C.oddEvenMerge(A,half,op)
      local mergedEven = mergeHalf(evenArr)
      local mergedOdd = mergeHalf(oddArr)

      -- first even and last odd element pass straight through; every pair
      -- (odd[k], even[k+1]) in between goes through a compare-exchange
      local out = {mergedEven[0]}
      for k=0,half-2 do
        local pair = C.sortCompare(A,op)(G.TupleToArray(mergedOdd[k],mergedEven[k+1]))
        out[#out+1] = pair[0]
        out[#out+1] = pair[1]
      end
      out[#out+1] = mergedOdd[half-1]

      return G.TupleToArray(R.concat(out))
    end

    return G.Module{moduleName, types.array2d(A,N), merge}
  end)
-- Sorting network assembled from C.oddEvenMerge.
-- A: element type
-- N: array length, must be a power of two and >0
-- op: comparison operator, forwarded to C.oddEvenMerge
-- returns a module of type A[N]->A[N]: the lower and upper halves are sorted
-- recursively, rejoined with C.flatten2, and combined by C.oddEvenMerge.
C.oddEvenMergeSort = memoize(
  function(A,N,op)
    local G = require "generators"
    assert(N>0)
    assert(J.isPowerOf2(N))

    -- a one-element array is already sorted
    if N==1 then
      return G.Identity{types.array2d(A,N)}
    end

    local half = N/2
    local moduleName = "OddEvenMergeSort_"..tostring(A).."_N"..tostring(N).."_op"..tostring(op.name)

    local function sortNetwork(inp)
      local lower = G.Slice{{0,half-1}}(inp)
      local upper = G.Slice{{half,N-1}}(inp)
      local sortedLower = C.oddEvenMergeSort(A,half,op)(lower)
      local sortedUpper = C.oddEvenMergeSort(A,half,op)(upper)
      local joined = C.flatten2(A,N)(sortedLower,sortedUpper)
      return C.oddEvenMerge(A,N,op)(joined)
    end

    return G.Module{moduleName, types.array2d(A,N), sortNetwork}
  end)
-- Builds a reader that fetches every stride'th item of a buffer.
-- filename, readPort, readAddr: passed through to SOC.axiReadBytes
-- totalBytes: size of the whole buffer in bytes; must be a multiple of itemBytes*stride
-- itemBytes: size of one item in bytes; must be a multiple of 8
-- stride, offset: given as # of items
-- returns a module taking a HandshakeTrigger. Per trigger it produces
-- totalBytes/(itemBytes*stride) addresses of the form
--   counter*(stride*itemBytes) + offset*itemBytes
-- and feeds them to SOC.axiReadBytes.
C.StridedReader = memoize(
  function(filename,totalBytes,itemBytes,stride,offset,readPort,readAddr)
    local G = require "generators"
    local SOC = require "soc"
    assert(totalBytes%(itemBytes*stride)==0)
    assert(itemBytes%8==0)

    local readCount = (totalBytes/(itemBytes*stride))
    local moduleName = "StridedReader_totalBytes"..tostring(totalBytes)
      .."_itemBytes"..tostring(itemBytes)
      .."_stride"..tostring(stride)
      .."_offset"..tostring(offset)

    local function issueReads(trigger)
      print("MAKESTRIDED",stride,offset)
      local started = C.triggerUp(readCount)(trigger)
      local index = G.HS{RM.counter(types.uint(32), readCount)}(started)
      -- scale the item index to a byte address, then shift by this reader's offset
      local scaled = G.HS{G.Mul{stride*itemBytes}}(index)
      local byteAddr = G.HS{G.Add{offset*itemBytes}}(scaled)
      -- alternative kept from the original, disabled:
      --return SOC.read(filename,totalBytes,types.bits(itemBytes*8))(byteAddr)
      return SOC.axiReadBytes(filename,itemBytes,readPort,readAddr)(byteAddr)
    end

    return G.Module{moduleName, types.HandshakeTrigger, issueReads}
  end)
-- generate N DMA controllers to be able to read things with higher BW than a single axi port can
-- filename: image file to read
-- W,H: image size; W*H must be divisible by N
-- ty: pixel type
-- V: pixels per output token; ty:verilogBits()*V must be a multiple of 64
-- N = (ty:verilogBits()*V)/64 strided readers are created. Reader i fetches
-- 8-byte items with stride N and offset i, each on its own MAXI read port and
-- behind a FIFO. Their outputs are fanned in and bitcast to ty[V].
-- Side effects: SOC.currentMAXIReadPort is advanced by N and SOC.currentAddr by
-- the image size in bytes.
-- The read metadata (W,H,V,type,bitsPerPixel,address) is recorded on the first
-- port only (startPort).
C.AXIReadPar = memoize(
  function(filename,W,H,ty,V)
    local G = require "generators"
    local SOC = require "soc"

    assert( (ty:verilogBits()*V)%64==0)
    local N = (ty:verilogBits()*V)/64
    assert((W*H)%N==0)

    -- remember where this reader starts, before the per-port counters are advanced
    local startAddr = SOC.currentAddr
    local startPort = SOC.currentMAXIReadPort

    local totalBytes = W*H*(ty:verilogBits()/8)

    local res = G.Module{"AXIReadPar_"..tostring(W), types.HandshakeTrigger,
      function(inp)
        local inpb = G.FanOut{N}(inp)
        local out = {}
        for i=0,N-1 do
          print("IMPBI",inpb[i],i,inpb)
          local tmp = C.StridedReader(filename,totalBytes,8,N,i,SOC.currentMAXIReadPort,SOC.currentAddr)(inpb[i])
          tmp = G.FIFO{128}(tmp)
          table.insert(out, tmp)
          SOC.currentMAXIReadPort = SOC.currentMAXIReadPort+1
        end

        SOC.currentAddr = SOC.currentAddr+totalBytes

        out = G.FanIn(unpack(out))
        print("OUTT",out.type)
        return G.HS{C.bitcast(out.type.params.A,types.array2d(ty,V))}(out)
      end}

    res.globalMetadata["MAXI"..startPort.."_read_W"] = W
    res.globalMetadata["MAXI"..startPort.."_read_H"] = H
    res.globalMetadata["MAXI"..startPort.."_read_V"] = V
    res.globalMetadata["MAXI"..startPort.."_read_type"] = tostring(ty)
    res.globalMetadata["MAXI"..startPort.."_read_bitsPerPixel"] = ty:verilogBits()
    res.globalMetadata["MAXI"..startPort.."_read_address"] = startAddr

    -- avoid double loading the image: only startPort keeps its filename.
    -- The remaining ports of this reader are startPort+1 .. startPort+N-1, so
    -- the index must be relative to startPort; an absolute 1..N-1 is only
    -- right when startPort is 0.
    -- NOTE(review): assumes the module function above ran while building res,
    -- so the ports it claimed are exactly startPort..startPort+N-1 - confirm.
    for i=1,N-1 do
      res.globalMetadata["MAXI"..(startPort+i).."_read_filename"] = nil
    end

    return res
  end)
return C
@@ -160,19 +160,23 @@ function CT.multiplyConst(A,constValue)
end
end
function CT.tokenCounter(A)
function CT.tokenCounter(A,str)
if str==nil then str="" end
assert(type(str)=="string")
local struct TokenCounter { cnt:uint, ready:bool }
terra TokenCounter:reset() self.cnt=0 end
terra TokenCounter:process( a : &A:toTerraType(), out : &A:toTerraType() )
@out = @a
if valid(a) and self.ready then
self.cnt = self.cnt+1
cstdio.printf("CNT %d\n",self.cnt)
cstdio.printf(["CNT "..str..": %d\n"],self.cnt)
end
end
terra TokenCounter:calculateReady(readyDownstream:bool)
self.ready = readyDownstream
terra TokenCounter:calculateReady(readyDownstream:bool)
self.ready = readyDownstream
end
return MT.new(TokenCounter)
Binary file not shown.
@@ -0,0 +1 @@
return {}
@@ -0,0 +1 @@
return {}
@@ -0,0 +1 @@
return {}
Binary file not shown.
@@ -0,0 +1 @@
return {}
@@ -0,0 +1 @@
1042
Binary file not shown.
@@ -0,0 +1 @@
return {}
Binary file not shown.
@@ -0,0 +1 @@
return {}
@@ -53,6 +53,7 @@ return function(fn,t)
if fn.globalMetadata["MAXI"..i.."_read_H"]~=nil then table.insert(rlist,"H="..fn.globalMetadata["MAXI"..i.."_read_H"]) end
if fn.globalMetadata["MAXI"..i.."_read_bitsPerPixel"]~=nil then table.insert(rlist,"bitsPerPixel="..fn.globalMetadata["MAXI"..i.."_read_bitsPerPixel"]) end
if fn.globalMetadata["MAXI"..i.."_read_V"]~=nil then table.insert(rlist,"V="..fn.globalMetadata["MAXI"..i.."_read_V"]) end
J.err(fn.globalMetadata["MAXI"..i.."_read_address"]~=nil,"Error: AXI port "..tostring(i).." was given a filename, but no address?")
table.insert(rlist,"address=0x"..string.format("%x",fn.globalMetadata["MAXI"..i.."_read_address"]))
table.insert(inputList, "{"..table.concat(rlist,",").."}")
end
@@ -90,7 +91,7 @@ return function(fn,t)
f:close()
elseif backend=="terra" then
local doTerraSim = require("harnessTerraSOC")
doTerraSim(fn)
doTerraSim(fn,t)
else
print("backend",backend)
assert(false)
@@ -44,7 +44,7 @@ local terraWrapper = J.memoize(function(fn,inputFilename,inputType,tapType,tapVa
out = R.apply("HARNESS_inner", fn )
end
out = R.apply("fwrite", RM.makeHandshake(RM.fwriteSeq(outputFilename,outputType,nil,false),nil,true), out )
out = R.apply("fwrite", RM.makeHandshake(RM.fwriteSeq(outputFilename,outputType,nil,false,true),nil,true), out )
if harnessoption==2 then
out = R.statements{out,dramAddr}
Oops, something went wrong.

0 comments on commit e5b8196

Please sign in to comment.