Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

We’re showing branches in this repository, but you can also compare across forks.

base: 0cfdc4c0dd
...
compare: d5882c57ff
  • 3 commits
  • 9 files changed
  • 0 commit comments
  • 1 contributor
32 hw/src/alu.vhd
View
@@ -53,7 +53,8 @@ end alu;
architecture behavior of alu is
- signal mul_reg, mul_next : std_logic_vector(DATA_WIDTH-1 downto 0);
+ signal mul0_reg, mul0_next : std_logic_vector(DATA_WIDTH-1 downto 0);
+ signal mul1_reg, mul1_next : std_logic_vector(DATA_WIDTH-1 downto 0);
signal add_tmp, sub_tmp: std_logic_vector(DATA_WIDTH downto 0);
signal eq_tmp, lt_tmp : std_logic;
@@ -68,15 +69,17 @@ begin -- behavior
sync: process (clk, reset)
begin -- process sync
if reset = '0' then -- asynchronous reset (active low)
- mul_reg <= (others => '0');
+ mul0_reg <= (others => '0');
+ mul1_reg <= (others => '0');
elsif clk'event and clk = '1' then -- rising clock edge
if ena = '1' then
- mul_reg <= mul_next;
+ mul0_reg <= mul0_next;
+ mul1_reg <= mul1_next;
end if;
end if;
end process sync;
- alu: process (op, fl_in, mul_reg, rb_in, ro_in, ba, memdata,
+ alu: process (op, fl_in, mul0_reg, mul1_reg, rb_in, ro_in, ba, memdata,
add_tmp, sub_tmp, eq_tmp, lt_tmp, fpu_rddata)
variable valid : std_logic;
variable mul_tmp : std_logic_vector(2*DATA_WIDTH-1 downto 0);
@@ -114,7 +117,8 @@ begin -- behavior
ro_wren <= '0';
ro_out <= op.rddata0(PC_WIDTH-1 downto 0);
- mul_next <= mul_reg;
+ mul0_next <= mul0_reg;
+ mul1_next <= mul1_reg;
mul_tmp := std_logic_vector(unsigned(op.rddata0) * unsigned(op.rddata1));
if unsigned(op.rddata0) = 0 then
@@ -165,7 +169,11 @@ begin -- behavior
(ROTATE_LEFT(unsigned(op.rddata0),
to_integer(unsigned(op.rddata1(DATA_WIDTH_BITS-1 downto 0)))));
when ALU_MUL =>
- mul_next <= mul_tmp(DATA_WIDTH-1 downto 0);
+ if op.wraddr(0) = '1' then
+ mul0_next <= mul_tmp(DATA_WIDTH-1 downto 0);
+ else
+ mul1_next <= mul_tmp(DATA_WIDTH-1 downto 0);
+ end if;
when ALU_CARR =>
wren <= valid;
wrdata <= (others => '0');
@@ -353,7 +361,11 @@ begin -- behavior
end case;
when ALU_LDMUL =>
wren <= valid;
- wrdata <= mul_reg;
+ if op.rdaddr0(0) = '1' then
+ wrdata <= std_logic_vector(unsigned(mul0_reg)+unsigned(op.rddata1));
+ else
+ wrdata <= std_logic_vector(unsigned(mul1_reg)+unsigned(op.rddata1));
+ end if;
when ALU_LDFP =>
-- just fetch data, actual reading is done in FPU
wren <= valid;
@@ -365,7 +377,11 @@ begin -- behavior
fl_out(to_integer(unsigned(op.wraddr(FLAG_BITS-1 downto 0)))) <= op.rddata0(0);
when ALU_STMUL =>
if valid = '1' then
- mul_next <= op.rddata0;
+ if op.wraddr(0) = '1' then
+ mul0_next <= op.rddata0;
+ else
+ mul1_next <= op.rddata0;
+ end if;
end if;
when ALU_STFP =>
-- just deliver data, actual writing is done in FPU
14 hw/src/decode.vhd
View
@@ -529,10 +529,10 @@ begin -- behavior
when "00110" => op(i).op <= ALU_LDMEMHS;
when "00111" => op(i).op <= ALU_LDMEMBU;
when "01000" => op(i).op <= ALU_LDMEMBS;
- when "01001" => op(i).op <= ALU_LDMUL;
- when "01010" => op(i).op <= ALU_LDRB;
- when "01011" => op(i).op <= ALU_LDRO;
- when "01100" => op(i).op <= ALU_LDBA;
+ when "01001" | "01010" => op(i).op <= ALU_LDMUL;
+ when "01011" => op(i).op <= ALU_LDRB;
+ when "01100" => op(i).op <= ALU_LDRO;
+ when "01101" => op(i).op <= ALU_LDBA;
when "10000" | "10001" | "10010" | "10011" |
"10100" | "10101" | "10110" | "10111" |
"11000" | "11001" | "11010" | "11011" |
@@ -563,9 +563,9 @@ begin -- behavior
when "101101" => -- STX
case bundle_reg(i).dest is
when "00000" | "00001" | "00010" | "00011" => op(i).op <= ALU_STCOND;
- when "01001" => op(i).op <= ALU_STMUL;
- when "01010" => op(i).op <= ALU_STRB;
- when "01011" => op(i).op <= ALU_STRO;
+ when "01001" | "01010" => op(i).op <= ALU_STMUL;
+ when "01011" => op(i).op <= ALU_STRB;
+ when "01100" => op(i).op <= ALU_STRO;
when "10000" | "10001" | "10010" | "10011" |
"10100" | "10101" | "10110" | "10111" |
"11000" | "11001" | "11010" | "11011" |
13 tools/asm/lexer.l
View
@@ -246,26 +246,31 @@ $membs {
return EXT;
}
-$mul[0-9] {
+$mul[0-9]\.0 {
yylval.intval = 9;
return EXT;
}
-$rb {
+$mul[0-9]\.1 {
yylval.intval = 10;
return EXT;
}
-$ro {
+$rb {
yylval.intval = 11;
return EXT;
}
-$ba {
+$ro {
yylval.intval = 12;
return EXT;
}
+$ba {
+ yylval.intval = 13;
+ return EXT;
+ }
+
$c[0-9] {
yylval.intval = yytext[2]-'0';
return EXT;
4 tools/asm/parser.y
View
@@ -468,7 +468,7 @@ AsmOp : Condition THREEOP REG ',' Constant DEST REG
$$.op = $2;
$$.fmt.B.src1 = $3;
$$.fmt.B.src2.imm = $5;
- $$.fmt.B.dest = 0; /* destinition is implicit */
+ $$.fmt.B.dest = $7;
$$.fmt.B.imm = 1;
$$.fmt.B.cond = $1;
}
@@ -477,7 +477,7 @@ AsmOp : Condition THREEOP REG ',' Constant DEST REG
$$.op = $2;
$$.fmt.B.src1 = $3;
$$.fmt.B.src2.reg = $5;
- $$.fmt.B.dest = 0; /* destinition is implicit */
+ $$.fmt.B.dest = $7;
$$.fmt.B.imm = 0;
$$.fmt.B.cond = $1;
}
1  tools/etc/Makefile.rules
View
@@ -1,6 +1,7 @@
PREFIX=/usr/local/lemberg
LIBLLSYMS=__divdf3,__divsf3,__divdi3,__divsi3,__moddi3,__modsi3,__udivdi3,__udivsi3,__umoddi3,__umodsi3,__ashldi3,__ashrdi3,__lshrdi3,__muldi3,memset,memmove,memcpy
+LIBLLSYMS_SOFTFLOAT=__adddf3,__subdf3,__muldf3,__addsf3,__subsf3,__mulsf3,__eqdf2,__gedf2,__gtdf2,__ledf2,__ltdf2,__nedf2,__unorddf2,__eqsf2,__gesf2,__gtsf2,__lesf2,__ltsf2,__nesf2,__unordsf2,__floatsidf,__floatunsidf,__fixdfsi,__fixunsdfsi,__floatsisf,__floatunsisf,__fixsfsi,__extendsfdf2,__truncdfsf2
LLVM_LD=llvm-ld -b $@ $^ \
${PREFIX}/lib/libllsyms.o ${LLVM_LDFLAGS} \
27 tools/libll/libllsyms.s
View
@@ -28,12 +28,7 @@ declare float @__addsf3(float %a, float %b) nounwind readnone
declare float @__subsf3(float %a, float %b) nounwind readnone
declare float @__mulsf3(float %a, float %b) nounwind readnone
declare float @__divsf3(float %a, float %b) nounwind readnone
-
-declare double @__floatsidf(i32 %a) nounwind readnone
-declare double @__floatunsidf(i32 %a) nounwind readnone
-declare i32 @__fixdfsi(double %a) nounwind readnone
-declare i32 @__fixunsdfsi(double %a) nounwind readnone
-
+
declare i32 @__eqdf2(double %a, double %b) nounwind readnone
declare i32 @__gedf2(double %a, double %b) nounwind readnone
declare i32 @__gtdf2(double %a, double %b) nounwind readnone
@@ -41,3 +36,23 @@ declare i32 @__ledf2(double %a, double %b) nounwind readnone
declare i32 @__ltdf2(double %a, double %b) nounwind readnone
declare i32 @__nedf2(double %a, double %b) nounwind readnone
declare i32 @__unorddf2(double %a, double %b) nounwind readnone
+
+declare i32 @__eqsf2(float %a, float %b) nounwind readnone
+declare i32 @__gesf2(float %a, float %b) nounwind readnone
+declare i32 @__gtsf2(float %a, float %b) nounwind readnone
+declare i32 @__lesf2(float %a, float %b) nounwind readnone
+declare i32 @__ltsf2(float %a, float %b) nounwind readnone
+declare i32 @__nesf2(float %a, float %b) nounwind readnone
+declare i32 @__unordsf2(float %a, float %b) nounwind readnone
+
+declare double @__floatsidf(i32 %a) nounwind readnone
+declare double @__floatunsidf(i32 %a) nounwind readnone
+declare i32 @__fixdfsi(double %a) nounwind readnone
+declare i32 @__fixunsdfsi(double %a) nounwind readnone
+
+declare float @__floatsisf(i32 %a) nounwind readnone
+declare float @__floatunsisf(i32 %a) nounwind readnone
+declare i32 @__fixsfsi(float %a) nounwind readnone
+
+declare double @__extendsfdf2(float %a) nounwind readnone
+declare float @__truncdfsf2(double %a) nounwind readnone
9 tools/llvm/lib/Target/Lemberg/LembergClusterizer.cpp
View
@@ -88,6 +88,15 @@ namespace {
&& classes[rhs] == Lemberg::MulRegisterClass) {
return true;
}
+ // big immediates come next
+ if (classes[lhs] == Lemberg::AImmRegisterClass
+ && classes[rhs] != Lemberg::AImmRegisterClass) {
+ return false;
+ }
+ if (classes[lhs] != Lemberg::AImmRegisterClass
+ && classes[rhs] == Lemberg::AImmRegisterClass) {
+ return true;
+ }
// prefer registers with fewer neighbors
if (hood[lhs]->size() != hood[rhs]->size()) {
return hood[lhs]->size() > hood[rhs]->size();
30 tools/llvm/lib/Target/Lemberg/LembergInstrInfo.td
View
@@ -600,12 +600,29 @@ def MULai: F1<(outs Mul:$dst), (ins pred:$p, A:$src1, i32imm:$src2),
"$p mul\t$src1, $src2 -> $$$dst",
[(set Mul:$dst, (LembergMul A:$src1, imm5:$src2))],
IIAlu>;
-
-// Avoid spilling for multiplication registers
+// Keep loading to general purpose register close
def : Pat<(mul A:$src1, A:$src2),
- (COPY_TO_REGCLASS (MULaa A:$src1, A:$src2), A)>;
+ (COPY_TO_REGCLASS (MULaa A:$src1, A:$src2), A)>;
def : Pat<(mul A:$src1, imm5:$src2),
- (COPY_TO_REGCLASS (MULai A:$src1, imm5:$src2), A)>;
+ (COPY_TO_REGCLASS (MULai A:$src1, imm5:$src2), A)>;
+// Multiply-accumulate when loading multiplication result
+def MACa: F1<(outs A:$dst), (ins pred:$p, Mul:$src1, A:$src2),
+ "$p ldx\t$$$src1, $src2 -> $dst",
+ [(set A:$dst, (add Mul:$src1, A:$src2))],
+ IIAlu>;
+def MACi: F1<(outs A:$dst), (ins pred:$p, Mul:$src1, i32imm:$src2),
+ "$p ldx\t$$$src1, $src2 -> $dst",
+ [(set A:$dst, (add Mul:$src1, uimm5:$src2))],
+ IIAlu>;
+// Match MAC patterns
+def : Pat<(add (mul A:$src1, A:$src2), A:$src3),
+ (MACa (MULaa A:$src1, A:$src2), A:$src3)>;
+def : Pat<(add (mul A:$src1, imm5:$src2), A:$src3),
+ (MACa (MULai A:$src1, imm5:$src2), A:$src3)>;
+def : Pat<(add (mul A:$src1, A:$src2), uimm5:$src3),
+ (MACi (MULaa A:$src1, A:$src2), uimm5:$src3)>;
+def : Pat<(add (mul A:$src1, imm5:$src2), uimm5:$src3),
+ (MACi (MULai A:$src1, imm5:$src2), uimm5:$src3)>;
// Logic operations
defm AND: OP3<and, "and", uimm5>;
@@ -1331,8 +1348,9 @@ let isCall=1, hasDelaySlot=1,
R0_20, R1_20, R2_20, R3_20,
R0_21, R1_21, R2_21, R3_21,
R0_22, R1_22, R2_22, R3_22,
- C0, C1, C2, C3,
- MUL0, MUL1, MUL2, MUL3,
+ C1, C2, C3,
+ MUL0_0, MUL1_0, MUL2_0, MUL3_0,
+ MUL0_1, MUL1_1, MUL2_1, MUL3_1,
MEM, MEMHU, MEMHS, MEMBU, MEMBS,
F0, F1, F2, F3,
F4, F5, F6, F7, F8, F9,
27 tools/llvm/lib/Target/Lemberg/LembergRegisterInfo.td
View
@@ -147,10 +147,14 @@ let Namespace = "Lemberg" in {
def MEMBS : LembergGlobalReg< 8, "membs">, DwarfRegNum<[88]>;
// Local special registers
- def MUL0 : LembergLocalReg< 9, 0, "mul0">, DwarfRegNum<[89]>;
- def MUL1 : LembergLocalReg< 9, 1, "mul1">, DwarfRegNum<[89]>;
- def MUL2 : LembergLocalReg< 9, 2, "mul2">, DwarfRegNum<[89]>;
- def MUL3 : LembergLocalReg< 9, 3, "mul3">, DwarfRegNum<[89]>;
+ def MUL0_0 : LembergLocalReg< 9, 0, "mul0.0">, DwarfRegNum<[89]>;
+ def MUL1_0 : LembergLocalReg< 9, 1, "mul1.0">, DwarfRegNum<[89]>;
+ def MUL2_0 : LembergLocalReg< 9, 2, "mul2.0">, DwarfRegNum<[89]>;
+ def MUL3_0 : LembergLocalReg< 9, 3, "mul3.0">, DwarfRegNum<[89]>;
+ def MUL0_1 : LembergLocalReg< 9, 0, "mul0.1">, DwarfRegNum<[90]>; // NOTE(review): the MUL*_1 defs reuse DWARF number 90, which RB also uses, and keep first argument 9 although the assembler lexer maps the ".1" bank to 10 -- confirm both are intended
+ def MUL1_1 : LembergLocalReg< 9, 1, "mul1.1">, DwarfRegNum<[90]>;
+ def MUL2_1 : LembergLocalReg< 9, 2, "mul2.1">, DwarfRegNum<[90]>;
+ def MUL3_1 : LembergLocalReg< 9, 3, "mul3.1">, DwarfRegNum<[90]>;
// Global special registers for call/return
def RB : LembergGlobalReg< 10, "rb">, DwarfRegNum<[90]>;
@@ -489,13 +493,15 @@ let CopyCost = -1 in {
}
// Multiplication results
- def Mul : RegisterClass<"Lemberg", [i32], 32, [MUL0, MUL1, MUL2, MUL3] >;
+ def Mul : RegisterClass<"Lemberg", [i32], 32,
+ [MUL0_0, MUL1_0, MUL2_0, MUL3_0,
+ MUL0_1, MUL1_1, MUL2_1, MUL3_1]>;
// Classes for clusterizing multiplication results
- def M0 : RegisterClass<"Lemberg", [i32], 32, [MUL0] >;
- def M1 : RegisterClass<"Lemberg", [i32], 32, [MUL1] >;
- def M2 : RegisterClass<"Lemberg", [i32], 32, [MUL2] >;
- def M3 : RegisterClass<"Lemberg", [i32], 32, [MUL3] >;
+ def M0 : RegisterClass<"Lemberg", [i32], 32, [MUL0_0, MUL0_1] >;
+ def M1 : RegisterClass<"Lemberg", [i32], 32, [MUL1_0, MUL1_1] >;
+ def M2 : RegisterClass<"Lemberg", [i32], 32, [MUL2_0, MUL2_1] >;
+ def M3 : RegisterClass<"Lemberg", [i32], 32, [MUL3_0, MUL3_1] >;
// Class for memory result
def Mem : RegisterClass<"Lemberg", [i32], 32, [MEM, MEMHU, MEMHS, MEMBU, MEMBS] >
@@ -529,7 +535,8 @@ let CopyCost = -1 in {
// All registers that are loaded through ldx
def X : RegisterClass<"Lemberg", [i32], 32,
- [MUL0, MUL1, MUL2, MUL3,
+ [MUL0_0, MUL1_0, MUL2_0, MUL3_0,
+ MUL0_1, MUL1_1, MUL2_1, MUL3_1,
C1, C2, C3,
F0, F1, F2, F3, F4, F5, F6, F7,
F8, F9, F10, F11, F12, F13, F14, F15,

No commit comments for this range

Something went wrong with that request. Please try again.