Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
base: 0cfdc4c0dd
...
compare: d5882c57ff
  • 3 commits
  • 9 files changed
  • 0 commit comments
  • 1 contributor
View
32 hw/src/alu.vhd
@@ -53,7 +53,8 @@ end alu;
architecture behavior of alu is
- signal mul_reg, mul_next : std_logic_vector(DATA_WIDTH-1 downto 0);
+ signal mul0_reg, mul0_next : std_logic_vector(DATA_WIDTH-1 downto 0);
+ signal mul1_reg, mul1_next : std_logic_vector(DATA_WIDTH-1 downto 0);
signal add_tmp, sub_tmp: std_logic_vector(DATA_WIDTH downto 0);
signal eq_tmp, lt_tmp : std_logic;
@@ -68,15 +69,17 @@ begin -- behavior
sync: process (clk, reset)
begin -- process sync
if reset = '0' then -- asynchronous reset (active low)
- mul_reg <= (others => '0');
+ mul0_reg <= (others => '0');
+ mul1_reg <= (others => '0');
elsif clk'event and clk = '1' then -- rising clock edge
if ena = '1' then
- mul_reg <= mul_next;
+ mul0_reg <= mul0_next;
+ mul1_reg <= mul1_next;
end if;
end if;
end process sync;
- alu: process (op, fl_in, mul_reg, rb_in, ro_in, ba, memdata,
+ alu: process (op, fl_in, mul0_reg, mul1_reg, rb_in, ro_in, ba, memdata,
add_tmp, sub_tmp, eq_tmp, lt_tmp, fpu_rddata)
variable valid : std_logic;
variable mul_tmp : std_logic_vector(2*DATA_WIDTH-1 downto 0);
@@ -114,7 +117,8 @@ begin -- behavior
ro_wren <= '0';
ro_out <= op.rddata0(PC_WIDTH-1 downto 0);
- mul_next <= mul_reg;
+ mul0_next <= mul0_reg;
+ mul1_next <= mul1_reg;
mul_tmp := std_logic_vector(unsigned(op.rddata0) * unsigned(op.rddata1));
if unsigned(op.rddata0) = 0 then
@@ -165,7 +169,11 @@ begin -- behavior
(ROTATE_LEFT(unsigned(op.rddata0),
to_integer(unsigned(op.rddata1(DATA_WIDTH_BITS-1 downto 0)))));
when ALU_MUL =>
- mul_next <= mul_tmp(DATA_WIDTH-1 downto 0);
+ if op.wraddr(0) = '1' then
+ mul0_next <= mul_tmp(DATA_WIDTH-1 downto 0);
+ else
+ mul1_next <= mul_tmp(DATA_WIDTH-1 downto 0);
+ end if;
when ALU_CARR =>
wren <= valid;
wrdata <= (others => '0');
@@ -353,7 +361,11 @@ begin -- behavior
end case;
when ALU_LDMUL =>
wren <= valid;
- wrdata <= mul_reg;
+ if op.rdaddr0(0) = '1' then
+ wrdata <= std_logic_vector(unsigned(mul0_reg)+unsigned(op.rddata1));
+ else
+ wrdata <= std_logic_vector(unsigned(mul1_reg)+unsigned(op.rddata1));
+ end if;
when ALU_LDFP =>
-- just fetch data, actual reading is done in FPU
wren <= valid;
@@ -365,7 +377,11 @@ begin -- behavior
fl_out(to_integer(unsigned(op.wraddr(FLAG_BITS-1 downto 0)))) <= op.rddata0(0);
when ALU_STMUL =>
if valid = '1' then
- mul_next <= op.rddata0;
+ if op.wraddr(0) = '1' then
+ mul0_next <= op.rddata0;
+ else
+ mul1_next <= op.rddata0;
+ end if;
end if;
when ALU_STFP =>
-- just deliver data, actual writing is done in FPU
View
14 hw/src/decode.vhd
@@ -529,10 +529,10 @@ begin -- behavior
when "00110" => op(i).op <= ALU_LDMEMHS;
when "00111" => op(i).op <= ALU_LDMEMBU;
when "01000" => op(i).op <= ALU_LDMEMBS;
- when "01001" => op(i).op <= ALU_LDMUL;
- when "01010" => op(i).op <= ALU_LDRB;
- when "01011" => op(i).op <= ALU_LDRO;
- when "01100" => op(i).op <= ALU_LDBA;
+ when "01001" | "01010" => op(i).op <= ALU_LDMUL;
+ when "01011" => op(i).op <= ALU_LDRB;
+ when "01100" => op(i).op <= ALU_LDRO;
+ when "01101" => op(i).op <= ALU_LDBA;
when "10000" | "10001" | "10010" | "10011" |
"10100" | "10101" | "10110" | "10111" |
"11000" | "11001" | "11010" | "11011" |
@@ -563,9 +563,9 @@ begin -- behavior
when "101101" => -- STX
case bundle_reg(i).dest is
when "00000" | "00001" | "00010" | "00011" => op(i).op <= ALU_STCOND;
- when "01001" => op(i).op <= ALU_STMUL;
- when "01010" => op(i).op <= ALU_STRB;
- when "01011" => op(i).op <= ALU_STRO;
+ when "01001" | "01010" => op(i).op <= ALU_STMUL;
+ when "01011" => op(i).op <= ALU_STRB;
+ when "01100" => op(i).op <= ALU_STRO;
when "10000" | "10001" | "10010" | "10011" |
"10100" | "10101" | "10110" | "10111" |
"11000" | "11001" | "11010" | "11011" |
View
13 tools/asm/lexer.l
@@ -246,26 +246,31 @@ $membs {
return EXT;
}
-$mul[0-9] {
+$mul[0-9]\.0 {
yylval.intval = 9;
return EXT;
}
-$rb {
+$mul[0-9]\.1 {
yylval.intval = 10;
return EXT;
}
-$ro {
+$rb {
yylval.intval = 11;
return EXT;
}
-$ba {
+$ro {
yylval.intval = 12;
return EXT;
}
+$ba {
+ yylval.intval = 13;
+ return EXT;
+ }
+
$c[0-9] {
yylval.intval = yytext[2]-'0';
return EXT;
View
4 tools/asm/parser.y
@@ -468,7 +468,7 @@ AsmOp : Condition THREEOP REG ',' Constant DEST REG
$$.op = $2;
$$.fmt.B.src1 = $3;
$$.fmt.B.src2.imm = $5;
- $$.fmt.B.dest = 0; /* destinition is implicit */
+ $$.fmt.B.dest = $7;
$$.fmt.B.imm = 1;
$$.fmt.B.cond = $1;
}
@@ -477,7 +477,7 @@ AsmOp : Condition THREEOP REG ',' Constant DEST REG
$$.op = $2;
$$.fmt.B.src1 = $3;
$$.fmt.B.src2.reg = $5;
- $$.fmt.B.dest = 0; /* destinition is implicit */
+ $$.fmt.B.dest = $7;
$$.fmt.B.imm = 0;
$$.fmt.B.cond = $1;
}
View
1  tools/etc/Makefile.rules
@@ -1,6 +1,7 @@
PREFIX=/usr/local/lemberg
LIBLLSYMS=__divdf3,__divsf3,__divdi3,__divsi3,__moddi3,__modsi3,__udivdi3,__udivsi3,__umoddi3,__umodsi3,__ashldi3,__ashrdi3,__lshrdi3,__muldi3,memset,memmove,memcpy
+LIBLLSYMS_SOFTFLOAT=__adddf3,__subdf3,__muldf3,__addsf3,__subsf3,__mulsf3,__eqdf2,__gedf2,__gtdf2,__ledf2,__ltdf2,__nedf2,__unorddf2,__eqsf2,__gesf2,__gtsf2,__lesf2,__ltsf2,__nesf2,__unordsf2,__floatsidf,__floatunsidf,__fixdfsi,__fixunsdfsi,__floatsisf,__floatunssisf,__fixsfsi,__extendsfdf2,__truncdfsf2
LLVM_LD=llvm-ld -b $@ $^ \
${PREFIX}/lib/libllsyms.o ${LLVM_LDFLAGS} \
View
27 tools/libll/libllsyms.s
@@ -28,12 +28,7 @@ declare float @__addsf3(float %a, float %b) nounwind readnone
declare float @__subsf3(float %a, float %b) nounwind readnone
declare float @__mulsf3(float %a, float %b) nounwind readnone
declare float @__divsf3(float %a, float %b) nounwind readnone
-
-declare double @__floatsidf(i32 %a) nounwind readnone
-declare double @__floatunsidf(i32 %a) nounwind readnone
-declare i32 @__fixdfsi(double %a) nounwind readnone
-declare i32 @__fixunsdfsi(double %a) nounwind readnone
-
+
declare i32 @__eqdf2(double %a, double %b) nounwind readnone
declare i32 @__gedf2(double %a, double %b) nounwind readnone
declare i32 @__gtdf2(double %a, double %b) nounwind readnone
@@ -41,3 +36,23 @@ declare i32 @__ledf2(double %a, double %b) nounwind readnone
declare i32 @__ltdf2(double %a, double %b) nounwind readnone
declare i32 @__nedf2(double %a, double %b) nounwind readnone
declare i32 @__unorddf2(double %a, double %b) nounwind readnone
+
+declare i32 @__eqsf2(float %a, float %b) nounwind readnone
+declare i32 @__gesf2(float %a, float %b) nounwind readnone
+declare i32 @__gtsf2(float %a, float %b) nounwind readnone
+declare i32 @__lesf2(float %a, float %b) nounwind readnone
+declare i32 @__ltsf2(float %a, float %b) nounwind readnone
+declare i32 @__nesf2(float %a, float %b) nounwind readnone
+declare i32 @__unordsf2(float %a, float %b) nounwind readnone
+
+declare double @__floatsidf(i32 %a) nounwind readnone
+declare double @__floatunsidf(i32 %a) nounwind readnone
+declare i32 @__fixdfsi(double %a) nounwind readnone
+declare i32 @__fixunsdfsi(double %a) nounwind readnone
+
+declare float @__floatsisf(i32 %a) nounwind readnone
+declare float @__floatunsisf(i32 %a) nounwind readnone
+declare i32 @__fixsfsi(float %a) nounwind readnone
+
+declare double @__extendsfdf2(float %a) nounwind readnone
+declare float @__truncdfsf2(double %a) nounwind readnone
View
9 tools/llvm/lib/Target/Lemberg/LembergClusterizer.cpp
@@ -88,6 +88,15 @@ namespace {
&& classes[rhs] == Lemberg::MulRegisterClass) {
return true;
}
+ // big immediates come next
+ if (classes[lhs] == Lemberg::AImmRegisterClass
+ && classes[rhs] != Lemberg::AImmRegisterClass) {
+ return false;
+ }
+ if (classes[lhs] != Lemberg::AImmRegisterClass
+ && classes[rhs] == Lemberg::AImmRegisterClass) {
+ return true;
+ }
// prefer registers with fewer neighbors
if (hood[lhs]->size() != hood[rhs]->size()) {
return hood[lhs]->size() > hood[rhs]->size();
View
30 tools/llvm/lib/Target/Lemberg/LembergInstrInfo.td
@@ -600,12 +600,29 @@ def MULai: F1<(outs Mul:$dst), (ins pred:$p, A:$src1, i32imm:$src2),
"$p mul\t$src1, $src2 -> $$$dst",
[(set Mul:$dst, (LembergMul A:$src1, imm5:$src2))],
IIAlu>;
-
-// Avoid spilling for multiplication registers
+// Keep loading to general purpose register close
def : Pat<(mul A:$src1, A:$src2),
- (COPY_TO_REGCLASS (MULaa A:$src1, A:$src2), A)>;
+ (COPY_TO_REGCLASS (MULaa A:$src1, A:$src2), A)>;
def : Pat<(mul A:$src1, imm5:$src2),
- (COPY_TO_REGCLASS (MULai A:$src1, imm5:$src2), A)>;
+ (COPY_TO_REGCLASS (MULai A:$src1, imm5:$src2), A)>;
+// Multiply-accumulate when loading multiplication result
+def MACa: F1<(outs A:$dst), (ins pred:$p, Mul:$src1, A:$src2),
+ "$p ldx\t$$$src1, $src2 -> $dst",
+ [(set A:$dst, (add Mul:$src1, A:$src2))],
+ IIAlu>;
+def MACi: F1<(outs A:$dst), (ins pred:$p, Mul:$src1, i32imm:$src2),
+ "$p ldx\t$$$src1, $src2 -> $dst",
+ [(set A:$dst, (add Mul:$src1, uimm5:$src2))],
+ IIAlu>;
+// Match MAC patterns
+def : Pat<(add (mul A:$src1, A:$src2), A:$src3),
+ (MACa (MULaa A:$src1, A:$src2), A:$src3)>;
+def : Pat<(add (mul A:$src1, imm5:$src2), A:$src3),
+ (MACa (MULai A:$src1, imm5:$src2), A:$src3)>;
+def : Pat<(add (mul A:$src1, A:$src2), uimm5:$src3),
+ (MACi (MULaa A:$src1, A:$src2), uimm5:$src3)>;
+def : Pat<(add (mul A:$src1, imm5:$src2), uimm5:$src3),
+ (MACi (MULai A:$src1, imm5:$src2), uimm5:$src3)>;
// Logic operations
defm AND: OP3<and, "and", uimm5>;
@@ -1331,8 +1348,9 @@ let isCall=1, hasDelaySlot=1,
R0_20, R1_20, R2_20, R3_20,
R0_21, R1_21, R2_21, R3_21,
R0_22, R1_22, R2_22, R3_22,
- C0, C1, C2, C3,
- MUL0, MUL1, MUL2, MUL3,
+ C1, C2, C3,
+ MUL0_0, MUL1_0, MUL2_0, MUL3_0,
+ MUL0_1, MUL1_1, MUL2_1, MUL3_1,
MEM, MEMHU, MEMHS, MEMBU, MEMBS,
F0, F1, F2, F3,
F4, F5, F6, F7, F8, F9,
View
27 tools/llvm/lib/Target/Lemberg/LembergRegisterInfo.td
@@ -147,10 +147,14 @@ let Namespace = "Lemberg" in {
def MEMBS : LembergGlobalReg< 8, "membs">, DwarfRegNum<[88]>;
// Local special registers
- def MUL0 : LembergLocalReg< 9, 0, "mul0">, DwarfRegNum<[89]>;
- def MUL1 : LembergLocalReg< 9, 1, "mul1">, DwarfRegNum<[89]>;
- def MUL2 : LembergLocalReg< 9, 2, "mul2">, DwarfRegNum<[89]>;
- def MUL3 : LembergLocalReg< 9, 3, "mul3">, DwarfRegNum<[89]>;
+ def MUL0_0 : LembergLocalReg< 9, 0, "mul0.0">, DwarfRegNum<[89]>;
+ def MUL1_0 : LembergLocalReg< 9, 1, "mul1.0">, DwarfRegNum<[89]>;
+ def MUL2_0 : LembergLocalReg< 9, 2, "mul2.0">, DwarfRegNum<[89]>;
+ def MUL3_0 : LembergLocalReg< 9, 3, "mul3.0">, DwarfRegNum<[89]>;
+ def MUL0_1 : LembergLocalReg< 9, 0, "mul0.1">, DwarfRegNum<[90]>;
+ def MUL1_1 : LembergLocalReg< 9, 1, "mul1.1">, DwarfRegNum<[90]>;
+ def MUL2_1 : LembergLocalReg< 9, 2, "mul2.1">, DwarfRegNum<[90]>;
+ def MUL3_1 : LembergLocalReg< 9, 3, "mul3.1">, DwarfRegNum<[90]>;
// Global special registers for call/return
def RB : LembergGlobalReg< 10, "rb">, DwarfRegNum<[90]>;
@@ -489,13 +493,15 @@ let CopyCost = -1 in {
}
// Multiplication results
- def Mul : RegisterClass<"Lemberg", [i32], 32, [MUL0, MUL1, MUL2, MUL3] >;
+ def Mul : RegisterClass<"Lemberg", [i32], 32,
+ [MUL0_0, MUL1_0, MUL2_0, MUL3_0,
+ MUL0_1, MUL1_1, MUL2_1, MUL3_1]>;
// Classes for clusterizing multiplication results
- def M0 : RegisterClass<"Lemberg", [i32], 32, [MUL0] >;
- def M1 : RegisterClass<"Lemberg", [i32], 32, [MUL1] >;
- def M2 : RegisterClass<"Lemberg", [i32], 32, [MUL2] >;
- def M3 : RegisterClass<"Lemberg", [i32], 32, [MUL3] >;
+ def M0 : RegisterClass<"Lemberg", [i32], 32, [MUL0_0, MUL0_1] >;
+ def M1 : RegisterClass<"Lemberg", [i32], 32, [MUL1_0, MUL1_1] >;
+ def M2 : RegisterClass<"Lemberg", [i32], 32, [MUL2_0, MUL2_1] >;
+ def M3 : RegisterClass<"Lemberg", [i32], 32, [MUL3_0, MUL3_1] >;
// Class for memory result
def Mem : RegisterClass<"Lemberg", [i32], 32, [MEM, MEMHU, MEMHS, MEMBU, MEMBS] >
@@ -529,7 +535,8 @@ let CopyCost = -1 in {
// All registers that are loaded through ldx
def X : RegisterClass<"Lemberg", [i32], 32,
- [MUL0, MUL1, MUL2, MUL3,
+ [MUL0_0, MUL1_0, MUL2_0, MUL3_0,
+ MUL0_1, MUL1_1, MUL2_1, MUL3_1,
C1, C2, C3,
F0, F1, F2, F3, F4, F5, F6, F7,
F8, F9, F10, F11, F12, F13, F14, F15,

No commit comments for this range

Something went wrong with that request. Please try again.