# Instalando extensão Verilog

In [1]:
!pip install git+https://github.com/lesc-ufv/cad4u
!git clone https://github.com/lesc-ufv/cad4u
%load_ext plugin

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting git+https://github.com/lesc-ufv/cad4u
  Cloning https://github.com/lesc-ufv/cad4u to /tmp/pip-req-build-ksnhzo1r
  Running command git clone -q https://github.com/lesc-ufv/cad4u /tmp/pip-req-build-ksnhzo1r
fatal: destination path 'cad4u' already exists and is not an empty directory.


# Instalando riscV-assembler

In [2]:
# NOTE(review): the version is not pinned. The cells below call
# AssemblyConverter(output_type=...) and convert(); presumably this API is
# release-specific, so consider pinning a known-good version. TODO confirm.
!pip install riscv-assembler

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# Montador RiscV
Executando códigos para serem reaproveitados abaixo

In [3]:
%%writefile simple.s
add x1,x1,x1

Overwriting simple.s


In [4]:
from riscv_assembler.convert import AssemblyConverter
!rm -f -r simple/*
def assemble():
  """Assemble ``simple.s`` and write the program to ``im_data.txt`` as hex words.

  Runs the riscv-assembler text converter on ``simple.s``, reads the binary
  words it writes to ``simple/txt/simple.txt`` (one word per line) and
  rewrites them in hexadecimal, one word per line, which is the format the
  instruction memory loads with ``$readmemh``.
  """
  cnv = AssemblyConverter(output_type = "t") #just text
  cnv.convert("simple.s")

  # Context managers close both files even if a conversion fails.
  with open('simple/txt/simple.txt', 'r') as bin_file:
    # Skip blank lines so a stray empty line cannot crash int().
    words = [line.strip() for line in bin_file if line.strip()]

  with open('im_data.txt', 'w') as hex_file:
    for word in words:
      # Same text as hex(value)[2:]: lower-case hex without the "0x" prefix.
      hex_file.write(format(int(word, 2), 'x') + '\n')

# Risc-V

# Arquivos de registradores

In [5]:
%%writefile rm_data.txt
0
01
02
03
04
05
06
07
08
09
0A
0B
0C
0D
0E
0F
10
11
12
13

Overwriting rm_data.txt


## Memória de dados

In [6]:
%%writefile dm_data.txt
0
1
2
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19

Overwriting dm_data.txt


## Memória de instruções

#### Exemplo de programa

In [7]:
%%writefile simple.s
j x1, exit
add x1,x2,x3
add x4,x5,x6
add x7,x8,x9
sub x10,x11,x12
add x1,x1,x1
exit:
add x2, x3, x4

Overwriting simple.s


In [8]:
from riscv_assembler.convert import AssemblyConverter
!rm -f -r simple/*
cnv = AssemblyConverter(output_type = "t") #just text
cnv.convert("simple.s")
# Using readlines()
file1 = open('simple/txt/simple.txt', 'r')
Lines = file1.readlines()
file1 = open('im_data.txt', 'w')


for line in Lines:
   decimal_representation = int(line, 2)
   hexadecimal_string = hex(decimal_representation)
   file1.write(hexadecimal_string[2:]+'\n')

file1.close()

------Writing to Text file------
Output file: simple.txt
Number of instructions: 7


In [9]:
!cat simple/txt/simple.txt

10000000001001111011000001101111
00000000001100010000000010110011
00000000011000101000001000110011
00000000100101000000001110110011
01000000110001011000010100110011
00000000000100001000000010110011
00000000010000011000000100110011


In [10]:
!cat im_data.txt

8027b06f
3100b3
628233
9403b3
40c58533
1080b3
418133


### Ou digite suas instruções RiscV diretamente, em modo hexadecimal

In [11]:
%%writefile im_data.txt
20090001
01294820
01e9502a
1140fffd
2129ffff
2129ffff
2129ffff

Overwriting im_data.txt


# Código dos 5 estágios do RiscV

## Arquivo verilog registrador genérico

In [12]:
%%writefile regr.v
// Generic N-bit pipeline register with synchronous clear and hold.
//
//   clear : when high at the rising clock edge, out becomes all zeros
//           (takes priority over hold)
//   hold  : when high (and clear is low), out keeps its current value
//   in    : value captured when neither clear nor hold is asserted
//
// N is declared in the parameter port list so that it exists before the
// port declarations that use it.
module regr #(parameter N = 1) (
	input wire clk,
	input wire clear,
	input wire hold,
	input wire [N-1:0] in,
	output reg [N-1:0] out);

	always @(posedge clk) begin
		if (clear)
			out <= {N{1'b0}};
		else if (hold)
			out <= out;
		else
			out <= in;
	end
endmodule


Overwriting regr.v


## Arquivo verilog de registrador e memória de instruções

In [13]:
%%writefile im_reg.v
// Instruction memory: 128 words of 32 bits with an asynchronous read.
// The contents are loaded once from the hex file IM_DATA; only indices
// 0 .. NMEM-1 are requested from $readmemh. clk is not used by the logic.
module im #(
	parameter NMEM = 128,                // number of entries to load (not the array size)
	parameter IM_DATA = "im_data.txt"    // hex file read at time zero
) (
	input wire         clk,
	input wire  [31:0] addr,
	output wire [31:0] data);

	reg [31:0] mem [0:127];

	// The word index is taken from address bits [8:2].
	wire [6:0] word_index = addr[8:2];

	initial
		$readmemh(IM_DATA, mem, 0, NMEM-1);

	assign data = mem[word_index];
endmodule

// Register file: 32 registers of 32 bits, two combinational read ports and
// one synchronous write port.
//
//   read1/read2 -> data1/data2 : register 0 always reads as zero; a read of
//                                the register being written in the same
//                                cycle returns wrdata (internal bypass)
//   regwrite, wrreg, wrdata    : write port; writes to register 0 are ignored
//
// Initial contents come from the hex file RM_DATA (entries 0 .. NMEM-1).
// Every entry is cleared first so registers not covered by the file read as
// zero instead of X.
module regm(		input wire			clk,input wire  [4:0]	read1, read2,
		output wire [31:0]	data1, data2,input wire			regwrite, input wire	[4:0]	wrreg,input wire	[31:0]	wrdata);

	parameter NMEM = 20;   // Number of memory entries,
							// not the same as the memory size
	parameter RM_DATA = "rm_data.txt";  // file to read data from

	reg [31:0] mem [0:31];  // 32-bit memory with 32 entries

	integer i;
	initial begin
		// Zero-fill first: $readmemh only touches indices 0 .. NMEM-1.
		for (i = 0; i < 32; i = i + 1)
			mem[i] = 32'd0;
		$readmemh(RM_DATA, mem, 0, NMEM-1);
	end

	reg [31:0] _data1, _data2;

	// Read port 1
	always @(*) begin
		if (read1 == 5'd0)
			_data1 = 32'd0;
		else if ((read1 == wrreg) && regwrite)
			_data1 = wrdata;   // bypass the value being written this cycle
		else
			_data1 = mem[read1][31:0];
	end

	// Read port 2
	always @(*) begin
		if (read2 == 5'd0)
			_data2 = 32'd0;
		else if ((read2 == wrreg) && regwrite)
			_data2 = wrdata;   // bypass the value being written this cycle
		else
			_data2 = mem[read2][31:0];
	end

	assign data1 = _data1;
	assign data2 = _data2;

	always @(posedge clk) begin
		if (regwrite && wrreg != 5'd0) begin
			// write a non-zero register
			mem[wrreg] <= wrdata;
		end
	end
endmodule


Overwriting im_reg.v


## Arquivo verilog memória de dados

In [14]:
%%writefile datam.v
// Data memory: 128 words of 32 bits, synchronous write, combinational read.
// Indices 0 .. NMEM-1 are loaded from the hex file RM_DATA at time zero.
// The rd input is accepted but not used by the logic below.
module dm #(
	parameter NMEM = 20,                 // number of entries to load (not the array size)
	parameter RM_DATA = "dm_data.txt"    // file to read data from
) (
	input wire         clk,
	input wire  [6:0]  addr,
	input wire         rd, wr,
	input wire  [31:0] wdata,
	output wire [31:0] rdata);

	reg [31:0] mem [0:127];

	initial
		$readmemh(RM_DATA, mem, 0, NMEM-1);

	// Store on the rising edge while wr is asserted.
	always @(posedge clk)
		if (wr)
			mem[addr] <= wdata;

	// During a write the incoming data is returned directly, which avoids
	// the one-cycle delay before the stored value becomes visible.
	wire [31:0] stored_word = mem[addr];
	assign rdata = wr ? wdata : stored_word;

endmodule




Overwriting datam.v


## Alu e controle da Alu

In [15]:
%%writefile alu.v
// 32-bit combinational ALU.
//
//   ctl  : operation select (see the case statement below)
//   a, b : operands
//   out  : result; X for an unassigned ctl code
//   zero : high when out == 0
//
// Shifts use only b[4:0], as RV32 specifies. This matters for srai, whose
// immediate carries a set bit 10 in addition to the shift amount.
module alu(
		input				[3:0]		ctl,
		input				[31:0]	a, b,
		output reg	[31:0]	out,
		output							zero);

	wire [31:0] sub_ab;
	wire [31:0] add_ab;
	wire 				oflow_add;
	wire 				oflow_sub;
	wire 				oflow;
	wire 				slt;
	wire [4:0]	shamt;

	assign zero = (0 == out);

	assign sub_ab = a - b;
	assign add_ab = a + b;
	assign oflow_add = (a[31] == b[31] && add_ab[31] != a[31]) ? 1 : 0;
	// High when a and b have the same sign and the difference's sign differs from a's.
	assign oflow_sub = (a[31] == b[31] && sub_ab[31] != a[31]) ? 1 : 0;
	assign oflow = (ctl == 4'b0010) ? oflow_add : oflow_sub;
	// set if less than, 2's complement 32-bit numbers
	assign slt = oflow_sub ? ~(a[31]) : a[31];
	assign shamt = b[4:0];

	// Combinational logic: blocking assignments.
	always @(*) begin
		case (ctl)
			4'b0000: out = a & b;					/* and, andi */
			4'b0001: out = a | b;					/* or, ori */
			4'b0010: out = add_ab;					/* add, addi */
			4'b0011: out = a << shamt;				/* sll, slli */
			4'b0100: out = {31'b0, slt};			/* slt, slti */
			4'b0101: out = {31'b0, (a < b)};		/* sltu, sltiu */
			4'b0110: out = sub_ab;					/* sub */
			4'b0111: out = a ^ b;					/* xor, xori */
			4'b1000: out = a >> shamt;				/* srl, srli */
			// $signed makes >>> replicate the sign bit; on an unsigned
			// operand it would behave as a logical shift.
			4'b1010: out = $signed(a) >>> shamt;	/* sra, srai */
			default: out = {32{1'bx}};
		endcase
	end

endmodule


// ALU control decoder.
//
//   funct  : {inst[30], funct3} of the instruction in the execute stage
//   aluop  : 00 -> add, 01 -> subtract,
//            10 -> register-register operation decoded from funct,
//            11 -> register-immediate operation decoded from funct
//   aluctl : operation code for the alu module
//
// For register-immediate instructions inst[30] is bit 10 of the immediate,
// not an opcode bit, so it is ignored there (otherwise "addi rd, rs, -2"
// would decode as a subtraction). The exception is funct3 = 101, where
// inst[30] selects between srli and srai.
module alu_control(
		input wire [3:0] funct,
		input wire [1:0] aluop,
		output reg [3:0] aluctl);

	reg [3:0] _funct;

	wire is_imm         = (aluop == 2'b11);
	wire is_shift_right = (funct[2:0] == 3'b101);
	// Keep funct[3] for register-register operations and for right shifts only.
	wire [3:0] funct_eff = {funct[3] & (~is_imm | is_shift_right), funct[2:0]};

	always @(*) begin
		case(funct_eff)
			4'b0000:  _funct = 4'b0010;		/* add, addi */
			4'b1000:  _funct = 4'b0110;		/* sub */
			4'b0111:  _funct = 4'b0000;		/* and, andi */
			4'b0110:  _funct = 4'b0001;		/* or, ori */
			4'b0001:  _funct = 4'b0011;		/* sll, slli */
			4'b0010:  _funct = 4'b0100;		/* slt, slti */
			4'b0011:  _funct = 4'b0101;		/* sltu, sltiu */
			4'b0100:  _funct = 4'b0111;		/* xor, xori */
			4'b0101:  _funct = 4'b1000;		/* srl, srli */
			4'b1101:  _funct = 4'b1010;		/* sra, srai */
			default:  _funct = 4'bxxxx;
		endcase
	end

	always @(*) begin
		case(aluop)
			2'b00: aluctl = 4'b0010;		/* add */
			2'b01: aluctl = 4'b0110;		/* sub */
			2'b10: aluctl = _funct;			/* add, sub, and, or, sll, slt, sltu, xor, srl, sra */
			2'b11: aluctl = _funct;			/* addi, andi, ori, slli, slti, sltiu, xori, srli, srai */
			default: aluctl = 0;
		endcase
	end

endmodule


Overwriting alu.v


## Unidade de controle

In [16]:
%%writefile control.v
// Main control unit: decodes the opcode (and funct3 for branches) into the
// pipeline control signals and the sign-extended immediate.
//
// Every output receives a default at the top of the always block, so the
// logic is purely combinational. Without defaults for branch_lt and ImmGen,
// a blt would leave branch_lt asserted for the non-branch instructions that
// follow it.
module control(
		input  wire	[6:0]	opcode,
		output reg				branch_eq, branch_ne, branch_lt,
		output reg [1:0]	aluop,
		output reg				memread, memwrite, memtoreg,
		output reg				regdst, regwrite, alusrc,
		output reg				jump,
    output reg [31:0] ImmGen,
    input [31:0] inst);
    wire[2:0] f3 = inst[14:12]; // funct3, used to tell the branch instructions apart
	always @(*) begin
		/* defaults */
		aluop[1:0]	= 2'b10;
		alusrc			= 1'b0;
		branch_eq		= 1'b0;
		branch_ne		= 1'b0;
		branch_lt		= 1'b0;
		memread			= 1'b0;
		memtoreg		= 1'b0;
		memwrite		= 1'b0;
		regdst			= 1'b1;
		regwrite		= 1'b1;
		jump				= 1'b0;
		ImmGen			= 32'd0;

		case (opcode)
			7'b0000011: begin	/* L-Type: lb, lh, lw, lbu, lhu */
					alusrc			= 1'b1;
					aluop[1:0]	= 2'b00;
					memtoreg		= 1'b1;
					regwrite		= 1'b1;
					memread			= 1'b1;
					ImmGen			= {{20{inst[31]}},inst[31:20]};
				end
			7'b0010011: begin	/* I-Type: addi, slli, slti, sltiu, xori, srli, srai, ori, andi */
					// aluop 11 tells alu_control this is a register-immediate operation
					aluop[1:0]	= 2'b11;
					alusrc			= 1'b1;
					ImmGen			= {{20{inst[31]}},inst[31:20]};
				end
			7'b1100011: begin	/* B-Type: only beq, bne and blt raise a branch flag */
					aluop[1:0]	= 2'b01;
					ImmGen			= {{19{inst[31]}},inst[31],inst[7],inst[30:25],inst[11:8],1'b0};
					regwrite		= 1'b0;
					branch_eq		= (f3 == 3'b000) ? 1'b1 : 1'b0;
					branch_ne		= (f3 == 3'b001) ? 1'b1 : 1'b0;
					branch_lt		= (f3 == 3'b100) ? 1'b1 : 1'b0;
				end
			7'b0100011: begin	/* S-Type: sb; sh; sw */
					memwrite		= 1'b1;
					aluop[1:0]	= 2'b00;
					alusrc			= 1'b1;
					regwrite		= 1'b0;
					ImmGen			= {{20{inst[31]}},inst[31:25],inst[11:7]};
				end
			7'b0110011: begin	/* R-Type: add, sub, sll, slt, sltu, xor, srl, sra, or, and */
					// the defaults above already describe an R-type instruction
				end
			7'b1101111: begin	/* J-Type: j */
					jump				= 1'b1;
					aluop[1:0]	= 2'b01;
					// NOTE(review): this bit order is not the J-type immediate
					// layout of the RISC-V specification (imm[20|10:1|11|19:12]
					// taken from inst[31:12]); it appears tuned to the output of
					// the assembler used in this notebook. Confirm before using
					// another toolchain.
					ImmGen			= {{11{inst[31]}},inst[20],inst[10:1],inst[11],inst[19:12], 1'b0};
					// The jump is also flagged as beq so it can reuse the branch path.
					branch_eq		= 1'b1;
				end
		endcase
	end
endmodule

Overwriting control.v


<img src="https://raw.githubusercontent.com/cacauvicosa/mips/master/michael/riscv/riscv-single/riscv-single.png" width="600">

## Processador RISCV


### RiscV Opcodes bits e Registradores


<img src="https://raw.githubusercontent.com/arduinoufv/inf251/main/figures/mips_opcode_regs.png" alt="wscad2020" width="300"/>  <img src="https://raw.githubusercontent.com/arduinoufv/inf251/main/figures/riscV_opcode_regs.png" alt="wscad2020" width="300"/> 

### Código de decodificação

In [17]:
%%writefile decodefields.v
// Decode-stage fragment, textually included in the body of module cpu.
// Left column: MIPS-style fields; right column: RISC-V fields.
wire [5:0]  opcode; wire [6:0]  opcoderv; 
	wire [4:0]  rs;     wire [4:0]  rs1;
	wire [4:0]  rt;     wire [4:0]  rs2;
	wire [4:0]  rd;
                      wire [6:0] func7; wire [2:0] func3;
	wire [15:0] imm;
	wire [4:0]  shamt;
	wire [31:0] jaddr_s2;
	wire [31:0] seimm;  // sign extended immediate
	//
	assign opcode   = inst_s2[31:26];  assign opcoderv   = inst_s2[6:0]; 
	assign rs       = inst_s2[25:21];  assign rs2       = inst_s2[24:20];
	assign rt       = inst_s2[20:16];  assign rs1       = inst_s2[19:15];
	assign rd       = inst_s2[11:7];
                                      assign func7       = inst_s2[31:25];
                                      assign func3       = inst_s2[14:12];
	assign imm      = inst_s2[15:0];
	assign shamt    = inst_s2[10:6];
	assign seimm 	= {{16{inst_s2[15]}}, inst_s2[15:0]};

  // register file
	wire [31:0] data1, data2;
	regm regm1(.clk(clk), .read1(rs1), .read2(rs2),
			.data1(data1), .data2(data2),
			.regwrite(regwrite_s5), .wrreg(wrreg_s5),
			.wrdata(wrdata_s5));

      // control (opcode -> ...)
	wire		regdst;
	wire		branch_eq_s2;
	wire		branch_ne_s2;
  wire		branch_lt_s2;
	wire		memread;
	wire		memwrite;
	wire		memtoreg;
	wire [1:0]	aluop;
	wire		regwrite;
	wire		alusrc;
	wire		jump_s2;
  wire [31:0] ImmGen;  // RISCV
	//
  // branch_lt is also produced by the control unit
	control ctl1(.opcode(opcoderv), .regdst(regdst),
				.branch_eq(branch_eq_s2), .branch_ne(branch_ne_s2), .branch_lt(branch_lt_s2),
				.memread(memread),
				.memtoreg(memtoreg), .aluop(aluop),
				.memwrite(memwrite), .alusrc(alusrc),
				.regwrite(regwrite), .jump(jump_s2), .ImmGen(ImmGen), .inst(inst_s2));

  	// pass rs1 to stage 3 (for forwarding)
	wire [4:0] rs_s3;     	wire [4:0] rs1_s3;
	regr #(.N(5)) regr_s2_rs(.clk(clk), .clear(1'b0), .hold(stall_s1_s2),
				.in(rs1), .out(rs1_s3));

	// transfer the immediate, rs2, and rd to stage 3
	wire [31:0] seimm_s3;
	wire [4:0] 	rt_s3;    wire [4:0] rs2_s3;
	wire [4:0] 	rd_s3;
	regr #(.N(32)) reg_s2_seimm(.clk(clk), .clear(flush_s2), .hold(stall_s1_s2),
						.in(ImmGen), .out(seimm_s3));  // RISCV
	regr #(.N(10)) reg_s2_rt_rd(.clk(clk), .clear(flush_s2), .hold(stall_s1_s2),
						.in({rs2, rd}), .out({rs2_s3, rd_s3}));

  // shift left, seimm
	wire [31:0] seimm_sl2;
	assign seimm_sl2 = {seimm[29:0], 2'b0};  // shift left 2 bits
	// branch address
	wire [31:0] baddr_s2;
	assign baddr_s2 = pc4_s2 + ImmGen;
	// Jump address: computed like the branch target (pc4_s2 + ImmGen). The
	// previous MIPS-style target {pc[31:28], inst[25:0], 2'b00} has no meaning
	// for a RISC-V J-type instruction, whose jump is relative to its own PC.
	assign jaddr_s2 = pc4_s2 + ImmGen;

  wire [3:0] func_s3;
  
  	// pass {inst[30], funct3} to stage 3 for the ALU control
  	regr #(.N(4)) func7_3_s2(.clk(clk), .clear(1'b0), .hold(stall_s1_s2),
						.in({func7[5],func3}), .out(func_s3));




Overwriting decodefields.v


### RiscV Immediate and Funct Alucontrol

<img src="https://raw.githubusercontent.com/arduinoufv/inf251/main/figures/mips_imgen_aluctrl.png" alt="wscad2020" width="300"/>  <img src="https://raw.githubusercontent.com/arduinoufv/inf251/main/figures/imgen_aluctrl_RiscV.png" alt="wscad2020" width="300"/> 

### Estágio de execução

In [18]:
%%writefile execution_newcode.v
// Execute-stage fragment, textually included in the body of module cpu.

	// Operand B of the ALU: the staged immediate when alusrc_s3 is set,
	// otherwise the (possibly forwarded) second register value.
	wire [31:0] alusrc_data2;
	assign alusrc_data2 = alusrc_s3 ? seimm_s3 : fw_data2_s3;

	// Low six bits of the staged immediate; not referenced elsewhere in this fragment.
	wire [5:0] funct;
	assign funct = seimm_s3[5:0];

	// ALU operation select, decoded from func_s3 and aluop_s3.
	wire [3:0] aluctl;
	alu_control alu_ctl1(
			.funct(func_s3),
			.aluop(aluop_s3),
			.aluctl(aluctl));

	// ALU proper
	wire [31:0]	alurslt;
	wire zero_s3;
	alu alu1(
			.ctl(aluctl),
			.a(fw_data1_s3),
			.b(alusrc_data2),
			.out(alurslt),
			.zero(zero_s3));

	// Destination register: rd when regdst_s3 is set, rs2 otherwise.
	wire [4:0]	wrreg;
	wire [4:0]	wrreg_s4;
	assign wrreg = regdst_s3 ? rd_s3 : rs2_s3;


Overwriting execution_newcode.v


### Mips e RISCV Formato de instruções

<img src="https://raw.githubusercontent.com/arduinoufv/inf251/main/figures/mips_formato_instrucoes.png" alt="wscad2020" width="500"/>  <img src="https://raw.githubusercontent.com/arduinoufv/inf251/main/figures/riscVaddsubaddildsd.png" alt="wscad2020" width="500"/> 

### DATAPATH

<img src="https://raw.githubusercontent.com/arduinoufv/inf251/main/figures/riscvhazard_forward.png" alt="wscad2020" width="800"/> 


### Código main

In [19]:
%%writefile main.v
/*
 * cpu - five-stage pipelined RISC-V CPU.
 *
 */


`include "regr.v"
`include "im_reg.v"
`include "alu.v"
`include "control.v"
`include "datam.v"



// Five-stage pipelined CPU: IF (s1), ID (s2), EX (s3), MEM (s4), WB (s5).
//
//   clk     : clock
//   NMEM    : number of instruction-memory entries loaded from IM_DATA
//   IM_DATA : hex file with the program
//
// Signal suffixes (_s2 .. _s5) name the stage in which a value is valid.
// Branches and jumps are resolved in stage 4; when one is taken, the three
// younger instructions are squashed (flush_s1 .. flush_s3). A load followed
// by a dependent instruction stalls IF/ID for one cycle (stall_s1_s2).
module cpu(
		input wire clk);

	parameter NMEM = 7;  // number in instruction memory
	parameter IM_DATA = "im_data.txt";

	wire regwrite_s5;
	wire [4:0] wrreg_s5;
	wire [31:0]	wrdata_s5;
	reg stall_s1_s2;

	// Signals driven by later stages but read by earlier ones. They are
	// declared here so that every name is declared before its first use.
	reg         pcsrc;        // taken-branch decision (stage 4)
	wire        jump_s4;
	wire [31:0] baddr_s4;
	wire [31:0] jaddr_s4;
	wire [31:0] alurslt_s4;
	reg  [1:0]  forward_a;
	reg  [1:0]  forward_b;
	reg  [31:0] fw_data1_s3;
	reg  [31:0] fw_data2_s3;

	// {{{ flush control
	reg flush_s1, flush_s2, flush_s3;
	always @(*) begin
		flush_s1 = 1'b0;
		flush_s2 = 1'b0;
		flush_s3 = 1'b0;
		if (pcsrc | jump_s4) begin
			flush_s1 = 1'b1;
			flush_s2 = 1'b1;
			flush_s3 = 1'b1;
		end
	end
	// }}}

	// {{{ stage 1, IF (fetch)

	reg  [5:0] clock_counter;
	initial begin
		clock_counter <= 6'd1;
	end
        always @(posedge clk) begin
                clock_counter <= clock_counter + 1;
	end

	reg  [31:0] pc;
	initial begin
		pc <= 32'd0;
	end

	wire [31:0] pc4;  // PC + 4
	assign pc4 = pc + 4;   

	// A redirect from stage 4 must win over a stall: the stalled instructions
	// are on the wrong path and are flushed in the same cycle, so holding the
	// PC here would lose the branch.
	always @(posedge clk) begin
		if (pcsrc == 1'b1)
			pc <= baddr_s4;
		else if (jump_s4 == 1'b1)
			pc <= jaddr_s4;
		else if (stall_s1_s2)
			pc <= pc;
		else
			pc <= pc4;
	end

	// pass the PC of the fetched instruction to stage 2
	// (the input is pc, not pc4, so branch targets are relative to the instruction itself)
	wire [31:0] pc4_s2;
	regr #(.N(32)) regr_pc4_s2(.clk(clk),
						.hold(stall_s1_s2), .clear(flush_s1),
						.in(pc), .out(pc4_s2));

	// instruction memory
	wire [31:0] inst;
	wire [31:0] inst_s2;
	im #(.NMEM(NMEM), .IM_DATA(IM_DATA))
		im1(.clk(clk), .addr(pc), .data(inst));
	regr #(.N(32)) regr_im_s2(.clk(clk),
						.hold(stall_s1_s2), .clear(flush_s1),
						.in(inst), .out(inst_s2));

	// }}}

	// {{{ stage 2, ID (decode)

`include "decodefields.v"


	// transfer register data to stage 3
	wire [31:0]	data1_s3, data2_s3;
	regr #(.N(64)) reg_s2_mem(.clk(clk), .clear(flush_s2), .hold(stall_s1_s2),
				.in({data1, data2}),
				.out({data1_s3, data2_s3}));


	// transfer the staged PC to stage 3
	wire [31:0] pc4_s3;
	regr #(.N(32)) reg_pc4_s2(.clk(clk), .clear(1'b0), .hold(stall_s1_s2),
						.in(pc4_s2), .out(pc4_s3));


	// transfer the control signals to stage 3
	wire		regdst_s3;
	wire		memread_s3;
	wire		memwrite_s3;
	wire		memtoreg_s3;
	wire [1:0]	aluop_s3;
	wire		regwrite_s3;
	wire		alusrc_s3;
	// A bubble is inserted by setting all the control signals to zero.
	// This happens on a stall and also on a flush, so that a wrong-path
	// instruction in ID cannot reach MEM with memwrite or regwrite set.
	regr #(.N(8)) reg_s2_control(.clk(clk), .clear(stall_s1_s2 | flush_s2), .hold(1'b0),
			.in({regdst, memread, memwrite,
					memtoreg, aluop, regwrite, alusrc}),
			.out({regdst_s3, memread_s3, memwrite_s3,
					memtoreg_s3, aluop_s3, regwrite_s3, alusrc_s3}));

	// The branch and jump flags are part of the bubble too: during a stall
	// the instruction stays in ID and presents its flags again next cycle.
	wire branch_eq_s3, branch_ne_s3, branch_lt_s3;
	regr #(.N(3)) branch_s2_s3(.clk(clk), .clear(flush_s2 | stall_s1_s2), .hold(1'b0),
				.in({branch_eq_s2, branch_ne_s2,branch_lt_s2}),
				.out({branch_eq_s3, branch_ne_s3,branch_lt_s3}));

	wire [31:0] baddr_s3;
	regr #(.N(32)) baddr_s2_s3(.clk(clk), .clear(flush_s2), .hold(1'b0),
				.in(baddr_s2), .out(baddr_s3));

	wire jump_s3;
	regr #(.N(1)) reg_jump_s3(.clk(clk), .clear(flush_s2 | stall_s1_s2), .hold(1'b0),
				.in(jump_s2),
				.out(jump_s3));

	wire [31:0] jaddr_s3;
	regr #(.N(32)) reg_jaddr_s3(.clk(clk), .clear(flush_s2), .hold(1'b0),
				.in(jaddr_s2), .out(jaddr_s3));
	// }}}

	// {{{ stage 3, EX (execute)

`include "execution_newcode.v"

	// pass through some control signals to stage 4
	wire regwrite_s4;
	wire memtoreg_s4;
	wire memread_s4;
	wire memwrite_s4;
	regr #(.N(4)) reg_s3(.clk(clk), .clear(flush_s2), .hold(1'b0),
				.in({regwrite_s3, memtoreg_s3, memread_s3,
						memwrite_s3}),
				.out({regwrite_s4, memtoreg_s4, memread_s4,
						memwrite_s4}));

	// first ALU operand, after forwarding
	always @(*)
	case (forward_a)
			2'd1: fw_data1_s3 = alurslt_s4;
			2'd2: fw_data1_s3 = wrdata_s5;
		 default: fw_data1_s3 = data1_s3;
	endcase

	wire zero_s4;
	regr #(.N(1)) reg_zero_s3_s4(.clk(clk), .clear(1'b0), .hold(1'b0),
					.in(zero_s3), .out(zero_s4));

	// pass ALU result to stage 4
	regr #(.N(32)) reg_alurslt(.clk(clk), .clear(flush_s3), .hold(1'b0),
				.in({alurslt}),
				.out({alurslt_s4}));

	// second register operand, after forwarding; passed to stage 4 as store data
	wire [31:0] data2_s4;
	always @(*)
	case (forward_b)
			2'd1: fw_data2_s3 = alurslt_s4;
			2'd2: fw_data2_s3 = wrdata_s5;
		 default: fw_data2_s3 = data2_s3;
	endcase
	regr #(.N(32)) reg_data2_s3(.clk(clk), .clear(flush_s3), .hold(1'b0),
				.in(fw_data2_s3), .out(data2_s4));


	// pass the destination register to stage 4
	regr #(.N(5)) reg_wrreg(.clk(clk), .clear(flush_s3), .hold(1'b0),
				.in(wrreg), .out(wrreg_s4));

	wire branch_eq_s4, branch_ne_s4, branch_lt_s4;
	regr #(.N(3)) branch_s3_s4(.clk(clk), .clear(flush_s3), .hold(1'b0),
				.in({branch_eq_s3, branch_ne_s3,branch_lt_s3}),
				.out({branch_eq_s4, branch_ne_s4,branch_lt_s4}));

	regr #(.N(32)) baddr_s3_s4(.clk(clk), .clear(flush_s3), .hold(1'b0),
				.in(baddr_s3), .out(baddr_s4));

	regr #(.N(1)) reg_jump_s4(.clk(clk), .clear(flush_s3), .hold(1'b0),
				.in(jump_s3),
				.out(jump_s4));

	regr #(.N(32)) reg_jaddr_s4(.clk(clk), .clear(flush_s3), .hold(1'b0),
				.in(jaddr_s3), .out(jaddr_s4));
	// }}}

	// {{{ stage 4, MEM (memory)

	// pass regwrite and memtoreg to stage 5
	wire memtoreg_s5;
	regr #(.N(2)) reg_regwrite_s4(.clk(clk), .clear(1'b0), .hold(1'b0),
				.in({regwrite_s4, memtoreg_s4}),
				.out({regwrite_s5, memtoreg_s5}));

	// data memory (word addressed: ALU result bits [8:2])
	wire [31:0] rdata;
	dm dm1(.clk(clk), .addr(alurslt_s4[8:2]), .rd(memread_s4), .wr(memwrite_s4),
			.wdata(data2_s4), .rdata(rdata));
	// pass read data to stage 5
	wire [31:0] rdata_s5;
	regr #(.N(32)) reg_rdata_s4(.clk(clk), .clear(1'b0), .hold(1'b0),
				.in(rdata),
				.out(rdata_s5));

	// pass alurslt to stage 5
	wire [31:0] alurslt_s5;
	regr #(.N(32)) reg_alurslt_s4(.clk(clk), .clear(1'b0), .hold(1'b0),
				.in(alurslt_s4),
				.out(alurslt_s5));

	// pass wrreg to stage 5
	regr #(.N(5)) reg_wrreg_s4(.clk(clk), .clear(1'b0), .hold(1'b0),
				.in(wrreg_s4),
				.out(wrreg_s5));

	// branch decision: the first asserted flag selects the condition
	always @(*) begin
		case (1'b1)
			branch_eq_s4: pcsrc = zero_s4;
			branch_ne_s4: pcsrc = ~(zero_s4);
			branch_lt_s4: pcsrc = alurslt_s4[31];  // sign bit of the staged ALU result

			default: pcsrc = 1'b0;
		endcase
	end
	// }}}
			
	// {{{ stage 5, WB (write back)

	assign wrdata_s5 = (memtoreg_s5 == 1'b1) ? rdata_s5 : alurslt_s5;

	// }}}

	// {{{ forwarding

	// stage 4 (MEM) -> stage 3 (EX)
	// stage 5 (WB)  -> stage 3 (EX)

	always @(*) begin
		// If an older instruction (stage 4 or 5) will write a register that
		// the instruction in stage 3 reads, forward the value. Stage 4 has
		// priority because it holds the most recent result. Register x0 is
		// never forwarded: it always reads as zero.

		// data1 input to ALU
		if ((regwrite_s4 == 1'b1) && (wrreg_s4 != 5'd0) && (wrreg_s4 == rs1_s3)) begin
			forward_a = 2'd1;  // stage 4
		end else if ((regwrite_s5 == 1'b1) && (wrreg_s5 != 5'd0) && (wrreg_s5 == rs1_s3)) begin
			forward_a = 2'd2;  // stage 5
		end else
			forward_a = 2'd0;  // no forwarding

		// data2 input to ALU
		if ((regwrite_s4 == 1'b1) && (wrreg_s4 != 5'd0) && (wrreg_s4 == rs2_s3)) begin
			forward_b = 2'd1;  // stage 4
		end else if ((regwrite_s5 == 1'b1) && (wrreg_s5 != 5'd0) && (wrreg_s5 == rs2_s3)) begin
			forward_b = 2'd2;  // stage 5
		end else
			forward_b = 2'd0;  // no forwarding
	end
	// }}}

	// {{{ load use data hazard detection, signal stall

	/* If the instruction in stage 3 (EX) is a load (memread_s3) and the
	 * instruction in stage 2 (ID) uses the load's destination register as
	 * rs1 or rs2, a stall must be performed: the loaded value only exists
	 * after the memory stage and is forwarded from there on.
	 *
	 *   lw  x1, 16(x3)   ; in EX: rd_s3 = x1, memread_s3 = 1
	 *   add x2, x1, x1   ; in ID: rs1 = x1, rs2 = x1  -> stall one cycle
	 */
	always @(*) begin
		if (memread_s3 == 1'b1 && ((rs2 == rd_s3) || (rs1 == rd_s3)) ) begin
			stall_s1_s2 = 1'b1;  // perform a stall
		end else
			stall_s1_s2 = 1'b0;  // no stall
	end
	// }}}

endmodule


// Simulation top level: drives the clock, runs the CPU for 256 time units,
// then dumps the first 16 data-memory words and the first 16 registers.
module top;
	// Free-running clock that toggles every time unit.
	reg clk;
	initial clk = 0;
	always #1 clk = ~clk;

	// Change this to match the number of instructions in the program.
	parameter nInstrucoes = 50;
	cpu #(.NMEM(nInstrucoes)) CPU(.clk(clk));

	initial begin
		$dumpfile("test.vcd");
		$dumpvars(0, top);
		#256;
		$writememh("mem.data", top.CPU.dm1.mem, 0, 15);
		$writememh("reg.data", top.CPU.regm1.mem, 0, 15);
		$dumpoff;
		$finish;
	end

endmodule


Overwriting main.v


## Formas de onda

(Caso apareça um warning relacionado a im_data.txt, desconsidere. Isso significa apenas que o programa tem menos instruções do que o tamanho configurado para a memória de instruções.)

In [None]:
%%verilog
`include "main.v"

### Fetch

In [None]:
%%waveform test.vcd

sign_list = ['top.clk','top.CPU.clock_counter','top.CPU.pc','top.CPU.inst', 'top.CPU.rs1','top.CPU.rs2', 'top.CPU.rd','top.CPU.alurslt' ]
time_begin = 0
time_end = 40
base = 'hex' # bin, dec, dec2, hex, octal


### Decode

In [None]:
%%waveform test.vcd

op_dict = [{'110011':'ALU'}, {'10':'sp','111':'t2','1001':'s1', '1000':'s0','110':'t1','100':'tp','1':'ra','11':'gp', '101':'t0'} ]

sign_list = ['top.CPU.clock_counter','top.CPU.opcoderv,r[0]', 'top.CPU.rs1','top.CPU.rs2', 'top.CPU.rd','top.CPU.data1', 'top.CPU.data2' , 'top.CPU.func3', 'top.CPU.func7,b' ]
time_begin = 0
time_end = 40
base = 'dec' # bin, dec, dec2, hex, octal


### Execute

#### Add e Sub

In [None]:
%%waveform test.vcd

op_dict = [{'10':'add','110':'sub'}, {'10':'sp','111':'t2','1001':'s1', '1000':'s0','110':'t1','100':'tp','1':'ra','11':'gp', '101':'t0'} ]

sign_list = ['top.CPU.clock_counter','top.CPU.func3', 'top.CPU.func7,b', 'top.CPU.aluctl,r[0]' , 'top.CPU.fw_data1_s3', 'top.CPU.alusrc_data2','top.CPU.alurslt']
time_begin = 0
time_end = 40
base = 'dec2' # bin, dec, dec2, hex, octal


#### Forward

In [None]:
%%writefile simple.s
add x2,x3,x4
sub x3,x5,x1
add x7,x2,x3

In [None]:
!rm -f -r simple/*
def assemble():
  cnv = AssemblyConverter(output_type = "t") #just text
  cnv.convert("simple.s")
  # Using readlines()
  file1 = open('simple/txt/simple.txt', 'r')
  Lines = file1.readlines()
  file1 = open('im_data.txt', 'w')


  for line in Lines:
    decimal_representation = int(line, 2)
    hexadecimal_string = hex(decimal_representation)
    file1.write(hexadecimal_string[2:]+'\n')

  file1.close()

assemble()

In [None]:
%%verilog 
`include "main.v"

In [None]:
%%waveform test.vcd

op_dict = [{'10':'add','110':'sub'}, {'10':'sp','111':'t2','1001':'s1', '1000':'s0','110':'t1','100':'tp','1':'ra','11':'gp', '101':'t0'} ]

sign_list = ['top.CPU.clock_counter','top.CPU.func3', 'top.CPU.func7,b', 'top.CPU.aluctl,r[0]' , 'top.CPU.fw_data1_s3', 'top.CPU.alusrc_data2','top.CPU.alurslt']
time_begin = 0
time_end = 40
base = 'dec2' # bin, dec, dec2, hex, octal


#### Addi

In [None]:
%%writefile simple.s
add x2,x3,x4  
sub x3,x5,x1
add x7,x2,x3
addi x2,x0,5
addi x3,x1,-2

In [None]:
!rm -f -r simple/*
assemble()

In [None]:
!cat simple/txt/simple.txt

In [None]:
%%verilog 
`include "main.v"

##### Decode

In [None]:
%%waveform test.vcd

op_dict = [{'10':'add','110':'sub'}, {'110011':'ALU','10011':'addi'},{'10':'sp','111':'t2','1001':'s1', '1000':'s0','110':'t1','100':'tp','1':'ra','11':'gp', '101':'t0'} ]

sign_list = ['top.CPU.clock_counter','top.CPU.opcoderv,r[1]' , 'top.CPU.data1', 'top.CPU.data2','top.CPU.ImmGen', 'top.CPU.alurslt']
time_begin = 0
time_end = 40
base = 'dec2' # bin, dec, dec2, hex, octal


##### Execute


In [None]:
%%waveform test.vcd

op_dict = [{'10':'add','110':'sub'}, {'10':'sp','111':'t2','1001':'s1', '1000':'s0','110':'t1','100':'tp','1':'ra','11':'gp', '101':'t0'} ]

sign_list = ['top.CPU.clock_counter','top.CPU.func3', 'top.CPU.func7,b', 'top.CPU.aluctl,r[0]' , 'top.CPU.fw_data1_s3', 'top.CPU.alusrc_data2','top.CPU.alurslt']
time_begin = 0
time_end = 40
base = 'dec2' # bin, dec, dec2, hex, octal


##### Segundo Exemplo

In [None]:
%%writefile simple.s
addi x2,x0,5
addi x3,x0,-2
add x5,x3,x2  

In [None]:
!rm -f -r simple/*
assemble()

In [None]:
!cat simple/txt/simple.txt

In [None]:
%%verilog 
`include "main.v"

In [None]:
# Final Register file
!cat reg.data

#### Load and Store

##### Código de Teste para Load

* x2 = mem[x4+0] = mem[(4+0)/4] = mem[palavra 1] = 1
* x3 = mem[x4+12] = mem[(4+12)/4] = mem[palavra 4] = 4
* x1 = mem[x4-4] = mem[(4-4)/4] = mem[palavra 0] = 0

Registro
```
0
0
1
4
4
5
6
....
```


In [None]:
%%writefile simple.s
lw x2,0(x4)
lw x3,12(x4)
lw x1,-4(x4)

In [None]:
!rm -f -r simple/*
assemble()

In [None]:
%%verilog 
`include "main.v"

Resultado esperado para o Banco de Registradores
```
0
0
1
4
4
5
6
....
```

In [None]:
# Final Register file
!cat reg.data

##### Segundo Código de Teste

* lw x1,4(x8)
* add x2,x2,x2
* add x3,x2,x1

```
x1 = mem[(4+8)/4] = mem[3] = 3
x2 = 2+2 = 4
x3 = 4 + 3 = 7
```

Resultado final esperado para Registradores
```
0
3
4
7
5
6
....
```



In [None]:
%%writefile simple.s
lw x1,4(x8)
add x2,x2,x2
add x3,x2,x1

In [None]:
!rm -f -r simple/*
assemble()

In [None]:
!cat im_data.txt

In [None]:
%%verilog 
`include "main.v"

In [None]:
# Final Register file
!cat reg.data

##### Terceiro Teste do Lw e unidade de Hazard

In [None]:
%%writefile simple.s
add x2,x2,x2
lw x1,4(x8)
add x3,x2,x1

In [None]:
%%waveform test.vcd

op_dict = [{'10':'add','110':'sub'}, {'110011':'ALU','10011':'addi','11':'Lw'},{'10':'sp','111':'t2','1001':'s1', '1000':'s0','110':'t1','100':'tp','1':'ra','11':'gp', '101':'t0'} ]

sign_list = ['top.CPU.clock_counter','top.CPU.opcoderv,r[1]' , 'top.CPU.rs1', 'top.CPU.rs2','top.CPU.ImmGen', 'top.CPU.rd_s3']
time_begin = 0
time_end = 40
base = 'dec2' # bin, dec, dec2, hex, octal


Resultado final esperado para Registradores
```
0
3
4
7
5
6
....
```

In [None]:
# Final Register file
!cat reg.data

#### Branches

[Arquivo com os binários dos 2 exemplos](https://raw.githubusercontent.com/arduinoufv/inf251/main/riscv/exemplos_para_desvio.txt)

##### Código de Teste


In [None]:
%%writefile simple.s
beq x19,x10,8
add x18,x18,x10
addi x19,x19,-1
beq x0,x0,-6
add x1,x1,x1

In [None]:
!rm -f -r simple/*
assemble()

In [None]:
!cat simple/txt/simple.txt

In [None]:
!cat im_data.txt

In [None]:
%%verilog 
`include "main.v"

###### Decode

In [None]:
%%waveform test.vcd

op_dict = [{'10':'add','110':'sub'}, {'110011':'ALU','10011':'addi','11':'Lw', '1100011':'Beq'},{'10':'sp','111':'t2','1001':'s1', '1000':'s0','110':'t1','100':'tp','1':'ra','11':'gp', '101':'t0'} ]

sign_list = ['top.CPU.clock_counter', 'top.CPU.pc', 'top.CPU.opcoderv,r[1]' , 'top.CPU.rs1', 'top.CPU.rs2','top.CPU.ImmGen', 'top.CPU.rd_s3', 'top.CPU.pc4_s2', 'top.CPU.baddr_s2']
time_begin = 0
time_end = 40
base = 'dec' # bin, dec, dec2, hex, octal

##### Segundo Teste BEQ
* 0: nop
* 4: addi x1,x1,1
* 8: beq  x1,x3,4
* 12: beq x0,x0,-4
* 16: sub x1,x1,x4

In [None]:
%%writefile simple.s
nop
addi x1,x1,1
beq x1,x3,4
beq x0,x0,-4
sub x1,x1,x4

In [None]:
!cat simple/txt/simple.txt

In [None]:
!cat im_data.txt

In [None]:
%%waveform test.vcd

op_dict = [{'10':'add','110':'sub'}, {'110011':'ALU','10011':'addi','11':'Lw', '1100011':'Beq'},{'10':'sp','111':'t2','1001':'s1', '1000':'s0','110':'t1','100':'tp','1':'ra','11':'gp', '101':'t0'} ]

sign_list = ['top.CPU.clock_counter', 'top.CPU.pc', 'top.CPU.opcoderv,r[1]' , 'top.CPU.rs1', 'top.CPU.rs2','top.CPU.ImmGen', 'top.CPU.rd_s3', 'top.CPU.pc4_s2', 'top.CPU.baddr_s2']
time_begin = 0
time_end = 40
base = 'dec' # bin, dec, dec2, hex, octal

###### Explicação
```
                 4  8  12 16 20 24 4 8 12 16 20 16 20
0  nop           2  3  4  5  6  7  8 9 0  1  2  3  4
4  addi x1,x1,1  F  D  E  M  W     F D E  M  W           
8  beq x1,x3,4      F  D  E  M  W    F D  E  M  W
12 beq x0,x0,-4        F  D  E  M  W   F  D  E 
16 sub x1,x1,x4           F  D  E         F  D  F  D  E  M W
20                           F  D            F     F 
24                              F
```


In [None]:
%%waveform test.vcd

op_dict = [{'10':'add','110':'sub'}, {'10':'sp','111':'t2','1001':'s1', '1000':'s0','110':'t1','100':'tp','1':'ra','11':'gp', '101':'t0'} ]

sign_list = ['top.CPU.clock_counter','top.CPU.pc']
time_begin = 0
time_end = 40
base = 'dec2' # bin, dec, dec2, hex, octal


In [None]:
# Final Register file
!cat reg.data

### Memory Stage

In [None]:
%%waveform test.vcd

op_dict = [{'10':'add','110':'sub'}, {'10':'sp','111':'t2','1001':'s1', '1000':'s0','110':'t1','100':'tp','1':'ra','11':'gp', '101':'t0'} ]

sign_list = ['top.CPU.clock_counter','top.CPU.alurslt_s4','top.CPU.wrreg_s4']
time_begin = 0
time_end = 40
base = 'dec2' # bin, dec, dec2, hex, octal


### Writeback Stage

In [None]:
%%waveform test.vcd

op_dict = [{'10':'add','110':'sub'}, {'10':'sp','111':'t2','1001':'s1', '1000':'s0','110':'t1','100':'tp','1':'ra','11':'gp', '101':'t0'} ]

sign_list = ['top.CPU.clock_counter','top.CPU.alurslt_s5','top.CPU.wrreg_s5']
time_begin = 0
time_end = 40
base = 'dec2' # bin, dec, dec2, hex, octal


In [None]:
# Final Register file
!cat reg.data

In [None]:
# Final Data Memory
!cat mem.data 

# Testes

### Problema 1:  ANDI - Bitwise AND immediate

In [20]:
%%writefile simple.s
addi x1,x0,7
andi x2,x1,8

Overwriting simple.s


In [21]:
!rm -f -r simple/*
assemble()

------Writing to Text file------
Output file: simple.txt
Number of instructions: 2


In [22]:
!cat simple/txt/simple.txt

00000000011100000000000010010011
00000000100000001111000100010011


In [23]:
%%verilog 
`include "main.v"

Installing dependencies. Please wait... done!
VCD info: dumpfile test.vcd opened for output.



Operação andi
```
x1 = 0111
8 =  1000
---------
x2 = 0000
```

In [24]:
# Final Register File
!cat reg.data

// 0x00000000
00000000
00000007
00000000
00000003
00000004
00000005
00000006
00000007
00000008
00000009
0000000a
0000000b
0000000c
0000000d
0000000e
0000000f


### Problema 2:  SRLI -Shift Right Logical Immediate

In [25]:
%%writefile simple.s
addi x1,x0,8
slri x2,x1,1

Overwriting simple.s


In [26]:
!rm -f -r simple/*
assemble()

------Writing to Text file------
Output file: simple.txt
Number of instructions: 2


In [27]:
!cat simple/txt/simple.txt

00000000100000000000000010010011
00000000000100001101000100010011


In [28]:
%%verilog 
`include "main.v"

VCD info: dumpfile test.vcd opened for output.



resultado esperado
```
x1 = 8
x2 = 4
```

In [29]:
# Final Register File
!cat reg.data

// 0x00000000
00000000
00000008
00000004
00000003
00000004
00000005
00000006
00000007
00000008
00000009
0000000a
0000000b
0000000c
0000000d
0000000e
0000000f


### Problema 3:  J - Jump



In [30]:
%%writefile simple.s
addi x1,x0,5
addi x2,x0,0
j loop
addi x1,x0,0
addi x2,x0,5
loop:
addi x2,x2,9

Overwriting simple.s


In [31]:
!rm -f -r simple/*
assemble()

------Writing to Text file------
Output file: simple.txt
Number of instructions: 6


In [32]:
!cat simple/txt/simple.txt

00000000010100000000000010010011
00000000000000000000000100010011
00000000000000000110000001101111
00000000000000000000000010010011
00000000010100000000000100010011
00000000100100010000000100010011


In [33]:
%%verilog 
`include "main.v"

VCD info: dumpfile test.vcd opened for output.



resultado esperado 
```
x1 = 5
x2 = 9
```

In [34]:
# Final Register File
!cat reg.data

// 0x00000000
00000000
00000005
00000009
00000003
00000004
00000005
00000006
00000007
00000008
00000009
0000000a
0000000b
0000000c
0000000d
0000000e
0000000f


### Problema 4:  BGT -Branch on Greater Than

In [35]:
%%writefile simple.s
addi x1,x0,5
addi x2,x0,0
bgt x1,x2,loop
addi x1,x0,0
addi x2,x0,5
loop:
addi x2,x2,8

Overwriting simple.s


In [36]:
!rm -f -r simple/*
assemble()

------Writing to Text file------
Output file: simple.txt
Number of instructions: 6


In [37]:
!cat simple/txt/simple.txt

00000000010100000000000010010011
00000000000000000000000100010011
00000000000100010100011001100011
00000000000000000000000010010011
00000000010100000000000100010011
00000000100000010000000100010011


In [38]:
%%verilog 
`include "main.v"

VCD info: dumpfile test.vcd opened for output.



resultado esperado
```
x1 = 5
x2 = 8
```

In [39]:
# Final Register File
!cat reg.data

// 0x00000000
00000000
00000005
00000008
00000003
00000004
00000005
00000006
00000007
00000008
00000009
0000000a
0000000b
0000000c
0000000d
0000000e
0000000f
