### Chapter 20: Design Examples

This chapter provides comprehensive examples of real-world digital design implementations using SystemVerilog. Each example demonstrates best practices and advanced SystemVerilog features in practical applications.

#### Combinational Logic Designs

##### Arithmetic Logic Unit (ALU)

```systemverilog
// Parameterized ALU (8 bits wide by default) with sixteen operations and status flags.
//
// Ports:
//   a, b      - operands
//   op_sel    - operation select; encodings are listed in the case statement
//   cin       - carry-in for ADC, borrow-in for SBC; ignored by every other operation
//   result    - low WIDTH bits of the selected operation
//   cout      - bit WIDTH of the extended result: carry out of ADD/ADC, borrow out of
//               SUB/SBC, the bit shifted out by shift-left; 0 for every other operation
//   zero      - high when result is all zeros
//   negative  - copy of the result MSB
//   overflow  - two's-complement overflow; only produced for ADD/ADC/SUB/SBC
module alu_8bit #(
    parameter WIDTH = 8
)(
    input logic [WIDTH-1:0] a, b,
    input logic [3:0] op_sel,
    input logic cin,
    output logic [WIDTH-1:0] result,
    output logic cout, zero, negative, overflow
);

    // One bit wider than the operands so that carry/borrow lands in bit WIDTH.
    logic [WIDTH:0] temp_result;

    always_comb begin
        temp_result = '0;

        case (op_sel)
            4'b0000: temp_result = a + b;                       // ADD
            4'b0001: temp_result = a + b + cin;                 // ADC (Add with carry)
            4'b0010: temp_result = a - b;                       // SUB
            4'b0011: temp_result = a - b - cin;                 // SBC (Sub with borrow)
            4'b0100: temp_result = a & b;                       // AND
            4'b0101: temp_result = a | b;                       // OR
            4'b0110: temp_result = a ^ b;                       // XOR
            // Operands are widened to WIDTH+1 bits before a context-determined operator
            // is applied, so a bare ~a would set bit WIDTH and raise a bogus carry.
            // Inside a concatenation the operation is evaluated at WIDTH bits.
            4'b0111: temp_result = {1'b0, ~a};                  // NOT
            4'b1000: temp_result = a << 1;                      // Shift left (MSB -> cout)
            4'b1001: temp_result = a >> 1;                      // Shift right
            // Same widening issue: a bare signed >>> would sign-extend into bit WIDTH.
            4'b1010: temp_result = {1'b0, $signed(a) >>> 1};    // Arithmetic shift right
            4'b1011: temp_result = {a[WIDTH-2:0], a[WIDTH-1]};  // Rotate left
            4'b1100: temp_result = {a[0], a[WIDTH-1:1]};        // Rotate right
            4'b1101: temp_result = (a < b) ? 1 : 0;             // Compare less than (unsigned)
            4'b1110: temp_result = (a == b) ? 1 : 0;            // Compare equal
            4'b1111: temp_result = a;                           // Pass through A
            default: temp_result = '0;                          // only reachable with X/Z on op_sel
        endcase

        result = temp_result[WIDTH-1:0];
        cout = temp_result[WIDTH];
        zero = (result == '0);
        negative = result[WIDTH-1];

        // Signed overflow: for addition the operands share a sign that the result lacks;
        // for subtraction the operands differ in sign and the result differs from a.
        if (op_sel == 4'b0000 || op_sel == 4'b0001) // ADD operations
            overflow = (a[WIDTH-1] == b[WIDTH-1]) && (result[WIDTH-1] != a[WIDTH-1]);
        else if (op_sel == 4'b0010 || op_sel == 4'b0011) // SUB operations
            overflow = (a[WIDTH-1] != b[WIDTH-1]) && (result[WIDTH-1] != a[WIDTH-1]);
        else
            overflow = 1'b0;
    end

endmodule
```

##### Priority Encoder

```systemverilog
// Priority encoder: reports the index of the most significant set bit of data_in.
// valid is low (and encoded_out is zero) when no bit is set.
module priority_encoder #(
    parameter WIDTH = 8,
    parameter OUT_WIDTH = $clog2(WIDTH)
)(
    input logic [WIDTH-1:0] data_in,
    output logic [OUT_WIDTH-1:0] encoded_out,
    output logic valid
);

    always_comb begin
        valid = 1'b0;
        encoded_out = '0;

        // Scan upward from bit 0; every later hit overwrites the earlier one,
        // so the value left at the end belongs to the highest set bit.
        for (int bit_idx = 0; bit_idx < WIDTH; bit_idx++) begin
            if (data_in[bit_idx]) begin
                valid = 1'b1;
                encoded_out = OUT_WIDTH'(bit_idx);
            end
        end
    end

endmodule
```

##### Barrel Shifter

```systemverilog
// Barrel shifter supporting left, logical-right and arithmetic-right shifts.
//   shift_left = 1            : data_in << shift_amount
//   shift_left = 0, arith = 1 : sign-preserving right shift
//   shift_left = 0, arith = 0 : zero-filling right shift
module barrel_shifter #(
    parameter WIDTH = 8,
    parameter SHIFT_WIDTH = $clog2(WIDTH)
)(
    input logic [WIDTH-1:0] data_in,
    input logic [SHIFT_WIDTH-1:0] shift_amount,
    input logic shift_left,
    input logic arithmetic,
    output logic [WIDTH-1:0] data_out
);

    // Each candidate is computed in its own assignment so the signed shift is
    // never mixed with unsigned operands in a single expression.
    logic [WIDTH-1:0] left_shifted;
    logic [WIDTH-1:0] right_logical;
    logic signed [WIDTH-1:0] right_arith;

    assign left_shifted = data_in << shift_amount;
    assign right_logical = data_in >> shift_amount;
    assign right_arith = $signed(data_in) >>> shift_amount;

    // Direction has priority over the arithmetic flag.
    always_comb begin
        if (shift_left)
            data_out = left_shifted;
        else if (arithmetic)
            data_out = right_arith;
        else
            data_out = right_logical;
    end

endmodule
```

#### Sequential Logic (Counters, State Machines)

##### Universal Counter

```systemverilog
// Up/down counter with asynchronous active-low reset, synchronous load and enable.
// Priority on each clock edge: load, then enable (direction from up_down), else hold.
// tc_reached is a combinational compare of count against terminal_count.
module universal_counter #(
    parameter WIDTH = 8,
    parameter RESET_VALUE = 0
)(
    input logic clk,
    input logic rst_n,
    input logic enable,
    input logic load,
    input logic up_down,  // 1 = up, 0 = down
    input logic [WIDTH-1:0] load_data,
    input logic [WIDTH-1:0] terminal_count,
    output logic [WIDTH-1:0] count,
    output logic tc_reached
);

    logic [WIDTH-1:0] count_next;

    assign tc_reached = (count == terminal_count);

    // Value the register takes on the next clock edge
    always_comb begin
        count_next = count;  // hold by default
        if (load) begin
            count_next = load_data;
        end else if (enable) begin
            if (up_down)
                count_next = count + 1'b1;
            else
                count_next = count - 1'b1;
        end
    end

    // Count register
    always_ff @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            count <= RESET_VALUE;
        else
            count <= count_next;
    end

endmodule
```

##### Advanced State Machine - UART Transmitter

```systemverilog
// UART transmitter. Frame format: start bit (0), DATA_BITS data bits sent LSB first,
// one even-parity bit, stop bit (1).
//
// Parameters:
//   CLOCK_FREQ, BAUD_RATE - their integer quotient is the number of clk cycles per bit
//   DATA_BITS             - payload bits per frame
// Ports:
//   tx_data, tx_start - payload is captured when tx_start is high while the FSM is idle
//   tx_ready          - high while the FSM is idle
//   tx_out            - serial output, high when idle
module uart_transmitter #(
    parameter CLOCK_FREQ = 50_000_000,
    parameter BAUD_RATE = 115200,
    parameter DATA_BITS = 8
)(
    input logic clk,
    input logic rst_n,
    input logic [DATA_BITS-1:0] tx_data,
    input logic tx_start,
    output logic tx_ready,
    output logic tx_out
);

    localparam BAUD_TICK = CLOCK_FREQ / BAUD_RATE;
    // $clog2(1) is 0; keep at least one counter bit when there is one clock per bit.
    localparam BAUD_WIDTH = (BAUD_TICK > 1) ? $clog2(BAUD_TICK) : 1;
    // bit_counter runs from 0 up to DATA_BITS inclusive, so size it from the parameter
    // (4 bits for the default of 8) instead of a fixed width that breaks for long frames.
    localparam BIT_CNT_WIDTH = $clog2(DATA_BITS + 1);

    typedef enum logic [2:0] {
        IDLE,
        START_BIT,
        DATA_BITS_STATE,
        PARITY_BIT,
        STOP_BIT
    } uart_state_t;

    uart_state_t current_state, next_state;

    logic [BAUD_WIDTH-1:0] baud_counter;
    logic [BIT_CNT_WIDTH-1:0] bit_counter;
    logic [DATA_BITS-1:0] shift_reg;
    logic baud_tick;
    logic parity_bit;

    // Baud rate generator: held at zero while idle so the start bit gets a full period
    always_ff @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            baud_counter <= '0;
        end else if (current_state == IDLE) begin
            baud_counter <= '0;
        end else if (baud_counter == BAUD_TICK - 1) begin
            baud_counter <= '0;
        end else begin
            baud_counter <= baud_counter + 1;
        end
    end

    // Marks the last clock cycle of the current bit period
    assign baud_tick = (baud_counter == BAUD_TICK - 1);

    // State register
    always_ff @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            current_state <= IDLE;
        end else begin
            current_state <= next_state;
        end
    end

    // Next state logic
    always_comb begin
        next_state = current_state;

        case (current_state)
            IDLE: begin
                if (tx_start) begin
                    next_state = START_BIT;
                end
            end

            START_BIT: begin
                if (baud_tick) begin
                    next_state = DATA_BITS_STATE;
                end
            end

            DATA_BITS_STATE: begin
                // Leave once the final data bit has been held for a full period
                if (baud_tick && (bit_counter == DATA_BITS - 1)) begin
                    next_state = PARITY_BIT;
                end
            end

            PARITY_BIT: begin
                if (baud_tick) begin
                    next_state = STOP_BIT;
                end
            end

            STOP_BIT: begin
                if (baud_tick) begin
                    next_state = IDLE;
                end
            end

            // Three unused encodings exist; fall back to IDLE instead of locking up.
            default: next_state = IDLE;
        endcase
    end

    // Data path
    always_ff @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            shift_reg <= '0;
            bit_counter <= '0;
            parity_bit <= '0;
        end else begin
            case (current_state)
                IDLE: begin
                    if (tx_start) begin
                        shift_reg <= tx_data;
                        parity_bit <= ^tx_data; // Even parity: makes the total count of 1s even
                        bit_counter <= '0;
                    end
                end

                DATA_BITS_STATE: begin
                    if (baud_tick) begin
                        // Shift right so the next bit to send sits in shift_reg[0]
                        shift_reg <= {1'b0, shift_reg[DATA_BITS-1:1]};
                        bit_counter <= bit_counter + 1;
                    end
                end

                default: ; // other states hold the data path
            endcase
        end
    end

    // Output logic
    always_comb begin
        case (current_state)
            IDLE: tx_out = 1'b1;
            START_BIT: tx_out = 1'b0;
            DATA_BITS_STATE: tx_out = shift_reg[0];
            PARITY_BIT: tx_out = parity_bit;
            STOP_BIT: tx_out = 1'b1;
            default: tx_out = 1'b1;
        endcase
    end

    assign tx_ready = (current_state == IDLE);

endmodule
```

#### Memory Models

##### Dual-Port RAM

```systemverilog
// True dual-port RAM with byte enables.
//
// Each port has its own clock, address, write data, byte enables, write enable and
// port enable. A port does nothing unless its en_* input is high. On a write, only the
// byte lanes whose byte_en_* bit is set are updated. The registered read output is
// loaded with the word as it was before the same-edge write (read-first behaviour,
// because the write uses a nonblocking assignment).
//
// Simultaneous writes from both ports to the same address are not arbitrated here.
module dual_port_ram #(
    parameter ADDR_WIDTH = 10,
    parameter DATA_WIDTH = 32,
    parameter BYTE_WIDTH = 8,
    parameter NUM_BYTES = DATA_WIDTH / BYTE_WIDTH
)(
    // Port A
    input logic clk_a,
    input logic [ADDR_WIDTH-1:0] addr_a,
    input logic [DATA_WIDTH-1:0] data_in_a,
    input logic [NUM_BYTES-1:0] byte_en_a,
    input logic we_a,
    input logic en_a,
    output logic [DATA_WIDTH-1:0] data_out_a,

    // Port B
    input logic clk_b,
    input logic [ADDR_WIDTH-1:0] addr_b,
    input logic [DATA_WIDTH-1:0] data_in_b,
    input logic [NUM_BYTES-1:0] byte_en_b,
    input logic we_b,
    input logic en_b,
    output logic [DATA_WIDTH-1:0] data_out_b
);

    localparam MEM_DEPTH = 2**ADDR_WIDTH;

    logic [DATA_WIDTH-1:0] memory [MEM_DEPTH];

    // The two port processes below both write `memory`. A variable assigned in an
    // always_ff procedure may not be written by any other process, so these use plain
    // `always @(posedge ...)`, which is the form RAM inference templates expect for
    // two-write-port memories.

    // Port A operations
    always @(posedge clk_a) begin
        if (en_a) begin
            if (we_a) begin
                for (int i = 0; i < NUM_BYTES; i++) begin
                    if (byte_en_a[i]) begin
                        memory[addr_a][i*BYTE_WIDTH +: BYTE_WIDTH] <=
                            data_in_a[i*BYTE_WIDTH +: BYTE_WIDTH];
                    end
                end
            end
            data_out_a <= memory[addr_a];
        end
    end

    // Port B operations
    always @(posedge clk_b) begin
        if (en_b) begin
            if (we_b) begin
                for (int i = 0; i < NUM_BYTES; i++) begin
                    if (byte_en_b[i]) begin
                        memory[addr_b][i*BYTE_WIDTH +: BYTE_WIDTH] <=
                            data_in_b[i*BYTE_WIDTH +: BYTE_WIDTH];
                    end
                end
            end
            data_out_b <= memory[addr_b];
        end
    end

endmodule
```

##### FIFO Buffer

```systemverilog
// Single-clock FIFO with parameterized width and depth.
//
// Writes are accepted when wr_en is high and the FIFO is not full; reads advance when
// rd_en is high and the FIFO is not empty. rd_data always shows the word at the read
// pointer (combinational read). data_count reports the current occupancy.
module sync_fifo #(
    parameter DATA_WIDTH = 32,
    parameter FIFO_DEPTH = 16,
    parameter ADDR_WIDTH = $clog2(FIFO_DEPTH)
)(
    input logic clk,
    input logic rst_n,
    
    // Write interface
    input logic wr_en,
    input logic [DATA_WIDTH-1:0] wr_data,
    output logic full,
    output logic almost_full,
    
    // Read interface
    input logic rd_en,
    output logic [DATA_WIDTH-1:0] rd_data,
    output logic empty,
    output logic almost_empty,
    
    // Status
    output logic [ADDR_WIDTH:0] data_count
);

    logic [DATA_WIDTH-1:0] memory [FIFO_DEPTH];
    logic [ADDR_WIDTH:0] wr_ptr, rd_ptr;
    logic [ADDR_WIDTH:0] count;

    // Qualified strobes: a transfer only happens when the FIFO can take/give a word
    logic push, pop;
    assign push = wr_en && !full;
    assign pop = rd_en && !empty;

    // Occupancy-derived status
    assign data_count = count;
    assign empty = (count == '0);
    assign full = (count == FIFO_DEPTH);
    assign almost_empty = (count <= 1);
    assign almost_full = (count >= FIFO_DEPTH - 1);

    // Combinational read of the head entry
    assign rd_data = memory[rd_ptr[ADDR_WIDTH-1:0]];

    // Write pointer: wraps to zero after the last slot
    always_ff @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            wr_ptr <= '0;
        end else if (push) begin
            if (wr_ptr == FIFO_DEPTH - 1)
                wr_ptr <= '0;
            else
                wr_ptr <= wr_ptr + 1;
        end
    end

    // Read pointer: same wrap rule
    always_ff @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            rd_ptr <= '0;
        end else if (pop) begin
            if (rd_ptr == FIFO_DEPTH - 1)
                rd_ptr <= '0;
            else
                rd_ptr <= rd_ptr + 1;
        end
    end

    // Occupancy: changes only when exactly one of push/pop happens
    always_ff @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            count <= '0;
        end else if (push && !pop) begin
            count <= count + 1;
        end else if (pop && !push) begin
            count <= count - 1;
        end
    end

    // Storage write (not reset)
    always_ff @(posedge clk) begin
        if (push) begin
            memory[wr_ptr[ADDR_WIDTH-1:0]] <= wr_data;
        end
    end

endmodule
```

#### Bus Protocols

##### AXI4-Lite Master Interface

```systemverilog
// AXI4-Lite master: turns one user request at a time into a single AXI4-Lite read or
// write transaction.
//
// User side:
//   req_valid/req_ready  - request handshake; req_ready is high only while idle
//   req_write            - 1 = write, 0 = read
//   req_addr/wdata/wstrb - captured when the request handshake completes
//   resp_valid           - high while the AXI response (B or R) is being presented
//   resp_rdata           - read data, passed straight through from m_axi_rdata
//   resp_status          - m_axi_bresp for writes, m_axi_rresp for reads
//   resp_ready           - gates m_axi_bready / m_axi_rready
//
// Write requests drive AWVALID and WVALID together. AXI lets a slave wait for WVALID
// before raising AWREADY (and vice versa), so a master that holds back one channel
// until the other has completed can deadlock. Each channel drops its VALID as soon as
// its own handshake is done; the response phase starts once both have completed.
module axi4_lite_master #(
    parameter ADDR_WIDTH = 32,
    parameter DATA_WIDTH = 32
)(
    input logic aclk,
    input logic aresetn,
    
    // User interface
    input logic req_valid,
    input logic req_write,
    input logic [ADDR_WIDTH-1:0] req_addr,
    input logic [DATA_WIDTH-1:0] req_wdata,
    input logic [DATA_WIDTH/8-1:0] req_wstrb,
    output logic req_ready,
    output logic resp_valid,
    output logic [DATA_WIDTH-1:0] resp_rdata,
    output logic [1:0] resp_status,
    input logic resp_ready,
    
    // AXI4-Lite Master Interface
    // Write Address Channel
    output logic [ADDR_WIDTH-1:0] m_axi_awaddr,
    output logic [2:0] m_axi_awprot,
    output logic m_axi_awvalid,
    input logic m_axi_awready,
    
    // Write Data Channel
    output logic [DATA_WIDTH-1:0] m_axi_wdata,
    output logic [DATA_WIDTH/8-1:0] m_axi_wstrb,
    output logic m_axi_wvalid,
    input logic m_axi_wready,
    
    // Write Response Channel
    input logic [1:0] m_axi_bresp,
    input logic m_axi_bvalid,
    output logic m_axi_bready,
    
    // Read Address Channel
    output logic [ADDR_WIDTH-1:0] m_axi_araddr,
    output logic [2:0] m_axi_arprot,
    output logic m_axi_arvalid,
    input logic m_axi_arready,
    
    // Read Data Channel
    input logic [DATA_WIDTH-1:0] m_axi_rdata,
    input logic [1:0] m_axi_rresp,
    input logic m_axi_rvalid,
    output logic m_axi_rready
);

    typedef enum logic [2:0] {
        IDLE,
        WRITE_REQ,   // address and data channels are both offered in this state
        WRITE_RESP,
        READ_ADDR,
        READ_DATA
    } axi_state_t;

    axi_state_t current_state, next_state;

    logic [ADDR_WIDTH-1:0] addr_reg;
    logic [DATA_WIDTH-1:0] wdata_reg;
    logic [DATA_WIDTH/8-1:0] wstrb_reg;
    logic write_req;

    // Per-channel bookkeeping for the write request phase
    logic aw_done, w_done;   // handshake already completed in an earlier cycle
    logic aw_fire, w_fire;   // handshake completing in this cycle

    assign aw_fire = m_axi_awvalid && m_axi_awready;
    assign w_fire = m_axi_wvalid && m_axi_wready;

    // State machine
    always_ff @(posedge aclk or negedge aresetn) begin
        if (!aresetn) begin
            current_state <= IDLE;
        end else begin
            current_state <= next_state;
        end
    end

    // Next state logic
    always_comb begin
        next_state = current_state;

        case (current_state)
            IDLE: begin
                if (req_valid && req_ready) begin
                    if (req_write) begin
                        next_state = WRITE_REQ;
                    end else begin
                        next_state = READ_ADDR;
                    end
                end
            end

            WRITE_REQ: begin
                // Move on only when both channels have been accepted, in any order
                if ((aw_done || aw_fire) && (w_done || w_fire)) begin
                    next_state = WRITE_RESP;
                end
            end

            WRITE_RESP: begin
                if (m_axi_bvalid && m_axi_bready) begin
                    next_state = IDLE;
                end
            end

            READ_ADDR: begin
                if (m_axi_arvalid && m_axi_arready) begin
                    next_state = READ_DATA;
                end
            end

            READ_DATA: begin
                if (m_axi_rvalid && m_axi_rready) begin
                    next_state = IDLE;
                end
            end

            // Unused encodings recover to IDLE
            default: next_state = IDLE;
        endcase
    end

    // Remember which write channel has already been accepted
    always_ff @(posedge aclk or negedge aresetn) begin
        if (!aresetn) begin
            aw_done <= 1'b0;
            w_done <= 1'b0;
        end else if (current_state != WRITE_REQ) begin
            aw_done <= 1'b0;
            w_done <= 1'b0;
        end else begin
            if (aw_fire) aw_done <= 1'b1;
            if (w_fire) w_done <= 1'b1;
        end
    end

    // Register request
    always_ff @(posedge aclk or negedge aresetn) begin
        if (!aresetn) begin
            addr_reg <= '0;
            wdata_reg <= '0;
            wstrb_reg <= '0;
            write_req <= 1'b0;
        end else if (req_valid && req_ready) begin
            addr_reg <= req_addr;
            wdata_reg <= req_wdata;
            wstrb_reg <= req_wstrb;
            write_req <= req_write;
        end
    end

    // AXI signal assignments
    assign m_axi_awaddr = addr_reg;
    assign m_axi_awprot = 3'b000;
    assign m_axi_awvalid = (current_state == WRITE_REQ) && !aw_done;

    assign m_axi_wdata = wdata_reg;
    assign m_axi_wstrb = wstrb_reg;
    assign m_axi_wvalid = (current_state == WRITE_REQ) && !w_done;

    assign m_axi_bready = (current_state == WRITE_RESP) && resp_ready;

    assign m_axi_araddr = addr_reg;
    assign m_axi_arprot = 3'b000;
    assign m_axi_arvalid = (current_state == READ_ADDR);

    assign m_axi_rready = (current_state == READ_DATA) && resp_ready;

    // User interface
    assign req_ready = (current_state == IDLE);
    assign resp_valid = (current_state == WRITE_RESP && m_axi_bvalid) ||
                       (current_state == READ_DATA && m_axi_rvalid);
    assign resp_rdata = m_axi_rdata;
    assign resp_status = write_req ? m_axi_bresp : m_axi_rresp;

endmodule
```

#### Processor Components

##### Simple RISC-V CPU Core

```systemverilog
// Simplified RISC-V RV32I CPU Core
//
// Single-cycle datapath: the instruction at pc is fetched, decoded, executed and
// written back combinationally, and pc plus the register file update on the clock edge.
//
// Only the opcodes listed in the control unit are acted on (R-type ALU, I-type ALU,
// load, store, branch, JAL). Within those, the ALU implements ADD, SUB, SLT, OR and AND.
//
// Ports:
//   imem_addr/imem_data/imem_req - instruction fetch; imem_req is tied high
//   dmem_*                       - data memory; accesses are always full-word (be = 1111)
// NOTE(review): imem_data and dmem_rdata are consumed in the same cycle the address is
// driven, which presumably requires combinational-read memories - confirm against the
// memory models used with this core.
module riscv_core #(
    parameter ADDR_WIDTH = 32,
    parameter DATA_WIDTH = 32
)(
    input logic clk,
    input logic rst_n,
    
    // Instruction Memory Interface
    output logic [ADDR_WIDTH-1:0] imem_addr,
    input logic [DATA_WIDTH-1:0] imem_data,
    output logic imem_req,
    
    // Data Memory Interface
    output logic [ADDR_WIDTH-1:0] dmem_addr,
    output logic [DATA_WIDTH-1:0] dmem_wdata,
    input logic [DATA_WIDTH-1:0] dmem_rdata,
    output logic [3:0] dmem_be,
    output logic dmem_we,
    output logic dmem_req
);

    // Register file (x0..x31); not cleared by reset
    logic [DATA_WIDTH-1:0] registers [32];
    
    // Program counter and datapath signals (only pc is registered; the rest are combinational)
    logic [ADDR_WIDTH-1:0] pc, pc_next;
    logic [DATA_WIDTH-1:0] instruction;
    logic [DATA_WIDTH-1:0] alu_result;
    logic [DATA_WIDTH-1:0] reg_data1, reg_data2;
    
    // Instruction decode fields
    logic [6:0] opcode;
    logic [4:0] rd, rs1, rs2;
    logic [2:0] funct3;
    logic [6:0] funct7;
    logic [DATA_WIDTH-1:0] immediate;
    
    // Control signals
    logic reg_write;            // write rd at the end of the cycle
    logic [1:0] alu_op;         // 00 = add, 01 = subtract (branch compare), 10 = decode from funct3/funct7
    logic alu_src;              // 1 = second ALU operand is the immediate
    logic mem_read, mem_write;
    logic [1:0] mem_to_reg;     // write-back source: 00 = ALU, 01 = memory, 10 = pc + 4
    logic branch, jump;
    logic [3:0] alu_control;
    
    // Instruction fetch: a new instruction is requested every cycle
    assign imem_addr = pc;
    assign imem_req = 1'b1;
    assign instruction = imem_data;
    
    // Instruction decode: fixed RV32I field positions
    assign opcode = instruction[6:0];
    assign rd = instruction[11:7];
    assign funct3 = instruction[14:12];
    assign rs1 = instruction[19:15];
    assign rs2 = instruction[24:20];
    assign funct7 = instruction[31:25];
    
    // Immediate generation: reassembles and sign-extends the immediate for each format.
    // B- and J-type immediates get an implicit zero LSB.
    // NOTE(review): U-type immediates are produced here, but the control unit below has
    // no entry for the U-type opcodes, so they currently have no architectural effect.
    always_comb begin
        case (opcode)
            7'b0010011, 7'b0000011: // I-type
                immediate = {{20{instruction[31]}}, instruction[31:20]};
            7'b0100011: // S-type
                immediate = {{20{instruction[31]}}, instruction[31:25], instruction[11:7]};
            7'b1100011: // B-type
                immediate = {{19{instruction[31]}}, instruction[31], instruction[7], 
                           instruction[30:25], instruction[11:8], 1'b0};
            7'b0110111, 7'b0010111: // U-type
                immediate = {instruction[31:12], 12'b0};
            7'b1101111: // J-type
                immediate = {{11{instruction[31]}}, instruction[31], instruction[19:12],
                           instruction[20], instruction[30:21], 1'b0};
            default:
                immediate = 32'b0;
        endcase
    end
    
    // Control unit: any opcode without an entry keeps the defaults, i.e. it writes
    // nothing and simply advances the pc.
    always_comb begin
        // Default values
        reg_write = 1'b0;
        alu_op = 2'b00;
        alu_src = 1'b0;
        mem_read = 1'b0;
        mem_write = 1'b0;
        mem_to_reg = 2'b00;
        branch = 1'b0;
        jump = 1'b0;
        
        case (opcode)
            7'b0110011: begin // R-type
                reg_write = 1'b1;
                alu_op = 2'b10;
            end
            7'b0010011: begin // I-type ALU
                reg_write = 1'b1;
                alu_src = 1'b1;
                alu_op = 2'b10;
            end
            7'b0000011: begin // Load: address = rs1 + imm (alu_op stays 00 = add)
                reg_write = 1'b1;
                alu_src = 1'b1;
                mem_read = 1'b1;
                mem_to_reg = 2'b01;
            end
            7'b0100011: begin // Store: address = rs1 + imm
                alu_src = 1'b1;
                mem_write = 1'b1;
            end
            7'b1100011: begin // Branch: ALU subtracts rs2 from rs1 for the compare
                alu_op = 2'b01;
                branch = 1'b1;
            end
            7'b1101111: begin // JAL: rd receives pc + 4
                reg_write = 1'b1;
                mem_to_reg = 2'b10;
                jump = 1'b1;
            end
        endcase
    end
    
    // ALU control
    always_comb begin
        case (alu_op)
            2'b00: alu_control = 4'b0010; // ADD
            2'b01: alu_control = 4'b0110; // SUB (for branch)
            2'b10: begin
                case (funct3)
                    // opcode[5] is 1 for R-type and 0 for I-type, so bit 30 selects SUB
                    // only for register-register instructions (ADDI has no SUB form).
                    3'b000: alu_control = (funct7[5] && opcode[5]) ? 4'b0110 : 4'b0010; // SUB : ADD
                    3'b010: alu_control = 4'b0111; // SLT
                    3'b110: alu_control = 4'b0001; // OR
                    3'b111: alu_control = 4'b0000; // AND
                    // NOTE(review): every other funct3 value falls through to ADD.
                    default: alu_control = 4'b0010;
                endcase
            end
            default: alu_control = 4'b0010;
        endcase
    end
    
    // Register file read: register 0 always reads as zero
    assign reg_data1 = (rs1 == 5'b0) ? 32'b0 : registers[rs1];
    assign reg_data2 = (rs2 == 5'b0) ? 32'b0 : registers[rs2];
    
    // ALU
    logic [DATA_WIDTH-1:0] alu_input2;
    logic alu_zero;
    
    assign alu_input2 = alu_src ? immediate : reg_data2;
    
    always_comb begin
        case (alu_control)
            4'b0000: alu_result = reg_data1 & alu_input2; // AND
            4'b0001: alu_result = reg_data1 | alu_input2; // OR
            4'b0010: alu_result = reg_data1 + alu_input2; // ADD
            4'b0110: alu_result = reg_data1 - alu_input2; // SUB
            4'b0111: alu_result = ($signed(reg_data1) < $signed(alu_input2)) ? 1 : 0; // SLT
            default: alu_result = 32'b0;
        endcase
    end
    
    assign alu_zero = (alu_result == 32'b0);
    
    // Data memory interface: the ALU result is the address, rs2 is the store data
    assign dmem_addr = alu_result;
    assign dmem_wdata = reg_data2;
    assign dmem_be = 4'b1111; // Word access
    assign dmem_we = mem_write;
    assign dmem_req = mem_read || mem_write;
    
    // Write back
    logic [DATA_WIDTH-1:0] write_data;
    
    always_comb begin
        case (mem_to_reg)
            2'b00: write_data = alu_result;
            2'b01: write_data = dmem_rdata;
            2'b10: write_data = pc + 4; // For JAL
            default: write_data = alu_result;
        endcase
    end
    
    // Register file write: writes to register 0 are discarded
    always_ff @(posedge clk) begin
        if (reg_write && (rd != 5'b0)) begin
            registers[rd] <= write_data;
        end
    end
    
    // PC update
    // NOTE(review): funct3 is not consulted for branches, so every B-type opcode is
    // taken exactly when rs1 == rs2 (alu_zero after the subtract).
    logic pc_src;
    assign pc_src = (branch && alu_zero) || jump;
    
    always_comb begin
        if (pc_src) begin
            pc_next = pc + immediate; // pc-relative target for taken branch / JAL
        end else begin
            pc_next = pc + 4;
        end
    end
    
    // Program counter register; execution starts at address 0 after reset
    always_ff @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            pc <= 32'h0000_0000;
        end else begin
            pc <= pc_next;
        end
    end

endmodule
```

##### Cache Memory Controller

```systemverilog
// Direct-mapped cache controller
//
// Write-back, write-allocate behaviour as implemented below: a write hit updates the
// cached word and marks the block dirty; a miss on a valid dirty block first writes the
// whole block back, then the block is refilled from memory and the access is retried.
//
// Processor side: cpu_req starts an access; cpu_ready is high on a hit in COMPARE_TAG
// or while idle with no request. cpu_rdata is a combinational read of the addressed word.
// NOTE(review): cpu_addr/cpu_wdata/cpu_we are sampled in several states and are never
// latched, so the processor presumably has to hold them stable until cpu_ready - confirm.
//
// Memory side: one word per transfer, paced by mem_ready.
module cache_controller #(
    parameter CACHE_SIZE = 1024,     // Cache size in bytes
    parameter BLOCK_SIZE = 64,       // Block size in bytes
    parameter ADDR_WIDTH = 32,
    parameter DATA_WIDTH = 32
)(
    input logic clk,
    input logic rst_n,
    
    // Processor interface
    input logic [ADDR_WIDTH-1:0] cpu_addr,
    input logic [DATA_WIDTH-1:0] cpu_wdata,
    output logic [DATA_WIDTH-1:0] cpu_rdata,
    input logic cpu_req,
    input logic cpu_we,
    output logic cpu_ready,
    
    // Memory interface
    output logic [ADDR_WIDTH-1:0] mem_addr,
    output logic [DATA_WIDTH-1:0] mem_wdata,
    input logic [DATA_WIDTH-1:0] mem_rdata,
    output logic mem_req,
    output logic mem_we,
    input logic mem_ready
);

    // Geometry derived from the parameters
    localparam NUM_BLOCKS = CACHE_SIZE / BLOCK_SIZE;
    localparam WORDS_PER_BLOCK = BLOCK_SIZE / (DATA_WIDTH / 8);
    localparam INDEX_WIDTH = $clog2(NUM_BLOCKS);
    localparam OFFSET_WIDTH = $clog2(WORDS_PER_BLOCK);
    // The trailing "- 2" accounts for the two byte-offset bits dropped from cpu_addr below
    localparam TAG_WIDTH = ADDR_WIDTH - INDEX_WIDTH - OFFSET_WIDTH - 2;
    
    // Cache memory arrays
    logic [DATA_WIDTH-1:0] cache_data [NUM_BLOCKS][WORDS_PER_BLOCK];
    logic [TAG_WIDTH-1:0] cache_tags [NUM_BLOCKS];
    logic cache_valid [NUM_BLOCKS];
    logic cache_dirty [NUM_BLOCKS];
    
    // Address breakdown
    logic [TAG_WIDTH-1:0] addr_tag;
    logic [INDEX_WIDTH-1:0] addr_index;
    logic [OFFSET_WIDTH-1:0] addr_offset;   // word offset within the block
    
    // Bits [1:0] of cpu_addr (byte within word) are ignored
    assign {addr_tag, addr_index, addr_offset} = cpu_addr[ADDR_WIDTH-1:2];
    
    // Cache state machine
    typedef enum logic [2:0] {
        IDLE,
        COMPARE_TAG,
        ALLOCATE,
        WRITEBACK,
        REFILL
    } cache_state_t;
    
    cache_state_t current_state, next_state;
    
    // Cache lookup
    logic hit, miss;
    logic [OFFSET_WIDTH-1:0] refill_counter;   // word index during WRITEBACK and REFILL
    logic [ADDR_WIDTH-1:0] refill_addr;        // byte address of the first word of the block
    
    assign hit = cache_valid[addr_index] && 
                (cache_tags[addr_index] == addr_tag);
    assign miss = !hit;
    
    // State machine
    always_ff @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            current_state <= IDLE;
        end else begin
            current_state <= next_state;
        end
    end
    
    // Next-state logic. Miss path: COMPARE_TAG -> [WRITEBACK ->] ALLOCATE -> REFILL ->
    // COMPARE_TAG, where the retried access then hits.
    always_comb begin
        next_state = current_state;
        
        case (current_state)
            IDLE: begin
                if (cpu_req) begin
                    next_state = COMPARE_TAG;
                end
            end
            
            COMPARE_TAG: begin
                if (hit) begin
                    next_state = IDLE;
                end else begin
                    // Only a valid, dirty victim has to be written back first
                    if (cache_valid[addr_index] && cache_dirty[addr_index]) begin
                        next_state = WRITEBACK;
                    end else begin
                        next_state = ALLOCATE;
                    end
                end
            end
            
            WRITEBACK: begin
                // Done when the last word of the block has been accepted
                if (mem_ready && (refill_counter == WORDS_PER_BLOCK - 1)) begin
                    next_state = ALLOCATE;
                end
            end
            
            ALLOCATE: begin
                next_state = REFILL;
            end
            
            REFILL: begin
                if (mem_ready && (refill_counter == WORDS_PER_BLOCK - 1)) begin
                    next_state = COMPARE_TAG;
                end
            end
        endcase
    end
    
    // Refill counter: advances once per accepted memory word, cleared in all other states
    always_ff @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            refill_counter <= '0;
        end else begin
            case (current_state)
                WRITEBACK, REFILL: begin
                    if (mem_ready) begin
                        refill_counter <= refill_counter + 1;
                    end
                end
                default: begin
                    refill_counter <= '0;
                end
            endcase
        end
    end
    
    // Cache data management. Reset clears valid, dirty and tag for every block;
    // cache_data itself is not reset.
    always_ff @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            for (int i = 0; i < NUM_BLOCKS; i++) begin
                cache_valid[i] <= 1'b0;
                cache_dirty[i] <= 1'b0;
                cache_tags[i] <= '0;
            end
        end else begin
            case (current_state)
                COMPARE_TAG: begin
                    // Write hit: update the word in place and mark the block dirty
                    if (hit && cpu_we) begin
                        cache_data[addr_index][addr_offset] <= cpu_wdata;
                        cache_dirty[addr_index] <= 1'b1;
                    end
                end
                
                REFILL: begin
                    if (mem_ready) begin
                        cache_data[addr_index][refill_counter] <= mem_rdata;
                        // The block becomes valid and clean only once its last word arrives
                        if (refill_counter == WORDS_PER_BLOCK - 1) begin
                            cache_valid[addr_index] <= 1'b1;
                            cache_dirty[addr_index] <= 1'b0;
                            cache_tags[addr_index] <= addr_tag;
                        end
                    end
                end
                
                ALLOCATE: begin
                    // Invalidate the victim before it is overwritten
                    cache_valid[addr_index] <= 1'b0;
                end
            endcase
        end
    end
    
    // Memory interface: idle (all zeros) except during WRITEBACK and REFILL
    always_comb begin
        mem_req = 1'b0;
        mem_we = 1'b0;
        mem_addr = '0;
        mem_wdata = '0;
        refill_addr = {addr_tag, addr_index, {OFFSET_WIDTH{1'b0}}, 2'b00};
        
        case (current_state)
            WRITEBACK: begin
                mem_req = 1'b1;
                mem_we = 1'b1;
                // The victim's address is rebuilt from its stored tag, not the requested tag
                mem_addr = {cache_tags[addr_index], addr_index, 
                           refill_counter, 2'b00};
                mem_wdata = cache_data[addr_index][refill_counter];
            end
            
            REFILL: begin
                mem_req = 1'b1;
                mem_we = 1'b0;
                // Word index scaled by 4 to form a byte offset from the block base
                mem_addr = refill_addr + (refill_counter << 2);
            end
        endcase
    end
    
    // CPU interface
    assign cpu_rdata = cache_data[addr_index][addr_offset];
    assign cpu_ready = (current_state == COMPARE_TAG && hit) ||
                      (current_state == IDLE && !cpu_req);

endmodule
```

##### Pipeline Controller with Hazard Detection

```systemverilog
// 5-stage pipeline controller with load-use hazard detection and operand forwarding.
//
// All outputs are combinational. clk, rst_n, id_rd, id_reg_write and id_mem_read are
// part of the port list but are not used by the logic in this module.
//
// Outputs:
//   stall        - high on a load-use hazard (the load in EX feeds the instruction in ID)
//   pc_write     - low while stalled (hold the PC)
//   if_id_write  - low while stalled (hold the IF/ID register)
//   id_ex_flush  - high while stalled (insert a bubble into EX)
//   forward_a/b  - ALU operand select: 00 = register file, 10 = MEM stage, 01 = WB stage
module pipeline_controller (
    input logic clk,
    input logic rst_n,
    
    // Instruction decode stage
    input logic [4:0] id_rs1, id_rs2, id_rd,
    input logic id_reg_write,
    input logic id_mem_read,
    
    // Execute stage
    input logic [4:0] ex_rs1, ex_rs2, ex_rd,
    input logic ex_reg_write,
    input logic ex_mem_read,
    
    // Memory stage
    input logic [4:0] mem_rd,
    input logic mem_reg_write,
    
    // Write-back stage
    input logic [4:0] wb_rd,
    input logic wb_reg_write,
    
    // Control outputs
    output logic pc_write,
    output logic if_id_write,
    output logic id_ex_flush,
    output logic [1:0] forward_a,
    output logic [1:0] forward_b,
    output logic stall
);

    // Hazard detection
    logic load_use_hazard;
    logic control_hazard;

    // Load-use hazard: the instruction in EX is a load whose destination (other than
    // register 0) is a source of the instruction in ID.
    assign load_use_hazard = ex_mem_read &&
                           ((ex_rd == id_rs1) || (ex_rd == id_rs2)) &&
                           (ex_rd != 5'b0);

    // The port list carries no branch/jump resolution input, so no control hazard can
    // be detected here. Tie the term low; left undriven it would make id_ex_flush
    // unknown (X) whenever stall is 0.
    assign control_hazard = 1'b0;

    // Data forwarding logic: the MEM stage (newer result) wins over the WB stage, and
    // register 0 is never forwarded.
    always_comb begin
        // Forward A (ALU input A)
        if (mem_reg_write && (mem_rd != 5'b0) && (mem_rd == ex_rs1)) begin
            forward_a = 2'b10; // Forward from MEM stage
        end else if (wb_reg_write && (wb_rd != 5'b0) && (wb_rd == ex_rs1)) begin
            forward_a = 2'b01; // Forward from WB stage
        end else begin
            forward_a = 2'b00; // No forwarding
        end

        // Forward B (ALU input B)
        if (mem_reg_write && (mem_rd != 5'b0) && (mem_rd == ex_rs2)) begin
            forward_b = 2'b10; // Forward from MEM stage
        end else if (wb_reg_write && (wb_rd != 5'b0) && (wb_rd == ex_rs2)) begin
            forward_b = 2'b01; // Forward from WB stage
        end else begin
            forward_b = 2'b00; // No forwarding
        end
    end

    // Stall logic
    assign stall = load_use_hazard;

    // Pipeline control
    assign pc_write = !stall;
    assign if_id_write = !stall;
    assign id_ex_flush = stall || control_hazard;

endmodule
```

#### Complete System Integration Example

##### Simple SoC (System on Chip)

```systemverilog
// Simple SoC integrating CPU, memory, and peripherals
//
// Data-side memory map (instruction memory is decoded from the fetch address):
//   0x0000_0000 .. 0x0000_3FFF  instruction memory (16KB, fetch port only)
//   0x0001_0000 .. 0x0001_3FFF  data memory        (16KB)
//   0x4000_0000 .. 0x4000_0FFF  GPIO controller
//   0x4000_1000 .. 0x4000_1FFF  UART controller
//   0x8000_0000 .. top          external memory
//
// Every slave enable / write strobe is qualified with the CPU's request
// signal, so an address or write-enable left on the bus while no request is
// active cannot commit a write or trigger a peripheral access.
//
// NOTE(review): ext_mem_ready is not consumed in this module and the core has
// no stall input here, so external accesses are assumed to complete without
// wait states -- confirm against the riscv_core interface.
module simple_soc #(
    parameter ADDR_WIDTH = 32,
    parameter DATA_WIDTH = 32
)(
    input logic clk,
    input logic rst_n,
    
    // GPIO
    input logic [15:0] gpio_in,
    output logic [15:0] gpio_out,
    
    // UART
    input logic uart_rx,
    output logic uart_tx,
    
    // External memory interface
    output logic [ADDR_WIDTH-1:0] ext_mem_addr,
    output logic [DATA_WIDTH-1:0] ext_mem_wdata,
    input logic [DATA_WIDTH-1:0] ext_mem_rdata,
    output logic [3:0] ext_mem_be,
    output logic ext_mem_we,
    output logic ext_mem_req,
    input logic ext_mem_ready
);

    // Memory map
    localparam IMEM_BASE = 32'h0000_0000;
    localparam IMEM_SIZE = 32'h0000_4000; // 16KB
    localparam DMEM_BASE = 32'h0001_0000;
    localparam DMEM_SIZE = 32'h0000_4000; // 16KB
    localparam GPIO_BASE = 32'h4000_0000;
    localparam GPIO_SIZE = 32'h0000_1000; // 4KB register window
    localparam UART_BASE = 32'h4000_1000;
    localparam UART_SIZE = 32'h0000_1000; // 4KB register window
    localparam EXT_MEM_BASE = 32'h8000_0000;
    
    // Word-address widths of the on-chip RAMs, derived from the map above so
    // the RAM depth and the address slice cannot drift apart.
    // The "- 2" drops the byte-offset bits of a 32-bit word.
    localparam int IMEM_ADDR_BITS = $clog2(IMEM_SIZE) - 2; // 12 for 16KB
    localparam int DMEM_ADDR_BITS = $clog2(DMEM_SIZE) - 2; // 12 for 16KB
    
    // CPU signals
    logic [ADDR_WIDTH-1:0] cpu_imem_addr, cpu_dmem_addr;
    logic [DATA_WIDTH-1:0] cpu_imem_data, cpu_dmem_wdata, cpu_dmem_rdata;
    logic [3:0] cpu_dmem_be;
    logic cpu_dmem_we, cpu_imem_req, cpu_dmem_req;
    
    // Memory select signals (pure address decode)
    logic sel_imem, sel_dmem, sel_gpio, sel_uart, sel_ext_mem;
    
    // Request-qualified selects: address decode AND an active CPU request
    logic imem_access, dmem_access, gpio_access, uart_access, ext_mem_access;
    
    // Address decode
    always_comb begin
        sel_imem = (cpu_imem_addr >= IMEM_BASE) && 
                  (cpu_imem_addr < IMEM_BASE + IMEM_SIZE);
        sel_dmem = (cpu_dmem_addr >= DMEM_BASE) && 
                  (cpu_dmem_addr < DMEM_BASE + DMEM_SIZE);
        sel_gpio = (cpu_dmem_addr >= GPIO_BASE) && 
                  (cpu_dmem_addr < GPIO_BASE + GPIO_SIZE);
        sel_uart = (cpu_dmem_addr >= UART_BASE) && 
                  (cpu_dmem_addr < UART_BASE + UART_SIZE);
        sel_ext_mem = (cpu_dmem_addr >= EXT_MEM_BASE);
    end
    
    assign imem_access    = sel_imem    && cpu_imem_req;
    assign dmem_access    = sel_dmem    && cpu_dmem_req;
    assign gpio_access    = sel_gpio    && cpu_dmem_req;
    assign uart_access    = sel_uart    && cpu_dmem_req;
    assign ext_mem_access = sel_ext_mem && cpu_dmem_req;
    
    // CPU instantiation
    riscv_core cpu (
        .clk(clk),
        .rst_n(rst_n),
        .imem_addr(cpu_imem_addr),
        .imem_data(cpu_imem_data),
        .imem_req(cpu_imem_req),
        .dmem_addr(cpu_dmem_addr),
        .dmem_wdata(cpu_dmem_wdata),
        .dmem_rdata(cpu_dmem_rdata),
        .dmem_be(cpu_dmem_be),
        .dmem_we(cpu_dmem_we),
        .dmem_req(cpu_dmem_req)
    );
    
    // Instruction memory: port A is a read-only fetch port, port B is unused
    logic [DATA_WIDTH-1:0] imem_rdata;
    
    dual_port_ram #(
        .ADDR_WIDTH(IMEM_ADDR_BITS),
        .DATA_WIDTH(DATA_WIDTH)
    ) instruction_memory (
        .clk_a(clk),
        .addr_a(cpu_imem_addr[IMEM_ADDR_BITS+1:2]),
        .data_in_a('0),
        .byte_en_a('0),
        .we_a(1'b0),
        .en_a(imem_access),
        .data_out_a(imem_rdata),
        .clk_b(clk),
        .addr_b('0),
        .data_in_b('0),
        .byte_en_b('0),
        .we_b(1'b0),
        .en_b(1'b0),
        .data_out_b()
    );
    
    // Data memory: port A serves CPU loads/stores, port B is unused
    logic [DATA_WIDTH-1:0] dmem_rdata;
    
    dual_port_ram #(
        .ADDR_WIDTH(DMEM_ADDR_BITS),
        .DATA_WIDTH(DATA_WIDTH)
    ) data_memory (
        .clk_a(clk),
        .addr_a(cpu_dmem_addr[DMEM_ADDR_BITS+1:2]),
        .data_in_a(cpu_dmem_wdata),
        .byte_en_a(cpu_dmem_be),
        .we_a(cpu_dmem_we && dmem_access),
        .en_a(dmem_access),
        .data_out_a(dmem_rdata),
        .clk_b(clk),
        .addr_b('0),
        .data_in_b('0),
        .byte_en_b('0),
        .we_b(1'b0),
        .en_b(1'b0),
        .data_out_b()
    );
    
    // GPIO controller (word-addressed registers selected by addr[3:2])
    logic [DATA_WIDTH-1:0] gpio_rdata;
    
    gpio_controller gpio_ctrl (
        .clk(clk),
        .rst_n(rst_n),
        .addr(cpu_dmem_addr[3:2]),
        .wdata(cpu_dmem_wdata),
        .rdata(gpio_rdata),
        .we(cpu_dmem_we && gpio_access),
        .en(gpio_access),
        .gpio_in(gpio_in),
        .gpio_out(gpio_out)
    );
    
    // UART controller (word-addressed registers selected by addr[3:2])
    logic [DATA_WIDTH-1:0] uart_rdata;
    
    uart_controller uart_ctrl (
        .clk(clk),
        .rst_n(rst_n),
        .addr(cpu_dmem_addr[3:2]),
        .wdata(cpu_dmem_wdata),
        .rdata(uart_rdata),
        .we(cpu_dmem_we && uart_access),
        .en(uart_access),
        .uart_rx(uart_rx),
        .uart_tx(uart_tx)
    );
    
    // Read-data multiplexer.
    // Steered by the raw address decode (not the request-qualified selects)
    // so that read data is still returned if the core drops its request
    // while keeping the address stable.
    always_comb begin
        if (sel_dmem) begin
            cpu_dmem_rdata = dmem_rdata;
        end else if (sel_gpio) begin
            cpu_dmem_rdata = gpio_rdata;
        end else if (sel_uart) begin
            cpu_dmem_rdata = uart_rdata;
        end else if (sel_ext_mem) begin
            cpu_dmem_rdata = ext_mem_rdata;
        end else begin
            cpu_dmem_rdata = '0; // unmapped address reads as zero
        end
    end
    
    // Fetches outside instruction memory return zero
    assign cpu_imem_data = sel_imem ? imem_rdata : '0;
    
    // External memory interface: address/data/byte-enables are passed
    // through unconditionally; only the strobes are qualified.
    assign ext_mem_addr = cpu_dmem_addr;
    assign ext_mem_wdata = cpu_dmem_wdata;
    assign ext_mem_be = cpu_dmem_be;
    assign ext_mem_we = cpu_dmem_we && ext_mem_access;
    assign ext_mem_req = ext_mem_access;

endmodule
```

#### Best Practices and Design Guidelines

##### Design Principles

1. **Modularity**: Break complex designs into smaller, manageable modules
2. **Parameterization**: Use parameters to make designs configurable and reusable
3. **Clock Domain Crossing**: Properly handle signals crossing different clock domains
4. **Reset Strategy**: Implement consistent reset strategies across all modules
5. **Naming Conventions**: Use clear, consistent naming for signals and modules

##### Performance Optimization

```systemverilog
// Example of optimized design patterns

// 1. Pipeline critical paths
// Three-cycle pipelined unsigned multiplier.
//   cycle 1: operands are captured
//   cycle 2: the registered operands are multiplied
//   cycle 3: the product is registered onto the output
// valid_in travels alongside the data and emerges as valid_out three
// clock edges later. All registers clear on the active-low async reset.
module optimized_multiplier #(
    parameter WIDTH = 32
)(
    input logic clk,
    input logic rst_n,
    input logic [WIDTH-1:0] a, b,
    input logic valid_in,
    output logic [2*WIDTH-1:0] product,
    output logic valid_out
);

    logic [WIDTH-1:0]   multiplicand_q, multiplier_q; // stage-1 operand registers
    logic [2*WIDTH-1:0] product_q;                    // stage-2 multiply result
    logic [1:0]         valid_pipe;                   // [0] = stage 1, [1] = stage 2

    // All three stages share one clocked process; non-blocking assignments
    // mean each stage samples the previous stage's value from before the edge.
    always_ff @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            multiplicand_q <= '0;
            multiplier_q   <= '0;
            product_q      <= '0;
            product        <= '0;
            valid_pipe     <= 2'b00;
            valid_out      <= 1'b0;
        end else begin
            // Stage 1: capture operands
            multiplicand_q <= a;
            multiplier_q   <= b;
            valid_pipe[0]  <= valid_in;

            // Stage 2: multiply the registered operands
            product_q      <= multiplicand_q * multiplier_q;
            valid_pipe[1]  <= valid_pipe[0];

            // Stage 3: drive the outputs
            product        <= product_q;
            valid_out      <= valid_pipe[1];
        end
    end

endmodule

// 2. Resource sharing
// Registered four-function ALU: one result register is shared by
// add, subtract, bitwise AND and bitwise OR.
//   operation 2'b00 : operand_a + operand_b
//   operation 2'b01 : operand_a - operand_b
//   operation 2'b10 : operand_a & operand_b
//   operation 2'b11 : operand_a | operand_b
// result updates on a rising clock edge only while enable is high; otherwise
// it keeps its previous value. The active-low async reset clears it.
module shared_alu #(
    parameter WIDTH = 32
)(
    input logic clk,
    input logic rst_n,
    input logic [WIDTH-1:0] operand_a, operand_b,
    input logic [1:0] operation,
    input logic enable,
    output logic [WIDTH-1:0] result
);

    logic [WIDTH-1:0] result_next;

    // Next-value selection. Defaults to "hold" so a disabled cycle (or an
    // unmatched selector) leaves the register unchanged.
    always_comb begin
        result_next = result;
        if (enable) begin
            case (operation)
                2'b00: result_next = operand_a + operand_b;
                2'b01: result_next = operand_a - operand_b;
                2'b10: result_next = operand_a & operand_b;
                2'b11: result_next = operand_a | operand_b;
            endcase
        end
    end

    // Result register
    always_ff @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            result <= '0;
        else
            result <= result_next;
    end

endmodule
```

##### Verification Considerations

When designing these components, consider:

1. **Testability**: Include test modes and observability points
2. **Assertions**: Add SystemVerilog assertions for critical properties
3. **Coverage**: Ensure all code paths and corner cases are testable
4. **Formal Verification**: Design with formal verification in mind

#### Summary

This chapter demonstrated practical SystemVerilog implementations across various domains:

- **Combinational Logic**: ALU, encoders, and barrel shifters showing advanced combinational design techniques
- **Sequential Logic**: Counters and state machines, with a real-world UART example
- **Memory Systems**: Dual-port RAM and FIFO implementations with proper handling of memory interfaces
- **Bus Protocols**: AXI4-Lite master showing industry-standard communication protocols
- **Processor Components**: RISC-V core, cache controller, and pipeline management demonstrating complex digital systems

Each example incorporates SystemVerilog best practices including parameterization, proper clocking, reset handling, and modular design. These examples serve as templates for developing robust, scalable digital systems in real-world applications.