From 3d4de8e838973ff9f553f2896cd46d5fd9c4c571 Mon Sep 17 00:00:00 2001 From: Davide Schiavone Date: Mon, 22 Jul 2024 16:03:40 +0200 Subject: [PATCH] adding ACK pwr gate in mem wrapper, fixing pwr manager APPs (#549) --- .github/workflows/sim-apps-job/test_apps.py | 2 +- .gitignore | 1 + Makefile | 1 + configs/example_interleaved.hjson | 4 +- configs/general.hjson | 4 +- core-v-mini-mcu.core | 7 + core-v-mini-mcu.dc.upf.tpl | 159 ++++++++ core-v-mini-mcu.upf.tpl | 6 +- hw/core-v-mini-mcu/core_v_mini_mcu.sv | 21 +- hw/core-v-mini-mcu/core_v_mini_mcu.sv.tpl | 16 +- hw/core-v-mini-mcu/memory_subsystem.sv.tpl | 2 + hw/fpga/sram_wrapper.sv.tpl | 4 + .../data/power_manager.hjson.tpl | 14 +- hw/simulation/simulation.vlt | 1 + hw/simulation/sram_wrapper.sv | 1 + hw/system/x_heep_system.sv.tpl | 9 +- scripts/synthesis/dc_shell/dc_script.tcl | 4 + sw/CMakeLists.txt | 4 +- sw/applications/example_power_manager/main.c | 5 +- .../example_spi_host_dma_power_gate/main.c | 341 ------------------ .../example_spidma_powergate/main.c | 192 ++++++++++ sw/device/bsp/w25q/w25q.c | 94 ++++- sw/device/bsp/w25q/w25q128jw.h | 9 + .../lib/drivers/power_manager/power_manager.c | 208 +---------- .../power_manager/power_manager_cpu_restore.S | 88 +++++ .../power_manager/power_manager_cpu_store.S | 88 +++++ sw/linker/link_flash_load.ld.tpl | 3 +- tb/testharness.sv | 6 +- 28 files changed, 709 insertions(+), 585 deletions(-) create mode 100644 core-v-mini-mcu.dc.upf.tpl delete mode 100644 sw/applications/example_spi_host_dma_power_gate/main.c create mode 100644 sw/applications/example_spidma_powergate/main.c create mode 100644 sw/device/lib/drivers/power_manager/power_manager_cpu_restore.S create mode 100644 sw/device/lib/drivers/power_manager/power_manager_cpu_store.S diff --git a/.github/workflows/sim-apps-job/test_apps.py b/.github/workflows/sim-apps-job/test_apps.py index 0d3cdfce7..0b4e1516b 100755 --- a/.github/workflows/sim-apps-job/test_apps.py +++ 
b/.github/workflows/sim-apps-job/test_apps.py @@ -31,7 +31,7 @@ class BColors: # Blacklist of apps to skip blacklist = [ "example_spi_read", - "example_spi_host_dma_power_gate", + "example_spidma_powergate", "example_spi_write", ] diff --git a/.gitignore b/.gitignore index 710c28c7f..822378eac 100644 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,7 @@ run_verif_rtl_log.txt # ignore the following hw automatically generated files environment.yml core-v-mini-mcu.upf +core-v-mini-mcu.dc.upf tb/tb_util.svh hw/core-v-mini-mcu/include/core_v_mini_mcu_pkg.sv hw/core-v-mini-mcu/system_bus.sv diff --git a/Makefile b/Makefile index 20b36e2c4..905b2c6b3 100644 --- a/Makefile +++ b/Makefile @@ -113,6 +113,7 @@ mcu-gen: $(PYTHON) util/mcu_gen.py --config $(X_HEEP_CFG) --cfg_peripherals $(MCU_CFG_PERIPHERALS) --pads_cfg $(PAD_CFG) --outdir sw/device/lib/runtime --cpu $(CPU) --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --external_domains $(EXTERNAL_DOMAINS) --header-c sw/device/lib/runtime/core_v_mini_mcu.h.tpl $(PYTHON) util/mcu_gen.py --config $(X_HEEP_CFG) --cfg_peripherals $(MCU_CFG_PERIPHERALS) --pads_cfg $(PAD_CFG) --outdir sw/linker --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --linker_script sw/linker/link.ld.tpl $(PYTHON) util/mcu_gen.py --config $(X_HEEP_CFG) --cfg_peripherals $(MCU_CFG_PERIPHERALS) --pads_cfg $(PAD_CFG) --outdir . --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --pkg-sv ./core-v-mini-mcu.upf.tpl + $(PYTHON) util/mcu_gen.py --config $(X_HEEP_CFG) --cfg_peripherals $(MCU_CFG_PERIPHERALS) --pads_cfg $(PAD_CFG) --outdir . 
--bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --pkg-sv ./core-v-mini-mcu.dc.upf.tpl $(PYTHON) util/mcu_gen.py --config $(X_HEEP_CFG) --cfg_peripherals $(MCU_CFG_PERIPHERALS) --pads_cfg $(PAD_CFG) --outdir hw/ip/power_manager/rtl --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --external_domains $(EXTERNAL_DOMAINS) --pkg-sv hw/ip/power_manager/data/power_manager.sv.tpl $(PYTHON) util/mcu_gen.py --config $(X_HEEP_CFG) --cfg_peripherals $(MCU_CFG_PERIPHERALS) --pads_cfg $(PAD_CFG) --outdir hw/ip/power_manager/data --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --external_domains $(EXTERNAL_DOMAINS) --pkg-sv hw/ip/power_manager/data/power_manager.hjson.tpl bash -c "cd hw/ip/power_manager; source power_manager_gen.sh; cd ../../../" diff --git a/configs/example_interleaved.hjson b/configs/example_interleaved.hjson index e831d7978..aac0801e5 100644 --- a/configs/example_interleaved.hjson +++ b/configs/example_interleaved.hjson @@ -30,11 +30,11 @@ name: code start: 0 // minimum size for freeRTOS and clang - size: 0x00000C800 + size: 0x00000D800 }, { name: data - start: 0x00000C800 + start: 0x00000D800 } ] } \ No newline at end of file diff --git a/configs/general.hjson b/configs/general.hjson index d3465c500..f70406401 100644 --- a/configs/general.hjson +++ b/configs/general.hjson @@ -14,11 +14,11 @@ name: code start: 0 #minimum size for freeRTOS and clang - size: 0x00000C800 + size: 0x00000D800 }, { name: data - start: 0x00000C800 + start: 0x00000D800 } ] } \ No newline at end of file diff --git a/core-v-mini-mcu.core b/core-v-mini-mcu.core index 681994899..aef1f20e2 100644 --- a/core-v-mini-mcu.core +++ b/core-v-mini-mcu.core @@ -237,6 +237,10 @@ parameters: datatype: bool paramtype: vlogdefine default: false + FPGA_SYNTHESIS: + datatype: bool + paramtype: vlogdefine + default: false FPGA_NEXYS: datatype: bool paramtype: vlogdefine @@ -421,6 +425,7 @@ targets: - X_EXT - 
SYNTHESIS=true - REMOVE_OBI_FIFO + - FPGA_SYNTHESIS=true - FPGA_NEXYS=true tools: vivado: @@ -442,6 +447,7 @@ targets: - X_EXT - SYNTHESIS=true - REMOVE_OBI_FIFO + - FPGA_SYNTHESIS=true tools: vivado: part: xc7z020clg400-1 @@ -462,6 +468,7 @@ targets: - X_EXT - SYNTHESIS=true - REMOVE_OBI_FIFO + - FPGA_SYNTHESIS=true - FPGA_ZCU104=true tools: vivado: diff --git a/core-v-mini-mcu.dc.upf.tpl b/core-v-mini-mcu.dc.upf.tpl new file mode 100644 index 000000000..b08abad80 --- /dev/null +++ b/core-v-mini-mcu.dc.upf.tpl @@ -0,0 +1,159 @@ +upf_version 2.1 + +set_design_top core_v_mini_mcu +set_scope . + + +<%text> +##################### +## POWER DOMAINS ## +##################### +\ + +create_power_domain PD_TOP -include_scope +create_power_domain PD_CPU -elements {cpu_subsystem_i} +create_power_domain PD_PERIP_SUBS -elements {peripheral_subsystem_i} +% for bank in xheep.iter_ram_banks(): +create_power_domain PD_MEM_BANK_${bank.name()} -elements {memory_subsystem_i/ram${bank.name()}_i} +% endfor + + +<%text> +#################### +## POWER STATES ## +#################### +\ + +add_power_state PD_TOP.primary -state TOP_ON <%text>\ + {-supply_expr {power == `{FULL_ON, 1.2} && ground == `{FULL_ON, 0.0}}} + +add_power_state PD_CPU.primary -state CPU_ON <%text>\ + {-supply_expr {power == `{FULL_ON, 1.2} && ground == `{FULL_ON, 0.0}}} + +add_power_state PD_CPU.primary -state CPU_OFF <%text>\ + {-supply_expr {power == `{OFF} && ground == `{FULL_ON, 0.0}}} -simstate CORRUPT + +add_power_state PD_PERIP_SUBS.primary -state PERIP_SUBS_ON <%text>\ + {-supply_expr {power == `{FULL_ON, 1.2} && ground == `{FULL_ON, 0.0}}} + +add_power_state PD_PERIP_SUBS.primary -state PERIP_SUBS_OFF <%text>\ + {-supply_expr {power == `{OFF} && ground == `{FULL_ON, 0.0}}} -simstate CORRUPT + +% for bank in xheep.iter_ram_banks(): +add_power_state PD_MEM_BANK_${bank.name()}.primary -state MEM_BANK_${bank.name()}_ON <%text>\ + {-supply_expr {power == `{FULL_ON, 1.2} && ground == `{FULL_ON, 0.0}}} + 
+add_power_state PD_MEM_BANK_${bank.name()}.primary -state MEM_BANK_${bank.name()}_OFF <%text>\ + {-supply_expr {power == `{OFF} && ground == `{FULL_ON, 0.0}}} -simstate CORRUPT + +% endfor + +<%text> +################### +## SUPPLY NETS ## +################### +\ + +create_supply_port VDD -direction in +create_supply_port VSS -direction in + +create_supply_net VDD +create_supply_net VSS + +connect_supply_net VDD -ports VDD +connect_supply_net VSS -ports VSS + +create_supply_set PD_TOP.primary -function {power VDD} -function {ground VSS} -update + +create_supply_net VDD_CPU +create_supply_set PD_CPU.primary -function {power VDD_CPU} -function {ground VSS} -update + +create_supply_net VDD_PERIP_SUBS +create_supply_set PD_PERIP_SUBS.primary -function {power VDD_PERIP_SUBS} -function {ground VSS} -update + +% for bank in xheep.iter_ram_banks(): +create_supply_net VDD_MEM_BANK_${bank.name()} +create_supply_set PD_MEM_BANK_${bank.name()}.primary -function {power VDD_MEM_BANK_${bank.name()}} -function {ground VSS} -update + +% endfor + +<%text> +################ +## SWITCHES ## +################ +\ + +create_power_switch switch_PD_CPU <%text>\ + -supply_set PD_TOP.primary <%text>\ + -domain PD_CPU <%text>\ + -input_supply_port {sw_in VDD} <%text>\ + -output_supply_port {sw_out VDD_CPU} <%text>\ + -control_port {sw_ctrl ao_peripheral_subsystem_i/cpu_subsystem_pwr_ctrl_o<%text>\[pwrgate_en_n<%text>\]} <%text>\ + -ack_port {sw_ack ao_peripheral_subsystem_i/cpu_subsystem_pwr_ctrl_i<%text>\[pwrgate_ack_n<%text>\]} <%text>\ + -on_state {on_state sw_in {sw_ctrl}} <%text>\ + -off_state {off_state {!sw_ctrl}} + +create_power_switch switch_PD_PERIP_SUBS <%text>\ + -supply_set PD_TOP.primary <%text>\ + -domain PD_PERIP_SUBS <%text>\ + -input_supply_port {sw_in VDD} <%text>\ + -output_supply_port {sw_out VDD_PERIP_SUBS} <%text>\ + -control_port {sw_ctrl ao_peripheral_subsystem_i/peripheral_subsystem_pwr_ctrl_o<%text>\[pwrgate_en_n<%text>\]} <%text>\ + -ack_port {sw_ack 
ao_peripheral_subsystem_i/peripheral_subsystem_pwr_ctrl_i<%text>\[pwrgate_ack_n<%text>\]} <%text>\ + -on_state {on_state sw_in {sw_ctrl}} <%text>\ + -off_state {off_state {!sw_ctrl}} + +% for bank in xheep.iter_ram_banks(): +create_power_switch switch_PD_MEM_BANK_${bank.name()} <%text>\ + -supply_set PD_TOP.primary <%text>\ + -domain PD_MEM_BANK_${bank.name()} <%text>\ + -input_supply_port {sw_in VDD} <%text>\ + -output_supply_port {sw_out VDD_MEM_BANK_${bank.name()}} <%text>\ + -control_port {sw_ctrl ao_peripheral_subsystem_i/memory_subsystem_pwr_ctrl_o[${bank.name()}]<%text>\[pwrgate_en_n<%text>\]} <%text>\ + -ack_port {sw_ack ao_peripheral_subsystem_i/memory_subsystem_pwr_ctrl_i[${bank.name()}]<%text>\[pwrgate_ack_n<%text>\]} <%text>\ + -on_state {on_state sw_in {sw_ctrl}} <%text>\ + -off_state {off_state {!sw_ctrl}} + +% endfor + +<%text> +################# +## ISOLATION ## +################# +\ + +set_isolation cpu_iso <%text>\ + -domain PD_CPU <%text>\ + -isolation_power_net VDD <%text>\ + -isolation_ground_net VSS <%text>\ + -isolation_signal ao_peripheral_subsystem_i/cpu_subsystem_pwr_ctrl_o<%text>\[isogate_en_n<%text>\] <%text>\ + -isolation_sense low <%text>\ + -clamp_value 0 <%text>\ + -applies_to outputs <%text>\ + -name_prefix cpu_iso_cell <%text>\ + -location parent + +set_isolation perip_subs_iso <%text>\ + -domain PD_PERIP_SUBS <%text>\ + -isolation_power_net VDD <%text>\ + -isolation_ground_net VSS <%text>\ + -isolation_signal ao_peripheral_subsystem_i/peripheral_subsystem_pwr_ctrl_o<%text>\[isogate_en_n<%text>\] <%text>\ + -isolation_sense low <%text>\ + -clamp_value 0 <%text>\ + -applies_to outputs <%text>\ + -name_prefix cpu_iso_cell <%text>\ + -location parent + +% for bank in xheep.iter_ram_banks(): +set_isolation mem_bank_${bank.name()}_iso <%text>\ + -domain PD_MEM_BANK_${bank.name()} <%text>\ + -isolation_power_net VDD <%text>\ + -isolation_ground_net VSS <%text>\ + -isolation_signal 
ao_peripheral_subsystem_i/memory_subsystem_pwr_ctrl_o[${bank.name()}]<%text>\[isogate_en_n<%text>\] <%text>\ + -isolation_sense low <%text>\ + -clamp_value 0 <%text>\ + -elements {memory_subsystem_i/ram${bank.name()}_i/rdata_o} <%text>\ + -name_prefix cpu_iso_cell <%text>\ + -location parent + +% endfor diff --git a/core-v-mini-mcu.upf.tpl b/core-v-mini-mcu.upf.tpl index 378a74a68..d7e0f6f1f 100644 --- a/core-v-mini-mcu.upf.tpl +++ b/core-v-mini-mcu.upf.tpl @@ -109,8 +109,8 @@ create_power_switch switch_PD_MEM_BANK_${bank.name()} <%text>\ -domain PD_MEM_BANK_${bank.name()} <%text>\ -input_supply_port {sw_in VDD} <%text>\ -output_supply_port {sw_out VDD_MEM_BANK_${bank.name()}} <%text>\ - -control_port {sw_ctrl memory_subsystem_banks_powergate_switch_no[${bank.name()}]} <%text>\ - -ack_port {sw_ack memory_subsystem_banks_powergate_switch_ack_ni[${bank.name()}]} <%text>\ + -control_port {sw_ctrl memory_subsystem_banks_powergate_switch_n[${bank.name()}]} <%text>\ + -ack_port {sw_ack memory_subsystem_i.ram${bank.name()}_i.pwrgate_ack_no} <%text>\ -on_state {on_state sw_in {sw_ctrl}} <%text>\ -off_state {off_state {!sw_ctrl}} @@ -152,7 +152,7 @@ set_isolation mem_bank_${bank.name()}_iso <%text>\ -isolation_signal memory_subsystem_banks_powergate_iso_n[${bank.name()}] <%text>\ -isolation_sense low <%text>\ -clamp_value 0 <%text>\ - -applies_to outputs <%text>\ + -elements {memory_subsystem_i/ram${bank.name()}_i/rdata_o} <%text>\ -name_prefix cpu_iso_cell <%text>\ -location parent diff --git a/hw/core-v-mini-mcu/core_v_mini_mcu.sv b/hw/core-v-mini-mcu/core_v_mini_mcu.sv index 95755494b..4cfb3d9d5 100644 --- a/hw/core-v-mini-mcu/core_v_mini_mcu.sv +++ b/hw/core-v-mini-mcu/core_v_mini_mcu.sv @@ -315,8 +315,6 @@ module core_v_mini_mcu input logic cpu_subsystem_powergate_switch_ack_ni, output logic peripheral_subsystem_powergate_switch_no, input logic peripheral_subsystem_powergate_switch_ack_ni, - output logic [core_v_mini_mcu_pkg::NUM_BANKS-1:0] 
memory_subsystem_banks_powergate_switch_no, - input logic [core_v_mini_mcu_pkg::NUM_BANKS-1:0] memory_subsystem_banks_powergate_switch_ack_ni, output logic [EXT_DOMAINS_RND-1:0] external_subsystem_powergate_switch_no, input logic [EXT_DOMAINS_RND-1:0] external_subsystem_powergate_switch_ack_ni, output logic [EXT_DOMAINS_RND-1:0] external_subsystem_powergate_iso_no, @@ -421,6 +419,8 @@ module core_v_mini_mcu logic peripheral_subsystem_powergate_iso_n; logic peripheral_subsystem_clkgate_en_n; + logic [core_v_mini_mcu_pkg::NUM_BANKS-1:0] memory_subsystem_banks_powergate_switch_n; + logic [core_v_mini_mcu_pkg::NUM_BANKS-1:0] memory_subsystem_banks_powergate_switch_ack_n; logic [core_v_mini_mcu_pkg::NUM_BANKS-1:0] memory_subsystem_banks_set_retentive_n; logic [core_v_mini_mcu_pkg::NUM_BANKS-1:0] memory_subsystem_banks_powergate_iso_n; logic [core_v_mini_mcu_pkg::NUM_BANKS-1:0] memory_subsystem_clkgate_en_n; @@ -440,16 +440,14 @@ module core_v_mini_mcu assign peripheral_subsystem_rst_n = peripheral_subsystem_pwr_ctrl_out.rst_n; assign peripheral_subsystem_clkgate_en_n = peripheral_subsystem_pwr_ctrl_out.clkgate_en_n; - //pwrgate exposed both outside and inside to deal with memories with embedded SLEEP mode or external PWR cells - assign memory_subsystem_banks_powergate_switch_no[0] = memory_subsystem_pwr_ctrl_out[0].pwrgate_en_n; - assign memory_subsystem_pwr_ctrl_in[0].pwrgate_ack_n = memory_subsystem_banks_powergate_switch_ack_ni[0]; + assign memory_subsystem_banks_powergate_switch_n[0] = memory_subsystem_pwr_ctrl_out[0].pwrgate_en_n; + assign memory_subsystem_pwr_ctrl_in[0].pwrgate_ack_n = memory_subsystem_banks_powergate_switch_ack_n[0]; //isogate exposed outside for UPF sim flow and switch cells assign memory_subsystem_banks_powergate_iso_n[0] = memory_subsystem_pwr_ctrl_out[0].isogate_en_n; assign memory_subsystem_banks_set_retentive_n[0] = memory_subsystem_pwr_ctrl_out[0].retentive_en_n; assign memory_subsystem_clkgate_en_n[0] = 
memory_subsystem_pwr_ctrl_out[0].clkgate_en_n; - //pwrgate exposed both outside and inside to deal with memories with embedded SLEEP mode or external PWR cells - assign memory_subsystem_banks_powergate_switch_no[1] = memory_subsystem_pwr_ctrl_out[1].pwrgate_en_n; - assign memory_subsystem_pwr_ctrl_in[1].pwrgate_ack_n = memory_subsystem_banks_powergate_switch_ack_ni[1]; + assign memory_subsystem_banks_powergate_switch_n[1] = memory_subsystem_pwr_ctrl_out[1].pwrgate_en_n; + assign memory_subsystem_pwr_ctrl_in[1].pwrgate_ack_n = memory_subsystem_banks_powergate_switch_ack_n[1]; //isogate exposed outside for UPF sim flow and switch cells assign memory_subsystem_banks_powergate_iso_n[1] = memory_subsystem_pwr_ctrl_out[1].isogate_en_n; assign memory_subsystem_banks_set_retentive_n[1] = memory_subsystem_pwr_ctrl_out[1].retentive_en_n; @@ -610,11 +608,8 @@ module core_v_mini_mcu .clk_gate_en_ni(memory_subsystem_clkgate_en_n), .ram_req_i(ram_slave_req), .ram_resp_o(ram_slave_resp), - /* - the memory_subsystem_banks_powergate_switch_no gets wired both internally - and externally to support both macros that have and do not have SLEEP capabilities integrated in the macros - */ - .pwrgate_ni(memory_subsystem_banks_powergate_switch_no), + .pwrgate_ni(memory_subsystem_banks_powergate_switch_n), + .pwrgate_ack_no(memory_subsystem_banks_powergate_switch_ack_n), .set_retentive_ni(memory_subsystem_banks_set_retentive_n) ); diff --git a/hw/core-v-mini-mcu/core_v_mini_mcu.sv.tpl b/hw/core-v-mini-mcu/core_v_mini_mcu.sv.tpl index da2140430..cb5717528 100644 --- a/hw/core-v-mini-mcu/core_v_mini_mcu.sv.tpl +++ b/hw/core-v-mini-mcu/core_v_mini_mcu.sv.tpl @@ -69,8 +69,6 @@ ${pad.core_v_mini_mcu_interface} input logic cpu_subsystem_powergate_switch_ack_ni, output logic peripheral_subsystem_powergate_switch_no, input logic peripheral_subsystem_powergate_switch_ack_ni, - output logic [core_v_mini_mcu_pkg::NUM_BANKS-1:0] memory_subsystem_banks_powergate_switch_no, - input logic 
[core_v_mini_mcu_pkg::NUM_BANKS-1:0] memory_subsystem_banks_powergate_switch_ack_ni, output logic [EXT_DOMAINS_RND-1:0] external_subsystem_powergate_switch_no, input logic [EXT_DOMAINS_RND-1:0] external_subsystem_powergate_switch_ack_ni, output logic [EXT_DOMAINS_RND-1:0] external_subsystem_powergate_iso_no, @@ -175,6 +173,8 @@ ${pad.core_v_mini_mcu_interface} logic peripheral_subsystem_powergate_iso_n; logic peripheral_subsystem_clkgate_en_n; + logic [core_v_mini_mcu_pkg::NUM_BANKS-1:0] memory_subsystem_banks_powergate_switch_n; + logic [core_v_mini_mcu_pkg::NUM_BANKS-1:0] memory_subsystem_banks_powergate_switch_ack_n; logic [core_v_mini_mcu_pkg::NUM_BANKS-1:0] memory_subsystem_banks_set_retentive_n; logic [core_v_mini_mcu_pkg::NUM_BANKS-1:0] memory_subsystem_banks_powergate_iso_n; logic [core_v_mini_mcu_pkg::NUM_BANKS-1:0] memory_subsystem_clkgate_en_n; @@ -195,9 +195,8 @@ ${pad.core_v_mini_mcu_interface} assign peripheral_subsystem_clkgate_en_n = peripheral_subsystem_pwr_ctrl_out.clkgate_en_n; % for bank in xheep.iter_ram_banks(): - //pwrgate exposed both outside and inside to deal with memories with embedded SLEEP mode or external PWR cells - assign memory_subsystem_banks_powergate_switch_no[${bank.name()}] = memory_subsystem_pwr_ctrl_out[${bank.name()}].pwrgate_en_n; - assign memory_subsystem_pwr_ctrl_in[${bank.name()}].pwrgate_ack_n = memory_subsystem_banks_powergate_switch_ack_ni[${bank.name()}]; + assign memory_subsystem_banks_powergate_switch_n[${bank.name()}] = memory_subsystem_pwr_ctrl_out[${bank.name()}].pwrgate_en_n; + assign memory_subsystem_pwr_ctrl_in[${bank.name()}].pwrgate_ack_n = memory_subsystem_banks_powergate_switch_ack_n[${bank.name()}]; //isogate exposed outside for UPF sim flow and switch cells assign memory_subsystem_banks_powergate_iso_n[${bank.name()}] = memory_subsystem_pwr_ctrl_out[${bank.name()}].isogate_en_n; assign memory_subsystem_banks_set_retentive_n[${bank.name()}] = memory_subsystem_pwr_ctrl_out[${bank.name()}].retentive_en_n; 
@@ -359,11 +358,8 @@ ${pad.core_v_mini_mcu_interface} .clk_gate_en_ni(memory_subsystem_clkgate_en_n), .ram_req_i(ram_slave_req), .ram_resp_o(ram_slave_resp), - /* - the memory_subsystem_banks_powergate_switch_no gets wired both internally - and externally to support both macros that have and do not have SLEEP capabilities integrated in the macros - */ - .pwrgate_ni(memory_subsystem_banks_powergate_switch_no), + .pwrgate_ni(memory_subsystem_banks_powergate_switch_n), + .pwrgate_ack_no(memory_subsystem_banks_powergate_switch_ack_n), .set_retentive_ni(memory_subsystem_banks_set_retentive_n) ); diff --git a/hw/core-v-mini-mcu/memory_subsystem.sv.tpl b/hw/core-v-mini-mcu/memory_subsystem.sv.tpl index aa8f3b834..660b1b6a9 100644 --- a/hw/core-v-mini-mcu/memory_subsystem.sv.tpl +++ b/hw/core-v-mini-mcu/memory_subsystem.sv.tpl @@ -21,6 +21,7 @@ module memory_subsystem // power manager signals that goes to the ASIC macros input logic [core_v_mini_mcu_pkg::NUM_BANKS-1:0] pwrgate_ni, + output logic [core_v_mini_mcu_pkg::NUM_BANKS-1:0] pwrgate_ack_no, input logic [core_v_mini_mcu_pkg::NUM_BANKS-1:0] set_retentive_ni ); @@ -74,6 +75,7 @@ module memory_subsystem .wdata_i(ram_req_i[${i}].wdata), .be_i(ram_req_i[${i}].be), .pwrgate_ni(pwrgate_ni[${i}]), + .pwrgate_ack_no(pwrgate_ack_no[${i}]), .set_retentive_ni(set_retentive_ni[${i}]), .rdata_o(ram_resp_o[${i}].rdata) ); diff --git a/hw/fpga/sram_wrapper.sv.tpl b/hw/fpga/sram_wrapper.sv.tpl index 5eeac9455..6d518136a 100644 --- a/hw/fpga/sram_wrapper.sv.tpl +++ b/hw/fpga/sram_wrapper.sv.tpl @@ -24,10 +24,14 @@ module sram_wrapper #( input logic [3:0] be_i, // power manager signals that goes to the ASIC macros input logic pwrgate_ni, + output logic pwrgate_ack_no, input logic set_retentive_ni, // output ports output logic [31:0] rdata_o ); + +assign pwrgate_ack_no = pwrgate_ni; + <%el = ""%> % for num_words in xheep.iter_bank_numwords(): ${el}if (NumWords == 32'd${num_words}) begin diff --git 
a/hw/ip/power_manager/data/power_manager.hjson.tpl b/hw/ip/power_manager/data/power_manager.hjson.tpl index cdfd6b2dc..0d3362558 100644 --- a/hw/ip/power_manager/data/power_manager.hjson.tpl +++ b/hw/ip/power_manager/data/power_manager.hjson.tpl @@ -19,12 +19,22 @@ } { name: "RESTORE_ADDRESS", - desc: "Restore xddress value", + desc: "Restore address value", resval: "0x00000000" swaccess: "rw", hwaccess: "hro", fields: [ - { bits: "31:0", name: "RESTORE_XDDRESS", desc: "Restore xddress Reg, used by BOOTROM" } + { bits: "31:0", name: "RESTORE_ADDRESS", desc: "Restore address Reg, used by BOOTROM" } + ] + } + + { name: "GLOBAL_POINTER", + desc: "Global Pointer value", + resval: "0x00000000" + swaccess: "rw", + hwaccess: "hro", + fields: [ + { bits: "31:0", name: "GLOBAL_POINTER", desc: "Global Reg, used by power manager HAL" } ] } diff --git a/hw/simulation/simulation.vlt b/hw/simulation/simulation.vlt index 79672c20a..0b30dd69b 100644 --- a/hw/simulation/simulation.vlt +++ b/hw/simulation/simulation.vlt @@ -6,3 +6,4 @@ lint_off -rule UNOPTFLAT -file "*/hw/simulation/pad_cell_*.sv" -match "Signal unoptimizable*" lint_off -rule UNUSED -file "*/hw/simulation/sram_wrapper.sv" -match "Signal is not used: 'pwrgate_ni*" +lint_off -rule UNDRIVEN -file "*/hw/simulation/sram_wrapper.sv" -match "Signal is not driven: 'pwrgate_ack_no*" diff --git a/hw/simulation/sram_wrapper.sv b/hw/simulation/sram_wrapper.sv index 1d6119580..0986b3e2c 100644 --- a/hw/simulation/sram_wrapper.sv +++ b/hw/simulation/sram_wrapper.sv @@ -24,6 +24,7 @@ module sram_wrapper #( input logic [3:0] be_i, // power manager signals that goes to the ASIC macros input logic pwrgate_ni, + output logic pwrgate_ack_no, input logic set_retentive_ni, // output ports output logic [31:0] rdata_o diff --git a/hw/system/x_heep_system.sv.tpl b/hw/system/x_heep_system.sv.tpl index 4eea88a01..e77bd1b44 100644 --- a/hw/system/x_heep_system.sv.tpl +++ b/hw/system/x_heep_system.sv.tpl @@ -83,8 +83,6 @@ 
${pad.x_heep_system_interface} logic cpu_subsystem_powergate_switch_ack_n; logic peripheral_subsystem_powergate_switch_n; logic peripheral_subsystem_powergate_switch_ack_n; - logic [core_v_mini_mcu_pkg::NUM_BANKS-1:0] memory_subsystem_banks_powergate_switch_n; - logic [core_v_mini_mcu_pkg::NUM_BANKS-1:0] memory_subsystem_banks_powergate_switch_ack_n; // PAD controller reg_req_t pad_req; @@ -103,6 +101,11 @@ ${pad.x_heep_system_interface} ${pad.internal_signals} % endfor +`ifdef FPGA_SYNTHESIS + assign cpu_subsystem_powergate_switch_ack_n = cpu_subsystem_powergate_switch_n; + assign peripheral_subsystem_powergate_switch_ack_n = peripheral_subsystem_powergate_switch_n; +`endif + core_v_mini_mcu #( .COREV_PULP(COREV_PULP), .FPU(FPU), @@ -148,8 +151,6 @@ ${pad.core_v_mini_mcu_bonding} .cpu_subsystem_powergate_switch_ack_ni(cpu_subsystem_powergate_switch_ack_n), .peripheral_subsystem_powergate_switch_no(peripheral_subsystem_powergate_switch_n), .peripheral_subsystem_powergate_switch_ack_ni(peripheral_subsystem_powergate_switch_ack_n), - .memory_subsystem_banks_powergate_switch_no(memory_subsystem_banks_powergate_switch_n), - .memory_subsystem_banks_powergate_switch_ack_ni(memory_subsystem_banks_powergate_switch_ack_n), .external_subsystem_powergate_switch_no, .external_subsystem_powergate_switch_ack_ni, .external_subsystem_powergate_iso_no, diff --git a/scripts/synthesis/dc_shell/dc_script.tcl b/scripts/synthesis/dc_shell/dc_script.tcl index 8edeb7f96..3a246ab80 100644 --- a/scripts/synthesis/dc_shell/dc_script.tcl +++ b/scripts/synthesis/dc_shell/dc_script.tcl @@ -10,11 +10,15 @@ remove_design -all source ${SET_LIBS} +define_design_lib WORK -path ./work + source ${READ_SOURCES}.tcl elaborate ${TOP_MODULE} link +load_upf ../../../core-v-mini-mcu.dc.upf + write -f ddc -hierarchy -output ${REPORT_DIR}/precompiled.ddc source ${CONSTRAINTS} diff --git a/sw/CMakeLists.txt b/sw/CMakeLists.txt index 22a54ecd2..549a2a064 100644 --- a/sw/CMakeLists.txt +++ b/sw/CMakeLists.txt @@ 
-133,7 +133,7 @@ SET( app_found 0 ) FOREACH(file_path IN LISTS new_list) SET(add 0) # This variable is set to 1 if the file_pth needs to be added to the list if(${file_path} MATCHES "${SOURCE_PATH}device/") - if(NOT ${file_path} MATCHES "\\.S$") + if(NOT ${file_path} MATCHES ".*/crt/.*") SET(add 1) endif() elseif( (${file_path} MATCHES "${SOURCE_PATH}external/") AND ( NOT ${file_path} MATCHES "exclude" ) ) @@ -172,7 +172,7 @@ if( app_found EQUAL 0 ) FOREACH(file_path IN LISTS new_list) SET(add 0) # This variable is set to 1 if the file_pth needs to be added to the list if(${file_path} MATCHES "${ROOT_PROJECT}device/") - if(NOT ${file_path} MATCHES "\\.S$") + if(NOT ${file_path} MATCHES ".*/crt/.*") SET(add 1) endif() elseif( ( ${file_path} MATCHES "${ROOT_PROJECT}/applications/${PROJECT}/" ) AND ( NOT ${file_path} MATCHES "${ROOT_PROJECT}applications/${PROJECT}/.*${MAINFILE}\." ) AND ( NOT ${file_path} MATCHES "exclude" ) ) diff --git a/sw/applications/example_power_manager/main.c b/sw/applications/example_power_manager/main.c index ee1b9bdda..4f025e1e7 100644 --- a/sw/applications/example_power_manager/main.c +++ b/sw/applications/example_power_manager/main.c @@ -217,6 +217,9 @@ int main(int argc, char *argv[]) trans.sign_ext = 0; trans.end = DMA_TRANS_END_INTR; trans.dim = DMA_DIM_CONF_1D; + trans.dim_inv = 0; + trans.channel = 0; + trans.pad_top_du = 0; trans.pad_bottom_du = 0; trans.pad_left_du = 0; @@ -231,7 +234,7 @@ int main(int argc, char *argv[]) { CSR_CLEAR_BITS(CSR_REG_MSTATUS, 0x8); if (dma_is_ready(0) == 0) - { + { if (power_gate_core(&power_manager, kDma_pm_e, &power_manager_counters) != kPowerManagerOk_e) { PRINTF("Error: power manager fail.\n\r"); diff --git a/sw/applications/example_spi_host_dma_power_gate/main.c b/sw/applications/example_spi_host_dma_power_gate/main.c deleted file mode 100644 index 776991cb1..000000000 --- a/sw/applications/example_spi_host_dma_power_gate/main.c +++ /dev/null @@ -1,341 +0,0 @@ -// Copyright EPFL contributors. 
-// Licensed under the Apache License, Version 2.0, see LICENSE for details. -// SPDX-License-Identifier: Apache-2.0 - -#include -#include -#include - -#include "core_v_mini_mcu.h" -#include "csr.h" -#include "hart.h" -#include "handler.h" -#include "soc_ctrl.h" -#include "spi_host.h" -#include "dma.h" -#include "fast_intr_ctrl.h" -#include "power_manager.h" -#include "x-heep.h" - -#ifdef TARGET_IS_FPGA - #define USE_SPI_FLASH -#endif - -/* By default, printfs are activated for FPGA and disabled for simulation. */ -#define PRINTF_IN_FPGA 1 -#define PRINTF_IN_SIM 0 - -#if TARGET_SIM && PRINTF_IN_SIM - #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) -#elif PRINTF_IN_FPGA && !TARGET_SIM - #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) -#else - #define PRINTF(...) -#endif - -// Type of data frome the SPI. For types different than words the SPI data is requested in separate transactions -// word(0), half-word(1), byte(2,3) -#define SPI_DATA_TYPE DMA_DATA_TYPE_WORD - -// Number of elements to copy -#define COPY_DATA_NUM 16 - -#define FLASH_CLK_MAX_HZ (133*1000*1000) // In Hz (133 MHz for the flash w25q128jvsim used in the EPFL Programmer) - -#define REVERT_24b_ADDR(addr) ((((uint32_t)(addr) & 0xff0000) >> 16) | ((uint32_t)(addr) & 0xff00) | (((uint32_t)(addr) & 0xff) << 16)) - -volatile int8_t dma_intr_flag; -int8_t core_sleep_flag; -spi_host_t* spi_peri; - -static power_manager_t power_manager; - -void dma_intr_handler_trans_done(uint8_t channel) -{ - PRINTF("Non-weak implementation of a DMA interrupt\n\r"); - dma_intr_flag = 1; -} - -// Reserve memory array -uint32_t flash_data[COPY_DATA_NUM] __attribute__ ((aligned (4))) = {0x76543210,0xfedcba98,0x579a6f90,0x657d5bee,0x758ee41f,0x01234567,0xfedbca98,0x89abcdef,0x679852fe,0xff8252bb,0x763b4521,0x6875adaa,0x09ac65bb,0x666ba334,0x44556677,0x0000ba98}; -uint32_t copy_data[COPY_DATA_NUM] __attribute__ ((aligned (4))) = { 0 }; - -#if SPI_DATA_TYPE == DMA_DATA_TYPE_DATA_TYPE_VALUE_DMA_32BIT_WORD - #define 
DATA_TYPE uint32_t -#elif SPI_DATA_TYPE == DMA_DATA_TYPE_DATA_TYPE_VALUE_DMA_16BIT_WORD - #define DATA_TYPE uint16_t -#else - #define DATA_TYPE uint8_t -#endif - -#define COPY_DATA_TYPE (COPY_DATA_NUM/(sizeof(uint32_t)/sizeof(DATA_TYPE))) - -int main(int argc, char *argv[]) -{ - - soc_ctrl_t soc_ctrl; - soc_ctrl.base_addr = mmio_region_from_addr((uintptr_t)SOC_CTRL_START_ADDRESS); - uint32_t read_byte_cmd; - uint32_t* flash_data_lma = flash_data; - //set MS 8 bits to 0 as the flash only uses 24b - flash_data_lma = (uint32_t*) ((uint32_t)(flash_data_lma) & 0x00FFFFFF); - - - if ( get_spi_flash_mode(&soc_ctrl) == SOC_CTRL_SPI_FLASH_MODE_SPIMEMIO ) - { -#ifdef USE_SPI_FLASH - PRINTF("This application cannot work with the memory mapped SPI FLASH module - do not use the FLASH_EXEC linker script for this application\n"); - return EXIT_SUCCESS; -#else - /* - if we are using in SIMULATION the SPIMMIO from Yosys, then the flash_original data is different - as the compilation is done differently, so we will store there the first WORDs of code mapped at the beginning of the FLASH - */ - uint32_t* ptr_flash = (uint32_t*)FLASH_MEM_START_ADDRESS; - for(int i =0; i < COPY_DATA_NUM ; i++){ - flash_data[i] = ptr_flash[i]; - } -#endif - } - - #ifndef USE_SPI_FLASH - spi_peri = spi_host1; - #else - spi_peri = spi_flash; - #endif - - // Setup power_manager - mmio_region_t power_manager_reg = mmio_region_from_addr(POWER_MANAGER_START_ADDRESS); - power_manager.base_addr = power_manager_reg; - power_manager_counters_t power_manager_cpu_counters; - // Init cpu_subsystem's counters - if (power_gate_counters_init(&power_manager_cpu_counters, 300, 300, 300, 300, 300, 300, 0, 0) != kPowerManagerOk_e) - { - PRINTF("Error: power manager fail. 
Check the reset and powergate counters value\n\r"); - return EXIT_FAILURE; - } - - uint32_t core_clk = soc_ctrl_get_frequency(&soc_ctrl); - - // Enable interrupt on processor side - // Enable global interrupt for machine-level interrupts - CSR_SET_BITS(CSR_REG_MSTATUS, 0x8); - - // Set mie.MEIE bit to one to enable machine-level fast dma interrupt - const uint32_t mask = 1 << 19; - CSR_SET_BITS(CSR_REG_MIE, mask); - - #ifdef USE_SPI_FLASH - // Select SPI host as SPI output - soc_ctrl_select_spi_host(&soc_ctrl); - #endif - - // Enable SPI host device - spi_set_enable(spi_peri, true); - // Enable SPI output - spi_output_enable(spi_peri, true); - - // SPI and SPI_FLASH are the same IP so same register map - uint32_t *fifo_ptr_rx = (uintptr_t)spi_peri + SPI_HOST_RXDATA_REG_OFFSET; - - core_sleep_flag = 0; - - // -- DMA CONFIGURATION -- - - dma_init(NULL); - - #ifndef USE_SPI_FLASH - uint8_t slot = DMA_TRIG_SLOT_SPI_RX ; // The DMA will wait for the SPI RX FIFO valid signal - #else - uint8_t slot = DMA_TRIG_SLOT_SPI_FLASH_RX ; // The DMA will wait for the SPI FLASH RX FIFO valid signal - #endif - - static dma_target_t tgt_src = { - .size_du = COPY_DATA_NUM, - .inc_du = 0, - .type = SPI_DATA_TYPE, - }; - tgt_src.ptr = fifo_ptr_rx; - tgt_src.trig = slot; - - static dma_target_t tgt_dst = { - .ptr = copy_data, - .inc_du = 1, - .type = SPI_DATA_TYPE, - .trig = DMA_TRIG_MEMORY, - }; - - static dma_trans_t trans = { - .src = &tgt_src, - .dst = &tgt_dst, - .end = DMA_TRANS_END_INTR, - }; - - dma_config_flags_t res; - - res = dma_validate_transaction(&trans ,DMA_ENABLE_REALIGN, DMA_PERFORM_CHECKS_INTEGRITY); - PRINTF("trans: %u \n\r", res ); - res = dma_load_transaction(&trans); - PRINTF(" load: %u \n\r", res ); - - - - // Configure SPI clock - // SPI clk freq = 1/2 core clk freq when clk_div = 0 - // SPI_CLK = CORE_CLK/(2 + 2 * CLK_DIV) <= CLK_MAX => CLK_DIV > (CORE_CLK/CLK_MAX - 2)/2 - uint16_t clk_div = 0; - if(FLASH_CLK_MAX_HZ < core_clk/2){ - clk_div = 
(core_clk/(FLASH_CLK_MAX_HZ) - 2)/2; // The value is truncated - if (core_clk/(2 + 2 * clk_div) > FLASH_CLK_MAX_HZ) clk_div += 1; // Adjust if the truncation was not 0 - } - // SPI Configuration - // Configure chip 0 (flash memory) - const uint32_t chip_cfg = spi_create_configopts((spi_configopts_t){ - .clkdiv = clk_div, - .csnidle = 0xF, - .csntrail = 0xF, - .csnlead = 0xF, - .fullcyc = false, - .cpha = 0, - .cpol = 0 - }); - spi_set_configopts(spi_peri, 0, chip_cfg); - spi_set_csid(spi_peri, 0); - - // Reset - const uint32_t reset_cmd = 0xFFFFFFFF; - spi_write_word(spi_peri, reset_cmd); - const uint32_t cmd_reset = spi_create_command((spi_command_t){ - .len = 3, - .csaat = false, - .speed = SPI_SPEED_STANDARD, - .direction = SPI_DIR_TX_ONLY - }); - spi_set_command(spi_peri, cmd_reset); - spi_wait_for_ready(spi_peri); - - // Power up flash - const uint32_t powerup_byte_cmd = 0xab; - spi_write_word(spi_peri, powerup_byte_cmd); - const uint32_t cmd_powerup = spi_create_command((spi_command_t){ - .len = 0, - .csaat = false, - .speed = SPI_SPEED_STANDARD, - .direction = SPI_DIR_TX_ONLY - }); - spi_set_command(spi_peri, cmd_powerup); - spi_wait_for_ready(spi_peri); - - // Load command FIFO with read command (1 Byte at single speed) - const uint32_t cmd_read = spi_create_command((spi_command_t){ - .len = 3, - .csaat = true, - .speed = SPI_SPEED_STANDARD, - .direction = SPI_DIR_TX_ONLY - }); - - dma_intr_flag = 0; - dma_launch(&trans); - PRINTF("Launched\n\r"); - - #if SPI_DATA_TYPE == DMA_DATA_TYPE_DATA_TYPE_VALUE_DMA_32BIT_WORD - if(get_spi_flash_mode(&soc_ctrl) != SOC_CTRL_SPI_FLASH_MODE_SPIMEMIO) - read_byte_cmd = ((REVERT_24b_ADDR(flash_data_lma) << 8) | 0x03); // The address bytes sent through the SPI to the Flash are in reverse order - else - // we read the data from the FLASH address 0x0, which corresponds to FLASH_MEM_START_ADDRESS - read_byte_cmd = ((REVERT_24b_ADDR(0x0) << 8) | 0x03); // The address bytes sent through the SPI to the Flash are in reverse order 
- const uint32_t cmd_read_rx = spi_create_command((spi_command_t){ // Single transaction - .len = COPY_DATA_NUM*sizeof(DATA_TYPE) - 1, // In bytes - 1 - .csaat = false, - .speed = SPI_SPEED_STANDARD, - .direction = SPI_DIR_RX_ONLY - }); - spi_write_word(spi_peri, read_byte_cmd); // Fill TX FIFO with TX data (read command + 3B address) - spi_wait_for_ready(spi_peri); // Wait for readiness to process commands - spi_set_command(spi_peri, cmd_read); // Send read command to the external device through SPI - spi_wait_for_ready(spi_peri); - spi_set_command(spi_peri, cmd_read_rx); // Receive data in RX - spi_wait_for_ready(spi_peri); - #else - const uint32_t cmd_read_rx = spi_create_command((spi_command_t){ // Multiple transactions of the data type - .len = (sizeof(DATA_TYPE) - 1), - .csaat = false, - .speed = SPI_SPEED_STANDARD, - .direction = SPI_DIR_RX_ONLY - }); - DATA_TYPE* flash_ptr = (DATA_TYPE *)flash_data_lma; - for (int i = 0; i +#include +#include + +#include "x-heep.h" +#include "w25q128jw.h" +#include "csr.h" +#include "core_v_mini_mcu.h" +#include "power_manager.h" + +/* By default, PRINTFs are activated for FPGA and disabled for simulation. */ +#define PRINTF_IN_FPGA 1 +#define PRINTF_IN_SIM 0 + +#if TARGET_SIM && PRINTF_IN_SIM + #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) +#elif PRINTF_IN_FPGA && !TARGET_SIM + #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) +#else + #define PRINTF(...) 
+#endif + +#if defined(TARGET_PYNQ_Z2) || defined(TARGET_ZCU104) || defined(TARGET_NEXYS_A7_100T) + #define USE_SPI_FLASH +#endif + +#define FLASH_ONLY_WORDS 32 +#define FLASH_ONLY_BYTES (FLASH_ONLY_WORDS*4) + +uint32_t on_chip_buffer[FLASH_ONLY_WORDS]; + +int32_t __attribute__((section(".xheep_data_flash_only"))) __attribute__ ((aligned (16))) flash_only_buffer[FLASH_ONLY_WORDS] = { + 0xABCDEF00, + 0xABCDEF01, + 0xABCDEF02, + 0xABCDEF03, + 0xABCDEF04, + 0xABCDEF05, + 0xABCDEF06, + 0xABCDEF07, + 0xABCDEF08, + 0xABCDEF09, + 0xABCDEF0A, + 0xABCDEF0B, + 0xABCDEF0C, + 0xABCDEF0D, + 0xABCDEF0E, + 0xABCDEF0F, + 0xABCDEF10, + 0xABCDEF11, + 0xABCDEF12, + 0xABCDEF13, + 0xABCDEF14, + 0xABCDEF15, + 0xABCDEF16, + 0xABCDEF17, + 0xABCDEF18, + 0xABCDEF19, + 0xABCDEF1A, + 0xABCDEF1B, + 0xABCDEF1C, + 0xABCDEF1D, + 0xABCDEF1E, + 0xABCDEF1F, +}; + +int32_t __attribute__ ((aligned (16))) flash_only_buffer_golden_value[FLASH_ONLY_WORDS] = { + 0xABCDEF00, + 0xABCDEF01, + 0xABCDEF02, + 0xABCDEF03, + 0xABCDEF04, + 0xABCDEF05, + 0xABCDEF06, + 0xABCDEF07, + 0xABCDEF08, + 0xABCDEF09, + 0xABCDEF0A, + 0xABCDEF0B, + 0xABCDEF0C, + 0xABCDEF0D, + 0xABCDEF0E, + 0xABCDEF0F, + 0xABCDEF10, + 0xABCDEF11, + 0xABCDEF12, + 0xABCDEF13, + 0xABCDEF14, + 0xABCDEF15, + 0xABCDEF16, + 0xABCDEF17, + 0xABCDEF18, + 0xABCDEF19, + 0xABCDEF1A, + 0xABCDEF1B, + 0xABCDEF1C, + 0xABCDEF1D, + 0xABCDEF1E, + 0xABCDEF1F, +}; + + +int main(int argc, char *argv[]) +{ + soc_ctrl_t soc_ctrl; + soc_ctrl.base_addr = mmio_region_from_addr((uintptr_t)SOC_CTRL_START_ADDRESS); + + if ( get_spi_flash_mode(&soc_ctrl) == SOC_CTRL_SPI_FLASH_MODE_SPIMEMIO ) { + PRINTF("This application cannot work with the memory mapped SPI FLASH" + "module - do not use the FLASH_EXEC linker script for this application\n"); + return EXIT_SUCCESS; + } + + // Pick the correct spi device based on simulation type + spi_host_t* spi; + #ifndef USE_SPI_FLASH + spi = spi_host1; + #else + spi = spi_flash; + #endif + + // Setup power_manager + power_manager_t 
power_manager; + mmio_region_t power_manager_reg = mmio_region_from_addr(POWER_MANAGER_START_ADDRESS); + power_manager.base_addr = power_manager_reg; + power_manager_counters_t power_manager_counters; + //counters + uint32_t reset_off, reset_on, switch_off, switch_on, iso_off, iso_on; + + //Turn off: first, isolate the CPU outputs, then I reset it, then I switch it off (reset and switch off order does not really matter) + iso_off = 10; + reset_off = iso_off + 5; + switch_off = reset_off + 5; + //Turn on: first, give back power by switching on, then deassert the reset, the unisolate the CPU outputs + switch_on = 10; + reset_on = switch_on + 20; //give 20 cycles to emulate the turn on time, this number depends on technology and here it is just a random number + iso_on = reset_on + 5; + + if (power_gate_counters_init(&power_manager_counters, reset_off, reset_on, switch_off, switch_on, iso_off, iso_on, 0, 0) != kPowerManagerOk_e) + { + PRINTF("Error: power manager fail. Check the reset and powergate counters value\n\r"); + return EXIT_FAILURE; + } + + // Define status variable + int32_t errors = 0; + + // Init SPI host and SPI<->Flash bridge parameters + if (w25q128jw_init(spi) != FLASH_OK) return EXIT_FAILURE; + + uint32_t *test_buffer_flash = heep_get_flash_address_offset(flash_only_buffer); + // Read from flash memory at the same address + w25q_error_codes_t status = w25q128jw_read_standard_dma_async(test_buffer_flash, on_chip_buffer, FLASH_ONLY_BYTES); + if (status != FLASH_OK) exit(EXIT_FAILURE); + + //wait for the DMA to finish in DEEP SLEEP mode + while (!dma_is_ready(0)) + { + CSR_CLEAR_BITS(CSR_REG_MSTATUS, 0x8); + if (dma_is_ready(0) == 0) + { + PRINTF("Going to sleep...\r\n"); + if (power_gate_core(&power_manager, kDma_pm_e, &power_manager_counters) != kPowerManagerOk_e) + { + PRINTF("Error: power manager fail.\n\r"); + return EXIT_FAILURE; + } + PRINTF("Woken up...\r\n"); + + } + CSR_SET_BITS(CSR_REG_MSTATUS, 0x8); + } + + + PRINTF("Check results...\r\n"); 
+ + // Check if what we read is correct (i.e. on_chip_buffer == flash_only_buffer_golden_value) + for(int i = 0; i < FLASH_ONLY_WORDS; i++) { + if (on_chip_buffer[i] != flash_only_buffer_golden_value[i]) { + errors++; + PRINTF("Error: on_chip_buffer[%d] = 0x%08x, flash_only_buffer_golden_value[%d] = 0x%08x\n", i, on_chip_buffer[i], i, flash_only_buffer_golden_value[i]); + } + } + + if(errors==0) PRINTF("TEST RUN SUCCEFFULLY\r\n"); + + return errors; +} diff --git a/sw/device/bsp/w25q/w25q.c b/sw/device/bsp/w25q/w25q.c index ecee8670f..04908d8ea 100644 --- a/sw/device/bsp/w25q/w25q.c +++ b/sw/device/bsp/w25q/w25q.c @@ -441,8 +441,8 @@ w25q_error_codes_t w25q128jw_erase_and_write_standard(uint32_t addr, void* data, } - w25q_error_codes_t w25q128jw_read_standard_dma(uint32_t addr, void *data, uint32_t length, uint8_t no_wait_dma, uint8_t no_sanity_checks) { + // Sanity checks if (!no_sanity_checks) if (w25q128jw_sanity_checks(addr, data, length) != FLASH_OK) return FLASH_ERROR; @@ -540,6 +540,98 @@ w25q_error_codes_t w25q128jw_read_standard_dma(uint32_t addr, void *data, uint32 return FLASH_OK; } +w25q_error_codes_t w25q128jw_read_standard_dma_async(uint32_t addr, void *data, uint32_t length) { + + // Sanity checks + if (w25q128jw_sanity_checks(addr, data, length) != FLASH_OK) return FLASH_ERROR; + + // Take into account the extra bytes (if any) + if (length % 4 != 0) { + //only multiple of 4 bytes are supported in this function + return FLASH_ERROR; + } + + /* + * SET UP DMA + */ + // SPI and SPI_FLASH are the same IP so same register map + uint32_t *fifo_ptr_rx = (uint32_t *)((uintptr_t)spi + SPI_HOST_RXDATA_REG_OFFSET); + + // Init DMA, the integrated DMA is used (peri == NULL) + dma_init(NULL); + + // The DMA will wait for the SPI HOST/FLASH RX FIFO valid signal + #ifndef USE_SPI_FLASH + uint8_t slot = DMA_TRIG_SLOT_SPI_RX; + #else + uint8_t slot = DMA_TRIG_SLOT_SPI_FLASH_RX; + #endif + + // Set up DMA source target + static dma_target_t tgt_src = { + .inc_du = 0, 
// Target is peripheral, no increment + .type = DMA_DATA_TYPE_WORD, // Data type is word + }; + // Size is in data units (words in this case) + tgt_src.size_du = length>>2; + // Target is SPI RX FIFO + tgt_src.ptr = (uint8_t*)fifo_ptr_rx; + // Trigger to control the data flow + tgt_src.trig = slot; + + // Set up DMA destination target + static dma_target_t tgt_dst = { + .inc_du = 1, // Increment by 1 data unit (word) + .type = DMA_DATA_TYPE_WORD, // Data type is word + .trig = DMA_TRIG_MEMORY, // Read-write operation to memory + }; + tgt_dst.ptr = (uint8_t*)data; // Target is the data buffer + + // Set up DMA transaction + static dma_trans_t trans = { + .src = &tgt_src, + .dst = &tgt_dst, + .end = DMA_TRANS_END_INTR, //so that you can wait for interrupt + }; + // Validate, load and launch DMA transaction + dma_config_flags_t res; + res = dma_validate_transaction(&trans, DMA_ENABLE_REALIGN, DMA_PERFORM_CHECKS_INTEGRITY ); + res = dma_load_transaction(&trans); + res = dma_launch(&trans); + + // Address + Read command + uint32_t read_byte_cmd = ((REVERT_24b_ADDR(addr & 0x00ffffff) << 8) | FC_RD); + // Load command to TX FIFO + spi_write_word(spi, read_byte_cmd); + spi_wait_for_ready(spi); + + // Set up segment parameters -> send command and address + const uint32_t cmd_read_1 = spi_create_command((spi_command_t){ + .len = 3, // 4 Bytes + .csaat = true, // Command not finished + .speed = SPI_SPEED_STANDARD, // Single speed + .direction = SPI_DIR_TX_ONLY // Write only + }); + // Load segment parameters to COMMAND register + spi_set_command(spi, cmd_read_1); + spi_wait_for_ready(spi); + + // Set up segment parameters -> read length bytes + const uint32_t cmd_read_2 = spi_create_command((spi_command_t){ + .len = length-1, // len bytes + .csaat = false, // End command + .speed = SPI_SPEED_STANDARD, // Single speed + .direction = SPI_DIR_RX_ONLY // Read only + }); + spi_set_command(spi, cmd_read_2); + spi_wait_for_ready(spi); + + // Wait for DMA to finish transaction
outside this function; the DMA also generates an interrupt + // However, you need to enable the interrupt in the INT controllers and in the CPU + + return FLASH_OK; +} + w25q_error_codes_t w25q128jw_write_standard_dma(uint32_t addr, void *data, uint32_t length) { // Call the wrapper with quad = 0, dma = 1 diff --git a/sw/device/bsp/w25q/w25q128jw.h b/sw/device/bsp/w25q/w25q128jw.h index e99e2985e..1bc8b5e4d 100644 --- a/sw/device/bsp/w25q/w25q128jw.h +++ b/sw/device/bsp/w25q/w25q128jw.h @@ -269,6 +269,15 @@ w25q_error_codes_t w25q128jw_erase_and_write_standard(uint32_t addr, void* data, */ w25q_error_codes_t w25q128jw_read_standard_dma(uint32_t addr, void *data, uint32_t length, uint8_t no_wait_dma, uint8_t no_sanity_checks); +/** + * @brief Read from flash at standard speed using DMA without waiting for the transfer to finish; the application must wait for the DMA itself + * + * @param addr 24-bit flash address to read from. + * @param data pointer to the data buffer. + * @param length number of bytes to read, must be a multiple of 4 + * @return FLASH_OK if the read is launched successfully, @ref error_codes otherwise. +*/ +w25q_error_codes_t w25q128jw_read_standard_dma_async(uint32_t addr, void *data, uint32_t length); /** * @brief Write to flash at standard speed using DMA.
Use this function only to write to unitialized data diff --git a/sw/device/lib/drivers/power_manager/power_manager.c b/sw/device/lib/drivers/power_manager/power_manager.c index 4a9c37a85..066771218 100644 --- a/sw/device/lib/drivers/power_manager/power_manager.c +++ b/sw/device/lib/drivers/power_manager/power_manager.c @@ -15,174 +15,7 @@ #include "x-heep.h" - -void __attribute__ ((noinline)) power_gate_core_asm() -{ - asm volatile ( - - // write POWER_GATE_CORE[0] = 1 - "lui a0, %[base_address_20bit]\n" - "li a1, 1\n" - "sw a1, %[power_manager_power_gate_core_reg_offset](a0)\n" - - // write WAKEUP_STATE[0] = 1 - "sw a1, %[power_manager_wakeup_state_reg_offset](a0)\n" : : \ - \ - [base_address_20bit] "i" (POWER_MANAGER_START_ADDRESS >> 12), \ - [power_manager_power_gate_core_reg_offset] "i" (POWER_MANAGER_POWER_GATE_CORE_REG_OFFSET), \ - [power_manager_wakeup_state_reg_offset] "i" (POWER_MANAGER_WAKEUP_STATE_REG_OFFSET) : "a0", "a1" \ - ); - - asm volatile ( - - // write registers - "la a0, __power_manager_start\n" - "sw x1, 0(a0)\n" - "sw x2, 4(a0)\n" - "sw x3, 8(a0)\n" - "sw x4, 12(a0)\n" - "sw x5, 16(a0)\n" - "sw x6, 20(a0)\n" - "sw x7, 24(a0)\n" - "sw x8, 28(a0)\n" - "sw x9, 32(a0)\n" - "sw x10, 36(a0)\n" - "sw x11, 40(a0)\n" - "sw x12, 44(a0)\n" - "sw x13, 48(a0)\n" - "sw x14, 52(a0)\n" - "sw x15, 56(a0)\n" - "sw x16, 60(a0)\n" - "sw x17, 64(a0)\n" - "sw x18, 68(a0)\n" - "sw x19, 72(a0)\n" - "sw x20, 76(a0)\n" - "sw x21, 80(a0)\n" - "sw x22, 88(a0)\n" - "sw x23, 92(a0)\n" - "sw x24, 96(a0)\n" - "sw x25, 100(a0)\n" - "sw x26, 104(a0)\n" - "sw x27, 108(a0)\n" - "sw x28, 112(a0)\n" - "sw x29, 116(a0)\n" - "sw x30, 120(a0)\n" - "sw x31, 124(a0)\n" - //csr - "csrr a1, mstatus\n" - "sw a1, 128(a0)\n" - "csrr a1, mie\n" - "sw a1, 132(a0)\n" - "csrr a1, mtvec\n" - "sw a1, 136(a0)\n" - "csrr a1, mscratch\n" - "sw a1, 140(a0)\n" - "csrr a1, mepc\n" - "sw a1, 144(a0)\n" - "csrr a1, mcause\n" - "sw a1, 148(a0)\n" - "csrr a1, mtval\n" - "sw a1, 152(a0)\n" - "csrr a1, 
mcycle\n" - "sw a1, 156(a0)\n" - "csrr a1, minstret\n" - "sw a1, 160(a0)\n" : : : "a0", "a1" \ - ); - - asm volatile ( - - // write RESTORE_ADDRESS[31:0] = PC - "lui a0, %[base_address_20bit]\n" - "la a1, wakeup\n" - "sw a1, %[power_manager_restore_address_reg_offset](a0)\n" - - // wait for interrupt - "wfi\n" - - // ---------------------------- - // power-gate - // ---------------------------- - - // ---------------------------- - // wake-up - // ---------------------------- - - // write POWER_GATE_CORE[0] = 0 - "wakeup:" - "lui a0, %[base_address_20bit]\n" - "sw x0, %[power_manager_power_gate_core_reg_offset](a0)\n" - - // write WAKEUP_STATE[0] = 0 - "sw x0, %[power_manager_wakeup_state_reg_offset](a0)\n" - - // write RESTORE_ADDRESS[31:0] = 0 - "sw x0, %[power_manager_restore_address_reg_offset](a0)\n" : : \ - \ - [base_address_20bit] "i" (POWER_MANAGER_START_ADDRESS >> 12), \ - [power_manager_power_gate_core_reg_offset] "i" (POWER_MANAGER_POWER_GATE_CORE_REG_OFFSET), \ - [power_manager_wakeup_state_reg_offset] "i" (POWER_MANAGER_WAKEUP_STATE_REG_OFFSET), \ - [power_manager_restore_address_reg_offset] "i" (POWER_MANAGER_RESTORE_ADDRESS_REG_OFFSET) : "a0", "a1" \ - ); - - asm volatile ( - - // write CORE_REG_Xn[31:0] = Xn - "la a0, __power_manager_start\n" - //one of the following load is gonna overwrite a0, but a0 was already stored before to the right value - "lw x1, 0(a0)\n" - "lw x2, 4(a0)\n" - "lw x3, 8(a0)\n" - "lw x4, 12(a0)\n" - "lw x5, 16(a0)\n" - "lw x6, 20(a0)\n" - "lw x7, 24(a0)\n" - "lw x8, 28(a0)\n" - "lw x9, 32(a0)\n" - "lw x10, 36(a0)\n" - "lw x11, 40(a0)\n" - "lw x12, 44(a0)\n" - "lw x13, 48(a0)\n" - "lw x14, 52(a0)\n" - "lw x15, 56(a0)\n" - "lw x16, 60(a0)\n" - "lw x17, 64(a0)\n" - "lw x18, 68(a0)\n" - "lw x19, 72(a0)\n" - "lw x20, 76(a0)\n" - "lw x21, 80(a0)\n" - "lw x22, 88(a0)\n" - "lw x23, 92(a0)\n" - "lw x24, 96(a0)\n" - "lw x25, 100(a0)\n" - "lw x26, 104(a0)\n" - "lw x27, 108(a0)\n" - "lw x28, 112(a0)\n" - "lw x29, 116(a0)\n" - "lw x30, 
120(a0)\n" - "lw x31, 124(a0)\n" - //csr - "lw a1, 128(a0)\n" - "csrw mstatus, a1\n" - "lw a1, 132(a0)\n" - "csrw mie, a1\n" - "lw a1, 136(a0)\n" - "csrw mtvec, a1\n" - "lw a1, 140(a0)\n" - "csrw mscratch, a1\n" - "lw a1, 144(a0)\n" - "csrw mepc, a1\n" - "lw a1, 148(a0)\n" - "csrw mcause, a1\n" - "lw a1, 152(a0)\n" - "csrw mtval, a1\n" - "lw a1, 156(a0)\n" - "csrw mcycle, a1\n" - "lw a1, 160(a0)\n" - "csrw minstret, a1\n": : : "a0", "a1" \ - ); - - return; -} +extern void power_manager_cpu_store(); power_manager_result_t __attribute__ ((noinline)) power_gate_core(const power_manager_t *power_manager, power_manager_sel_intr_t sel_intr, power_manager_counters_t* cpu_counter) { @@ -201,14 +34,11 @@ power_manager_result_t __attribute__ ((noinline)) power_gate_core(const power_ma mmio_region_write32(power_manager->base_addr, (ptrdiff_t)(POWER_MANAGER_INTR_STATE_REG_OFFSET), 0x0); // enable wait for SWITCH ACK - #ifdef TARGET_PYNQ_Z2 - reg = bitfield_bit32_write(reg, POWER_MANAGER_CPU_WAIT_ACK_SWITCH_ON_COUNTER_CPU_WAIT_ACK_SWITCH_ON_COUNTER_BIT, 0x0); - #else - reg = bitfield_bit32_write(reg, POWER_MANAGER_CPU_WAIT_ACK_SWITCH_ON_COUNTER_CPU_WAIT_ACK_SWITCH_ON_COUNTER_BIT, 0x1); - #endif + reg = bitfield_bit32_write(reg, POWER_MANAGER_CPU_WAIT_ACK_SWITCH_ON_COUNTER_CPU_WAIT_ACK_SWITCH_ON_COUNTER_BIT, 0x1); + mmio_region_write32(power_manager->base_addr, (ptrdiff_t)(POWER_MANAGER_CPU_WAIT_ACK_SWITCH_ON_COUNTER_REG_OFFSET), reg); - power_gate_core_asm(); + power_manager_cpu_store(); // clean up states mmio_region_write32(power_manager->base_addr, (ptrdiff_t)(POWER_MANAGER_EN_WAIT_FOR_INTR_REG_OFFSET), 0x0); @@ -231,11 +61,7 @@ power_manager_result_t __attribute__ ((noinline)) power_gate_periph(const power_ { uint32_t reg = 0; - #ifdef TARGET_PYNQ_Z2 - mmio_region_write32(power_manager->base_addr, (ptrdiff_t)(POWER_MANAGER_PERIPH_WAIT_ACK_SWITCH_ON_REG_OFFSET), 0x0); - #else - mmio_region_write32(power_manager->base_addr, 
(ptrdiff_t)(POWER_MANAGER_PERIPH_WAIT_ACK_SWITCH_ON_REG_OFFSET), 0x1); - #endif + mmio_region_write32(power_manager->base_addr, (ptrdiff_t)(POWER_MANAGER_PERIPH_WAIT_ACK_SWITCH_ON_REG_OFFSET), 0x1); if (sel_state == kOn_e) { @@ -265,11 +91,7 @@ power_manager_result_t __attribute__ ((noinline)) power_gate_ram_block(const pow if (sel_state == kOn_e) { - #ifdef TARGET_PYNQ_Z2 - mmio_region_write32(power_manager->base_addr, (ptrdiff_t)(power_manager_ram_map[sel_block].wait_ack_switch), 0x0); - #else - mmio_region_write32(power_manager->base_addr, (ptrdiff_t)(power_manager_ram_map[sel_block].wait_ack_switch), 0x1); - #endif + mmio_region_write32(power_manager->base_addr, (ptrdiff_t)(power_manager_ram_map[sel_block].wait_ack_switch), 0x1); for (int i=0; iswitch_on; i++) asm volatile ("nop\n;"); mmio_region_write32(power_manager->base_addr, (ptrdiff_t)(power_manager_ram_map[sel_block].switch_off), 0x0); for (int i=0; iiso_off; i++) asm volatile ("nop\n;"); @@ -277,11 +99,7 @@ power_manager_result_t __attribute__ ((noinline)) power_gate_ram_block(const pow } else if (sel_state == kOff_e) { - #ifdef TARGET_PYNQ_Z2 - mmio_region_write32(power_manager->base_addr, (ptrdiff_t)(power_manager_ram_map[sel_block].wait_ack_switch), 0x0); - #else - mmio_region_write32(power_manager->base_addr, (ptrdiff_t)(power_manager_ram_map[sel_block].wait_ack_switch), 0x1); - #endif + mmio_region_write32(power_manager->base_addr, (ptrdiff_t)(power_manager_ram_map[sel_block].wait_ack_switch), 0x1); for (int i=0; iiso_on; i++) asm volatile ("nop\n;"); mmio_region_write32(power_manager->base_addr, (ptrdiff_t)(power_manager_ram_map[sel_block].iso), 0x1); for (int i=0; iswitch_off; i++) asm volatile ("nop\n;"); @@ -309,11 +127,7 @@ power_manager_result_t __attribute__ ((noinline)) power_gate_external(const powe if (sel_state == kOn_e) { - #ifdef TARGET_PYNQ_Z2 - mmio_region_write32(power_manager->base_addr, (ptrdiff_t)(power_manager_external_map[sel_external].wait_ack_switch), 0x0); - #else - 
mmio_region_write32(power_manager->base_addr, (ptrdiff_t)(power_manager_external_map[sel_external].wait_ack_switch), 0x1); - #endif + mmio_region_write32(power_manager->base_addr, (ptrdiff_t)(power_manager_external_map[sel_external].wait_ack_switch), 0x1); for (int i=0; iswitch_on; i++) asm volatile ("nop\n;"); mmio_region_write32(power_manager->base_addr, (ptrdiff_t)(power_manager_external_map[sel_external].switch_off), 0x0); for (int i=0; iiso_off; i++) asm volatile ("nop\n;"); @@ -323,11 +137,7 @@ power_manager_result_t __attribute__ ((noinline)) power_gate_external(const powe } else if (sel_state == kOff_e) { - #ifdef TARGET_PYNQ_Z2 - mmio_region_write32(power_manager->base_addr, (ptrdiff_t)(power_manager_external_map[sel_external].wait_ack_switch), 0x0); - #else - mmio_region_write32(power_manager->base_addr, (ptrdiff_t)(power_manager_external_map[sel_external].wait_ack_switch), 0x1); - #endif + mmio_region_write32(power_manager->base_addr, (ptrdiff_t)(power_manager_external_map[sel_external].wait_ack_switch), 0x1); for (int i=0; iiso_on; i++) asm volatile ("nop\n;"); mmio_region_write32(power_manager->base_addr, (ptrdiff_t)(power_manager_external_map[sel_external].iso), 0x1); for (int i=0; iswitch_off; i++) asm volatile ("nop\n;"); diff --git a/sw/device/lib/drivers/power_manager/power_manager_cpu_restore.S b/sw/device/lib/drivers/power_manager/power_manager_cpu_restore.S new file mode 100644 index 000000000..00c9f9f3c --- /dev/null +++ b/sw/device/lib/drivers/power_manager/power_manager_cpu_restore.S @@ -0,0 +1,88 @@ +// Copyright 2024 EPFL +// Solderpad Hardware License, Version 2.1, see LICENSE.md for details. +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + +#include "core_v_mini_mcu.h" +#include "power_manager_regs.h" // Generated. 
+ +# power_manager_cpu.S +# This function re-stores the CPU context when back from (deep) sleep +.global power_manager_cpu_restore +.type power_manager_cpu_restore, @function + +#define POWER_MANAGER_START_ADDRESS_20bit (POWER_MANAGER_START_ADDRESS >> 12) + + +power_manager_cpu_restore: + + //using lui to load the upper 20 bits of the address instead of la as I want to be sure no other registers are used + lui t0, POWER_MANAGER_START_ADDRESS_20bit + sw x0, POWER_MANAGER_POWER_GATE_CORE_REG_OFFSET(t0) + + // write WAKEUP_STATE[0] = 0 + sw x0, POWER_MANAGER_WAKEUP_STATE_REG_OFFSET(t0) + + // write RESTORE_ADDRESS[31:0] = 0 + sw x0, POWER_MANAGER_RESTORE_ADDRESS_REG_OFFSET(t0) + + // restore gp as it is gonna be used to calculate the address of __power_manager_start + lw gp, POWER_MANAGER_GLOBAL_POINTER_REG_OFFSET(t0) + + // write CORE_REG_Xn[31:0] = Xn + la t0, __power_manager_start + // restore context, this part could be optimized + + //one of the following load is gonna overwrite t0, but t0 was already stored before to the right value + lw x1, 0(t0) + lw x2, 4(t0) + lw x3, 8(t0) + lw x4, 12(t0) + lw x5, 16(t0) + lw x6, 20(t0) + lw x7, 24(t0) + lw x8, 28(t0) + lw x9, 32(t0) + lw x10, 36(t0) + lw x11, 40(t0) + lw x12, 44(t0) + lw x13, 48(t0) + lw x14, 52(t0) + lw x15, 56(t0) + lw x16, 60(t0) + lw x17, 64(t0) + lw x18, 68(t0) + lw x19, 72(t0) + lw x20, 76(t0) + lw x21, 80(t0) + lw x22, 88(t0) + lw x23, 92(t0) + lw x24, 96(t0) + lw x25, 100(t0) + lw x26, 104(t0) + lw x27, 108(t0) + lw x28, 112(t0) + lw x29, 116(t0) + lw x30, 120(t0) + lw x31, 124(t0) + //csr + lw t1, 128(t0) + csrw mstatus, t1 + lw t1, 132(t0) + csrw mie, t1 + lw t1, 136(t0) + csrw mtvec, t1 + lw t1, 140(t0) + csrw mscratch, t1 + lw t1, 144(t0) + csrw mepc, t1 + lw t1, 148(t0) + csrw mcause, t1 + lw t1, 152(t0) + csrw mtval, t1 + lw t1, 156(t0) + csrw mcycle, t1 + lw t1, 160(t0) + csrw minstret, t1 + + + ret \ No newline at end of file diff --git 
a/sw/device/lib/drivers/power_manager/power_manager_cpu_store.S b/sw/device/lib/drivers/power_manager/power_manager_cpu_store.S new file mode 100644 index 000000000..92e1ca8e0 --- /dev/null +++ b/sw/device/lib/drivers/power_manager/power_manager_cpu_store.S @@ -0,0 +1,88 @@ +// Copyright 2024 EPFL +// Solderpad Hardware License, Version 2.1, see LICENSE.md for details. +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + +#include "core_v_mini_mcu.h" +#include "power_manager_regs.h" // Generated. + +# power_manager_cpu_store.S +# This function stores the CPU context, goes to (deep) sleep with WFI +.global power_manager_cpu_store # make the function visible to the linker +.type power_manager_cpu_store, @function + +power_manager_cpu_store: + + // write POWER_GATE_CORE[0] = 1 + la t0, POWER_MANAGER_START_ADDRESS + li t1, 1 + sw t1, POWER_MANAGER_POWER_GATE_CORE_REG_OFFSET(t0) + + // write WAKEUP_STATE[0] = 1, this is checked in the bootrom at reset time when waking up + sw t1, POWER_MANAGER_WAKEUP_STATE_REG_OFFSET(t0) + + // save the global pointer, _power_manager_start is the start of the power_manager section and saved in the gp reg + // when returning from deep sleep, the gp register is used to restore the context, thus saving it in an always on register + sw gp, POWER_MANAGER_GLOBAL_POINTER_REG_OFFSET(t0) + + // save context, this part could be optimized + la t0, __power_manager_start + sw x1, 0(t0) + sw x2, 4(t0) + sw x3, 8(t0) + sw x4, 12(t0) + sw x5, 16(t0) + sw x6, 20(t0) + sw x7, 24(t0) + sw x8, 28(t0) + sw x9, 32(t0) + sw x10, 36(t0) + sw x11, 40(t0) + sw x12, 44(t0) + sw x13, 48(t0) + sw x14, 52(t0) + sw x15, 56(t0) + sw x16, 60(t0) + sw x17, 64(t0) + sw x18, 68(t0) + sw x19, 72(t0) + sw x20, 76(t0) + sw x21, 80(t0) + sw x22, 88(t0) + sw x23, 92(t0) + sw x24, 96(t0) + sw x25, 100(t0) + sw x26, 104(t0) + sw x27, 108(t0) + sw x28, 112(t0) + sw x29, 116(t0) + sw x30, 120(t0) + sw x31, 124(t0) + //csr + csrr t1, mstatus + sw t1, 128(t0) + csrr t1, mie + sw t1,
132(t0) + csrr t1, mtvec + sw t1, 136(t0) + csrr t1, mscratch + sw t1, 140(t0) + csrr t1, mepc + sw t1, 144(t0) + csrr t1, mcause + sw t1, 148(t0) + csrr t1, mtval + sw t1, 152(t0) + csrr t1, mcycle + sw t1, 156(t0) + csrr t1, minstret + sw t1, 160(t0) + + la t0, POWER_MANAGER_START_ADDRESS + //save return address to restore + la t1, power_manager_cpu_restore + sw t1, POWER_MANAGER_RESTORE_ADDRESS_REG_OFFSET(t0) + + // wait for interrupt + wfi + + ret \ No newline at end of file diff --git a/sw/linker/link_flash_load.ld.tpl b/sw/linker/link_flash_load.ld.tpl index cb801f864..9e7b637f7 100644 --- a/sw/linker/link_flash_load.ld.tpl +++ b/sw/linker/link_flash_load.ld.tpl @@ -60,7 +60,8 @@ SECTIONS { KEEP (*(.text.spi_wait_for_rx_watermark*)) KEEP (*(.text.spi_read_word*)) KEEP (*(.text.memcpy)) - KEEP (*(.text.w25q128jw_read_standard*)) /* as this function is used in the crt0, link it in the top, should be before 1024 Bytes loaded by the bootrom */ + KEEP (*(.text.w25q128jw_read_standard)) /* as this function is used in the crt0, link it in the top, should be before 1024 Bytes loaded by the bootrom */ + KEEP (*(.text.w25q128jw_read_standard.*)) /* sometimes the function is renamed as w25q128jw_read_standard.part */ *(.xheep_init_data_crt0) /* this global variables are used in the crt0 */ KEEP (*_bswapsi2*(.text)) /* this function is used in the w25q128jw_read_standard */ } >ram0 AT >FLASH0 diff --git a/tb/testharness.sv b/tb/testharness.sv index 4631f35c6..9008beb4d 100644 --- a/tb/testharness.sv +++ b/tb/testharness.sv @@ -327,7 +327,7 @@ module testharness #( always_ff @(negedge clk_i) begin tb_cpu_subsystem_powergate_switch_ack_n[0] <= x_heep_system_i.cpu_subsystem_powergate_switch_n; tb_peripheral_subsystem_powergate_switch_ack_n[0] <= x_heep_system_i.peripheral_subsystem_powergate_switch_n; - tb_memory_subsystem_banks_powergate_switch_ack_n[0] <= x_heep_system_i.memory_subsystem_banks_powergate_switch_n; + tb_memory_subsystem_banks_powergate_switch_ack_n[0] <= 
x_heep_system_i.core_v_mini_mcu_i.memory_subsystem_banks_powergate_switch_n; tb_external_subsystem_powergate_switch_ack_n[0] <= external_subsystem_powergate_switch_n; for (int i = 0; i < SWITCH_ACK_LATENCY; i++) begin tb_memory_subsystem_banks_powergate_switch_ack_n[i+1] <= tb_memory_subsystem_banks_powergate_switch_ack_n[i]; @@ -346,12 +346,12 @@ module testharness #( `ifndef VERILATOR force x_heep_system_i.core_v_mini_mcu_i.cpu_subsystem_powergate_switch_ack_ni = delayed_tb_cpu_subsystem_powergate_switch_ack_n; force x_heep_system_i.core_v_mini_mcu_i.peripheral_subsystem_powergate_switch_ack_ni = delayed_tb_peripheral_subsystem_powergate_switch_ack_n; - force x_heep_system_i.core_v_mini_mcu_i.memory_subsystem_banks_powergate_switch_ack_ni = delayed_tb_memory_subsystem_banks_powergate_switch_ack_n; + force x_heep_system_i.core_v_mini_mcu_i.memory_subsystem_banks_powergate_switch_ack_n = delayed_tb_memory_subsystem_banks_powergate_switch_ack_n; force external_subsystem_powergate_switch_ack_n = delayed_tb_external_subsystem_powergate_switch_ack_n; `else x_heep_system_i.cpu_subsystem_powergate_switch_ack_n = delayed_tb_cpu_subsystem_powergate_switch_ack_n; x_heep_system_i.peripheral_subsystem_powergate_switch_ack_n = delayed_tb_peripheral_subsystem_powergate_switch_ack_n; - x_heep_system_i.memory_subsystem_banks_powergate_switch_ack_n = delayed_tb_memory_subsystem_banks_powergate_switch_ack_n; + x_heep_system_i.core_v_mini_mcu_i.memory_subsystem_banks_powergate_switch_ack_n = delayed_tb_memory_subsystem_banks_powergate_switch_ack_n; external_subsystem_powergate_switch_ack_n = delayed_tb_external_subsystem_powergate_switch_ack_n; `endif end