Skip to content

Commit

Permalink
First part of universal binary (Pi0,1,2,3) conversion
Browse files Browse the repository at this point in the history
  • Loading branch information
IanSB committed Nov 21, 2021
1 parent 05b649f commit 60453dc
Show file tree
Hide file tree
Showing 9 changed files with 156 additions and 135 deletions.
17 changes: 9 additions & 8 deletions src/arm-exception.c
Expand Up @@ -4,14 +4,15 @@
#include "rpi-interrupts.h"
//#include "tube-defs.h"
#include "startup.h"
#include "defs.h"

// From here: https://www.raspberrypi.org/forums/viewtopic.php?f=72&t=53862
void reboot_now(void)
{
const int PM_PASSWORD = 0x5a000000;
const int PM_RSTC_WRCFG_FULL_RESET = 0x00000020;
unsigned int *PM_WDOG = (unsigned int *) (_get_peripheral_base() + 0x00100024);
unsigned int *PM_RSTC = (unsigned int *) (_get_peripheral_base() + 0x0010001c);
//const int PM_PASSWORD = 0x5a000000;
//const int PM_RSTC_WRCFG_FULL_RESET = 0x00000020;
//unsigned int *PM_WDOG = (unsigned int *) (_get_peripheral_base() + 0x00100024);
//unsigned int *PM_RSTC = (unsigned int *) (_get_peripheral_base() + 0x0010001c);

// timeout = 1/16th of a second? (whatever)
*PM_WDOG = PM_PASSWORD | 1;
Expand Down Expand Up @@ -75,10 +76,10 @@ void dump_info(unsigned int *context, int offset, char *type) {
// The stacked LR points one or two words after the exception address
addr = (unsigned int *)((reg[13] & ~3) - offset);
dump_hex((unsigned int)addr);
#ifdef HAS_MULTICORE
dump_string(" on core ");
dump_digit(_get_core());
#endif
if (_get_hardware_id() >= _RPI2) {
dump_string(" on core ");
dump_digit(_get_core());
}
dump_string("\r\n");
dump_string("Registers:\r\n");
for (i = 0; i <= 13; i++) {
Expand Down
81 changes: 47 additions & 34 deletions src/armc-start.S
Expand Up @@ -28,6 +28,7 @@
// Relocate to just below 32MB

#include "defs.h"
#include "rpi-base.h"

.equ STACK_SIZE, 0x00100000

Expand All @@ -38,14 +39,12 @@
.equ C0_ABORT_STACK, STACK_SIZE*5
.equ C0_UNDEFINED_STACK, STACK_SIZE*6

#if defined(RPI2) || defined(RPI3) || defined(RPI4)
.equ C1_SVR_STACK, STACK_SIZE*7
.equ C1_IRQ_STACK, STACK_SIZE*8
.equ C1_FIQ_STACK, STACK_SIZE*9
.equ C1_USER_STACK, STACK_SIZE*10
.equ C1_ABORT_STACK, STACK_SIZE*11
.equ C1_UNDEFINED_STACK, STACK_SIZE*12
#endif

.equ SCTLR_ENABLE_DATA_CACHE, 0x4
.equ SCTLR_ENABLE_BRANCH_PREDICTION, 0x800
Expand Down Expand Up @@ -79,11 +78,9 @@
.global _get_gpu_data_base_r4
.global _get_gpu_command_base_r10

#ifdef HAS_MULTICORE
.global _get_core
.global _init_core
.global _spin_core
#endif

// From the ARM ARM (Architecture Reference Manual). Make sure you get the
// ARMv5 documentation which includes the ARMv6 documentation which is the
Expand Down Expand Up @@ -141,8 +138,9 @@ _reset_:
#endif
// BL _enable_l1_cache

#ifdef HAS_MULTICORE

bl _get_hardware_id
cmp r0, #_RPI2
blt rpi0_1_d
#ifdef KERNEL_OLD

// if kernel_old=1 all cores are running and we need to sleep 1-3
Expand Down Expand Up @@ -192,8 +190,7 @@ _not_in_hyp_mode:
_reset_continue:
#endif

#endif

rpi0_1_d:
// We enter execution in supervisor mode. For more information on
// processor modes see ARM Section A2.2 (Processor Modes)

Expand Down Expand Up @@ -238,10 +235,10 @@ _reset_continue:

// Enable VFP ------------------------------------------------------------

#ifdef HAS_MULTICORE
bl _get_hardware_id
cmp r0, #_RPI2
bge rpi2_4_a


#else
// r1 = Access Control Register
MRC p15, #0, r1, c1, c0, #2
// enable full access for p10,11
Expand All @@ -256,7 +253,8 @@ _reset_continue:
MOV r0,#0x40000000
// FPEXC = r0
FMXR FPEXC, r0
#endif

rpi2_4_a:

// The c-startup function which we never return from. This function will
// initialise the ro data section (most things that have the const
Expand Down Expand Up @@ -338,26 +336,34 @@ _get_cpsr:

.section ".text._init_cycle_counter"
_init_cycle_counter:
        // Enable the cycle counter, and run at the ARM clock rate.
        //
        // The coprocessor register used is selected at run time from
        // _get_hardware_id, so a single binary covers both register layouts
        // (no compile-time RPI2/RPI3/RPI4 switch, which previously returned
        // before the run-time check could ever execute).
        //
        // In:    nothing
        // Out:   nothing (r0 is saved and restored)
        // Stack: two words, keeping sp 8-byte aligned across the call
        push    {r0, lr}
        bl      _get_hardware_id
        cmp     r0, #_RPI2
        blt     rpi0_1_a

        // Hardware id >= _RPI2: counter control lives in the c9 register group
        mov     r0, #7
        mcr     p15, 0, r0, c9, c12, 0
        mov     r0, #(1 << 31)                  // bit 31 selects the cycle counter
        mcr     p15, 0, r0, c9, c12, 1
        b       donerpi0_1_a

rpi0_1_a:
        // Hardware id < _RPI2: counter control lives in the c15 register group
        mov     r0, #7
        mcr     p15, 0, r0, c15, c12, 0

donerpi0_1_a:
        pop     {r0, pc}

.section ".text._get_cycle_counter"
_get_cycle_counter:
        // Read the CPU cycle counter.
        //
        // The coprocessor register used is selected at run time from
        // _get_hardware_id so a single binary covers both register layouts.
        //
        // In:    nothing
        // Out:   r0 = current cycle counter value
        // Stack: two words, keeping sp 8-byte aligned across the call
        //
        // r0 must NOT be in the save list: popping it on exit would overwrite
        // the counter value that was just read. r4 is saved purely as the
        // second word that keeps the stack 8-byte aligned.
        push    {r4, lr}
        bl      _get_hardware_id
        cmp     r0, #_RPI2
        blt     rpi0_1_b

        // Hardware id >= _RPI2: cycle count is read from c9, c13, 0
        mrc     p15, 0, r0, c9, c13, 0
        b       donerpi0_1_b

rpi0_1_b:
        // Hardware id < _RPI2: cycle count is read from c15, c12, 1
        mrc     p15, 0, r0, c15, c12, 1

donerpi0_1_b:
        pop     {r4, pc}

.section ".text._set_interrupts"
_set_interrupts:
Expand Down Expand Up @@ -480,13 +486,17 @@ _invalidate_dtlb_mva:

.section ".text._data_memory_barrier"
_data_memory_barrier:
        // Issue a data memory barrier.
        //
        // The barrier form is selected at run time from _get_hardware_id so a
        // single binary covers both cases; every path leaves through the same
        // pop so the stack always stays balanced.
        //
        // In:    nothing
        // Out:   nothing (r0 is saved and restored)
        // Stack: two words, keeping sp 8-byte aligned across the call
        push    {r0, lr}
        bl      _get_hardware_id
        cmp     r0, #_RPI2
        blt     rpi0_1_c

        // Hardware id >= _RPI2: dedicated barrier instruction.
        // NOTE(review): if this file is assembled for an ARMv6 target the
        // assembler may reject the "dmb" mnemonic - confirm the build flags.
        dmb
        b       donerpi0_1_c

rpi0_1_c:
        // Hardware id < _RPI2: barrier via the CP15 c7, c10, 5 operation,
        // written with a zero value
        mov     r0, #0
        mcr     p15, 0, r0, c7, c10, 5

donerpi0_1_c:
        pop     {r0, pc}

#ifdef USE_MULTICORE
.section ".text._init_core"
Expand Down Expand Up @@ -551,7 +561,10 @@ _init_continue:
bl run_core
#endif

#ifdef HAS_MULTICORE
bl _get_hardware_id
cmp r0, #_RPI2
blt rpi0_1_e

.section ".text._spin_core"
// If main does return for some reason, just catch it and stay here.
_spin_core:
Expand Down Expand Up @@ -581,8 +594,8 @@ _get_core:
mrc p15, 0, r0, c0, c0, 5
and r0, #3
mov pc, lr

#endif
rpi0_1_e:

// Default handlers for FIQ/IRQ do nothing

Expand Down
56 changes: 26 additions & 30 deletions src/cache.c
Expand Up @@ -22,8 +22,6 @@ const static int aa = 1;
const static int bb = 1;
const static int shareable = 1;

#if defined(RPI2) || defined (RPI3) || defined(RPI4)

#define SETWAY_LEVEL_SHIFT 1

// 4 ways x 128 sets x 64 bytes per line 32KB
Expand Down Expand Up @@ -134,7 +132,6 @@ void CleanDataCache (void)
}
}

#endif

// TLB 4KB Section Descriptor format
// 31..12 Section Base Address
Expand All @@ -157,11 +154,11 @@ void map_4k_page(int logical, int physical) {
// XP (bit 23) in SCTLR no longer exists, and we seem to be using ARMv6 table formats
// this means bit 0 of the page table is actually XN and must be clear to allow native ARM code to execute
// (this was the cause of issue #27)
#if defined(RPI2) || defined (RPI3) || defined(RPI4)
PageTable2[logical] = (physical<<12) | 0x132 | (bb << 6) | (aa << 2);
#else
PageTable2[logical] = (physical<<12) | 0x133 | (bb << 6) | (aa << 2);
#endif
if (_get_hardware_id() >= _RPI2) {
PageTable2[logical] = (physical<<12) | 0x132 | (bb << 6) | (aa << 2);
} else {
PageTable2[logical] = (physical<<12) | 0x133 | (bb << 6) | (aa << 2);
}
}

void enable_MMU_and_IDCaches(int cached_screen_area, int cached_screen_size)
Expand Down Expand Up @@ -296,34 +293,33 @@ void enable_MMU_and_IDCaches(int cached_screen_area, int cached_screen_size)
asm volatile ("mrc p15, 0, %0, c2, c0, 2" : "=r" (ttbcr));
//log_debug("ttbcr = %08x", ttbcr);

#if defined(RPI2) || defined(RPI3) || defined(RPI4)
// set TTBR0 - page table walk memory cacheability/shareable
// [Bit 0, Bit 6] indicates inner cachability: 01 = normal memory, inner write-back write-allocate cacheable
// [Bit 4, Bit 3] indicates outer cachability: 01 = normal memory, outer write-back write-allocate cacheable
// Bit 1 indicates sharable
// 4A = 0100 1010
int attr = ((aa & 1) << 6) | (bb << 3) | (shareable << 1) | ((aa & 2) >> 1);
asm volatile ("mcr p15, 0, %0, c2, c0, 0" :: "r" (attr | (unsigned) &PageTable));
#else
// set TTBR0 (page table walk inner cacheable, outer non-cacheable, shareable memory)
asm volatile ("mcr p15, 0, %0, c2, c0, 0" :: "r" (0x03 | (unsigned) &PageTable));
#endif
if (_get_hardware_id() >= _RPI2) {
// set TTBR0 - page table walk memory cacheability/shareable
// [Bit 0, Bit 6] indicates inner cachability: 01 = normal memory, inner write-back write-allocate cacheable
// [Bit 4, Bit 3] indicates outer cachability: 01 = normal memory, outer write-back write-allocate cacheable
// Bit 1 indicates sharable
// 4A = 0100 1010
int attr = ((aa & 1) << 6) | (bb << 3) | (shareable << 1) | ((aa & 2) >> 1);
asm volatile ("mcr p15, 0, %0, c2, c0, 0" :: "r" (attr | (unsigned) &PageTable));
} else {
// set TTBR0 (page table walk inner cacheable, outer non-cacheable, shareable memory)
asm volatile ("mcr p15, 0, %0, c2, c0, 0" :: "r" (0x03 | (unsigned) &PageTable));
}
unsigned ttbr0;
asm volatile ("mrc p15, 0, %0, c2, c0, 0" : "=r" (ttbr0));
//log_debug("ttbr0 = %08x", ttbr0);


// Invalidate entire data cache
#if defined(RPI2) || defined(RPI3) || defined(RPI4)
// asm volatile ("isb" ::: "memory");
asm volatile (".word 0xf57ff06f" ::: "memory");
InvalidateDataCache();
#else
// invalidate data cache and flush prefetch buffer
// NOTE: The below code seems to cause a Pi 2 to crash
asm volatile ("mcr p15, 0, %0, c7, c5, 4" :: "r" (0) : "memory");
asm volatile ("mcr p15, 0, %0, c7, c6, 0" :: "r" (0) : "memory");
#endif
if (_get_hardware_id() >= _RPI2) {
asm volatile (".word 0xf57ff06f" ::: "memory"); // asm volatile ("isb" ::: "memory"); (won't compile on arm v6)
InvalidateDataCache();
} else {
// invalidate data cache and flush prefetch buffer
// NOTE: The below code seems to cause a Pi 2 to crash
asm volatile ("mcr p15, 0, %0, c7, c5, 4" :: "r" (0) : "memory");
asm volatile ("mcr p15, 0, %0, c7, c6, 0" :: "r" (0) : "memory");
}

// enable MMU, L1 cache and instruction cache, L2 cache, write buffer,
// branch prediction and extended page table on
Expand Down
20 changes: 4 additions & 16 deletions src/defs.h
Expand Up @@ -11,27 +11,15 @@
//do not leave USE_ARM_CAPTURE uncommented during a release build as all versions will be ARM
//#define USE_ARM_CAPTURE //uncomment to select ARM capture build

#define USE_ALT_M7DEINTERLACE_CODE // uses re-ordered code for mode7 deinterlace
#define USE_CACHED_SCREEN // caches the upper half of the screen area and uses it for mode7 deinterlace
#define CACHED_SCREEN_OFFSET 0x00B00000 // offset to cached screen area
#define CACHED_SCREEN_SIZE 0x00100000 // size of cached screen area
#define USE_ALT_M7DEINTERLACE_CODE // uses re-ordered code for mode7 deinterlace

#if defined(RPI2) || defined(RPI3)
#define HAS_MULTICORE // indicates multiple cores are available
#if defined(USE_ARM_CAPTURE)
#define WARN_12BIT // warn that 9bpp & 12bpp won't work
#define HIDE_12BIT_PROFILES // 12 bit profile won't work on Pi zero2 etc
#define INHIBIT_DOUBLE_HEIGHT // inhibit line doubling as it causes memory stalls
#endif
#endif

#if defined(RPI4)
#define HAS_MULTICORE // indicates multiple cores are available
#define USE_CACHED_SCREEN // caches the upper half of the screen area and uses it for mode7 deinterlace
#define USE_ALT_M7DEINTERLACE_CODE // uses re-ordered code for mode7 deinterlace
#define MODE7_ALWAYS_ARM // always runs mode7 capture code on ARM
#endif

//#define WARN_12BIT // warn that 9bpp & 12bpp won't work
//#define HIDE_12BIT_PROFILES // 12 bit profile won't work on Pi zero2 etc
//#define INHIBIT_DOUBLE_HEIGHT // inhibit line doubling as it causes memory stalls
//#define USE_MULTICORE //can be used to add code in an extra core


Expand Down
8 changes: 2 additions & 6 deletions src/osd.c
Expand Up @@ -428,15 +428,11 @@ static param_t features[] = {
{ F_RETURN, "Return Position", "return", 0, 1, 1 },
{ F_DEBUG, "Debug", "debug", 0, 1, 1 },
{ F_DIRECTION, "Button Reverse", "button_reverse", 0, 1, 1 },
#if defined(RPI2) || defined(RPI3) || defined(RPI4)
{ F_OCLOCK_CPU, "Overclock CPU", "overclock_cpu", 0, 100, 1 },
{ F_OCLOCK_CORE, "Overclock Core", "overclock_core", 0, 125, 1 },
{ F_OCLOCK_SDRAM, "Overclock SDRAM", "overclock_sdram", 0, 175, 1 },
#else

{ F_OCLOCK_CPU, "Overclock CPU", "overclock_cpu", 0, 75, 1 },
{ F_OCLOCK_CORE, "Overclock Core", "overclock_core", 0, 175, 1 },
{ F_OCLOCK_SDRAM, "Overclock SDRAM", "overclock_sdram", 0, 175, 1 },
#endif

{ F_RSTATUS, "Powerup Message", "powerup_message", 0, 1, 1 },
{ F_FRONTEND, "Interface", "interface", 0, NUM_FRONTENDS - 1, 1 },
{ -1, NULL, NULL, 0, 0, 0 }
Expand Down

0 comments on commit 60453dc

Please sign in to comment.