Skip to content

Commit

Permalink
Merge pull request #10 from dinosaure/better-boot-S
Browse files Browse the repository at this point in the history
Better boot.S and link script
  • Loading branch information
dinosaure committed Nov 21, 2021
2 parents 6329dbe + 9a02679 commit b953611
Show file tree
Hide file tree
Showing 9 changed files with 147 additions and 222 deletions.
122 changes: 5 additions & 117 deletions caml/nolibc/memcpy.c
Expand Up @@ -2,123 +2,11 @@
#include <stdint.h>
#include <endian.h>

/*
 * memcpy - minimal byte-wise copy for the nolibc runtime.
 *
 * Copies `len` bytes from `v_src` to `v_dst`, one byte at a time, in
 * ascending address order.  The regions must not overlap; use memmove()
 * when they might.
 *
 * Returns the original destination pointer `v_dst`, as the C standard
 * requires.  (The cursor used by the copy loop ends up at v_dst + len, so
 * it must not be the value returned.)
 */
void *memcpy(void *v_dst, const void *v_src, size_t len)
{
    char *dst = v_dst;
    const char *src = v_src;

    for (; len; len--)
        *dst++ = *src++;

    /* Hand back the start of the destination, not the advanced cursor. */
    return v_dst;
}
39 changes: 11 additions & 28 deletions caml/nolibc/memmove.c
@@ -1,36 +1,19 @@
#include <string.h>
#include <stdint.h>
#include <stdio.h>

#define WT size_t
#define WS (sizeof(WT))

/*
 * memmove - copy `len` bytes from `v_src` to `v_dst`, tolerating overlap.
 *
 * Non-overlapping regions are delegated to memcpy().  Overlapping regions
 * are copied byte by byte: ascending when the destination lies below the
 * source, descending otherwise, so that no source byte is overwritten
 * before it has been read.
 *
 * Always returns the original destination pointer `v_dst`.  The result of
 * memcpy() is deliberately not forwarded, and the local cursor (which the
 * ascending loop advances) is never returned.
 */
void *memmove(void *v_dst, const void *v_src, size_t len)
{
    char *dst = v_dst;
    const char *src = v_src;

    if (dst == src)
        return v_dst;

    /* Disjoint regions: a plain forward copy is sufficient. */
    if (src + len <= dst || dst + len <= src) {
        memcpy(dst, src, len);
        return v_dst;
    }

    if (dst < src) {
        /* Destination starts first: copy upwards. */
        for (; len; len--)
            *dst++ = *src++;
    } else {
        /* Destination starts inside the source: copy downwards. */
        while (len) {
            len--;
            dst[len] = src[len];
        }
    }

    return v_dst;
}
4 changes: 4 additions & 0 deletions caml/nolibc/stubs.c
Expand Up @@ -8,11 +8,14 @@
#include <sys/stat.h>
#include <unistd.h>

/* Declared here, defined outside this file.  Presumably flushes pending
 * UART output so a message is visible before execution stops -- confirm
 * against its definition. */
extern void uart_drain_output_queue(void);

/*
 * STUB_ABORT(function) - define an aborting stand-in for an unsupported
 * libc entry point.
 *
 * Expands to a function named __unsup_<function> whose assembler-level
 * symbol name is <function> (via the __asm__ label), declared noreturn.
 * When called it prints "STUB: abort: <function>() called", calls
 * uart_drain_output_queue(), and then calls abort().
 */
#define STUB_ABORT(function) \
int __unsup_##function(void) __asm__(#function) __attribute__((noreturn)); \
int __unsup_##function(void) \
{ \
printf("STUB: abort: %s() called\n", #function); \
uart_drain_output_queue(); \
abort(); \
}

Expand All @@ -28,6 +31,7 @@
static int called = 1; \
if (!called) {\
printf("STUB: %s() called\n", #function); \
uart_drain_output_queue(); \
called = 1; \
} \
errno = ENOSYS; \
Expand Down
55 changes: 32 additions & 23 deletions kernel/lib/boot.S
@@ -1,27 +1,36 @@
// AArch64 mode
// ADR_REL register, symbol
// Load the address of \symbol into \register PC-relatively:
// adrp produces the base of the 4 KiB page containing the symbol, and the
// add applies the low 12 bits of the symbol's address (:lo12:).
.macro ADR_REL register, symbol
adrp \register, \symbol
add \register, \register, #:lo12:\symbol
.endm
// To keep this in the first portion of the binary.
.section ".text.boot"
.globl _start
.equ _core_id_mask, 0b11

.section .text._start

// Entry point for the kernel. Registers:
// x0 -> 32 bit pointer to DTB in memory (primary core only) / 0 (secondary cores)
// x1 -> 0
// x2 -> 0
// x3 -> 0
// x4 -> 32 bit kernel entry point, _start location
// Boot entry point.  Parks every core whose masked MPIDR_EL1 value is
// non-zero; the remaining core zeroes .bss, installs its stack pointer and
// calls _start_c.
_start:
// NOTE(review): sp is set to a fixed address here and then overwritten from
// __boot_core_stack_end_exclusive in .L_prepare_c -- confirm whether this
// line is still wanted.
mov sp, #0x30000000
// Read MPIDR_EL1 and keep only the bits selected by _core_id_mask (0b11).
mrs x1, MPIDR_EL1
and x1, x1, _core_id_mask
// Only the core whose masked id equals 0 continues; all others park.
mov x2, #0
cmp x1, x2
b.ne .L_parking_loop
.L_bss_init_loop:
// x0 = current position, x1 = end (exclusive) of the region to zero.
ADR_REL x0, __bss_start
ADR_REL x1, __bss_end_exclusive
.L_bss_loop:
// The loop ends only when x0 == x1 exactly, and each iteration advances by
// 16 bytes -- assumes the linker script keeps both symbols 16-byte aligned;
// TODO confirm.
cmp x0, x1
b.eq .L_prepare_c
// Store two zero registers (16 bytes) at [x0], then post-increment x0 by 16.
stp xzr, xzr, [x0], #16
b .L_bss_loop
.L_prepare_c:
// Point sp at the linker-provided __boot_core_stack_end_exclusive symbol.
ADR_REL x0, __boot_core_stack_end_exclusive
mov sp, x0
.L_jump:
// Call the C entry point; if it ever returns, fall into the parking loop.
bl _start_c
b .L_parking_loop
.L_parking_loop:
// Wait for an event, then loop forever.
wfe
b .L_parking_loop

// clear bss
ldr x5, =__bss_start
ldr w6, =__bss_size
3: cbz w6, 4f
str xzr, [x5], #8
sub w6, w6, #1
cbnz w6, 3b

// jump to C code, should not return
4: bl kernel_main
1: wfe
b 1b
.size _start, . - _start
.type _start, function
.global _start
10 changes: 4 additions & 6 deletions kernel/lib/kernel.c
@@ -1,17 +1,15 @@
#include "io.h"
#include "lib.h"
#include "log.h"
#include "mem.h"
#include "mclock.h"
#include "crt_init.h"

static char *unused_args[] = { "mirage", NULL };
static uintptr_t sp_at_start;

extern void _nolibc_init(uintptr_t heap_start, size_t heap_size);
extern void caml_startup(char **);
extern int errno;
static char* args[] = { "gi(l)braltar", NULL };

void kernel_main(uint64_t dtb_ptr32, uint64_t x1, uint64_t x2, uint64_t x3) {
void _start_c() {
uintptr_t heap_start;
size_t heap_size;

Expand All @@ -31,7 +29,7 @@ void kernel_main(uint64_t dtb_ptr32, uint64_t x1, uint64_t x2, uint64_t x3) {
_nolibc_init(heap_start, heap_size);
uart_drain_output_queue();

caml_startup(unused_args);
caml_startup(args);

for(;;);
}
8 changes: 0 additions & 8 deletions kernel/lib/lib.h
@@ -1,11 +1,3 @@
#include <stddef.h>

void *memset(void *dest, int c, size_t n);
void *memcpy(void *restrict dest, const void *restrict src, size_t n);
void *memmove(void *dest, const void *src, size_t n);
int memcmp(const void *vl, const void *vr, size_t n);
int strcmp(const char *l, const char *r);
int strncmp(const char *l, const char *r, size_t n);
char *strcpy(char *restrict dest, const char *restrict src);
size_t strlen(const char *s);
int isspace(int c);
8 changes: 5 additions & 3 deletions kernel/lib/mem.c
Expand Up @@ -26,7 +26,9 @@
#define PAGE_SHIFT 12
#define PAGE_MASK ~(0xfff)

#define MEMORY_SIZE 0x30000000 /* 768Mb */
extern char __boot_core_stack_end_exclusive[];

#define MEMORY_SIZE __boot_core_stack_end_exclusive

static uint64_t heap_start;

Expand All @@ -42,15 +44,15 @@ void mem_lock_heap(uintptr_t *start, size_t *size)
{
mem_locked = 1;
*start = heap_start;
*size = MEMORY_SIZE - heap_start;
*size = ((unsigned long long) MEMORY_SIZE) - heap_start;
}

void mem_init(void)
{
extern char __text_start[], __text_end[], __rodata_end[], __end[];
uint64_t mem_size;

mem_size = MEMORY_SIZE;
mem_size = (unsigned long long) MEMORY_SIZE;
heap_start = ((uint64_t)&__end + PAGE_SIZE - 1) & PAGE_MASK;

log(INFO, "RPi4: Memory map: %llu MB addressable:\n",
Expand Down

0 comments on commit b953611

Please sign in to comment.