-
Notifications
You must be signed in to change notification settings - Fork 1.1k
/
boot.S
447 lines (385 loc) · 16.5 KB
/
boot.S
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
/******************************************************************************
* Copyright (c) 2010 - 2022 Xilinx, Inc. All rights reserved.
* Copyright (c) 2022 - 2023 Advanced Micro Devices, Inc. All Rights Reserved.
* SPDX-License-Identifier: MIT
******************************************************************************/
/*****************************************************************************/
/**
* @file boot.S
*
* @addtogroup a9_boot_code Cortex A9 Processor Boot Code
* @{
* <h2> boot.S </h2>
* The boot code performs minimum configuration which is required for an
* application to run starting from processor reset state of the processor. Below is a
* sequence illustrating what all configuration is performed before control
* reaches to main function.
*
* 1. Program vector table base for exception handling
* 2. Invalidate instruction cache, data cache and TLBs
* 3. Program stack pointer for various modes (IRQ, FIQ, supervisor, undefine,
* abort, system)
* 4. Configure MMU with short descriptor translation table format and program
* base address of translation table
* 5. Enable data cache, instruction cache and MMU
* 6. Enable Floating point unit
* 7. Transfer control to _start which clears BSS sections, initializes
* global timer and runs global constructor before jumping to main
* application
*
* <pre>
* MODIFICATION HISTORY:
*
* Ver Who Date Changes
* ----- ------- -------- ---------------------------------------------------
* 1.00a ecm/sdm 10/20/09 Initial version
* 3.06a sgd 05/15/12 Updated L2CC Auxiliary and Tag RAM Latency control
* register settings.
* 3.06a asa 06/17/12 Modified the TTBR settings and L2 Cache auxiliary
* register settings.
* 3.07a asa 07/16/12 Modified the L2 Cache controller settings to improve
* performance. Changed the property of the ".boot"
* section.
* 3.07a sgd 08/21/12 Modified the L2 Cache controller and cp15 Aux Control
* Register settings
* 3.09a sgd 02/06/13 Updated SLCR l2c Ram Control register to a
* value of 0x00020202. Fix for CR 697094 (SI#687034).
* 3.10a srt 04/18/13 Implemented ARM Erratas. Please refer to file
* 'xil_errata.h' for errata description
* 4.2 pkp 06/19/14 Enabled asynchronous abort exception
* 5.0 pkp 16/15/14 Modified initialization code to enable scu after
* MMU is enabled
* 5.1 pkp 05/13/15 Changed the initialization order so to first invalidate
* caches and TLB, enable MMU and caches, then enable SMP
* bit in ACTLR. L2Cache invalidation and enabling of L2Cache
* is done later.
* 5.4 asa 12/6/15 Added code to initialize SPSR for all relevant modes.
* 6.0 mus 08/04/16 Added code to detect zynq-7000 base silicon configuration and
* attempt to enable dual core behavior on single cpu zynq-7000s
* devices is prevented from corrupting system behavior.
* 6.0 mus 08/24/16 Check CPU core before putting cpu1 to reset for single core
* zynq-7000s devices
* 7.6 mus 09/02/21 SCU invalidation should be done only from primary CPU, so
* skipping it when USE_AMP is set to 1. It fixes CR#1109723
* 7.7 asa 01/06/22 Removed Cortex-A9 errata handling for errata
* 742230 and 743622. These do not apply to
* Cortex-A9 revision r3p0 being used in Zynq
* platforms.
* 9.0 mus 07/27/23 Removed dependency on XPAR_CPU_ID, updated logic to use
* CPU affinity register to read CPU ID.
*
* </pre>
*
* @note
*
* None.
*
******************************************************************************/
#include "bspconfig.h"
#ifndef SDT
#include "xparameters.h"
#endif
#include "xil_errata.h"
.globl MMUTable
.global _prestart
.global _boot
.global __stack
.global __irq_stack
.global __supervisor_stack
.global __abort_stack
.global __fiq_stack
.global __undef_stack
.global _vector_table
.set PSS_L2CC_BASE_ADDR, 0xF8F02000
.set PSS_SLCR_BASE_ADDR, 0xF8000000
.set RESERVED, 0x0fffff00
.set TblBase , MMUTable
.set LRemap, 0xFE00000F /* set the base address of the peripheral block as not shared */
.set L2CCWay, (PSS_L2CC_BASE_ADDR + 0x077C) /*(PSS_L2CC_BASE_ADDR + PSS_L2CC_CACHE_INVLD_WAY_OFFSET)*/
.set L2CCSync, (PSS_L2CC_BASE_ADDR + 0x0730) /*(PSS_L2CC_BASE_ADDR + PSS_L2CC_CACHE_SYNC_OFFSET)*/
.set L2CCCrtl, (PSS_L2CC_BASE_ADDR + 0x0100) /*(PSS_L2CC_BASE_ADDR + PSS_L2CC_CNTRL_OFFSET)*/
.set L2CCAuxCrtl, (PSS_L2CC_BASE_ADDR + 0x0104) /*(PSS_L2CC_BASE_ADDR + XPSS_L2CC_AUX_CNTRL_OFFSET)*/
.set L2CCTAGLatReg, (PSS_L2CC_BASE_ADDR + 0x0108) /*(PSS_L2CC_BASE_ADDR + XPSS_L2CC_TAG_RAM_CNTRL_OFFSET)*/
.set L2CCDataLatReg, (PSS_L2CC_BASE_ADDR + 0x010C) /*(PSS_L2CC_BASE_ADDR + XPSS_L2CC_DATA_RAM_CNTRL_OFFSET)*/
.set L2CCIntClear, (PSS_L2CC_BASE_ADDR + 0x0220) /*(PSS_L2CC_BASE_ADDR + XPSS_L2CC_IAR_OFFSET)*/
.set L2CCIntRaw, (PSS_L2CC_BASE_ADDR + 0x021C) /*(PSS_L2CC_BASE_ADDR + XPSS_L2CC_ISR_OFFSET)*/
.set SLCRlockReg, (PSS_SLCR_BASE_ADDR + 0x04) /*(PSS_SLCR_BASE_ADDR + XPSS_SLCR_LOCK_OFFSET)*/
.set SLCRUnlockReg, (PSS_SLCR_BASE_ADDR + 0x08) /*(PSS_SLCR_BASE_ADDR + XPSS_SLCR_UNLOCK_OFFSET)*/
.set SLCRL2cRamReg, (PSS_SLCR_BASE_ADDR + 0xA1C) /*(PSS_SLCR_BASE_ADDR + XPSS_SLCR_L2C_RAM_OFFSET)*/
.set SLCRCPURSTReg, (0xF8000000 + 0x244) /*(XPS_SYS_CTRL_BASEADDR + A9_CPU_RST_CTRL_OFFSET)*/
.set EFUSEStaus, (0xF800D000 + 0x10) /*(XPS_EFUSE_BASEADDR + EFUSE_STATUS_OFFSET)*/
/* workaround for simulation not working when L1 D and I caches,MMU and L2 cache enabled - DT568997 */
.if SIM_MODE == 1
.set CRValMmuCac, 0b00000000000000 /* Disable IDC, and MMU */
.else
.set CRValMmuCac, 0b01000000000101 /* Enable IDC, and MMU */
.endif
.set CRValHiVectorAddr, 0b10000000000000 /* Set the Vector address to high, 0xFFFF0000 */
.set L2CCAuxControl, 0x72360000 /* Enable all prefetching, Cache replacement policy, Parity enable,
Event monitor bus enable and Way Size (64 KB) */
.set L2CCControl, 0x01 /* Enable L2CC */
.set L2CCTAGLatency, 0x0111 /* latency for TAG RAM */
.set L2CCDataLatency, 0x0121 /* latency for DATA RAM */
.set SLCRlockKey, 0x767B /* SLCR lock key */
.set SLCRUnlockKey, 0xDF0D /* SLCR unlock key */
.set SLCRL2cRamConfig, 0x00020202 /* SLCR L2C ram configuration */
/* Stack Pointer locations for boot code */
.set Undef_stack, __undef_stack
.set FIQ_stack, __fiq_stack
.set Abort_stack, __abort_stack
.set SPV_stack, __supervisor_stack
.set IRQ_stack, __irq_stack
.set SYS_stack, __stack
.set vector_base, _vector_table
.set FPEXC_EN, 0x40000000 /* FPU enable bit, (1 << 30) */
.section .boot,"ax"
/* this initializes the various processor modes */
_prestart:
_boot:
/* only allow cpu0 through */
mrc p15,0,r1,c0,c0,5
and r1, r1, #0xf
cmp r1, #0
beq CheckEFUSE
b OKToRun
CheckEFUSE:
ldr r0,=EFUSEStaus
ldr r1,[r0] /* Read eFuse setting */
ands r1,r1,#0x80 /* Check whether device is having single core */
beq OKToRun
/* single core device, reset cpu1 */
ldr r0,=SLCRUnlockReg /* Load SLCR base address base + unlock register */
ldr r1,=SLCRUnlockKey /* set unlock key */
str r1, [r0] /* Unlock SLCR */
ldr r0,=SLCRCPURSTReg
ldr r1,[r0] /* Read CPU Software Reset Control register */
orr r1,r1,#0x22
str r1,[r0] /* Reset CPU1 */
ldr r0,=SLCRlockReg /* Load SLCR base address base + lock register */
ldr r1,=SLCRlockKey /* set lock key */
str r1, [r0] /* lock SLCR */
OKToRun:
mrc p15, 0, r0, c0, c0, 0 /* Get the revision */
and r5, r0, #0x00f00000
and r6, r0, #0x0000000f
orr r6, r6, r5, lsr #20-4
/* set VBAR to the _vector_table address in linker script */
ldr r0, =vector_base
mcr p15, 0, r0, c12, c0, 0
/*invalidate scu*/
#if USE_AMP!=1
ldr r7, =0xf8f0000c
ldr r6, =0xffff
str r6, [r7]
#endif
/* Invalidate caches and TLBs */
mov r0,#0 /* r0 = 0 */
mcr p15, 0, r0, c8, c7, 0 /* invalidate TLBs */
mcr p15, 0, r0, c7, c5, 0 /* invalidate icache */
mcr p15, 0, r0, c7, c5, 6 /* Invalidate branch predictor array */
bl invalidate_dcache /* invalidate dcache */
/* Disable MMU, if enabled */
mrc p15, 0, r0, c1, c0, 0 /* read CP15 register 1 */
bic r0, r0, #0x1 /* clear bit 0 */
mcr p15, 0, r0, c1, c0, 0 /* write value back */
#ifdef SHAREABLE_DDR
/* Mark the entire DDR memory as shareable */
ldr r3, =0x3ff /* 1024 entries to cover 1G DDR */
ldr r0, =TblBase /* MMU Table address in memory */
ldr r2, =0x15de6 /* S=b1 TEX=b101 AP=b11, Domain=b1111, C=b0, B=b1 */
shareable_loop:
str r2, [r0] /* write the entry to MMU table */
add r0, r0, #0x4 /* next entry in the table */
add r2, r2, #0x100000 /* next section */
subs r3, r3, #1
bge shareable_loop /* loop till 1G is covered */
#endif
mrs r0, cpsr /* get the current PSR */
mvn r1, #0x1f /* set up the irq stack pointer */
and r2, r1, r0
orr r2, r2, #0x12 /* IRQ mode */
msr cpsr, r2
ldr r13,=IRQ_stack /* IRQ stack pointer */
bic r2, r2, #(0x1 << 9) /* Set EE bit to little-endian */
msr spsr_fsxc,r2
mrs r0, cpsr /* get the current PSR */
mvn r1, #0x1f /* set up the supervisor stack pointer */
and r2, r1, r0
orr r2, r2, #0x13 /* supervisor mode */
msr cpsr, r2
ldr r13,=SPV_stack /* Supervisor stack pointer */
bic r2, r2, #(0x1 << 9) /* Set EE bit to little-endian */
msr spsr_fsxc,r2
mrs r0, cpsr /* get the current PSR */
mvn r1, #0x1f /* set up the Abort stack pointer */
and r2, r1, r0
orr r2, r2, #0x17 /* Abort mode */
msr cpsr, r2
ldr r13,=Abort_stack /* Abort stack pointer */
bic r2, r2, #(0x1 << 9) /* Set EE bit to little-endian */
msr spsr_fsxc,r2
mrs r0, cpsr /* get the current PSR */
mvn r1, #0x1f /* set up the FIQ stack pointer */
and r2, r1, r0
orr r2, r2, #0x11 /* FIQ mode */
msr cpsr, r2
ldr r13,=FIQ_stack /* FIQ stack pointer */
bic r2, r2, #(0x1 << 9) /* Set EE bit to little-endian */
msr spsr_fsxc,r2
mrs r0, cpsr /* get the current PSR */
mvn r1, #0x1f /* set up the Undefine stack pointer */
and r2, r1, r0
orr r2, r2, #0x1b /* Undefine mode */
msr cpsr, r2
ldr r13,=Undef_stack /* Undefine stack pointer */
bic r2, r2, #(0x1 << 9) /* Set EE bit to little-endian */
msr spsr_fsxc,r2
mrs r0, cpsr /* get the current PSR */
mvn r1, #0x1f /* set up the system stack pointer */
and r2, r1, r0
orr r2, r2, #0x1F /* SYS mode */
msr cpsr, r2
ldr r13,=SYS_stack /* SYS stack pointer */
/*set scu enable bit in scu*/
ldr r7, =0xf8f00000
ldr r0, [r7]
orr r0, r0, #0x1
str r0, [r7]
/* enable MMU and cache */
ldr r0,=TblBase /* Load MMU translation table base */
orr r0, r0, #0x5B /* Outer-cacheable, WB */
mcr 15, 0, r0, c2, c0, 0 /* TTB0 */
mvn r0,#0 /* Load MMU domains -- all ones=manager */
mcr p15,0,r0,c3,c0,0
/* Enable mmu, icahce and dcache */
ldr r0,=CRValMmuCac
mcr p15,0,r0,c1,c0,0 /* Enable cache and MMU */
dsb /* dsb allow the MMU to start up */
isb /* isb flush prefetch buffer */
/* Write to ACTLR */
mrc p15, 0, r0, c1, c0, 1 /* Read ACTLR*/
orr r0, r0, #(0x01 << 6) /* set SMP bit */
orr r0, r0, #(0x01 ) /* Cache/TLB maintenance broadcast */
mcr p15, 0, r0, c1, c0, 1 /* Write ACTLR*/
/* Invalidate L2 Cache and enable L2 Cache*/
/* For AMP, assume running on CPU1. Don't initialize L2 Cache (up to Linux) */
#if USE_AMP!=1
ldr r0,=L2CCCrtl /* Load L2CC base address base + control register */
mov r1, #0 /* force the disable bit */
str r1, [r0] /* disable the L2 Caches */
ldr r0,=L2CCAuxCrtl /* Load L2CC base address base + Aux control register */
ldr r1,[r0] /* read the register */
ldr r2,=L2CCAuxControl /* set the default bits */
orr r1,r1,r2
str r1, [r0] /* store the Aux Control Register */
ldr r0,=L2CCTAGLatReg /* Load L2CC base address base + TAG Latency address */
ldr r1,=L2CCTAGLatency /* set the latencies for the TAG*/
str r1, [r0] /* store the TAG Latency register Register */
ldr r0,=L2CCDataLatReg /* Load L2CC base address base + Data Latency address */
ldr r1,=L2CCDataLatency /* set the latencies for the Data*/
str r1, [r0] /* store the Data Latency register Register */
ldr r0,=L2CCWay /* Load L2CC base address base + way register*/
ldr r2, =0xFFFF
str r2, [r0] /* force invalidate */
ldr r0,=L2CCSync /* need to poll 0x730, PSS_L2CC_CACHE_SYNC_OFFSET */
/* Load L2CC base address base + sync register*/
/* poll for completion */
Sync: ldr r1, [r0]
cmp r1, #0
bne Sync
ldr r0,=L2CCIntRaw /* clear pending interrupts */
ldr r1,[r0]
ldr r0,=L2CCIntClear
str r1,[r0]
ldr r0,=SLCRUnlockReg /* Load SLCR base address base + unlock register */
ldr r1,=SLCRUnlockKey /* set unlock key */
str r1, [r0] /* Unlock SLCR */
ldr r0,=SLCRL2cRamReg /* Load SLCR base address base + l2c Ram Control register */
ldr r1,=SLCRL2cRamConfig /* set the configuration value */
str r1, [r0] /* store the L2c Ram Control Register */
ldr r0,=SLCRlockReg /* Load SLCR base address base + lock register */
ldr r1,=SLCRlockKey /* set lock key */
str r1, [r0] /* lock SLCR */
ldr r0,=L2CCCrtl /* Load L2CC base address base + control register */
ldr r1,[r0] /* read the register */
mov r2, #L2CCControl /* set the enable bit */
orr r1,r1,r2
str r1, [r0] /* enable the L2 Caches */
#endif
mov r0, r0
mrc p15, 0, r1, c1, c0, 2 /* read cp access control register (CACR) into r1 */
orr r1, r1, #(0xf << 20) /* enable full access for p10 & p11 */
mcr p15, 0, r1, c1, c0, 2 /* write back into CACR */
/* enable vfp */
fmrx r1, FPEXC /* read the exception register */
orr r1,r1, #FPEXC_EN /* set VFP enable bit, leave the others in orig state */
fmxr FPEXC, r1 /* write back the exception register */
mrc p15,0,r0,c1,c0,0 /* flow prediction enable */
orr r0, r0, #(0x01 << 11) /* #0x8000 */
mcr p15,0,r0,c1,c0,0
mrc p15,0,r0,c1,c0,1 /* read Auxiliary Control Register */
orr r0, r0, #(0x1 << 2) /* enable Dside prefetch */
orr r0, r0, #(0x1 << 1) /* enable L2 Prefetch hint */
mcr p15,0,r0,c1,c0,1 /* write Auxiliary Control Register */
mrs r0, cpsr /* get the current PSR */
bic r0, r0, #0x100 /* enable asynchronous abort exception */
msr cpsr_xsf, r0
b _start /* jump to C startup code */
and r0, r0, r0 /* no op */
.Ldone: b .Ldone /* Paranoia: we should never get here */
/*
*************************************************************************
*
* invalidate_dcache - invalidate the entire d-cache by set/way
*
* Note: for Cortex-A9, there is no cp instruction for invalidating
* the whole D-cache. Need to invalidate each line.
*
*************************************************************************
*/
invalidate_dcache:
mrc p15, 1, r0, c0, c0, 1 /* read CLIDR */
ands r3, r0, #0x7000000
mov r3, r3, lsr #23 /* cache level value (naturally aligned) */
beq finished
mov r10, #0 /* start with level 0 */
loop1:
add r2, r10, r10, lsr #1 /* work out 3xcachelevel */
mov r1, r0, lsr r2 /* bottom 3 bits are the Cache type for this level */
and r1, r1, #7 /* get those 3 bits alone */
cmp r1, #2
blt skip /* no cache or only instruction cache at this level */
mcr p15, 2, r10, c0, c0, 0 /* write the Cache Size selection register */
isb /* isb to sync the change to the CacheSizeID reg */
mrc p15, 1, r1, c0, c0, 0 /* reads current Cache Size ID register */
and r2, r1, #7 /* extract the line length field */
add r2, r2, #4 /* add 4 for the line length offset (log2 16 bytes) */
ldr r4, =0x3ff
ands r4, r4, r1, lsr #3 /* r4 is the max number on the way size (right aligned) */
clz r5, r4 /* r5 is the bit position of the way size increment */
ldr r7, =0x7fff
ands r7, r7, r1, lsr #13 /* r7 is the max number of the index size (right aligned) */
loop2:
mov r9, r4 /* r9 working copy of the max way size (right aligned) */
loop3:
orr r11, r10, r9, lsl r5 /* factor in the way number and cache number into r11 */
orr r11, r11, r7, lsl r2 /* factor in the index number */
mcr p15, 0, r11, c7, c6, 2 /* invalidate by set/way */
subs r9, r9, #1 /* decrement the way number */
bge loop3
subs r7, r7, #1 /* decrement the index */
bge loop2
skip:
add r10, r10, #2 /* increment the cache number */
cmp r3, r10
bgt loop1
finished:
mov r10, #0 /* switch back to cache level 0 */
mcr p15, 2, r10, c0, c0, 0 /* select current cache level in cssr */
dsb
isb
bx lr
.end
/**
* @} End of "addtogroup a9_boot_code".
*/