Skip to content

Commit

Permalink
Improve FF OSD overlay
Browse files Browse the repository at this point in the history
  • Loading branch information
IanSB committed Jan 25, 2021
1 parent 47b49f0 commit ed6087a
Show file tree
Hide file tree
Showing 2 changed files with 179 additions and 19 deletions.
185 changes: 169 additions & 16 deletions src/capture_line_fast_simple_16bpp.S
Expand Up @@ -20,33 +20,58 @@
.global capture_line_default_simple_leading_both_16bpp

// SIMPLE_CAPTURE_TWELVE_BITS_16BPP_LO reg
//
// Starts a fresh output word in r10 and folds the 12-bit sample in r8 into
// its low halfword.
//   In:      r8   = sampled GPIO word (call sites pass "input in r8")
//            \reg = seed value the pixel bits are EORed onto
//   Out:     r10  = \reg EOR the repositioned pixel nibbles
//            r3   = BIT_PROBE is set if any MUX_MASK bit is set in r8
//   Scratch: r9, r12, flags
// Nibble placement (independent of the value of PIXEL_BASE):
//   r8 bits PIXEL_BASE+0..+3   -> r10 bits  1..4
//   r8 bits PIXEL_BASE+4..+7   -> r10 bits  7..10
//   r8 bits PIXEL_BASE+8..+11  -> r10 bits 12..15
// NOTE(review): the lsl amounts (3 - PIXEL_BASE) and (4 - PIXEL_BASE) only
// assemble while PIXEL_BASE <= 3; the comment below suggests PIXEL_BASE is 2 - confirm.
.macro SIMPLE_CAPTURE_TWELVE_BITS_16BPP_LO reg
// Pixel in GPIO 13.. 2 -> 15.. 0
and r9, r8, #0x0f << PIXEL_BASE
eor r10, \reg, r9, lsr #(PIXEL_BASE - 1)
and r12, r8, #0x0f << (PIXEL_BASE + 4)
eor r10, r10, r12, lsl #(3 - PIXEL_BASE)
and r9, r8, #0x0f << (PIXEL_BASE + 8)
// The flags set here are consumed by the orrne two lines down; the eor in
// between has no S suffix and therefore leaves them intact.
tst r8, #MUX_MASK
eor r10, r10, r9, lsl #(4 - PIXEL_BASE)
orrne r3, #BIT_PROBE
.endm

// SIMPLE_CAPTURE_TWELVE_BITS_16BPP_HI reg
//
// Folds the 12-bit sample in r8 into the high halfword of the word already
// started in r10 (by the matching _LO macro), then writes the finished word
// to \reg.
//   In:      r8   = sampled GPIO word
//            r10  = partial word holding the low-halfword pixel
//            r14  = mask ANDed with the assembled word (entry points load it
//                   with SETUP_TWELVE_BITS_MASK_R14)
//   Out:     \reg = r10 ORed with (r10 AND r14) shifted right by 4
//   Scratch: r9, r12 (r10 is also modified)
// Nibble placement (independent of the value of PIXEL_BASE):
//   r8 bits PIXEL_BASE+0..+3   -> r10 bits 17..20
//   r8 bits PIXEL_BASE+4..+7   -> r10 bits 23..26
//   r8 bits PIXEL_BASE+8..+11  -> r10 bits 28..31
// Unlike the _LO macro, this one does not test MUX_MASK.
.macro SIMPLE_CAPTURE_TWELVE_BITS_16BPP_HI reg
// Pixel in GPIO 13.. 2 -> 31.. 16
and r9, r8, #0x0f << PIXEL_BASE
eor r10, r10, r9, lsl #(16 - (PIXEL_BASE - 1))
and r12, r8, #0x0f << (PIXEL_BASE + 4)
eor r10, r10, r12, lsl #(16 + (3 - PIXEL_BASE))
and r9, r8, #0x0f << (PIXEL_BASE + 8)
eor r10, r10, r9, lsl #(16 + (4 - PIXEL_BASE))
// Applied once to the whole 32-bit word, so both pixels are filled in together.
and r9, r10, r14 // extract high order bits
orr \reg, r10, r9, lsr #4 // put high order bits in unused low order bits to ensure full range 5r 6g 5b
.endm


// OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_LO reg
//
// OSD-aware capture of the low-halfword pixel. The result is left in r12
// rather than r10, because the matching _HI macro rebuilds r10 from scratch.
//   In:      r8   = sampled GPIO word
//            \reg = seed value the pixel bits are EORed onto
//            r3   = flag word; BIT_OSD selects dimming
//            r14  = mask ANDed with the assembled pixel
//   Out:     r12  = finished low pixel
//            r3   = BIT_PROBE is set if any MUX_MASK bit is set in r8
//   Scratch: r9, r10, flags
// Nibble placement before dimming/fill (independent of PIXEL_BASE):
//   r8 bits PIXEL_BASE+0..+3   -> bits  1..4
//   r8 bits PIXEL_BASE+4..+7   -> bits  7..10
//   r8 bits PIXEL_BASE+8..+11  -> bits 12..15
.macro OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_LO reg
// Pixel in GPIO 13.. 2 -> 15.. 0
and r9, r8, #0x0f << PIXEL_BASE
eor r10, \reg, r9, lsr #(PIXEL_BASE - 1)
and r12, r8, #0x0f << (PIXEL_BASE + 4)
eor r10, r10, r12, lsl #(3 - PIXEL_BASE)
and r9, r8, #0x0f << (PIXEL_BASE + 8)
eor r12, r10, r9, lsl #(4 - PIXEL_BASE)
// The masked copy is taken exactly once, after the tst; the and has no
// S suffix, so the BIT_OSD flags survive for the movne/orreq pair.
tst r3, #BIT_OSD
and r9, r12, r14 // extract high order bits
movne r12, r12, lsr #1 // half intensity for menu (low order bits already 0)
orreq r12, r12, r9, lsr #4 // put high order bits in unused low order bits to ensure full range 5r 6g 5b
// When a MUX_MASK bit is set, flag it in r3 and force the low halfword to
// 0xffff. ORs are used so the upper halfword of r12 is left as it was.
tst r8, #MUX_MASK
orrne r3, #BIT_PROBE
orrne r12, #0xff00
orrne r12, #0x00ff
.endm

.macro SIMPLE_CAPTURE_TWELVE_BITS_16BPP_HI reg
.macro OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_HI reg
// Pixel in GPIO 13.. 2 -> 31.. 16
and r9, r8, #0x0f << PIXEL_BASE
mov r10, r9, lsl #(16 - (PIXEL_BASE - 1))
and r9, r8, #0x0f << (PIXEL_BASE + 4)
eor r10, r10, r9, lsl #(16 + (3 - PIXEL_BASE))
and r9, r8, #0x0f << (PIXEL_BASE + 8)
eor r10, r10, r9, lsl #(16 + (4 - PIXEL_BASE))
tst r3, #BIT_OSD
and r9, r10, r14 // extract high order bits
tst r3, #BIT_OSD
movne r10, r10, lsr #1 // half intensity for menu (low order bits already 0
orreq r10, r10, r9, lsr #4 // put high order bits in unused low order bits to ensure full range 5r 6g 5b
tst r8, #MUX_MASK
Expand Down Expand Up @@ -82,39 +107,96 @@ loop_16bpp_simple\@:

mov r0, r2
pop {pc}
.endm

// OSD_COMMON_SIMPLE
//
// Capture loop used by the OSD_capture_line_fast_simple_* paths.
// Each iteration waits for eight psync edges, running the OSD-aware _LO
// macro after the odd ones and the _HI macro after the even ones, and then
// stores the four registers r5, r6, r7, r10 through r0 with post-increment.
//   In:   r0 = store pointer, r1 = iteration count, r11 = seed passed to
//         every _LO macro, r2 = value handed back in r0 on exit
//   Exit: r0 = r2, then pc is popped from the stack (pairs with the
//         push {lr} done by the entry points).
// NOTE(review): the tail of OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_HI is not
// visible here; it is presumed to leave the finished word in its register
// argument - confirm.
.macro OSD_COMMON_SIMPLE
loop_16bpp_osd_simple\@:
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_HI r5 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_HI r6 // input in r8

WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_HI r7 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_HI r10 // input in r8
// Four words written per iteration; r0 advances by 16 bytes.
stmia r0!, {r5, r6, r7, r10}

subs r1, r1, #1
bne loop_16bpp_osd_simple\@

mov r0, r2
pop {pc}
.endm


// COMMON_DEFAULT
//
// Capture loop used by the non-OSD capture_line_default_simple_* paths.
// Each iteration waits for eight psync edges, with exactly one capture macro
// per edge (_LO after the odd edges, _HI after the even ones), building
// r5, r6, r7, r10, which WRITE_R5_R6_R7_R10_16BPP then writes out.
//   In:   r1  = iteration count
//         r11 = seed passed to every _LO macro
//         r12 = value copied into r2 for the duration of the loop
//         r2  = value saved on the stack and handed back in r0 on exit
//   Exit: pops the saved r2 into r0 and the next stack word into pc
//         (presumably the lr pushed by the entry point - confirm).
.macro COMMON_DEFAULT
push {r2}
mov r2, r12
loop_16bpp_default\@:
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
SIMPLE_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
SIMPLE_CAPTURE_TWELVE_BITS_16BPP_HI r5 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
SIMPLE_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
SIMPLE_CAPTURE_TWELVE_BITS_16BPP_HI r6 // input in r8

WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
SIMPLE_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
SIMPLE_CAPTURE_TWELVE_BITS_16BPP_HI r7 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
SIMPLE_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
SIMPLE_CAPTURE_TWELVE_BITS_16BPP_HI r10 // input in r8
WRITE_R5_R6_R7_R10_16BPP

subs r1, r1, #1
bne loop_16bpp_default\@

pop {r0, pc}
.endm

// OSD_COMMON_DEFAULT
//
// Capture loop used by the OSD_capture_line_default_simple_* paths.
// Same shape as OSD_COMMON_SIMPLE, except that the four words in
// r5, r6, r7, r10 are written out by WRITE_R5_R6_R7_R10_16BPP instead of a
// plain stmia, and r2 is saved and replaced by r12 around the loop.
//   In:   r1  = iteration count
//         r11 = seed passed to every _LO macro
//         r12 = value copied into r2 for the duration of the loop
//         r2  = value saved on the stack and handed back in r0 on exit
//   Exit: pops the saved r2 into r0 and the next stack word into pc
//         (presumably the lr pushed by the entry point - confirm).
.macro OSD_COMMON_DEFAULT
push {r2}
mov r2, r12
loop_16bpp_osd_default\@:
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_HI r5 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_HI r6 // input in r8

WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_HI r7 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8
OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_HI r10 // input in r8
WRITE_R5_R6_R7_R10_16BPP

subs r1, r1, #1
bne loop_16bpp_osd_default\@

pop {r0, pc}
.endm


// The capture line function is provided the following:
// r0 = pointer to current line in frame buffer
// r1 = number of complete psync cycles to capture (=param_chars_per_line)
Expand All @@ -138,11 +220,18 @@ capture_line_fast_simple_trailing_pos_16bpp:
SETUP_VSYNC_DEBUG_16BPP_R11
SETUP_TWELVE_BITS_MASK_R14
bic r3, r3, #PSYNC_MASK // only +ve edge (inverted later)
// Select the loop once per line: BIT_OSD set in r3 -> OSD-aware path below.
tst r3, #BIT_OSD
bne OSD_capture_line_fast_simple_trailing_pos_16bpp
// Normal path: psync-skip macro, then the plain capture loop.
SKIP_PSYNC_SIMPLE_TRAILING_SINGLE_EDGE_FAST
COMMON_SIMPLE
// Preload entry: set up dummy parameters, then jump to the real entry point.
preload_capture_line_fast_simple_trailing_pos_16bpp:
SETUP_DUMMY_PARAMETERS
b capture_line_fast_simple_trailing_pos_16bpp
// OSD path: same psync-skip macro, then the OSD-aware capture loop
// (OSD_COMMON_SIMPLE ends with pop {pc}, so nothing falls through past it).
OSD_capture_line_fast_simple_trailing_pos_16bpp:
SKIP_PSYNC_SIMPLE_TRAILING_SINGLE_EDGE_FAST
OSD_COMMON_SIMPLE



.ltorg
.align 6
Expand All @@ -153,26 +242,39 @@ capture_line_fast_simple_leading_pos_16bpp:
SETUP_VSYNC_DEBUG_16BPP_R11
SETUP_TWELVE_BITS_MASK_R14
bic r3, r3, #PSYNC_MASK // only +ve edge (inverted later)
// Select the loop once per line: BIT_OSD set in r3 -> OSD-aware path below.
tst r3, #BIT_OSD
bne OSD_capture_line_fast_simple_leading_pos_16bpp
// Normal path: psync-skip macro, then the plain capture loop.
SKIP_PSYNC_SIMPLE_LEADING_SINGLE_EDGE_FAST
COMMON_SIMPLE
// Preload entry: set up dummy parameters, then jump to the real entry point.
preload_capture_line_fast_simple_leading_pos_16bpp:
SETUP_DUMMY_PARAMETERS
b capture_line_fast_simple_leading_pos_16bpp
// OSD path: same psync-skip macro, then the OSD-aware capture loop
// (OSD_COMMON_SIMPLE ends with pop {pc}, so nothing falls through past it).
OSD_capture_line_fast_simple_leading_pos_16bpp:
SKIP_PSYNC_SIMPLE_LEADING_SINGLE_EDGE_FAST
OSD_COMMON_SIMPLE



.ltorg
.align 6
// *** 16 bit *** *** this one used by amiga
//
// Layout: the first instruction after the .align 6 is a branch to the
// preload stub; the real entry point is the label that follows it.
// Returns via the pop {pc} at the end of COMMON_SIMPLE / OSD_COMMON_SIMPLE,
// which pairs with the push {lr} below.
b preload_capture_line_fast_simple_trailing_neg_16bpp
capture_line_fast_simple_trailing_neg_16bpp:
push {lr}
SETUP_VSYNC_DEBUG_16BPP_R11
SETUP_TWELVE_BITS_MASK_R14
orr r3, r3, #PSYNC_MASK // only -ve edge (inverted later)
// Select the loop once per line: BIT_OSD set in r3 -> OSD-aware path below.
tst r3, #BIT_OSD
bne OSD_capture_line_fast_simple_trailing_neg_16bpp
// Normal path: psync-skip macro, then the plain capture loop.
SKIP_PSYNC_SIMPLE_TRAILING_SINGLE_EDGE_FAST
COMMON_SIMPLE
// Preload entry: set up dummy parameters, then jump to the real entry point.
preload_capture_line_fast_simple_trailing_neg_16bpp:
SETUP_DUMMY_PARAMETERS
b capture_line_fast_simple_trailing_neg_16bpp
// OSD path: same psync-skip macro, then the OSD-aware capture loop.
OSD_capture_line_fast_simple_trailing_neg_16bpp:
SKIP_PSYNC_SIMPLE_TRAILING_SINGLE_EDGE_FAST
OSD_COMMON_SIMPLE


.ltorg
.align 6
Expand All @@ -183,11 +285,18 @@ capture_line_fast_simple_leading_neg_16bpp:
SETUP_VSYNC_DEBUG_16BPP_R11
SETUP_TWELVE_BITS_MASK_R14
orr r3, r3, #PSYNC_MASK // only -ve edge (inverted later)
// Select the loop once per line: BIT_OSD set in r3 -> OSD-aware path below.
tst r3, #BIT_OSD
bne OSD_capture_line_fast_simple_leading_neg_16bpp
// Normal path: psync-skip macro, then the plain capture loop.
SKIP_PSYNC_SIMPLE_LEADING_SINGLE_EDGE_FAST
COMMON_SIMPLE
// Preload entry: set up dummy parameters, then jump to the real entry point.
preload_capture_line_fast_simple_leading_neg_16bpp:
SETUP_DUMMY_PARAMETERS
b capture_line_fast_simple_leading_neg_16bpp
// OSD path: same psync-skip macro, then the OSD-aware capture loop
// (OSD_COMMON_SIMPLE ends with pop {pc}, so nothing falls through past it).
OSD_capture_line_fast_simple_leading_neg_16bpp:
SKIP_PSYNC_SIMPLE_LEADING_SINGLE_EDGE_FAST
OSD_COMMON_SIMPLE



.ltorg
.align 6
Expand All @@ -197,11 +306,17 @@ capture_line_fast_simple_trailing_both_16bpp:
// lr is saved here and popped straight into pc at the end of
// COMMON_SIMPLE / OSD_COMMON_SIMPLE.
push {lr}
SETUP_VSYNC_DEBUG_16BPP_R11
SETUP_TWELVE_BITS_MASK_R14
// "both" variant: PSYNC_MASK in r3 is left untouched here.
// Select the loop once per line: BIT_OSD set in r3 -> OSD-aware path below.
tst r3, #BIT_OSD
bne OSD_capture_line_fast_simple_trailing_both_16bpp
// Normal path: psync-skip macro, then the plain capture loop.
SKIP_PSYNC_SIMPLE_TRAILING_FAST
COMMON_SIMPLE
// Preload entry: set up dummy parameters, then jump to the real entry point.
preload_capture_line_fast_simple_trailing_both_16bpp:
SETUP_DUMMY_PARAMETERS
b capture_line_fast_simple_trailing_both_16bpp
// OSD path: same psync-skip macro, then the OSD-aware capture loop.
OSD_capture_line_fast_simple_trailing_both_16bpp:
SKIP_PSYNC_SIMPLE_TRAILING_FAST
OSD_COMMON_SIMPLE


.ltorg
.align 6
Expand All @@ -211,12 +326,16 @@ capture_line_fast_simple_leading_both_16bpp:
// lr is saved here and popped straight into pc at the end of
// COMMON_SIMPLE / OSD_COMMON_SIMPLE.
push {lr}
SETUP_VSYNC_DEBUG_16BPP_R11
SETUP_TWELVE_BITS_MASK_R14
// "both" variant: PSYNC_MASK in r3 is left untouched here.
// Select the loop once per line: BIT_OSD set in r3 -> OSD-aware path below.
tst r3, #BIT_OSD
bne OSD_capture_line_fast_simple_leading_both_16bpp
// Normal path: psync-skip macro, then the plain capture loop.
SKIP_PSYNC_SIMPLE_LEADING_FAST
COMMON_SIMPLE
// Preload entry: set up dummy parameters, then jump to the real entry point.
preload_capture_line_fast_simple_leading_both_16bpp:
SETUP_DUMMY_PARAMETERS
b capture_line_fast_simple_leading_both_16bpp

// OSD path: same psync-skip macro, then the OSD-aware capture loop.
OSD_capture_line_fast_simple_leading_both_16bpp:
SKIP_PSYNC_SIMPLE_LEADING_FAST
OSD_COMMON_SIMPLE

//*********************************************

Expand All @@ -230,13 +349,19 @@ capture_line_default_simple_trailing_pos_16bpp:
SETUP_VSYNC_DEBUG_16BPP_R11
SETUP_TWELVE_BITS_MASK_R14
bic r3, r3, #PSYNC_MASK // only +ve edge (inverted later)
// Select the loop once per line: BIT_OSD set in r3 -> OSD-aware path below.
tst r3, #BIT_OSD
bne OSD_capture_line_default_simple_trailing_pos_16bpp
// Normal path: psync-skip macro, then the plain capture loop
// (COMMON_DEFAULT ends with pop {r0, pc}, so nothing falls through past it).
SKIP_PSYNC_SIMPLE_TRAILING_SINGLE_EDGE_FAST
COMMON_DEFAULT
// Preload entry: set up dummy parameters, then jump to the real entry point.
preload_capture_line_default_simple_trailing_pos_16bpp:
SETUP_DUMMY_PARAMETERS
b capture_line_default_simple_trailing_pos_16bpp

// OSD path: same psync-skip macro, then the OSD-aware capture loop.
OSD_capture_line_default_simple_trailing_pos_16bpp:
SKIP_PSYNC_SIMPLE_TRAILING_SINGLE_EDGE_FAST
OSD_COMMON_DEFAULT
.ltorg


.align 6
// *** 16 bit ***
b preload_capture_line_default_simple_leading_pos_16bpp
Expand All @@ -246,11 +371,16 @@ capture_line_default_simple_leading_pos_16bpp:
SETUP_VSYNC_DEBUG_16BPP_R11
SETUP_TWELVE_BITS_MASK_R14
bic r3, r3, #PSYNC_MASK // only +ve edge (inverted later)
// Select the loop once per line: BIT_OSD set in r3 -> OSD-aware path below.
tst r3, #BIT_OSD
bne OSD_capture_line_default_simple_leading_pos_16bpp
// Normal path: psync-skip macro, then the plain capture loop
// (COMMON_DEFAULT ends with pop {r0, pc}, so nothing falls through past it).
SKIP_PSYNC_SIMPLE_LEADING_SINGLE_EDGE_FAST
COMMON_DEFAULT
// Preload entry: set up dummy parameters, then jump to the real entry point.
preload_capture_line_default_simple_leading_pos_16bpp:
SETUP_DUMMY_PARAMETERS
b capture_line_default_simple_leading_pos_16bpp
// OSD path: same psync-skip macro, then the OSD-aware capture loop.
OSD_capture_line_default_simple_leading_pos_16bpp:
SKIP_PSYNC_SIMPLE_LEADING_SINGLE_EDGE_FAST
OSD_COMMON_DEFAULT

.ltorg
.align 6
Expand All @@ -262,11 +392,17 @@ capture_line_default_simple_trailing_neg_16bpp:
SETUP_VSYNC_DEBUG_16BPP_R11
SETUP_TWELVE_BITS_MASK_R14
orr r3, r3, #PSYNC_MASK // only -ve edge (inverted later)
// Select the loop once per line: BIT_OSD set in r3 -> OSD-aware path below.
tst r3, #BIT_OSD
bne OSD_capture_line_default_simple_trailing_neg_16bpp
// Normal path: psync-skip macro, then the plain capture loop
// (COMMON_DEFAULT ends with pop {r0, pc}, so nothing falls through past it).
SKIP_PSYNC_SIMPLE_TRAILING_SINGLE_EDGE_FAST
COMMON_DEFAULT
// Preload entry: set up dummy parameters, then jump to the real entry point.
preload_capture_line_default_simple_trailing_neg_16bpp:
SETUP_DUMMY_PARAMETERS
b capture_line_default_simple_trailing_neg_16bpp
// OSD path: same psync-skip macro, then the OSD-aware capture loop.
OSD_capture_line_default_simple_trailing_neg_16bpp:
SKIP_PSYNC_SIMPLE_TRAILING_SINGLE_EDGE_FAST
OSD_COMMON_DEFAULT


.ltorg
.align 6
Expand All @@ -278,11 +414,16 @@ capture_line_default_simple_leading_neg_16bpp:
SETUP_VSYNC_DEBUG_16BPP_R11
SETUP_TWELVE_BITS_MASK_R14
orr r3, r3, #PSYNC_MASK // only -ve edge (inverted later)
// Select the loop once per line: BIT_OSD set in r3 -> OSD-aware path below.
tst r3, #BIT_OSD
bne OSD_capture_line_default_simple_leading_neg_16bpp
// Normal path: psync-skip macro, then the plain capture loop
// (COMMON_DEFAULT ends with pop {r0, pc}, so nothing falls through past it).
SKIP_PSYNC_SIMPLE_LEADING_SINGLE_EDGE_FAST
COMMON_DEFAULT
// Preload entry: set up dummy parameters, then jump to the real entry point.
preload_capture_line_default_simple_leading_neg_16bpp:
SETUP_DUMMY_PARAMETERS
b capture_line_default_simple_leading_neg_16bpp
// OSD path: same psync-skip macro, then the OSD-aware capture loop.
OSD_capture_line_default_simple_leading_neg_16bpp:
SKIP_PSYNC_SIMPLE_LEADING_SINGLE_EDGE_FAST
OSD_COMMON_DEFAULT

.ltorg
.align 6
Expand All @@ -293,11 +434,17 @@ capture_line_default_simple_trailing_both_16bpp:
// r2 is parked in r12; COMMON_DEFAULT / OSD_COMMON_DEFAULT copy it back
// into r2 after saving the then-current r2 on the stack.
mov r12, r2
SETUP_VSYNC_DEBUG_16BPP_R11
SETUP_TWELVE_BITS_MASK_R14
// "both" variant: PSYNC_MASK in r3 is left untouched here.
// Select the loop once per line: BIT_OSD set in r3 -> OSD-aware path below.
// The target must be the OSD_ label. Branching to the function's own entry
// label would re-enter it with BIT_OSD still set and never reach the OSD loop.
tst r3, #BIT_OSD
bne OSD_capture_line_default_simple_trailing_both_16bpp
// Normal path: psync-skip macro, then the plain capture loop
// (COMMON_DEFAULT ends with pop {r0, pc}, so nothing falls through past it).
SKIP_PSYNC_SIMPLE_TRAILING_FAST
COMMON_DEFAULT
// Preload entry: set up dummy parameters, then jump to the real entry point.
preload_capture_line_default_simple_trailing_both_16bpp:
SETUP_DUMMY_PARAMETERS
b capture_line_default_simple_trailing_both_16bpp
// OSD path: same psync-skip macro, then the OSD-aware capture loop.
OSD_capture_line_default_simple_trailing_both_16bpp:
SKIP_PSYNC_SIMPLE_TRAILING_FAST
OSD_COMMON_DEFAULT


.ltorg
.align 6
Expand All @@ -308,8 +455,14 @@ capture_line_default_simple_leading_both_16bpp:
// r2 is parked in r12; COMMON_DEFAULT / OSD_COMMON_DEFAULT copy it back
// into r2 after saving the then-current r2 on the stack.
mov r12, r2
SETUP_VSYNC_DEBUG_16BPP_R11
SETUP_TWELVE_BITS_MASK_R14
// "both" variant: PSYNC_MASK in r3 is left untouched here.
// Select the loop once per line: BIT_OSD set in r3 -> OSD-aware path below.
tst r3, #BIT_OSD
bne OSD_capture_line_default_simple_leading_both_16bpp
// Normal path: psync-skip macro, then the plain capture loop
// (COMMON_DEFAULT ends with pop {r0, pc}, so nothing falls through past it).
SKIP_PSYNC_SIMPLE_LEADING_FAST
COMMON_DEFAULT
// Preload entry: set up dummy parameters, then jump to the real entry point.
// A single unconditional branch is enough; anything placed directly after
// it would be unreachable.
preload_capture_line_default_simple_leading_both_16bpp:
SETUP_DUMMY_PARAMETERS
b capture_line_default_simple_leading_both_16bpp
// OSD path: same psync-skip macro, then the OSD-aware capture loop.
OSD_capture_line_default_simple_leading_both_16bpp:
SKIP_PSYNC_SIMPLE_LEADING_FAST
OSD_COMMON_DEFAULT

0 comments on commit ed6087a

Please sign in to comment.