diff --git a/src/capture_line_fast_simple_16bpp.S b/src/capture_line_fast_simple_16bpp.S index af59d50d..0531598b 100644 --- a/src/capture_line_fast_simple_16bpp.S +++ b/src/capture_line_fast_simple_16bpp.S @@ -20,6 +20,31 @@ .global capture_line_default_simple_leading_both_16bpp .macro SIMPLE_CAPTURE_TWELVE_BITS_16BPP_LO reg + // Pixel in GPIO 13.. 2 -> 15.. 0 + and r9, r8, #0x0f << PIXEL_BASE + eor r10, \reg, r9, lsr #(PIXEL_BASE - 1) + and r12, r8, #0x0f << (PIXEL_BASE + 4) + eor r10, r10, r12, lsl #(3 - PIXEL_BASE) + and r9, r8, #0x0f << (PIXEL_BASE + 8) + tst r8, #MUX_MASK + eor r10, r10, r9, lsl #(4 - PIXEL_BASE) + orrne r3, #BIT_PROBE // NE comes from the tst of r8 against MUX_MASK above; the eor in between does not set flags +.endm + +.macro SIMPLE_CAPTURE_TWELVE_BITS_16BPP_HI reg + // Pixel in GPIO 13.. 2 -> 31.. 16 + and r9, r8, #0x0f << PIXEL_BASE + eor r10, r10, r9, lsl #(16 - (PIXEL_BASE - 1)) + and r12, r8, #0x0f << (PIXEL_BASE + 4) + eor r10, r10, r12, lsl #(16 + (3 - PIXEL_BASE)) + and r9, r8, #0x0f << (PIXEL_BASE + 8) + eor r10, r10, r9, lsl #(16 + (4 - PIXEL_BASE)) + and r9, r10, r14 // extract high order bits + orr \reg, r10, r9, lsr #4 // put high order bits in unused low order bits to ensure full range 5r 6g 5b +.endm + + +.macro OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_LO reg // Pixel in GPIO 13.. 2 -> 15.. 0 and r9, r8, #0x0f << PIXEL_BASE eor r10, \reg, r9, lsr #(PIXEL_BASE - 1) @@ -27,17 +52,17 @@ eor r10, r10, r12, lsl #(3 - PIXEL_BASE) and r9, r8, #0x0f << (PIXEL_BASE + 8) eor r12, r10, r9, lsl #(4 - PIXEL_BASE) - and r9, r12, r14 // extract high order bits tst r3, #BIT_OSD + and r9, r12, r14 // extract high order bits movne r12, r12, lsr #1 // half intensity for menu (low order bits already 0 orreq r12, r12, r9, lsr #4 // put high order bits in unused low order bits to ensure full range 5r 6g 5b tst r8, #MUX_MASK orrne r3, #BIT_PROBE - movne r12, #0xff00 + orrne r12, #0xff00 // together with the next orrne this sets bits 15..0 of r12 orrne r12, #0x00ff .endm -.macro SIMPLE_CAPTURE_TWELVE_BITS_16BPP_HI reg +.macro OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_HI reg // Pixel in GPIO 13.. 2 -> 31.. 
16 and r9, r8, #0x0f << PIXEL_BASE mov r10, r9, lsl #(16 - (PIXEL_BASE - 1)) @@ -45,8 +70,8 @@ eor r10, r10, r9, lsl #(16 + (3 - PIXEL_BASE)) and r9, r8, #0x0f << (PIXEL_BASE + 8) eor r10, r10, r9, lsl #(16 + (4 - PIXEL_BASE)) + tst r3, #BIT_OSD and r9, r10, r14 // extract high order bits - tst r3, #BIT_OSD movne r10, r10, lsr #1 // half intensity for menu (low order bits already 0 orreq r10, r10, r9, lsr #4 // put high order bits in unused low order bits to ensure full range 5r 6g 5b tst r8, #MUX_MASK @@ -82,39 +107,96 @@ loop_16bpp_simple\@: mov r0, r2 pop {pc} +.endm +.macro OSD_COMMON_SIMPLE +loop_16bpp_osd_simple\@: + WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8 + OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8 + WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8 + OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_HI r5 // input in r8 + WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8 + OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8 + WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8 + OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_HI r6 // input in r8 + + WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8 + OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8 + WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8 + OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_HI r7 // input in r8 + WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8 + OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8 + WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8 + OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_HI r10 // input in r8 + stmia r0!, {r5, r6, r7, r10} // store r5, r6, r7, r10 at [r0] and advance r0 past them + + subs r1, r1, #1 + bne loop_16bpp_osd_simple\@ // loop until the r1 count reaches zero + + mov r0, r2 + pop {pc} .endm + .macro COMMON_DEFAULT push {r2} mov r2, r12 loop_16bpp_default\@: WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8 - CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8 + SIMPLE_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8 
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8 - CAPTURE_TWELVE_BITS_16BPP_HI r5 // input in r8 + SIMPLE_CAPTURE_TWELVE_BITS_16BPP_HI r5 // input in r8 WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8 - CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8 + SIMPLE_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8 WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8 - CAPTURE_TWELVE_BITS_16BPP_HI r6 // input in r8 + SIMPLE_CAPTURE_TWELVE_BITS_16BPP_HI r6 // input in r8 WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8 - CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8 + SIMPLE_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8 WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8 - CAPTURE_TWELVE_BITS_16BPP_HI r7 // input in r8 + SIMPLE_CAPTURE_TWELVE_BITS_16BPP_HI r7 // input in r8 WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8 - CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8 + SIMPLE_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8 WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8 - CAPTURE_TWELVE_BITS_16BPP_HI r10 // input in r8 + SIMPLE_CAPTURE_TWELVE_BITS_16BPP_HI r10 // input in r8 WRITE_R5_R6_R7_R10_16BPP subs r1, r1, #1 bne loop_16bpp_default\@ pop {r0, pc} +.endm +.macro OSD_COMMON_DEFAULT + push {r2} + mov r2, r12 // r2 was saved by the push above before being overwritten with r12 +loop_16bpp_osd_default\@: + WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8 + OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8 + WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8 + OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_HI r5 // input in r8 + WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8 + OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8 + WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8 + OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_HI r6 // input in r8 + + WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8 + OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8 + 
WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8 + OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_HI r7 // input in r8 + WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8 + OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_LO r11 // input in r8 + WAIT_FOR_PSYNC_EDGE_FAST // expects GPLEV0 in r4, result in r8 + OSD_SIMPLE_CAPTURE_TWELVE_BITS_16BPP_HI r10 // input in r8 + WRITE_R5_R6_R7_R10_16BPP + + subs r1, r1, #1 + bne loop_16bpp_osd_default\@ // loop until the r1 count reaches zero + + pop {r0, pc} .endm + // The capture line function is provided the following: // r0 = pointer to current line in frame buffer // r1 = number of complete psync cycles to capture (=param_chars_per_line) @@ -138,11 +220,18 @@ capture_line_fast_simple_trailing_pos_16bpp: SETUP_VSYNC_DEBUG_16BPP_R11 SETUP_TWELVE_BITS_MASK_R14 bic r3, r3, #PSYNC_MASK // only +ve edge (inverted later) + tst r3, #BIT_OSD + bne OSD_capture_line_fast_simple_trailing_pos_16bpp // BIT_OSD set in r3: continue at the OSD_ variant below SKIP_PSYNC_SIMPLE_TRAILING_SINGLE_EDGE_FAST COMMON_SIMPLE preload_capture_line_fast_simple_trailing_pos_16bpp: SETUP_DUMMY_PARAMETERS b capture_line_fast_simple_trailing_pos_16bpp +OSD_capture_line_fast_simple_trailing_pos_16bpp: + SKIP_PSYNC_SIMPLE_TRAILING_SINGLE_EDGE_FAST + OSD_COMMON_SIMPLE + + .ltorg .align 6 @@ -153,26 +242,39 @@ capture_line_fast_simple_leading_pos_16bpp: SETUP_VSYNC_DEBUG_16BPP_R11 SETUP_TWELVE_BITS_MASK_R14 bic r3, r3, #PSYNC_MASK // only +ve edge (inverted later) + tst r3, #BIT_OSD + bne OSD_capture_line_fast_simple_leading_pos_16bpp // BIT_OSD set in r3: continue at the OSD_ variant below SKIP_PSYNC_SIMPLE_LEADING_SINGLE_EDGE_FAST COMMON_SIMPLE preload_capture_line_fast_simple_leading_pos_16bpp: SETUP_DUMMY_PARAMETERS b capture_line_fast_simple_leading_pos_16bpp +OSD_capture_line_fast_simple_leading_pos_16bpp: + SKIP_PSYNC_SIMPLE_LEADING_SINGLE_EDGE_FAST + OSD_COMMON_SIMPLE + + .ltorg .align 6 - // *** 16 bit *** + // *** 16 bit *** *** this one used by amiga b preload_capture_line_fast_simple_trailing_neg_16bpp capture_line_fast_simple_trailing_neg_16bpp: push {lr} SETUP_VSYNC_DEBUG_16BPP_R11 
SETUP_TWELVE_BITS_MASK_R14 orr r3, r3, #PSYNC_MASK // only -ve edge (inverted later) + tst r3, #BIT_OSD + bne OSD_capture_line_fast_simple_trailing_neg_16bpp // BIT_OSD set in r3: continue at the OSD_ variant below SKIP_PSYNC_SIMPLE_TRAILING_SINGLE_EDGE_FAST COMMON_SIMPLE preload_capture_line_fast_simple_trailing_neg_16bpp: SETUP_DUMMY_PARAMETERS b capture_line_fast_simple_trailing_neg_16bpp +OSD_capture_line_fast_simple_trailing_neg_16bpp: + SKIP_PSYNC_SIMPLE_TRAILING_SINGLE_EDGE_FAST + OSD_COMMON_SIMPLE + .ltorg .align 6 @@ -183,11 +285,18 @@ capture_line_fast_simple_leading_neg_16bpp: SETUP_VSYNC_DEBUG_16BPP_R11 SETUP_TWELVE_BITS_MASK_R14 orr r3, r3, #PSYNC_MASK // only -ve edge (inverted later) + tst r3, #BIT_OSD + bne OSD_capture_line_fast_simple_leading_neg_16bpp // BIT_OSD set in r3: continue at the OSD_ variant below SKIP_PSYNC_SIMPLE_LEADING_SINGLE_EDGE_FAST COMMON_SIMPLE preload_capture_line_fast_simple_leading_neg_16bpp: SETUP_DUMMY_PARAMETERS b capture_line_fast_simple_leading_neg_16bpp +OSD_capture_line_fast_simple_leading_neg_16bpp: + SKIP_PSYNC_SIMPLE_LEADING_SINGLE_EDGE_FAST + OSD_COMMON_SIMPLE + + .ltorg .align 6 @@ -197,11 +306,17 @@ capture_line_fast_simple_trailing_both_16bpp: push {lr} SETUP_VSYNC_DEBUG_16BPP_R11 SETUP_TWELVE_BITS_MASK_R14 + tst r3, #BIT_OSD + bne OSD_capture_line_fast_simple_trailing_both_16bpp // BIT_OSD set in r3: continue at the OSD_ variant below SKIP_PSYNC_SIMPLE_TRAILING_FAST COMMON_SIMPLE preload_capture_line_fast_simple_trailing_both_16bpp: SETUP_DUMMY_PARAMETERS b capture_line_fast_simple_trailing_both_16bpp +OSD_capture_line_fast_simple_trailing_both_16bpp: + SKIP_PSYNC_SIMPLE_TRAILING_FAST + OSD_COMMON_SIMPLE + .ltorg .align 6 @@ -211,12 +326,16 @@ capture_line_fast_simple_leading_both_16bpp: push {lr} SETUP_VSYNC_DEBUG_16BPP_R11 SETUP_TWELVE_BITS_MASK_R14 + tst r3, #BIT_OSD + bne OSD_capture_line_fast_simple_leading_both_16bpp // BIT_OSD set in r3: continue at the OSD_ variant below SKIP_PSYNC_SIMPLE_LEADING_FAST COMMON_SIMPLE preload_capture_line_fast_simple_leading_both_16bpp: SETUP_DUMMY_PARAMETERS b capture_line_fast_simple_leading_both_16bpp - +OSD_capture_line_fast_simple_leading_both_16bpp: + SKIP_PSYNC_SIMPLE_LEADING_FAST + 
OSD_COMMON_SIMPLE //********************************************* @@ -230,13 +349,19 @@ capture_line_default_simple_trailing_pos_16bpp: SETUP_VSYNC_DEBUG_16BPP_R11 SETUP_TWELVE_BITS_MASK_R14 bic r3, r3, #PSYNC_MASK // only +ve edge (inverted later) + tst r3, #BIT_OSD + bne OSD_capture_line_default_simple_trailing_pos_16bpp // BIT_OSD set in r3: continue at the OSD_ variant below SKIP_PSYNC_SIMPLE_TRAILING_SINGLE_EDGE_FAST COMMON_DEFAULT preload_capture_line_default_simple_trailing_pos_16bpp: SETUP_DUMMY_PARAMETERS b capture_line_default_simple_trailing_pos_16bpp - +OSD_capture_line_default_simple_trailing_pos_16bpp: + SKIP_PSYNC_SIMPLE_TRAILING_SINGLE_EDGE_FAST + OSD_COMMON_DEFAULT .ltorg + + .align 6 // *** 16 bit *** b preload_capture_line_default_simple_leading_pos_16bpp @@ -246,11 +371,16 @@ capture_line_default_simple_leading_pos_16bpp: SETUP_VSYNC_DEBUG_16BPP_R11 SETUP_TWELVE_BITS_MASK_R14 bic r3, r3, #PSYNC_MASK // only +ve edge (inverted later) + tst r3, #BIT_OSD + bne OSD_capture_line_default_simple_leading_pos_16bpp // BIT_OSD set in r3: continue at the OSD_ variant below SKIP_PSYNC_SIMPLE_LEADING_SINGLE_EDGE_FAST COMMON_DEFAULT preload_capture_line_default_simple_leading_pos_16bpp: SETUP_DUMMY_PARAMETERS b capture_line_default_simple_leading_pos_16bpp +OSD_capture_line_default_simple_leading_pos_16bpp: + SKIP_PSYNC_SIMPLE_LEADING_SINGLE_EDGE_FAST + OSD_COMMON_DEFAULT .ltorg .align 6 @@ -262,11 +392,17 @@ capture_line_default_simple_trailing_neg_16bpp: SETUP_VSYNC_DEBUG_16BPP_R11 SETUP_TWELVE_BITS_MASK_R14 orr r3, r3, #PSYNC_MASK // only -ve edge (inverted later) + tst r3, #BIT_OSD + bne OSD_capture_line_default_simple_trailing_neg_16bpp // BIT_OSD set in r3: continue at the OSD_ variant below SKIP_PSYNC_SIMPLE_TRAILING_SINGLE_EDGE_FAST COMMON_DEFAULT preload_capture_line_default_simple_trailing_neg_16bpp: SETUP_DUMMY_PARAMETERS b capture_line_default_simple_trailing_neg_16bpp +OSD_capture_line_default_simple_trailing_neg_16bpp: + SKIP_PSYNC_SIMPLE_TRAILING_SINGLE_EDGE_FAST + OSD_COMMON_DEFAULT + .ltorg .align 6 @@ -278,11 +414,16 @@ capture_line_default_simple_leading_neg_16bpp: SETUP_VSYNC_DEBUG_16BPP_R11 
SETUP_TWELVE_BITS_MASK_R14 orr r3, r3, #PSYNC_MASK // only -ve edge (inverted later) + tst r3, #BIT_OSD + bne OSD_capture_line_default_simple_leading_neg_16bpp // BIT_OSD set in r3: continue at the OSD_ variant below SKIP_PSYNC_SIMPLE_LEADING_SINGLE_EDGE_FAST COMMON_DEFAULT preload_capture_line_default_simple_leading_neg_16bpp: SETUP_DUMMY_PARAMETERS b capture_line_default_simple_leading_neg_16bpp +OSD_capture_line_default_simple_leading_neg_16bpp: + SKIP_PSYNC_SIMPLE_LEADING_SINGLE_EDGE_FAST + OSD_COMMON_DEFAULT .ltorg .align 6 @@ -293,11 +434,17 @@ capture_line_default_simple_trailing_both_16bpp: mov r12, r2 SETUP_VSYNC_DEBUG_16BPP_R11 SETUP_TWELVE_BITS_MASK_R14 + tst r3, #BIT_OSD + bne OSD_capture_line_default_simple_trailing_both_16bpp // BIT_OSD set in r3: continue at the OSD_ variant below (must not target this function's own entry label) SKIP_PSYNC_SIMPLE_TRAILING_FAST COMMON_DEFAULT preload_capture_line_default_simple_trailing_both_16bpp: SETUP_DUMMY_PARAMETERS b capture_line_default_simple_trailing_both_16bpp +OSD_capture_line_default_simple_trailing_both_16bpp: + SKIP_PSYNC_SIMPLE_TRAILING_FAST + OSD_COMMON_DEFAULT + .ltorg .align 6 @@ -308,8 +455,14 @@ capture_line_default_simple_leading_both_16bpp: mov r12, r2 SETUP_VSYNC_DEBUG_16BPP_R11 SETUP_TWELVE_BITS_MASK_R14 + tst r3, #BIT_OSD + bne OSD_capture_line_default_simple_leading_both_16bpp // BIT_OSD set in r3: continue at the OSD_ variant below SKIP_PSYNC_SIMPLE_LEADING_FAST COMMON_DEFAULT preload_capture_line_default_simple_leading_both_16bpp: SETUP_DUMMY_PARAMETERS - b capture_line_default_simple_leading_both_16bpp \ No newline at end of file + b capture_line_default_simple_leading_both_16bpp +OSD_capture_line_default_simple_leading_both_16bpp: + SKIP_PSYNC_SIMPLE_LEADING_FAST + OSD_COMMON_DEFAULT + diff --git a/src/rgb_to_fb.S b/src/rgb_to_fb.S index bffa92c4..4a3a3b27 100644 --- a/src/rgb_to_fb.S +++ b/src/rgb_to_fb.S @@ -665,14 +665,20 @@ skip_line_loop: skip_line_loop_exit: push {r1-r5, r11} - + push {r3} + orr r3, r3, #BIT_OSD + ldr r12, capture_address + sub r12, r12, #4 + // Call preload capture line function (runs all paths of capture code to preload it into cache - OSD version) + // waits for csync so loses one 
line + blx r12 + pop {r3} // restore the r3 pushed before BIT_OSD was forced on for the OSD preload call ldr r12, capture_address sub r12, r12, #4 // Call preload capture line function (runs all paths of capture code to preload it into cache) // waits for csync so loses one line blx r12 pop {r1-r5, r11} - mov r6, #0 str r6, total_hsync_period @@ -736,7 +742,8 @@ process_line_loop: // Load the address of the capture_line function into r12 mov r0, r11 - + tst r3, #BIT_OSD + orrne r3, r3, #BIT_NO_SCANLINES // disable scanlines if OSD is on // Call capture line function blx r12 // exits with h sync timestamp in r0