lib: better optimized casecompare() and ncasecompare()#16311
Closed
sergio-nsk wants to merge 1 commit intocurl:masterfrom
Closed
lib: better optimized casecompare() and ncasecompare()#16311sergio-nsk wants to merge 1 commit intocurl:masterfrom
sergio-nsk wants to merge 1 commit intocurl:masterfrom
Conversation
Fewer `jne` or `je` CPU instructions.
Contributor
Author
|
Compare Clang 19.1.0 with -O3
casecompare_old: casecompare_new:
movzx edx, byte ptr [rdi] movzx ecx, byte ptr [rdi]
test dl, dl test cl, cl
je .LBB1_1 je .LBB2_5
inc rdi inc rdi
lea rcx, [rip + touppermap] lea rax, [rip + touppermap]
.LBB1_3: .LBB2_2:
movzx r8d, byte ptr [rsi] movzx ecx, cl
test r8, r8 movzx ecx, byte ptr [rcx + rax]
setne al movzx edx, byte ptr [rsi]
je .LBB1_7 cmp cl, byte ptr [rdx + rax]
movzx edx, dl jne .LBB2_3
movzx edx, byte ptr [rdx + rcx] inc rsi
cmp dl, byte ptr [r8 + rcx] movzx ecx, byte ptr [rdi]
jne .LBB1_5 inc rdi
inc rsi test cl, cl
movzx edx, byte ptr [rdi] jne .LBB2_2
inc rdi .LBB2_5:
test dl, dl xor eax, eax
jne .LBB1_3 cmp byte ptr [rsi], 0
jmp .LBB1_7 sete al
.LBB1_1: ret
mov al, 1 .LBB2_3:
.LBB1_7: xor eax, eax
cmp byte ptr [rsi], 0 ret
setne cl
xor cl, al
movzx eax, cl
ret
.LBB1_5:
xor eax, eax
ret
ncasecompare_old: ncasecompare_new:
movzx eax, byte ptr [rdi] movzx ecx, byte ptr [rdi]
test al, al test cl, cl
je .LBB3_1 sete al
inc rdi test rdx, rdx
lea rcx, [rip + touppermap] sete r8b
.LBB3_3: or r8b, al
movzx r8d, byte ptr [rsi] jne .LBB4_6
test r8, r8 lea r8, [rdx - 1]
je .LBB3_9 xor eax, eax
test rdx, rdx lea r9, [rip + touppermap]
je .LBB3_9 xor r10d, r10d
movzx eax, al .LBB4_2:
movzx eax, byte ptr [rax + rcx] movzx ecx, cl
cmp al, byte ptr [r8 + rcx] movzx ecx, byte ptr [rcx + r9]
jne .LBB3_6 movzx r11d, byte ptr [rsi + r10]
dec rdx cmp cl, byte ptr [r11 + r9]
inc rsi jne .LBB4_9
movzx eax, byte ptr [rdi] movzx ecx, byte ptr [rdi + r10 + 1]
inc rdi lea r11, [r10 + 1]
test al, al test cl, cl
jne .LBB3_3 je .LBB4_5
xor eax, eax cmp r8, r10
.LBB3_9: mov r10, r11
movzx eax, al jne .LBB4_2
test rdx, rdx .LBB4_5:
je .LBB3_11 sub rdx, r11
.LBB3_12: add rsi, r11
lea rcx, [rip + touppermap] .LBB4_6:
movzx edx, byte ptr [rax + rcx] test rdx, rdx
movzx esi, byte ptr [rsi] je .LBB4_7
xor eax, eax movzx eax, cl
cmp dl, byte ptr [rsi + rcx] lea rcx, [rip + touppermap]
sete al movzx edx, byte ptr [rax + rcx]
ret movzx esi, byte ptr [rsi]
.LBB3_1: xor eax, eax
xor eax, eax cmp dl, byte ptr [rsi + rcx]
test rdx, rdx sete al
jne .LBB3_12 .LBB4_9:
.LBB3_11: ret
mov eax, 1 .LBB4_7:
ret mov eax, 1
.LBB3_6: ret
xor eax, eax
ret |
Contributor
|
My gut reaction was that removing the check of |
|
Analysis of PR #16311 at 4fe08e05: Test 2048 failed, which has NOT been flaky recently, so there could be a real issue in this PR. Test 363 failed, which has NOT been flaky recently, so there could be a real issue in this PR. Generated by Testclutch |
Contributor
|
lgtm.🎉 |
Member
|
Thanks! |
pps83
pushed a commit
to pps83/curl
that referenced
this pull request
Apr 26, 2025
Fewer `jne` or `je` CPU instructions. Closes curl#16311
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.This suggestion is invalid because no changes were made to the code.Suggestions cannot be applied while the pull request is closed.Suggestions cannot be applied while viewing a subset of changes.Only one suggestion per line can be applied in a batch.Add this suggestion to a batch that can be applied as a single commit.Applying suggestions on deleted lines is not supported.You must change the existing code in this line in order to create a valid suggestion.Outdated suggestions cannot be applied.This suggestion has been applied or marked resolved.Suggestions cannot be applied from pending reviews.Suggestions cannot be applied on multi-line comments.Suggestions cannot be applied while the pull request is queued to merge.Suggestion cannot be applied right now. Please check back later.
Fewer `jne` or `je` CPU instructions.
Compare GCC 14.2 with -O3