Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport] 8139457: Relax alignment of array elements #67

Merged
merged 4 commits into from
Jun 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1230,7 +1230,7 @@ void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) {
len,
tmp1,
tmp2,
arrayOopDesc::header_size(op->type()),
arrayOopDesc::base_offset_in_bytes(op->type()),
array_element_size(op->type()),
op->klass()->as_register(),
*op->stub()->entry());
Expand Down
15 changes: 12 additions & 3 deletions src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,12 @@ void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register

if (len->is_valid()) {
strw(len, Address(obj, arrayOopDesc::length_offset_in_bytes()));
int base_offset = arrayOopDesc::length_offset_in_bytes() + BytesPerInt;
if (!is_aligned(base_offset, BytesPerWord)) {
assert(is_aligned(base_offset, BytesPerInt), "must be 4-byte aligned");
// Clear gap/first 4 bytes following the length field.
strw(zr, Address(obj, base_offset));
}
} else if (UseCompressedClassPointers) {
store_klass_gap(obj, zr);
}
Expand Down Expand Up @@ -271,7 +277,7 @@ void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register

verify_oop(obj);
}
void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, Register t2, int header_size, int f, Register klass, Label& slow_case) {
void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, Register t2, int base_offset_in_bytes, int f, Register klass, Label& slow_case) {
assert_different_registers(obj, len, t1, t2, klass);

// determine alignment mask
Expand All @@ -284,16 +290,19 @@ void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1,

const Register arr_size = t2; // okay to be the same
// align object end
mov(arr_size, (int32_t)header_size * BytesPerWord + MinObjAlignmentInBytesMask);
mov(arr_size, (int32_t)base_offset_in_bytes + MinObjAlignmentInBytesMask);
add(arr_size, arr_size, len, ext::uxtw, f);
andr(arr_size, arr_size, ~MinObjAlignmentInBytesMask);

try_allocate(obj, arr_size, 0, t1, t2, slow_case);

initialize_header(obj, klass, len, t1, t2);

// Align-up to word boundary, because we clear the 4 bytes potentially
// following the length field in initialize_header().
int base_offset = align_up(base_offset_in_bytes, BytesPerWord);
// clear rest of allocated space
initialize_body(obj, arr_size, header_size * BytesPerWord, t1, t2);
initialize_body(obj, arr_size, base_offset, t1, t2);
if (Compilation::current()->bailed_out()) {
return;
}
Expand Down
4 changes: 2 additions & 2 deletions src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
Expand Down Expand Up @@ -100,7 +100,7 @@ using MacroAssembler::null_check;
// base_offset_in_bytes: offset of the first array element in bytes
// f : element scale factor
// slow_case : exit to slow case implementation if fast allocation fails
void allocate_array(Register obj, Register len, Register t, Register t2, int header_size, int f, Register klass, Label& slow_case);
void allocate_array(Register obj, Register len, Register t, Register t2, int base_offset_in_bytes, int f, Register klass, Label& slow_case);

// Read accessor: returns the current value of _rsp_offset.
int rsp_offset() const {
  return _rsp_offset;
}
// Write accessor: stores n as the new value of _rsp_offset.
void set_rsp_offset(int n) {
  _rsp_offset = n;
}
Expand Down
42 changes: 23 additions & 19 deletions src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5028,12 +5028,22 @@ address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
Label DONE, SAME;
Register tmp1 = rscratch1;
Register tmp2 = rscratch2;
Register cnt2 = tmp2; // cnt2 only used in array length compare
int elem_per_word = wordSize/elem_size;
int log_elem_size = exact_log2(elem_size);
int klass_offset = arrayOopDesc::klass_offset_in_bytes();
int length_offset = arrayOopDesc::length_offset_in_bytes();
int base_offset
= arrayOopDesc::base_offset_in_bytes(elem_size == 2 ? T_CHAR : T_BYTE);
// When the length offset is not aligned to 8 bytes,
// then we align it down. This is valid because the new
// offset will always be the klass which is the same
// for type arrays.
int start_offset = align_down(length_offset, BytesPerWord);
int extra_length = base_offset - start_offset;
assert(start_offset == length_offset || start_offset == klass_offset,
"start offset must be 8-byte-aligned or be the klass offset");
assert(base_offset != start_offset, "must include the length field");
extra_length = extra_length / elem_size; // We count in elements, not bytes.
int stubBytesThreshold = 3 * 64 + (UseSIMDForArrayEquals ? 0 : 16);

assert(elem_size == 1 || elem_size == 2, "must be char or byte");
Expand Down Expand Up @@ -5067,11 +5077,10 @@ address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
// return false;
bind(A_IS_NOT_NULL);
ldrw(cnt1, Address(a1, length_offset));
ldrw(cnt2, Address(a2, length_offset));
eorw(tmp5, cnt1, cnt2);
cbnzw(tmp5, DONE);
lea(a1, Address(a1, base_offset));
lea(a2, Address(a2, base_offset));
// Increase loop counter by diff between base- and actual start-offset.
addw(cnt1, cnt1, extra_length);
lea(a1, Address(a1, start_offset));
lea(a2, Address(a2, start_offset));
// Check for short strings, i.e. smaller than wordSize.
subs(cnt1, cnt1, elem_per_word);
br(Assembler::LT, SHORT);
Expand Down Expand Up @@ -5134,18 +5143,18 @@ address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
cbz(a1, DONE);
ldrw(cnt1, Address(a1, length_offset));
cbz(a2, DONE);
ldrw(cnt2, Address(a2, length_offset));
// Increase loop counter by diff between base- and actual start-offset.
addw(cnt1, cnt1, extra_length);

// on most CPUs a2 is still "locked"(surprisingly) in ldrw and it's
// faster to perform another branch before comparing a1 and a2
cmp(cnt1, (u1)elem_per_word);
br(LE, SHORT); // short or same
ldr(tmp3, Address(pre(a1, base_offset)));
ldr(tmp3, Address(pre(a1, start_offset)));
subs(zr, cnt1, stubBytesThreshold);
br(GE, STUB);
ldr(tmp4, Address(pre(a2, base_offset)));
ldr(tmp4, Address(pre(a2, start_offset)));
sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
cmp(cnt2, cnt1);
br(NE, DONE);

// Main 16 byte comparison loop with 2 exits
bind(NEXT_DWORD); {
Expand Down Expand Up @@ -5177,9 +5186,7 @@ address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
b(LAST_CHECK);

bind(STUB);
ldr(tmp4, Address(pre(a2, base_offset)));
cmp(cnt2, cnt1);
br(NE, DONE);
ldr(tmp4, Address(pre(a2, start_offset)));
if (elem_size == 2) { // convert to byte counter
lsl(cnt1, cnt1, 1);
}
Expand All @@ -5200,12 +5207,9 @@ address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
mov(result, a2);
b(DONE);
bind(SHORT);
cmp(cnt2, cnt1);
br(NE, DONE);
cbz(cnt1, SAME);
sub(tmp5, zr, cnt1, LSL, 3 + log_elem_size);
ldr(tmp3, Address(a1, base_offset));
ldr(tmp4, Address(a2, base_offset));
ldr(tmp3, Address(a1, start_offset));
ldr(tmp4, Address(a2, start_offset));
bind(LAST_CHECK);
eor(tmp4, tmp3, tmp4);
lslv(tmp5, tmp4, tmp5);
Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -971,7 +971,7 @@ void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) {
op->tmp1()->as_register(),
op->tmp2()->as_register(),
op->tmp3()->as_register(),
arrayOopDesc::header_size(op->type()),
arrayOopDesc::base_offset_in_bytes(op->type()),
type2aelembytes(op->type()),
op->klass()->as_register(),
*op->stub()->entry());
Expand Down
4 changes: 2 additions & 2 deletions src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2023 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
Expand Down Expand Up @@ -2300,7 +2300,7 @@ void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) {
op->tmp1()->as_register(),
op->tmp2()->as_register(),
op->tmp3()->as_register(),
arrayOopDesc::header_size(op->type()),
arrayOopDesc::base_offset_in_bytes(op->type()),
type2aelembytes(op->type()),
op->klass()->as_register(),
*op->stub()->entry());
Expand Down
20 changes: 15 additions & 5 deletions src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2018 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
Expand Down Expand Up @@ -333,7 +333,7 @@ void C1_MacroAssembler::allocate_array(
Register t1, // temp register
Register t2, // temp register
Register t3, // temp register
int hdr_size, // object header size in words
int base_offset_in_bytes, // elements offset in bytes
int elt_size, // element size in bytes
Register klass, // object klass
Label& slow_case // continuation point if fast allocation fails
Expand Down Expand Up @@ -365,7 +365,7 @@ void C1_MacroAssembler::allocate_array(
sldi(t1, len, log2_elt_size);
arr_len_in_bytes = t1;
}
addi(arr_size, arr_len_in_bytes, hdr_size * wordSize + MinObjAlignmentInBytesMask); // Add space for header & alignment.
addi(arr_size, arr_len_in_bytes, base_offset_in_bytes + MinObjAlignmentInBytesMask); // Add space for header & alignment.
clrrdi(arr_size, arr_size, LogMinObjAlignmentInBytes); // Align array size.

// Allocate space & initialize header.
Expand All @@ -375,8 +375,18 @@ void C1_MacroAssembler::allocate_array(
// Initialize body.
const Register base = t2;
const Register index = t3;
addi(base, obj, hdr_size * wordSize); // compute address of first element
addi(index, arr_size, -(hdr_size * wordSize)); // compute index = number of bytes to clear
addi(base, obj, base_offset_in_bytes); // compute address of first element
addi(index, arr_size, -(base_offset_in_bytes)); // compute index = number of bytes to clear

// Zero first 4 bytes, if start offset is not word aligned.
if (!is_aligned(base_offset_in_bytes, BytesPerWord)) {
assert(is_aligned(base_offset_in_bytes, BytesPerInt), "must be 4-byte aligned");
li(t1, 0);
stw(t1, 0, base);
addi(base, base, BytesPerInt);
// Note: initialize_body will align index down, no need to correct it here.
}

initialize_body(base, index);

if (CURRENT_ENV->dtrace_alloc_probes()) {
Expand Down
4 changes: 2 additions & 2 deletions src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
Expand Down Expand Up @@ -80,7 +80,7 @@
Register t1, // temp register
Register t2, // temp register
Register t3, // temp register
int hdr_size, // object header size in words
int base_offset_in_bytes, // elements offset in bytes
int elt_size, // element size in bytes
Register klass, // object klass
Label& slow_case // continuation point if fast allocation fails
Expand Down
4 changes: 2 additions & 2 deletions src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
* Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
Expand Down Expand Up @@ -1040,7 +1040,7 @@ void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) {
len,
tmp1,
tmp2,
arrayOopDesc::header_size(op->type()),
arrayOopDesc::base_offset_in_bytes(op->type()),
array_element_size(op->type()),
op->klass()->as_register(),
*op->stub()->entry());
Expand Down
18 changes: 14 additions & 4 deletions src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
Expand Down Expand Up @@ -181,6 +181,12 @@ void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register

if (len->is_valid()) {
sw(len, Address(obj, arrayOopDesc::length_offset_in_bytes()));
int base_offset = arrayOopDesc::length_offset_in_bytes() + BytesPerInt;
if (!is_aligned(base_offset, BytesPerWord)) {
assert(is_aligned(base_offset, BytesPerInt), "must be 4-byte aligned");
// Clear gap/first 4 bytes following the length field.
sw(zr, Address(obj, base_offset));
}
} else if (UseCompressedClassPointers) {
store_klass_gap(obj, zr);
}
Expand Down Expand Up @@ -280,7 +286,7 @@ void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register
verify_oop(obj);
}

void C1_MacroAssembler::allocate_array(Register obj, Register len, Register tmp1, Register tmp2, int header_size, int f, Register klass, Label& slow_case) {
void C1_MacroAssembler::allocate_array(Register obj, Register len, Register tmp1, Register tmp2, int base_offset_in_bytes, int f, Register klass, Label& slow_case) {
assert_different_registers(obj, len, tmp1, tmp2, klass);

// determine alignment mask
Expand All @@ -292,17 +298,21 @@ void C1_MacroAssembler::allocate_array(Register obj, Register len, Register tmp1

const Register arr_size = tmp2; // okay to be the same
// align object end
mv(arr_size, (int32_t)header_size * BytesPerWord + MinObjAlignmentInBytesMask);
mv(arr_size, (int32_t)base_offset_in_bytes + MinObjAlignmentInBytesMask);
shadd(arr_size, len, arr_size, t0, f);
andi(arr_size, arr_size, ~(uint)MinObjAlignmentInBytesMask);

try_allocate(obj, arr_size, 0, tmp1, tmp2, slow_case);

initialize_header(obj, klass, len, tmp1, tmp2);

// Align-up to word boundary, because we clear the 4 bytes potentially
// following the length field in initialize_header().
int base_offset = align_up(base_offset_in_bytes, BytesPerWord);

// clear rest of allocated space
const Register len_zero = len;
initialize_body(obj, arr_size, header_size * BytesPerWord, len_zero);
initialize_body(obj, arr_size, base_offset, len_zero);

membar(MacroAssembler::StoreStore);

Expand Down
4 changes: 2 additions & 2 deletions src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
* Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
Expand Down Expand Up @@ -101,7 +101,7 @@ using MacroAssembler::null_check;
// base_offset_in_bytes: offset of the first array element in bytes
// f : element scale factor
// slow_case : exit to slow case implementation if fast allocation fails
void allocate_array(Register obj, Register len, Register tmp1, Register tmp2, int header_size, int f, Register klass, Label& slow_case);
void allocate_array(Register obj, Register len, Register tmp1, Register tmp2, int base_offset_in_bytes, int f, Register klass, Label& slow_case);

// Read accessor: returns the current value of _rsp_offset.
int rsp_offset() const {
  return _rsp_offset;
}

Expand Down
2 changes: 1 addition & 1 deletion src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2385,7 +2385,7 @@ void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) {
op->len()->as_register(),
op->tmp1()->as_register(),
op->tmp2()->as_register(),
arrayOopDesc::header_size(op->type()),
arrayOopDesc::base_offset_in_bytes(op->type()),
type2aelembytes(op->type()),
op->klass()->as_register(),
*op->stub()->entry());
Expand Down
12 changes: 6 additions & 6 deletions src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2023 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
Expand Down Expand Up @@ -296,7 +296,7 @@ void C1_MacroAssembler::allocate_array(
Register len, // array length
Register t1, // temp register
Register t2, // temp register
int hdr_size, // object header size in words
int base_offset_in_bytes, // elements offset in bytes
int elt_size, // element size in bytes
Register klass, // object klass
Label& slow_case // Continuation point if fast allocation fails.
Expand All @@ -322,8 +322,8 @@ void C1_MacroAssembler::allocate_array(
case 8: z_sllg(arr_size, len, 3); break;
default: ShouldNotReachHere();
}
add2reg(arr_size, hdr_size * wordSize + MinObjAlignmentInBytesMask); // Add space for header & alignment.
z_nill(arr_size, (~MinObjAlignmentInBytesMask) & 0xffff); // Align array size.
add2reg(arr_size, base_offset_in_bytes + MinObjAlignmentInBytesMask); // Add space for header & alignment.
z_nill(arr_size, (~MinObjAlignmentInBytesMask) & 0xffff); // Align array size.

try_allocate(obj, arr_size, 0, t1, slow_case);

Expand All @@ -333,9 +333,9 @@ void C1_MacroAssembler::allocate_array(
Label done;
Register object_fields = t1;
Register Rzero = Z_R1_scratch;
z_aghi(arr_size, -(hdr_size * BytesPerWord));
z_aghi(arr_size, -base_offset_in_bytes);
z_bre(done); // Jump if size of fields is zero.
z_la(object_fields, hdr_size * BytesPerWord, obj);
z_la(object_fields, base_offset_in_bytes, obj);
z_xgr(Rzero, Rzero);
initialize_body(object_fields, arr_size, Rzero);
bind(done);
Expand Down
4 changes: 2 additions & 2 deletions src/hotspot/cpu/s390/c1_MacroAssembler_s390.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2024, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2023 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
Expand Down Expand Up @@ -86,7 +86,7 @@
Register len, // array length
Register t1, // temp register
Register t2, // temp register
int hdr_size, // object header size in words
int base_offset_in_bytes, // elements offset in bytes
int elt_size, // element size in bytes
Register klass, // object klass
Label& slow_case // Continuation point if fast allocation fails.
Expand Down
Loading
Loading