Skip to content

Commit

Permalink
VM: Optimize RegExp.matchAsPrefix(...) by generating a sticky RegExp …
Browse files Browse the repository at this point in the history
…specialization.

This is equivalent to the sticky (`y`) RegExp flag in ES2015.

Overlay some RegExp fields on top of each other, given that they should never be used simultaneously.

BUG=http://dartbug.com/27810
R=rmacnak@google.com

Review URL: https://codereview.chromium.org/2510783002 .
  • Loading branch information
mraleph committed Nov 17, 2016
1 parent 547b8f4 commit 2403444
Show file tree
Hide file tree
Showing 23 changed files with 249 additions and 100 deletions.
25 changes: 20 additions & 5 deletions runtime/lib/regexp.cc
Original file line number Diff line number Diff line change
Expand Up @@ -78,19 +78,34 @@ DEFINE_NATIVE_ENTRY(RegExp_getGroupCount, 1) {
}


DEFINE_NATIVE_ENTRY(RegExp_ExecuteMatch, 3) {
// This function is intrinsified. See Intrinsifier::RegExp_ExecuteMatch.
static RawObject* ExecuteMatch(Zone* zone,
NativeArguments* arguments,
bool sticky) {
const RegExp& regexp = RegExp::CheckedHandle(arguments->NativeArgAt(0));
ASSERT(!regexp.IsNull());
GET_NON_NULL_NATIVE_ARGUMENT(String, subject, arguments->NativeArgAt(1));
GET_NON_NULL_NATIVE_ARGUMENT(Smi, start_index, arguments->NativeArgAt(2));

if (FLAG_interpret_irregexp || FLAG_precompiled_runtime) {
if (FLAG_interpret_irregexp) {
return BytecodeRegExpMacroAssembler::Interpret(regexp, subject, start_index,
zone);
/*sticky=*/sticky, zone);
}

return IRRegExpMacroAssembler::Execute(regexp, subject, start_index, zone);
return IRRegExpMacroAssembler::Execute(regexp, subject, start_index,
/*sticky=*/sticky, zone);
}


// Native entry for the non-sticky regexp match. All of the work is done by
// the shared ExecuteMatch helper; this entry only fixes sticky to false.
DEFINE_NATIVE_ENTRY(RegExp_ExecuteMatch, 3) {
// This function is intrinsified. See Intrinsifier::RegExp_ExecuteMatch.
return ExecuteMatch(zone, arguments, /*sticky=*/false);
}


// Native entry for the sticky regexp match. All of the work is done by the
// shared ExecuteMatch helper; this entry only fixes sticky to true.
DEFINE_NATIVE_ENTRY(RegExp_ExecuteMatchSticky, 3) {
// This function is intrinsified. See Intrinsifier::RegExp_ExecuteMatchSticky.
return ExecuteMatch(zone, arguments, /*sticky=*/true);
}


} // namespace dart
8 changes: 4 additions & 4 deletions runtime/lib/regexp_patch.dart
Original file line number Diff line number Diff line change
Expand Up @@ -160,11 +160,8 @@ class _RegExp implements RegExp {
if (start < 0 || start > string.length) {
throw new RangeError.range(start, 0, string.length);
}
// Inefficient check that searches for a later match too.
// Change this when possible.
List<int> list = _ExecuteMatch(string, start);
List<int> list = _ExecuteMatchSticky(string, start);
if (list == null) return null;
if (list[0] != start) return null;
return new _RegExpMatch(this, string, list);
}

Expand Down Expand Up @@ -238,6 +235,9 @@ class _RegExp implements RegExp {

List _ExecuteMatch(String str, int start_index)
native "RegExp_ExecuteMatch";

List _ExecuteMatchSticky(String str, int start_index)
native "RegExp_ExecuteMatchSticky";
}

class _AllMatchesIterable extends IterableBase<Match> {
Expand Down
1 change: 1 addition & 0 deletions runtime/vm/bootstrap_natives.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ namespace dart {
V(RegExp_getIsCaseSensitive, 1) \
V(RegExp_getGroupCount, 1) \
V(RegExp_ExecuteMatch, 3) \
V(RegExp_ExecuteMatchSticky, 3) \
V(List_allocate, 2) \
V(List_getIndexed, 2) \
V(List_setIndexed, 3) \
Expand Down
10 changes: 10 additions & 0 deletions runtime/vm/intrinsifier.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1160,6 +1160,16 @@ bool Intrinsifier::Build_DoubleRound(FlowGraph* flow_graph) {

return BuildInvokeMathCFunction(&builder, MethodRecognizer::kDoubleRound);
}


// Intrinsic for the non-sticky match entry point. Delegates to
// IntrinsifyRegExpExecuteMatch with sticky=false.
void Intrinsifier::RegExp_ExecuteMatch(Assembler* assembler) {
IntrinsifyRegExpExecuteMatch(assembler, /*sticky=*/false);
}


// Intrinsic for the sticky match entry point. Delegates to
// IntrinsifyRegExpExecuteMatch with sticky=true.
void Intrinsifier::RegExp_ExecuteMatchSticky(Assembler* assembler) {
IntrinsifyRegExpExecuteMatch(assembler, /*sticky=*/true);
}
#endif // !defined(TARGET_ARCH_DBC)


Expand Down
2 changes: 2 additions & 0 deletions runtime/vm/intrinsifier.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ class Intrinsifier : public AllStatic {
GRAPH_INTRINSICS_LIST(DECLARE_FUNCTION)

#undef DECLARE_FUNCTION

static void IntrinsifyRegExpExecuteMatch(Assembler* assembler, bool sticky);
#endif
};

Expand Down
6 changes: 4 additions & 2 deletions runtime/vm/intrinsifier_arm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2218,7 +2218,8 @@ void Intrinsifier::TwoByteString_equality(Assembler* assembler) {
}


void Intrinsifier::RegExp_ExecuteMatch(Assembler* assembler) {
void Intrinsifier::IntrinsifyRegExpExecuteMatch(Assembler* assembler,
bool sticky) {
if (FLAG_interpret_irregexp) return;

static const intptr_t kRegExpParamOffset = 2 * kWordSize;
Expand All @@ -2237,7 +2238,8 @@ void Intrinsifier::RegExp_ExecuteMatch(Assembler* assembler) {
__ LoadClassId(R1, R1);
__ AddImmediate(R1, R1, -kOneByteStringCid);
__ add(R1, R2, Operand(R1, LSL, kWordSizeLog2));
__ ldr(R0, FieldAddress(R1, RegExp::function_offset(kOneByteStringCid)));
__ ldr(R0,
FieldAddress(R1, RegExp::function_offset(kOneByteStringCid, sticky)));

// Registers are now set up for the lazy compile stub. It expects the function
// in R0, the argument descriptor in R4, and IC-Data in R9.
Expand Down
6 changes: 4 additions & 2 deletions runtime/vm/intrinsifier_arm64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2284,7 +2284,8 @@ void Intrinsifier::TwoByteString_equality(Assembler* assembler) {
}


void Intrinsifier::RegExp_ExecuteMatch(Assembler* assembler) {
void Intrinsifier::IntrinsifyRegExpExecuteMatch(Assembler* assembler,
bool sticky) {
if (FLAG_interpret_irregexp) return;

static const intptr_t kRegExpParamOffset = 2 * kWordSize;
Expand All @@ -2303,7 +2304,8 @@ void Intrinsifier::RegExp_ExecuteMatch(Assembler* assembler) {
__ LoadClassId(R1, R1);
__ AddImmediate(R1, R1, -kOneByteStringCid);
__ add(R1, R2, Operand(R1, LSL, kWordSizeLog2));
__ ldr(R0, FieldAddress(R1, RegExp::function_offset(kOneByteStringCid)));
__ ldr(R0,
FieldAddress(R1, RegExp::function_offset(kOneByteStringCid, sticky)));

// Registers are now set up for the lazy compile stub. It expects the function
// in R0, the argument descriptor in R4, and IC-Data in R5.
Expand Down
7 changes: 4 additions & 3 deletions runtime/vm/intrinsifier_ia32.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2230,7 +2230,8 @@ void Intrinsifier::TwoByteString_equality(Assembler* assembler) {
}


void Intrinsifier::RegExp_ExecuteMatch(Assembler* assembler) {
void Intrinsifier::IntrinsifyRegExpExecuteMatch(Assembler* assembler,
bool sticky) {
if (FLAG_interpret_irregexp) return;

static const intptr_t kRegExpParamOffset = 3 * kWordSize;
Expand All @@ -2248,8 +2249,8 @@ void Intrinsifier::RegExp_ExecuteMatch(Assembler* assembler) {
__ movl(EDI, Address(ESP, kStringParamOffset));
__ LoadClassId(EDI, EDI);
__ SubImmediate(EDI, Immediate(kOneByteStringCid));
__ movl(EAX, FieldAddress(EBX, EDI, TIMES_4,
RegExp::function_offset(kOneByteStringCid)));
__ movl(EAX, FieldAddress(EBX, EDI, TIMES_4, RegExp::function_offset(
kOneByteStringCid, sticky)));

// Registers are now set up for the lazy compile stub. It expects the function
// in EAX, the argument descriptor in EDX, and IC-Data in ECX.
Expand Down
6 changes: 4 additions & 2 deletions runtime/vm/intrinsifier_mips.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2339,7 +2339,8 @@ void Intrinsifier::TwoByteString_equality(Assembler* assembler) {
}


void Intrinsifier::RegExp_ExecuteMatch(Assembler* assembler) {
void Intrinsifier::IntrinsifyRegExpExecuteMatch(Assembler* assembler,
bool sticky) {
if (FLAG_interpret_irregexp) return;

static const intptr_t kRegExpParamOffset = 2 * kWordSize;
Expand All @@ -2359,7 +2360,8 @@ void Intrinsifier::RegExp_ExecuteMatch(Assembler* assembler) {
__ AddImmediate(T2, -kOneByteStringCid);
__ sll(T2, T2, kWordSizeLog2);
__ addu(T2, T2, T1);
__ lw(T0, FieldAddress(T2, RegExp::function_offset(kOneByteStringCid)));
__ lw(T0,
FieldAddress(T2, RegExp::function_offset(kOneByteStringCid, sticky)));

// Registers are now set up for the lazy compile stub. It expects the function
// in T0, the argument descriptor in S4, and IC-Data in S5.
Expand Down
7 changes: 4 additions & 3 deletions runtime/vm/intrinsifier_x64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2192,7 +2192,8 @@ void Intrinsifier::TwoByteString_equality(Assembler* assembler) {
}


void Intrinsifier::RegExp_ExecuteMatch(Assembler* assembler) {
void Intrinsifier::IntrinsifyRegExpExecuteMatch(Assembler* assembler,
bool sticky) {
if (FLAG_interpret_irregexp) return;

static const intptr_t kRegExpParamOffset = 3 * kWordSize;
Expand All @@ -2210,8 +2211,8 @@ void Intrinsifier::RegExp_ExecuteMatch(Assembler* assembler) {
__ movq(RDI, Address(RSP, kStringParamOffset));
__ LoadClassId(RDI, RDI);
__ SubImmediate(RDI, Immediate(kOneByteStringCid));
__ movq(RAX, FieldAddress(RBX, RDI, TIMES_8,
RegExp::function_offset(kOneByteStringCid)));
__ movq(RAX, FieldAddress(RBX, RDI, TIMES_8, RegExp::function_offset(
kOneByteStringCid, sticky)));

// Registers are now set up for the lazy compile stub. It expects the function
// in RAX, the argument descriptor in R10, and IC-Data in RCX.
Expand Down
2 changes: 2 additions & 0 deletions runtime/vm/method_recognizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,8 @@ namespace dart {
0x25a786de) \
V(_GrowableList, add, GrowableArray_add, Dynamic, 0x0d1358ed) \
V(_RegExp, _ExecuteMatch, RegExp_ExecuteMatch, Dynamic, 0x6036d7fa) \
V(_RegExp, _ExecuteMatchSticky, RegExp_ExecuteMatchSticky, Dynamic, \
0x6036d7fa) \
V(Object, ==, ObjectEquals, Bool, 0x11662ed8) \
V(Object, get:runtimeType, ObjectRuntimeType, Type, 0x00e7c26b) \
V(Object, _haveSameRuntimeType, ObjectHaveSameRuntimeType, Bool, 0x72aad7e2) \
Expand Down
45 changes: 36 additions & 9 deletions runtime/vm/object.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5696,21 +5696,36 @@ RawRegExp* Function::regexp() const {
}


// Bit layout of the integer that Function::SetRegExpData stores (as a Smi) in
// slot 1 of an irregexp function's data array, and that
// Function::is_sticky_specialization decodes again.
// NOTE(review): the trailing template arguments are presumably
// (bit position, bit size) -- confirm against the BitField definition.

// Sticky flag of the specialization (template arguments 0, 1).
class StickySpecialization : public BitField<intptr_t, bool, 0, 1> {};
// String class id of the specialization (template arguments 1,
// RawObject::kClassIdTagSize).
class StringSpecializationCid
: public BitField<intptr_t, intptr_t, 1, RawObject::kClassIdTagSize> {};


intptr_t Function::string_specialization_cid() const {
ASSERT(kind() == RawFunction::kIrregexpFunction);
const Array& pair = Array::Cast(Object::Handle(raw_ptr()->data_));
return Smi::Value(Smi::RawCast(pair.At(1)));
return StringSpecializationCid::decode(Smi::Value(Smi::RawCast(pair.At(1))));
}


bool Function::is_sticky_specialization() const {
ASSERT(kind() == RawFunction::kIrregexpFunction);
const Array& pair = Array::Cast(Object::Handle(raw_ptr()->data_));
return StickySpecialization::decode(Smi::Value(Smi::RawCast(pair.At(1))));
}


void Function::SetRegExpData(const RegExp& regexp,
intptr_t string_specialization_cid) const {
intptr_t string_specialization_cid,
bool sticky) const {
ASSERT(kind() == RawFunction::kIrregexpFunction);
ASSERT(RawObject::IsStringClassId(string_specialization_cid));
ASSERT(raw_ptr()->data_ == Object::null());
const Array& pair = Array::Handle(Array::New(2, Heap::kOld));
pair.SetAt(0, regexp);
pair.SetAt(1, Smi::Handle(Smi::New(string_specialization_cid)));
pair.SetAt(1, Smi::Handle(Smi::New(StickySpecialization::encode(sticky) |
StringSpecializationCid::encode(
string_specialization_cid))));
set_data(pair);
}

Expand Down Expand Up @@ -22394,16 +22409,28 @@ void RegExp::set_pattern(const String& pattern) const {
}


void RegExp::set_function(intptr_t cid, const Function& value) const {
StorePointer(FunctionAddr(cid), value.raw());
// Stores |value| into the function slot selected by the string class id
// |cid| and the |sticky| flag.
void RegExp::set_function(intptr_t cid,
                          bool sticky,
                          const Function& value) const {
  RawFunction** const slot = FunctionAddr(cid, sticky);
  StorePointer(slot, value.raw());
}


void RegExp::set_bytecode(bool is_one_byte, const TypedData& bytecode) const {
if (is_one_byte) {
StorePointer(&raw_ptr()->one_byte_bytecode_, bytecode.raw());
void RegExp::set_bytecode(bool is_one_byte,
bool sticky,
const TypedData& bytecode) const {
if (sticky) {
if (is_one_byte) {
StorePointer(&raw_ptr()->one_byte_sticky_.bytecode_, bytecode.raw());
} else {
StorePointer(&raw_ptr()->two_byte_sticky_.bytecode_, bytecode.raw());
}
} else {
StorePointer(&raw_ptr()->two_byte_bytecode_, bytecode.raw());
if (is_one_byte) {
StorePointer(&raw_ptr()->one_byte_.bytecode_, bytecode.raw());
} else {
StorePointer(&raw_ptr()->two_byte_.bytecode_, bytecode.raw());
}
}
}

Expand Down
62 changes: 43 additions & 19 deletions runtime/vm/object.h
Original file line number Diff line number Diff line change
Expand Up @@ -2215,8 +2215,10 @@ class Function : public Object {

RawRegExp* regexp() const;
intptr_t string_specialization_cid() const;
bool is_sticky_specialization() const;
void SetRegExpData(const RegExp& regexp,
intptr_t string_specialization_cid) const;
intptr_t string_specialization_cid,
bool sticky) const;

RawString* native_name() const;
void set_native_name(const String& name) const;
Expand Down Expand Up @@ -8451,37 +8453,59 @@ class RegExp : public Instance {
return raw_ptr()->num_bracket_expressions_;
}

RawTypedData* bytecode(bool is_one_byte) const {
return is_one_byte ? raw_ptr()->one_byte_bytecode_
: raw_ptr()->two_byte_bytecode_;
// Returns the bytecode field for the requested specialization: one-byte or
// two-byte subject, sticky or non-sticky matching.
RawTypedData* bytecode(bool is_one_byte, bool sticky) const {
  if (is_one_byte) {
    return sticky ? raw_ptr()->one_byte_sticky_.bytecode_
                  : raw_ptr()->one_byte_.bytecode_;
  }
  return sticky ? raw_ptr()->two_byte_sticky_.bytecode_
                : raw_ptr()->two_byte_.bytecode_;
}

static intptr_t function_offset(intptr_t cid) {
switch (cid) {
case kOneByteStringCid:
return OFFSET_OF(RawRegExp, one_byte_function_);
case kTwoByteStringCid:
return OFFSET_OF(RawRegExp, two_byte_function_);
case kExternalOneByteStringCid:
return OFFSET_OF(RawRegExp, external_one_byte_function_);
case kExternalTwoByteStringCid:
return OFFSET_OF(RawRegExp, external_two_byte_function_);
// Returns the offset within RawRegExp of the function field for the given
// string class id |cid| and |sticky| flag. Any |cid| other than the four
// string class ids handled below hits UNREACHABLE().
static intptr_t function_offset(intptr_t cid, bool sticky) {
  switch (cid) {
    case kOneByteStringCid:
      return sticky ? OFFSET_OF(RawRegExp, one_byte_sticky_.function_)
                    : OFFSET_OF(RawRegExp, one_byte_.function_);
    case kTwoByteStringCid:
      return sticky ? OFFSET_OF(RawRegExp, two_byte_sticky_.function_)
                    : OFFSET_OF(RawRegExp, two_byte_.function_);
    case kExternalOneByteStringCid:
      return sticky
                 ? OFFSET_OF(RawRegExp, external_one_byte_sticky_function_)
                 : OFFSET_OF(RawRegExp, external_one_byte_function_);
    case kExternalTwoByteStringCid:
      return sticky
                 ? OFFSET_OF(RawRegExp, external_two_byte_sticky_function_)
                 : OFFSET_OF(RawRegExp, external_two_byte_function_);
  }

  UNREACHABLE();
  return -1;
}

RawFunction** FunctionAddr(intptr_t cid) const {
RawFunction** FunctionAddr(intptr_t cid, bool sticky) const {
return reinterpret_cast<RawFunction**>(
FieldAddrAtOffset(function_offset(cid)));
FieldAddrAtOffset(function_offset(cid, sticky)));
}

RawFunction* function(intptr_t cid) const { return *FunctionAddr(cid); }
// Returns the function currently stored in the slot selected by the string
// class id |cid| and the |sticky| flag (see FunctionAddr).
RawFunction* function(intptr_t cid, bool sticky) const {
return *FunctionAddr(cid, sticky);
}

void set_pattern(const String& pattern) const;
void set_function(intptr_t cid, const Function& value) const;
void set_bytecode(bool is_one_byte, const TypedData& bytecode) const;
void set_function(intptr_t cid, bool sticky, const Function& value) const;
void set_bytecode(bool is_one_byte,
bool sticky,
const TypedData& bytecode) const;

void set_num_bracket_expressions(intptr_t value) const;
void set_is_global() const { set_flags(flags() | kGlobal); }
Expand Down
Loading

0 comments on commit 2403444

Please sign in to comment.