Skip to content

Commit 6ca25ec

Browse files
committed
Bug 1499170 - Add an atom bit to know whether we're ascii lowercase. r=njn
This massively speeds up ASCII-case-insensitive atom comparisons when both atoms are already lowercase (which is by far the common case). It removes almost all of the slow selector matching on the page in question, and it seems an easier fix than storing a lowercased version of every class name in elements and selectors in quirks mode. Differential Revision: https://phabricator.services.mozilla.com/D10945 --HG-- extra : moz-landing-system : lando
1 parent 601e1df commit 6ca25ec

File tree

10 files changed

+141
-117
lines changed

10 files changed

+141
-117
lines changed

dom/base/nsAttrValue.cpp

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1169,16 +1169,13 @@ nsAttrValue::Contains(nsAtom* aValue, nsCaseTreatment aCaseSensitive) const
11691169
case eAtomBase:
11701170
{
11711171
nsAtom* atom = GetAtomValue();
1172-
11731172
if (aCaseSensitive == eCaseMatters) {
11741173
return aValue == atom;
11751174
}
11761175

11771176
// For performance reasons, don't do a full on unicode case insensitive
11781177
// string comparison. This is only used for quirks mode anyway.
1179-
return
1180-
nsContentUtils::EqualsIgnoreASCIICase(nsDependentAtomString(aValue),
1181-
nsDependentAtomString(atom));
1178+
return nsContentUtils::EqualsIgnoreASCIICase(aValue, atom);
11821179
}
11831180
default:
11841181
{
@@ -1188,16 +1185,11 @@ nsAttrValue::Contains(nsAtom* aValue, nsCaseTreatment aCaseSensitive) const
11881185
return array->Contains(aValue);
11891186
}
11901187

1191-
nsDependentAtomString val1(aValue);
1192-
1193-
for (RefPtr<nsAtom> *cur = array->Elements(),
1194-
*end = cur + array->Length();
1195-
cur != end; ++cur) {
1188+
for (RefPtr<nsAtom>& cur : *array) {
11961189
// For performance reasons, don't do a full on unicode case
11971190
// insensitive string comparison. This is only used for quirks mode
11981191
// anyway.
1199-
if (nsContentUtils::EqualsIgnoreASCIICase(val1,
1200-
nsDependentAtomString(*cur))) {
1192+
if (nsContentUtils::EqualsIgnoreASCIICase(aValue, cur)) {
12011193
return true;
12021194
}
12031195
}

dom/base/nsContentUtils.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2058,6 +2058,25 @@ class nsContentUtils
20582058

20592059
static JSContext *GetCurrentJSContext();
20602060

2061+
/**
2062+
* ASCII-case-insensitive comparison between two atoms.
2063+
*/
2064+
static bool EqualsIgnoreASCIICase(nsAtom* aAtom1, nsAtom* aAtom2)
2065+
{
2066+
if (aAtom1 == aAtom2) {
2067+
return true;
2068+
}
2069+
2070+
// If both are ascii lowercase already, we know that the slow comparison
2071+
// below is going to return false.
2072+
if (aAtom1->IsAsciiLowercase() && aAtom2->IsAsciiLowercase()) {
2073+
return false;
2074+
}
2075+
2076+
return EqualsIgnoreASCIICase(nsDependentAtomString(aAtom1),
2077+
nsDependentAtomString(aAtom2));
2078+
}
2079+
20612080
/**
20622081
* Case insensitive comparison between two strings. However it only ignores
20632082
* case for ASCII characters a-z.

servo/components/style/gecko/regen_atoms.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@
1616
import build
1717

1818

19-
# Matches lines like `GK_ATOM(foo, "foo", 0x12345678, nsStaticAtom, PseudoElementAtom)`.
20-
PATTERN = re.compile('^GK_ATOM\(([^,]*),[^"]*"([^"]*)",\s*(0x[0-9a-f]+),\s*([^,]*),\s*([^)]*)\)',
19+
# Matches lines like `GK_ATOM(foo, "foo", 0x12345678, true, nsStaticAtom, PseudoElementAtom)`.
20+
PATTERN = re.compile('^GK_ATOM\(([^,]*),[^"]*"([^"]*)",\s*(0x[0-9a-f]+),\s*[^,]*,\s*([^,]*),\s*([^)]*)\)',
2121
re.MULTILINE)
2222
FILE = "include/nsGkAtomList.h"
2323

servo/components/style/gecko_string_cache/mod.rs

Lines changed: 44 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -175,13 +175,19 @@ impl WeakAtom {
175175
/// Returns whether this atom is static.
176176
#[inline]
177177
pub fn is_static(&self) -> bool {
178-
unsafe { (*self.as_ptr()).mIsStatic() != 0 }
178+
self.0.mIsStatic() != 0
179+
}
180+
181+
/// Returns whether this atom is ascii lowercase.
182+
#[inline]
183+
fn is_ascii_lowercase(&self) -> bool {
184+
self.0.mIsAsciiLowercase() != 0
179185
}
180186

181187
/// Returns the length of the atom string.
182188
#[inline]
183189
pub fn len(&self) -> u32 {
184-
unsafe { (*self.as_ptr()).mLength() }
190+
self.0.mLength()
185191
}
186192

187193
/// Returns whether this atom is the empty string.
@@ -199,55 +205,61 @@ impl WeakAtom {
199205

200206
/// Convert this atom to ASCII lower-case
201207
pub fn to_ascii_lowercase(&self) -> Atom {
208+
if self.is_ascii_lowercase() {
209+
return self.clone();
210+
}
211+
202212
let slice = self.as_slice();
203-
match slice
204-
.iter()
205-
.position(|&char16| (b'A' as u16) <= char16 && char16 <= (b'Z' as u16))
206-
{
207-
None => self.clone(),
208-
Some(i) => {
209-
let mut buffer: [u16; 64] = unsafe { mem::uninitialized() };
210-
let mut vec;
211-
let mutable_slice = if let Some(buffer_prefix) = buffer.get_mut(..slice.len()) {
212-
buffer_prefix.copy_from_slice(slice);
213-
buffer_prefix
214-
} else {
215-
vec = slice.to_vec();
216-
&mut vec
217-
};
218-
for char16 in &mut mutable_slice[i..] {
219-
if *char16 <= 0x7F {
220-
*char16 = (*char16 as u8).to_ascii_lowercase() as u16
221-
}
222-
}
223-
Atom::from(&*mutable_slice)
224-
},
213+
let mut buffer: [u16; 64] = unsafe { mem::uninitialized() };
214+
let mut vec;
215+
let mutable_slice = if let Some(buffer_prefix) = buffer.get_mut(..slice.len()) {
216+
buffer_prefix.copy_from_slice(slice);
217+
buffer_prefix
218+
} else {
219+
vec = slice.to_vec();
220+
&mut vec
221+
};
222+
for char16 in &mut *mutable_slice {
223+
if *char16 <= 0x7F {
224+
*char16 = (*char16 as u8).to_ascii_lowercase() as u16
225+
}
225226
}
227+
Atom::from(&*mutable_slice)
226228
}
227229

228230
/// Return whether two atoms are ASCII-case-insensitive matches
231+
#[inline]
229232
pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
230233
if self == other {
231234
return true;
232235
}
233236

237+
// If we know both atoms are ascii-lowercase, then we can stick with
238+
// pointer equality.
239+
if self.is_ascii_lowercase() && other.is_ascii_lowercase() {
240+
debug_assert!(!self.eq_ignore_ascii_case_slow(other));
241+
return false;
242+
}
243+
244+
self.eq_ignore_ascii_case_slow(other)
245+
}
246+
247+
fn eq_ignore_ascii_case_slow(&self, other: &Self) -> bool {
234248
let a = self.as_slice();
235249
let b = other.as_slice();
236-
a.len() == b.len() && a.iter().zip(b).all(|(&a16, &b16)| {
250+
251+
if a.len() != b.len() {
252+
return false;
253+
}
254+
255+
a.iter().zip(b).all(|(&a16, &b16)| {
237256
if a16 <= 0x7F && b16 <= 0x7F {
238257
(a16 as u8).eq_ignore_ascii_case(&(b16 as u8))
239258
} else {
240259
a16 == b16
241260
}
242261
})
243262
}
244-
245-
/// Return whether this atom is an ASCII-case-insensitive match for the given string
246-
pub fn eq_str_ignore_ascii_case(&self, other: &str) -> bool {
247-
self.chars()
248-
.map(|r| r.map(|c: char| c.to_ascii_lowercase()))
249-
.eq(other.chars().map(|c: char| Ok(c.to_ascii_lowercase())))
250-
}
251263
}
252264

253265
impl fmt::Debug for WeakAtom {

xpcom/ds/Atom.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ def __init__(self, ident, string, ty="nsStaticAtom"):
1010
self.ty = ty
1111
self.atom_type = self.__class__.__name__
1212
self.hash = hash_string(string)
13+
self.is_ascii_lowercase = is_ascii_lowercase(string)
1314

1415

1516
class PseudoElementAtom(Atom):
@@ -52,3 +53,12 @@ def hash_string(s):
5253
for c in s:
5354
h = wrapping_multiply(GOLDEN_RATIO_U32, rotate_left_5(h) ^ ord(c))
5455
return h
56+
57+
58+
# Returns True if ASCII-lowercasing this string (mapping only 'A'-'Z' to 'a'-'z')
59+
# would leave the string unchanged.
60+
def is_ascii_lowercase(s):
61+
for c in s:
62+
if c >= 'A' and c <= 'Z':
63+
return False
64+
return True

xpcom/ds/StaticAtoms.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2355,9 +2355,9 @@ def generate_nsgkatomlist_h(output, *ignore):
23552355
"#ifdef small\n"
23562356
"#undef small\n"
23572357
"#endif\n\n"
2358-
"// GK_ATOM(identifier, string, hash, gecko_type, atom_type)\n" +
2359-
"".join(["GK_ATOM(%s, \"%s\", 0x%08x, %s, %s)\n" %
2360-
(a.ident, a.string, a.hash, a.ty, a.atom_type)
2358+
"// GK_ATOM(identifier, string, hash, is_ascii_lower, gecko_type, atom_type)\n" +
2359+
"".join(["GK_ATOM(%s, \"%s\", 0x%08x, %s, %s, %s)\n" %
2360+
(a.ident, a.string, a.hash, str(a.is_ascii_lowercase).lower(), a.ty, a.atom_type)
23612361
for a in STATIC_ATOMS]))
23622362

23632363

xpcom/ds/nsAtom.h

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,7 @@ class nsDynamicAtom;
2121
// This class encompasses both static and dynamic atoms.
2222
//
2323
// - In places where static and dynamic atoms can be used, use RefPtr<nsAtom>.
24-
// This is by far the most common case. (The exception to this is the HTML5
25-
// parser, which does its own weird thing, and uses non-refcounted dynamic
26-
// atoms.)
24+
// This is by far the most common case.
2725
//
2826
// - In places where only static atoms can appear, use nsStaticAtom* to avoid
2927
// unnecessary refcounting. This is a moderately common case.
@@ -75,6 +73,13 @@ class nsAtom
7573
//
7674
uint32_t hash() const { return mHash; }
7775

76+
// This function returns true if ToLowercaseASCII would return the string
77+
// unchanged.
78+
bool IsAsciiLowercase() const
79+
{
80+
return mIsAsciiLowercase;
81+
}
82+
7883
// We can't use NS_INLINE_DECL_THREADSAFE_REFCOUNTING because the refcounting
7984
// of this type is special.
8085
MozExternalRefCountType AddRef();
@@ -84,25 +89,29 @@ class nsAtom
8489

8590
protected:
8691
// Used by nsStaticAtom.
87-
constexpr nsAtom(uint32_t aLength, uint32_t aHash)
92+
constexpr nsAtom(uint32_t aLength, uint32_t aHash, bool aIsAsciiLowercase)
8893
: mLength(aLength)
8994
, mIsStatic(true)
95+
, mIsAsciiLowercase(aIsAsciiLowercase)
9096
, mHash(aHash)
9197
{}
9298

9399
// Used by nsDynamicAtom.
94-
nsAtom(const nsAString& aString, uint32_t aHash)
100+
nsAtom(const nsAString& aString,
101+
uint32_t aHash,
102+
bool aIsAsciiLowercase)
95103
: mLength(aString.Length())
96104
, mIsStatic(false)
105+
, mIsAsciiLowercase(aIsAsciiLowercase)
97106
, mHash(aHash)
98107
{
99108
}
100109

101110
~nsAtom() = default;
102111

103112
const uint32_t mLength:30;
104-
// NOTE: There's one free bit here.
105113
const uint32_t mIsStatic:1;
114+
const uint32_t mIsAsciiLowercase:1;
106115
const uint32_t mHash;
107116
};
108117

@@ -123,8 +132,8 @@ class nsStaticAtom : public nsAtom
123132
// Atom.py and assert in nsAtomTable::RegisterStaticAtoms that the two
124133
// hashes match.
125134
constexpr nsStaticAtom(uint32_t aLength, uint32_t aHash,
126-
uint32_t aStringOffset)
127-
: nsAtom(aLength, aHash)
135+
uint32_t aStringOffset, bool aIsAsciiLowercase)
136+
: nsAtom(aLength, aHash, aIsAsciiLowercase)
128137
, mStringOffset(aStringOffset)
129138
{}
130139

@@ -167,14 +176,10 @@ class nsDynamicAtom : public nsAtom
167176

168177
// These shouldn't be used directly, even by friend classes. The
169178
// Create()/Destroy() methods use them.
170-
static nsDynamicAtom* CreateInner(const nsAString& aString, uint32_t aHash);
171-
nsDynamicAtom(const nsAString& aString, uint32_t aHash);
179+
nsDynamicAtom(const nsAString& aString, uint32_t aHash, bool aIsAsciiLowercase);
172180
~nsDynamicAtom() {}
173181

174-
// Creation/destruction is done by friend classes. The first Create() is for
175-
// dynamic normal atoms, the second is for dynamic HTML5 atoms.
176182
static nsDynamicAtom* Create(const nsAString& aString, uint32_t aHash);
177-
static nsDynamicAtom* Create(const nsAString& aString);
178183
static void Destroy(nsDynamicAtom* aAtom);
179184

180185
mozilla::ThreadSafeAutoRefCnt mRefCnt;

0 commit comments

Comments
 (0)