Skip to content

Commit

Permalink
Improve RegExp Fuzzing.
Browse files Browse the repository at this point in the history
Improve RegExp Fuzzing by adding more interesting patterns and using a
template based on the regexp-builtins.cc fuzzer by jgruber@.
  • Loading branch information
carl-smith committed Apr 25, 2023
1 parent 28731f8 commit 48163c8
Show file tree
Hide file tree
Showing 3 changed files with 190 additions and 9 deletions.
61 changes: 59 additions & 2 deletions Sources/Fuzzilli/Environment/JavaScriptEnvironment.swift
Expand Up @@ -48,8 +48,65 @@ public class JavaScriptEnvironment: ComponentBase, Environment {
// TODO more?
public let interestingStrings = jsTypeNames

// TODO more?
public let interestingRegExps = [".", "\\d", "\\w", "\\s", "\\D", "\\W", "\\S"]
/// Regular expression pattern sources that are considered interesting for fuzzing.
///
/// The list starts with a few hand-picked patterns (always/never matching, zero-width
/// assertions, nested and named capture groups), followed by entries copied from
/// Chromium's libFuzzer regexp dictionary (see the link below).
/// The final entries use class set syntax (`--`, `&&`, `\q{...}`); presumably these are
/// meant for the `v` (unicodeSets) flag -- TODO confirm.
///
/// Note: some patterns (e.g. "()", "[]", "a", "abc") appear more than once, so a uniform
/// choice from this array picks them proportionally more often.
public let interestingRegExps = [
".", // Always matches.
"\\P{Any}", // Never matches.
"^", // Zero-width assertion, matches once.
"(?=.)", // Zero-width assertion, matches at every position.
"\\b", // Zero-width assertion, matches at each word boundary.
"()", // Zero-width assertion, matches at every position with groups.
"(?<a>)", // Likewise but with named groups.
"((((.).).).)", "(?<a>(?<b>(?<c>(?<d>.).).).)",
// Copied from
// https://cs.chromium.org/chromium/src/testing/libfuzzer/fuzzers/dicts/regexp.dict
"?", "abc", "()", "[]", "abc|def", "abc|def|ghi", "^xxx$",
"ab\\b\\d\\bcd", "\\w|\\d", "a*?", "abc+", "abc+?", "xyz?", "xyz??",
"xyz{0,1}", "xyz{0,1}?", "xyz{93}", "xyz{1,32}", "xyz{1,32}?", "xyz{1,}",
"xyz{1,}?", "a\\fb\\nc\\rd\\te\\vf", "a\\nb\\bc", "(?:foo)", "(?: foo )",
"foo|(bar|baz)|quux", "foo(?=bar)baz", "foo(?!bar)baz", "foo(?<=bar)baz",
"foo(?<!bar)baz", "()", "(?=)", "[]", "[x]", "[xyz]", "[a-zA-Z0-9]",
"[-123]", "[^123]", "]", "}", "[a-b-c]", "[x\\dz]", "[\\d-z]",
"[\\d-\\d]", "[z-\\d]", "\\cj\\cJ\\ci\\cI\\ck\\cK", "\\c!", "\\c_",
"\\c~", "[\\c!]", "[\\c_]", "[\\c~]", "[\\ca]", "[\\cz]", "[\\cA]",
"[\\cZ]", "[\\c1]", "\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ",
"[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "\\8", "\\9", "\\11", "\\11a",
"\\011", "\\118", "\\111", "\\1111", "(x)(x)(x)\\1", "(x)(x)(x)\\2",
"(x)(x)(x)\\3", "(x)(x)(x)\\4", "(x)(x)(x)\\1*", "(x)(x)(x)\\3*",
"(x)(x)(x)\\4*", "(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10",
"(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11", "(a)\\1", "(a\\1)", "(\\1a)",
"(\\2)(\\1)", "(?=a){0,10}a", "(?=a){1,10}a", "(?=a){9,10}a", "(?!a)?a",
"\\1(a)", "(?!(a))\\1", "(?!\\1(a\\1)\\1)\\1",
"\\1\\2(a(?:\\1(b\\1\\2))\\2)\\1", "[\\0]", "[\\11]", "[\\11a]",
"[\\011]", "[\\00011]", "[\\118]", "[\\111]", "[\\1111]", "\\x60",
"\\x3z", "\\c", "\\u0034", "\\u003z", "foo[z]*", "\\u{12345}",
"\\u{12345}\\u{23456}", "\\u{12345}{3}", "\\u{12345}*", "\\ud808\\udf45*",
"[\\ud808\\udf45-\\ud809\\udccc]", "a", "a|b", "a\\n", "a$", "a\\b!",
"a\\Bb", "a*?", "a?", "a??", "a{0,1}?", "a{1,2}?", "a+?", "(a)", "(a)\\1",
"(\\1a)", "\\1(a)", "a\\s", "a\\S", "a\\D", "a\\w", "a\\W", "a.", "a\\q",
"a[a]", "a[^a]", "a[a-z]", "a(?:b)", "a(?=b)", "a(?!b)", "\\x60",
"\\u0060", "\\cA", "\\q", "\\1112", "(a)\\1", "(?!a)?a\\1",
"(?:(?=a))a\\1", "a{}", "a{,}", "a{", "a{z}", "a{12z}", "a{12,",
"a{12,3b", "{}", "{,}", "{", "{z}", "{1z}", "{12,", "{12,3b", "a", "abc",
"a[bc]d", "a|bc", "ab|c", "a||bc", "(?:ab)", "(?:ab|cde)", "(?:ab)|cde",
"(ab)", "(ab|cde)", "(ab)\\1", "(ab|cde)\\1", "(?:ab)?", "(?:ab)+", "a?",
"a+", "a??", "a*?", "a+?", "(?:a?)?", "(?:a+)?", "(?:a?)+", "(?:a*)+",
"(?:a+)+", "(?:a?)*", "(?:a*)*", "(?:a+)*", "a{0}", "(?:a+){0,0}", "a*b",
"a+b", "a*b|c", "a+b|c", "(?:a{5,1000000}){3,1000000}", "(?:ab){4,7}",
"a\\bc", "a\\sc", "a\\Sc", "a(?=b)c", "a(?=bbb|bb)c", "a(?!bbb|bb)c",
"\\xe2\\x81\\xa3", "[\\xe2\\x81\\xa3]", "\\xed\\xb0\\x80", "\\xed\\xa0\\x80",
"(\\xed\\xb0\\x80)\\x01", "((\\xed\\xa0\\x80))\\x02", "\\xf0\\x9f\\x92\\xa9", "\\x01",
"\\x0f", "[-\\xf0\\x9f\\x92\\xa9]+", "[\\xf0\\x9f\\x92\\xa9-\\xf4\\x8f\\xbf\\xbf]",
"(?<=)", "(?<=a)", "(?<!)", "(?<!a)", "(?<a>)", "(?<a>.)",
"(?<a>.)\\k<a>", "\\p{Script=Greek}", "\\P{sc=Greek}",
"\\p{Script_Extensions=Greek}", "\\P{scx=Greek}",
"\\p{General_Category=Decimal_Number}", "\\P{gc=Decimal_Number}",
"\\p{gc=Nd}", "\\P{Decimal_Number}", "\\p{Nd}", "\\P{Any}",
"\\p{Changes_When_NFKC_Casefolded}",
"[\\p{Script_Extensions=Greek}--[α-γ]]",
"[\\p{Script_Extensions=Mongolian}&&\\p{Number}]",
"[\\q{abc|def|0|5}--\\d]"
]

public let interestingRegExpQuantifiers = ["*", "+", "?"]

public let intType = JSType.integer
Expand Down
37 changes: 30 additions & 7 deletions Sources/Fuzzilli/FuzzIL/JsOperations.swift
Expand Up @@ -154,6 +154,10 @@ public struct RegExpFlags: OptionSet, Hashable {

public func asString() -> String {
var strRepr = ""

// The `u` (unicode) and `v` (unicodeSets) flags are mutually exclusive; using them together leads to a runtime exception.
assert(!(contains(.unicode) && contains(.unicodeSets)))

for (flag, char) in RegExpFlags.flagToCharDict {
if contains(flag) {
strRepr += char
Expand All @@ -178,22 +182,39 @@ public struct RegExpFlags: OptionSet, Hashable {
flags.formUnion(.unicode)
case "y":
flags.formUnion(.sticky)
case "d":
flags.formUnion(.hasIndices)
case "v":
flags.formUnion(.unicodeSets)
default:
return nil
}
}
// The `u` (unicode) and `v` (unicodeSets) flags are mutually exclusive; using them together leads to a runtime exception.
assert(!(flags.contains(.unicode) && flags.contains(.unicodeSets)))
return flags
}

static let caseInsensitive = RegExpFlags(rawValue: 1 << 0)
static let global = RegExpFlags(rawValue: 1 << 1)
static let multiline = RegExpFlags(rawValue: 1 << 2)
static let dotall = RegExpFlags(rawValue: 1 << 3)
static let unicode = RegExpFlags(rawValue: 1 << 4)
static let sticky = RegExpFlags(rawValue: 1 << 5)
static let caseInsensitive = RegExpFlags(rawValue: 1 << 0) // i
static let global = RegExpFlags(rawValue: 1 << 1) // g
static let multiline = RegExpFlags(rawValue: 1 << 2) // m
static let dotall = RegExpFlags(rawValue: 1 << 3) // s
static let unicode = RegExpFlags(rawValue: 1 << 4) // u
static let sticky = RegExpFlags(rawValue: 1 << 5) // y
static let hasIndices = RegExpFlags(rawValue: 1 << 6) // d
static let unicodeSets = RegExpFlags(rawValue: 1 << 7) // v

public static func random() -> RegExpFlags {
return RegExpFlags(rawValue: UInt32.random(in: 0..<(1<<6)))
var flags = RegExpFlags(rawValue: UInt32.random(in: 0..<(1<<8)))
if flags.contains(.unicode) && flags.contains(.unicodeSets) {
// Clear one of them: the two flags are mutually exclusive and throw a runtime exception if used together.
withEqualProbability({
flags.subtract(.unicode)
}, {
flags.subtract(.unicodeSets)
})
}
return flags
}

private static let flagToCharDict: [RegExpFlags:String] = [
Expand All @@ -203,6 +224,8 @@ public struct RegExpFlags: OptionSet, Hashable {
.dotall: "s",
.unicode: "u",
.sticky: "y",
.hasIndices: "d",
.unicodeSets: "v",
]
}

Expand Down
101 changes: 101 additions & 0 deletions Sources/FuzzilliCli/Profiles/V8Profile.swift
Expand Up @@ -304,6 +304,106 @@ fileprivate let MapTransitionsTemplate = ProgramTemplate("MapTransitionsTemplate
}
}

// This template fuzzes the RegExp engine.
// It finds bugs like: crbug.com/1437346 and crbug.com/1439691.
//
// The generated program defines a parameterless function that
//   1. creates a RegExp with random flags and either an "interesting" or a random pattern,
//   2. assigns one of several boundary values to its `lastIndex` property,
//   3. invokes one randomly chosen RegExp operation (exec, test, or one of the Symbol.match /
//      Symbol.replace / Symbol.search / Symbol.split methods) inside a try-catch, and
//   4. returns the result of that operation (or null if it threw).
// The function is then called three times: once to compile the regexp, once more without forcing
// the slow path, and once with %SetForceSlowPath(true).
fileprivate let RegExpFuzzerTemplate = ProgramTemplate("RegExpFuzzerTemplate") { b in
    // Taken from: https://source.chromium.org/chromium/chromium/src/+/refs/heads/main:v8/test/fuzzer/regexp-builtins.cc;l=212;drc=a61b95c63b0b75c1cfe872d9c8cdf927c226046e
    let twoByteSubjectString = "f\\uD83D\\uDCA9ba\\u2603"

    // Note: these candidates are emitted through loadString below, i.e. they end up as
    // string values in the generated program, not as evaluated source code.
    let replacementCandidates = [
        "'X'",
        "'$1$2$3'",
        "'$$$&$`$\\'$1'",
        "() => 'X'",
        "(arg0, arg1, arg2, arg3, arg4) => arg0 + arg1 + arg2 + arg3 + arg4",
        "() => 42"
    ]

    // Boundary values for `lastIndex`. They are also emitted as string values (see loadString below).
    let lastIndices = [
        "undefined", "-1", "0",
        "1", "2", "3",
        "4", "5", "6",
        "7", "8", "9",
        "50", "4294967296", "2147483647",
        "2147483648", "NaN", "Not a Number"
    ]

    let f = b.buildPlainFunction(with: .parameters(n: 0)) { _ in
        // Half of the time use a known-interesting pattern, otherwise a random string.
        let pattern = probability(0.5) ? chooseUniform(from: b.fuzzer.environment.interestingRegExps) : b.randomString()
        let regExpVar = b.loadRegExp(pattern, RegExpFlags.random())

        let lastIndex = chooseUniform(from: lastIndices)
        let lastIndexString = b.loadString(lastIndex)

        b.setProperty("lastIndex", of: regExpVar, to: lastIndexString)

        let subjectVar: Variable

        // Occasionally use the fixed subject string from the V8 fuzzer, otherwise a random one.
        if probability(0.1) {
            subjectVar = b.loadString(twoByteSubjectString)
        } else {
            subjectVar = b.loadString(b.randomString())
        }

        // Holds the result of the chosen operation; stays null if the operation throws.
        let resultVar = b.loadNull()

        b.buildTryCatchFinally(tryBody: {
            let symbol = b.loadBuiltin("Symbol")
            withEqualProbability({
                let res = b.callMethod("exec", on: regExpVar, withArgs: [subjectVar])
                b.reassign(resultVar, to: res)
            }, {
                let prop = b.getProperty("match", of: symbol)
                let res = b.callComputedMethod(prop, on: regExpVar, withArgs: [subjectVar])
                b.reassign(resultVar, to: res)
            }, {
                let prop = b.getProperty("replace", of: symbol)
                let replacement = withEqualProbability({
                    b.loadString(b.randomString())
                }, {
                    b.loadString(chooseUniform(from: replacementCandidates))
                })
                let res = b.callComputedMethod(prop, on: regExpVar, withArgs: [subjectVar, replacement])
                b.reassign(resultVar, to: res)
            }, {
                let prop = b.getProperty("search", of: symbol)
                let res = b.callComputedMethod(prop, on: regExpVar, withArgs: [subjectVar])
                b.reassign(resultVar, to: res)
            }, {
                let prop = b.getProperty("split", of: symbol)
                let randomSplitLimit = withEqualProbability({
                    "undefined"
                }, {
                    "'not a number'"
                }, {
                    String(b.randomInt())
                })
                let limit = b.loadString(randomSplitLimit)
                // Use the Symbol.split property as the method key (not the Symbol constructor itself),
                // consistent with the match/replace/search cases above.
                let res = b.callComputedMethod(prop, on: regExpVar, withArgs: [subjectVar, limit])
                b.reassign(resultVar, to: res)
            }, {
                let res = b.callMethod("test", on: regExpVar, withArgs: [subjectVar])
                b.reassign(resultVar, to: res)
            })
        }, catchBody: { _ in
            // Exceptions (e.g. from invalid patterns or flag combinations) are deliberately ignored.
        })

        b.build(n: 7)

        b.doReturn(resultVar)
    }

    b.eval("%SetForceSlowPath(false)")
    // compile the regexp once
    b.callFunction(f)
    // Run once without forcing the slow path ...
    b.callFunction(f)
    // ... and once with the slow path forced, then restore the default.
    b.eval("%SetForceSlowPath(true)")
    b.callFunction(f)
    b.eval("%SetForceSlowPath(false)")

    b.build(n: 15)
}

let v8Profile = Profile(
processArgs: { randomize in
var args = [
Expand Down Expand Up @@ -432,6 +532,7 @@ let v8Profile = Profile(

additionalProgramTemplates: WeightedList<ProgramTemplate>([
(MapTransitionsTemplate, 1),
(RegExpFuzzerTemplate, 1),
]),

disabledCodeGenerators: [],
Expand Down

0 comments on commit 48163c8

Please sign in to comment.