From 9849e59bff594ac31575ac1278163123432716b2 Mon Sep 17 00:00:00 2001 From: "ricow@chromium.org" Date: Thu, 8 Sep 2011 16:33:10 +0000 Subject: [PATCH] Version 3.6.2. Added "dependencies" target to top-level Makefile. Added ability to turn profiler on/off in d8. Added "soname_version" parameter to common.gypi, v8.gyp, and Makefile. Fixed several crash bugs. git-svn-id: https://v8.googlecode.com/svn/trunk@9201 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- .gitignore | 5 +- ChangeLog | 31 +- Makefile | 12 +- build/common.gypi | 3 + src/SConscript | 18 +- src/api.cc | 1 + src/ast.cc | 5 - src/ast.h | 9 - src/bignum-dtoa.cc | 7 +- src/compiler.cc | 1 + src/conversions-inl.h | 20 +- src/conversions.cc | 5 +- src/conversions.h | 6 +- src/d8.cc | 27 +- src/d8.h | 15 +- src/dateparser.h | 1 - src/dtoa.cc | 7 +- src/fast-dtoa.cc | 6 +- src/fixed-dtoa.cc | 6 +- src/full-codegen.cc | 19 +- src/globals.h | 29 + src/heap.cc | 44 +- src/hydrogen.cc | 12 +- src/isolate.cc | 1 - src/mips/full-codegen-mips.cc | 625 ++++++------- src/objects-debug.cc | 6 +- src/objects.cc | 1 - src/parser.cc | 8 +- src/parser.h | 3 +- src/platform-win32.cc | 70 +- src/platform.h | 69 +- src/preparser-api.cc | 8 +- src/preparser.cc | 270 +++++- src/preparser.h | 103 +- src/prettyprinter.cc | 15 - src/rewriter.cc | 1 - src/runtime.cc | 322 ++++--- src/scanner-base.cc | 1090 --------------------- src/scanner-base.h | 562 ----------- src/scanner-character-streams.cc | 328 +++++++ src/scanner-character-streams.h | 129 +++ src/scanner.cc | 1196 +++++++++++++++++++----- src/scanner.h | 559 +++++++++-- src/smart-pointer.h | 43 +- src/strtod.cc | 7 +- src/utils.h | 78 +- src/v8conversions.cc | 1 - src/version.cc | 2 +- src/win32-math.cc | 106 +++ src/win32-math.h | 61 ++ test/cctest/test-parsing.cc | 10 +- test/cctest/test-profile-generator.cc | 6 +- test/cctest/test-utils.cc | 12 + test/mjsunit/string-replace.js | 5 + test/preparser/duplicate-parameter.pyt | 90 ++ test/preparser/duplicate-property.pyt | 162 ++++ 
test/preparser/testcfg.py | 6 +- tools/gyp/v8.gyp | 61 +- tools/presubmit.py | 8 +- tools/push-to-trunk.sh | 424 +++++++++ 60 files changed, 3961 insertions(+), 2776 deletions(-) delete mode 100644 src/scanner-base.cc delete mode 100644 src/scanner-base.h create mode 100644 src/scanner-character-streams.cc create mode 100644 src/scanner-character-streams.h create mode 100644 src/win32-math.cc create mode 100644 src/win32-math.h create mode 100644 test/preparser/duplicate-parameter.pyt create mode 100644 test/preparser/duplicate-property.pyt create mode 100755 tools/push-to-trunk.sh diff --git a/.gitignore b/.gitignore index 253639dc5db..b61faef74ce 100644 --- a/.gitignore +++ b/.gitignore @@ -1,17 +1,18 @@ *.a *.exe +*.idb *.lib *.log *.map *.mk *.ncb +*.pdb *.pyc *.scons* +*.so *.suo *.user *.xcodeproj -*.idb -*.pdb #*# *~ .cpplint-cache diff --git a/ChangeLog b/ChangeLog index d3e0dc642a0..817aa3a4ee2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +2011-09-08: Version 3.6.2 + + Added "dependencies" target to top-level Makefile. + + Added ability to turn profiler on/off in d8. + + Added "soname_version" parameter to common.gypi, v8.gyp, and Makefile. + + Fixed several crash bugs. + + 2011-09-07: Version 3.6.1 Fixed a bug in abrupt exit from with or catch inside finally. @@ -14,23 +25,23 @@ 2011-09-05: Version 3.6.0 - Fixed a bug when optimizing named function expression (issue 1647). + Fixed a bug when optimizing named function expression (issue 1647). - Fixed a bug when optimizing f.call.apply (issue 1650). + Fixed a bug when optimizing f.call.apply (issue 1650). - Made arguments and caller always be null on native functions - (issues 1548 and 1643). + Made arguments and caller always be null on native functions + (issues 1548 and 1643). - Fixed issue 1648 (cross-compiling x64 targeting ia32). + Fixed issue 1648 (cross-compiling x64 targeting ia32). - Fixed issue 371 (d8 printing of strings containing \0). 
+ Fixed issue 371 (d8 printing of strings containing \0). - Fixed order of evaluation in arguments to parseInt (issue 1649). + Fixed order of evaluation in arguments to parseInt (issue 1649). - Fixed a problem with large heap snapshots in Chrome DevTools - (issue 1658, chromium issue 89268). + Fixed a problem with large heap snapshots in Chrome DevTools + (issue 1658, chromium issue 89268). - Upped default maximum heap size from 512M to 700M. + Upped default maximum heap size from 512M to 700M. 2011-08-31: Version 3.5.10 diff --git a/Makefile b/Makefile index 618bbef1d06..a7b27317a3a 100644 --- a/Makefile +++ b/Makefile @@ -68,8 +68,13 @@ ifeq ($(vfp3), off) else GYPFLAGS += -Dv8_can_use_vfp_instructions=true endif +# soname_version=1.2.3 +ifdef soname_version + GYPFLAGS += -Dsoname_version=$(soname_version) +endif # ----------------- available targets: -------------------- +# - "dependencies": pulls in external dependencies (currently: GYP) # - any arch listed in ARCHES (see below) # - any mode listed in MODES # - every combination ., e.g. "ia32.release" @@ -98,7 +103,7 @@ CHECKS = $(addsuffix .check,$(BUILDS)) # File where previously used GYPFLAGS are stored. ENVFILE = $(OUTDIR)/environment -.PHONY: all check clean $(ENVFILE).new \ +.PHONY: all check clean dependencies $(ENVFILE).new \ $(ARCHES) $(MODES) $(BUILDS) $(CHECKS) $(addsuffix .clean,$(ARCHES)) \ $(addsuffix .check,$(MODES)) $(addsuffix .check,$(ARCHES)) @@ -170,3 +175,8 @@ $(ENVFILE): $(ENVFILE).new # Stores current GYPFLAGS in a file. $(ENVFILE).new: @mkdir -p $(OUTDIR); echo "GYPFLAGS=$(GYPFLAGS)" > $(ENVFILE).new; + +# Dependencies. 
+dependencies: + svn checkout --force http://gyp.googlecode.com/svn/trunk build/gyp \ + --revision 1026 diff --git a/build/common.gypi b/build/common.gypi index 9b32608178f..4e896e019a8 100644 --- a/build/common.gypi +++ b/build/common.gypi @@ -72,6 +72,9 @@ 'v8_use_snapshot%': 'true', 'host_os%': '<(OS)', 'v8_use_liveobjectlist%': 'false', + + # For a shared library build, results in "libv8-<(soname_version).so". + 'soname_version%': '', }, 'target_defaults': { 'conditions': [ diff --git a/src/SConscript b/src/SConscript index 453a7c6a76e..52607f15c5e 100644 --- a/src/SConscript +++ b/src/SConscript @@ -111,8 +111,8 @@ SOURCES = { runtime.cc runtime-profiler.cc safepoint-table.cc - scanner-base.cc scanner.cc + scanner-character-streams.cc scopeinfo.cc scopes.cc serialize.cc @@ -222,7 +222,7 @@ SOURCES = { 'os:solaris': ['platform-solaris.cc', 'platform-posix.cc'], 'os:cygwin': ['platform-cygwin.cc', 'platform-posix.cc'], 'os:nullos': ['platform-nullos.cc'], - 'os:win32': ['platform-win32.cc'], + 'os:win32': ['platform-win32.cc', 'win32-math.cc'], 'mode:release': [], 'mode:debug': [ 'objects-debug.cc', 'prettyprinter.cc', 'regexp-macro-assembler-tracer.cc' @@ -233,15 +233,25 @@ SOURCES = { PREPARSER_SOURCES = { 'all': Split(""" allocation.cc + bignum.cc + bignum-dtoa.cc + cached-powers.cc + conversions.cc + diy-fp.cc + dtoa.cc + fast-dtoa.cc + fixed-dtoa.cc hashmap.cc preparse-data.cc preparser.cc preparser-api.cc - scanner-base.cc + scanner.cc + strtod.cc token.cc unicode.cc utils.cc - """) + """), + 'os:win32': ['win32-math.cc'] } diff --git a/src/api.cc b/src/api.cc index 0207f51eb42..26558c42924 100644 --- a/src/api.cc +++ b/src/api.cc @@ -44,6 +44,7 @@ #include "platform.h" #include "profile-generator-inl.h" #include "runtime-profiler.h" +#include "scanner-character-streams.h" #include "serialize.h" #include "snapshot.h" #include "v8threads.h" diff --git a/src/ast.cc b/src/ast.cc index 8b8a2a884ec..a44d9ee460e 100644 --- a/src/ast.cc +++ b/src/ast.cc @@ 
-404,11 +404,6 @@ bool WithStatement::IsInlineable() const { } -bool ExitContextStatement::IsInlineable() const { - return false; -} - - bool SwitchStatement::IsInlineable() const { return false; } diff --git a/src/ast.h b/src/ast.h index 0eacb421029..2b32cdfa91f 100644 --- a/src/ast.h +++ b/src/ast.h @@ -62,7 +62,6 @@ namespace internal { V(BreakStatement) \ V(ReturnStatement) \ V(WithStatement) \ - V(ExitContextStatement) \ V(SwitchStatement) \ V(DoWhileStatement) \ V(WhileStatement) \ @@ -681,14 +680,6 @@ class WithStatement: public Statement { }; -class ExitContextStatement: public Statement { - public: - virtual bool IsInlineable() const; - - DECLARE_NODE_TYPE(ExitContextStatement) -}; - - class CaseClause: public ZoneObject { public: CaseClause(Isolate* isolate, diff --git a/src/bignum-dtoa.cc b/src/bignum-dtoa.cc index 088dd79f550..a9616909d0f 100644 --- a/src/bignum-dtoa.cc +++ b/src/bignum-dtoa.cc @@ -1,4 +1,4 @@ -// Copyright 2010 the V8 project authors. All rights reserved. +// Copyright 2011 the V8 project authors. All rights reserved. 
// Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -27,7 +27,10 @@ #include -#include "v8.h" +#include "../include/v8stdint.h" +#include "checks.h" +#include "utils.h" + #include "bignum-dtoa.h" #include "bignum.h" diff --git a/src/compiler.cc b/src/compiler.cc index c7e78067cfe..ba6bb42bfa2 100644 --- a/src/compiler.cc +++ b/src/compiler.cc @@ -41,6 +41,7 @@ #include "parser.h" #include "rewriter.h" #include "runtime-profiler.h" +#include "scanner-character-streams.h" #include "scopeinfo.h" #include "scopes.h" #include "vm-state-inl.h" diff --git a/src/conversions-inl.h b/src/conversions-inl.h index b828638568a..41cf0d54c21 100644 --- a/src/conversions-inl.h +++ b/src/conversions-inl.h @@ -32,13 +32,16 @@ #include #include // Required for DBL_MAX and on Win32 for finite() #include +#include "globals.h" // Required for V8_INFINITY // ---------------------------------------------------------------------------- // Extra POSIX/ANSI functions for Win32/MSVC. #include "conversions.h" -#include "strtod.h" +#include "double.h" #include "platform.h" +#include "scanner.h" +#include "strtod.h" namespace v8 { namespace internal { @@ -87,12 +90,15 @@ static inline double DoubleToInteger(double x) { int32_t DoubleToInt32(double x) { int32_t i = FastD2I(x); if (FastI2D(i) == x) return i; - static const double two32 = 4294967296.0; - static const double two31 = 2147483648.0; - if (!isfinite(x) || x == 0) return 0; - if (x < 0 || x >= two32) x = modulo(x, two32); - x = (x >= 0) ? floor(x) : ceil(x) + two32; - return (int32_t) ((x >= two31) ? 
x - two32 : x); + Double d(x); + int exponent = d.Exponent(); + if (exponent < 0) { + if (exponent <= -Double::kSignificandSize) return 0; + return d.Sign() * static_cast(d.Significand() >> -exponent); + } else { + if (exponent > 31) return 0; + return d.Sign() * static_cast(d.Significand() << exponent); + } } diff --git a/src/conversions.cc b/src/conversions.cc index c34fe519c42..5bfddd04c01 100644 --- a/src/conversions.cc +++ b/src/conversions.cc @@ -26,11 +26,11 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include #include "conversions-inl.h" #include "dtoa.h" -#include "scanner-base.h" #include "strtod.h" #include "utils.h" @@ -38,7 +38,6 @@ namespace v8 { namespace internal { - double StringToDouble(UnicodeCache* unicode_cache, const char* str, int flags, double empty_string_val) { const char* end = str + StrLength(str); @@ -390,7 +389,7 @@ char* DoubleToRadixCString(double value, int radix) { int integer_pos = kBufferSize - 2; do { integer_buffer[integer_pos--] = - chars[static_cast(modulo(integer_part, radix))]; + chars[static_cast(fmod(integer_part, radix))]; integer_part /= radix; } while (integer_part >= 1.0); // Sanity check. diff --git a/src/conversions.h b/src/conversions.h index 0f8d5da8ee6..e51ad6501cb 100644 --- a/src/conversions.h +++ b/src/conversions.h @@ -30,11 +30,13 @@ #include -#include "scanner-base.h" +#include "utils.h" namespace v8 { namespace internal { +class UnicodeCache; + // Maximum number of significant digits in decimal representation. // The longest possible double in decimal representation is // (2^53 - 1) * 2 ^ -1074 that is (2 ^ 53 - 1) * 5 ^ 1074 / 10 ^ 1074 @@ -125,6 +127,8 @@ double StringToDouble(UnicodeCache* unicode_cache, int flags, double empty_string_val = 0); +const int kDoubleToCStringMinBufferSize = 100; + // Converts a double to a string value according to ECMA-262 9.8.1. // The buffer should be large enough for any floating point number. 
// 100 characters is enough. diff --git a/src/d8.cc b/src/d8.cc index 5c604368920..93b383d9acd 100644 --- a/src/d8.cc +++ b/src/d8.cc @@ -210,6 +210,18 @@ Handle Shell::Write(const Arguments& args) { } +Handle Shell::EnableProfiler(const Arguments& args) { + V8::ResumeProfiler(); + return Undefined(); +} + + +Handle Shell::DisableProfiler(const Arguments& args) { + V8::PauseProfiler(); + return Undefined(); +} + + Handle Shell::Read(const Arguments& args) { String::Utf8Value file(args[0]); if (*file == NULL) { @@ -656,6 +668,10 @@ Handle Shell::CreateGlobalTemplate() { global_template->Set(String::New("load"), FunctionTemplate::New(Load)); global_template->Set(String::New("quit"), FunctionTemplate::New(Quit)); global_template->Set(String::New("version"), FunctionTemplate::New(Version)); + global_template->Set(String::New("enableProfiler"), + FunctionTemplate::New(EnableProfiler)); + global_template->Set(String::New("disableProfiler"), + FunctionTemplate::New(DisableProfiler)); // Bind the handlers for external arrays. 
global_template->Set(String::New("Int8Array"), @@ -1021,7 +1037,7 @@ i::Thread::Options SourceGroup::GetThreadOptions() { void SourceGroup::ExecuteInThread() { Isolate* isolate = Isolate::New(); do { - if (next_semaphore_ != NULL) next_semaphore_->Wait(); + if (!next_semaphore_.is_empty()) next_semaphore_->Wait(); { Isolate::Scope iscope(isolate); Locker lock(isolate); @@ -1033,15 +1049,15 @@ void SourceGroup::ExecuteInThread() { } context.Dispose(); } - if (done_semaphore_ != NULL) done_semaphore_->Signal(); + if (!done_semaphore_.is_empty()) done_semaphore_->Signal(); } while (!Shell::options.last_run); isolate->Dispose(); } void SourceGroup::StartExecuteInThread() { - if (thread_ == NULL) { - thread_ = new IsolateThread(this); + if (thread_.is_empty()) { + thread_ = i::SmartPointer(new IsolateThread(this)); thread_->Start(); } next_semaphore_->Signal(); @@ -1049,10 +1065,9 @@ void SourceGroup::StartExecuteInThread() { void SourceGroup::WaitForThread() { - if (thread_ == NULL) return; + if (thread_.is_empty()) return; if (Shell::options.last_run) { thread_->Join(); - thread_ = NULL; } else { done_semaphore_->Wait(); } diff --git a/src/d8.h b/src/d8.h index 28321f56dae..3ec03907ed1 100644 --- a/src/d8.h +++ b/src/d8.h @@ -28,11 +28,11 @@ #ifndef V8_D8_H_ #define V8_D8_H_ - #ifndef V8_SHARED -#include "v8.h" #include "allocation.h" #include "hashmap.h" +#include "smart-pointer.h" +#include "v8.h" #else #include "../include/v8.h" #endif // V8_SHARED @@ -122,11 +122,10 @@ class SourceGroup { #ifndef V8_SHARED next_semaphore_(v8::internal::OS::CreateSemaphore(0)), done_semaphore_(v8::internal::OS::CreateSemaphore(0)), - thread_(NULL), #endif // V8_SHARED argv_(NULL), begin_offset_(0), - end_offset_(0) { } + end_offset_(0) {} void Begin(char** argv, int offset) { argv_ = const_cast(argv); @@ -158,9 +157,9 @@ class SourceGroup { static i::Thread::Options GetThreadOptions(); void ExecuteInThread(); - i::Semaphore* next_semaphore_; - i::Semaphore* done_semaphore_; - 
i::Thread* thread_; + i::SmartPointer next_semaphore_; + i::SmartPointer done_semaphore_; + i::SmartPointer thread_; #endif // V8_SHARED void ExitShell(int exit_code); @@ -248,6 +247,8 @@ class Shell : public i::AllStatic { static Handle Yield(const Arguments& args); static Handle Quit(const Arguments& args); static Handle Version(const Arguments& args); + static Handle EnableProfiler(const Arguments& args); + static Handle DisableProfiler(const Arguments& args); static Handle Read(const Arguments& args); static Handle ReadLine(const Arguments& args); static Handle Load(const Arguments& args); diff --git a/src/dateparser.h b/src/dateparser.h index 4bd320e901d..4777e35f66c 100644 --- a/src/dateparser.h +++ b/src/dateparser.h @@ -30,7 +30,6 @@ #include "allocation.h" #include "char-predicates-inl.h" -#include "scanner-base.h" namespace v8 { namespace internal { diff --git a/src/dtoa.cc b/src/dtoa.cc index b857a5dc599..00233a88293 100644 --- a/src/dtoa.cc +++ b/src/dtoa.cc @@ -1,4 +1,4 @@ -// Copyright 2010 the V8 project authors. All rights reserved. +// Copyright 2011 the V8 project authors. All rights reserved. // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -27,7 +27,10 @@ #include -#include "v8.h" +#include "../include/v8stdint.h" +#include "checks.h" +#include "utils.h" + #include "dtoa.h" #include "bignum-dtoa.h" diff --git a/src/fast-dtoa.cc b/src/fast-dtoa.cc index c7f6aa17564..e62bd01fbb5 100644 --- a/src/fast-dtoa.cc +++ b/src/fast-dtoa.cc @@ -1,4 +1,4 @@ -// Copyright 2010 the V8 project authors. All rights reserved. +// Copyright 2011 the V8 project authors. All rights reserved. 
// Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -25,7 +25,9 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#include "v8.h" +#include "../include/v8stdint.h" +#include "checks.h" +#include "utils.h" #include "fast-dtoa.h" diff --git a/src/fixed-dtoa.cc b/src/fixed-dtoa.cc index 8ad88f6528a..1fd974c3e23 100644 --- a/src/fixed-dtoa.cc +++ b/src/fixed-dtoa.cc @@ -1,4 +1,4 @@ -// Copyright 2010 the V8 project authors. All rights reserved. +// Copyright 2011 the V8 project authors. All rights reserved. // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -27,7 +27,9 @@ #include -#include "v8.h" +#include "../include/v8stdint.h" +#include "checks.h" +#include "utils.h" #include "double.h" #include "fixed-dtoa.h" diff --git a/src/full-codegen.cc b/src/full-codegen.cc index d810bb3dc08..53ace82fe7a 100644 --- a/src/full-codegen.cc +++ b/src/full-codegen.cc @@ -96,11 +96,6 @@ void BreakableStatementChecker::VisitWithStatement(WithStatement* stmt) { } -void BreakableStatementChecker::VisitExitContextStatement( - ExitContextStatement* stmt) { -} - - void BreakableStatementChecker::VisitSwitchStatement(SwitchStatement* stmt) { // Switch statements breakable if the tag expression is. Visit(stmt->tag()); @@ -989,17 +984,6 @@ void FullCodeGenerator::VisitWithStatement(WithStatement* stmt) { } -void FullCodeGenerator::VisitExitContextStatement(ExitContextStatement* stmt) { - Comment cmnt(masm_, "[ ExitContextStatement"); - SetStatementPosition(stmt); - - // Pop context. - LoadContextField(context_register(), Context::PREVIOUS_INDEX); - // Update local stack frame context field. 
- StoreToFrameField(StandardFrameConstants::kContextOffset, context_register()); -} - - void FullCodeGenerator::VisitDoWhileStatement(DoWhileStatement* stmt) { Comment cmnt(masm_, "[ DoWhileStatement"); SetStatementPosition(stmt); @@ -1147,6 +1131,9 @@ void FullCodeGenerator::VisitTryCatchStatement(TryCatchStatement* stmt) { { WithOrCatch body(this); Visit(stmt->catch_block()); } + // Restore the context. + LoadContextField(context_register(), Context::PREVIOUS_INDEX); + StoreToFrameField(StandardFrameConstants::kContextOffset, context_register()); scope_ = saved_scope; __ jmp(&done); diff --git a/src/globals.h b/src/globals.h index bb9d0a8e8bb..6c6966aee53 100644 --- a/src/globals.h +++ b/src/globals.h @@ -28,6 +28,35 @@ #ifndef V8_GLOBALS_H_ #define V8_GLOBALS_H_ +// Define V8_INFINITY +#define V8_INFINITY INFINITY + +// GCC specific stuff +#ifdef __GNUC__ + +#define __GNUC_VERSION_FOR_INFTY__ (__GNUC__ * 10000 + __GNUC_MINOR__ * 100) + +// Unfortunately, the INFINITY macro cannot be used with the '-pedantic' +// warning flag and certain versions of GCC due to a bug: +// http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11931 +// For now, we use the more involved template-based version from , but +// only when compiling with GCC versions affected by the bug (2.96.x - 4.0.x) +// __GNUC_PREREQ is not defined in GCC for Mac OS X, so we define our own macro +#if __GNUC_VERSION_FOR_INFTY__ >= 29600 && __GNUC_VERSION_FOR_INFTY__ < 40100 +#include +#undef V8_INFINITY +#define V8_INFINITY std::numeric_limits::infinity() +#endif +#undef __GNUC_VERSION_FOR_INFTY__ + +#endif // __GNUC__ + +#ifdef _MSC_VER +#undef V8_INFINITY +#define V8_INFINITY HUGE_VAL +#endif + + #include "../include/v8stdint.h" namespace v8 { diff --git a/src/heap.cc b/src/heap.cc index a480e9e331e..0ca138f337f 100644 --- a/src/heap.cc +++ b/src/heap.cc @@ -41,7 +41,6 @@ #include "natives.h" #include "objects-visiting.h" #include "runtime-profiler.h" -#include "scanner-base.h" #include "scopeinfo.h" #include 
"snapshot.h" #include "v8threads.h" @@ -2259,8 +2258,8 @@ bool Heap::CreateInitialObjects() { Object* StringSplitCache::Lookup( FixedArray* cache, String* string, String* pattern) { if (!string->IsSymbol() || !pattern->IsSymbol()) return Smi::FromInt(0); - uintptr_t hash = string->Hash(); - uintptr_t index = ((hash & (kStringSplitCacheSize - 1)) & + uint32_t hash = string->Hash(); + uint32_t index = ((hash & (kStringSplitCacheSize - 1)) & ~(kArrayEntriesPerCacheEntry - 1)); if (cache->get(index + kStringOffset) == string && cache->get(index + kPatternOffset) == pattern) { @@ -2281,30 +2280,29 @@ void StringSplitCache::Enter(Heap* heap, String* pattern, FixedArray* array) { if (!string->IsSymbol() || !pattern->IsSymbol()) return; - uintptr_t hash = string->Hash(); - array->set_map(heap->fixed_cow_array_map()); - uintptr_t index = ((hash & (kStringSplitCacheSize - 1)) & + uint32_t hash = string->Hash(); + uint32_t index = ((hash & (kStringSplitCacheSize - 1)) & ~(kArrayEntriesPerCacheEntry - 1)); if (cache->get(index + kStringOffset) == Smi::FromInt(0)) { cache->set(index + kStringOffset, string); cache->set(index + kPatternOffset, pattern); cache->set(index + kArrayOffset, array); - return; - } - uintptr_t index2 = - ((index + kArrayEntriesPerCacheEntry) & (kStringSplitCacheSize - 1)); - if (cache->get(index2 + kStringOffset) == Smi::FromInt(0)) { - cache->set(index2 + kStringOffset, string); - cache->set(index2 + kPatternOffset, pattern); - cache->set(index2 + kArrayOffset, array); - return; + } else { + uint32_t index2 = + ((index + kArrayEntriesPerCacheEntry) & (kStringSplitCacheSize - 1)); + if (cache->get(index2 + kStringOffset) == Smi::FromInt(0)) { + cache->set(index2 + kStringOffset, string); + cache->set(index2 + kPatternOffset, pattern); + cache->set(index2 + kArrayOffset, array); + } else { + cache->set(index2 + kStringOffset, Smi::FromInt(0)); + cache->set(index2 + kPatternOffset, Smi::FromInt(0)); + cache->set(index2 + kArrayOffset, Smi::FromInt(0)); + 
cache->set(index + kStringOffset, string); + cache->set(index + kPatternOffset, pattern); + cache->set(index + kArrayOffset, array); + } } - cache->set(index2 + kStringOffset, Smi::FromInt(0)); - cache->set(index2 + kPatternOffset, Smi::FromInt(0)); - cache->set(index2 + kArrayOffset, Smi::FromInt(0)); - cache->set(index + kStringOffset, string); - cache->set(index + kPatternOffset, pattern); - cache->set(index + kArrayOffset, array); if (array->length() < 100) { // Limit how many new symbols we want to make. for (int i = 0; i < array->length(); i++) { String* str = String::cast(array->get(i)); @@ -2315,6 +2313,7 @@ void StringSplitCache::Enter(Heap* heap, } } } + array->set_map(heap->fixed_cow_array_map()); } @@ -3623,6 +3622,9 @@ MaybeObject* Heap::ReinitializeJSGlobalProxy(JSFunction* constructor, MaybeObject* Heap::AllocateStringFromAscii(Vector string, PretenureFlag pretenure) { + if (string.length() == 1) { + return Heap::LookupSingleCharacterStringFromCode(string[0]); + } Object* result; { MaybeObject* maybe_result = AllocateRawAsciiString(string.length(), pretenure); diff --git a/src/hydrogen.cc b/src/hydrogen.cc index 01046bf9bfe..ab25299b78c 100644 --- a/src/hydrogen.cc +++ b/src/hydrogen.cc @@ -1675,7 +1675,9 @@ void HInferRepresentation::Analyze() { bool change = true; while (change) { change = false; - for (int i = 0; i < phi_count; ++i) { + // We normally have far more "forward edges" than "backward edges", + // so we terminate faster when we walk backwards. 
+ for (int i = phi_count - 1; i >= 0; --i) { HPhi* phi = phi_list->at(i); for (HUseIterator it(phi->uses()); !it.Done(); it.Advance()) { HValue* use = it.value(); @@ -2652,14 +2654,6 @@ void HGraphBuilder::VisitWithStatement(WithStatement* stmt) { } -void HGraphBuilder::VisitExitContextStatement(ExitContextStatement* stmt) { - ASSERT(!HasStackOverflow()); - ASSERT(current_block() != NULL); - ASSERT(current_block()->HasPredecessor()); - return Bailout("ExitContextStatement"); -} - - void HGraphBuilder::VisitSwitchStatement(SwitchStatement* stmt) { ASSERT(!HasStackOverflow()); ASSERT(current_block() != NULL); diff --git a/src/isolate.cc b/src/isolate.cc index d3669029778..afb9624875c 100644 --- a/src/isolate.cc +++ b/src/isolate.cc @@ -43,7 +43,6 @@ #include "messages.h" #include "regexp-stack.h" #include "runtime-profiler.h" -#include "scanner.h" #include "scopeinfo.h" #include "serialize.h" #include "simulator.h" diff --git a/src/mips/full-codegen-mips.cc b/src/mips/full-codegen-mips.cc index 385e57ae663..d690ca3fcd0 100644 --- a/src/mips/full-codegen-mips.cc +++ b/src/mips/full-codegen-mips.cc @@ -200,14 +200,14 @@ void FullCodeGenerator::Generate(CompilationInfo* info) { // Copy any necessary parameters into the context. int num_parameters = info->scope()->num_parameters(); for (int i = 0; i < num_parameters; i++) { - Slot* slot = scope()->parameter(i)->rewrite(); - if (slot != NULL && slot->type() == Slot::CONTEXT) { + Variable* var = scope()->parameter(i); + if (var->IsContextSlot()) { int parameter_offset = StandardFrameConstants::kCallerSPOffset + (num_parameters - 1 - i) * kPointerSize; // Load parameter from stack. __ lw(a0, MemOperand(fp, parameter_offset)); // Store it in the context. - __ li(a1, Operand(Context::SlotOffset(slot->index()))); + __ li(a1, Operand(Context::SlotOffset(var->index()))); __ addu(a2, cp, a1); __ sw(a0, MemOperand(a2, 0)); // Update the write barrier. 
This clobbers all involved @@ -252,7 +252,7 @@ void FullCodeGenerator::Generate(CompilationInfo* info) { ArgumentsAccessStub stub(type); __ CallStub(&stub); - Move(arguments->rewrite(), v0, a1, a2); + SetVar(arguments, v0, a1, a2); } if (FLAG_trace) { @@ -271,7 +271,8 @@ void FullCodeGenerator::Generate(CompilationInfo* info) { // For named function expressions, declare the function name as a // constant. if (scope()->is_function_scope() && scope()->function() != NULL) { - EmitDeclaration(scope()->function(), Variable::CONST, NULL); + int ignored = 0; + EmitDeclaration(scope()->function(), Variable::CONST, NULL, &ignored); } VisitDeclarations(scope()->declarations()); } @@ -371,24 +372,27 @@ void FullCodeGenerator::EmitReturnSequence() { } -void FullCodeGenerator::EffectContext::Plug(Slot* slot) const { +void FullCodeGenerator::EffectContext::Plug(Variable* var) const { + ASSERT(var->IsStackAllocated() || var->IsContextSlot()); } -void FullCodeGenerator::AccumulatorValueContext::Plug(Slot* slot) const { - codegen()->Move(result_register(), slot); +void FullCodeGenerator::AccumulatorValueContext::Plug(Variable* var) const { + ASSERT(var->IsStackAllocated() || var->IsContextSlot()); + codegen()->GetVar(result_register(), var); } -void FullCodeGenerator::StackValueContext::Plug(Slot* slot) const { - codegen()->Move(result_register(), slot); +void FullCodeGenerator::StackValueContext::Plug(Variable* var) const { + ASSERT(var->IsStackAllocated() || var->IsContextSlot()); + codegen()->GetVar(result_register(), var); __ push(result_register()); } -void FullCodeGenerator::TestContext::Plug(Slot* slot) const { +void FullCodeGenerator::TestContext::Plug(Variable* var) const { // For simplicity we always test the accumulator register. 
- codegen()->Move(result_register(), slot); + codegen()->GetVar(result_register(), var); codegen()->PrepareForBailoutBeforeSplit(TOS_REG, false, NULL, NULL); codegen()->DoTest(this); } @@ -621,30 +625,56 @@ void FullCodeGenerator::Split(Condition cc, } -MemOperand FullCodeGenerator::EmitSlotSearch(Slot* slot, Register scratch) { - switch (slot->type()) { - case Slot::PARAMETER: - case Slot::LOCAL: - return MemOperand(fp, SlotOffset(slot)); - case Slot::CONTEXT: { - int context_chain_length = - scope()->ContextChainLength(slot->var()->scope()); - __ LoadContext(scratch, context_chain_length); - return ContextOperand(scratch, slot->index()); - } - case Slot::LOOKUP: - case Slot::GLOBAL: - UNREACHABLE(); +MemOperand FullCodeGenerator::StackOperand(Variable* var) { + ASSERT(var->IsStackAllocated()); + // Offset is negative because higher indexes are at lower addresses. + int offset = -var->index() * kPointerSize; + // Adjust by a (parameter or local) base offset. + if (var->IsParameter()) { + offset += (info_->scope()->num_parameters() + 1) * kPointerSize; + } else { + offset += JavaScriptFrameConstants::kLocal0Offset; + } + return MemOperand(fp, offset); +} + + +MemOperand FullCodeGenerator::VarOperand(Variable* var, Register scratch) { + ASSERT(var->IsContextSlot() || var->IsStackAllocated()); + if (var->IsContextSlot()) { + int context_chain_length = scope()->ContextChainLength(var->scope()); + __ LoadContext(scratch, context_chain_length); + return ContextOperand(scratch, var->index()); + } else { + return StackOperand(var); } - UNREACHABLE(); - return MemOperand(v0, 0); } -void FullCodeGenerator::Move(Register destination, Slot* source) { +void FullCodeGenerator::GetVar(Register dest, Variable* var) { // Use destination as scratch. 
- MemOperand slot_operand = EmitSlotSearch(source, destination); - __ lw(destination, slot_operand); + MemOperand location = VarOperand(var, dest); + __ lw(dest, location); +} + + +void FullCodeGenerator::SetVar(Variable* var, + Register src, + Register scratch0, + Register scratch1) { + ASSERT(var->IsContextSlot() || var->IsStackAllocated()); + ASSERT(!scratch0.is(src)); + ASSERT(!scratch0.is(scratch1)); + ASSERT(!scratch1.is(src)); + MemOperand location = VarOperand(var, scratch0); + __ sw(src, location); + // Emit the write barrier code if the location is in the heap. + if (var->IsContextSlot()) { + __ RecordWrite(scratch0, + Operand(Context::SlotOffset(var->index())), + scratch1, + src); + } } @@ -674,48 +704,33 @@ void FullCodeGenerator::PrepareForBailoutBeforeSplit(State state, } -void FullCodeGenerator::Move(Slot* dst, - Register src, - Register scratch1, - Register scratch2) { - ASSERT(dst->type() != Slot::LOOKUP); // Not yet implemented. - ASSERT(!scratch1.is(src) && !scratch2.is(src)); - MemOperand location = EmitSlotSearch(dst, scratch1); - __ sw(src, location); - // Emit the write barrier code if the location is in the heap. - if (dst->type() == Slot::CONTEXT) { - __ RecordWrite(scratch1, - Operand(Context::SlotOffset(dst->index())), - scratch2, - src); - } -} - - void FullCodeGenerator::EmitDeclaration(VariableProxy* proxy, Variable::Mode mode, - FunctionLiteral* function) { - Comment cmnt(masm_, "[ Declaration"); + FunctionLiteral* function, + int* global_count) { + // If it was not possible to allocate the variable at compile time, we + // need to "declare" it at runtime to make sure it actually exists in the + // local context. Variable* variable = proxy->var(); - ASSERT(variable != NULL); // Must have been resolved. 
- Slot* slot = variable->rewrite(); - ASSERT(slot != NULL); - switch (slot->type()) { - case Slot::PARAMETER: - case Slot::LOCAL: + switch (variable->location()) { + case Variable::UNALLOCATED: + ++(*global_count); + break; + + case Variable::PARAMETER: + case Variable::LOCAL: if (function != NULL) { + Comment cmnt(masm_, "[ Declaration"); VisitForAccumulatorValue(function); - __ sw(result_register(), MemOperand(fp, SlotOffset(slot))); + __ sw(result_register(), StackOperand(variable)); } else if (mode == Variable::CONST || mode == Variable::LET) { + Comment cmnt(masm_, "[ Declaration"); __ LoadRoot(t0, Heap::kTheHoleValueRootIndex); - __ sw(t0, MemOperand(fp, SlotOffset(slot))); + __ sw(t0, StackOperand(variable)); } break; - case Slot::CONTEXT: - // We bypass the general EmitSlotSearch because we know more about - // this specific context. - + case Variable::CONTEXT: // The variable in the decl always resides in the current function // context. ASSERT_EQ(0, scope()->ContextChainLength(variable->scope())); @@ -730,24 +745,27 @@ void FullCodeGenerator::EmitDeclaration(VariableProxy* proxy, a1, Operand(t0)); } if (function != NULL) { + Comment cmnt(masm_, "[ Declaration"); VisitForAccumulatorValue(function); - __ sw(result_register(), ContextOperand(cp, slot->index())); - int offset = Context::SlotOffset(slot->index()); + __ sw(result_register(), ContextOperand(cp, variable->index())); + int offset = Context::SlotOffset(variable->index()); // We know that we have written a function, which is not a smi. __ mov(a1, cp); __ RecordWrite(a1, Operand(offset), a2, result_register()); PrepareForBailoutForId(proxy->id(), NO_REGISTERS); } else if (mode == Variable::CONST || mode == Variable::LET) { + Comment cmnt(masm_, "[ Declaration"); __ LoadRoot(at, Heap::kTheHoleValueRootIndex); - __ sw(at, ContextOperand(cp, slot->index())); + __ sw(at, ContextOperand(cp, variable->index())); // No write barrier since the_hole_value is in old space. 
PrepareForBailoutForId(proxy->id(), NO_REGISTERS); } break; - case Slot::LOOKUP: { + case Variable::LOOKUP: { + Comment cmnt(masm_, "[ Declaration"); __ li(a2, Operand(variable->name())); - // Declaration nodes are always introduced in one of two modes. + // Declaration nodes are always introduced in one of three modes. ASSERT(mode == Variable::VAR || mode == Variable::CONST || mode == Variable::LET); @@ -766,23 +784,17 @@ void FullCodeGenerator::EmitDeclaration(VariableProxy* proxy, __ Push(cp, a2, a1, a0); } else { ASSERT(Smi::FromInt(0) == 0); - // No initial value! - __ mov(a0, zero_reg); // Operand(Smi::FromInt(0))); + __ mov(a0, zero_reg); // Smi::FromInt(0) indicates no initial value. __ Push(cp, a2, a1, a0); } __ CallRuntime(Runtime::kDeclareContextSlot, 4); break; } - - case Slot::GLOBAL: - UNREACHABLE(); } } -void FullCodeGenerator::VisitDeclaration(Declaration* decl) { - EmitDeclaration(decl->proxy(), decl->mode(), decl->fun()); -} +void FullCodeGenerator::VisitDeclaration(Declaration* decl) { } void FullCodeGenerator::DeclareGlobals(Handle pairs) { @@ -1095,10 +1107,9 @@ void FullCodeGenerator::VisitVariableProxy(VariableProxy* expr) { } -void FullCodeGenerator::EmitLoadGlobalSlotCheckExtensions( - Slot* slot, - TypeofState typeof_state, - Label* slow) { +void FullCodeGenerator::EmitLoadGlobalCheckExtensions(Variable* var, + TypeofState typeof_state, + Label* slow) { Register current = cp; Register next = a1; Register temp = a2; @@ -1142,7 +1153,7 @@ void FullCodeGenerator::EmitLoadGlobalSlotCheckExtensions( } __ lw(a0, GlobalObjectOperand()); - __ li(a2, Operand(slot->var()->name())); + __ li(a2, Operand(var->name())); RelocInfo::Mode mode = (typeof_state == INSIDE_TYPEOF) ? 
RelocInfo::CODE_TARGET : RelocInfo::CODE_TARGET_CONTEXT; @@ -1151,15 +1162,14 @@ void FullCodeGenerator::EmitLoadGlobalSlotCheckExtensions( } -MemOperand FullCodeGenerator::ContextSlotOperandCheckExtensions( - Slot* slot, - Label* slow) { - ASSERT(slot->type() == Slot::CONTEXT); +MemOperand FullCodeGenerator::ContextSlotOperandCheckExtensions(Variable* var, + Label* slow) { + ASSERT(var->IsContextSlot()); Register context = cp; Register next = a3; Register temp = t0; - for (Scope* s = scope(); s != slot->var()->scope(); s = s->outer_scope()) { + for (Scope* s = scope(); s != var->scope(); s = s->outer_scope()) { if (s->num_heap_slots() > 0) { if (s->calls_eval()) { // Check that extension is NULL. @@ -1178,60 +1188,32 @@ MemOperand FullCodeGenerator::ContextSlotOperandCheckExtensions( // This function is used only for loads, not stores, so it's safe to // return an cp-based operand (the write barrier cannot be allowed to // destroy the cp register). - return ContextOperand(context, slot->index()); + return ContextOperand(context, var->index()); } -void FullCodeGenerator::EmitDynamicLoadFromSlotFastCase( - Slot* slot, - TypeofState typeof_state, - Label* slow, - Label* done) { +void FullCodeGenerator::EmitDynamicLookupFastCase(Variable* var, + TypeofState typeof_state, + Label* slow, + Label* done) { // Generate fast-case code for variables that might be shadowed by // eval-introduced variables. Eval is used a lot without // introducing variables. In those cases, we do not want to // perform a runtime call for all variables in the scope // containing the eval. 
- if (slot->var()->mode() == Variable::DYNAMIC_GLOBAL) { - EmitLoadGlobalSlotCheckExtensions(slot, typeof_state, slow); + if (var->mode() == Variable::DYNAMIC_GLOBAL) { + EmitLoadGlobalCheckExtensions(var, typeof_state, slow); __ Branch(done); - } else if (slot->var()->mode() == Variable::DYNAMIC_LOCAL) { - Slot* potential_slot = slot->var()->local_if_not_shadowed()->rewrite(); - Expression* rewrite = slot->var()->local_if_not_shadowed()->rewrite(); - if (potential_slot != NULL) { - // Generate fast case for locals that rewrite to slots. - __ lw(v0, ContextSlotOperandCheckExtensions(potential_slot, slow)); - if (potential_slot->var()->mode() == Variable::CONST) { - __ LoadRoot(at, Heap::kTheHoleValueRootIndex); - __ subu(at, v0, at); // Sub as compare: at == 0 on eq. - __ LoadRoot(a0, Heap::kUndefinedValueRootIndex); - __ movz(v0, a0, at); // Conditional move. - } - __ Branch(done); - } else if (rewrite != NULL) { - // Generate fast case for calls of an argument function. - Property* property = rewrite->AsProperty(); - if (property != NULL) { - VariableProxy* obj_proxy = property->obj()->AsVariableProxy(); - Literal* key_literal = property->key()->AsLiteral(); - if (obj_proxy != NULL && - key_literal != NULL && - obj_proxy->IsArguments() && - key_literal->handle()->IsSmi()) { - // Load arguments object if there are no eval-introduced - // variables. Then load the argument from the arguments - // object using keyed load. 
- __ lw(a1, - ContextSlotOperandCheckExtensions(obj_proxy->var()->rewrite(), - slow)); - __ li(a0, Operand(key_literal->handle())); - Handle ic = - isolate()->builtins()->KeyedLoadIC_Initialize(); - __ Call(ic, RelocInfo::CODE_TARGET, GetPropertyId(property)); - __ Branch(done); - } - } + } else if (var->mode() == Variable::DYNAMIC_LOCAL) { + Variable* local = var->local_if_not_shadowed(); + __ lw(v0, ContextSlotOperandCheckExtensions(local, slow)); + if (local->mode() == Variable::CONST) { + __ LoadRoot(at, Heap::kTheHoleValueRootIndex); + __ subu(at, v0, at); // Sub as compare: at == 0 on eq. + __ LoadRoot(a0, Heap::kUndefinedValueRootIndex); + __ movz(v0, a0, at); // Conditional move: return Undefined if TheHole. } + __ Branch(done); } } @@ -1241,66 +1223,62 @@ void FullCodeGenerator::EmitVariableLoad(VariableProxy* proxy) { SetSourcePosition(proxy->position()); Variable* var = proxy->var(); - // Three cases: non-this global variables, lookup slots, and all other - // types of slots. - Slot* slot = var->rewrite(); - ASSERT((var->is_global() && !var->is_this()) == (slot == NULL)); - - if (slot == NULL) { - Comment cmnt(masm_, "Global variable"); - // Use inline caching. Variable name is passed in a2 and the global - // object (receiver) in a0. - __ lw(a0, GlobalObjectOperand()); - __ li(a2, Operand(var->name())); - Handle ic = isolate()->builtins()->LoadIC_Initialize(); - __ Call(ic, RelocInfo::CODE_TARGET_CONTEXT); - context()->Plug(v0); - - } else if (slot->type() == Slot::LOOKUP) { - Label done, slow; - - // Generate code for loading from variables potentially shadowed - // by eval-introduced variables. - EmitDynamicLoadFromSlotFastCase(slot, NOT_INSIDE_TYPEOF, &slow, &done); - - __ bind(&slow); - Comment cmnt(masm_, "Lookup slot"); - __ li(a1, Operand(var->name())); - __ Push(cp, a1); // Context and name. 
- __ CallRuntime(Runtime::kLoadContextSlot, 2); - __ bind(&done); + // Three cases: global variables, lookup variables, and all other types of + // variables. + switch (var->location()) { + case Variable::UNALLOCATED: { + Comment cmnt(masm_, "Global variable"); + // Use inline caching. Variable name is passed in a2 and the global + // object (receiver) in a0. + __ lw(a0, GlobalObjectOperand()); + __ li(a2, Operand(var->name())); + Handle ic = isolate()->builtins()->LoadIC_Initialize(); + __ Call(ic, RelocInfo::CODE_TARGET_CONTEXT); + context()->Plug(v0); + break; + } - context()->Plug(v0); + case Variable::PARAMETER: + case Variable::LOCAL: + case Variable::CONTEXT: { + Comment cmnt(masm_, var->IsContextSlot() + ? "Context variable" + : "Stack variable"); + if (var->mode() != Variable::LET && var->mode() != Variable::CONST) { + context()->Plug(var); + } else { + // Let and const need a read barrier. + GetVar(v0, var); + __ LoadRoot(at, Heap::kTheHoleValueRootIndex); + __ subu(at, v0, at); // Sub as compare: at == 0 on eq. + if (var->mode() == Variable::LET) { + Label done; + __ Branch(&done, ne, at, Operand(zero_reg)); + __ li(a0, Operand(var->name())); + __ push(a0); + __ CallRuntime(Runtime::kThrowReferenceError, 1); + __ bind(&done); + } else { + __ LoadRoot(a0, Heap::kUndefinedValueRootIndex); + __ movz(v0, a0, at); // Conditional move: Undefined if TheHole. + } + context()->Plug(v0); + } + break; + } - } else { - Comment cmnt(masm_, (slot->type() == Slot::CONTEXT) - ? "Context slot" - : "Stack slot"); - if (var->mode() == Variable::CONST) { - // Constants may be the hole value if they have not been initialized. - // Unhole them. - MemOperand slot_operand = EmitSlotSearch(slot, a0); - __ lw(v0, slot_operand); - __ LoadRoot(at, Heap::kTheHoleValueRootIndex); - __ subu(at, v0, at); // Sub as compare: at == 0 on eq. - __ LoadRoot(a0, Heap::kUndefinedValueRootIndex); - __ movz(v0, a0, at); // Conditional move. 
- context()->Plug(v0); - } else if (var->mode() == Variable::LET) { - // Let bindings may be the hole value if they have not been initialized. - // Throw a type error in this case. - Label done; - MemOperand slot_operand = EmitSlotSearch(slot, a0); - __ lw(v0, slot_operand); - __ LoadRoot(a1, Heap::kTheHoleValueRootIndex); - __ Branch(&done, ne, v0, Operand(a1)); - __ li(v0, Operand(var->name())); - __ push(v0); - __ CallRuntime(Runtime::kThrowReferenceError, 1); + case Variable::LOOKUP: { + Label done, slow; + // Generate code for loading from variables potentially shadowed + // by eval-introduced variables. + EmitDynamicLookupFastCase(var, NOT_INSIDE_TYPEOF, &slow, &done); + __ bind(&slow); + Comment cmnt(masm_, "Lookup variable"); + __ li(a1, Operand(var->name())); + __ Push(cp, a1); // Context and name. + __ CallRuntime(Runtime::kLoadContextSlot, 2); __ bind(&done); context()->Plug(v0); - } else { - context()->Plug(slot); } } } @@ -1839,14 +1817,8 @@ void FullCodeGenerator::EmitAssignment(Expression* expr, int bailout_ast_id) { void FullCodeGenerator::EmitVariableAssignment(Variable* var, Token::Value op) { - ASSERT(var != NULL); - ASSERT(var->is_global() || var->rewrite() != NULL); - - if (var->is_global()) { - ASSERT(!var->is_this()); - // Assignment to a global variable. Use inline caching for the - // assignment. Right-hand-side value is passed in a0, variable name in - // a2, and the global object in a1. + if (var->IsUnallocated()) { + // Global var, const, or let. __ mov(a0, result_register()); __ li(a2, Operand(var->name())); __ lw(a1, GlobalObjectOperand()); @@ -1856,121 +1828,83 @@ void FullCodeGenerator::EmitVariableAssignment(Variable* var, __ Call(ic, RelocInfo::CODE_TARGET_CONTEXT); } else if (op == Token::INIT_CONST) { - // Like var declarations, const declarations are hoisted to function - // scope. However, unlike var initializers, const initializers are able - // to drill a hole to that function context, even from inside a 'with' - // context. 
We thus bypass the normal static scope lookup. - Slot* slot = var->rewrite(); - Label skip; - switch (slot->type()) { - case Slot::PARAMETER: - // No const parameters. - UNREACHABLE(); - break; - case Slot::LOCAL: - // Detect const reinitialization by checking for the hole value. - __ lw(a1, MemOperand(fp, SlotOffset(slot))); - __ LoadRoot(t0, Heap::kTheHoleValueRootIndex); - __ Branch(&skip, ne, a1, Operand(t0)); - __ sw(result_register(), MemOperand(fp, SlotOffset(slot))); - break; - case Slot::CONTEXT: - case Slot::LOOKUP: - __ push(result_register()); - __ li(a0, Operand(slot->var()->name())); - __ Push(cp, a0); // Context and name. - __ CallRuntime(Runtime::kInitializeConstContextSlot, 3); - break; - case Slot::GLOBAL: - UNREACHABLE(); + // Const initializers need a write barrier. + ASSERT(!var->IsParameter()); // No const parameters. + if (var->IsStackLocal()) { + Label skip; + __ lw(a1, StackOperand(var)); + __ LoadRoot(t0, Heap::kTheHoleValueRootIndex); + __ Branch(&skip, ne, a1, Operand(t0)); + __ sw(result_register(), StackOperand(var)); + __ bind(&skip); + } else { + ASSERT(var->IsContextSlot() || var->IsLookupSlot()); + // Like var declarations, const declarations are hoisted to function + // scope. However, unlike var initializers, const initializers are + // able to drill a hole to that function context, even from inside a + // 'with' context. We thus bypass the normal static scope lookup for + // var->IsContextSlot(). + __ push(v0); + __ li(a0, Operand(var->name())); + __ Push(cp, a0); // Context and name. + __ CallRuntime(Runtime::kInitializeConstContextSlot, 3); } - __ bind(&skip); + } else if (var->mode() == Variable::LET && op != Token::INIT_LET) { - // Perform the assignment for non-const variables. Const assignments - // are simply skipped. - Slot* slot = var->AsSlot(); - switch (slot->type()) { - case Slot::PARAMETER: - case Slot::LOCAL: { - Label assign; - // Check for an initialized let binding. 
- __ lw(a1, MemOperand(fp, SlotOffset(slot))); - __ LoadRoot(t0, Heap::kTheHoleValueRootIndex); - __ Branch(&assign, ne, a1, Operand(t0)); - __ li(a1, Operand(var->name())); - __ push(a1); - __ CallRuntime(Runtime::kThrowReferenceError, 1); - // Perform the assignment. - __ bind(&assign); - __ sw(result_register(), MemOperand(fp, SlotOffset(slot))); - break; - } - case Slot::CONTEXT: { - // Let variables may be the hole value if they have not been - // initialized. Throw a type error in this case. - Label assign; - MemOperand target = EmitSlotSearch(slot, a1); - // Check for an initialized let binding. - __ lw(a3, target); - __ LoadRoot(t0, Heap::kTheHoleValueRootIndex); - __ Branch(&assign, ne, a3, Operand(t0)); - __ li(a3, Operand(var->name())); - __ push(a3); - __ CallRuntime(Runtime::kThrowReferenceError, 1); - // Perform the assignment. - __ bind(&assign); - __ sw(result_register(), target); + // Non-initializing assignment to let variable needs a write barrier. + if (var->IsLookupSlot()) { + __ push(v0); // Value. + __ li(a1, Operand(var->name())); + __ li(a0, Operand(Smi::FromInt(strict_mode_flag()))); + __ Push(cp, a1, a0); // Context, name, strict mode. + __ CallRuntime(Runtime::kStoreContextSlot, 4); + } else { + ASSERT(var->IsStackAllocated() || var->IsContextSlot()); + Label assign; + MemOperand location = VarOperand(var, a1); + __ lw(a3, location); + __ LoadRoot(t0, Heap::kTheHoleValueRootIndex); + __ Branch(&assign, ne, a3, Operand(t0)); + __ li(a3, Operand(var->name())); + __ push(a3); + __ CallRuntime(Runtime::kThrowReferenceError, 1); + // Perform the assignment. + __ bind(&assign); + __ sw(result_register(), location); + if (var->IsContextSlot()) { // RecordWrite may destroy all its register arguments. 
__ mov(a3, result_register()); - int offset = Context::SlotOffset(slot->index()); + int offset = Context::SlotOffset(var->index()); __ RecordWrite(a1, Operand(offset), a2, a3); - break; } - case Slot::LOOKUP: - // Call the runtime for the assignment. - __ push(v0); // Value. - __ li(a1, Operand(slot->var()->name())); - __ li(a0, Operand(Smi::FromInt(strict_mode_flag()))); - __ Push(cp, a1, a0); // Context, name, strict mode. - __ CallRuntime(Runtime::kStoreContextSlot, 4); - break; } } else if (var->mode() != Variable::CONST) { - // Perform the assignment for non-const variables. Const assignments - // are simply skipped. - Slot* slot = var->rewrite(); - switch (slot->type()) { - case Slot::PARAMETER: - case Slot::LOCAL: - // Perform the assignment. - __ sw(result_register(), MemOperand(fp, SlotOffset(slot))); - break; - - case Slot::CONTEXT: { - MemOperand target = EmitSlotSearch(slot, a1); - // Perform the assignment and issue the write barrier. - __ sw(result_register(), target); - // RecordWrite may destroy all its register arguments. - __ mov(a3, result_register()); - int offset = FixedArray::kHeaderSize + slot->index() * kPointerSize; - __ RecordWrite(a1, Operand(offset), a2, a3); - break; + // Assignment to var or initializing assignment to let. + if (var->IsStackAllocated() || var->IsContextSlot()) { + MemOperand location = VarOperand(var, a1); + if (FLAG_debug_code && op == Token::INIT_LET) { + // Check for an uninitialized let binding. + __ lw(a2, location); + __ LoadRoot(t0, Heap::kTheHoleValueRootIndex); + __ Check(eq, "Let binding re-initialization.", a2, Operand(t0)); } - - case Slot::LOOKUP: - // Call the runtime for the assignment. - __ push(v0); // Value. - __ li(a1, Operand(slot->var()->name())); - __ li(a0, Operand(Smi::FromInt(strict_mode_flag()))); - __ Push(cp, a1, a0); // Context, name, strict mode. - __ CallRuntime(Runtime::kStoreContextSlot, 4); - break; - - case Slot::GLOBAL: - UNREACHABLE(); + // Perform the assignment. 
+ __ sw(v0, location); + if (var->IsContextSlot()) { + __ mov(a3, v0); + __ RecordWrite(a1, Operand(Context::SlotOffset(var->index())), a2, a3); + } + } else { + ASSERT(var->IsLookupSlot()); + __ push(v0); // Value. + __ li(a1, Operand(var->name())); + __ li(a0, Operand(Smi::FromInt(strict_mode_flag()))); + __ Push(cp, a1, a0); // Context, name, strict mode. + __ CallRuntime(Runtime::kStoreContextSlot, 4); } } + // Non-initializing assignments to consts are ignored. } @@ -2211,10 +2145,11 @@ void FullCodeGenerator::VisitCall(Call* expr) { #endif Comment cmnt(masm_, "[ Call"); - Expression* fun = expr->expression(); - Variable* var = fun->AsVariableProxy()->AsVariable(); + Expression* callee = expr->expression(); + VariableProxy* proxy = callee->AsVariableProxy(); + Property* property = callee->AsProperty(); - if (var != NULL && var->is_possibly_eval()) { + if (proxy != NULL && proxy->var()->is_possibly_eval()) { // In a call to eval, we first call %ResolvePossiblyDirectEval to // resolve the function we need to call and the receiver of the // call. Then we call the resolved function using the given @@ -2223,7 +2158,7 @@ void FullCodeGenerator::VisitCall(Call* expr) { int arg_count = args->length(); { PreservePositionScope pos_scope(masm()->positions_recorder()); - VisitForStackValue(fun); + VisitForStackValue(callee); __ LoadRoot(a2, Heap::kUndefinedValueRootIndex); __ push(a2); // Reserved receiver slot. @@ -2231,16 +2166,16 @@ void FullCodeGenerator::VisitCall(Call* expr) { for (int i = 0; i < arg_count; i++) { VisitForStackValue(args->at(i)); } + // If we know that eval can only be shadowed by eval-introduced // variables we attempt to load the global eval function directly // in generated code. If we succeed, there is no need to perform a // context lookup in the runtime system. 
Label done; - if (var->rewrite() != NULL && var->mode() == Variable::DYNAMIC_GLOBAL) { + Variable* var = proxy->var(); + if (!var->IsUnallocated() && var->mode() == Variable::DYNAMIC_GLOBAL) { Label slow; - EmitLoadGlobalSlotCheckExtensions(var->rewrite(), - NOT_INSIDE_TYPEOF, - &slow); + EmitLoadGlobalCheckExtensions(var, NOT_INSIDE_TYPEOF, &slow); // Push the function and resolve eval. __ push(v0); EmitResolvePossiblyDirectEval(SKIP_CONTEXT_LOOKUP, arg_count); @@ -2248,14 +2183,12 @@ void FullCodeGenerator::VisitCall(Call* expr) { __ bind(&slow); } - // Push copy of the function (found below the arguments) and + // Push a copy of the function (found below the arguments) and // resolve eval. __ lw(a1, MemOperand(sp, (arg_count + 1) * kPointerSize)); __ push(a1); EmitResolvePossiblyDirectEval(PERFORM_CONTEXT_LOOKUP, arg_count); - if (done.is_linked()) { - __ bind(&done); - } + __ bind(&done); // The runtime call returns a pair of values in v0 (function) and // v1 (receiver). Touch up the stack with the right values. @@ -2271,30 +2204,26 @@ void FullCodeGenerator::VisitCall(Call* expr) { // Restore context register. __ lw(cp, MemOperand(fp, StandardFrameConstants::kContextOffset)); context()->DropAndPlug(1, v0); - } else if (var != NULL && !var->is_this() && var->is_global()) { + } else if (proxy != NULL && proxy->var()->IsUnallocated()) { // Push global object as receiver for the call IC. __ lw(a0, GlobalObjectOperand()); __ push(a0); - EmitCallWithIC(expr, var->name(), RelocInfo::CODE_TARGET_CONTEXT); - } else if (var != NULL && var->rewrite() != NULL && - var->rewrite()->type() == Slot::LOOKUP) { + EmitCallWithIC(expr, proxy->name(), RelocInfo::CODE_TARGET_CONTEXT); + } else if (proxy != NULL && proxy->var()->IsLookupSlot()) { // Call to a lookup slot (dynamically introduced variable). Label slow, done; { PreservePositionScope scope(masm()->positions_recorder()); // Generate code for loading from variables potentially shadowed // by eval-introduced variables. 
- EmitDynamicLoadFromSlotFastCase(var->rewrite(), - NOT_INSIDE_TYPEOF, - &slow, - &done); + EmitDynamicLookupFastCase(proxy->var(), NOT_INSIDE_TYPEOF, &slow, &done); } __ bind(&slow); // Call the runtime to find the function to call (returned in v0) // and the object holding it (returned in v1). __ push(context_register()); - __ li(a2, Operand(var->name())); + __ li(a2, Operand(proxy->name())); __ push(a2); __ CallRuntime(Runtime::kLoadContextSlot, 2); __ Push(v0, v1); // Function, receiver. @@ -2319,26 +2248,21 @@ void FullCodeGenerator::VisitCall(Call* expr) { // by LoadContextSlot. That object could be the hole if the // receiver is implicitly the global object. EmitCallWithStub(expr, RECEIVER_MIGHT_BE_IMPLICIT); - } else if (fun->AsProperty() != NULL) { - // Call to an object property. - Property* prop = fun->AsProperty(); - Literal* key = prop->key()->AsLiteral(); - if (key != NULL && key->handle()->IsSymbol()) { - // Call to a named property, use call IC. - { PreservePositionScope scope(masm()->positions_recorder()); - VisitForStackValue(prop->obj()); - } - EmitCallWithIC(expr, key->handle(), RelocInfo::CODE_TARGET); + } else if (property != NULL) { + { PreservePositionScope scope(masm()->positions_recorder()); + VisitForStackValue(property->obj()); + } + if (property->key()->IsPropertyName()) { + EmitCallWithIC(expr, + property->key()->AsLiteral()->handle(), + RelocInfo::CODE_TARGET); } else { - // Call to a keyed property. - { PreservePositionScope scope(masm()->positions_recorder()); - VisitForStackValue(prop->obj()); - } - EmitKeyedCallWithIC(expr, prop->key()); + EmitKeyedCallWithIC(expr, property->key()); } } else { + // Call to an arbitrary expression not handled specially above. { PreservePositionScope scope(masm()->positions_recorder()); - VisitForStackValue(fun); + VisitForStackValue(callee); } // Load global receiver object. 
__ lw(a1, GlobalObjectOperand()); @@ -3668,32 +3592,32 @@ void FullCodeGenerator::VisitUnaryOperation(UnaryOperation* expr) { switch (expr->op()) { case Token::DELETE: { Comment cmnt(masm_, "[ UnaryOperation (DELETE)"); - Property* prop = expr->expression()->AsProperty(); - Variable* var = expr->expression()->AsVariableProxy()->AsVariable(); + Property* property = expr->expression()->AsProperty(); + VariableProxy* proxy = expr->expression()->AsVariableProxy(); - if (prop != NULL) { - VisitForStackValue(prop->obj()); - VisitForStackValue(prop->key()); + if (property != NULL) { + VisitForStackValue(property->obj()); + VisitForStackValue(property->key()); __ li(a1, Operand(Smi::FromInt(strict_mode_flag()))); __ push(a1); __ InvokeBuiltin(Builtins::DELETE, CALL_FUNCTION); context()->Plug(v0); - } else if (var != NULL) { + } else if (proxy != NULL) { + Variable* var = proxy->var(); // Delete of an unqualified identifier is disallowed in strict mode - // but "delete this" is. + // but "delete this" is allowed. ASSERT(strict_mode_flag() == kNonStrictMode || var->is_this()); - if (var->is_global()) { + if (var->IsUnallocated()) { __ lw(a2, GlobalObjectOperand()); __ li(a1, Operand(var->name())); __ li(a0, Operand(Smi::FromInt(kNonStrictMode))); __ Push(a2, a1, a0); __ InvokeBuiltin(Builtins::DELETE, CALL_FUNCTION); context()->Plug(v0); - } else if (var->rewrite() != NULL && - var->rewrite()->type() != Slot::LOOKUP) { + } else if (var->IsStackAllocated() || var->IsContextSlot()) { // Result of deleting non-global, non-dynamic variables is false. // The subexpression does not have side effects. - context()->Plug(false); + context()->Plug(var->is_this()); } else { // Non-global variable. Call the runtime to try to delete from the // context where the variable was introduced. 
@@ -3968,8 +3892,10 @@ void FullCodeGenerator::VisitCountOperation(CountOperation* expr) { void FullCodeGenerator::VisitForTypeofValue(Expression* expr) { + ASSERT(!context()->IsEffect()); + ASSERT(!context()->IsTest()); VariableProxy* proxy = expr->AsVariableProxy(); - if (proxy != NULL && !proxy->var()->is_this() && proxy->var()->is_global()) { + if (proxy != NULL && proxy->var()->IsUnallocated()) { Comment cmnt(masm_, "Global variable"); __ lw(a0, GlobalObjectOperand()); __ li(a2, Operand(proxy->name())); @@ -3979,15 +3905,12 @@ void FullCodeGenerator::VisitForTypeofValue(Expression* expr) { __ Call(ic); PrepareForBailout(expr, TOS_REG); context()->Plug(v0); - } else if (proxy != NULL && - proxy->var()->rewrite() != NULL && - proxy->var()->rewrite()->type() == Slot::LOOKUP) { + } else if (proxy != NULL && proxy->var()->IsLookupSlot()) { Label done, slow; // Generate code for loading from variables potentially shadowed // by eval-introduced variables. - Slot* slot = proxy->var()->rewrite(); - EmitDynamicLoadFromSlotFastCase(slot, INSIDE_TYPEOF, &slow, &done); + EmitDynamicLookupFastCase(proxy->var(), INSIDE_TYPEOF, &slow, &done); __ bind(&slow); __ li(a0, Operand(proxy->name())); diff --git a/src/objects-debug.cc b/src/objects-debug.cc index 4da360b81e3..a3dc19357f1 100644 --- a/src/objects-debug.cc +++ b/src/objects-debug.cc @@ -257,9 +257,9 @@ void JSObject::JSObjectVerify() { (map()->inobject_properties() + properties()->length() - map()->NextFreePropertyIndex())); } - ASSERT(map()->has_fast_elements() == - (elements()->map() == GetHeap()->fixed_array_map() || - elements()->map() == GetHeap()->fixed_cow_array_map())); + ASSERT_EQ(map()->has_fast_elements(), + (elements()->map() == GetHeap()->fixed_array_map() || + elements()->map() == GetHeap()->fixed_cow_array_map())); ASSERT(map()->has_fast_elements() == HasFastElements()); } diff --git a/src/objects.cc b/src/objects.cc index 76b57d86aa2..00ea4f23db5 100644 --- a/src/objects.cc +++ b/src/objects.cc @@ -41,7 
+41,6 @@ #include "objects-visiting.h" #include "macro-assembler.h" #include "safepoint-table.h" -#include "scanner-base.h" #include "string-stream.h" #include "utils.h" #include "vm-state-inl.h" diff --git a/src/parser.cc b/src/parser.cc index 056133449cd..d64e7b7600e 100644 --- a/src/parser.cc +++ b/src/parser.cc @@ -39,6 +39,7 @@ #include "platform.h" #include "preparser.h" #include "runtime.h" +#include "scanner-character-streams.h" #include "scopeinfo.h" #include "string-stream.h" @@ -2216,8 +2217,6 @@ TryStatement* Parser::ParseTryStatement(bool* ok) { Expect(Token::RPAREN, CHECK_OK); if (peek() == Token::LBRACE) { - // Rewrite the catch body { B } to a block: - // { { B } ExitContext; }. Target target(&this->target_stack_, &catch_collector); catch_scope = NewScope(top_scope_, Scope::CATCH_SCOPE, inside_with()); if (top_scope_->is_strict_mode()) { @@ -2226,14 +2225,11 @@ TryStatement* Parser::ParseTryStatement(bool* ok) { Variable::Mode mode = harmony_block_scoping_ ? Variable::LET : Variable::VAR; catch_variable = catch_scope->DeclareLocal(name, mode); - catch_block = new(zone()) Block(isolate(), NULL, 2, false); Scope* saved_scope = top_scope_; top_scope_ = catch_scope; - Block* catch_body = ParseBlock(NULL, CHECK_OK); + catch_block = ParseBlock(NULL, CHECK_OK); top_scope_ = saved_scope; - catch_block->AddStatement(catch_body); - catch_block->AddStatement(new(zone()) ExitContextStatement()); } else { Expect(Token::LBRACE, CHECK_OK); } diff --git a/src/parser.h b/src/parser.h index 381ff27143b..3312f2f56a8 100644 --- a/src/parser.h +++ b/src/parser.h @@ -30,10 +30,9 @@ #include "allocation.h" #include "ast.h" -#include "scanner.h" -#include "scopes.h" #include "preparse-data-format.h" #include "preparse-data.h" +#include "scopes.h" namespace v8 { namespace internal { diff --git a/src/platform-win32.cc b/src/platform-win32.cc index e5df5ff3bf5..4da0101eaee 100644 --- a/src/platform-win32.cc +++ b/src/platform-win32.cc @@ -1,4 +1,4 @@ -// Copyright 2006-2008 
the V8 project authors. All rights reserved. +// Copyright 2011 the V8 project authors. All rights reserved. // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -35,76 +35,8 @@ #include "platform.h" #include "vm-state-inl.h" -// Extra POSIX/ANSI routines for Win32 when when using Visual Studio C++. Please -// refer to The Open Group Base Specification for specification of the correct -// semantics for these functions. -// (http://www.opengroup.org/onlinepubs/000095399/) #ifdef _MSC_VER -namespace v8 { -namespace internal { - -// Test for finite value - usually defined in math.h -int isfinite(double x) { - return _finite(x); -} - -} // namespace v8 -} // namespace internal - -// Test for a NaN (not a number) value - usually defined in math.h -int isnan(double x) { - return _isnan(x); -} - - -// Test for infinity - usually defined in math.h -int isinf(double x) { - return (_fpclass(x) & (_FPCLASS_PINF | _FPCLASS_NINF)) != 0; -} - - -// Test if x is less than y and both nominal - usually defined in math.h -int isless(double x, double y) { - return isnan(x) || isnan(y) ? 0 : x < y; -} - - -// Test if x is greater than y and both nominal - usually defined in math.h -int isgreater(double x, double y) { - return isnan(x) || isnan(y) ? 0 : x > y; -} - - -// Classify floating point number - usually defined in math.h -int fpclassify(double x) { - // Use the MS-specific _fpclass() for classification. - int flags = _fpclass(x); - - // Determine class. We cannot use a switch statement because - // the _FPCLASS_ constants are defined as flags. - if (flags & (_FPCLASS_PN | _FPCLASS_NN)) return FP_NORMAL; - if (flags & (_FPCLASS_PZ | _FPCLASS_NZ)) return FP_ZERO; - if (flags & (_FPCLASS_PD | _FPCLASS_ND)) return FP_SUBNORMAL; - if (flags & (_FPCLASS_PINF | _FPCLASS_NINF)) return FP_INFINITE; - - // All cases should be covered by the code above. 
- ASSERT(flags & (_FPCLASS_SNAN | _FPCLASS_QNAN)); - return FP_NAN; -} - - -// Test sign - usually defined in math.h -int signbit(double x) { - // We need to take care of the special case of both positive - // and negative versions of zero. - if (x == 0) - return _fpclass(x) & _FPCLASS_NZ; - else - return x < 0; -} - - // Case-insensitive bounded string comparisons. Use stricmp() on Win32. Usually // defined in strings.h. int strncasecmp(const char* s1, const char* s2, int n) { diff --git a/src/platform.h b/src/platform.h index 6b2348c8903..034fe3404de 100644 --- a/src/platform.h +++ b/src/platform.h @@ -44,7 +44,22 @@ #ifndef V8_PLATFORM_H_ #define V8_PLATFORM_H_ -#define V8_INFINITY INFINITY +#ifdef __sun +# ifndef signbit +int signbit(double x); +# endif +#endif + +// GCC specific stuff +#ifdef __GNUC__ + +// Needed for va_list on at least MinGW and Android. +#include + +#define __GNUC_VERSION__ (__GNUC__ * 10000 + __GNUC_MINOR__ * 100) + +#endif // __GNUC__ + // Windows specific stuff. #ifdef WIN32 @@ -52,27 +67,7 @@ // Microsoft Visual C++ specific stuff. #ifdef _MSC_VER -enum { - FP_NAN, - FP_INFINITE, - FP_ZERO, - FP_SUBNORMAL, - FP_NORMAL -}; - -#undef V8_INFINITY -#define V8_INFINITY HUGE_VAL - -namespace v8 { -namespace internal { -int isfinite(double x); -} } -int isnan(double x); -int isinf(double x); -int isless(double x, double y); -int isgreater(double x, double y); -int fpclassify(double x); -int signbit(double x); +#include "win32-math.h" int strncasecmp(const char* s1, const char* s2, int n); @@ -83,36 +78,6 @@ int random(); #endif // WIN32 - -#ifdef __sun -# ifndef signbit -int signbit(double x); -# endif -#endif - - -// GCC specific stuff -#ifdef __GNUC__ - -// Needed for va_list on at least MinGW and Android. 
-#include - -#define __GNUC_VERSION__ (__GNUC__ * 10000 + __GNUC_MINOR__ * 100) - -// Unfortunately, the INFINITY macro cannot be used with the '-pedantic' -// warning flag and certain versions of GCC due to a bug: -// http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11931 -// For now, we use the more involved template-based version from , but -// only when compiling with GCC versions affected by the bug (2.96.x - 4.0.x) -// __GNUC_PREREQ is not defined in GCC for Mac OS X, so we define our own macro -#if __GNUC_VERSION__ >= 29600 && __GNUC_VERSION__ < 40100 -#include -#undef V8_INFINITY -#define V8_INFINITY std::numeric_limits::infinity() -#endif - -#endif // __GNUC__ - #include "atomicops.h" #include "platform-tls.h" #include "utils.h" diff --git a/src/preparser-api.cc b/src/preparser-api.cc index 80656d5d122..899489e2500 100644 --- a/src/preparser-api.cc +++ b/src/preparser-api.cc @@ -25,15 +25,19 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#ifdef _MSC_VER +#define V8_WIN32_LEAN_AND_MEAN +#include "win32-headers.h" +#endif + #include "../include/v8-preparser.h" #include "globals.h" -#include "flags.h" #include "checks.h" #include "allocation.h" #include "utils.h" #include "list.h" -#include "scanner-base.h" +#include "hashmap.h" #include "preparse-data-format.h" #include "preparse-data.h" #include "preparser.h" diff --git a/src/preparser.cc b/src/preparser.cc index 1a3dd737c5d..47d21bac15e 100644 --- a/src/preparser.cc +++ b/src/preparser.cc @@ -25,22 +25,31 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+#include + #include "../include/v8stdint.h" -#include "unicode.h" -#include "globals.h" -#include "checks.h" + #include "allocation.h" -#include "utils.h" +#include "checks.h" +#include "conversions.h" +#include "conversions-inl.h" +#include "globals.h" +#include "hashmap.h" #include "list.h" - -#include "scanner-base.h" #include "preparse-data-format.h" #include "preparse-data.h" #include "preparser.h" - -#include "conversions-inl.h" +#include "unicode.h" +#include "utils.h" namespace v8 { + +#ifdef _MSC_VER +// Usually defined in math.h, but not in MSVC. +// Abstracted to work +int isfinite(double value); +#endif + namespace preparser { // Preparsing checks a JavaScript program and emits preparse-data that helps @@ -68,27 +77,22 @@ void PreParser::ReportUnexpectedToken(i::Token::Value token) { // Four of the tokens are treated specially switch (token) { case i::Token::EOS: - return ReportMessageAt(source_location.beg_pos, source_location.end_pos, - "unexpected_eos", NULL); + return ReportMessageAt(source_location, "unexpected_eos", NULL); case i::Token::NUMBER: - return ReportMessageAt(source_location.beg_pos, source_location.end_pos, - "unexpected_token_number", NULL); + return ReportMessageAt(source_location, "unexpected_token_number", NULL); case i::Token::STRING: - return ReportMessageAt(source_location.beg_pos, source_location.end_pos, - "unexpected_token_string", NULL); + return ReportMessageAt(source_location, "unexpected_token_string", NULL); case i::Token::IDENTIFIER: - return ReportMessageAt(source_location.beg_pos, source_location.end_pos, + return ReportMessageAt(source_location, "unexpected_token_identifier", NULL); case i::Token::FUTURE_RESERVED_WORD: - return ReportMessageAt(source_location.beg_pos, source_location.end_pos, - "unexpected_reserved", NULL); + return ReportMessageAt(source_location, "unexpected_reserved", NULL); case i::Token::FUTURE_STRICT_RESERVED_WORD: - return ReportMessageAt(source_location.beg_pos, source_location.end_pos, + 
return ReportMessageAt(source_location, "unexpected_strict_reserved", NULL); default: const char* name = i::Token::String(token); - ReportMessageAt(source_location.beg_pos, source_location.end_pos, - "unexpected_token", name); + ReportMessageAt(source_location, "unexpected_token", name); } } @@ -98,7 +102,7 @@ void PreParser::ReportUnexpectedToken(i::Token::Value token) { void PreParser::CheckOctalLiteral(int beg_pos, int end_pos, bool* ok) { i::Scanner::Location octal = scanner_->octal_position(); if (beg_pos <= octal.beg_pos && octal.end_pos <= end_pos) { - ReportMessageAt(octal.beg_pos, octal.end_pos, "strict_octal_literal", NULL); + ReportMessageAt(octal, "strict_octal_literal", NULL); scanner_->clear_octal_position(); *ok = false; } @@ -251,7 +255,7 @@ PreParser::Statement PreParser::ParseFunctionDeclaration(bool* ok) { if (identifier.IsFutureStrictReserved()) { type = "strict_reserved_word"; } - ReportMessageAt(location.beg_pos, location.end_pos, type, NULL); + ReportMessageAt(location, type, NULL); *ok = false; } return Statement::FunctionDeclaration(); @@ -313,8 +317,7 @@ PreParser::Statement PreParser::ParseVariableDeclarations( } else if (peek() == i::Token::CONST) { if (strict_mode()) { i::Scanner::Location location = scanner_->peek_location(); - ReportMessageAt(location.beg_pos, location.end_pos, - "strict_const", NULL); + ReportMessageAt(location, "strict_const", NULL); *ok = false; return Statement::Default(); } @@ -475,8 +478,7 @@ PreParser::Statement PreParser::ParseWithStatement(bool* ok) { Expect(i::Token::WITH, CHECK_OK); if (strict_mode()) { i::Scanner::Location location = scanner_->location(); - ReportMessageAt(location.beg_pos, location.end_pos, - "strict_mode_with", NULL); + ReportMessageAt(location, "strict_mode_with", NULL); *ok = false; return Statement::Default(); } @@ -612,8 +614,7 @@ PreParser::Statement PreParser::ParseThrowStatement(bool* ok) { Expect(i::Token::THROW, CHECK_OK); if (scanner_->HasAnyLineTerminatorBeforeNext()) { 
i::JavaScriptScanner::Location pos = scanner_->location(); - ReportMessageAt(pos.beg_pos, pos.end_pos, - "newline_after_throw", NULL); + ReportMessageAt(pos, "newline_after_throw", NULL); *ok = false; return Statement::Default(); } @@ -1025,8 +1026,7 @@ PreParser::Expression PreParser::ParsePrimaryExpression(bool* ok) { if (strict_mode()) { Next(); i::Scanner::Location location = scanner_->location(); - ReportMessageAt(location.beg_pos, location.end_pos, - "strict_reserved_word", NULL); + ReportMessageAt(location, "strict_reserved_word", NULL); *ok = false; return Expression::Default(); } @@ -1107,6 +1107,39 @@ PreParser::Expression PreParser::ParseArrayLiteral(bool* ok) { return Expression::Default(); } +void PreParser::CheckDuplicate(DuplicateFinder* finder, + i::Token::Value property, + int type, + bool* ok) { + int old_type; + if (property == i::Token::NUMBER) { + old_type = finder->AddNumber(scanner_->literal_ascii_string(), type); + } else if (scanner_->is_literal_ascii()) { + old_type = finder->AddAsciiSymbol(scanner_->literal_ascii_string(), + type); + } else { + old_type = finder->AddUC16Symbol(scanner_->literal_uc16_string(), type); + } + if (HasConflict(old_type, type)) { + if (IsDataDataConflict(old_type, type)) { + // Both are data properties. + if (!strict_mode()) return; + ReportMessageAt(scanner_->location(), + "strict_duplicate_property", NULL); + } else if (IsDataAccessorConflict(old_type, type)) { + // Both a data and an accessor property with the same name. + ReportMessageAt(scanner_->location(), + "accessor_data_property", NULL); + } else { + ASSERT(IsAccessorAccessorConflict(old_type, type)); + // Both accessors of the same type. 
+ ReportMessageAt(scanner_->location(), + "accessor_get_set", NULL); + } + *ok = false; + } +} + PreParser::Expression PreParser::ParseObjectLiteral(bool* ok) { // ObjectLiteral :: @@ -1116,6 +1149,7 @@ PreParser::Expression PreParser::ParseObjectLiteral(bool* ok) { // )*[','] '}' Expect(i::Token::LBRACE, CHECK_OK); + DuplicateFinder duplicate_finder(scanner_->unicode_cache()); while (peek() != i::Token::RBRACE) { i::Token::Value next = peek(); switch (next) { @@ -1140,24 +1174,30 @@ PreParser::Expression PreParser::ParseObjectLiteral(bool* ok) { if (!is_keyword) { LogSymbol(); } + PropertyType type = is_getter ? kGetterProperty : kSetterProperty; + CheckDuplicate(&duplicate_finder, name, type, CHECK_OK); ParseFunctionLiteral(CHECK_OK); if (peek() != i::Token::RBRACE) { Expect(i::Token::COMMA, CHECK_OK); } continue; // restart the while } + CheckDuplicate(&duplicate_finder, next, kValueProperty, CHECK_OK); break; } case i::Token::STRING: Consume(next); + CheckDuplicate(&duplicate_finder, next, kValueProperty, CHECK_OK); GetStringSymbol(); break; case i::Token::NUMBER: Consume(next); + CheckDuplicate(&duplicate_finder, next, kValueProperty, CHECK_OK); break; default: if (i::Token::IsKeyword(next)) { Consume(next); + CheckDuplicate(&duplicate_finder, next, kValueProperty, CHECK_OK); } else { // Unexpected token. 
*ok = false; @@ -1182,9 +1222,7 @@ PreParser::Expression PreParser::ParseRegExpLiteral(bool seen_equal, bool* ok) { if (!scanner_->ScanRegExpPattern(seen_equal)) { Next(); - i::JavaScriptScanner::Location location = scanner_->location(); - ReportMessageAt(location.beg_pos, location.end_pos, - "unterminated_regexp", NULL); + ReportMessageAt(scanner_->location(), "unterminated_regexp", NULL); *ok = false; return Expression::Default(); } @@ -1193,9 +1231,7 @@ PreParser::Expression PreParser::ParseRegExpLiteral(bool seen_equal, if (!scanner_->ScanRegExpFlags()) { Next(); - i::JavaScriptScanner::Location location = scanner_->location(); - ReportMessageAt(location.beg_pos, location.end_pos, - "invalid_regexp_flags", NULL); + ReportMessageAt(scanner_->location(), "invalid_regexp_flags", NULL); *ok = false; return Expression::Default(); } @@ -1240,6 +1276,7 @@ PreParser::Expression PreParser::ParseFunctionLiteral(bool* ok) { Expect(i::Token::LPAREN, CHECK_OK); int start_position = scanner_->location().beg_pos; bool done = (peek() == i::Token::RPAREN); + DuplicateFinder duplicate_finder(scanner_->unicode_cache()); while (!done) { Identifier id = ParseIdentifier(CHECK_OK); if (!id.IsValidStrictVariable()) { @@ -1248,6 +1285,20 @@ PreParser::Expression PreParser::ParseFunctionLiteral(bool* ok) { id, CHECK_OK); } + int prev_value; + if (scanner_->is_literal_ascii()) { + prev_value = + duplicate_finder.AddAsciiSymbol(scanner_->literal_ascii_string(), 1); + } else { + prev_value = + duplicate_finder.AddUC16Symbol(scanner_->literal_uc16_string(), 1); + } + + if (prev_value != 0) { + SetStrictModeViolation(scanner_->location(), + "strict_param_dupe", + CHECK_OK); + } done = (peek() == i::Token::RPAREN); if (!done) { Expect(i::Token::COMMA, CHECK_OK); @@ -1399,13 +1450,18 @@ void PreParser::SetStrictModeViolation(i::Scanner::Location location, const char* type, bool* ok) { if (strict_mode()) { - ReportMessageAt(location.beg_pos, location.end_pos, type, NULL); + 
ReportMessageAt(location, type, NULL); *ok = false; return; } // Delay report in case this later turns out to be strict code // (i.e., for function names and parameters prior to a "use strict" // directive). + // It's safe to overwrite an existing violation. + // It's either from a function that turned out to be non-strict, + // or it's in the current function (and we just need to report + // one error), or it's in an unclosed nesting function that wasn't + // strict (otherwise we would already be in strict mode). strict_mode_violation_location_ = location; strict_mode_violation_type_ = type; } @@ -1417,11 +1473,9 @@ void PreParser::CheckDelayedStrictModeViolation(int beg_pos, i::Scanner::Location location = strict_mode_violation_location_; if (location.IsValid() && location.beg_pos > beg_pos && location.end_pos < end_pos) { - ReportMessageAt(location.beg_pos, location.end_pos, - strict_mode_violation_type_, NULL); + ReportMessageAt(location, strict_mode_violation_type_, NULL); *ok = false; } - strict_mode_violation_location_ = i::Scanner::Location::invalid(); } @@ -1436,7 +1490,7 @@ void PreParser::StrictModeIdentifierViolation(i::Scanner::Location location, type = "strict_reserved_word"; } if (strict_mode()) { - ReportMessageAt(location.beg_pos, location.end_pos, type, NULL); + ReportMessageAt(location, type, NULL); *ok = false; return; } @@ -1488,4 +1542,138 @@ bool PreParser::peek_any_identifier() { next == i::Token::FUTURE_RESERVED_WORD || next == i::Token::FUTURE_STRICT_RESERVED_WORD; } + + +int DuplicateFinder::AddAsciiSymbol(i::Vector key, int value) { + return AddSymbol(i::Vector::cast(key), true, value); +} + +int DuplicateFinder::AddUC16Symbol(i::Vector key, int value) { + return AddSymbol(i::Vector::cast(key), false, value); +} + +int DuplicateFinder::AddSymbol(i::Vector key, + bool is_ascii, + int value) { + uint32_t hash = Hash(key, is_ascii); + byte* encoding = BackupKey(key, is_ascii); + i::HashMap::Entry* entry = map_.Lookup(encoding, hash, true); +
int old_value = static_cast(reinterpret_cast(entry->value)); + entry->value = + reinterpret_cast(static_cast(value | old_value)); + return old_value; +} + + +int DuplicateFinder::AddNumber(i::Vector key, int value) { + ASSERT(key.length() > 0); + // Quick check for already being in canonical form. + if (IsNumberCanonical(key)) { + return AddAsciiSymbol(key, value); + } + + int flags = i::ALLOW_HEX | i::ALLOW_OCTALS; + double double_value = StringToDouble(unicode_constants_, key, flags, 0.0); + int length; + const char* string; + if (!isfinite(double_value)) { + string = "Infinity"; + length = 8; // strlen("Infinity"); + } else { + string = DoubleToCString(double_value, + i::Vector(number_buffer_, kBufferSize)); + length = i::StrLength(string); + } + return AddSymbol(i::Vector(reinterpret_cast(string), + length), true, value); +} + + +bool DuplicateFinder::IsNumberCanonical(i::Vector number) { + // Test for a safe approximation of number literals that are already + // in canonical form: max 15 digits, no leading zeroes, except an + // integer part that is a single zero, and no trailing zeros below + // the decimal point. + int pos = 0; + int length = number.length(); + if (number.length() > 15) return false; + if (number[pos] == '0') { + pos++; + } else { + while (pos < length && + static_cast(number[pos] - '0') <= ('9' - '0')) pos++; + } + if (length == pos) return true; + if (number[pos] != '.') return false; + pos++; + bool invalid_last_digit = true; + while (pos < length) { + byte digit = number[pos] - '0'; + if (digit > '9' - '0') return false; + invalid_last_digit = (digit == 0); + pos++; + } + return !invalid_last_digit; +} + + +uint32_t DuplicateFinder::Hash(i::Vector key, bool is_ascii) { + // Primitive hash function, almost identical to the one used + // for strings (except that it's seeded by the length and ASCII-ness). + int length = key.length(); + uint32_t hash = (length << 1) | (is_ascii ? 
1 : 0) ; + for (int i = 0; i < length; i++) { + uint32_t c = key[i]; + hash = (hash + c) * 1025; + hash ^= (hash >> 6); + } + return hash; +} + + +bool DuplicateFinder::Match(void* first, void* second) { + // Decode lengths. + // Length + ASCII-bit is encoded as base 128, most significant heptet first, + // with an 8th bit being non-zero while there are more heptets. + // The value encodes the number of bytes following, and whether the original + // was ASCII. + byte* s1 = reinterpret_cast(first); + byte* s2 = reinterpret_cast(second); + uint32_t length_ascii_field = 0; + byte c1; + do { + c1 = *s1; + if (c1 != *s2) return false; + length_ascii_field = (length_ascii_field << 7) | (c1 & 0x7f); + s1++; + s2++; + } while ((c1 & 0x80) != 0); + int length = static_cast(length_ascii_field >> 1); + return memcmp(s1, s2, length) == 0; +} + + +byte* DuplicateFinder::BackupKey(i::Vector bytes, + bool is_ascii) { + uint32_t ascii_length = (bytes.length() << 1) | (is_ascii ? 1 : 0); + backing_store_.StartSequence(); + // Emit ascii_length as base-128 encoded number, with the 8th bit (0x80) set + // on the byte of every heptet except the last, least significant, one.
+ if (ascii_length >= (1 << 7)) { + if (ascii_length >= (1 << 14)) { + if (ascii_length >= (1 << 21)) { + if (ascii_length >= (1 << 28)) { + backing_store_.Add(static_cast((ascii_length >> 28) | 0x80)); + } + backing_store_.Add(static_cast((ascii_length >> 21) | 0x80u)); + } + backing_store_.Add(static_cast((ascii_length >> 14) | 0x80u)); + } + backing_store_.Add(static_cast((ascii_length >> 7) | 0x80u)); + } + backing_store_.Add(static_cast(ascii_length & 0x7f)); + + backing_store_.AddBlock(bytes); + return backing_store_.EndSequence().start(); +} } } // v8::preparser diff --git a/src/preparser.h b/src/preparser.h index cd0a530e8d0..b97b7cff60e 100644 --- a/src/preparser.h +++ b/src/preparser.h @@ -28,9 +28,19 @@ #ifndef V8_PREPARSER_H #define V8_PREPARSER_H +#include "token.h" +#include "scanner.h" + namespace v8 { + +namespace internal { +class UnicodeCache; +} + namespace preparser { +typedef uint8_t byte; + // Preparsing checks a JavaScript program and emits preparse-data that helps // a later parsing to be faster. // See preparse-data-format.h for the data format. @@ -46,6 +56,53 @@ namespace preparser { namespace i = v8::internal; +class DuplicateFinder { + public: + explicit DuplicateFinder(i::UnicodeCache* constants) + : unicode_constants_(constants), + backing_store_(16), + map_(&Match) { } + + int AddAsciiSymbol(i::Vector key, int value); + int AddUC16Symbol(i::Vector key, int value); + // Add a number literal by converting it (if necessary) + // to the string that ToString(ToNumber(literal)) would generate, + // and then adding that string with AddAsciiSymbol. + // This string is the actual value used as key in an object literal, + // and the one that must be different from the other keys. + int AddNumber(i::Vector key, int value); + + private: + int AddSymbol(i::Vector key, bool is_ascii, int value); + // Backs up the key and its length in the backing store.
+ // The backup is stored with a base 128 encoding of the + // length (plus a bit saying whether the string is ASCII), + // followed by the bytes of the key. + byte* BackupKey(i::Vector key, bool is_ascii); + + // Compare two encoded keys (both pointing into the backing store) + // for having the same base-128 encoded lengths and ASCII-ness, + // and then having the same 'length' bytes following. + static bool Match(void* first, void* second); + // Creates a hash from a sequence of bytes. + static uint32_t Hash(i::Vector key, bool is_ascii); + // Checks whether a string containing a JS number is in its canonical + // form. + static bool IsNumberCanonical(i::Vector key); + + // Size of buffer. Sufficient for using it to call DoubleToCString + // from conversions.h. + static const int kBufferSize = 100; + + i::UnicodeCache* unicode_constants_; + // Backing store used to store strings used as hashmap keys. + i::SequenceCollector backing_store_; + i::HashMap map_; + // Buffer used for string->number->canonical string conversions. + char number_buffer_[kBufferSize]; +}; + + class PreParser { public: enum PreParseResult { @@ -53,7 +110,7 @@ class PreParser { kPreParseSuccess }; - ~PreParser() { } + ~PreParser() {} // Pre-parse the program from the character stream; returns true on // success (even if parsing failed, the pre-parse data successfully @@ -67,6 +124,45 @@ class PreParser { } private: + // Used to detect duplicates in object literals. Each of the values + // kGetterProperty, kSetterProperty and kValueProperty represents + // a type of object literal property. When parsing a property, its + // type value is stored in the DuplicateFinder for the property name. + // Values are chosen so that having intersection bits means that there is + // an incompatibility. + // I.e., you can add a getter to a property that already has a setter, since + // kGetterProperty and kSetterProperty don't intersect, but not if it + // already has a getter or a value.
Adding the getter to an existing + // setter will store the value (kGetterProperty | kSetterProperty), which + // is incompatible with adding any further properties. + enum PropertyType { + kNone = 0, + // Bit patterns representing different object literal property types. + kGetterProperty = 1, + kSetterProperty = 2, + kValueProperty = 7, + // Helper constants. + kValueFlag = 4 + }; + + // Checks the type of conflict based on values coming from PropertyType. + bool HasConflict(int type1, int type2) { return (type1 & type2) != 0; } + bool IsDataDataConflict(int type1, int type2) { + return ((type1 & type2) & kValueFlag) != 0; + } + bool IsDataAccessorConflict(int type1, int type2) { + return ((type1 ^ type2) & kValueFlag) != 0; + } + bool IsAccessorAccessorConflict(int type1, int type2) { + return ((type1 | type2) & kValueFlag) == 0; + } + + + void CheckDuplicate(DuplicateFinder* finder, + i::Token::Value property, + int type, + bool* ok); + // These types form an algebra over syntactic categories that is just // rich enough to let us recognize and propagate the constructs that // are either being counted in the preparser data, or is important @@ -371,6 +467,11 @@ class PreParser { // Report syntax error void ReportUnexpectedToken(i::Token::Value token); + void ReportMessageAt(i::Scanner::Location location, + const char* type, + const char* name_opt) { + log_->LogMessage(location.beg_pos, location.end_pos, type, name_opt); + } void ReportMessageAt(int start_pos, int end_pos, const char* type, diff --git a/src/prettyprinter.cc b/src/prettyprinter.cc index 2a415923312..36860a36f92 100644 --- a/src/prettyprinter.cc +++ b/src/prettyprinter.cc @@ -131,11 +131,6 @@ void PrettyPrinter::VisitWithStatement(WithStatement* node) { } -void PrettyPrinter::VisitExitContextStatement(ExitContextStatement* node) { - Print(""); -} - - void PrettyPrinter::VisitSwitchStatement(SwitchStatement* node) { PrintLabels(node->labels()); Print("switch ("); @@ -783,11 +778,6 @@ void 
AstPrinter::VisitWithStatement(WithStatement* node) { } -void AstPrinter::VisitExitContextStatement(ExitContextStatement* node) { - PrintIndented("EXIT CONTEXT\n"); -} - - void AstPrinter::VisitSwitchStatement(SwitchStatement* node) { IndentedScope indent(this, "SWITCH"); PrintLabelsIndented(NULL, node->labels()); @@ -1187,11 +1177,6 @@ void JsonAstBuilder::VisitWithStatement(WithStatement* stmt) { } -void JsonAstBuilder::VisitExitContextStatement(ExitContextStatement* stmt) { - TagScope tag(this, "ExitContextStatement"); -} - - void JsonAstBuilder::VisitSwitchStatement(SwitchStatement* stmt) { TagScope tag(this, "SwitchStatement"); } diff --git a/src/rewriter.cc b/src/rewriter.cc index ad6ce056b2c..3d4c2dcc126 100644 --- a/src/rewriter.cc +++ b/src/rewriter.cc @@ -208,7 +208,6 @@ void Processor::VisitWithStatement(WithStatement* node) { void Processor::VisitDeclaration(Declaration* node) {} void Processor::VisitEmptyStatement(EmptyStatement* node) {} void Processor::VisitReturnStatement(ReturnStatement* node) {} -void Processor::VisitExitContextStatement(ExitContextStatement* node) {} void Processor::VisitDebuggerStatement(DebuggerStatement* node) {} diff --git a/src/runtime.cc b/src/runtime.cc index 3e07b998230..95a24f0ef2e 100644 --- a/src/runtime.cc +++ b/src/runtime.cc @@ -2507,7 +2507,7 @@ class ReplacementStringBuilder { class CompiledReplacement { public: CompiledReplacement() - : parts_(1), replacement_substrings_(0) {} + : parts_(1), replacement_substrings_(0), simple_hint_(false) {} void Compile(Handle replacement, int capture_count, @@ -2523,6 +2523,10 @@ class CompiledReplacement { return parts_.length(); } + bool simple_hint() { + return simple_hint_; + } + private: enum PartType { SUBJECT_PREFIX = 1, @@ -2581,7 +2585,7 @@ class CompiledReplacement { }; template - static void ParseReplacementPattern(ZoneList* parts, + static bool ParseReplacementPattern(ZoneList* parts, Vector characters, int capture_count, int subject_length) { @@ -2678,14 +2682,17 
@@ class CompiledReplacement { if (length > last) { if (last == 0) { parts->Add(ReplacementPart::ReplacementString()); + return true; } else { parts->Add(ReplacementPart::ReplacementSubString(last, length)); } } + return false; } ZoneList parts_; ZoneList > replacement_substrings_; + bool simple_hint_; }; @@ -2697,16 +2704,16 @@ void CompiledReplacement::Compile(Handle replacement, String::FlatContent content = replacement->GetFlatContent(); ASSERT(content.IsFlat()); if (content.IsAscii()) { - ParseReplacementPattern(&parts_, - content.ToAsciiVector(), - capture_count, - subject_length); + simple_hint_ = ParseReplacementPattern(&parts_, + content.ToAsciiVector(), + capture_count, + subject_length); } else { ASSERT(content.IsTwoByte()); - ParseReplacementPattern(&parts_, - content.ToUC16Vector(), - capture_count, - subject_length); + simple_hint_ = ParseReplacementPattern(&parts_, + content.ToUC16Vector(), + capture_count, + subject_length); } } Isolate* isolate = replacement->GetIsolate(); @@ -2769,6 +2776,170 @@ void CompiledReplacement::Apply(ReplacementStringBuilder* builder, } +void FindAsciiStringIndices(Vector subject, + char pattern, + ZoneList* indices, + unsigned int limit) { + ASSERT(limit > 0); + // Collect indices of pattern in subject using memchr. + // Stop after finding at most limit values. + const char* subject_start = reinterpret_cast(subject.start()); + const char* subject_end = subject_start + subject.length(); + const char* pos = subject_start; + while (limit > 0) { + pos = reinterpret_cast( + memchr(pos, pattern, subject_end - pos)); + if (pos == NULL) return; + indices->Add(static_cast(pos - subject_start)); + pos++; + limit--; + } +} + + +template +void FindStringIndices(Isolate* isolate, + Vector subject, + Vector pattern, + ZoneList* indices, + unsigned int limit) { + ASSERT(limit > 0); + // Collect indices of pattern in subject. + // Stop after finding at most limit values. 
+ int pattern_length = pattern.length(); + int index = 0; + StringSearch search(isolate, pattern); + while (limit > 0) { + index = search.Search(subject, index); + if (index < 0) return; + indices->Add(index); + index += pattern_length; + limit--; + } +} + + +void FindStringIndicesDispatch(Isolate* isolate, + String* subject, + String* pattern, + ZoneList* indices, + unsigned int limit) { + { + AssertNoAllocation no_gc; + String::FlatContent subject_content = subject->GetFlatContent(); + String::FlatContent pattern_content = pattern->GetFlatContent(); + ASSERT(subject_content.IsFlat()); + ASSERT(pattern_content.IsFlat()); + if (subject_content.IsAscii()) { + Vector subject_vector = subject_content.ToAsciiVector(); + if (pattern_content.IsAscii()) { + Vector pattern_vector = pattern_content.ToAsciiVector(); + if (pattern_vector.length() == 1) { + FindAsciiStringIndices(subject_vector, + pattern_vector[0], + indices, + limit); + } else { + FindStringIndices(isolate, + subject_vector, + pattern_vector, + indices, + limit); + } + } else { + FindStringIndices(isolate, + subject_vector, + pattern_content.ToUC16Vector(), + indices, + limit); + } + } else { + Vector subject_vector = subject_content.ToUC16Vector(); + if (pattern->IsAsciiRepresentation()) { + FindStringIndices(isolate, + subject_vector, + pattern_content.ToAsciiVector(), + indices, + limit); + } else { + FindStringIndices(isolate, + subject_vector, + pattern_content.ToUC16Vector(), + indices, + limit); + } + } + } +} + + +template +MUST_USE_RESULT static MaybeObject* StringReplaceStringWithString( + Isolate* isolate, + Handle subject, + Handle pattern_regexp, + Handle replacement) { + ASSERT(subject->IsFlat()); + ASSERT(replacement->IsFlat()); + + ZoneScope zone_space(isolate, DELETE_ON_EXIT); + ZoneList indices(8); + ASSERT_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag()); + String* pattern = + String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex)); + int subject_len = subject->length(); + int 
pattern_len = pattern->length(); + int replacement_len = replacement->length(); + + FindStringIndicesDispatch(isolate, *subject, pattern, &indices, 0xffffffff); + + int matches = indices.length(); + if (matches == 0) return *subject; + + int result_len = (replacement_len - pattern_len) * matches + subject_len; + int subject_pos = 0; + int result_pos = 0; + + Handle result; + if (ResultSeqString::kHasAsciiEncoding) { + result = Handle::cast( + isolate->factory()->NewRawAsciiString(result_len)); + } else { + result = Handle::cast( + isolate->factory()->NewRawTwoByteString(result_len)); + } + + for (int i = 0; i < matches; i++) { + // Copy non-matched subject content. + if (subject_pos < indices.at(i)) { + String::WriteToFlat(*subject, + result->GetChars() + result_pos, + subject_pos, + indices.at(i)); + result_pos += indices.at(i) - subject_pos; + } + + // Replace match. + if (replacement_len > 0) { + String::WriteToFlat(*replacement, + result->GetChars() + result_pos, + 0, + replacement_len); + result_pos += replacement_len; + } + + subject_pos = indices.at(i) + pattern_len; + } + // Add remaining subject content at the end. 
+ if (subject_pos < subject_len) { + String::WriteToFlat(*subject, + result->GetChars() + result_pos, + subject_pos, + subject_len); + } + return *result; +} + MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString( Isolate* isolate, @@ -2808,6 +2979,20 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString( bool is_global = regexp_handle->GetFlags().is_global(); + // Shortcut for simple non-regexp global replacements + if (is_global && + regexp->TypeTag() == JSRegExp::ATOM && + compiled_replacement.simple_hint()) { + if (subject_handle->HasOnlyAsciiChars() && + replacement_handle->HasOnlyAsciiChars()) { + return StringReplaceStringWithString( + isolate, subject_handle, regexp_handle, replacement_handle); + } else { + return StringReplaceStringWithString( + isolate, subject_handle, regexp_handle, replacement_handle); + } + } + // Guessing the number of parts that the final result string is built // from. Global regexps can match any number of times, so we guess // conservatively. 
@@ -2893,6 +3078,20 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString( Handle subject_handle(subject); Handle regexp_handle(regexp); + + // Shortcut for simple non-regexp global replacements + if (regexp_handle->GetFlags().is_global() && + regexp_handle->TypeTag() == JSRegExp::ATOM) { + Handle empty_string_handle(HEAP->empty_string()); + if (subject_handle->HasOnlyAsciiChars()) { + return StringReplaceStringWithString( + isolate, subject_handle, regexp_handle, empty_string_handle); + } else { + return StringReplaceStringWithString( + isolate, subject_handle, regexp_handle, empty_string_handle); + } + } + Handle last_match_info_handle(last_match_info); Handle match = RegExpImpl::Exec(regexp_handle, subject_handle, @@ -5930,49 +6129,6 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringTrim) { } -void FindAsciiStringIndices(Vector subject, - char pattern, - ZoneList* indices, - unsigned int limit) { - ASSERT(limit > 0); - // Collect indices of pattern in subject using memchr. - // Stop after finding at most limit values. - const char* subject_start = reinterpret_cast(subject.start()); - const char* subject_end = subject_start + subject.length(); - const char* pos = subject_start; - while (limit > 0) { - pos = reinterpret_cast( - memchr(pos, pattern, subject_end - pos)); - if (pos == NULL) return; - indices->Add(static_cast(pos - subject_start)); - pos++; - limit--; - } -} - - -template -void FindStringIndices(Isolate* isolate, - Vector subject, - Vector pattern, - ZoneList* indices, - unsigned int limit) { - ASSERT(limit > 0); - // Collect indices of pattern in subject. - // Stop after finding at most limit values. 
- int pattern_length = pattern.length(); - int index = 0; - StringSearch search(isolate, pattern); - while (limit > 0) { - index = search.Search(subject, index); - if (index < 0) return; - indices->Add(index); - index += pattern_length; - limit--; - } -} - - RUNTIME_FUNCTION(MaybeObject*, Runtime_StringSplit) { ASSERT(args.length() == 3); HandleScope handle_scope(isolate); @@ -6012,53 +6168,7 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringSplit) { ZoneList indices(initial_capacity); if (!pattern->IsFlat()) FlattenString(pattern); - // No allocation block. - { - AssertNoAllocation no_gc; - String::FlatContent subject_content = subject->GetFlatContent(); - String::FlatContent pattern_content = pattern->GetFlatContent(); - ASSERT(subject_content.IsFlat()); - ASSERT(pattern_content.IsFlat()); - if (subject_content.IsAscii()) { - Vector subject_vector = subject_content.ToAsciiVector(); - if (pattern_content.IsAscii()) { - Vector pattern_vector = pattern_content.ToAsciiVector(); - if (pattern_vector.length() == 1) { - FindAsciiStringIndices(subject_vector, - pattern_vector[0], - &indices, - limit); - } else { - FindStringIndices(isolate, - subject_vector, - pattern_vector, - &indices, - limit); - } - } else { - FindStringIndices(isolate, - subject_vector, - pattern_content.ToUC16Vector(), - &indices, - limit); - } - } else { - Vector subject_vector = subject_content.ToUC16Vector(); - if (pattern->IsAsciiRepresentation()) { - FindStringIndices(isolate, - subject_vector, - pattern_content.ToAsciiVector(), - &indices, - limit); - } else { - FindStringIndices(isolate, - subject_vector, - pattern_content.ToUC16Vector(), - &indices, - limit); - } - } - } + FindStringIndicesDispatch(isolate, *subject, *pattern, &indices, limit); if (static_cast(indices.length()) < limit) { indices.Add(subject_length); @@ -6091,11 +6201,13 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringSplit) { } if (limit == 0xffffffffu) { - StringSplitCache::Enter(isolate->heap(), - 
isolate->heap()->string_split_cache(), - *subject, - *pattern, - *elements); + if (result->HasFastElements()) { + StringSplitCache::Enter(isolate->heap(), + isolate->heap()->string_split_cache(), + *subject, + *pattern, + *elements); + } } return *result; diff --git a/src/scanner-base.cc b/src/scanner-base.cc deleted file mode 100644 index 62eee1a548c..00000000000 --- a/src/scanner-base.cc +++ /dev/null @@ -1,1090 +0,0 @@ -// Copyright 2011 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// Features shared by parsing and pre-parsing scanners. - -#include "../include/v8stdint.h" -#include "scanner-base.h" -#include "char-predicates-inl.h" - -namespace v8 { -namespace internal { - -// ---------------------------------------------------------------------------- -// Scanner - -Scanner::Scanner(UnicodeCache* unicode_cache) - : unicode_cache_(unicode_cache) { } - - -uc32 Scanner::ScanHexNumber(int expected_length) { - ASSERT(expected_length <= 4); // prevent overflow - - uc32 digits[4] = { 0, 0, 0, 0 }; - uc32 x = 0; - for (int i = 0; i < expected_length; i++) { - digits[i] = c0_; - int d = HexValue(c0_); - if (d < 0) { - // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes - // should be illegal, but other JS VMs just return the - // non-escaped version of the original character. - - // Push back digits that we have advanced past. 
- for (int j = i-1; j >= 0; j--) { - PushBack(digits[j]); - } - return -1; - } - x = x * 16 + d; - Advance(); - } - - return x; -} - - - -// ---------------------------------------------------------------------------- -// JavaScriptScanner - -JavaScriptScanner::JavaScriptScanner(UnicodeCache* scanner_contants) - : Scanner(scanner_contants), - octal_pos_(Location::invalid()), - harmony_block_scoping_(false) { } - - -void JavaScriptScanner::Initialize(UC16CharacterStream* source) { - source_ = source; - // Need to capture identifiers in order to recognize "get" and "set" - // in object literals. - Init(); - // Skip initial whitespace allowing HTML comment ends just like - // after a newline and scan first token. - has_line_terminator_before_next_ = true; - SkipWhiteSpace(); - Scan(); -} - - -// Ensure that tokens can be stored in a byte. -STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); - -// Table of one-character tokens, by character (0x00..0x7f only). -static const byte one_char_tokens[] = { - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::LPAREN, // 0x28 - Token::RPAREN, // 0x29 - Token::ILLEGAL, - Token::ILLEGAL, - Token::COMMA, // 0x2c - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - 
Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::COLON, // 0x3a - Token::SEMICOLON, // 0x3b - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::CONDITIONAL, // 0x3f - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::LBRACK, // 0x5b - Token::ILLEGAL, - Token::RBRACK, // 0x5d - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::ILLEGAL, - Token::LBRACE, // 0x7b - Token::ILLEGAL, - Token::RBRACE, // 0x7d - Token::BIT_NOT, // 0x7e - Token::ILLEGAL -}; - - -Token::Value JavaScriptScanner::Next() { - current_ = next_; - has_line_terminator_before_next_ = false; - has_multiline_comment_before_next_ = false; - if (static_cast(c0_) <= 0x7f) { - Token::Value token = static_cast(one_char_tokens[c0_]); - if (token != Token::ILLEGAL) { - int pos = source_pos(); - next_.token = token; - next_.location.beg_pos = pos; - next_.location.end_pos = pos + 1; - Advance(); - return current_.token; - } - } - Scan(); - return current_.token; -} - - -static inline bool IsByteOrderMark(uc32 c) { - // The Unicode value U+FFFE is guaranteed never to be assigned as a 
- // Unicode character; this implies that in a Unicode context the - // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF - // character expressed in little-endian byte order (since it could - // not be a U+FFFE character expressed in big-endian byte - // order). Nevertheless, we check for it to be compatible with - // Spidermonkey. - return c == 0xFEFF || c == 0xFFFE; -} - - -bool JavaScriptScanner::SkipWhiteSpace() { - int start_position = source_pos(); - - while (true) { - // We treat byte-order marks (BOMs) as whitespace for better - // compatibility with Spidermonkey and other JavaScript engines. - while (unicode_cache_->IsWhiteSpace(c0_) || IsByteOrderMark(c0_)) { - // IsWhiteSpace() includes line terminators! - if (unicode_cache_->IsLineTerminator(c0_)) { - // Ignore line terminators, but remember them. This is necessary - // for automatic semicolon insertion. - has_line_terminator_before_next_ = true; - } - Advance(); - } - - // If there is an HTML comment end '-->' at the beginning of a - // line (with only whitespace in front of it), we treat the rest - // of the line as a comment. This is in line with the way - // SpiderMonkey handles it. - if (c0_ == '-' && has_line_terminator_before_next_) { - Advance(); - if (c0_ == '-') { - Advance(); - if (c0_ == '>') { - // Treat the rest of the line as a comment. - SkipSingleLineComment(); - // Continue skipping white space after the comment. - continue; - } - PushBack('-'); // undo Advance() - } - PushBack('-'); // undo Advance() - } - // Return whether or not we skipped any characters. - return source_pos() != start_position; - } -} - - -Token::Value JavaScriptScanner::SkipSingleLineComment() { - Advance(); - - // The line terminator at the end of the line is not considered - // to be part of the single-line comment; it is recognized - // separately by the lexical grammar and becomes part of the - // stream of input elements for the syntactic grammar (see - // ECMA-262, section 7.4). 
- while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) { - Advance(); - } - - return Token::WHITESPACE; -} - - -Token::Value JavaScriptScanner::SkipMultiLineComment() { - ASSERT(c0_ == '*'); - Advance(); - - while (c0_ >= 0) { - uc32 ch = c0_; - Advance(); - if (unicode_cache_->IsLineTerminator(ch)) { - // Following ECMA-262, section 7.4, a comment containing - // a newline will make the comment count as a line-terminator. - has_multiline_comment_before_next_ = true; - } - // If we have reached the end of the multi-line comment, we - // consume the '/' and insert a whitespace. This way all - // multi-line comments are treated as whitespace. - if (ch == '*' && c0_ == '/') { - c0_ = ' '; - return Token::WHITESPACE; - } - } - - // Unterminated multi-line comment. - return Token::ILLEGAL; -} - - -Token::Value JavaScriptScanner::ScanHtmlComment() { - // Check for -= - Advance(); - if (c0_ == '-') { - Advance(); - if (c0_ == '>' && has_line_terminator_before_next_) { - // For compatibility with SpiderMonkey, we skip lines that - // start with an HTML comment end '-->'. 
- token = SkipSingleLineComment(); - } else { - token = Token::DEC; - } - } else if (c0_ == '=') { - token = Select(Token::ASSIGN_SUB); - } else { - token = Token::SUB; - } - break; - - case '*': - // * *= - token = Select('=', Token::ASSIGN_MUL, Token::MUL); - break; - - case '%': - // % %= - token = Select('=', Token::ASSIGN_MOD, Token::MOD); - break; - - case '/': - // / // /* /= - Advance(); - if (c0_ == '/') { - token = SkipSingleLineComment(); - } else if (c0_ == '*') { - token = SkipMultiLineComment(); - } else if (c0_ == '=') { - token = Select(Token::ASSIGN_DIV); - } else { - token = Token::DIV; - } - break; - - case '&': - // & && &= - Advance(); - if (c0_ == '&') { - token = Select(Token::AND); - } else if (c0_ == '=') { - token = Select(Token::ASSIGN_BIT_AND); - } else { - token = Token::BIT_AND; - } - break; - - case '|': - // | || |= - Advance(); - if (c0_ == '|') { - token = Select(Token::OR); - } else if (c0_ == '=') { - token = Select(Token::ASSIGN_BIT_OR); - } else { - token = Token::BIT_OR; - } - break; - - case '^': - // ^ ^= - token = Select('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR); - break; - - case '.': - // . 
Number - Advance(); - if (IsDecimalDigit(c0_)) { - token = ScanNumber(true); - } else { - token = Token::PERIOD; - } - break; - - case ':': - token = Select(Token::COLON); - break; - - case ';': - token = Select(Token::SEMICOLON); - break; - - case ',': - token = Select(Token::COMMA); - break; - - case '(': - token = Select(Token::LPAREN); - break; - - case ')': - token = Select(Token::RPAREN); - break; - - case '[': - token = Select(Token::LBRACK); - break; - - case ']': - token = Select(Token::RBRACK); - break; - - case '{': - token = Select(Token::LBRACE); - break; - - case '}': - token = Select(Token::RBRACE); - break; - - case '?': - token = Select(Token::CONDITIONAL); - break; - - case '~': - token = Select(Token::BIT_NOT); - break; - - default: - if (unicode_cache_->IsIdentifierStart(c0_)) { - token = ScanIdentifierOrKeyword(); - } else if (IsDecimalDigit(c0_)) { - token = ScanNumber(false); - } else if (SkipWhiteSpace()) { - token = Token::WHITESPACE; - } else if (c0_ < 0) { - token = Token::EOS; - } else { - token = Select(Token::ILLEGAL); - } - break; - } - - // Continue scanning for tokens as long as we're just skipping - // whitespace. - } while (token == Token::WHITESPACE); - - next_.location.end_pos = source_pos(); - next_.token = token; -} - - -void JavaScriptScanner::SeekForward(int pos) { - // After this call, we will have the token at the given position as - // the "next" token. The "current" token will be invalid. - if (pos == next_.location.beg_pos) return; - int current_pos = source_pos(); - ASSERT_EQ(next_.location.end_pos, current_pos); - // Positions inside the lookahead token aren't supported. - ASSERT(pos >= current_pos); - if (pos != current_pos) { - source_->SeekForward(pos - source_->pos()); - Advance(); - // This function is only called to seek to the location - // of the end of a function (at the "}" token). It doesn't matter - // whether there was a line terminator in the part we skip. 
- has_line_terminator_before_next_ = false; - has_multiline_comment_before_next_ = false; - } - Scan(); -} - - -void JavaScriptScanner::ScanEscape() { - uc32 c = c0_; - Advance(); - - // Skip escaped newlines. - if (unicode_cache_->IsLineTerminator(c)) { - // Allow CR+LF newlines in multiline string literals. - if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); - // Allow LF+CR newlines in multiline string literals. - if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); - return; - } - - switch (c) { - case '\'': // fall through - case '"' : // fall through - case '\\': break; - case 'b' : c = '\b'; break; - case 'f' : c = '\f'; break; - case 'n' : c = '\n'; break; - case 'r' : c = '\r'; break; - case 't' : c = '\t'; break; - case 'u' : { - c = ScanHexNumber(4); - if (c < 0) c = 'u'; - break; - } - case 'v' : c = '\v'; break; - case 'x' : { - c = ScanHexNumber(2); - if (c < 0) c = 'x'; - break; - } - case '0' : // fall through - case '1' : // fall through - case '2' : // fall through - case '3' : // fall through - case '4' : // fall through - case '5' : // fall through - case '6' : // fall through - case '7' : c = ScanOctalEscape(c, 2); break; - } - - // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these - // should be illegal, but they are commonly handled - // as non-escaped characters by JS VMs. - AddLiteralChar(c); -} - - -// Octal escapes of the forms '\0xx' and '\xxx' are not a part of -// ECMA-262. Other JS VMs support them. -uc32 JavaScriptScanner::ScanOctalEscape(uc32 c, int length) { - uc32 x = c - '0'; - int i = 0; - for (; i < length; i++) { - int d = c0_ - '0'; - if (d < 0 || d > 7) break; - int nx = x * 8 + d; - if (nx >= 256) break; - x = nx; - Advance(); - } - // Anything except '\0' is an octal escape sequence, illegal in strict mode. - // Remember the position of octal escape sequences so that an error - // can be reported later (in strict mode). 
- // We don't report the error immediately, because the octal escape can - // occur before the "use strict" directive. - if (c != '0' || i > 0) { - octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1); - } - return x; -} - - -Token::Value JavaScriptScanner::ScanString() { - uc32 quote = c0_; - Advance(); // consume quote - - LiteralScope literal(this); - while (c0_ != quote && c0_ >= 0 - && !unicode_cache_->IsLineTerminator(c0_)) { - uc32 c = c0_; - Advance(); - if (c == '\\') { - if (c0_ < 0) return Token::ILLEGAL; - ScanEscape(); - } else { - AddLiteralChar(c); - } - } - if (c0_ != quote) return Token::ILLEGAL; - literal.Complete(); - - Advance(); // consume quote - return Token::STRING; -} - - -void JavaScriptScanner::ScanDecimalDigits() { - while (IsDecimalDigit(c0_)) - AddLiteralCharAdvance(); -} - - -Token::Value JavaScriptScanner::ScanNumber(bool seen_period) { - ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction - - enum { DECIMAL, HEX, OCTAL } kind = DECIMAL; - - LiteralScope literal(this); - if (seen_period) { - // we have already seen a decimal point of the float - AddLiteralChar('.'); - ScanDecimalDigits(); // we know we have at least one digit - - } else { - // if the first character is '0' we must check for octals and hex - if (c0_ == '0') { - int start_pos = source_pos(); // For reporting octal positions. - AddLiteralCharAdvance(); - - // either 0, 0exxx, 0Exxx, 0.xxx, an octal number, or a hex number - if (c0_ == 'x' || c0_ == 'X') { - // hex number - kind = HEX; - AddLiteralCharAdvance(); - if (!IsHexDigit(c0_)) { - // we must have at least one hex digit after 'x'/'X' - return Token::ILLEGAL; - } - while (IsHexDigit(c0_)) { - AddLiteralCharAdvance(); - } - } else if ('0' <= c0_ && c0_ <= '7') { - // (possible) octal number - kind = OCTAL; - while (true) { - if (c0_ == '8' || c0_ == '9') { - kind = DECIMAL; - break; - } - if (c0_ < '0' || '7' < c0_) { - // Octal literal finished. 
- octal_pos_ = Location(start_pos, source_pos()); - break; - } - AddLiteralCharAdvance(); - } - } - } - - // Parse decimal digits and allow trailing fractional part. - if (kind == DECIMAL) { - ScanDecimalDigits(); // optional - if (c0_ == '.') { - AddLiteralCharAdvance(); - ScanDecimalDigits(); // optional - } - } - } - - // scan exponent, if any - if (c0_ == 'e' || c0_ == 'E') { - ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number - if (kind == OCTAL) return Token::ILLEGAL; // no exponent for octals allowed - // scan exponent - AddLiteralCharAdvance(); - if (c0_ == '+' || c0_ == '-') - AddLiteralCharAdvance(); - if (!IsDecimalDigit(c0_)) { - // we must have at least one decimal digit after 'e'/'E' - return Token::ILLEGAL; - } - ScanDecimalDigits(); - } - - // The source character immediately following a numeric literal must - // not be an identifier start or a decimal digit; see ECMA-262 - // section 7.8.3, page 17 (note that we read only one decimal digit - // if the value is 0). 
- if (IsDecimalDigit(c0_) || unicode_cache_->IsIdentifierStart(c0_)) - return Token::ILLEGAL; - - literal.Complete(); - - return Token::NUMBER; -} - - -uc32 JavaScriptScanner::ScanIdentifierUnicodeEscape() { - Advance(); - if (c0_ != 'u') return -1; - Advance(); - uc32 result = ScanHexNumber(4); - if (result < 0) PushBack('u'); - return result; -} - - -// ---------------------------------------------------------------------------- -// Keyword Matcher - -#define KEYWORDS(KEYWORD_GROUP, KEYWORD) \ - KEYWORD_GROUP('b') \ - KEYWORD("break", Token::BREAK) \ - KEYWORD_GROUP('c') \ - KEYWORD("case", Token::CASE) \ - KEYWORD("catch", Token::CATCH) \ - KEYWORD("class", Token::FUTURE_RESERVED_WORD) \ - KEYWORD("const", Token::CONST) \ - KEYWORD("continue", Token::CONTINUE) \ - KEYWORD_GROUP('d') \ - KEYWORD("debugger", Token::DEBUGGER) \ - KEYWORD("default", Token::DEFAULT) \ - KEYWORD("delete", Token::DELETE) \ - KEYWORD("do", Token::DO) \ - KEYWORD_GROUP('e') \ - KEYWORD("else", Token::ELSE) \ - KEYWORD("enum", Token::FUTURE_RESERVED_WORD) \ - KEYWORD("export", Token::FUTURE_RESERVED_WORD) \ - KEYWORD("extends", Token::FUTURE_RESERVED_WORD) \ - KEYWORD_GROUP('f') \ - KEYWORD("false", Token::FALSE_LITERAL) \ - KEYWORD("finally", Token::FINALLY) \ - KEYWORD("for", Token::FOR) \ - KEYWORD("function", Token::FUNCTION) \ - KEYWORD_GROUP('i') \ - KEYWORD("if", Token::IF) \ - KEYWORD("implements", Token::FUTURE_STRICT_RESERVED_WORD) \ - KEYWORD("import", Token::FUTURE_RESERVED_WORD) \ - KEYWORD("in", Token::IN) \ - KEYWORD("instanceof", Token::INSTANCEOF) \ - KEYWORD("interface", Token::FUTURE_STRICT_RESERVED_WORD) \ - KEYWORD_GROUP('l') \ - KEYWORD("let", harmony_block_scoping \ - ? 
Token::LET : Token::FUTURE_STRICT_RESERVED_WORD) \ - KEYWORD_GROUP('n') \ - KEYWORD("new", Token::NEW) \ - KEYWORD("null", Token::NULL_LITERAL) \ - KEYWORD_GROUP('p') \ - KEYWORD("package", Token::FUTURE_STRICT_RESERVED_WORD) \ - KEYWORD("private", Token::FUTURE_STRICT_RESERVED_WORD) \ - KEYWORD("protected", Token::FUTURE_STRICT_RESERVED_WORD) \ - KEYWORD("public", Token::FUTURE_STRICT_RESERVED_WORD) \ - KEYWORD_GROUP('r') \ - KEYWORD("return", Token::RETURN) \ - KEYWORD_GROUP('s') \ - KEYWORD("static", Token::FUTURE_STRICT_RESERVED_WORD) \ - KEYWORD("super", Token::FUTURE_RESERVED_WORD) \ - KEYWORD("switch", Token::SWITCH) \ - KEYWORD_GROUP('t') \ - KEYWORD("this", Token::THIS) \ - KEYWORD("throw", Token::THROW) \ - KEYWORD("true", Token::TRUE_LITERAL) \ - KEYWORD("try", Token::TRY) \ - KEYWORD("typeof", Token::TYPEOF) \ - KEYWORD_GROUP('v') \ - KEYWORD("var", Token::VAR) \ - KEYWORD("void", Token::VOID) \ - KEYWORD_GROUP('w') \ - KEYWORD("while", Token::WHILE) \ - KEYWORD("with", Token::WITH) \ - KEYWORD_GROUP('y') \ - KEYWORD("yield", Token::FUTURE_STRICT_RESERVED_WORD) - - -static Token::Value KeywordOrIdentifierToken(const char* input, - int input_length, - bool harmony_block_scoping) { - ASSERT(input_length >= 1); - const int kMinLength = 2; - const int kMaxLength = 10; - if (input_length < kMinLength || input_length > kMaxLength) { - return Token::IDENTIFIER; - } - switch (input[0]) { - default: -#define KEYWORD_GROUP_CASE(ch) \ - break; \ - case ch: -#define KEYWORD(keyword, token) \ - { \ - /* 'keyword' is a char array, so sizeof(keyword) is */ \ - /* strlen(keyword) plus 1 for the NUL char. 
*/ \ - const int keyword_length = sizeof(keyword) - 1; \ - STATIC_ASSERT(keyword_length >= kMinLength); \ - STATIC_ASSERT(keyword_length <= kMaxLength); \ - if (input_length == keyword_length && \ - input[1] == keyword[1] && \ - (keyword_length <= 2 || input[2] == keyword[2]) && \ - (keyword_length <= 3 || input[3] == keyword[3]) && \ - (keyword_length <= 4 || input[4] == keyword[4]) && \ - (keyword_length <= 5 || input[5] == keyword[5]) && \ - (keyword_length <= 6 || input[6] == keyword[6]) && \ - (keyword_length <= 7 || input[7] == keyword[7]) && \ - (keyword_length <= 8 || input[8] == keyword[8]) && \ - (keyword_length <= 9 || input[9] == keyword[9])) { \ - return token; \ - } \ - } - KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD) - } - return Token::IDENTIFIER; -} - - -Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() { - ASSERT(unicode_cache_->IsIdentifierStart(c0_)); - LiteralScope literal(this); - // Scan identifier start character. - if (c0_ == '\\') { - uc32 c = ScanIdentifierUnicodeEscape(); - // Only allow legal identifier start characters. - if (c < 0 || - c == '\\' || // No recursive escapes. - !unicode_cache_->IsIdentifierStart(c)) { - return Token::ILLEGAL; - } - AddLiteralChar(c); - return ScanIdentifierSuffix(&literal); - } - - uc32 first_char = c0_; - Advance(); - AddLiteralChar(first_char); - - // Scan the rest of the identifier characters. - while (unicode_cache_->IsIdentifierPart(c0_)) { - if (c0_ != '\\') { - uc32 next_char = c0_; - Advance(); - AddLiteralChar(next_char); - continue; - } - // Fallthrough if no longer able to complete keyword. 
- return ScanIdentifierSuffix(&literal); - } - - literal.Complete(); - - if (next_.literal_chars->is_ascii()) { - Vector chars = next_.literal_chars->ascii_literal(); - return KeywordOrIdentifierToken(chars.start(), - chars.length(), - harmony_block_scoping_); - } - - return Token::IDENTIFIER; -} - - -Token::Value JavaScriptScanner::ScanIdentifierSuffix(LiteralScope* literal) { - // Scan the rest of the identifier characters. - while (unicode_cache_->IsIdentifierPart(c0_)) { - if (c0_ == '\\') { - uc32 c = ScanIdentifierUnicodeEscape(); - // Only allow legal identifier part characters. - if (c < 0 || - c == '\\' || - !unicode_cache_->IsIdentifierPart(c)) { - return Token::ILLEGAL; - } - AddLiteralChar(c); - } else { - AddLiteralChar(c0_); - Advance(); - } - } - literal->Complete(); - - return Token::IDENTIFIER; -} - - -bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) { - // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags - bool in_character_class = false; - - // Previous token is either '/' or '/=', in the second case, the - // pattern starts at =. - next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); - next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); - - // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, - // the scanner should pass uninterpreted bodies to the RegExp - // constructor. - LiteralScope literal(this); - if (seen_equal) { - AddLiteralChar('='); - } - - while (c0_ != '/' || in_character_class) { - if (unicode_cache_->IsLineTerminator(c0_) || c0_ < 0) return false; - if (c0_ == '\\') { // Escape sequence. - AddLiteralCharAdvance(); - if (unicode_cache_->IsLineTerminator(c0_) || c0_ < 0) return false; - AddLiteralCharAdvance(); - // If the escape allows more characters, i.e., \x??, \u????, or \c?, - // only "safe" characters are allowed (letters, digits, underscore), - // otherwise the escape isn't valid and the invalid character has - // its normal meaning. 
I.e., we can just continue scanning without - // worrying whether the following characters are part of the escape - // or not, since any '/', '\\' or '[' is guaranteed to not be part - // of the escape sequence. - - // TODO(896): At some point, parse RegExps more throughly to capture - // octal esacpes in strict mode. - } else { // Unescaped character. - if (c0_ == '[') in_character_class = true; - if (c0_ == ']') in_character_class = false; - AddLiteralCharAdvance(); - } - } - Advance(); // consume '/' - - literal.Complete(); - - return true; -} - - -bool JavaScriptScanner::ScanLiteralUnicodeEscape() { - ASSERT(c0_ == '\\'); - uc32 chars_read[6] = {'\\', 'u', 0, 0, 0, 0}; - Advance(); - int i = 1; - if (c0_ == 'u') { - i++; - while (i < 6) { - Advance(); - if (!IsHexDigit(c0_)) break; - chars_read[i] = c0_; - i++; - } - } - if (i < 6) { - // Incomplete escape. Undo all advances and return false. - while (i > 0) { - i--; - PushBack(chars_read[i]); - } - return false; - } - // Complete escape. Add all chars to current literal buffer. - for (int i = 0; i < 6; i++) { - AddLiteralChar(chars_read[i]); - } - return true; -} - - -bool JavaScriptScanner::ScanRegExpFlags() { - // Scan regular expression flags. - LiteralScope literal(this); - while (unicode_cache_->IsIdentifierPart(c0_)) { - if (c0_ != '\\') { - AddLiteralCharAdvance(); - } else { - if (!ScanLiteralUnicodeEscape()) { - break; - } - } - } - literal.Complete(); - - next_.location.end_pos = source_pos() - 1; - return true; -} - -} } // namespace v8::internal diff --git a/src/scanner-base.h b/src/scanner-base.h deleted file mode 100644 index d68d240e90e..00000000000 --- a/src/scanner-base.h +++ /dev/null @@ -1,562 +0,0 @@ -// Copyright 2011 the V8 project authors. All rights reserved. 
-// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// Features shared by parsing and pre-parsing scanners. - -#ifndef V8_SCANNER_BASE_H_ -#define V8_SCANNER_BASE_H_ - -#include "allocation.h" -#include "char-predicates.h" -#include "checks.h" -#include "globals.h" -#include "token.h" -#include "unicode-inl.h" -#include "utils.h" - -namespace v8 { -namespace internal { - -// Returns the value (0 .. 15) of a hexadecimal character c. -// If c is not a legal hexadecimal character, returns a value < 0. 
-inline int HexValue(uc32 c) { - c -= '0'; - if (static_cast(c) <= 9) return c; - c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36. - if (static_cast(c) <= 5) return c + 10; - return -1; -} - - -// --------------------------------------------------------------------- -// Buffered stream of characters, using an internal UC16 buffer. - -class UC16CharacterStream { - public: - UC16CharacterStream() : pos_(0) { } - virtual ~UC16CharacterStream() { } - - // Returns and advances past the next UC16 character in the input - // stream. If there are no more characters, it returns a negative - // value. - inline uc32 Advance() { - if (buffer_cursor_ < buffer_end_ || ReadBlock()) { - pos_++; - return static_cast(*(buffer_cursor_++)); - } - // Note: currently the following increment is necessary to avoid a - // parser problem! The scanner treats the final kEndOfInput as - // a character with a position, and does math relative to that - // position. - pos_++; - - return kEndOfInput; - } - - // Return the current position in the character stream. - // Starts at zero. - inline unsigned pos() const { return pos_; } - - // Skips forward past the next character_count UC16 characters - // in the input, or until the end of input if that comes sooner. - // Returns the number of characters actually skipped. If less - // than character_count, - inline unsigned SeekForward(unsigned character_count) { - unsigned buffered_chars = - static_cast(buffer_end_ - buffer_cursor_); - if (character_count <= buffered_chars) { - buffer_cursor_ += character_count; - pos_ += character_count; - return character_count; - } - return SlowSeekForward(character_count); - } - - // Pushes back the most recently read UC16 character (or negative - // value if at end of input), i.e., the value returned by the most recent - // call to Advance. - // Must not be used right after calling SeekForward. 
- virtual void PushBack(int32_t character) = 0; - - protected: - static const uc32 kEndOfInput = -1; - - // Ensures that the buffer_cursor_ points to the character at - // position pos_ of the input, if possible. If the position - // is at or after the end of the input, return false. If there - // are more characters available, return true. - virtual bool ReadBlock() = 0; - virtual unsigned SlowSeekForward(unsigned character_count) = 0; - - const uc16* buffer_cursor_; - const uc16* buffer_end_; - unsigned pos_; -}; - - -class UnicodeCache { -// --------------------------------------------------------------------- -// Caching predicates used by scanners. - public: - UnicodeCache() {} - typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; - - StaticResource* utf8_decoder() { - return &utf8_decoder_; - } - - bool IsIdentifierStart(unibrow::uchar c) { return kIsIdentifierStart.get(c); } - bool IsIdentifierPart(unibrow::uchar c) { return kIsIdentifierPart.get(c); } - bool IsLineTerminator(unibrow::uchar c) { return kIsLineTerminator.get(c); } - bool IsWhiteSpace(unibrow::uchar c) { return kIsWhiteSpace.get(c); } - - private: - - unibrow::Predicate kIsIdentifierStart; - unibrow::Predicate kIsIdentifierPart; - unibrow::Predicate kIsLineTerminator; - unibrow::Predicate kIsWhiteSpace; - StaticResource utf8_decoder_; - - DISALLOW_COPY_AND_ASSIGN(UnicodeCache); -}; - - -// ---------------------------------------------------------------------------- -// LiteralBuffer - Collector of chars of literals. 
- -class LiteralBuffer { - public: - LiteralBuffer() : is_ascii_(true), position_(0), backing_store_() { } - - ~LiteralBuffer() { - if (backing_store_.length() > 0) { - backing_store_.Dispose(); - } - } - - inline void AddChar(uc16 character) { - if (position_ >= backing_store_.length()) ExpandBuffer(); - if (is_ascii_) { - if (character < kMaxAsciiCharCodeU) { - backing_store_[position_] = static_cast(character); - position_ += kASCIISize; - return; - } - ConvertToUC16(); - } - *reinterpret_cast(&backing_store_[position_]) = character; - position_ += kUC16Size; - } - - bool is_ascii() { return is_ascii_; } - - Vector uc16_literal() { - ASSERT(!is_ascii_); - ASSERT((position_ & 0x1) == 0); - return Vector( - reinterpret_cast(backing_store_.start()), - position_ >> 1); - } - - Vector ascii_literal() { - ASSERT(is_ascii_); - return Vector( - reinterpret_cast(backing_store_.start()), - position_); - } - - int length() { - return is_ascii_ ? position_ : (position_ >> 1); - } - - void Reset() { - position_ = 0; - is_ascii_ = true; - } - private: - static const int kInitialCapacity = 16; - static const int kGrowthFactory = 4; - static const int kMinConversionSlack = 256; - static const int kMaxGrowth = 1 * MB; - inline int NewCapacity(int min_capacity) { - int capacity = Max(min_capacity, backing_store_.length()); - int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth); - return new_capacity; - } - - void ExpandBuffer() { - Vector new_store = Vector::New(NewCapacity(kInitialCapacity)); - memcpy(new_store.start(), backing_store_.start(), position_); - backing_store_.Dispose(); - backing_store_ = new_store; - } - - void ConvertToUC16() { - ASSERT(is_ascii_); - Vector new_store; - int new_content_size = position_ * kUC16Size; - if (new_content_size >= backing_store_.length()) { - // Ensure room for all currently read characters as UC16 as well - // as the character about to be stored. 
- new_store = Vector::New(NewCapacity(new_content_size)); - } else { - new_store = backing_store_; - } - char* src = reinterpret_cast(backing_store_.start()); - uc16* dst = reinterpret_cast(new_store.start()); - for (int i = position_ - 1; i >= 0; i--) { - dst[i] = src[i]; - } - if (new_store.start() != backing_store_.start()) { - backing_store_.Dispose(); - backing_store_ = new_store; - } - position_ = new_content_size; - is_ascii_ = false; - } - - bool is_ascii_; - int position_; - Vector backing_store_; - - DISALLOW_COPY_AND_ASSIGN(LiteralBuffer); -}; - - -// ---------------------------------------------------------------------------- -// Scanner base-class. - -// Generic functionality used by both JSON and JavaScript scanners. -class Scanner { - public: - // -1 is outside of the range of any real source code. - static const int kNoOctalLocation = -1; - - typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; - - class LiteralScope { - public: - explicit LiteralScope(Scanner* self); - ~LiteralScope(); - void Complete(); - - private: - Scanner* scanner_; - bool complete_; - }; - - explicit Scanner(UnicodeCache* scanner_contants); - - // Returns the current token again. - Token::Value current_token() { return current_.token; } - - // One token look-ahead (past the token returned by Next()). - Token::Value peek() const { return next_.token; } - - struct Location { - Location(int b, int e) : beg_pos(b), end_pos(e) { } - Location() : beg_pos(0), end_pos(0) { } - - bool IsValid() const { - return beg_pos >= 0 && end_pos >= beg_pos; - } - - static Location invalid() { return Location(-1, -1); } - - int beg_pos; - int end_pos; - }; - - // Returns the location information for the current token - // (the token returned by Next()). - Location location() const { return current_.location; } - Location peek_location() const { return next_.location; } - - // Returns the literal string, if any, for the current token (the - // token returned by Next()). 
The string is 0-terminated and in - // UTF-8 format; they may contain 0-characters. Literal strings are - // collected for identifiers, strings, and numbers. - // These functions only give the correct result if the literal - // was scanned between calls to StartLiteral() and TerminateLiteral(). - bool is_literal_ascii() { - ASSERT_NOT_NULL(current_.literal_chars); - return current_.literal_chars->is_ascii(); - } - Vector literal_ascii_string() { - ASSERT_NOT_NULL(current_.literal_chars); - return current_.literal_chars->ascii_literal(); - } - Vector literal_uc16_string() { - ASSERT_NOT_NULL(current_.literal_chars); - return current_.literal_chars->uc16_literal(); - } - int literal_length() const { - ASSERT_NOT_NULL(current_.literal_chars); - return current_.literal_chars->length(); - } - - bool literal_contains_escapes() const { - Location location = current_.location; - int source_length = (location.end_pos - location.beg_pos); - if (current_.token == Token::STRING) { - // Subtract delimiters. - source_length -= 2; - } - return current_.literal_chars->length() != source_length; - } - - // Returns the literal string for the next token (the token that - // would be returned if Next() were called). - bool is_next_literal_ascii() { - ASSERT_NOT_NULL(next_.literal_chars); - return next_.literal_chars->is_ascii(); - } - Vector next_literal_ascii_string() { - ASSERT_NOT_NULL(next_.literal_chars); - return next_.literal_chars->ascii_literal(); - } - Vector next_literal_uc16_string() { - ASSERT_NOT_NULL(next_.literal_chars); - return next_.literal_chars->uc16_literal(); - } - int next_literal_length() const { - ASSERT_NOT_NULL(next_.literal_chars); - return next_.literal_chars->length(); - } - - static const int kCharacterLookaheadBufferSize = 1; - - protected: - // The current and look-ahead token. - struct TokenDesc { - Token::Value token; - Location location; - LiteralBuffer* literal_chars; - }; - - // Call this after setting source_ to the input. 
- void Init() { - // Set c0_ (one character ahead) - STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); - Advance(); - // Initialize current_ to not refer to a literal. - current_.literal_chars = NULL; - } - - // Literal buffer support - inline void StartLiteral() { - LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ? - &literal_buffer2_ : &literal_buffer1_; - free_buffer->Reset(); - next_.literal_chars = free_buffer; - } - - inline void AddLiteralChar(uc32 c) { - ASSERT_NOT_NULL(next_.literal_chars); - next_.literal_chars->AddChar(c); - } - - // Complete scanning of a literal. - inline void TerminateLiteral() { - // Does nothing in the current implementation. - } - - // Stops scanning of a literal and drop the collected characters, - // e.g., due to an encountered error. - inline void DropLiteral() { - next_.literal_chars = NULL; - } - - inline void AddLiteralCharAdvance() { - AddLiteralChar(c0_); - Advance(); - } - - // Low-level scanning support. - void Advance() { c0_ = source_->Advance(); } - void PushBack(uc32 ch) { - source_->PushBack(c0_); - c0_ = ch; - } - - inline Token::Value Select(Token::Value tok) { - Advance(); - return tok; - } - - inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { - Advance(); - if (c0_ == next) { - Advance(); - return then; - } else { - return else_; - } - } - - uc32 ScanHexNumber(int expected_length); - - // Return the current source position. - int source_pos() { - return source_->pos() - kCharacterLookaheadBufferSize; - } - - UnicodeCache* unicode_cache_; - - // Buffers collecting literal strings, numbers, etc. - LiteralBuffer literal_buffer1_; - LiteralBuffer literal_buffer2_; - - TokenDesc current_; // desc for current token (as returned by Next()) - TokenDesc next_; // desc for next token (one token look-ahead) - - // Input stream. Must be initialized to an UC16CharacterStream. 
- UC16CharacterStream* source_; - - // One Unicode character look-ahead; c0_ < 0 at the end of the input. - uc32 c0_; -}; - -// ---------------------------------------------------------------------------- -// JavaScriptScanner - base logic for JavaScript scanning. - -class JavaScriptScanner : public Scanner { - public: - // A LiteralScope that disables recording of some types of JavaScript - // literals. If the scanner is configured to not record the specific - // type of literal, the scope will not call StartLiteral. - class LiteralScope { - public: - explicit LiteralScope(JavaScriptScanner* self) - : scanner_(self), complete_(false) { - scanner_->StartLiteral(); - } - ~LiteralScope() { - if (!complete_) scanner_->DropLiteral(); - } - void Complete() { - scanner_->TerminateLiteral(); - complete_ = true; - } - - private: - JavaScriptScanner* scanner_; - bool complete_; - }; - - explicit JavaScriptScanner(UnicodeCache* scanner_contants); - - void Initialize(UC16CharacterStream* source); - - // Returns the next token. - Token::Value Next(); - - // Returns true if there was a line terminator before the peek'ed token, - // possibly inside a multi-line comment. - bool HasAnyLineTerminatorBeforeNext() const { - return has_line_terminator_before_next_ || - has_multiline_comment_before_next_; - } - - // Scans the input as a regular expression pattern, previous - // character(s) must be /(=). Returns true if a pattern is scanned. - bool ScanRegExpPattern(bool seen_equal); - // Returns true if regexp flags are scanned (always since flags can - // be empty). - bool ScanRegExpFlags(); - - // Tells whether the buffer contains an identifier (no escapes). - // Used for checking if a property name is an identifier. - static bool IsIdentifier(unibrow::CharacterStream* buffer); - - // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. 
- uc32 ScanOctalEscape(uc32 c, int length); - - // Returns the location of the last seen octal literal - Location octal_position() const { return octal_pos_; } - void clear_octal_position() { octal_pos_ = Location::invalid(); } - - // Seek forward to the given position. This operation does not - // work in general, for instance when there are pushed back - // characters, but works for seeking forward until simple delimiter - // tokens, which is what it is used for. - void SeekForward(int pos); - - bool HarmonyBlockScoping() const { - return harmony_block_scoping_; - } - void SetHarmonyBlockScoping(bool block_scoping) { - harmony_block_scoping_ = block_scoping; - } - - - protected: - bool SkipWhiteSpace(); - Token::Value SkipSingleLineComment(); - Token::Value SkipMultiLineComment(); - - // Scans a single JavaScript token. - void Scan(); - - void ScanDecimalDigits(); - Token::Value ScanNumber(bool seen_period); - Token::Value ScanIdentifierOrKeyword(); - Token::Value ScanIdentifierSuffix(LiteralScope* literal); - - void ScanEscape(); - Token::Value ScanString(); - - // Scans a possible HTML comment -- begins with ' buffer_) { + // buffer_ is writable, buffer_cursor_ is const pointer. + buffer_[--buffer_cursor_ - buffer_] = static_cast(character); + pos_--; + return; + } + SlowPushBack(static_cast(character)); +} + + +void BufferedUC16CharacterStream::SlowPushBack(uc16 character) { + // In pushback mode, the end of the buffer contains pushback, + // and the start of the buffer (from buffer start to pushback_limit_) + // contains valid data that comes just after the pushback. + // We NULL the pushback_limit_ if pushing all the way back to the + // start of the buffer. + + if (pushback_limit_ == NULL) { + // Enter pushback mode. + pushback_limit_ = buffer_end_; + buffer_end_ = buffer_ + kBufferSize; + buffer_cursor_ = buffer_end_; + } + // Ensure that there is room for at least one pushback. 
+ ASSERT(buffer_cursor_ > buffer_); + ASSERT(pos_ > 0); + buffer_[--buffer_cursor_ - buffer_] = character; + if (buffer_cursor_ == buffer_) { + pushback_limit_ = NULL; + } else if (buffer_cursor_ < pushback_limit_) { + pushback_limit_ = buffer_cursor_; + } + pos_--; +} + + +bool BufferedUC16CharacterStream::ReadBlock() { + buffer_cursor_ = buffer_; + if (pushback_limit_ != NULL) { + // Leave pushback mode. + buffer_end_ = pushback_limit_; + pushback_limit_ = NULL; + // If there were any valid characters left at the + // start of the buffer, use those. + if (buffer_cursor_ < buffer_end_) return true; + // Otherwise read a new block. + } + unsigned length = FillBuffer(pos_, kBufferSize); + buffer_end_ = buffer_ + length; + return length > 0; +} + + +unsigned BufferedUC16CharacterStream::SlowSeekForward(unsigned delta) { + // Leave pushback mode (i.e., ignore that there might be valid data + // in the buffer before the pushback_limit_ point). + pushback_limit_ = NULL; + return BufferSeekForward(delta); +} + +// ---------------------------------------------------------------------------- +// GenericStringUC16CharacterStream + + +GenericStringUC16CharacterStream::GenericStringUC16CharacterStream( + Handle data, + unsigned start_position, + unsigned end_position) + : string_(data), + length_(end_position) { + ASSERT(end_position >= start_position); + buffer_cursor_ = buffer_; + buffer_end_ = buffer_; + pos_ = start_position; +} + + +GenericStringUC16CharacterStream::~GenericStringUC16CharacterStream() { } + + +unsigned GenericStringUC16CharacterStream::BufferSeekForward(unsigned delta) { + unsigned old_pos = pos_; + pos_ = Min(pos_ + delta, length_); + ReadBlock(); + return pos_ - old_pos; +} + + +unsigned GenericStringUC16CharacterStream::FillBuffer(unsigned from_pos, + unsigned length) { + if (from_pos >= length_) return 0; + if (from_pos + length > length_) { + length = length_ - from_pos; + } + String::WriteToFlat(*string_, buffer_, from_pos, from_pos + length); + 
return length; +} + + +// ---------------------------------------------------------------------------- +// Utf8ToUC16CharacterStream +Utf8ToUC16CharacterStream::Utf8ToUC16CharacterStream(const byte* data, + unsigned length) + : BufferedUC16CharacterStream(), + raw_data_(data), + raw_data_length_(length), + raw_data_pos_(0), + raw_character_position_(0) { + ReadBlock(); +} + + +Utf8ToUC16CharacterStream::~Utf8ToUC16CharacterStream() { } + + +unsigned Utf8ToUC16CharacterStream::BufferSeekForward(unsigned delta) { + unsigned old_pos = pos_; + unsigned target_pos = pos_ + delta; + SetRawPosition(target_pos); + pos_ = raw_character_position_; + ReadBlock(); + return pos_ - old_pos; +} + + +unsigned Utf8ToUC16CharacterStream::FillBuffer(unsigned char_position, + unsigned length) { + static const unibrow::uchar kMaxUC16Character = 0xffff; + SetRawPosition(char_position); + if (raw_character_position_ != char_position) { + // char_position was not a valid position in the stream (hit the end + // while spooling to it). + return 0u; + } + unsigned i = 0; + while (i < length) { + if (raw_data_pos_ == raw_data_length_) break; + unibrow::uchar c = raw_data_[raw_data_pos_]; + if (c <= unibrow::Utf8::kMaxOneByteChar) { + raw_data_pos_++; + } else { + c = unibrow::Utf8::CalculateValue(raw_data_ + raw_data_pos_, + raw_data_length_ - raw_data_pos_, + &raw_data_pos_); + // Don't allow characters outside of the BMP. 
+ if (c > kMaxUC16Character) { + c = unibrow::Utf8::kBadChar; + } + } + buffer_[i++] = static_cast(c); + } + raw_character_position_ = char_position + i; + return i; +} + + +static const byte kUtf8MultiByteMask = 0xC0; +static const byte kUtf8MultiByteCharStart = 0xC0; +static const byte kUtf8MultiByteCharFollower = 0x80; + + +#ifdef DEBUG +static bool IsUtf8MultiCharacterStart(byte first_byte) { + return (first_byte & kUtf8MultiByteMask) == kUtf8MultiByteCharStart; +} +#endif + + +static bool IsUtf8MultiCharacterFollower(byte later_byte) { + return (later_byte & kUtf8MultiByteMask) == kUtf8MultiByteCharFollower; +} + + +// Move the cursor back to point at the preceding UTF-8 character start +// in the buffer. +static inline void Utf8CharacterBack(const byte* buffer, unsigned* cursor) { + byte character = buffer[--*cursor]; + if (character > unibrow::Utf8::kMaxOneByteChar) { + ASSERT(IsUtf8MultiCharacterFollower(character)); + // Last byte of a multi-byte character encoding. Step backwards until + // pointing to the first byte of the encoding, recognized by having the + // top two bits set. + while (IsUtf8MultiCharacterFollower(buffer[--*cursor])) { } + ASSERT(IsUtf8MultiCharacterStart(buffer[*cursor])); + } +} + + +// Move the cursor forward to point at the next following UTF-8 character start +// in the buffer. +static inline void Utf8CharacterForward(const byte* buffer, unsigned* cursor) { + byte character = buffer[(*cursor)++]; + if (character > unibrow::Utf8::kMaxOneByteChar) { + // First character of a multi-byte character encoding. + // The number of most-significant one-bits determines the length of the + // encoding: + // 110..... - (0xCx, 0xDx) one additional byte (minimum). + // 1110.... - (0xEx) two additional bytes. + // 11110... - (0xFx) three additional bytes (maximum). + ASSERT(IsUtf8MultiCharacterStart(character)); + // Additional bytes is: + // 1 if value in range 0xC0 .. 0xDF. + // 2 if value in range 0xE0 .. 0xEF. 
+ // 3 if value in range 0xF0 .. 0xF7. + // Encode that in a single value. + unsigned additional_bytes = + ((0x3211u) >> (((character - 0xC0) >> 2) & 0xC)) & 0x03; + *cursor += additional_bytes; + ASSERT(!IsUtf8MultiCharacterFollower(buffer[1 + additional_bytes])); + } +} + + +void Utf8ToUC16CharacterStream::SetRawPosition(unsigned target_position) { + if (raw_character_position_ > target_position) { + // Spool backwards in utf8 buffer. + do { + Utf8CharacterBack(raw_data_, &raw_data_pos_); + raw_character_position_--; + } while (raw_character_position_ > target_position); + return; + } + // Spool forwards in the utf8 buffer. + while (raw_character_position_ < target_position) { + if (raw_data_pos_ == raw_data_length_) return; + Utf8CharacterForward(raw_data_, &raw_data_pos_); + raw_character_position_++; + } +} + + +// ---------------------------------------------------------------------------- +// ExternalTwoByteStringUC16CharacterStream + +ExternalTwoByteStringUC16CharacterStream:: + ~ExternalTwoByteStringUC16CharacterStream() { } + + +ExternalTwoByteStringUC16CharacterStream + ::ExternalTwoByteStringUC16CharacterStream( + Handle data, + int start_position, + int end_position) + : UC16CharacterStream(), + source_(data), + raw_data_(data->GetTwoByteData(start_position)) { + buffer_cursor_ = raw_data_, + buffer_end_ = raw_data_ + (end_position - start_position); + pos_ = start_position; +} + + +// ---------------------------------------------------------------------------- +// Scanner::LiteralScope + +Scanner::LiteralScope::LiteralScope(Scanner* self) + : scanner_(self), complete_(false) { + self->StartLiteral(); +} + + +Scanner::LiteralScope::~LiteralScope() { + if (!complete_) scanner_->DropLiteral(); +} + + +void Scanner::LiteralScope::Complete() { + scanner_->TerminateLiteral(); + complete_ = true; +} + +} } // namespace v8::internal diff --git a/src/scanner-character-streams.h b/src/scanner-character-streams.h new file mode 100644 index 
00000000000..5c4ea2ca36d --- /dev/null +++ b/src/scanner-character-streams.h @@ -0,0 +1,129 @@ +// Copyright 2011 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef V8_SCANNER_CHARACTER_STREAMS_H_ +#define V8_SCANNER_CHARACTER_STREAMS_H_ + +#include "scanner.h" + +namespace v8 { +namespace internal { + +// A buffered character stream based on a random access character +// source (ReadBlock can be called with pos_ pointing to any position, +// even positions before the current). 
+class BufferedUC16CharacterStream: public UC16CharacterStream { + public: + BufferedUC16CharacterStream(); + virtual ~BufferedUC16CharacterStream(); + + virtual void PushBack(uc32 character); + + protected: + static const unsigned kBufferSize = 512; + static const unsigned kPushBackStepSize = 16; + + virtual unsigned SlowSeekForward(unsigned delta); + virtual bool ReadBlock(); + virtual void SlowPushBack(uc16 character); + + virtual unsigned BufferSeekForward(unsigned delta) = 0; + virtual unsigned FillBuffer(unsigned position, unsigned length) = 0; + + const uc16* pushback_limit_; + uc16 buffer_[kBufferSize]; +}; + + +// Generic string stream. +class GenericStringUC16CharacterStream: public BufferedUC16CharacterStream { + public: + GenericStringUC16CharacterStream(Handle data, + unsigned start_position, + unsigned end_position); + virtual ~GenericStringUC16CharacterStream(); + + protected: + virtual unsigned BufferSeekForward(unsigned delta); + virtual unsigned FillBuffer(unsigned position, unsigned length); + + Handle string_; + unsigned start_position_; + unsigned length_; +}; + + +// UC16 stream based on a literal UTF-8 string. +class Utf8ToUC16CharacterStream: public BufferedUC16CharacterStream { + public: + Utf8ToUC16CharacterStream(const byte* data, unsigned length); + virtual ~Utf8ToUC16CharacterStream(); + + protected: + virtual unsigned BufferSeekForward(unsigned delta); + virtual unsigned FillBuffer(unsigned char_position, unsigned length); + void SetRawPosition(unsigned char_position); + + const byte* raw_data_; + unsigned raw_data_length_; // Measured in bytes, not characters. + unsigned raw_data_pos_; + // The character position of the character at raw_data[raw_data_pos_]. + // Not necessarily the same as pos_. + unsigned raw_character_position_; +}; + + +// UTF16 buffer to read characters from an external string. 
+class ExternalTwoByteStringUC16CharacterStream: public UC16CharacterStream { + public: + ExternalTwoByteStringUC16CharacterStream(Handle data, + int start_position, + int end_position); + virtual ~ExternalTwoByteStringUC16CharacterStream(); + + virtual void PushBack(uc32 character) { + ASSERT(buffer_cursor_ > raw_data_); + buffer_cursor_--; + pos_--; + } + + protected: + virtual unsigned SlowSeekForward(unsigned delta) { + // Fast case always handles seeking. + return 0; + } + virtual bool ReadBlock() { + // Entire string is read at start. + return false; + } + Handle source_; + const uc16* raw_data_; // Pointer to the actual array of characters. +}; + +} } // namespace v8::internal + +#endif // V8_SCANNER_CHARACTER_STREAMS_H_ diff --git a/src/scanner.cc b/src/scanner.cc index 5919073cde8..3425f4159e5 100644 --- a/src/scanner.cc +++ b/src/scanner.cc @@ -25,303 +25,1067 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#include "v8.h" +// Features shared by parsing and pre-parsing scanners. -#include "ast.h" -#include "handles.h" #include "scanner.h" -#include "unicode-inl.h" + +#include "../include/v8stdint.h" +#include "char-predicates-inl.h" namespace v8 { namespace internal { // ---------------------------------------------------------------------------- -// BufferedUC16CharacterStreams - -BufferedUC16CharacterStream::BufferedUC16CharacterStream() - : UC16CharacterStream(), - pushback_limit_(NULL) { - // Initialize buffer as being empty. First read will fill the buffer. 
- buffer_cursor_ = buffer_; - buffer_end_ = buffer_; +// Scanner + +Scanner::Scanner(UnicodeCache* unicode_cache) + : unicode_cache_(unicode_cache) { } + + +uc32 Scanner::ScanHexNumber(int expected_length) { + ASSERT(expected_length <= 4); // prevent overflow + + uc32 digits[4] = { 0, 0, 0, 0 }; + uc32 x = 0; + for (int i = 0; i < expected_length; i++) { + digits[i] = c0_; + int d = HexValue(c0_); + if (d < 0) { + // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes + // should be illegal, but other JS VMs just return the + // non-escaped version of the original character. + + // Push back digits that we have advanced past. + for (int j = i-1; j >= 0; j--) { + PushBack(digits[j]); + } + return -1; + } + x = x * 16 + d; + Advance(); + } + + return x; } -BufferedUC16CharacterStream::~BufferedUC16CharacterStream() { } -void BufferedUC16CharacterStream::PushBack(uc32 character) { - if (character == kEndOfInput) { - pos_--; - return; + +// ---------------------------------------------------------------------------- +// JavaScriptScanner + +JavaScriptScanner::JavaScriptScanner(UnicodeCache* scanner_contants) + : Scanner(scanner_contants), + octal_pos_(Location::invalid()), + harmony_block_scoping_(false) { } + + +void JavaScriptScanner::Initialize(UC16CharacterStream* source) { + source_ = source; + // Need to capture identifiers in order to recognize "get" and "set" + // in object literals. + Init(); + // Skip initial whitespace allowing HTML comment ends just like + // after a newline and scan first token. + has_line_terminator_before_next_ = true; + SkipWhiteSpace(); + Scan(); +} + + +// Ensure that tokens can be stored in a byte. +STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); + +// Table of one-character tokens, by character (0x00..0x7f only). 
+static const byte one_char_tokens[] = { + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::LPAREN, // 0x28 + Token::RPAREN, // 0x29 + Token::ILLEGAL, + Token::ILLEGAL, + Token::COMMA, // 0x2c + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::COLON, // 0x3a + Token::SEMICOLON, // 0x3b + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::CONDITIONAL, // 0x3f + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::LBRACK, // 0x5b + Token::ILLEGAL, + Token::RBRACK, // 0x5d + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + 
Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::LBRACE, // 0x7b + Token::ILLEGAL, + Token::RBRACE, // 0x7d + Token::BIT_NOT, // 0x7e + Token::ILLEGAL +}; + + +Token::Value JavaScriptScanner::Next() { + current_ = next_; + has_line_terminator_before_next_ = false; + has_multiline_comment_before_next_ = false; + if (static_cast(c0_) <= 0x7f) { + Token::Value token = static_cast(one_char_tokens[c0_]); + if (token != Token::ILLEGAL) { + int pos = source_pos(); + next_.token = token; + next_.location.beg_pos = pos; + next_.location.end_pos = pos + 1; + Advance(); + return current_.token; + } } - if (pushback_limit_ == NULL && buffer_cursor_ > buffer_) { - // buffer_ is writable, buffer_cursor_ is const pointer. - buffer_[--buffer_cursor_ - buffer_] = static_cast(character); - pos_--; - return; + Scan(); + return current_.token; +} + + +static inline bool IsByteOrderMark(uc32 c) { + // The Unicode value U+FFFE is guaranteed never to be assigned as a + // Unicode character; this implies that in a Unicode context the + // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF + // character expressed in little-endian byte order (since it could + // not be a U+FFFE character expressed in big-endian byte + // order). Nevertheless, we check for it to be compatible with + // Spidermonkey. + return c == 0xFEFF || c == 0xFFFE; +} + + +bool JavaScriptScanner::SkipWhiteSpace() { + int start_position = source_pos(); + + while (true) { + // We treat byte-order marks (BOMs) as whitespace for better + // compatibility with Spidermonkey and other JavaScript engines. + while (unicode_cache_->IsWhiteSpace(c0_) || IsByteOrderMark(c0_)) { + // IsWhiteSpace() includes line terminators! 
+ if (unicode_cache_->IsLineTerminator(c0_)) { + // Ignore line terminators, but remember them. This is necessary + // for automatic semicolon insertion. + has_line_terminator_before_next_ = true; + } + Advance(); + } + + // If there is an HTML comment end '-->' at the beginning of a + // line (with only whitespace in front of it), we treat the rest + // of the line as a comment. This is in line with the way + // SpiderMonkey handles it. + if (c0_ == '-' && has_line_terminator_before_next_) { + Advance(); + if (c0_ == '-') { + Advance(); + if (c0_ == '>') { + // Treat the rest of the line as a comment. + SkipSingleLineComment(); + // Continue skipping white space after the comment. + continue; + } + PushBack('-'); // undo Advance() + } + PushBack('-'); // undo Advance() + } + // Return whether or not we skipped any characters. + return source_pos() != start_position; } - SlowPushBack(static_cast(character)); } -void BufferedUC16CharacterStream::SlowPushBack(uc16 character) { - // In pushback mode, the end of the buffer contains pushback, - // and the start of the buffer (from buffer start to pushback_limit_) - // contains valid data that comes just after the pushback. - // We NULL the pushback_limit_ if pushing all the way back to the - // start of the buffer. +Token::Value JavaScriptScanner::SkipSingleLineComment() { + Advance(); - if (pushback_limit_ == NULL) { - // Enter pushback mode. - pushback_limit_ = buffer_end_; - buffer_end_ = buffer_ + kBufferSize; - buffer_cursor_ = buffer_end_; + // The line terminator at the end of the line is not considered + // to be part of the single-line comment; it is recognized + // separately by the lexical grammar and becomes part of the + // stream of input elements for the syntactic grammar (see + // ECMA-262, section 7.4). + while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) { + Advance(); } - // Ensure that there is room for at least one pushback. 
- ASSERT(buffer_cursor_ > buffer_); - ASSERT(pos_ > 0); - buffer_[--buffer_cursor_ - buffer_] = character; - if (buffer_cursor_ == buffer_) { - pushback_limit_ = NULL; - } else if (buffer_cursor_ < pushback_limit_) { - pushback_limit_ = buffer_cursor_; + + return Token::WHITESPACE; +} + + +Token::Value JavaScriptScanner::SkipMultiLineComment() { + ASSERT(c0_ == '*'); + Advance(); + + while (c0_ >= 0) { + uc32 ch = c0_; + Advance(); + if (unicode_cache_->IsLineTerminator(ch)) { + // Following ECMA-262, section 7.4, a comment containing + // a newline will make the comment count as a line-terminator. + has_multiline_comment_before_next_ = true; + } + // If we have reached the end of the multi-line comment, we + // consume the '/' and insert a whitespace. This way all + // multi-line comments are treated as whitespace. + if (ch == '*' && c0_ == '/') { + c0_ = ' '; + return Token::WHITESPACE; + } } - pos_--; + + // Unterminated multi-line comment. + return Token::ILLEGAL; } -bool BufferedUC16CharacterStream::ReadBlock() { - buffer_cursor_ = buffer_; - if (pushback_limit_ != NULL) { - // Leave pushback mode. - buffer_end_ = pushback_limit_; - pushback_limit_ = NULL; - // If there were any valid characters left at the - // start of the buffer, use those. - if (buffer_cursor_ < buffer_end_) return true; - // Otherwise read a new block. +Token::Value JavaScriptScanner::ScanHtmlComment() { + // Check for -= + Advance(); + if (c0_ == '-') { + Advance(); + if (c0_ == '>' && has_line_terminator_before_next_) { + // For compatibility with SpiderMonkey, we skip lines that + // start with an HTML comment end '-->'. 
+ token = SkipSingleLineComment(); + } else { + token = Token::DEC; + } + } else if (c0_ == '=') { + token = Select(Token::ASSIGN_SUB); + } else { + token = Token::SUB; + } + break; + + case '*': + // * *= + token = Select('=', Token::ASSIGN_MUL, Token::MUL); + break; + + case '%': + // % %= + token = Select('=', Token::ASSIGN_MOD, Token::MOD); + break; + + case '/': + // / // /* /= + Advance(); + if (c0_ == '/') { + token = SkipSingleLineComment(); + } else if (c0_ == '*') { + token = SkipMultiLineComment(); + } else if (c0_ == '=') { + token = Select(Token::ASSIGN_DIV); + } else { + token = Token::DIV; + } + break; + + case '&': + // & && &= + Advance(); + if (c0_ == '&') { + token = Select(Token::AND); + } else if (c0_ == '=') { + token = Select(Token::ASSIGN_BIT_AND); + } else { + token = Token::BIT_AND; + } + break; + + case '|': + // | || |= + Advance(); + if (c0_ == '|') { + token = Select(Token::OR); + } else if (c0_ == '=') { + token = Select(Token::ASSIGN_BIT_OR); + } else { + token = Token::BIT_OR; + } + break; + + case '^': + // ^ ^= + token = Select('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR); + break; + + case '.': + // . 
Number + Advance(); + if (IsDecimalDigit(c0_)) { + token = ScanNumber(true); + } else { + token = Token::PERIOD; + } + break; + + case ':': + token = Select(Token::COLON); + break; + + case ';': + token = Select(Token::SEMICOLON); + break; + + case ',': + token = Select(Token::COMMA); + break; + + case '(': + token = Select(Token::LPAREN); + break; + + case ')': + token = Select(Token::RPAREN); + break; + + case '[': + token = Select(Token::LBRACK); + break; + + case ']': + token = Select(Token::RBRACK); + break; + + case '{': + token = Select(Token::LBRACE); + break; + + case '}': + token = Select(Token::RBRACE); + break; + + case '?': + token = Select(Token::CONDITIONAL); + break; + + case '~': + token = Select(Token::BIT_NOT); + break; + + default: + if (unicode_cache_->IsIdentifierStart(c0_)) { + token = ScanIdentifierOrKeyword(); + } else if (IsDecimalDigit(c0_)) { + token = ScanNumber(false); + } else if (SkipWhiteSpace()) { + token = Token::WHITESPACE; + } else if (c0_ < 0) { + token = Token::EOS; + } else { + token = Select(Token::ILLEGAL); + } + break; + } + + // Continue scanning for tokens as long as we're just skipping + // whitespace. + } while (token == Token::WHITESPACE); + + next_.location.end_pos = source_pos(); + next_.token = token; } -// ---------------------------------------------------------------------------- -// GenericStringUC16CharacterStream - - -GenericStringUC16CharacterStream::GenericStringUC16CharacterStream( - Handle data, - unsigned start_position, - unsigned end_position) - : string_(data), - length_(end_position) { - ASSERT(end_position >= start_position); - buffer_cursor_ = buffer_; - buffer_end_ = buffer_; - pos_ = start_position; + +void JavaScriptScanner::SeekForward(int pos) { + // After this call, we will have the token at the given position as + // the "next" token. The "current" token will be invalid. 
+ if (pos == next_.location.beg_pos) return; + int current_pos = source_pos(); + ASSERT_EQ(next_.location.end_pos, current_pos); + // Positions inside the lookahead token aren't supported. + ASSERT(pos >= current_pos); + if (pos != current_pos) { + source_->SeekForward(pos - source_->pos()); + Advance(); + // This function is only called to seek to the location + // of the end of a function (at the "}" token). It doesn't matter + // whether there was a line terminator in the part we skip. + has_line_terminator_before_next_ = false; + has_multiline_comment_before_next_ = false; + } + Scan(); } -GenericStringUC16CharacterStream::~GenericStringUC16CharacterStream() { } +void JavaScriptScanner::ScanEscape() { + uc32 c = c0_; + Advance(); + + // Skip escaped newlines. + if (unicode_cache_->IsLineTerminator(c)) { + // Allow CR+LF newlines in multiline string literals. + if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); + // Allow LF+CR newlines in multiline string literals. + if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); + return; + } + switch (c) { + case '\'': // fall through + case '"' : // fall through + case '\\': break; + case 'b' : c = '\b'; break; + case 'f' : c = '\f'; break; + case 'n' : c = '\n'; break; + case 'r' : c = '\r'; break; + case 't' : c = '\t'; break; + case 'u' : { + c = ScanHexNumber(4); + if (c < 0) c = 'u'; + break; + } + case 'v' : c = '\v'; break; + case 'x' : { + c = ScanHexNumber(2); + if (c < 0) c = 'x'; + break; + } + case '0' : // fall through + case '1' : // fall through + case '2' : // fall through + case '3' : // fall through + case '4' : // fall through + case '5' : // fall through + case '6' : // fall through + case '7' : c = ScanOctalEscape(c, 2); break; + } -unsigned GenericStringUC16CharacterStream::BufferSeekForward(unsigned delta) { - unsigned old_pos = pos_; - pos_ = Min(pos_ + delta, length_); - ReadBlock(); - return pos_ - old_pos; + // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these + // should be illegal, but 
they are commonly handled + // as non-escaped characters by JS VMs. + AddLiteralChar(c); } -unsigned GenericStringUC16CharacterStream::FillBuffer(unsigned from_pos, - unsigned length) { - if (from_pos >= length_) return 0; - if (from_pos + length > length_) { - length = length_ - from_pos; +// Octal escapes of the forms '\0xx' and '\xxx' are not a part of +// ECMA-262. Other JS VMs support them. +uc32 JavaScriptScanner::ScanOctalEscape(uc32 c, int length) { + uc32 x = c - '0'; + int i = 0; + for (; i < length; i++) { + int d = c0_ - '0'; + if (d < 0 || d > 7) break; + int nx = x * 8 + d; + if (nx >= 256) break; + x = nx; + Advance(); + } + // Anything except '\0' is an octal escape sequence, illegal in strict mode. + // Remember the position of octal escape sequences so that an error + // can be reported later (in strict mode). + // We don't report the error immediately, because the octal escape can + // occur before the "use strict" directive. + if (c != '0' || i > 0) { + octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1); } - String::WriteToFlat(*string_, buffer_, from_pos, from_pos + length); - return length; + return x; } -// ---------------------------------------------------------------------------- -// Utf8ToUC16CharacterStream -Utf8ToUC16CharacterStream::Utf8ToUC16CharacterStream(const byte* data, - unsigned length) - : BufferedUC16CharacterStream(), - raw_data_(data), - raw_data_length_(length), - raw_data_pos_(0), - raw_character_position_(0) { - ReadBlock(); -} +Token::Value JavaScriptScanner::ScanString() { + uc32 quote = c0_; + Advance(); // consume quote + LiteralScope literal(this); + while (c0_ != quote && c0_ >= 0 + && !unicode_cache_->IsLineTerminator(c0_)) { + uc32 c = c0_; + Advance(); + if (c == '\\') { + if (c0_ < 0) return Token::ILLEGAL; + ScanEscape(); + } else { + AddLiteralChar(c); + } + } + if (c0_ != quote) return Token::ILLEGAL; + literal.Complete(); -Utf8ToUC16CharacterStream::~Utf8ToUC16CharacterStream() { } + Advance(); 
// consume quote + return Token::STRING; +} -unsigned Utf8ToUC16CharacterStream::BufferSeekForward(unsigned delta) { - unsigned old_pos = pos_; - unsigned target_pos = pos_ + delta; - SetRawPosition(target_pos); - pos_ = raw_character_position_; - ReadBlock(); - return pos_ - old_pos; +void JavaScriptScanner::ScanDecimalDigits() { + while (IsDecimalDigit(c0_)) + AddLiteralCharAdvance(); } -unsigned Utf8ToUC16CharacterStream::FillBuffer(unsigned char_position, - unsigned length) { - static const unibrow::uchar kMaxUC16Character = 0xffff; - SetRawPosition(char_position); - if (raw_character_position_ != char_position) { - // char_position was not a valid position in the stream (hit the end - // while spooling to it). - return 0u; - } - unsigned i = 0; - while (i < length) { - if (raw_data_pos_ == raw_data_length_) break; - unibrow::uchar c = raw_data_[raw_data_pos_]; - if (c <= unibrow::Utf8::kMaxOneByteChar) { - raw_data_pos_++; - } else { - c = unibrow::Utf8::CalculateValue(raw_data_ + raw_data_pos_, - raw_data_length_ - raw_data_pos_, - &raw_data_pos_); - // Don't allow characters outside of the BMP. - if (c > kMaxUC16Character) { - c = unibrow::Utf8::kBadChar; +Token::Value JavaScriptScanner::ScanNumber(bool seen_period) { + ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction + + enum { DECIMAL, HEX, OCTAL } kind = DECIMAL; + + LiteralScope literal(this); + if (seen_period) { + // we have already seen a decimal point of the float + AddLiteralChar('.'); + ScanDecimalDigits(); // we know we have at least one digit + + } else { + // if the first character is '0' we must check for octals and hex + if (c0_ == '0') { + int start_pos = source_pos(); // For reporting octal positions. 
+ AddLiteralCharAdvance(); + + // either 0, 0exxx, 0Exxx, 0.xxx, an octal number, or a hex number + if (c0_ == 'x' || c0_ == 'X') { + // hex number + kind = HEX; + AddLiteralCharAdvance(); + if (!IsHexDigit(c0_)) { + // we must have at least one hex digit after 'x'/'X' + return Token::ILLEGAL; + } + while (IsHexDigit(c0_)) { + AddLiteralCharAdvance(); + } + } else if ('0' <= c0_ && c0_ <= '7') { + // (possible) octal number + kind = OCTAL; + while (true) { + if (c0_ == '8' || c0_ == '9') { + kind = DECIMAL; + break; + } + if (c0_ < '0' || '7' < c0_) { + // Octal literal finished. + octal_pos_ = Location(start_pos, source_pos()); + break; + } + AddLiteralCharAdvance(); + } + } + } + + // Parse decimal digits and allow trailing fractional part. + if (kind == DECIMAL) { + ScanDecimalDigits(); // optional + if (c0_ == '.') { + AddLiteralCharAdvance(); + ScanDecimalDigits(); // optional } } - buffer_[i++] = static_cast(c); } - raw_character_position_ = char_position + i; - return i; -} + // scan exponent, if any + if (c0_ == 'e' || c0_ == 'E') { + ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number + if (kind == OCTAL) return Token::ILLEGAL; // no exponent for octals allowed + // scan exponent + AddLiteralCharAdvance(); + if (c0_ == '+' || c0_ == '-') + AddLiteralCharAdvance(); + if (!IsDecimalDigit(c0_)) { + // we must have at least one decimal digit after 'e'/'E' + return Token::ILLEGAL; + } + ScanDecimalDigits(); + } -static const byte kUtf8MultiByteMask = 0xC0; -static const byte kUtf8MultiByteCharStart = 0xC0; -static const byte kUtf8MultiByteCharFollower = 0x80; + // The source character immediately following a numeric literal must + // not be an identifier start or a decimal digit; see ECMA-262 + // section 7.8.3, page 17 (note that we read only one decimal digit + // if the value is 0). 
+ if (IsDecimalDigit(c0_) || unicode_cache_->IsIdentifierStart(c0_)) + return Token::ILLEGAL; + literal.Complete(); -#ifdef DEBUG -static bool IsUtf8MultiCharacterStart(byte first_byte) { - return (first_byte & kUtf8MultiByteMask) == kUtf8MultiByteCharStart; + return Token::NUMBER; } -#endif -static bool IsUtf8MultiCharacterFollower(byte later_byte) { - return (later_byte & kUtf8MultiByteMask) == kUtf8MultiByteCharFollower; +uc32 JavaScriptScanner::ScanIdentifierUnicodeEscape() { + Advance(); + if (c0_ != 'u') return -1; + Advance(); + uc32 result = ScanHexNumber(4); + if (result < 0) PushBack('u'); + return result; } -// Move the cursor back to point at the preceding UTF-8 character start -// in the buffer. -static inline void Utf8CharacterBack(const byte* buffer, unsigned* cursor) { - byte character = buffer[--*cursor]; - if (character > unibrow::Utf8::kMaxOneByteChar) { - ASSERT(IsUtf8MultiCharacterFollower(character)); - // Last byte of a multi-byte character encoding. Step backwards until - // pointing to the first byte of the encoding, recognized by having the - // top two bits set. 
- while (IsUtf8MultiCharacterFollower(buffer[--*cursor])) { } - ASSERT(IsUtf8MultiCharacterStart(buffer[*cursor])); +// ---------------------------------------------------------------------------- +// Keyword Matcher + +#define KEYWORDS(KEYWORD_GROUP, KEYWORD) \ + KEYWORD_GROUP('b') \ + KEYWORD("break", Token::BREAK) \ + KEYWORD_GROUP('c') \ + KEYWORD("case", Token::CASE) \ + KEYWORD("catch", Token::CATCH) \ + KEYWORD("class", Token::FUTURE_RESERVED_WORD) \ + KEYWORD("const", Token::CONST) \ + KEYWORD("continue", Token::CONTINUE) \ + KEYWORD_GROUP('d') \ + KEYWORD("debugger", Token::DEBUGGER) \ + KEYWORD("default", Token::DEFAULT) \ + KEYWORD("delete", Token::DELETE) \ + KEYWORD("do", Token::DO) \ + KEYWORD_GROUP('e') \ + KEYWORD("else", Token::ELSE) \ + KEYWORD("enum", Token::FUTURE_RESERVED_WORD) \ + KEYWORD("export", Token::FUTURE_RESERVED_WORD) \ + KEYWORD("extends", Token::FUTURE_RESERVED_WORD) \ + KEYWORD_GROUP('f') \ + KEYWORD("false", Token::FALSE_LITERAL) \ + KEYWORD("finally", Token::FINALLY) \ + KEYWORD("for", Token::FOR) \ + KEYWORD("function", Token::FUNCTION) \ + KEYWORD_GROUP('i') \ + KEYWORD("if", Token::IF) \ + KEYWORD("implements", Token::FUTURE_STRICT_RESERVED_WORD) \ + KEYWORD("import", Token::FUTURE_RESERVED_WORD) \ + KEYWORD("in", Token::IN) \ + KEYWORD("instanceof", Token::INSTANCEOF) \ + KEYWORD("interface", Token::FUTURE_STRICT_RESERVED_WORD) \ + KEYWORD_GROUP('l') \ + KEYWORD("let", harmony_block_scoping \ + ? 
Token::LET : Token::FUTURE_STRICT_RESERVED_WORD) \ + KEYWORD_GROUP('n') \ + KEYWORD("new", Token::NEW) \ + KEYWORD("null", Token::NULL_LITERAL) \ + KEYWORD_GROUP('p') \ + KEYWORD("package", Token::FUTURE_STRICT_RESERVED_WORD) \ + KEYWORD("private", Token::FUTURE_STRICT_RESERVED_WORD) \ + KEYWORD("protected", Token::FUTURE_STRICT_RESERVED_WORD) \ + KEYWORD("public", Token::FUTURE_STRICT_RESERVED_WORD) \ + KEYWORD_GROUP('r') \ + KEYWORD("return", Token::RETURN) \ + KEYWORD_GROUP('s') \ + KEYWORD("static", Token::FUTURE_STRICT_RESERVED_WORD) \ + KEYWORD("super", Token::FUTURE_RESERVED_WORD) \ + KEYWORD("switch", Token::SWITCH) \ + KEYWORD_GROUP('t') \ + KEYWORD("this", Token::THIS) \ + KEYWORD("throw", Token::THROW) \ + KEYWORD("true", Token::TRUE_LITERAL) \ + KEYWORD("try", Token::TRY) \ + KEYWORD("typeof", Token::TYPEOF) \ + KEYWORD_GROUP('v') \ + KEYWORD("var", Token::VAR) \ + KEYWORD("void", Token::VOID) \ + KEYWORD_GROUP('w') \ + KEYWORD("while", Token::WHILE) \ + KEYWORD("with", Token::WITH) \ + KEYWORD_GROUP('y') \ + KEYWORD("yield", Token::FUTURE_STRICT_RESERVED_WORD) + + +static Token::Value KeywordOrIdentifierToken(const char* input, + int input_length, + bool harmony_block_scoping) { + ASSERT(input_length >= 1); + const int kMinLength = 2; + const int kMaxLength = 10; + if (input_length < kMinLength || input_length > kMaxLength) { + return Token::IDENTIFIER; + } + switch (input[0]) { + default: +#define KEYWORD_GROUP_CASE(ch) \ + break; \ + case ch: +#define KEYWORD(keyword, token) \ + { \ + /* 'keyword' is a char array, so sizeof(keyword) is */ \ + /* strlen(keyword) plus 1 for the NUL char. 
*/ \ + const int keyword_length = sizeof(keyword) - 1; \ + STATIC_ASSERT(keyword_length >= kMinLength); \ + STATIC_ASSERT(keyword_length <= kMaxLength); \ + if (input_length == keyword_length && \ + input[1] == keyword[1] && \ + (keyword_length <= 2 || input[2] == keyword[2]) && \ + (keyword_length <= 3 || input[3] == keyword[3]) && \ + (keyword_length <= 4 || input[4] == keyword[4]) && \ + (keyword_length <= 5 || input[5] == keyword[5]) && \ + (keyword_length <= 6 || input[6] == keyword[6]) && \ + (keyword_length <= 7 || input[7] == keyword[7]) && \ + (keyword_length <= 8 || input[8] == keyword[8]) && \ + (keyword_length <= 9 || input[9] == keyword[9])) { \ + return token; \ + } \ + } + KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD) } + return Token::IDENTIFIER; } -// Move the cursor forward to point at the next following UTF-8 character start -// in the buffer. -static inline void Utf8CharacterForward(const byte* buffer, unsigned* cursor) { - byte character = buffer[(*cursor)++]; - if (character > unibrow::Utf8::kMaxOneByteChar) { - // First character of a multi-byte character encoding. - // The number of most-significant one-bits determines the length of the - // encoding: - // 110..... - (0xCx, 0xDx) one additional byte (minimum). - // 1110.... - (0xEx) two additional bytes. - // 11110... - (0xFx) three additional bytes (maximum). - ASSERT(IsUtf8MultiCharacterStart(character)); - // Additional bytes is: - // 1 if value in range 0xC0 .. 0xDF. - // 2 if value in range 0xE0 .. 0xEF. - // 3 if value in range 0xF0 .. 0xF7. - // Encode that in a single value. - unsigned additional_bytes = - ((0x3211u) >> (((character - 0xC0) >> 2) & 0xC)) & 0x03; - *cursor += additional_bytes; - ASSERT(!IsUtf8MultiCharacterFollower(buffer[1 + additional_bytes])); +Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() { + ASSERT(unicode_cache_->IsIdentifierStart(c0_)); + LiteralScope literal(this); + // Scan identifier start character. 
+ if (c0_ == '\\') { + uc32 c = ScanIdentifierUnicodeEscape(); + // Only allow legal identifier start characters. + if (c < 0 || + c == '\\' || // No recursive escapes. + !unicode_cache_->IsIdentifierStart(c)) { + return Token::ILLEGAL; + } + AddLiteralChar(c); + return ScanIdentifierSuffix(&literal); } -} + uc32 first_char = c0_; + Advance(); + AddLiteralChar(first_char); -void Utf8ToUC16CharacterStream::SetRawPosition(unsigned target_position) { - if (raw_character_position_ > target_position) { - // Spool backwards in utf8 buffer. - do { - Utf8CharacterBack(raw_data_, &raw_data_pos_); - raw_character_position_--; - } while (raw_character_position_ > target_position); - return; + // Scan the rest of the identifier characters. + while (unicode_cache_->IsIdentifierPart(c0_)) { + if (c0_ != '\\') { + uc32 next_char = c0_; + Advance(); + AddLiteralChar(next_char); + continue; + } + // Fallthrough if no longer able to complete keyword. + return ScanIdentifierSuffix(&literal); } - // Spool forwards in the utf8 buffer. 
- while (raw_character_position_ < target_position) { - if (raw_data_pos_ == raw_data_length_) return; - Utf8CharacterForward(raw_data_, &raw_data_pos_); - raw_character_position_++; + + literal.Complete(); + + if (next_.literal_chars->is_ascii()) { + Vector chars = next_.literal_chars->ascii_literal(); + return KeywordOrIdentifierToken(chars.start(), + chars.length(), + harmony_block_scoping_); } + + return Token::IDENTIFIER; } -// ---------------------------------------------------------------------------- -// ExternalTwoByteStringUC16CharacterStream - -ExternalTwoByteStringUC16CharacterStream:: - ~ExternalTwoByteStringUC16CharacterStream() { } - - -ExternalTwoByteStringUC16CharacterStream - ::ExternalTwoByteStringUC16CharacterStream( - Handle data, - int start_position, - int end_position) - : UC16CharacterStream(), - source_(data), - raw_data_(data->GetTwoByteData(start_position)) { - buffer_cursor_ = raw_data_, - buffer_end_ = raw_data_ + (end_position - start_position); - pos_ = start_position; +Token::Value JavaScriptScanner::ScanIdentifierSuffix(LiteralScope* literal) { + // Scan the rest of the identifier characters. + while (unicode_cache_->IsIdentifierPart(c0_)) { + if (c0_ == '\\') { + uc32 c = ScanIdentifierUnicodeEscape(); + // Only allow legal identifier part characters. + if (c < 0 || + c == '\\' || + !unicode_cache_->IsIdentifierPart(c)) { + return Token::ILLEGAL; + } + AddLiteralChar(c); + } else { + AddLiteralChar(c0_); + Advance(); + } + } + literal->Complete(); + + return Token::IDENTIFIER; } -// ---------------------------------------------------------------------------- -// Scanner::LiteralScope +bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) { + // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags + bool in_character_class = false; + + // Previous token is either '/' or '/=', in the second case, the + // pattern starts at =. + next_.location.beg_pos = source_pos() - (seen_equal ? 
2 : 1); + next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); -Scanner::LiteralScope::LiteralScope(Scanner* self) - : scanner_(self), complete_(false) { - self->StartLiteral(); + // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, + // the scanner should pass uninterpreted bodies to the RegExp + // constructor. + LiteralScope literal(this); + if (seen_equal) { + AddLiteralChar('='); + } + + while (c0_ != '/' || in_character_class) { + if (unicode_cache_->IsLineTerminator(c0_) || c0_ < 0) return false; + if (c0_ == '\\') { // Escape sequence. + AddLiteralCharAdvance(); + if (unicode_cache_->IsLineTerminator(c0_) || c0_ < 0) return false; + AddLiteralCharAdvance(); + // If the escape allows more characters, i.e., \x??, \u????, or \c?, + // only "safe" characters are allowed (letters, digits, underscore), + // otherwise the escape isn't valid and the invalid character has + // its normal meaning. I.e., we can just continue scanning without + // worrying whether the following characters are part of the escape + // or not, since any '/', '\\' or '[' is guaranteed to not be part + // of the escape sequence. + + // TODO(896): At some point, parse RegExps more throughly to capture + // octal esacpes in strict mode. + } else { // Unescaped character. + if (c0_ == '[') in_character_class = true; + if (c0_ == ']') in_character_class = false; + AddLiteralCharAdvance(); + } + } + Advance(); // consume '/' + + literal.Complete(); + + return true; } -Scanner::LiteralScope::~LiteralScope() { - if (!complete_) scanner_->DropLiteral(); +bool JavaScriptScanner::ScanLiteralUnicodeEscape() { + ASSERT(c0_ == '\\'); + uc32 chars_read[6] = {'\\', 'u', 0, 0, 0, 0}; + Advance(); + int i = 1; + if (c0_ == 'u') { + i++; + while (i < 6) { + Advance(); + if (!IsHexDigit(c0_)) break; + chars_read[i] = c0_; + i++; + } + } + if (i < 6) { + // Incomplete escape. Undo all advances and return false. 
+ while (i > 0) { + i--; + PushBack(chars_read[i]); + } + return false; + } + // Complete escape. Add all chars to current literal buffer. + for (int i = 0; i < 6; i++) { + AddLiteralChar(chars_read[i]); + } + return true; } -void Scanner::LiteralScope::Complete() { - scanner_->TerminateLiteral(); - complete_ = true; +bool JavaScriptScanner::ScanRegExpFlags() { + // Scan regular expression flags. + LiteralScope literal(this); + while (unicode_cache_->IsIdentifierPart(c0_)) { + if (c0_ != '\\') { + AddLiteralCharAdvance(); + } else { + if (!ScanLiteralUnicodeEscape()) { + break; + } + } + } + literal.Complete(); + + next_.location.end_pos = source_pos() - 1; + return true; } } } // namespace v8::internal diff --git a/src/scanner.h b/src/scanner.h index 6422ee8cab8..73a4e217981 100644 --- a/src/scanner.h +++ b/src/scanner.h @@ -25,103 +25,538 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// Features shared by parsing and pre-parsing scanners. + #ifndef V8_SCANNER_H_ #define V8_SCANNER_H_ -#include "scanner-base.h" +#include "allocation.h" +#include "char-predicates.h" +#include "checks.h" +#include "globals.h" +#include "token.h" +#include "unicode-inl.h" +#include "utils.h" namespace v8 { namespace internal { -// A buffered character stream based on a random access character -// source (ReadBlock can be called with pos_ pointing to any position, -// even positions before the current). -class BufferedUC16CharacterStream: public UC16CharacterStream { +// Returns the value (0 .. 15) of a hexadecimal character c. +// If c is not a legal hexadecimal character, returns a value < 0. +inline int HexValue(uc32 c) { + c -= '0'; + if (static_cast(c) <= 9) return c; + c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36. 
+ if (static_cast(c) <= 5) return c + 10; + return -1; +} + + +// --------------------------------------------------------------------- +// Buffered stream of characters, using an internal UC16 buffer. + +class UC16CharacterStream { public: - BufferedUC16CharacterStream(); - virtual ~BufferedUC16CharacterStream(); + UC16CharacterStream() : pos_(0) { } + virtual ~UC16CharacterStream() { } + + // Returns and advances past the next UC16 character in the input + // stream. If there are no more characters, it returns a negative + // value. + inline uc32 Advance() { + if (buffer_cursor_ < buffer_end_ || ReadBlock()) { + pos_++; + return static_cast(*(buffer_cursor_++)); + } + // Note: currently the following increment is necessary to avoid a + // parser problem! The scanner treats the final kEndOfInput as + // a character with a position, and does math relative to that + // position. + pos_++; + + return kEndOfInput; + } - virtual void PushBack(uc32 character); + // Return the current position in the character stream. + // Starts at zero. + inline unsigned pos() const { return pos_; } + + // Skips forward past the next character_count UC16 characters + // in the input, or until the end of input if that comes sooner. + // Returns the number of characters actually skipped. If less + // than character_count, + inline unsigned SeekForward(unsigned character_count) { + unsigned buffered_chars = + static_cast(buffer_end_ - buffer_cursor_); + if (character_count <= buffered_chars) { + buffer_cursor_ += character_count; + pos_ += character_count; + return character_count; + } + return SlowSeekForward(character_count); + } + + // Pushes back the most recently read UC16 character (or negative + // value if at end of input), i.e., the value returned by the most recent + // call to Advance. + // Must not be used right after calling SeekForward. 
+ virtual void PushBack(int32_t character) = 0; protected: - static const unsigned kBufferSize = 512; - static const unsigned kPushBackStepSize = 16; + static const uc32 kEndOfInput = -1; + + // Ensures that the buffer_cursor_ points to the character at + // position pos_ of the input, if possible. If the position + // is at or after the end of the input, return false. If there + // are more characters available, return true. + virtual bool ReadBlock() = 0; + virtual unsigned SlowSeekForward(unsigned character_count) = 0; + + const uc16* buffer_cursor_; + const uc16* buffer_end_; + unsigned pos_; +}; + - virtual unsigned SlowSeekForward(unsigned delta); - virtual bool ReadBlock(); - virtual void SlowPushBack(uc16 character); +class UnicodeCache { +// --------------------------------------------------------------------- +// Caching predicates used by scanners. + public: + UnicodeCache() {} + typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; + + StaticResource* utf8_decoder() { + return &utf8_decoder_; + } + + bool IsIdentifierStart(unibrow::uchar c) { return kIsIdentifierStart.get(c); } + bool IsIdentifierPart(unibrow::uchar c) { return kIsIdentifierPart.get(c); } + bool IsLineTerminator(unibrow::uchar c) { return kIsLineTerminator.get(c); } + bool IsWhiteSpace(unibrow::uchar c) { return kIsWhiteSpace.get(c); } - virtual unsigned BufferSeekForward(unsigned delta) = 0; - virtual unsigned FillBuffer(unsigned position, unsigned length) = 0; + private: - const uc16* pushback_limit_; - uc16 buffer_[kBufferSize]; + unibrow::Predicate kIsIdentifierStart; + unibrow::Predicate kIsIdentifierPart; + unibrow::Predicate kIsLineTerminator; + unibrow::Predicate kIsWhiteSpace; + StaticResource utf8_decoder_; + + DISALLOW_COPY_AND_ASSIGN(UnicodeCache); }; -// Generic string stream. 
-class GenericStringUC16CharacterStream: public BufferedUC16CharacterStream { +// ---------------------------------------------------------------------------- +// LiteralBuffer - Collector of chars of literals. + +class LiteralBuffer { public: - GenericStringUC16CharacterStream(Handle data, - unsigned start_position, - unsigned end_position); - virtual ~GenericStringUC16CharacterStream(); + LiteralBuffer() : is_ascii_(true), position_(0), backing_store_() { } - protected: - virtual unsigned BufferSeekForward(unsigned delta); - virtual unsigned FillBuffer(unsigned position, unsigned length); + ~LiteralBuffer() { + if (backing_store_.length() > 0) { + backing_store_.Dispose(); + } + } + + inline void AddChar(uc16 character) { + if (position_ >= backing_store_.length()) ExpandBuffer(); + if (is_ascii_) { + if (character < kMaxAsciiCharCodeU) { + backing_store_[position_] = static_cast(character); + position_ += kASCIISize; + return; + } + ConvertToUC16(); + } + *reinterpret_cast(&backing_store_[position_]) = character; + position_ += kUC16Size; + } + + bool is_ascii() { return is_ascii_; } + + Vector uc16_literal() { + ASSERT(!is_ascii_); + ASSERT((position_ & 0x1) == 0); + return Vector( + reinterpret_cast(backing_store_.start()), + position_ >> 1); + } + + Vector ascii_literal() { + ASSERT(is_ascii_); + return Vector( + reinterpret_cast(backing_store_.start()), + position_); + } + + int length() { + return is_ascii_ ? 
position_ : (position_ >> 1); + } + + void Reset() { + position_ = 0; + is_ascii_ = true; + } + private: + static const int kInitialCapacity = 16; + static const int kGrowthFactory = 4; + static const int kMinConversionSlack = 256; + static const int kMaxGrowth = 1 * MB; + inline int NewCapacity(int min_capacity) { + int capacity = Max(min_capacity, backing_store_.length()); + int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth); + return new_capacity; + } + + void ExpandBuffer() { + Vector new_store = Vector::New(NewCapacity(kInitialCapacity)); + memcpy(new_store.start(), backing_store_.start(), position_); + backing_store_.Dispose(); + backing_store_ = new_store; + } + + void ConvertToUC16() { + ASSERT(is_ascii_); + Vector new_store; + int new_content_size = position_ * kUC16Size; + if (new_content_size >= backing_store_.length()) { + // Ensure room for all currently read characters as UC16 as well + // as the character about to be stored. + new_store = Vector::New(NewCapacity(new_content_size)); + } else { + new_store = backing_store_; + } + char* src = reinterpret_cast(backing_store_.start()); + uc16* dst = reinterpret_cast(new_store.start()); + for (int i = position_ - 1; i >= 0; i--) { + dst[i] = src[i]; + } + if (new_store.start() != backing_store_.start()) { + backing_store_.Dispose(); + backing_store_ = new_store; + } + position_ = new_content_size; + is_ascii_ = false; + } + + bool is_ascii_; + int position_; + Vector backing_store_; - Handle string_; - unsigned start_position_; - unsigned length_; + DISALLOW_COPY_AND_ASSIGN(LiteralBuffer); }; -// UC16 stream based on a literal UTF-8 string. -class Utf8ToUC16CharacterStream: public BufferedUC16CharacterStream { +// ---------------------------------------------------------------------------- +// Scanner base-class. + +// Generic functionality used by both JSON and JavaScript scanners. 
+class Scanner { public: - Utf8ToUC16CharacterStream(const byte* data, unsigned length); - virtual ~Utf8ToUC16CharacterStream(); + // -1 is outside of the range of any real source code. + static const int kNoOctalLocation = -1; + + typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; + + class LiteralScope { + public: + explicit LiteralScope(Scanner* self); + ~LiteralScope(); + void Complete(); + + private: + Scanner* scanner_; + bool complete_; + }; + + explicit Scanner(UnicodeCache* scanner_contants); + + // Returns the current token again. + Token::Value current_token() { return current_.token; } + + // One token look-ahead (past the token returned by Next()). + Token::Value peek() const { return next_.token; } + + struct Location { + Location(int b, int e) : beg_pos(b), end_pos(e) { } + Location() : beg_pos(0), end_pos(0) { } + + bool IsValid() const { + return beg_pos >= 0 && end_pos >= beg_pos; + } + + static Location invalid() { return Location(-1, -1); } + + int beg_pos; + int end_pos; + }; + + // Returns the location information for the current token + // (the token returned by Next()). + Location location() const { return current_.location; } + Location peek_location() const { return next_.location; } + + // Returns the literal string, if any, for the current token (the + // token returned by Next()). The string is 0-terminated and in + // UTF-8 format; they may contain 0-characters. Literal strings are + // collected for identifiers, strings, and numbers. + // These functions only give the correct result if the literal + // was scanned between calls to StartLiteral() and TerminateLiteral(). 
+ bool is_literal_ascii() { + ASSERT_NOT_NULL(current_.literal_chars); + return current_.literal_chars->is_ascii(); + } + Vector literal_ascii_string() { + ASSERT_NOT_NULL(current_.literal_chars); + return current_.literal_chars->ascii_literal(); + } + Vector literal_uc16_string() { + ASSERT_NOT_NULL(current_.literal_chars); + return current_.literal_chars->uc16_literal(); + } + int literal_length() const { + ASSERT_NOT_NULL(current_.literal_chars); + return current_.literal_chars->length(); + } + + bool literal_contains_escapes() const { + Location location = current_.location; + int source_length = (location.end_pos - location.beg_pos); + if (current_.token == Token::STRING) { + // Subtract delimiters. + source_length -= 2; + } + return current_.literal_chars->length() != source_length; + } + + // Returns the literal string for the next token (the token that + // would be returned if Next() were called). + bool is_next_literal_ascii() { + ASSERT_NOT_NULL(next_.literal_chars); + return next_.literal_chars->is_ascii(); + } + Vector next_literal_ascii_string() { + ASSERT_NOT_NULL(next_.literal_chars); + return next_.literal_chars->ascii_literal(); + } + Vector next_literal_uc16_string() { + ASSERT_NOT_NULL(next_.literal_chars); + return next_.literal_chars->uc16_literal(); + } + int next_literal_length() const { + ASSERT_NOT_NULL(next_.literal_chars); + return next_.literal_chars->length(); + } + + UnicodeCache* unicode_cache() { return unicode_cache_; } + + static const int kCharacterLookaheadBufferSize = 1; protected: - virtual unsigned BufferSeekForward(unsigned delta); - virtual unsigned FillBuffer(unsigned char_position, unsigned length); - void SetRawPosition(unsigned char_position); - - const byte* raw_data_; - unsigned raw_data_length_; // Measured in bytes, not characters. - unsigned raw_data_pos_; - // The character position of the character at raw_data[raw_data_pos_]. - // Not necessarily the same as pos_. 
- unsigned raw_character_position_; + // The current and look-ahead token. + struct TokenDesc { + Token::Value token; + Location location; + LiteralBuffer* literal_chars; + }; + + // Call this after setting source_ to the input. + void Init() { + // Set c0_ (one character ahead) + STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); + Advance(); + // Initialize current_ to not refer to a literal. + current_.literal_chars = NULL; + } + + // Literal buffer support + inline void StartLiteral() { + LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ? + &literal_buffer2_ : &literal_buffer1_; + free_buffer->Reset(); + next_.literal_chars = free_buffer; + } + + inline void AddLiteralChar(uc32 c) { + ASSERT_NOT_NULL(next_.literal_chars); + next_.literal_chars->AddChar(c); + } + + // Complete scanning of a literal. + inline void TerminateLiteral() { + // Does nothing in the current implementation. + } + + // Stops scanning of a literal and drop the collected characters, + // e.g., due to an encountered error. + inline void DropLiteral() { + next_.literal_chars = NULL; + } + + inline void AddLiteralCharAdvance() { + AddLiteralChar(c0_); + Advance(); + } + + // Low-level scanning support. + void Advance() { c0_ = source_->Advance(); } + void PushBack(uc32 ch) { + source_->PushBack(c0_); + c0_ = ch; + } + + inline Token::Value Select(Token::Value tok) { + Advance(); + return tok; + } + + inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { + Advance(); + if (c0_ == next) { + Advance(); + return then; + } else { + return else_; + } + } + + uc32 ScanHexNumber(int expected_length); + + // Return the current source position. + int source_pos() { + return source_->pos() - kCharacterLookaheadBufferSize; + } + + UnicodeCache* unicode_cache_; + + // Buffers collecting literal strings, numbers, etc. 
+ LiteralBuffer literal_buffer1_; + LiteralBuffer literal_buffer2_; + + TokenDesc current_; // desc for current token (as returned by Next()) + TokenDesc next_; // desc for next token (one token look-ahead) + + // Input stream. Must be initialized to an UC16CharacterStream. + UC16CharacterStream* source_; + + // One Unicode character look-ahead; c0_ < 0 at the end of the input. + uc32 c0_; }; +// ---------------------------------------------------------------------------- +// JavaScriptScanner - base logic for JavaScript scanning. -// UTF16 buffer to read characters from an external string. -class ExternalTwoByteStringUC16CharacterStream: public UC16CharacterStream { +class JavaScriptScanner : public Scanner { public: - ExternalTwoByteStringUC16CharacterStream(Handle data, - int start_position, - int end_position); - virtual ~ExternalTwoByteStringUC16CharacterStream(); + // A LiteralScope that disables recording of some types of JavaScript + // literals. If the scanner is configured to not record the specific + // type of literal, the scope will not call StartLiteral. + class LiteralScope { + public: + explicit LiteralScope(JavaScriptScanner* self) + : scanner_(self), complete_(false) { + scanner_->StartLiteral(); + } + ~LiteralScope() { + if (!complete_) scanner_->DropLiteral(); + } + void Complete() { + scanner_->TerminateLiteral(); + complete_ = true; + } - virtual void PushBack(uc32 character) { - ASSERT(buffer_cursor_ > raw_data_); - buffer_cursor_--; - pos_--; + private: + JavaScriptScanner* scanner_; + bool complete_; + }; + + explicit JavaScriptScanner(UnicodeCache* scanner_contants); + + void Initialize(UC16CharacterStream* source); + + // Returns the next token. + Token::Value Next(); + + // Returns true if there was a line terminator before the peek'ed token, + // possibly inside a multi-line comment. 
+ bool HasAnyLineTerminatorBeforeNext() const { + return has_line_terminator_before_next_ || + has_multiline_comment_before_next_; } - protected: - virtual unsigned SlowSeekForward(unsigned delta) { - // Fast case always handles seeking. - return 0; + // Scans the input as a regular expression pattern, previous + // character(s) must be /(=). Returns true if a pattern is scanned. + bool ScanRegExpPattern(bool seen_equal); + // Returns true if regexp flags are scanned (always since flags can + // be empty). + bool ScanRegExpFlags(); + + // Tells whether the buffer contains an identifier (no escapes). + // Used for checking if a property name is an identifier. + static bool IsIdentifier(unibrow::CharacterStream* buffer); + + // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. + uc32 ScanOctalEscape(uc32 c, int length); + + // Returns the location of the last seen octal literal + Location octal_position() const { return octal_pos_; } + void clear_octal_position() { octal_pos_ = Location::invalid(); } + + // Seek forward to the given position. This operation does not + // work in general, for instance when there are pushed back + // characters, but works for seeking forward until simple delimiter + // tokens, which is what it is used for. + void SeekForward(int pos); + + bool HarmonyBlockScoping() const { + return harmony_block_scoping_; } - virtual bool ReadBlock() { - // Entire string is read at start. - return false; + void SetHarmonyBlockScoping(bool block_scoping) { + harmony_block_scoping_ = block_scoping; } - Handle source_; - const uc16* raw_data_; // Pointer to the actual array of characters. + + + protected: + bool SkipWhiteSpace(); + Token::Value SkipSingleLineComment(); + Token::Value SkipMultiLineComment(); + + // Scans a single JavaScript token. 
+ void Scan(); + + void ScanDecimalDigits(); + Token::Value ScanNumber(bool seen_period); + Token::Value ScanIdentifierOrKeyword(); + Token::Value ScanIdentifierSuffix(LiteralScope* literal); + + void ScanEscape(); + Token::Value ScanString(); + + // Scans a possible HTML comment -- begins with ' class SmartPointer { public: + // Default constructor. Constructs an empty scoped pointer. + inline SmartPointer() : p_(NULL) {} - // Default constructor. Construct an empty scoped pointer. - inline SmartPointer() : p(NULL) {} - - - // Construct a scoped pointer from a plain one. - explicit inline SmartPointer(T* pointer) : p(pointer) {} - + // Constructs a scoped pointer from a plain one. + explicit inline SmartPointer(T* ptr) : p_(ptr) {} // Copy constructor removes the pointer from the original to avoid double // freeing. - inline SmartPointer(const SmartPointer& rhs) : p(rhs.p) { - const_cast&>(rhs).p = NULL; + inline SmartPointer(const SmartPointer& rhs) : p_(rhs.p_) { + const_cast&>(rhs).p_ = NULL; } - // When the destructor of the scoped pointer is executed the plain pointer // is deleted using DeleteArray. This implies that you must allocate with // NewArray. - inline ~SmartPointer() { if (p) DeleteArray(p); } + inline ~SmartPointer() { if (p_) DeleteArray(p_); } + inline T* operator->() const { return p_; } // You can get the underlying pointer out with the * operator. - inline T* operator*() { return p; } - + inline T* operator*() { return p_; } // You can use [n] to index as if it was a plain pointer inline T& operator[](size_t i) { - return p[i]; + return p_[i]; } // We don't have implicit conversion to a T* since that hinders migration: @@ -77,31 +73,26 @@ class SmartPointer { // deleted then call Detach(). Afterwards, the smart pointer is empty // (NULL). inline T* Detach() { - T* temp = p; - p = NULL; + T* temp = p_; + p_ = NULL; return temp; } - // Assignment requires an empty (NULL) SmartPointer as the receiver. 
Like // the copy constructor it removes the pointer in the original to avoid // double freeing. inline SmartPointer& operator=(const SmartPointer& rhs) { ASSERT(is_empty()); - T* tmp = rhs.p; // swap to handle self-assignment - const_cast&>(rhs).p = NULL; - p = tmp; + T* tmp = rhs.p_; // swap to handle self-assignment + const_cast&>(rhs).p_ = NULL; + p_ = tmp; return *this; } - - inline bool is_empty() { - return p == NULL; - } - + inline bool is_empty() { return p_ == NULL; } private: - T* p; + T* p_; }; } } // namespace v8::internal diff --git a/src/strtod.cc b/src/strtod.cc index b32abd97d79..c89c8f33397 100644 --- a/src/strtod.cc +++ b/src/strtod.cc @@ -26,14 +26,11 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include -#ifndef V8_INFINITY -#define V8_INFINITY std::numeric_limits::infinity() -#endif - +#include "globals.h" #include "utils.h" - #include "strtod.h" #include "bignum.h" #include "cached-powers.h" diff --git a/src/utils.h b/src/utils.h index 785bc4373c8..5a875d8271b 100644 --- a/src/utils.h +++ b/src/utils.h @@ -497,9 +497,6 @@ class Collector { public: explicit Collector(int initial_capacity = kMinCapacity) : index_(0), size_(0) { - if (initial_capacity < kMinCapacity) { - initial_capacity = kMinCapacity; - } current_chunk_ = Vector::New(initial_capacity); } @@ -601,25 +598,23 @@ class Collector { // Creates a new current chunk, and stores the old chunk in the chunks_ list. 
void Grow(int min_capacity) { ASSERT(growth_factor > 1); - int growth = current_chunk_.length() * (growth_factor - 1); - if (growth > max_growth) { - growth = max_growth; - } - int new_capacity = current_chunk_.length() + growth; - if (new_capacity < min_capacity) { - new_capacity = min_capacity + growth; - } - Vector new_chunk = Vector::New(new_capacity); - int new_index = PrepareGrow(new_chunk); - if (index_ > 0) { - chunks_.Add(current_chunk_.SubVector(0, index_)); + int new_capacity; + int current_length = current_chunk_.length(); + if (current_length < kMinCapacity) { + // The collector started out as empty. + new_capacity = min_capacity * growth_factor; + if (new_capacity < kMinCapacity) new_capacity = kMinCapacity; } else { - // Can happen if the call to PrepareGrow moves everything into - // the new chunk. - current_chunk_.Dispose(); + int growth = current_length * (growth_factor - 1); + if (growth > max_growth) { + growth = max_growth; + } + new_capacity = current_length + growth; + if (new_capacity < min_capacity) { + new_capacity = min_capacity + growth; + } } - current_chunk_ = new_chunk; - index_ = new_index; + NewChunk(new_capacity); ASSERT(index_ + min_capacity <= current_chunk_.length()); } @@ -627,8 +622,15 @@ class Collector { // some of the current data into the new chunk. The function may update // the current index_ value to represent data no longer in the current chunk. // Returns the initial index of the new chunk (after copied data). - virtual int PrepareGrow(Vector new_chunk) { - return 0; + virtual void NewChunk(int new_capacity) { + Vector new_chunk = Vector::New(new_capacity); + if (index_ > 0) { + chunks_.Add(current_chunk_.SubVector(0, index_)); + } else { + current_chunk_.Dispose(); + } + current_chunk_ = new_chunk; + index_ = 0; } }; @@ -683,20 +685,26 @@ class SequenceCollector : public Collector { int sequence_start_; // Move the currently active sequence to the new chunk. 
- virtual int PrepareGrow(Vector new_chunk) { - if (sequence_start_ != kNoSequence) { - int sequence_length = this->index_ - sequence_start_; - // The new chunk is always larger than the current chunk, so there - // is room for the copy. - ASSERT(sequence_length < new_chunk.length()); - for (int i = 0; i < sequence_length; i++) { - new_chunk[i] = this->current_chunk_[sequence_start_ + i]; - } - this->index_ = sequence_start_; - sequence_start_ = 0; - return sequence_length; + virtual void NewChunk(int new_capacity) { + if (sequence_start_ == kNoSequence) { + // Fall back on default behavior if no sequence has been started. + this->Collector::NewChunk(new_capacity); + return; } - return 0; + int sequence_length = this->index_ - sequence_start_; + Vector new_chunk = Vector::New(sequence_length + new_capacity); + ASSERT(sequence_length < new_chunk.length()); + for (int i = 0; i < sequence_length; i++) { + new_chunk[i] = this->current_chunk_[sequence_start_ + i]; + } + if (sequence_start_ > 0) { + this->chunks_.Add(this->current_chunk_.SubVector(0, sequence_start_)); + } else { + this->current_chunk_.Dispose(); + } + this->current_chunk_ = new_chunk; + this->index_ = sequence_length; + sequence_start_ = 0; } }; diff --git a/src/v8conversions.cc b/src/v8conversions.cc index 96056ecf441..bf175e50b5f 100644 --- a/src/v8conversions.cc +++ b/src/v8conversions.cc @@ -34,7 +34,6 @@ #include "v8conversions.h" #include "dtoa.h" #include "factory.h" -#include "scanner-base.h" #include "strtod.h" namespace v8 { diff --git a/src/version.cc b/src/version.cc index aeab053f0f2..53b1758c506 100644 --- a/src/version.cc +++ b/src/version.cc @@ -34,7 +34,7 @@ // cannot be changed without changing the SCons build script. #define MAJOR_VERSION 3 #define MINOR_VERSION 6 -#define BUILD_NUMBER 1 +#define BUILD_NUMBER 2 #define PATCH_LEVEL 0 // Use 1 for candidates and 0 otherwise. // (Boolean macro values are not supported by all preprocessors.) 
diff --git a/src/win32-math.cc b/src/win32-math.cc new file mode 100644 index 00000000000..3410872bb5a --- /dev/null +++ b/src/win32-math.cc @@ -0,0 +1,106 @@ +// Copyright 2011 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Extra POSIX/ANSI routines for Win32 when using Visual Studio C++. Please +// refer to The Open Group Base Specification for specification of the correct +// semantics for these functions. 
+// (http://www.opengroup.org/onlinepubs/000095399/) +#ifdef _MSC_VER + +#undef V8_WIN32_LEAN_AND_MEAN +#define V8_WIN32_HEADERS_FULL +#include "win32-headers.h" +#include // Required for INT_MAX etc. +#include +#include // Required for DBL_MAX and on Win32 for finite() +#include "win32-math.h" + +#include "checks.h" + +namespace v8 { + +// Test for finite value - usually defined in math.h +int isfinite(double x) { + return _finite(x); +} + +} // namespace v8 + + +// Test for a NaN (not a number) value - usually defined in math.h +int isnan(double x) { + return _isnan(x); +} + + +// Test for infinity - usually defined in math.h +int isinf(double x) { + return (_fpclass(x) & (_FPCLASS_PINF | _FPCLASS_NINF)) != 0; +} + + +// Test if x is less than y and both nominal - usually defined in math.h +int isless(double x, double y) { + return isnan(x) || isnan(y) ? 0 : x < y; +} + + +// Test if x is greater than y and both nominal - usually defined in math.h +int isgreater(double x, double y) { + return isnan(x) || isnan(y) ? 0 : x > y; +} + + +// Classify floating point number - usually defined in math.h +int fpclassify(double x) { + // Use the MS-specific _fpclass() for classification. + int flags = _fpclass(x); + + // Determine class. We cannot use a switch statement because + // the _FPCLASS_ constants are defined as flags. + if (flags & (_FPCLASS_PN | _FPCLASS_NN)) return FP_NORMAL; + if (flags & (_FPCLASS_PZ | _FPCLASS_NZ)) return FP_ZERO; + if (flags & (_FPCLASS_PD | _FPCLASS_ND)) return FP_SUBNORMAL; + if (flags & (_FPCLASS_PINF | _FPCLASS_NINF)) return FP_INFINITE; + + // All cases should be covered by the code above. + ASSERT(flags & (_FPCLASS_SNAN | _FPCLASS_QNAN)); + return FP_NAN; +} + + +// Test sign - usually defined in math.h +int signbit(double x) { + // We need to take care of the special case of both positive + // and negative versions of zero. 
+ if (x == 0) + return _fpclass(x) & _FPCLASS_NZ; + else + return x < 0; +} + +#endif // _MSC_VER diff --git a/src/win32-math.h b/src/win32-math.h new file mode 100644 index 00000000000..68759990b43 --- /dev/null +++ b/src/win32-math.h @@ -0,0 +1,61 @@ +// Copyright 2011 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Extra POSIX/ANSI routines for Win32 when using Visual Studio C++. 
Please +// refer to The Open Group Base Specification for specification of the correct +// semantics for these functions. +// (http://www.opengroup.org/onlinepubs/000095399/) + +#ifndef V8_WIN32_MATH_H_ +#define V8_WIN32_MATH_H_ + +#ifndef _MSC_VER +#error Wrong environment, expected MSVC. +#endif // _MSC_VER + +enum { + FP_NAN, + FP_INFINITE, + FP_ZERO, + FP_SUBNORMAL, + FP_NORMAL +}; + +namespace v8 { + +int isfinite(double x); + +} // namespace v8 + +int isnan(double x); +int isinf(double x); +int isless(double x, double y); +int isgreater(double x, double y); +int fpclassify(double x); +int signbit(double x); + +#endif // V8_WIN32_MATH_H_ diff --git a/test/cctest/test-parsing.cc b/test/cctest/test-parsing.cc index 8b6afdc59c4..160c9b11bee 100755 --- a/test/cctest/test-parsing.cc +++ b/test/cctest/test-parsing.cc @@ -31,14 +31,14 @@ #include "v8.h" +#include "cctest.h" +#include "execution.h" #include "isolate.h" -#include "token.h" -#include "scanner.h" #include "parser.h" -#include "utils.h" -#include "execution.h" #include "preparser.h" -#include "cctest.h" +#include "scanner-character-streams.h" +#include "token.h" +#include "utils.h" TEST(ScanKeywords) { struct KeywordToken { diff --git a/test/cctest/test-profile-generator.cc b/test/cctest/test-profile-generator.cc index 250ebd4a5f2..4a29e59c56f 100644 --- a/test/cctest/test-profile-generator.cc +++ b/test/cctest/test-profile-generator.cc @@ -37,16 +37,16 @@ TEST(TokenEnumerator) { TokenEnumerator te; CHECK_EQ(TokenEnumerator::kNoSecurityToken, te.GetTokenId(NULL)); v8::HandleScope hs; - v8::Local token1(v8::String::New("1")); + v8::Local token1(v8::String::New("1x")); CHECK_EQ(0, te.GetTokenId(*v8::Utils::OpenHandle(*token1))); CHECK_EQ(0, te.GetTokenId(*v8::Utils::OpenHandle(*token1))); - v8::Local token2(v8::String::New("2")); + v8::Local token2(v8::String::New("2x")); CHECK_EQ(1, te.GetTokenId(*v8::Utils::OpenHandle(*token2))); CHECK_EQ(1, te.GetTokenId(*v8::Utils::OpenHandle(*token2))); CHECK_EQ(0, 
te.GetTokenId(*v8::Utils::OpenHandle(*token1))); { v8::HandleScope hs; - v8::Local token3(v8::String::New("3")); + v8::Local token3(v8::String::New("3x")); CHECK_EQ(2, te.GetTokenId(*v8::Utils::OpenHandle(*token3))); CHECK_EQ(1, te.GetTokenId(*v8::Utils::OpenHandle(*token2))); CHECK_EQ(0, te.GetTokenId(*v8::Utils::OpenHandle(*token1))); diff --git a/test/cctest/test-utils.cc b/test/cctest/test-utils.cc index e136858300e..e4f70df4093 100644 --- a/test/cctest/test-utils.cc +++ b/test/cctest/test-utils.cc @@ -195,3 +195,15 @@ TEST(SequenceCollector) { } result.Dispose(); } + + +TEST(SequenceCollectorRegression) { + SequenceCollector collector(16); + collector.StartSequence(); + collector.Add('0'); + collector.AddBlock( + i::Vector("12345678901234567890123456789012", 32)); + i::Vector seq = collector.EndSequence(); + CHECK_EQ(0, strncmp("0123456789012345678901234567890123", + seq.start(), seq.length())); +} diff --git a/test/mjsunit/string-replace.js b/test/mjsunit/string-replace.js index 9e4f559e4cf..6b022df307d 100644 --- a/test/mjsunit/string-replace.js +++ b/test/mjsunit/string-replace.js @@ -207,3 +207,8 @@ replaceTest("[ab-aabb-ab-b][az-aazz-az-z]", replaceTest("[ab-aabb-ab-b][az-aazz-az-z]", "abaz", /a(.)/g, replacer); + +var str = 'She sells seashells by the seashore.'; +var re = /sh/g; +assertEquals('She sells sea$schells by the sea$schore.', + str.replace(re,"$$" + 'sch')) diff --git a/test/preparser/duplicate-parameter.pyt b/test/preparser/duplicate-parameter.pyt new file mode 100644 index 00000000000..4dfb7d691fe --- /dev/null +++ b/test/preparser/duplicate-parameter.pyt @@ -0,0 +1,90 @@ +# Copyright 2011 the V8 project authors. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided +# with the distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# Templatated tests with duplicate parameter names. + +# ---------------------------------------------------------------------- +# Constants and utility functions + +# A template that performs the same strict-mode test in different +# scopes (global scope, function scope, and nested function scope), +# and in non-strict mode too. 
+def DuplicateParameterTest(name, source): + expectation = "strict_param_dupe" + non_selfstrict = {"selfstrict":"", "id":"selfnormal"} + + Template(name, '"use strict";\n' + source)(non_selfstrict, expectation) + Template(name + '-infunc', + 'function foo() {\n "use strict";\n' + source +'\n}\n')( + non_selfstrict, expectation) + Template(name + '-infunc2', + 'function foo() {\n "use strict";\n function bar() {\n' + + source +'\n }\n}\n')(non_selfstrict, expectation) + + selfstrict = {"selfstrict": "\"use strict\";", "id": "selfstrict"} + nestedstrict = {"selfstrict": "function bar(){\"use strict\";}", + "id": "nestedstrict"} + selfstrictnestedclean = {"selfstrict": """ + "use strict"; + function bar(){} + """, "id": "selfstrictnestedclean"} + selftest = Template(name + '-$id', source) + selftest(selfstrict, expectation) + selftest(selfstrictnestedclean, expectation) + selftest(nestedstrict, None) + selftest(non_selfstrict, None) + + +# ---------------------------------------------------------------------- +# Test templates + +DuplicateParameterTest("dups", """ + function foo(a, a) { $selfstrict } +"""); + +DuplicateParameterTest("dups-apart", """ + function foo(a, b, c, d, e, f, g, h, i, j, k, l, m, n, a) { $selfstrict } +"""); + +DuplicateParameterTest("dups-escaped", """ + function foo(\u0061, b, c, d, e, f, g, h, i, j, k, l, m, n, a) { $selfstrict } +"""); + +DuplicateParameterTest("triples", """ + function foo(a, b, c, d, e, f, g, h, a, i, j, k, l, m, n, a) { $selfstrict } +"""); + +DuplicateParameterTest("escapes", """ + function foo(a, \u0061) { $selfstrict } +"""); + +DuplicateParameterTest("long-names", """ + function foo(arglebargleglopglyfarglebargleglopglyfarglebargleglopglyfa, + arglebargleglopglyfarglebargleglopglyfarglebargleglopglyfa) { + $selfstrict + } +"""); diff --git a/test/preparser/duplicate-property.pyt b/test/preparser/duplicate-property.pyt new file mode 100644 index 00000000000..5abf9adbcfd --- /dev/null +++ 
b/test/preparser/duplicate-property.pyt @@ -0,0 +1,162 @@ +# Copyright 2011 the V8 project authors. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided +# with the distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# Tests of duplicate properties in object literals. + +# ---------------------------------------------------------------------- +# Utility functions to generate a number of tests for each property +# name pair. 
+
+def PropertyTest(name, propa, propb, allow_strict = True):
+  # Registers the duplicate-property tests for one pair of property names.
+  #
+  #   name:         substituted for "$name" in the generated test names.
+  #   propa, propb: property-name source text substituted for "$id1" and
+  #                 "$id2" in the object literals below.
+  #   allow_strict: when False, no "strict-" variants are generated (used
+  #                 by callers whose property text is not usable in
+  #                 strict mode, e.g. octal literals).
+  replacement = {"id1": propa, "id2": propb, "name": name}
+
+  # Registers the same test in both strict and non-strict context, with
+  # the same expectation for both.
+  def StrictTest(name, source, replacement, expectation):
+    if (allow_strict):
+      Template("strict-" + name,
+               "\"use strict\";\n" + source)(replacement, expectation)
+    Template(name, source)(replacement, expectation)
+
+  # Two data properties with the same name: this one only fails in strict
+  # context, so the two variants carry different expectations and cannot
+  # go through StrictTest.
+  if (allow_strict):
+    Template("strict-$name-data-data", """ "use strict"; var o = {$id1: 42, $id2: 42}; """)(replacement, "strict_duplicate_property")
+
+  Template("$name-data-data", """ var o = {$id1: 42, $id2: 42}; """)(replacement, None)
+
+  # Mixing a data property with an accessor of the same name.
+  StrictTest("$name-data-get", """ var o = {$id1: 42, get $id2(){}}; """, replacement, "accessor_data_property")
+
+  StrictTest("$name-data-set", """ var o = {$id1: 42, set $id2(v){}}; """, replacement, "accessor_data_property")
+
+  StrictTest("$name-get-data", """ var o = {get $id1(){}, $id2: 42}; """, replacement, "accessor_data_property")
+
+  StrictTest("$name-set-data", """ var o = {set $id1(v){}, $id2: 42}; """, replacement, "accessor_data_property")
+
+  # Two accessors of the same kind with the same name.
+  StrictTest("$name-get-get", """ var o = {get $id1(){}, get $id2(){}}; """, replacement, "accessor_get_set")
+
+  StrictTest("$name-set-set", """ var o = {set $id1(v){}, set $id2(v){}}; """, replacement, "accessor_get_set")
+
+  # The same name in an outer and a nested literal is not a duplicate.
+  StrictTest("$name-nested-get", """ var o = {get $id1(){}, o: {get $id2(){} } }; """, replacement, None)
+
+  # Setters are declared with exactly one parameter, like every other
+  # setter above, so that this test exercises nesting only and does not
+  # depend on how the parser treats a parameterless setter.
+  StrictTest("$name-nested-set", """ var o = {set $id1(v){}, o: {set $id2(v){} } }; """, replacement, None)
+
+
+# Runs PropertyTest with the two property names in both orders; the
+# generated test names get a "-1" / "-2" suffix respectively.
+def TestBothWays(name, propa, propb, allow_strict = True):
+  PropertyTest(name + "-1", propa, propb, allow_strict)
+  PropertyTest(name + "-2", propb, propa, allow_strict)
+
+# Runs PropertyTest with the same property name used twice.
+def TestSame(name, prop, allow_strict = True):
+  PropertyTest(name, prop, prop, allow_strict)
+
+#-----------------------------------------------------------------------
+
+# Simple
+# identifier property
+TestSame("a", "a")
+
+# Get/set identifiers
+TestSame("get-id", "get")
+TestSame("set-id", "set")
+
+# Number properties
+TestSame("0", "0")
+TestSame("0.1", "0.1")
+TestSame("1.0", "1.0")
+TestSame("42.33", "42.33")
+TestSame("2^32-2", "4294967294")
+TestSame("2^32", "4294967296")
+TestSame("2^53", "9007199254740992")
+TestSame("Hex20", "0x20")
+TestSame("exp10", "1e10")
+TestSame("exp20", "1e20")
+# Octal literal: strict variants are not generated (allow_strict = False).
+TestSame("Oct40", "040", False);
+
+
+# String properties
+TestSame("str-a", '"a"')
+TestSame("str-0", '"0"')
+TestSame("str-42", '"42"')
+TestSame("str-empty", '""')
+
+# Keywords
+TestSame("if", "if")
+TestSame("case", "case")
+
+# Future reserved keywords
+TestSame("public", "public")
+TestSame("class", "class")
+
+
+# Test that numbers are converted to string correctly.
+# Each pair below is two different spellings that are expected to name
+# the same property.
+
+TestBothWays("hex-int", "0x20", "32")
+TestBothWays("oct-int", "040", "32", False) # Octals disallowed in strict mode.
+TestBothWays("dec-int", "32.00", "32")
+TestBothWays("dec-underflow-int",
+             "32.00000000000000000000000000000000000000001", "32")
+TestBothWays("exp-int", "3.2e1", "32")
+# Negative-exponent spelling; needs its own name so that the generated
+# tests do not collide with the "exp-int" ones above.
+TestBothWays("exp-int-neg", "3200e-2", "32")
+TestBothWays("overflow-inf", "1e2000", "Infinity")
+TestBothWays("overflow-inf-exact", "1.797693134862315808e+308", "Infinity")
+TestBothWays("non-overflow-inf-exact", "1.797693134862315807e+308",
+             "1.7976931348623157e+308")
+TestBothWays("underflow-0", "1e-2000", "0")
+TestBothWays("underflow-0-exact", "2.4703282292062E-324", "0")
+TestBothWays("non-underflow-0-exact", "2.4703282292063E-324", "5e-324")
+TestBothWays("precission-loss-high", "9007199254740992", "9007199254740993")
+TestBothWays("precission-loss-low", "1.9999999999999998", "1.9999999999999997")
+TestBothWays("non-canonical-literal-int", "1.0", "1")
+TestBothWays("non-canonical-literal-frac", "1.50", "1.5")
+TestBothWays("rounding-down", "1.12512512512512452", "1.1251251251251244")
+TestBothWays("rounding-up", "1.12512512512512453", "1.1251251251251246")
+
+TestBothWays("hex-int-str",
"0x20", '"32"') +TestBothWays("dec-int-str", "32.00", '"32"') +TestBothWays("exp-int-str", "3.2e1", '"32"') +TestBothWays("overflow-inf-str", "1e2000", '"Infinity"') +TestBothWays("underflow-0-str", "1e-2000", '"0"') +TestBothWays("non-canonical-literal-int-str", "1.0", '"1"') +TestBothWays("non-canonical-literal-frac-str", "1.50", '"1.5"') diff --git a/test/preparser/testcfg.py b/test/preparser/testcfg.py index d900e2696f8..88c06a31adc 100644 --- a/test/preparser/testcfg.py +++ b/test/preparser/testcfg.py @@ -98,7 +98,6 @@ def GetExpectations(self): def ParsePythonTestTemplates(self, result, filename, executable, current_path, mode): pathname = join(self.root, filename + ".pyt") - source = open(pathname).read(); def Test(name, source, expectation): throws = None if (expectation is not None): @@ -118,8 +117,7 @@ def MkTest(replacement, expectation): testsource = testsource.replace("$"+key, replacement[key]); Test(testname, testsource, expectation) return MkTest - eval(compile(source, pathname, "exec"), - {"Test": Test, "Template": Template}, {}) + execfile(pathname, {"Test": Test, "Template": Template}) def ListTests(self, current_path, path, mode, variant_flags): executable = 'preparser' @@ -148,7 +146,7 @@ def ListTests(self, current_path, path, mode, variant_flags): filenames.sort() for file in filenames: # Each file as a python source file to be executed in a specially - # perparsed environment (defining the Template and Test functions) + # created environment (defining the Template and Test functions) self.ParsePythonTestTemplates(result, file, executable, current_path, mode) return result diff --git a/tools/gyp/v8.gyp b/tools/gyp/v8.gyp index 2da821398af..c8db67f1027 100644 --- a/tools/gyp/v8.gyp +++ b/tools/gyp/v8.gyp @@ -71,6 +71,13 @@ ], }, }], + ['soname_version!=""', { + # Ideally, we'd like to specify the full filename for the + # library and set it to "libv8.so.<(soname_version)", + # but currently the best we can do is use 'product_name' and + # get 
"libv8-<(soname_version).so". + 'product_name': 'v8-<(soname_version)', + }], ], }, { @@ -288,11 +295,11 @@ '../../src/disasm.h', '../../src/disassembler.cc', '../../src/disassembler.h', - '../../src/dtoa.cc', - '../../src/dtoa.h', '../../src/diy-fp.cc', '../../src/diy-fp.h', '../../src/double.h', + '../../src/dtoa.cc', + '../../src/dtoa.h', '../../src/elements.cc', '../../src/elements.h', '../../src/execution.cc', @@ -407,10 +414,10 @@ '../../src/runtime-profiler.h', '../../src/safepoint-table.cc', '../../src/safepoint-table.h', - '../../src/scanner-base.cc', - '../../src/scanner-base.h', '../../src/scanner.cc', '../../src/scanner.h', + '../../src/scanner-character-streams.cc', + '../../src/scanner-character-streams.h', '../../src/scopeinfo.cc', '../../src/scopeinfo.h', '../../src/scopes.cc', @@ -643,6 +650,8 @@ ['OS=="win"', { 'sources': [ '../../src/platform-win32.cc', + '../../src/win32-math.cc', + '../../src/win32-math.h', ], 'msvs_disabled_warnings': [4351, 4355, 4800], 'link_settings': { @@ -787,19 +796,61 @@ '../../src', ], 'sources': [ + '../../include/v8-preparser.h', + '../../include/v8stdint.h', '../../src/allocation.cc', + '../../src/allocation.h', '../../src/bignum.cc', + '../../src/bignum.h', + '../../src/bignum-dtoa.cc', + '../../src/bignum-dtoa.h', '../../src/cached-powers.cc', + '../../src/cached-powers.h', + '../../src/char-predicates-inl.h', + '../../src/char-predicates.h', + '../../src/checks.h', + '../../src/conversions-inl.h', '../../src/conversions.cc', + '../../src/conversions.h', + '../../src/diy-fp.cc', + '../../src/diy-fp.h', + '../../src/double.h', + '../../src/dtoa.cc', + '../../src/dtoa.h', + '../../src/fast-dtoa.cc', + '../../src/fast-dtoa.h', + '../../src/fixed-dtoa.cc', + '../../src/fixed-dtoa.h', + '../../src/globals.h', '../../src/hashmap.cc', + '../../src/hashmap.h', + '../../src/list-inl.h', + '../../src/list.h', + '../../src/preparse-data-format.h', '../../src/preparse-data.cc', + '../../src/preparse-data.h', 
'../../src/preparser.cc', + '../../src/preparser.h', '../../src/preparser-api.cc', - '../../src/scanner-base.cc', + '../../src/scanner.cc', + '../../src/scanner.h', '../../src/strtod.cc', + '../../src/strtod.h', '../../src/token.cc', + '../../src/token.h', + '../../src/unicode-inl.h', '../../src/unicode.cc', + '../../src/unicode.h', + '../../src/utils-inl.h', '../../src/utils.cc', + '../../src/utils.h', + ], + 'conditions': [ + ['OS=="win"', { + 'sources': [ + '../../src/win32-math.cc', + '../../src/win32-math.h', + ]}], ], }, ], diff --git a/tools/presubmit.py b/tools/presubmit.py index 50d2620162f..fda7ba96e59 100755 --- a/tools/presubmit.py +++ b/tools/presubmit.py @@ -311,13 +311,17 @@ def ProcessContents(self, name, contents): def ProcessFiles(self, files, path): success = True + violations = 0 for file in files: try: handle = open(file) contents = handle.read() - success = self.ProcessContents(file, contents) and success + if not self.ProcessContents(file, contents): + success = False + violations += 1 finally: handle.close() + print "Total violating files: %s" % violations return success @@ -333,8 +337,10 @@ def Main(): parser = GetOptions() (options, args) = parser.parse_args() success = True + print "Running C++ lint check..." if not options.no_lint: success = CppLintProcessor().Run(workspace) and success + print "Running copyright header and trailing whitespaces check..." success = SourceProcessor().Run(workspace) and success if success: return 0 diff --git a/tools/push-to-trunk.sh b/tools/push-to-trunk.sh new file mode 100755 index 00000000000..74edbc48fb4 --- /dev/null +++ b/tools/push-to-trunk.sh @@ -0,0 +1,424 @@ +#!/bin/bash +# Copyright 2011 the V8 project authors. All rights reserved. 
+# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided +# with the distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+
+########## Global variable definitions
+
+# Names of the local git branches this script creates (and later deletes).
+BRANCHNAME=prepare-push
+TRUNKBRANCH=trunk-push
+TEMP_BRANCH=v8-push-to-trunk-script-temporary-branch
+VERSION_FILE="src/version.cc"
+# Common prefix of all temporary files; persisted variables are stored in
+# "$PERSISTFILE_BASENAME-<variable name>".
+PERSISTFILE_BASENAME=/tmp/v8-push-to-trunk-tempfile
+CHANGELOG_ENTRY_FILE="$PERSISTFILE_BASENAME-changelog-entry"
+PATCH_FILE="$PERSISTFILE_BASENAME-patch"
+COMMITMSG_FILE="$PERSISTFILE_BASENAME-commitmsg"
+TOUCHED_FILES_FILE="$PERSISTFILE_BASENAME-touched-files"
+# First step to execute; can be overridden with -s.
+STEP=0
+
+
+########## Function definitions
+
+# Prints the command line help.
+usage() {
+cat << EOF
+usage: $0 OPTIONS
+
+Performs the necessary steps for a V8 push to trunk. Only works for \
+git checkouts.
+
+OPTIONS:
+  -h    Show this message
+  -s    Specify the step where to start work. Default: 0.
+  -l    Manually specify the git commit ID of the last push to trunk.
+EOF
+}
+
+# Prints an optional error message ($1) and exits with status 1.
+die() {
+  [[ -n "$1" ]] && echo "Error: $1"
+  echo "Exiting."
+  exit 1
+}
+
+# Asks the question $1. Returns 0 if the answer is empty, "Y" or "y",
+# and 1 for anything else.
+confirm() {
+  echo -n "$1 [Y/n] "
+  read ANSWER
+  if [[ -z "$ANSWER" || "$ANSWER" == "Y" || "$ANSWER" == "y" ]] ; then
+    return 0
+  else
+    return 1
+  fi
+}
+
+# Deletes the local branch $1 if it exists, after asking for confirmation;
+# dies if the user declines.
+delete_branch() {
+  # "git branch" prefixes every name with two spaces or "* ", so take the
+  # last field of each line and require an exact whole-line match. A plain
+  # "grep $1" keeps the prefix (and matches substrings of other branch
+  # names), so comparing its output to "$1" would never succeed.
+  local MATCH=$(git branch | awk '{print $NF}' | grep -x -F "$1")
+  if [ "$MATCH" == "$1" ] ; then
+    confirm "Branch $1 exists, do you want to delete it?"
+    if [ $? -eq 0 ] ; then
+      git branch -D $1
+      echo "Branch $1 deleted."
+    else
+      die "Can't continue. Please delete branch $1 and try again."
+    fi
+  fi
+}
+
+# Persist and restore variables to support canceling/resuming execution
+# of this script.
+# persist NAME: writes the current value of variable NAME to its file.
+persist() {
+  local VARNAME=$1
+  local FILE="$PERSISTFILE_BASENAME-$VARNAME"
+  echo "${!VARNAME}" > "$FILE"
+}
+
+# restore NAME: sets variable NAME to the contents of its file.
+restore() {
+  local VARNAME=$1
+  local FILE="$PERSISTFILE_BASENAME-$VARNAME"
+  local VALUE="$(cat "$FILE")"
+  # Assign without eval so that quotes, "$" or backticks in the stored
+  # value are taken literally instead of being executed.
+  printf -v "$VARNAME" '%s' "$VALUE"
+}
+
+# restore_if_unset NAME: restores NAME if it is empty, and dies if it is
+# still empty afterwards.
+restore_if_unset() {
+  local VARNAME=$1
+  [[ -z "${!VARNAME}" ]] && restore "$VARNAME"
+  [[ -z "${!VARNAME}" ]] && die "Variable '$VARNAME' could not be restored."
+}
+
+
+########## Option parsing
+
+while getopts ":hs:l:" OPTION ; do
+  case $OPTION in
+    h)  usage
+        exit 0
+        ;;
+    s)  STEP=$OPTARG
+        ;;
+    l)  LASTPUSH=$OPTARG
+        ;;
+    ?)  echo "Illegal option: -$OPTARG"
+        usage
+        exit 1
+        ;;
+  esac
+done
+
+
+########## Regular workflow
+
+# Every step below is guarded by "$STEP -le N", so starting with "-s N"
+# skips the steps before N and runs N and everything after it.
+
+# Cancel if this is not a git checkout.
+[[ -d .git ]] \
+  || die "This is not a git checkout, this script won't work for you."
+
+# Cancel if EDITOR is unset or not executable.
+[[ -n "$EDITOR" && -x "$(which $EDITOR)" ]] \
+  || die "Please set your EDITOR environment variable, you'll need it."
+
+if [ $STEP -le 0 ] ; then
+  echo ">>> Step 0: Preparation"
+  # Check for a clean workdir.
+  [[ -z "$(git status -s -uno)" ]] \
+    || die "Workspace is not clean. Please commit or undo your changes."
+
+  # Persist current branch.
+  CURRENT_BRANCH=$(git status -s -b -uno | grep "^##" | awk '{print $2}')
+  persist "CURRENT_BRANCH"
+  # Get ahold of a safe temporary branch and check it out.
+  if [ "$CURRENT_BRANCH" != "$TEMP_BRANCH" ] ; then
+    delete_branch $TEMP_BRANCH
+    git checkout -b $TEMP_BRANCH
+  fi
+  # Delete branches if they exist.
+  delete_branch $BRANCHNAME
+  delete_branch $TRUNKBRANCH
+fi
+
+if [ $STEP -le 1 ] ; then
+  echo ">>> Step 1: Fetch unfetched revisions."
+  git svn fetch || die "'git svn fetch' failed."
+fi
+
+if [ $STEP -le 2 ] ; then
+  echo ">>> Step 2: Create a fresh branch."
+  git checkout -b $BRANCHNAME svn/bleeding_edge \
+    || die "Creating branch $BRANCHNAME failed."
+fi
+
+if [ $STEP -le 3 ] ; then
+  echo ">>> Step 3: Detect commit ID of last push to trunk."
+  # Start from the most recent commit that touched ChangeLog (unless -l
+  # was given) and walk backwards until the user accepts one.
+  [[ -n "$LASTPUSH" ]] || LASTPUSH=$(git log -1 --format=%H ChangeLog)
+  LOOP=1
+  while [ $LOOP -eq 1 ] ; do
+    # Print assumed commit, circumventing git's pager.
+    git log -1 $LASTPUSH | cat
+    confirm "Is the commit printed above the last push to trunk?"
+    if [ $? -eq 0 ] ; then
+      LOOP=0
+    else
+      LASTPUSH=$(git log -1 --format=%H $LASTPUSH^ ChangeLog)
+    fi
+  done
+  persist "LASTPUSH"
+fi
+
+if [ $STEP -le 4 ] ; then
+  echo ">>> Step 4: Prepare raw ChangeLog entry."
+  # These version numbers are used again later for the trunk commit.
+  MAJOR=$(grep "#define MAJOR_VERSION" "$VERSION_FILE" | awk '{print $NF}')
+  persist "MAJOR"
+  MINOR=$(grep "#define MINOR_VERSION" "$VERSION_FILE" | awk '{print $NF}')
+  persist "MINOR"
+  BUILD=$(grep "#define BUILD_NUMBER" "$VERSION_FILE" | awk '{print $NF}')
+  persist "BUILD"
+
+  DATE=$(date +%Y-%m-%d)
+  persist "DATE"
+  echo "$DATE: Version $MAJOR.$MINOR.$BUILD" > "$CHANGELOG_ENTRY_FILE"
+  echo "" >> "$CHANGELOG_ENTRY_FILE"
+  COMMITS=$(git log $LASTPUSH..HEAD --format=%H)
+  for commit in $COMMITS ; do
+    # Get the commit's title line.
+    git log -1 $commit --format="%w(80,8,8)%s" >> "$CHANGELOG_ENTRY_FILE"
+    # Grep for "BUG=xxxx" lines in the commit message. They are indented
+    # by 8 columns to line up with the %w(80,8,8) wrapping used for the
+    # title and author lines.
+    git log -1 $commit --format="%b" | grep BUG= | grep -v "BUG=$" \
+      | sed -e 's/^/        /' \
+      >> "$CHANGELOG_ENTRY_FILE"
+    # Append the commit's author for reference.
+    git log -1 $commit --format="%w(80,8,8)(%an)" >> "$CHANGELOG_ENTRY_FILE"
+    echo "" >> "$CHANGELOG_ENTRY_FILE"
+  done
+fi
+
+if [ $STEP -le 5 ] ; then
+  echo ">>> Step 5: Edit ChangeLog entry."
+  echo -n "Please press <Return> to have your EDITOR open the ChangeLog entry, \
+then edit its contents to your liking. When you're done, save the file and \
+exit your EDITOR. "
+  read ANSWER
+  $EDITOR "$CHANGELOG_ENTRY_FILE"
+  NEWCHANGELOG=$(mktemp)
+  # Eliminate any trailing newlines by going through a shell variable.
+  CHANGELOGENTRY=$(cat "$CHANGELOG_ENTRY_FILE")
+  [[ -n "$CHANGELOGENTRY" ]] || die "Empty ChangeLog entry."
+  echo "$CHANGELOGENTRY" > "$NEWCHANGELOG"
+  echo "" >> "$NEWCHANGELOG" # Explicitly insert two empty lines.
+  echo "" >> "$NEWCHANGELOG"
+  cat ChangeLog >> "$NEWCHANGELOG"
+  mv "$NEWCHANGELOG" ChangeLog
+fi
+
+if [ $STEP -le 6 ] ; then
+  echo ">>> Step 6: Increment version number."
+  restore_if_unset "BUILD"
+  NEWBUILD=$(($BUILD + 1))
+  confirm "Automatically increment BUILD_NUMBER? (Saying 'n' will fire up \
+your EDITOR on $VERSION_FILE so you can make arbitrary changes. When \
+you're done, save the file and exit your EDITOR.)"
+  if [ $? -eq 0 ] ; then
+    sed -e "/#define BUILD_NUMBER/s/[0-9]*$/$NEWBUILD/" \
+        -i "$VERSION_FILE"
+  else
+    $EDITOR "$VERSION_FILE"
+  fi
+  # Re-read all three numbers from the file, since a manual edit may have
+  # changed any of them.
+  NEWMAJOR=$(grep "#define MAJOR_VERSION" "$VERSION_FILE" | awk '{print $NF}')
+  persist "NEWMAJOR"
+  NEWMINOR=$(grep "#define MINOR_VERSION" "$VERSION_FILE" | awk '{print $NF}')
+  persist "NEWMINOR"
+  NEWBUILD=$(grep "#define BUILD_NUMBER" "$VERSION_FILE" | awk '{print $NF}')
+  persist "NEWBUILD"
+fi
+
+if [ $STEP -le 7 ] ; then
+  echo ">>> Step 7: Commit to local branch."
+  restore_if_unset "NEWMAJOR"
+  restore_if_unset "NEWMINOR"
+  restore_if_unset "NEWBUILD"
+  git commit -a -m "Prepare push to trunk.  \
+Now working on version $NEWMAJOR.$NEWMINOR.$NEWBUILD." \
+    || die "'git commit -a' failed."
+fi
+
+if [ $STEP -le 8 ] ; then
+  echo ">>> Step 8: Upload for code review."
+  echo -n "Please enter the email address of a V8 reviewer for your patch: "
+  read REVIEWER
+  git cl upload -r $REVIEWER --send-mail \
+    || die "'git cl upload' failed, please try again."
+fi
+
+if [ $STEP -le 9 ] ; then
+  echo ">>> Step 9: Commit to the repository."
+  echo "Please wait for an LGTM, then type \"LGTM\" to commit your \
+change. (If you need to iterate on the patch, do so in another shell.)"
+  unset ANSWER
+  while [ "$ANSWER" != "LGTM" ] ; do
+    [[ -n "$ANSWER" ]] && echo "That was not 'LGTM'."
+    echo -n "> "
+    read ANSWER
+  done
+  # Re-read the ChangeLog entry (to pick up possible changes).
+  # The awk program prints everything up to, but not including, the
+  # second line that starts with a YYYY-MM-DD: date.
+  cat ChangeLog | awk --posix '{
+    if ($0 ~ /^[0-9]{4}-[0-9]{2}-[0-9]{2}:/) {
+      if (in_firstblock == 1) {
+        exit 0;
+      } else {
+        in_firstblock = 1;
+      }
+    };
+    print $0;
+  }' > "$CHANGELOG_ENTRY_FILE"
+  git cl dcommit || die "'git cl dcommit' failed, please try again."
+fi
+
+if [ $STEP -le 10 ] ; then
+  echo ">>> Step 10: NOP"
+  # Present in the manual guide, not necessary (even harmful!) for this script.
+fi
+
+if [ $STEP -le 11 ] ; then
+  echo ">>> Step 11: Squash commits into one."
+  # Instead of relying on "git rebase -i", we'll just create a diff, because
+  # that's easier to automate.
+  git diff svn/trunk > "$PATCH_FILE"
+  # Convert the ChangeLog entry to commit message format:
+  # - remove date
+  # - remove indentation
+  # - merge paragraphs into single long lines, keeping empty lines between them.
+  restore_if_unset "DATE"
+  CHANGELOGENTRY=$(cat "$CHANGELOG_ENTRY_FILE")
+  echo "$CHANGELOGENTRY" \
+    | sed -e "s/^$DATE: //" \
+    | sed -e 's/^ *//' \
+    | awk '{
+        if (need_space == 1) {
+          printf(" ");
+        };
+        printf("%s", $0);
+        if ($0 ~ /^$/) {
+          printf("\n\n");
+          need_space = 0;
+        } else {
+          need_space = 1;
+        }
+      }' > "$COMMITMSG_FILE" || die "Commit message editing failed."
+  LOOP=1
+  while [ $LOOP -eq 1 ] ; do
+    echo "This is the trunk commit message:"
+    echo "--------------------"
+    cat "$COMMITMSG_FILE"
+    echo -e "\n--------------------"
+    confirm "Does this look good to you? (Saying 'n' will fire up your \
+EDITOR so you can change the commit message. When you're done, save the \
+file and exit your EDITOR.)"
+    if [ $? -eq 0 ] ; then
+      LOOP=0
+    else
+      $EDITOR "$COMMITMSG_FILE"
+    fi
+  done
+  rm -f "$CHANGELOG_ENTRY_FILE"
+fi
+
+if [ $STEP -le 12 ] ; then
+  echo ">>> Step 12: Create a new branch from trunk."
+  git checkout -b $TRUNKBRANCH svn/trunk \
+    || die "Checking out a new branch '$TRUNKBRANCH' failed."
+fi
+
+if [ $STEP -le 13 ] ; then
+  echo ">>> Step 13: Apply squashed changes."
+  # Capture patch's output instead of piping it through tee: after a
+  # pipeline $? is the status of the last command (tee), so a failing
+  # patch would go unnoticed, and a process substitution may still be
+  # writing the touched-files list when it is read below.
+  PATCH_OUTPUT=$(patch -p1 < "$PATCH_FILE")
+  PATCH_STATUS=$?
+  echo "$PATCH_OUTPUT"
+  [[ $PATCH_STATUS -eq 0 ]] || die "Applying the patch to trunk failed."
+  # The last word of each line of patch's output is taken as a file name.
+  echo "$PATCH_OUTPUT" | awk '{print $NF}' >> "$TOUCHED_FILES_FILE"
+  TOUCHED_FILES=$(cat "$TOUCHED_FILES_FILE")
+  for FILE in $TOUCHED_FILES ; do
+    git add "$FILE"
+  done
+  rm -f "$PATCH_FILE"
+  rm -f "$TOUCHED_FILES_FILE"
+fi
+
+if [ $STEP -le 14 ] ; then
+  echo ">>> Step 14: Set correct version for trunk."
+  restore_if_unset "MAJOR"
+  restore_if_unset "MINOR"
+  restore_if_unset "BUILD"
+  # Trunk gets the version numbers read in step 4 (before the increment
+  # of step 6), with patch level and candidate flag reset to 0.
+  sed -e "/#define MAJOR_VERSION/s/[0-9]*$/$MAJOR/" \
+      -e "/#define MINOR_VERSION/s/[0-9]*$/$MINOR/" \
+      -e "/#define BUILD_NUMBER/s/[0-9]*$/$BUILD/" \
+      -e "/#define PATCH_LEVEL/s/[0-9]*$/0/" \
+      -e "/#define IS_CANDIDATE_VERSION/s/[0-9]*$/0/" \
+      -i "$VERSION_FILE" || die "Patching $VERSION_FILE failed."
+fi
+
+if [ $STEP -le 15 ] ; then
+  echo ">>> Step 15: Commit to local trunk branch."
+  git add "$VERSION_FILE"
+  git commit -F "$COMMITMSG_FILE" || die "'git commit' failed."
+  rm -f "$COMMITMSG_FILE"
+fi
+
+if [ $STEP -le 16 ] ; then
+  echo ">>> Step 16: Sanity check."
+  confirm "Please check if your local checkout is sane: Inspect $VERSION_FILE, \
+compile, run tests. Do you want to commit this new trunk revision to the \
+repository?"
+  [[ $? -eq 0 ]] || die "Execution canceled."
+fi
+
+if [ $STEP -le 17 ] ; then
+  echo ">>> Step 17. Commit to SVN."
+  git svn dcommit || die "'git svn dcommit' failed."
+fi
+
+if [ $STEP -le 18 ] ; then
+  echo ">>> Step 18: Tag the new revision."
+  restore_if_unset "MAJOR"
+  restore_if_unset "MINOR"
+  restore_if_unset "BUILD"
+  git svn tag $MAJOR.$MINOR.$BUILD -m "Tagging version $MAJOR.$MINOR.$BUILD" \
+    || die "'git svn tag' failed."
+fi
+
+if [ $STEP -le 19 ] ; then
+  echo ">>> Step 19: Cleanup."
+  restore_if_unset "CURRENT_BRANCH"
+  git checkout -f $CURRENT_BRANCH
+  # Remove the script's branches, except the one the user started on.
+  [[ "$TEMP_BRANCH" != "$CURRENT_BRANCH" ]] && git branch -D $TEMP_BRANCH
+  [[ "$BRANCHNAME" != "$CURRENT_BRANCH" ]] && git branch -D $BRANCHNAME
+  [[ "$TRUNKBRANCH" != "$CURRENT_BRANCH" ]] && git branch -D $TRUNKBRANCH
+fi
+
+if [ $STEP -le 20 ] ; then
+  echo ">>> Step 20: Done!"
+  restore_if_unset "MAJOR"
+  restore_if_unset "MINOR"
+  restore_if_unset "BUILD"
+  echo "Congratulations, you have successfully created the trunk revision \
+$MAJOR.$MINOR.$BUILD. Please don't forget to update the v8rel spreadsheet, \
+and to roll this new version into Chromium."
+  # Clean up all temporary files.
+  rm -f "$PERSISTFILE_BASENAME"*
+fi