From 4b9a1c297b5851a64febb423c74fd6508a711648 Mon Sep 17 00:00:00 2001 From: Aman Gupta Date: Wed, 23 Nov 2011 00:32:14 -0800 Subject: [PATCH 1/8] bail fast on file extensions we recognize as binary --- lib/linguist/blob_helper.rb | 9 +++++++++ lib/linguist/repository.rb | 3 +++ 2 files changed, 12 insertions(+) diff --git a/lib/linguist/blob_helper.rb b/lib/linguist/blob_helper.rb index 64497ca53c..eb052c3d70 100644 --- a/lib/linguist/blob_helper.rb +++ b/lib/linguist/blob_helper.rb @@ -90,6 +90,15 @@ def detect_encoding @detect_encoding ||= CharlockHolmes::EncodingDetector.new.detect(data) if data end + # Public: Is the blob binary according to its mime type + # + # Return true or false + def binary_mime_type? + if mime_type = Mime.lookup_mime_type_for(pathname.extname) + mime_type.binary? + end + end + # Public: Is the blob binary? # # Return true or false diff --git a/lib/linguist/repository.rb b/lib/linguist/repository.rb index 3341f492ac..a91b99296c 100644 --- a/lib/linguist/repository.rb +++ b/lib/linguist/repository.rb @@ -67,6 +67,9 @@ def compute_stats return if @computed_stats @enum.each do |blob| + # Skip binary file extensions + next if blob.binary_mime_type? + # Skip vendored or generated blobs next if blob.vendored? || blob.generated? || blob.language.nil? From bb4840ca3e9ccac76ee8cd76ac16bfb92deee77b Mon Sep 17 00:00:00 2001 From: Aman Gupta Date: Wed, 23 Nov 2011 00:49:11 -0800 Subject: [PATCH 2/8] viewable? should not load or binary check large blobs --- lib/linguist/blob_helper.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/linguist/blob_helper.rb b/lib/linguist/blob_helper.rb index eb052c3d70..2d4c54ea8c 100644 --- a/lib/linguist/blob_helper.rb +++ b/lib/linguist/blob_helper.rb @@ -150,7 +150,7 @@ def large? # # Return true or false def viewable? - text? && !large? + !large? && text? end vendored_paths = YAML.load_file(File.expand_path("../vendor.yml", __FILE__)) From 21488c84c360aa8adebf6538ab50dd7eaa6c09b7 Mon Sep 17 00:00:00 2001 From: Aman Gupta Date: Wed, 23 Nov 2011 00:49:52 -0800 Subject: [PATCH 3/8] delay calls to binary? in guess_language until viewable?, to avoid loading blobs for well known file extensions --- lib/linguist/blob_helper.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/linguist/blob_helper.rb b/lib/linguist/blob_helper.rb index 2d4c54ea8c..37d8597dc9 100644 --- a/lib/linguist/blob_helper.rb +++ b/lib/linguist/blob_helper.rb @@ -368,7 +368,7 @@ def language # # Returns a Language or nil def guess_language - return if binary? + return if binary_mime_type? # Disambiguate between multiple language extensions disambiguate_extension_language || From 9994ac3a0c50926c4f999e0e4bf985ca5754217f Mon Sep 17 00:00:00 2001 From: Aman Gupta Date: Wed, 23 Nov 2011 00:50:44 -0800 Subject: [PATCH 4/8] only load blobs to check for Date: Wed, 23 Nov 2011 00:51:33 -0800 Subject: [PATCH 5/8] avoid load_blob and shebang check on files with extensions previously, any file with an unrecognized file extension was loaded to check for a shebang. now, this only occurs if the file has a generic name with no file extension (like ./script) it is possible this will no longer match certain scripts with esoteric extensions (if we find these we can add them to the shebang_extname? method). however, most common script extensions (.sh, .rb, .pl, etc) will continue to work since the file extension takes precedence over the shebang line. --- lib/linguist/blob_helper.rb | 10 ++++++ test/test_blob.rb | 68 ++++++++++++++++++++----------------- 2 files changed, 47 insertions(+), 31 deletions(-) diff --git a/lib/linguist/blob_helper.rb b/lib/linguist/blob_helper.rb index 5820736a68..c164d768e6 100644 --- a/lib/linguist/blob_helper.rb +++ b/lib/linguist/blob_helper.rb @@ -142,6 +142,13 @@ def drupal_extname? ['.module', '.install', '.test', '.inc'].include?(extname) end + # Public: Is the blob likely to have a shebang? + # + # Return true or false + def shebang_extname? + extname.empty? + end + MEGABYTE = 1024 * 1024 # Public: Is the blob too big to load? @@ -592,6 +599,9 @@ def shebang_script # # Returns the Language or nil def shebang_language + # Skip file extensions unlikely to have shebangs + return unless shebang_extname? + if script = shebang_script Language[script] end diff --git a/test/test_blob.rb b/test/test_blob.rb index 5d00078cae..ac659c794a 100644 --- a/test/test_blob.rb +++ b/test/test_blob.rb @@ -17,6 +17,12 @@ def blob(name) FileBlob.new(File.join(fixtures_path, name), fixtures_path) end + def script_blob(name) + blob = blob(name) + blob.instance_variable_set(:@name, 'script') + blob + end + def test_name assert_equal "foo.rb", blob("foo.rb").name end @@ -385,40 +391,40 @@ def test_lexer end def test_shebang_script - assert_equal 'sh', blob("script.sh").shebang_script - assert_equal 'bash', blob("script.bash").shebang_script - assert_equal 'zsh', blob("script.zsh").shebang_script - assert_equal 'perl', blob("script.pl").shebang_script - assert_equal 'ruby', blob("script.rb").shebang_script - assert_equal 'ruby', blob("script2.rb").shebang_script - assert_equal 'python', blob("script.py").shebang_script - assert_equal 'node', blob("script.js").shebang_script - assert_equal 'groovy', blob("script.groovy").shebang_script - assert_equal 'macruby', blob("script.mrb").shebang_script - assert_equal 'rake', blob("script.rake").shebang_script - assert_equal 'foo', blob("script.foo").shebang_script - assert_equal 'nush', blob("script.nu").shebang_script - assert_equal 'scala', blob("script.scala").shebang_script - assert_equal 'racket', blob("script.rkt").shebang_script - assert_equal nil, blob("foo.rb").shebang_script + assert_equal 'sh', script_blob("script.sh").shebang_script + assert_equal 'bash', script_blob("script.bash").shebang_script + assert_equal 'zsh', script_blob("script.zsh").shebang_script + assert_equal 'perl', script_blob("script.pl").shebang_script + assert_equal 'ruby', script_blob("script.rb").shebang_script + assert_equal 'ruby', script_blob("script2.rb").shebang_script + assert_equal 'python', script_blob("script.py").shebang_script + assert_equal 'node', script_blob("script.js").shebang_script + assert_equal 'groovy', script_blob("script.groovy").shebang_script + assert_equal 'macruby', script_blob("script.mrb").shebang_script + assert_equal 'rake', script_blob("script.rake").shebang_script + assert_equal 'foo', script_blob("script.foo").shebang_script + assert_equal 'nush', script_blob("script.nu").shebang_script + assert_equal 'scala', script_blob("script.scala").shebang_script + assert_equal 'racket', script_blob("script.rkt").shebang_script + assert_equal nil, script_blob("foo.rb").shebang_script end def test_shebang_language - assert_equal Language['Shell'], blob("script.sh").shebang_language - assert_equal Language['Shell'], blob("script.bash").shebang_language - assert_equal Language['Shell'], blob("script.zsh").shebang_language - assert_equal Language['Perl'], blob("script.pl").shebang_language - assert_equal Language['Ruby'], blob("script.rb").shebang_language - assert_equal Language['Python'], blob("script.py").shebang_language - assert_equal Language['JavaScript'], blob("script.js").shebang_language - assert_equal Language['Groovy'], blob("script.groovy").shebang_language - assert_equal Language['Ruby'], blob("script.mrb").shebang_language - assert_equal Language['Ruby'], blob("script.rake").shebang_language - assert_equal Language['Nu'], blob("script.nu").shebang_language - assert_equal Language['Scala'], blob("script.scala").shebang_language - assert_equal Language['Racket'], blob("script.rkt").shebang_language - assert_equal nil, blob("script.foo").shebang_language - assert_equal nil, blob("foo.rb").shebang_language + assert_equal Language['Shell'], script_blob("script.sh").shebang_language + assert_equal Language['Shell'], script_blob("script.bash").shebang_language + assert_equal Language['Shell'], script_blob("script.zsh").shebang_language + assert_equal Language['Perl'], script_blob("script.pl").shebang_language + assert_equal Language['Ruby'], script_blob("script.rb").shebang_language + assert_equal Language['Python'], script_blob("script.py").shebang_language + assert_equal Language['JavaScript'], script_blob("script.js").shebang_language + assert_equal Language['Groovy'], script_blob("script.groovy").shebang_language + assert_equal Language['Ruby'], script_blob("script.mrb").shebang_language + assert_equal Language['Ruby'], script_blob("script.rake").shebang_language + assert_equal Language['Nu'], script_blob("script.nu").shebang_language + assert_equal Language['Scala'], script_blob("script.scala").shebang_language + assert_equal Language['Racket'], script_blob("script.rkt").shebang_language + assert_equal nil, script_blob("script.foo").shebang_language + assert_equal nil, script_blob("foo.rb").shebang_language end def test_colorize From d2118d1471f94a7d7b343b262b11f247b9778976 Mon Sep 17 00:00:00 2001 From: Aman Gupta Date: Wed, 23 Nov 2011 01:37:01 -0800 Subject: [PATCH 6/8] only check for shebangs in executable files --- lib/linguist/blob_helper.rb | 3 ++- lib/linguist/file_blob.rb | 7 +++++++ test/fixtures/script.bash | 0 test/fixtures/script.foo | 0 test/fixtures/script.groovy | 0 test/fixtures/script.js | 0 test/fixtures/script.mrb | 0 test/fixtures/script.nu | 0 test/fixtures/script.pl | 0 test/fixtures/script.py | 0 test/fixtures/script.rake | 0 test/fixtures/script.rb | 0 test/fixtures/script.rkt | 0 test/fixtures/script.scala | 0 test/fixtures/script.sh | 0 test/fixtures/script.zsh | 0 16 files changed, 9 insertions(+), 1 deletion(-) mode change 100644 => 100755 test/fixtures/script.bash mode change 100644 => 100755 test/fixtures/script.foo mode change 100644 => 100755 test/fixtures/script.groovy mode change 100644 => 100755 test/fixtures/script.js mode change 100644 => 100755 test/fixtures/script.mrb mode change 100644 => 100755 test/fixtures/script.nu mode change 100644 => 100755 test/fixtures/script.pl mode change 100644 => 100755 test/fixtures/script.py mode change 100644 => 100755 test/fixtures/script.rake mode change 100644 => 100755 test/fixtures/script.rb mode change 100644 => 100755 test/fixtures/script.rkt mode change 100644 => 100755 test/fixtures/script.scala mode change 100644 => 100755 test/fixtures/script.sh mode change 100644 => 100755 test/fixtures/script.zsh diff --git a/lib/linguist/blob_helper.rb b/lib/linguist/blob_helper.rb index c164d768e6..d7c70b4fd2 100644 --- a/lib/linguist/blob_helper.rb +++ b/lib/linguist/blob_helper.rb @@ -146,7 +146,8 @@ def drupal_extname? # # Return true or false def shebang_extname? - extname.empty? + extname.empty? && + (mode.to_i(8) & 05) == 05 end MEGABYTE = 1024 * 1024 diff --git a/lib/linguist/file_blob.rb b/lib/linguist/file_blob.rb index 7dbecb4203..7e7f1acda1 100644 --- a/lib/linguist/file_blob.rb +++ b/lib/linguist/file_blob.rb @@ -32,6 +32,13 @@ def initialize(path, base_path = nil) # Returns a String attr_reader :name + # Public: Read file permissions + # + # Returns a String like '100644' + def mode + File.stat(@path).mode.to_s(8) + end + # Public: Read file contents. # # Returns a String. diff --git a/test/fixtures/script.bash b/test/fixtures/script.bash old mode 100644 new mode 100755 diff --git a/test/fixtures/script.foo b/test/fixtures/script.foo old mode 100644 new mode 100755 diff --git a/test/fixtures/script.groovy b/test/fixtures/script.groovy old mode 100644 new mode 100755 diff --git a/test/fixtures/script.js b/test/fixtures/script.js old mode 100644 new mode 100755 diff --git a/test/fixtures/script.mrb b/test/fixtures/script.mrb old mode 100644 new mode 100755 diff --git a/test/fixtures/script.nu b/test/fixtures/script.nu old mode 100644 new mode 100755 diff --git a/test/fixtures/script.pl b/test/fixtures/script.pl old mode 100644 new mode 100755 diff --git a/test/fixtures/script.py b/test/fixtures/script.py old mode 100644 new mode 100755 diff --git a/test/fixtures/script.rake b/test/fixtures/script.rake old mode 100644 new mode 100755 diff --git a/test/fixtures/script.rb b/test/fixtures/script.rb old mode 100644 new mode 100755 diff --git a/test/fixtures/script.rkt b/test/fixtures/script.rkt old mode 100644 new mode 100755 diff --git a/test/fixtures/script.scala b/test/fixtures/script.scala old mode 100644 new mode 100755 diff --git a/test/fixtures/script.sh b/test/fixtures/script.sh old mode 100644 new mode 100755 diff --git a/test/fixtures/script.zsh b/test/fixtures/script.zsh old mode 100644 new mode 100755 From 7cdee48bab5ebc09d67becd27d417085a89911d3 Mon Sep 17 00:00:00 2001 From: Aman Gupta Date: Wed, 23 Nov 2011 03:00:18 -0800 Subject: [PATCH 7/8] handle missing blob mode --- lib/linguist/blob_helper.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/linguist/blob_helper.rb b/lib/linguist/blob_helper.rb index d7c70b4fd2..0cec2813dd 100644 --- a/lib/linguist/blob_helper.rb +++ b/lib/linguist/blob_helper.rb @@ -147,6 +147,7 @@ def drupal_extname? # Return true or false def shebang_extname? extname.empty? && + mode && (mode.to_i(8) & 05) == 05 end From 0180a1ee2f5a4c61407c4d040483aece19be3230 Mon Sep 17 00:00:00 2001 From: Aman Gupta Date: Wed, 23 Nov 2011 12:35:09 -0800 Subject: [PATCH 8/8] skip vendored MathJax files --- lib/linguist/vendor.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/linguist/vendor.yml b/lib/linguist/vendor.yml index 03f229e84c..cd5a551dbc 100644 --- a/lib/linguist/vendor.yml +++ b/lib/linguist/vendor.yml @@ -57,6 +57,8 @@ - (^|/)ckeditor\.js$ - (^|/)tiny_mce([^.]*)\.js$ +# MathJax +- (^|/)MathJax/ ## Python ##