From cfc590a43b92102bdfac331776f4c7e7621d7dfb Mon Sep 17 00:00:00 2001 From: patrick brisbin Date: Mon, 21 Mar 2016 17:55:07 -0400 Subject: [PATCH 1/2] Simplify FileList building The existing algorithm was hard to follow and prone to expanding large directories that should be completely ignored. The new algorithm is the simpler one used by most engines. The only difference in this engine is that we also do glob-filtering on a per-language basis. This used to be configurable, but that fact was not advertised and is likely not being used. The constant was renamed accordingly. This should fix #106 --- lib/cc/engine/analyzers/analyzer_base.rb | 3 +- lib/cc/engine/analyzers/engine_config.rb | 4 -- lib/cc/engine/analyzers/file_list.rb | 62 ++++++------------- lib/cc/engine/analyzers/javascript/main.rb | 2 +- lib/cc/engine/analyzers/php/main.rb | 2 +- lib/cc/engine/analyzers/python/main.rb | 2 +- lib/cc/engine/analyzers/ruby/main.rb | 2 +- .../cc/engine/analyzers/engine_config_spec.rb | 30 +-------- spec/cc/engine/analyzers/file_list_spec.rb | 61 ++++-------------- 9 files changed, 35 insertions(+), 133 deletions(-) diff --git a/lib/cc/engine/analyzers/analyzer_base.rb b/lib/cc/engine/analyzers/analyzer_base.rb index 764b3f18..3e23c377 100644 --- a/lib/cc/engine/analyzers/analyzer_base.rb +++ b/lib/cc/engine/analyzers/analyzer_base.rb @@ -62,8 +62,7 @@ def process_file(path) def file_list @_file_list ||= ::CC::Engine::Analyzers::FileList.new( engine_config: engine_config, - default_paths: self.class::DEFAULT_PATHS, - language: self.class::LANGUAGE + patterns: self.class::PATTERNS, ) end end diff --git a/lib/cc/engine/analyzers/engine_config.rb b/lib/cc/engine/analyzers/engine_config.rb index 17d01bac..767e6db0 100644 --- a/lib/cc/engine/analyzers/engine_config.rb +++ b/lib/cc/engine/analyzers/engine_config.rb @@ -26,10 +26,6 @@ def mass_threshold_for(language) end end - def paths_for(language) - fetch_language(language).fetch("paths", nil) - end - private attr_reader :config diff --git a/lib/cc/engine/analyzers/file_list.rb b/lib/cc/engine/analyzers/file_list.rb index 1e0b873b..ae04568c 100644 --- a/lib/cc/engine/analyzers/file_list.rb +++ b/lib/cc/engine/analyzers/file_list.rb @@ -4,64 +4,38 @@ module CC module Engine module Analyzers class FileList - def initialize(engine_config:, default_paths:, language:) + def initialize(engine_config:, patterns:) @engine_config = engine_config - @default_paths = default_paths - @language = language + @patterns = patterns end def files - Array(matching_files) & Array(included_files) - end - - private - - attr_reader :engine_config, :default_paths, :language - - def matching_files - paths.map do |glob| - Dir.glob("./#{glob}").reject do |path| - File.directory?(path) + engine_config.include_paths.flat_map do |path| + if path.end_with?("/") + expand(path) + elsif matches?(path) + [path] + else + [] end - end.flatten + end end - def paths - engine_paths || default_paths - end + private - def engine_paths - @engine_config.paths_for(language) - end + attr_reader :engine_config, :patterns - def included_files - include_paths. - map { |path| make_relative(path) }. - map { |path| collect_files(path) }.flatten.compact - end + def expand(path) + globs = patterns.map { |p| File.join(path, p) } - def collect_files(path) - if File.directory?(path) - Dir.entries(path).map do |new_path| - next if [".", ".."].include?(new_path) - collect_files File.join(path, new_path) - end - else - path - end + Dir.glob(globs) end - def make_relative(path) - if path.match(%r(^\./)) - path - else - "./#{path}" + def matches?(path) + patterns.any? do |p| + File.fnmatch?(File.join("./", p), path, File::FNM_PATHNAME) end end - - def include_paths - engine_config.include_paths - end end end end diff --git a/lib/cc/engine/analyzers/javascript/main.rb b/lib/cc/engine/analyzers/javascript/main.rb index 62be586d..52f254fc 100644 --- a/lib/cc/engine/analyzers/javascript/main.rb +++ b/lib/cc/engine/analyzers/javascript/main.rb @@ -10,7 +10,7 @@ module Engine module Analyzers module Javascript class Main < CC::Engine::Analyzers::Base - DEFAULT_PATHS = [ + PATTERNS = [ "**/*.js", "**/*.jsx" ] diff --git a/lib/cc/engine/analyzers/php/main.rb b/lib/cc/engine/analyzers/php/main.rb index e5bbf3f2..ad2ab8fc 100644 --- a/lib/cc/engine/analyzers/php/main.rb +++ b/lib/cc/engine/analyzers/php/main.rb @@ -9,7 +9,7 @@ module Analyzers module Php class Main < CC::Engine::Analyzers::Base LANGUAGE = "php" - DEFAULT_PATHS = [ + PATTERNS = [ "**/*.php", "**/*.inc", "**/*.module" diff --git a/lib/cc/engine/analyzers/python/main.rb b/lib/cc/engine/analyzers/python/main.rb index 956c8ab6..320ee686 100644 --- a/lib/cc/engine/analyzers/python/main.rb +++ b/lib/cc/engine/analyzers/python/main.rb @@ -11,7 +11,7 @@ module Analyzers module Python class Main < CC::Engine::Analyzers::Base LANGUAGE = "python" - DEFAULT_PATHS = ["**/*.py"] + PATTERNS = ["**/*.py"] DEFAULT_MASS_THRESHOLD = 32 POINTS_PER_OVERAGE = 50_000 diff --git a/lib/cc/engine/analyzers/ruby/main.rb b/lib/cc/engine/analyzers/ruby/main.rb index 36ec8aee..cded642a 100644 --- a/lib/cc/engine/analyzers/ruby/main.rb +++ b/lib/cc/engine/analyzers/ruby/main.rb @@ -9,7 +9,7 @@ module Analyzers module Ruby class Main < CC::Engine::Analyzers::Base LANGUAGE = "ruby" - DEFAULT_PATHS = [ + PATTERNS = [ "**/*.rb", "**/*.rake", "**/Rakefile", diff --git a/spec/cc/engine/analyzers/engine_config_spec.rb b/spec/cc/engine/analyzers/engine_config_spec.rb index 2cf7eed3..8f7acdd8 100644 --- a/spec/cc/engine/analyzers/engine_config_spec.rb +++ b/spec/cc/engine/analyzers/engine_config_spec.rb @@ -47,34 +47,6 @@ end end - describe "#paths_for" do - it "returns paths values for given language" do - engine_config = CC::Engine::Analyzers::EngineConfig.new({ - "config" => { - "languages" => { - "EliXiR" => { - "paths" => ["/", "/etc"], - } - } - } - }) - - expect(engine_config.paths_for("elixir")).to eq(["/", "/etc"]) - end - - it "returns nil if language is an empty key" do - engine_config = CC::Engine::Analyzers::EngineConfig.new({ - "config" => { - "languages" => { - "EliXiR" => "" - } - } - }) - - expect(engine_config.paths_for("elixir")).to be_nil - end - end - describe "mass_threshold_for" do it "returns configured mass threshold as integer" do engine_config = CC::Engine::Analyzers::EngineConfig.new({ @@ -103,7 +75,7 @@ end end - describe "exlude_paths" do + describe "include_paths" do it "returns given include paths" do engine_config = CC::Engine::Analyzers::EngineConfig.new({ "include_paths" => ["/tmp"] diff --git a/spec/cc/engine/analyzers/file_list_spec.rb b/spec/cc/engine/analyzers/file_list_spec.rb index 6e0be03c..781ec6c3 100644 --- a/spec/cc/engine/analyzers/file_list_spec.rb +++ b/spec/cc/engine/analyzers/file_list_spec.rb @@ -21,65 +21,26 @@ end describe "#files" do - it "returns files from default_paths when language is missing paths" do + it "expands patterns for directory includes" do file_list = ::CC::Engine::Analyzers::FileList.new( - engine_config: CC::Engine::Analyzers::EngineConfig.new({}), - default_paths: ["**/*.js", "**/*.jsx"], - language: "javascript", + engine_config: CC::Engine::Analyzers::EngineConfig.new( + "include_paths" => ["./"], + ), + patterns: ["**/*.js", "**/*.jsx"], ) expect(file_list.files).to eq(["./foo.js", "./foo.jsx"]) end - it "returns files from engine config defined paths when present" do + it "filters file includes by patterns" do file_list = ::CC::Engine::Analyzers::FileList.new( - engine_config: CC::Engine::Analyzers::EngineConfig.new({ - "config" => { - "languages" => { - "elixir" => { - "paths" => ["**/*.ex"] - } - } - } - }), - default_paths: ["**/*.js", "**/*.jsx"], - language: "elixir", + engine_config: CC::Engine::Analyzers::EngineConfig.new( + "include_paths" => ["./foo.ex", "./foo.js"], + ), + patterns: ["**/*.js", "**/*.jsx"], ) - expect(file_list.files).to eq(["./foo.ex"]) - end - - it "returns files from default_paths when languages is an array" do - file_list = ::CC::Engine::Analyzers::FileList.new( - engine_config: CC::Engine::Analyzers::EngineConfig.new({ - "config" => { - "languages" => [ - "elixir" - ], - }, - }), - default_paths: ["**/*.js", "**/*.jsx"], - language: "javascript", - ) - - expect(file_list.files).to eq(["./foo.js", "./foo.jsx"]) - end - - it "excludes files not in include_paths" do - file_list = ::CC::Engine::Analyzers::FileList.new( - engine_config: CC::Engine::Analyzers::EngineConfig.new({ - "include_paths" => ["foo.jsx", "nested"], - "config" => { - "languages" => [ - "elixir" - ], - }, - }), - default_paths: ["**/*.js", "**/*.jsx", "**/*.hs"], - language: "javascript", - ) - - expect(file_list.files).to eq(["./foo.jsx", "./nested/nest.hs"]) + expect(file_list.files).to eq(["./foo.js"]) end end end From da386846184565ea584d48fa099732b22cf06f1b Mon Sep 17 00:00:00 2001 From: patrick brisbin Date: Tue, 22 Mar 2016 12:33:36 -0400 Subject: [PATCH 2/2] Adjust file matching and expansion The include_paths may or may not be ./-prefixed (they usually aren't, but there's the notable case of ["./"]). Patterns are never ./-prefixed (they can't be for the purposes of relative expansion). Given this uncertainty, it's possible to drop files if you're not careful in how you match or expand. In addition to this, fingerprints in the wild are all computed with ./-prefixed paths. For all these reasons, we need to normalize the prefixing of paths and patterns in FileList. --- lib/cc/engine/analyzers/file_list.rb | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/lib/cc/engine/analyzers/file_list.rb b/lib/cc/engine/analyzers/file_list.rb index ae04568c..9e23b628 100644 --- a/lib/cc/engine/analyzers/file_list.rb +++ b/lib/cc/engine/analyzers/file_list.rb @@ -26,16 +26,25 @@ def files attr_reader :engine_config, :patterns def expand(path) - globs = patterns.map { |p| File.join(path, p) } + globs = patterns.map { |p| File.join(relativize(path), p) } Dir.glob(globs) end def matches?(path) patterns.any? do |p| - File.fnmatch?(File.join("./", p), path, File::FNM_PATHNAME) + File.fnmatch?( + relativize(p), + relativize(path), + File::FNM_PATHNAME | File::FNM_EXTGLOB + ) end end + + # Ensure all paths (and patterns) are ./-prefixed + def relativize(path) + "./#{path.sub(%r{^\./}, "")}" + end end end end