Skip to content

Commit

Permalink
Incrementally rebuild when a data file is changed (#8771)
Browse files Browse the repository at this point in the history
Merge pull request 8771
  • Loading branch information
ashmaroli committed Sep 29, 2022
1 parent d45fb96 commit 160a681
Show file tree
Hide file tree
Showing 11 changed files with 315 additions and 5 deletions.
53 changes: 53 additions & 0 deletions features/incremental_rebuild.feature
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,59 @@ Feature: Incremental rebuild
And the _site directory should exist
And I should see "Basic Site with include tag: Regenerated by Jekyll" in "_site/index.html"

# Checks that, across two consecutive `jekyll build -IV` runs, editing one data file
# re-renders the post that reaches that data and not the index page or the other post.
Scenario: Rebuild when a data file is changed
# Two data sources: a top-level file and one nested two directories deep.
Given I have a _data directory
And I have a "_data/colors.yml" file that contains "[red, green, blue]"
And I have a _data/members/core directory
And I have a "_data/members/core/emeritus.yml" file with content:
"""
- name: John Doe
role: Admin
"""
# The include is the only template that reads `site.data.members.core.emeritus`.
And I have an _includes directory
And I have an "_includes/about.html" file with content:
"""
<ul>
{% for entry in site.data.members.core.emeritus %}
<li title="{{ entry.name }} -- {{ entry.role }}">{{ entry.name }}</li>
{% endfor %}
</ul>
"""
# Layout chains: post -> page (pulls in about.html), static -> home (no include).
And I have a _layouts directory
And I have a page layout that contains "{{ content }}\n\n{% include about.html %}"
And I have a home layout that contains "{{ content }}\n\nGenerated by Jekyll"
And I have a "_layouts/post.html" page with layout "page" that contains "{{ content }}"
And I have a "_layouts/static.html" page with layout "home" that contains "{{ content }}"
# index.html reads only `site.data.colors`; about.html reaches the emeritus data via the page layout.
And I have an "index.html" page with layout "home" that contains "{{ site.data.colors | join: '_' }}"
And I have an "about.html" page with layout "page" that contains "About Us"
And I have a configuration file with "collections_dir" set to "collections"
And I have a collections/_posts directory
# "Table" uses the post layout (data dependency); "Wargames" uses the static layout (none).
And I have the following post within the "collections" directory:
| title | date | layout | content |
| Table | 2009-03-26 | post | Post with data dependency |
| Wargames | 2009-03-27 | static | Post without data dependency |
# First build: the index page and the Wargames post are expected in the render log.
When I run jekyll build -IV
Then I should get a zero exit status
And the _site directory should exist
And I should see "red_green_blue" in "_site/index.html"
And I should see "John Doe -- Admin" in "_site/about.html"
And I should see "Rendering: index.html" in the build output
And I should see "Rendering: _posts/2009-03-27-wargames.markdown" in the build output
# NOTE(review): the pause presumably guarantees the rewritten file gets a newer mtime -- confirm.
When I wait 1 second
# Change only the nested data file (John -> Jane); colors.yml is left untouched.
Then I have a "_data/members/core/emeritus.yml" file with content:
"""
- name: Jane Doe
role: Admin
"""
# Second build: the Table post must render again; index.html and the Wargames post must not.
When I run jekyll build -IV
Then I should get a zero exit status
And the _site directory should exist
And I should see "red_green_blue" in "_site/index.html"
And I should see "Jane Doe -- Admin" in "_site/about.html"
And I should see "Rendering: _posts/2009-03-26-table.markdown" in the build output
But I should not see "Rendering: index.html" in the build output
And I should not see "Rendering: _posts/2009-03-27-wargames.markdown" in the build output

Scenario: Rebuild when a dependency of document in custom collection_dir is changed
Given I have a _includes directory
And I have a configuration file with "collections_dir" set to "collections"
Expand Down
2 changes: 2 additions & 0 deletions lib/jekyll.rb
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ module Jekyll
autoload :Collection, "jekyll/collection"
autoload :Configuration, "jekyll/configuration"
autoload :Convertible, "jekyll/convertible"
autoload :DataEntry, "jekyll/data_entry"
autoload :DataHash, "jekyll/data_hash"
autoload :Deprecator, "jekyll/deprecator"
autoload :Document, "jekyll/document"
autoload :EntryFilter, "jekyll/entry_filter"
Expand Down
83 changes: 83 additions & 0 deletions lib/jekyll/data_entry.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# frozen_string_literal: true

module Jekyll
  # Wrapper around the parsed contents of a single data source file.
  #
  # The wrapper remembers the absolute path of the file it was built from so that, on an
  # incremental build, the resource currently being rendered can be registered with the site's
  # regenerator as depending on that file.
  class DataEntry
    # The rendering context assigned by whoever hands this entry out (see `DataHash#[]`).
    # It is read to find the page being rendered; it stays `nil` until someone assigns it.
    attr_accessor :context

    # The parsed data object this entry wraps.
    attr_reader :data

    # Create a Jekyll wrapper for given parsed data object.
    #
    # site        - The current Jekyll::Site instance.
    # abs_path    - Absolute path to the data source file.
    # parsed_data - Parsed representation of data source file contents.
    #
    # Returns nothing.
    def initialize(site, abs_path, parsed_data)
      @site = site
      @path = abs_path
      @data = parsed_data
    end

    # Liquid representation of current instance is the parsed data object.
    #
    # Mark as a dependency for regeneration here since every renderable object primarily uses the
    # parsed data object while the parent resource is being rendered by Liquid. Accessing the data
    # object directly via Ruby interface `#[]()` is outside the scope of regeneration.
    #
    # FIXME: Marking as a dependency on every call is not ideal. Optimize at a later date.
    #
    # Returns the parsed data object.
    def to_liquid
      add_regenerator_dependencies if incremental_build?
      @data
    end

    # -- Overrides to maintain backwards compatibility --

    # Any missing method will be forwarded to the underlying data object stored in the instance
    # variable `@data`, provided that object responds to it. Otherwise the regular
    # `NoMethodError` is raised via `super`.
    def method_missing(method, *args, &block)
      @data.respond_to?(method) ? @data.send(method, *args, &block) : super
    end

    # Report the methods of the underlying data object as our own, in line with `method_missing`.
    def respond_to_missing?(method, *)
      @data.respond_to?(method) || super
    end

    # Order by the underlying data. Another DataEntry is unwrapped before comparing.
    def <=>(other)
      data <=> (other.is_a?(self.class) ? other.data : other)
    end

    # Equality by the underlying data. Another DataEntry is unwrapped before comparing.
    def ==(other)
      data == (other.is_a?(self.class) ? other.data : other)
    end

    # Explicitly defined to bypass re-routing from `method_missing` hook for greater performance.
    #
    # Returns string representation of parsed data object.
    def inspect
      @data.inspect
    end

    private

    # Whether the site is configured for incremental builds.
    #
    # The configuration value is cached once it is non-nil.
    def incremental_build?
      @incremental = @site.config["incremental"] if @incremental.nil?
      @incremental
    end

    # Register the data source file as a dependency of the page found in the current context.
    #
    # Nothing is registered when no context has been assigned, when the context carries no
    # `:page` register, or when that page has no "path" key. The context guard matters because
    # `to_liquid` may be invoked on an entry that nobody assigned a context to; without it the
    # whole render would abort with a NoMethodError on `nil`.
    #
    # Returns nothing of significance.
    def add_regenerator_dependencies
      return unless context

      page = context.registers[:page]
      return unless page&.key?("path")

      # Paths of collection documents are resolved below the configured `collections_dir`.
      absolute_path =
        if page["collection"]
          @site.in_source_dir(@site.config["collections_dir"], page["path"])
        else
          @site.in_source_dir(page["path"])
        end

      @site.regenerator.add_dependency(absolute_path, @path)
    end
  end
end
61 changes: 61 additions & 0 deletions lib/jekyll/data_hash.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# frozen_string_literal: true

module Jekyll
  # A class that behaves very similar to Ruby's `Hash` class yet different in how it is handled by
  # Liquid. This class emulates Hash by delegation instead of inheritance to minimize overridden
  # methods, especially since some Hash methods return another Hash instance instead of the
  # subclass instance.
  class DataHash
    #
    # Delegate given (zero-arity) method(s) to the Hash object stored in instance variable
    # `@registry`.
    # NOTE: Avoiding the use of `Forwardable` module's `def_delegators` for preventing unnecessary
    #   creation of interim objects on multiple calls.
    def self.delegate_to_registry(*symbols)
      symbols.each { |sym| define_method(sym) { @registry.send(sym) } }
    end
    private_class_method :delegate_to_registry

    # -- core instance methods --

    # The rendering context. It is handed on to looked-up values that accept one (see `#[]`).
    attr_accessor :context

    def initialize
      @registry = {}
    end

    # Give every copy made through `dup` or `clone` its own registry.
    #
    # Without this hook the copy would share `@registry` with its source, so writing to the copy
    # (for example `copy.merge!(...)`) would silently modify the source as well. The copy is
    # shallow, like `Hash#dup`: the stored values themselves are shared.
    def initialize_copy(source)
      super
      @registry = @registry.dup
    end

    # Look up `key` in the registry.
    #
    # The current context is assigned to the value when the value has a `context=` writer.
    #
    # Returns the stored value, or whatever the registry yields for a missing key.
    def [](key)
      @registry[key].tap do |value|
        value.context = context if value.respond_to?(:context=)
      end
    end

    # `Hash#to_liquid` returns the Hash instance itself.
    # Mimic that behavior by returning `self` instead of returning the `@registry` variable value.
    def to_liquid
      self
    end

    # -- supplementary instance methods to emulate Hash --

    # Both act on the registry and return what the registry returns. `freeze` therefore freezes
    # only the registry (the `context` writer keeps working) and returns the underlying Hash.
    delegate_to_registry :freeze, :inspect

    # Non-destructive merge.
    #
    # other - a Hash, or any object `Hash#merge!` accepts.
    # block - optional conflict resolver, as for `Hash#merge`.
    #
    # Returns a new instance of this class; the receiver is left untouched.
    def merge(other, &block)
      dup.merge!(other, &block)
    end

    # Destructive merge into the registry.
    #
    # Returns self.
    def merge!(other, &block)
      @registry.merge!(other, &block)
      self
    end

    # Forward everything else to the registry, limited to its public interface so that private
    # methods of the Hash (`format`, `exit`, `system`, ...) cannot be reached through this object.
    def method_missing(method, *args, &block)
      return super unless @registry.respond_to?(method)

      @registry.public_send(method, *args, &block)
    end

    # Report the public methods of the registry as our own, in line with `method_missing`.
    def respond_to_missing?(method, *)
      @registry.respond_to?(method)
    end
  end
end
7 changes: 6 additions & 1 deletion lib/jekyll/drops/site_drop.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ class SiteDrop < Drop

mutable false

delegate_method_as :site_data, :data
delegate_methods :time, :pages, :static_files, :tags, :categories

private delegate_method_as :config, :fallback_data
Expand All @@ -24,6 +23,12 @@ def key?(key)
(key != "posts" && @obj.collections.key?(key)) || super
end

# Site data of the wrapped object.
#
# The drop's `@context` is assigned to the data object first, when that object
# has a `context=` writer.
#
# Returns the very object returned by `@obj.site_data`.
def data
  site_data = @obj.site_data
  site_data.context = @context if site_data.respond_to?(:context=)
  site_data
end

# Documents of the wrapped object's posts, ordered from greatest to least
# according to `<=>`. The sorted list is computed once and memoized in
# `@site_posts`.
def posts
  @site_posts ||= begin
    documents = @obj.posts.docs
    documents.sort { |left, right| right <=> left }
  end
end
Expand Down
9 changes: 6 additions & 3 deletions lib/jekyll/readers/data_reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ class DataReader

def initialize(site, in_source_dir: nil)
@site = site
@content = {}
@content = DataHash.new
@entry_filter = EntryFilter.new(site)
@in_source_dir = in_source_dir || @site.method(:in_source_dir)
@source_dir = @in_source_dir.call("/")
Expand All @@ -24,6 +24,8 @@ def read(dir)
@content
end

# rubocop:disable Metrics/AbcSize

# Read and parse all .yaml, .yml, .json, .csv and .tsv
# files under <dir> and add them to the <data> variable.
#
Expand All @@ -43,13 +45,14 @@ def read_data_to(dir, data)
next if @entry_filter.symlink?(path)

if File.directory?(path)
read_data_to(path, data[sanitize_filename(entry)] = {})
read_data_to(path, data[sanitize_filename(entry)] = DataHash.new)
else
key = sanitize_filename(File.basename(entry, ".*"))
data[key] = read_data_file(path)
data[key] = DataEntry.new(site, path, read_data_file(path))
end
end
end
# rubocop:enable Metrics/AbcSize

# Determines how to read a data file.
#
Expand Down
9 changes: 8 additions & 1 deletion lib/jekyll/utils.rb
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,14 @@ def deep_merge_hashes!(target, overwrite)
end

# Tell whether the given value can take part in a deep merge.
#
# value - the object to inspect.
#
# Returns true for a Hash, a Drops::Drop or a DataHash. For a DataEntry the
# answer is the one for its wrapped data object. Returns false otherwise.
def mergable?(value)
  return true if value.is_a?(Hash) || value.is_a?(Drops::Drop) || value.is_a?(DataHash)
  return mergable?(value.data) if value.is_a?(DataEntry)

  false
end

def duplicable?(obj)
Expand Down
1 change: 1 addition & 0 deletions test/source/_data/boolean.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
true
4 changes: 4 additions & 0 deletions test/source/_data/languages_plus.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
- java
- ruby
- rust
- golang
34 changes: 34 additions & 0 deletions test/test_data_entry.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# frozen_string_literal: true

require "helper"

# Unit tests for Jekyll::DataEntry, run against the data entries of the fixture site.
class TestDataEntry < JekyllUnitTest
  context "Data Entry" do
    setup do
      site = fixture_site
      site.read
      @data_hash = site.site_data
    end

    should "expose underlying data object as Liquid representation" do
      subject = @data_hash["languages"]
      assert_equal Jekyll::DataEntry, subject.class
      assert_equal subject.data, subject.to_liquid
    end

    should "respond to `#[](key)` when expected to but raise Exception otherwise" do
      # `#[]` is not defined on the entry itself, so the call is forwarded to the wrapped data.
      greeting = @data_hash["greetings"]
      assert greeting["foo"]

      boolean = @data_hash["boolean"] # the value is a Boolean.
      assert_raises(NoMethodError) { boolean["foo"] }
    end

    should "compare with another instance of same class using underlying data" do
      # The entries are given in the opposite order so that `sort` has to swap them.
      assert_equal(
        [%w(java ruby), %w(java ruby rust golang)],
        [@data_hash["languages_plus"], @data_hash["languages"]].sort
      )
    end
  end
end
57 changes: 57 additions & 0 deletions test/test_data_hash.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# frozen_string_literal: true

require "helper"

# Unit tests for Jekyll::DataHash, using the `site_data` of the fixture site and
# freshly created instances.
class TestDataHash < JekyllUnitTest
context "Data Hash" do
setup do
@site = fixture_site
@site.read
end

should "only mimic a ::Hash instance" do
subject = @site.site_data
assert_equal Jekyll::DataHash, subject.class
refute subject.is_a?(Hash)

# Reading through a duplicate: direct lookup and `dig`.
copy = subject.dup
assert copy["greetings"]["foo"]
assert_includes copy.dig("greetings", "foo"), "Hello!"

# After the key is overwritten with a String, `["foo"]` is String#[] and finds nothing.
copy["greetings"] = "Hola!"
assert_equal "Hola!", copy["greetings"]
refute copy["greetings"]["foo"]

# Writing after `freeze` must fail.
frozen_data_hash = Jekyll::DataHash.new.freeze
assert_raises(FrozenError) { frozen_data_hash["lorem"] = "ipsum" }
end

should "be mergable" do
alpha = Jekyll::DataHash.new
beta = Jekyll::DataHash.new

assert_equal "{}", alpha.inspect
sample_data = { "foo" => "bar" }

# `merge` with a plain Hash: result keeps the class, receiver stays empty.
assert_equal sample_data["foo"], alpha.merge(sample_data)["foo"]
assert_equal alpha.class, alpha.merge(sample_data).class
assert_empty alpha

# `merge!` fills beta in place; `merge` then accepts another DataHash as argument.
beta.merge!(sample_data)
assert_equal sample_data["foo"], alpha.merge(beta)["foo"]
assert_equal alpha.class, alpha.merge(beta).class
assert_empty alpha

# `merge!` with the fixture site's data keeps the class and makes nested data reachable.
beta.merge!(@site.site_data)
assert_equal alpha.class, beta.class
assert_includes beta.dig("greetings", "foo"), "Hello!"

# The same expectations through Jekyll::Utils.deep_merge_hashes.
assert_empty alpha
assert_equal sample_data["foo"], Jekyll::Utils.deep_merge_hashes(alpha, sample_data)["foo"]
assert_includes(
Jekyll::Utils.deep_merge_hashes(alpha, beta).dig("greetings", "foo"),
"Hello!"
)
end
end
end

0 comments on commit 160a681

Please sign in to comment.