diff --git a/docs/_tutorials/cache_api.md b/docs/_tutorials/cache_api.md
new file mode 100644
index 00000000000..2690dc1639a
--- /dev/null
+++ b/docs/_tutorials/cache_api.md
@@ -0,0 +1,87 @@
+---
+layout: tutorials
+permalink: /tutorials/cache-api/
+title: Cache API
+---
+
+Jekyll includes a caching API, used both internally and by plugins, which can
+cache the output of deterministic functions to speed up site generation. This
+cache is persistent across builds, but is cleared when Jekyll detects any
+changes to `_config.yml`.
+
+## Jekyll::Cache.new(name) → new_cache
+
+If a cache has already been created with `name`, this will return a reference
+to that existing Cache. Otherwise, it will create a new Cache called `name`.
+
+If this Cache will be used by a Gem-packaged plugin, `name` should either be the
+name of the Gem, or prefixed with the name of the Gem followed by `::` (if a
+plugin expects to use multiple Caches). If this Cache will be used internally by
+Jekyll, `name` should be the name of the class that is using the Cache (e.g.
+`"Jekyll::Converters::Markdown"`).
+
+Cached objects are shared between all Caches created with the same `name`, but
+are _not_ shared between Caches with different names. There can be an object
+stored with key `1` in `Jekyll::Cache.new("a")` and an object stored with key
+`1` in `Jekyll::Cache.new("b")` and these will not point to the same cached
+object. This way, you do not need to ensure that keys are globally unique.
+
+## getset(key) {block}
+
+This is the most common way to utilize the Cache.
+
+`block` is a bit of code that takes a lot of time to compute, but always
+generates the same output given a particular input (like converting Markdown to
+HTML). `key` is a `String` (or an object with `to_s`) that uniquely identifies
+the input to the function.
+
+If `key` already exists in the Cache, it will be returned and `block` will never
+be executed. If `key` does not exist in the Cache, `block` will be executed and
+the result will be added to the Cache and returned.
+
+```ruby
+def cache
+  @@cache ||= Jekyll::Cache.new("ConvertMarkdown")
+end
+
+def convert_markdown_to_html(markdown)
+  cache.getset(markdown) do
+    expensive_conversion_method(markdown)
+  end
+end
+```
+
+In the above example, `expensive_conversion_method` will only be called once for
+any given `markdown` input. If `convert_markdown_to_html` is called a second
+time with the same input, the cached output will be returned.
+
+Because posts will frequently remain unchanged from one build to the next, this
+is an effective way to avoid performing the same computations each time the site
+is built.
+
+## clear
+
+This will clear all cached objects from a particular Cache. The Cache will be
+empty, both in memory and on disk.
+
+
+### The following methods will probably only be used in special circumstances
+
+## cache[key] → value
+
+Fetches `key` from Cache and returns its `value`. Raises if `key` does not exist
+in Cache.
+
+## cache[key] = value
+
+Adds `value` to Cache under `key`.
+Returns nothing.
+
+## key?(key) → true or false
+
+Returns `true` if `key` already exists in Cache, `false` otherwise.
+
+## delete(key)
+
+Removes `key` from Cache.
+Returns nothing.
diff --git a/lib/jekyll.rb b/lib/jekyll.rb
index 56a9b84f5b2..fe4af08411f 100644
--- a/lib/jekyll.rb
+++ b/lib/jekyll.rb
@@ -54,6 +54,7 @@ module Jekyll
   autoload :FrontmatterDefaults, "jekyll/frontmatter_defaults"
   autoload :Hooks, "jekyll/hooks"
   autoload :Layout, "jekyll/layout"
+  autoload :Cache, "jekyll/cache"
   autoload :CollectionReader, "jekyll/readers/collection_reader"
   autoload :DataReader, "jekyll/readers/data_reader"
   autoload :LayoutReader, "jekyll/readers/layout_reader"
diff --git a/lib/jekyll/cache.rb b/lib/jekyll/cache.rb
new file mode 100644
index 00000000000..ae5138e018d
--- /dev/null
+++ b/lib/jekyll/cache.rb
@@ -0,0 +1,186 @@
+# frozen_string_literal: true
+
+require "digest"
+require "fileutils"
+
+module Jekyll
+  # A named key/value cache. Items are held in an in-memory Hash shared by
+  # every Cache created with the same name and, unless the disk cache has been
+  # disabled, are also Marshaled to files under `.jekyll-cache`.
+  class Cache
+    # rubocop:disable Style/ClassVars
+    @@caches = {}
+    @@disk_cache_enabled = true
+
+    # Directory under which every cache keeps its files. It is resolved against
+    # the working directory the first time it is asked for, so class-level
+    # operations such as `Cache.clear` work even before any Cache exists.
+    #
+    # Returns the absolute path as a String.
+    def self.base_dir
+      @@base_dir ||= File.expand_path(".jekyll-cache/Jekyll/Cache")
+    end
+
+    # Get an existing named cache, or create a new one if none exists
+    #
+    # name - name of the cache
+    #
+    # Returns nothing.
+    def initialize(name)
+      # Resolve the shared cache root as soon as the first Cache is created
+      Cache.base_dir
+      @cache = @@caches[name] ||= {}
+      @name = name.gsub(%r![^\w\s-]!, "-")
+    end
+
+    # Disable Marshaling cached items to disk
+    def self.disable_disk_cache!
+      @@disk_cache_enabled = false
+    end
+    # rubocop:enable Style/ClassVars
+
+    # Clear all caches
+    def self.clear
+      delete_cache_files
+      @@caches.each_value(&:clear)
+    end
+
+    # Clear this particular cache
+    def clear
+      delete_cache_files
+      @cache.clear
+    end
+
+    # Retrieve a cached item
+    # Raises RuntimeError if key does not exist in cache
+    #
+    # Returns cached value
+    def [](key)
+      return @cache[key] if @cache.key?(key)
+      path = path_to(hash_for(key))
+      if @@disk_cache_enabled && File.file?(path) && File.readable?(path)
+        @cache[key] = load(path)
+      else
+        # Raise explicitly: a bare `raise` would re-raise whatever exception
+        # the caller happens to be handling (`$!`) instead of signalling a miss
+        raise "Key not found in cache '#{@name}'"
+      end
+    end
+
+    # Add an item to cache
+    #
+    # Returns nothing.
+    def []=(key, value)
+      @cache[key] = value
+      return unless @@disk_cache_enabled
+      path = path_to(hash_for(key))
+      dump(path, value)
+    end
+
+    # If an item already exists in the cache, retrieve it
+    # Else execute code block, and add the result to the cache, and return that
+    # result
+    def getset(key)
+      self[key]
+    rescue StandardError
+      value = yield
+      self[key] = value
+      value
+    end
+
+    # Remove one particular item from the cache
+    # Does nothing if the item is not cached
+    #
+    # Returns nothing.
+    def delete(key)
+      @cache.delete(key)
+      return unless @@disk_cache_enabled
+      path = path_to(hash_for(key))
+      # rm_f (unlike File.delete) does not raise when there is no file on disk
+      FileUtils.rm_f(path)
+    end
+
+    # Check if `key` already exists in this cache
+    #
+    # Returns true if key exists in the cache, false otherwise
+    def key?(key)
+      # First, check if item is already cached in memory
+      return true if @cache.key?(key)
+      # Otherwise, it might be cached on disk
+      # but we should not consider the disk cache if it is disabled
+      return false unless @@disk_cache_enabled
+      path = path_to(hash_for(key))
+      File.file?(path) && File.readable?(path)
+    end
+
+    # Compare the current config to the cached config
+    # If they are different, clear all caches
+    #
+    # Returns nothing.
+    def self.clear_if_config_changed(config)
+      config = config.inspect
+      cache = Jekyll::Cache.new "Jekyll::Cache"
+      return if cache.key?("config") && cache["config"] == config
+      clear
+      # `clear` empties the shared Hash in place, so `cache` is still usable
+      cache["config"] = config
+      nil
+    end
+
+    private
+
+    # Given a hashed key, return the path to where this item would be saved on
+    # disk
+    def path_to(hash = nil)
+      @base_dir ||= File.join(Cache.base_dir, @name)
+      return @base_dir if hash.nil?
+      File.join(@base_dir, hash[0..1], hash[2..-1]).freeze
+    end
+
+    # Given a key, return a SHA2 hash that can be used for caching this item to
+    # disk
+    # The key is converted with `to_s` because the digest only accepts Strings.
+    # Deliberately not named `hash`, which would override Object#hash.
+    def hash_for(key)
+      Digest::SHA2.hexdigest(key.to_s).freeze
+    end
+
+    # Remove all this cache's items from disk
+    #
+    # Returns nothing.
+    def delete_cache_files
+      FileUtils.rm_rf(path_to) if @@disk_cache_enabled
+    end
+
+    # Delete all cached items from all caches
+    #
+    # Returns nothing.
+    def self.delete_cache_files
+      FileUtils.rm_rf(base_dir) if @@disk_cache_enabled
+    end
+    private_class_method :delete_cache_files
+
+    # Load `path` from disk and return the result
+    # This MUST NEVER be called in Safe Mode
+    # rubocop:disable Security/MarshalLoad
+    def load(path)
+      raise "Disk cache is disabled" unless @@disk_cache_enabled
+      # Block form closes the file even when Marshal.load raises
+      File.open(path, "rb") { |cached_file| Marshal.load(cached_file) }
+    end
+    # rubocop:enable Security/MarshalLoad
+
+    # Given a path and a value, save value to disk at path
+    # This should NEVER be called in Safe Mode
+    #
+    # Returns nothing.
+    def dump(path, value)
+      return unless @@disk_cache_enabled
+      dir = File.dirname(path)
+      FileUtils.mkdir_p(dir)
+      File.open(path, "wb") do |cached_file|
+        Marshal.dump(value, cached_file)
+      end
+    end
+  end
+end
diff --git a/lib/jekyll/site.rb b/lib/jekyll/site.rb
index 5c4ac78e83c..530e2b233a0 100644
--- a/lib/jekyll/site.rb
+++ b/lib/jekyll/site.rb
@@ -51,6 +51,7 @@ def config=(config)
       # keep using `gems` to avoid breaking change
       self.gems = config["plugins"]
 
+      configure_cache
       configure_plugins
       configure_theme
       configure_include_paths
@@ -100,6 +101,7 @@ def reset
 
       raise ArgumentError, "limit_posts must be a non-negative number" if limit_posts.negative?
 
+      Jekyll::Cache.clear_if_config_changed config
       Jekyll::Hooks.trigger :site, :after_reset, self
     end
 
@@ -421,6 +423,11 @@ def site_cleaner
       @site_cleaner ||= Cleaner.new(self)
     end
 
+    # Disable Marshaling cache to disk in Safe Mode
+    def configure_cache
+      Jekyll::Cache.disable_disk_cache! if safe
+    end
+
     def configure_plugins
       self.plugin_manager = Jekyll::PluginManager.new(self)
       self.plugins = plugin_manager.plugins_path
diff --git a/lib/site_template/.gitignore b/lib/site_template/.gitignore
index bcebd7267eb..f40fbd8ba56 100644
--- a/lib/site_template/.gitignore
+++ b/lib/site_template/.gitignore
@@ -1,4 +1,5 @@
 _site
 .sass-cache
+.jekyll-cache
 .jekyll-metadata
 vendor
diff --git a/lib/theme_template/gitignore.erb b/lib/theme_template/gitignore.erb
index 867d3792d5a..736d7400ff4 100644
--- a/lib/theme_template/gitignore.erb
+++ b/lib/theme_template/gitignore.erb
@@ -1,5 +1,6 @@
 *.gem
 .bundle
+.jekyll-cache
 .sass-cache
 _site
 Gemfile.lock
diff --git a/test/test_site.rb b/test/test_site.rb
index 63fb63fde66..d0322ebbdb3 100644
--- a/test/test_site.rb
+++ b/test/test_site.rb
@@ -76,6 +76,9 @@ def read_posts
       allow(File).to receive(:directory?).with(theme_dir("_sass")).and_return(true)
       allow(File).to receive(:directory?).with(theme_dir("_layouts")).and_return(true)
       allow(File).to receive(:directory?).with(theme_dir("_includes")).and_return(false)
+      allow(File).to receive(:directory?).with(
+        File.expand_path(".jekyll-cache/Jekyll/Cache/Jekyll--Cache")
+      ).and_return(true)
       site = fixture_site("theme" => "test-theme")
       assert_equal [source_dir("_includes")], site.includes_load_paths
     end