diff --git a/README.md b/README.md index bbaa445..a59a0d7 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,30 @@ plugins: last-modified-at: date-format: '%d-%b-%y' ``` +For sites with lots of documents using `last_modified_at`, there may be a render +performance improvement via: + +```yml +plugins: + - jekyll-last-modified-at + +last-modified-at: + use-git-cache: true +``` + +If `use-git-cache` is `false` (the default), every committed file using +`last_modified_at` will generate a separate spawned process to check the git log +for time data. So if you have 10 documents, this will result in 10 spawned calls. + +If `use-git-cache` is `true`, a single spawned process is generated that reads +the entire git log history and caches the time data. This cache is then read +from during the rest of the site generation process. So if you have 10 (or 1000) +documents, this will result in 1 spawned call. The cache is flushed on site +reset, allowing for a long-lived server to correctly reflect `last_modified_at` +of files modified and committed while it has been running. + +Note: there may be performance issues for repositories with very large +histories, in which case the default behavior is likely preferred. ## Usage @@ -57,3 +81,18 @@ To format such a time, you'll need to rely on Liquid's `date` filter: ``` (It's generally [more performant to use the `page.last_modified_at` version](https://github.com/gjtorikian/jekyll-last-modified-at/issues/24#issuecomment-55431108) of this plugin.) + +## `page.date` + +Additionally, you can have this plugin automatically set a default `date` value on every page based on when the file was **first** committed in git. 
To enable this, set `set-page-date` to `true` in your config yaml: + + ```yml +plugins: + - jekyll-last-modified-at + +last-modified-at: + set-page-date: true +``` + +If a post's date is already set via [the filename](https://jekyllrb.com/docs/posts/#creating-posts) or a page's date is set in its [frontmatter](https://jekyllrb.com/docs/variables/#page-variables), those values will override the value provided by this plugin. If a git date isn't available, `ctime` is used. + diff --git a/lib/jekyll-last-modified-at.rb b/lib/jekyll-last-modified-at.rb index 2482d86..5ce996d 100644 --- a/lib/jekyll-last-modified-at.rb +++ b/lib/jekyll-last-modified-at.rb @@ -9,8 +9,5 @@ module LastModifiedAt autoload :Executor, 'jekyll-last-modified-at/executor' autoload :Determinator, 'jekyll-last-modified-at/determinator' autoload :Git, 'jekyll-last-modified-at/git' - - PATH_CACHE = {} # rubocop:disable Style/MutableConstant - REPO_CACHE = {} # rubocop:disable Style/MutableConstant end end diff --git a/lib/jekyll-last-modified-at/determinator.rb b/lib/jekyll-last-modified-at/determinator.rb index 8c5138b..bdbd7a0 100644 --- a/lib/jekyll-last-modified-at/determinator.rb +++ b/lib/jekyll-last-modified-at/determinator.rb @@ -3,49 +3,69 @@ module Jekyll module LastModifiedAt class Determinator - attr_reader :site_source, :page_path + @repo_cache = {} + @last_mod_cache = {} + @first_mod_cache = {} + class << self + # attr_accessor so we can flush externally + attr_accessor :repo_cache + attr_accessor :last_mod_cache + attr_accessor :first_mod_cache + end + + attr_reader :site_source, :page_path, :use_git_cache attr_accessor :format - def initialize(site_source, page_path, format = nil) - @site_source = site_source - @page_path = page_path - @format = format || '%d-%b-%y' + def initialize(site_source, page_path, format = nil, use_git_cache = false, first_time = false) # rubocop:disable Style/OptionalBooleanParameter + @site_source = site_source + @page_path = page_path + @format = format 
|| '%d-%b-%y' + @use_git_cache = use_git_cache + @first_time = first_time end def git - return REPO_CACHE[site_source] unless REPO_CACHE[site_source].nil? + return self.class.repo_cache[site_source] unless self.class.repo_cache[site_source].nil? - REPO_CACHE[site_source] = Git.new(site_source) - REPO_CACHE[site_source] + self.class.repo_cache[site_source] = Git.new(site_source) end def formatted_last_modified_date - return PATH_CACHE[page_path] unless PATH_CACHE[page_path].nil? + last_modified_at_time.strftime(@format) + end + + def formatted_first_modified_date + first_modified_at_time.strftime(@format) + end + + def first_modified_at_time + return self.class.first_mod_cache[page_path] unless self.class.first_mod_cache[page_path].nil? - last_modified = last_modified_at_time.strftime(@format) - PATH_CACHE[page_path] = last_modified - last_modified + raise Errno::ENOENT, "#{absolute_path_to_article} does not exist!" unless File.exist? absolute_path_to_article + + self.class.first_mod_cache[page_path] = Time.at(first_modified_at_unix.to_i) + end + + def first_modified_at_unix + if git.git_repo? + first_commit_date = git.first_commit_date(relative_path_from_git_dir, use_git_cache) + first_commit_date.nil? || first_commit_date.empty? ? ctime(absolute_path_to_article) : first_commit_date + else + ctime(absolute_path_to_article) + end end def last_modified_at_time + return self.class.last_mod_cache[page_path] unless self.class.last_mod_cache[page_path].nil? + raise Errno::ENOENT, "#{absolute_path_to_article} does not exist!" unless File.exist? absolute_path_to_article - Time.at(last_modified_at_unix.to_i) + self.class.last_mod_cache[page_path] = Time.at(last_modified_at_unix.to_i) end def last_modified_at_unix if git.git_repo? 
- last_commit_date = Executor.sh( - 'git', - '--git-dir', - git.top_level_directory, - 'log', - '-n', - '1', - '--format="%ct"', - '--', - relative_path_from_git_dir - )[/\d+/] + last_commit_date = git.last_commit_date(relative_path_from_git_dir, use_git_cache) # last_commit_date can be nil iff the file was not committed. last_commit_date.nil? || last_commit_date.empty? ? mtime(absolute_path_to_article) : last_commit_date else @@ -54,11 +74,27 @@ def last_modified_at_unix end def to_s - @to_s ||= formatted_last_modified_date + if @first_time + @to_s ||= formatted_first_modified_date + else + @to_s ||= formatted_last_modified_date + end end def to_liquid - @to_liquid ||= last_modified_at_time + if @first_time + @to_liquid ||= first_modified_at_time + else + @to_liquid ||= last_modified_at_time + end + end + + def to_time + to_liquid + end + + def strftime(*args) + return to_liquid().strftime(*args) end private @@ -79,6 +115,10 @@ def relative_path_from_git_dir def mtime(file) File.mtime(file).to_i.to_s end + + def ctime(file) + File.ctime(file).to_i.to_s + end end end end diff --git a/lib/jekyll-last-modified-at/git.rb b/lib/jekyll-last-modified-at/git.rb index 22c4f30..e34f96e 100644 --- a/lib/jekyll-last-modified-at/git.rb +++ b/lib/jekyll-last-modified-at/git.rb @@ -8,6 +8,8 @@ class Git def initialize(site_source) @site_source = site_source @is_git_repo = nil + @lcd_cache = {} + @lce_cache = {} end def top_level_directory @@ -33,6 +35,82 @@ def git_repo? false end end + + def last_commit_date(path, use_git_cache = false) # rubocop:disable Style/OptionalBooleanParameter + if use_git_cache + build_cache if @lcd_cache.empty? + @lcd_cache[path] + else + Executor.sh( + 'git', + '--git-dir', + top_level_directory, + 'log', + '-n', + '1', + '--format="%ct"', + '--', + path + )[/\d+/] + end + end + + def first_commit_date(path, use_git_cache = false) # rubocop:disable Style/OptionalBooleanParameter + if use_git_cache + build_cache if @lce_cache.empty? 
+ @lce_cache[path] + else + Executor.sh( + 'git', + '--git-dir', + top_level_directory, + 'log', + '--follow', + '--diff-filter=A', + '--format="%ct"', + '--', + path + ).split("\n")[-1][/\d+/] + end + end + + private + + # generates hash of `path => unix time stamp (string)` + def build_cache + # example output: + # + # %these-files-modified-at:1621042992 + # + # Dockerfile.production + # %these-files-modified-at:1621041929 + # + # assets/css/style.52513a5600efd4015668ccb9b702256e.css + # assets/css/style.52513a5600efd4015668ccb9b702256e.css.gz + lines = Executor.sh( + 'git', + '--git-dir', + top_level_directory, + 'log', + '--name-only', + '--date=unix', + '--pretty=%%these-files-modified-at:%ct' + ) + + timestamp = nil + lines.split("\n").each do |line| + next if line.empty? + + if line.start_with?('%these-files-modified-at:') + # new record + timestamp = line.split(':')[1] + next + end + + @lcd_cache[line] = timestamp unless @lcd_cache.key?(line) + @lce_cache[line] = timestamp + end + end end end end diff --git a/lib/jekyll-last-modified-at/hook.rb b/lib/jekyll-last-modified-at/hook.rb index 127aaee..7b898b7 100644 --- a/lib/jekyll-last-modified-at/hook.rb +++ b/lib/jekyll-last-modified-at/hook.rb @@ -6,8 +6,15 @@ module Hook def self.add_determinator_proc proc { |item| format = item.site.config.dig('last-modified-at', 'date-format') - item.data['last_modified_at'] = Determinator.new(item.site.source, item.path, - format) + use_git_cache = item.site.config.dig('last-modified-at', 'use-git-cache') + item.data['last_modified_at'] = Determinator.new(item.site.source, item.relative_path, + format, use_git_cache) + if item.site.config.dig('last-modified-at', 'set-page-date') + # The "date" field will be converted to a string first by Jekyll and it must be + # in the format given below: https://jekyllrb.com/docs/variables/#page-variables + item.data['date'] = Determinator.new(item.site.source, item.relative_path, + '%Y-%m-%d %H:%M:%S %z', use_git_cache, true) + end } 
end