From 716d07b98469278e9f30e44246976202da667f2e Mon Sep 17 00:00:00 2001 From: Kip Landergren Date: Tue, 18 May 2021 15:37:49 -0700 Subject: [PATCH 1/5] add option `use-git-cache` which makes single `git log ...` call This diff does two things: 1. aligns path_cache behavior in determinator.rb so that both formatted and raw calls to last_modified_time are read from the path_cache 2. adds option `use-git-cache` which improves render performance by reading the entire git log once (instead of 1:1 for each file) --- README.md | 25 +++++++++ lib/jekyll-last-modified-at.rb | 3 - lib/jekyll-last-modified-at/determinator.rb | 48 ++++++++-------- lib/jekyll-last-modified-at/git.rb | 61 +++++++++++++++++++++ lib/jekyll-last-modified-at/hook.rb | 12 +++- 5 files changed, 120 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index bbaa445..f833e8c 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,31 @@ last-modified-at: date-format: '%d-%b-%y' ``` +For sites with lots of documents using `last_modified_at`, there may be render +performance improvement via: + +```yml +plugins: + - jekyll-last-modified-at + +last-modified-at: + use-git-cache: true +``` + +If `use-git-cache` is `false` (the default), every committed file using +`last_modified_at` will generate a separate spawned process to check the git log +for time data. So if you have 10 documents, this will result in 10 spawned calls. + +If `use-git-cache` is `true`, a single spawned process is generated that reads +the entire git log history and caches the time data. This cache is then read +from during the rest of the site generation process. So if you have 10 (or 1000) +documents, this will result in 1 spawned call. The cache is flushed on site +reset, allowing for a long-lived server to correctly reflect `last_modified_at` +of files modified and committed while it has been running. 
+ +Note: there may be performance issues for repositories with very large +histories, in which case the default behavior is likely preferred. + ## Usage There are a few ways to use this gem. diff --git a/lib/jekyll-last-modified-at.rb b/lib/jekyll-last-modified-at.rb index 2482d86..5ce996d 100644 --- a/lib/jekyll-last-modified-at.rb +++ b/lib/jekyll-last-modified-at.rb @@ -9,8 +9,5 @@ module LastModifiedAt autoload :Executor, 'jekyll-last-modified-at/executor' autoload :Determinator, 'jekyll-last-modified-at/determinator' autoload :Git, 'jekyll-last-modified-at/git' - - PATH_CACHE = {} # rubocop:disable Style/MutableConstant - REPO_CACHE = {} # rubocop:disable Style/MutableConstant end end diff --git a/lib/jekyll-last-modified-at/determinator.rb b/lib/jekyll-last-modified-at/determinator.rb index 8c5138b..789e531 100644 --- a/lib/jekyll-last-modified-at/determinator.rb +++ b/lib/jekyll-last-modified-at/determinator.rb @@ -3,49 +3,47 @@ module Jekyll module LastModifiedAt class Determinator - attr_reader :site_source, :page_path + @repo_cache = {} + @path_cache = {} + class << self + # attr_accessor so we can flush externally + attr_accessor :repo_cache + attr_accessor :path_cache + end + + attr_reader :site_source, :page_path, :use_git_cache attr_accessor :format - def initialize(site_source, page_path, format = nil) - @site_source = site_source - @page_path = page_path - @format = format || '%d-%b-%y' + def initialize(site_source, page_path, format = nil, use_git_cache = false) # rubocop:disable Style/OptionalBooleanParameter + @site_source = site_source + @page_path = page_path + @format = format || '%d-%b-%y' + @use_git_cache = use_git_cache end def git - return REPO_CACHE[site_source] unless REPO_CACHE[site_source].nil? + return self.class.repo_cache[site_source] unless self.class.repo_cache[site_source].nil? 
- REPO_CACHE[site_source] = Git.new(site_source) - REPO_CACHE[site_source] + self.class.repo_cache[site_source] = Git.new(site_source) + self.class.repo_cache[site_source] end def formatted_last_modified_date - return PATH_CACHE[page_path] unless PATH_CACHE[page_path].nil? - - last_modified = last_modified_at_time.strftime(@format) - PATH_CACHE[page_path] = last_modified - last_modified + last_modified_at_time.strftime(@format) end def last_modified_at_time + return self.class.path_cache[page_path] unless self.class.path_cache[page_path].nil? + raise Errno::ENOENT, "#{absolute_path_to_article} does not exist!" unless File.exist? absolute_path_to_article - Time.at(last_modified_at_unix.to_i) + self.class.path_cache[page_path] = Time.at(last_modified_at_unix.to_i) + self.class.path_cache[page_path] end def last_modified_at_unix if git.git_repo? - last_commit_date = Executor.sh( - 'git', - '--git-dir', - git.top_level_directory, - 'log', - '-n', - '1', - '--format="%ct"', - '--', - relative_path_from_git_dir - )[/\d+/] + last_commit_date = git.last_commit_date(relative_path_from_git_dir, use_git_cache) # last_commit_date can be nil iff the file was not committed. last_commit_date.nil? || last_commit_date.empty? ? mtime(absolute_path_to_article) : last_commit_date else diff --git a/lib/jekyll-last-modified-at/git.rb b/lib/jekyll-last-modified-at/git.rb index 22c4f30..c730f6f 100644 --- a/lib/jekyll-last-modified-at/git.rb +++ b/lib/jekyll-last-modified-at/git.rb @@ -8,6 +8,7 @@ class Git def initialize(site_source) @site_source = site_source @is_git_repo = nil + @lcd_cache = {} end def top_level_directory @@ -33,6 +34,66 @@ def git_repo? false end end + + def last_commit_date(path, use_git_cache = false) # rubocop:disable Style/OptionalBooleanParameter + if use_git_cache + build_lcd_cache if @lcd_cache.empty? 
+ @lcd_cache[path] + else + Executor.sh( + 'git', + '--git-dir', + top_level_directory, + 'log', + '-n', + '1', + '--format="%ct"', + '--', + path + )[/\d+/] + end + end + + private + + # generates hash of `path => unix time stamp (string)` + def build_lcd_cache + # example output: + # + # %jekyll-last-modified-at:1621042992 + # + # Dockerfile.production + # %jekyll-last-modified-at:1621041929 + # + # assets/css/style.52513a5600efd4015668ccb9b702256e.css + # assets/css/style.52513a5600efd4015668ccb9b702256e.css.gz + lines = Executor.sh( + 'git', + '--git-dir', + top_level_directory, + 'log', + '--name-only', + '--date=unix', + '--pretty=%%jekyll-last-modified-at:%ct' + ) + + lcd = nil + lines.split("\n").each do |line| + next if line.empty? + + if line.start_with?('%jekyll-last-modified-at:') + # new record + lcd = line.split(':')[1] + next + end + + # we already have it + next if @lcd_cache[line] + + # we don't have it + @lcd_cache[line] = lcd + end + end end end end diff --git a/lib/jekyll-last-modified-at/hook.rb b/lib/jekyll-last-modified-at/hook.rb index 127aaee..b5ea024 100644 --- a/lib/jekyll-last-modified-at/hook.rb +++ b/lib/jekyll-last-modified-at/hook.rb @@ -6,11 +6,21 @@ module Hook def self.add_determinator_proc proc { |item| format = item.site.config.dig('last-modified-at', 'date-format') + use_git_cache = item.site.config.dig('last-modified-at', 'use-git-cache') item.data['last_modified_at'] = Determinator.new(item.site.source, item.path, - format) + format, use_git_cache) } end + Jekyll::Hooks.register :site, :after_reset do |site| + use_git_cache = site.config.dig('last-modified-at', 'use-git-cache') + if use_git_cache + # flush the caches so we can detect commits while server is running + Determinator.repo_cache = {} + Determinator.path_cache = {} + end + end + Jekyll::Hooks.register :posts, :post_init, &Hook.add_determinator_proc Jekyll::Hooks.register :pages, :post_init, &Hook.add_determinator_proc Jekyll::Hooks.register :documents, :post_init, 
&Hook.add_determinator_proc From 58165fe6206c35b8f3daabc3c474fbd02b60f844 Mon Sep 17 00:00:00 2001 From: Kip Landergren Date: Tue, 18 May 2021 17:19:49 -0700 Subject: [PATCH 2/5] use item.relative_path for higher cache affinity `item.path` returns the absolute path to a file, whereas other instantiations of `Determinator`, like in `tag.rb`, use a relative path. by using a relative path in both places we increase the likelihood of cache hits. --- lib/jekyll-last-modified-at/hook.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/jekyll-last-modified-at/hook.rb b/lib/jekyll-last-modified-at/hook.rb index b5ea024..85f0306 100644 --- a/lib/jekyll-last-modified-at/hook.rb +++ b/lib/jekyll-last-modified-at/hook.rb @@ -7,7 +7,7 @@ def self.add_determinator_proc proc { |item| format = item.site.config.dig('last-modified-at', 'date-format') use_git_cache = item.site.config.dig('last-modified-at', 'use-git-cache') - item.data['last_modified_at'] = Determinator.new(item.site.source, item.path, + item.data['last_modified_at'] = Determinator.new(item.site.source, item.relative_path, format, use_git_cache) } end From b4eb87d516edb42ced8af2182bb3b386a7d51693 Mon Sep 17 00:00:00 2001 From: Khemarato Bhikkhu Date: Sun, 14 Nov 2021 14:35:08 +0700 Subject: [PATCH 3/5] Add option for setting page.date automatically with the file creation time --- README.md | 2 + lib/jekyll-last-modified-at/determinator.rb | 60 ++++++++++++++++++--- lib/jekyll-last-modified-at/git.rb | 41 +++++++++----- lib/jekyll-last-modified-at/hook.rb | 15 +++--- 4 files changed, 88 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index f833e8c..55aecb4 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,8 @@ last-modified-at: date-format: '%d-%b-%y' ``` +You can also set `override-date: true` to have "last-modified-at" determine the datetime the file was first commited and use that to automatically set the post's `date` field. Will use `ctime` if there's no git information. 
+ For sites with lots of documents using `last_modified_at`, there may be render performance improvement via: diff --git a/lib/jekyll-last-modified-at/determinator.rb b/lib/jekyll-last-modified-at/determinator.rb index 789e531..3d7075e 100644 --- a/lib/jekyll-last-modified-at/determinator.rb +++ b/lib/jekyll-last-modified-at/determinator.rb @@ -4,21 +4,24 @@ module Jekyll module LastModifiedAt class Determinator @repo_cache = {} - @path_cache = {} + @last_mod_cache = {} + @first_mod_cache = {} class << self # attr_accessor so we can flush externally attr_accessor :repo_cache - attr_accessor :path_cache + attr_accessor :last_mod_cache + attr_accessor :first_mod_cache end attr_reader :site_source, :page_path, :use_git_cache attr_accessor :format - def initialize(site_source, page_path, format = nil, use_git_cache = false) # rubocop:disable Style/OptionalBooleanParameter + def initialize(site_source, page_path, format = nil, use_git_cache = false, first_time = false) # rubocop:disable Style/OptionalBooleanParameter @site_source = site_source @page_path = page_path @format = format || '%d-%b-%y' @use_git_cache = use_git_cache + @first_time = first_time end def git @@ -32,13 +35,34 @@ def formatted_last_modified_date last_modified_at_time.strftime(@format) end + def formatted_first_modified_date + first_modified_at_time.strftime(@format) + end + + def first_modified_at_time + return self.class.first_mod_cache[page_path] unless self.class.first_mod_cache[page_path].nil? + + raise Errno::ENOENT, "#{absolute_path_to_article} does not exist!" unless File.exist? absolute_path_to_article + + self.class.first_mod_cache[page_path] = Time.at(first_modified_at_unix.to_i) + self.class.first_mod_cache[page_path] + end + + def first_modified_at_unix + if git.git_repo? + first_commit_date = git.first_commit_date(relative_path_from_git_dir, use_git_cache) + first_commit_date.nil? || first_commit_date.empty? ? 
ctime(absolute_path_to_article) : first_commit_date + else + ctime(absolute_path_to_article) + end + end + def last_modified_at_time - return self.class.path_cache[page_path] unless self.class.path_cache[page_path].nil? + return self.class.last_mod_cache[page_path] unless self.class.last_mod_cache[page_path].nil? raise Errno::ENOENT, "#{absolute_path_to_article} does not exist!" unless File.exist? absolute_path_to_article - self.class.path_cache[page_path] = Time.at(last_modified_at_unix.to_i) - self.class.path_cache[page_path] + self.class.last_mod_cache[page_path] = Time.at(last_modified_at_unix.to_i) end def last_modified_at_unix @@ -52,11 +76,27 @@ def last_modified_at_unix end def to_s - @to_s ||= formatted_last_modified_date + if @first_time + @to_s ||= formatted_first_modified_date + else + @to_s ||= formatted_last_modified_date + end end def to_liquid - @to_liquid ||= last_modified_at_time + if @first_time + @to_liquid ||= first_modified_at_time + else + @to_liquid ||= last_modified_at_time + end + end + + def to_time + to_liquid + end + + def strftime(*args) + return to_liquid().strftime(*args) end private @@ -77,6 +117,10 @@ def relative_path_from_git_dir def mtime(file) File.mtime(file).to_i.to_s end + + def ctime(file) + File.ctime(file).to_i.to_s + end end end end diff --git a/lib/jekyll-last-modified-at/git.rb b/lib/jekyll-last-modified-at/git.rb index c730f6f..057a28b 100644 --- a/lib/jekyll-last-modified-at/git.rb +++ b/lib/jekyll-last-modified-at/git.rb @@ -9,6 +9,7 @@ def initialize(site_source) @site_source = site_source @is_git_repo = nil @lcd_cache = {} + @lce_cache = {} end def top_level_directory @@ -37,7 +38,7 @@ def git_repo? def last_commit_date(path, use_git_cache = false) # rubocop:disable Style/OptionalBooleanParameter if use_git_cache - build_lcd_cache if @lcd_cache.empty? + build_cache if @lcd_cache.empty? 
@lcd_cache[path] else Executor.sh( @@ -54,16 +55,33 @@ def last_commit_date(path, use_git_cache = false) # rubocop:disable Style/Option end end + def first_commit_date(path, use_git_cache = false) # rubocop:disable Style/OptionalBooleanParameter + if use_git_cache + build_cache if @lce_cache.empty? + @lce_cache[path] + else + Executor.sh( + 'git', + '--git-dir', + top_level_directory, + 'log', + '--format="%ct"', + '--', + path + ).split("\n")[-1][/\d+/] + end + end + private # generates hash of `path => unix time stamp (string)` - def build_lcd_cache + def build_cache # example output: # - # %jekyll-last-modified-at:1621042992 + # %these-files-modified-at:1621042992 # # Dockerfile.production - # %jekyll-last-modified-at:1621041929 + # %these-files-modified-at:1621041929 # # assets/css/style.52513a5600efd4015668ccb9b702256e.css # assets/css/style.52513a5600efd4015668ccb9b702256e.css.gz @@ -74,24 +92,21 @@ def build_lcd_cache 'log', '--name-only', '--date=unix', - '--pretty=%%jekyll-last-modified-at:%ct' + '--pretty=%%these-files-modified-at:%ct' ) - lcd = nil + timestamp = nil lines.split("\n").each do |line| next if line.empty? 
- if line.start_with?('%jekyll-last-modified-at:') + if line.start_with?('%these-files-modified-at:') # new record - lcd = line.split(':')[1] + timestamp = line.split(':')[1] next end - # we already have it - next if @lcd_cache[line] - - # we don't have it - @lcd_cache[line] = lcd + @lcd_cache[line] = timestamp unless @lcd_cache.key?(line) + @lce_cache[line] = timestamp end end end diff --git a/lib/jekyll-last-modified-at/hook.rb b/lib/jekyll-last-modified-at/hook.rb index 85f0306..849a126 100644 --- a/lib/jekyll-last-modified-at/hook.rb +++ b/lib/jekyll-last-modified-at/hook.rb @@ -9,18 +9,15 @@ def self.add_determinator_proc use_git_cache = item.site.config.dig('last-modified-at', 'use-git-cache') item.data['last_modified_at'] = Determinator.new(item.site.source, item.relative_path, format, use_git_cache) + if item.site.config.dig('last-modified-at', 'override-date') + # The "date" field will be converted to a string first by Jekyll and it must be + # in the format given below: https://jekyllrb.com/docs/variables/#page-variables + item.data['date'] = Determinator.new(item.site.source, item.relative_path, + '%Y-%m-%d %H:%M:%S %z', use_git_cache, true) + end } end - Jekyll::Hooks.register :site, :after_reset do |site| - use_git_cache = site.config.dig('last-modified-at', 'use-git-cache') - if use_git_cache - # flush the caches so we can detect commits while server is running - Determinator.repo_cache = {} - Determinator.path_cache = {} - end - end - Jekyll::Hooks.register :posts, :post_init, &Hook.add_determinator_proc Jekyll::Hooks.register :pages, :post_init, &Hook.add_determinator_proc Jekyll::Hooks.register :documents, :post_init, &Hook.add_determinator_proc From c550c1da3ae38c6e018e369e9e509bb0da04b72c Mon Sep 17 00:00:00 2001 From: Khemarato Bhikkhu Date: Mon, 15 Nov 2021 05:36:03 +0700 Subject: [PATCH 4/5] address some PR feedback --- README.md | 2 +- lib/jekyll-last-modified-at/determinator.rb | 2 -- lib/jekyll-last-modified-at/git.rb | 2 ++ 
lib/jekyll-last-modified-at/hook.rb | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 55aecb4..d6bd8f5 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ last-modified-at: date-format: '%d-%b-%y' ``` -You can also set `override-date: true` to have "last-modified-at" determine the datetime the file was first commited and use that to automatically set the post's `date` field. Will use `ctime` if there's no git information. +You can also set `set-page-date: true` to have the plugin determine the datetime the file was first commited and use that to automatically set the post's default `date`. This will use the file's `ctime` if there's no git information. For sites with lots of documents using `last_modified_at`, there may be render performance improvement via: diff --git a/lib/jekyll-last-modified-at/determinator.rb b/lib/jekyll-last-modified-at/determinator.rb index 3d7075e..bdbd7a0 100644 --- a/lib/jekyll-last-modified-at/determinator.rb +++ b/lib/jekyll-last-modified-at/determinator.rb @@ -28,7 +28,6 @@ def git return self.class.repo_cache[site_source] unless self.class.repo_cache[site_source].nil? self.class.repo_cache[site_source] = Git.new(site_source) - self.class.repo_cache[site_source] end def formatted_last_modified_date @@ -45,7 +44,6 @@ def first_modified_at_time raise Errno::ENOENT, "#{absolute_path_to_article} does not exist!" unless File.exist? 
absolute_path_to_article self.class.first_mod_cache[page_path] = Time.at(first_modified_at_unix.to_i) - self.class.first_mod_cache[page_path] end def first_modified_at_unix diff --git a/lib/jekyll-last-modified-at/git.rb b/lib/jekyll-last-modified-at/git.rb index 057a28b..e34f96e 100644 --- a/lib/jekyll-last-modified-at/git.rb +++ b/lib/jekyll-last-modified-at/git.rb @@ -65,6 +65,8 @@ def first_commit_date(path, use_git_cache = false) # rubocop:disable Style/Optio '--git-dir', top_level_directory, 'log', + '--follow', + '--diff-filter=A', '--format="%ct"', '--', path diff --git a/lib/jekyll-last-modified-at/hook.rb b/lib/jekyll-last-modified-at/hook.rb index 849a126..7b898b7 100644 --- a/lib/jekyll-last-modified-at/hook.rb +++ b/lib/jekyll-last-modified-at/hook.rb @@ -9,7 +9,7 @@ def self.add_determinator_proc use_git_cache = item.site.config.dig('last-modified-at', 'use-git-cache') item.data['last_modified_at'] = Determinator.new(item.site.source, item.relative_path, format, use_git_cache) - if item.site.config.dig('last-modified-at', 'override-date') + if item.site.config.dig('last-modified-at', 'set-page-date') # The "date" field will be converted to a string first by Jekyll and it must be # in the format given below: https://jekyllrb.com/docs/variables/#page-variables item.data['date'] = Determinator.new(item.site.source, item.relative_path, From f6fa6213d83920c0b820fc8e62fae67259a111d7 Mon Sep 17 00:00:00 2001 From: Khemarato Bhikkhu Date: Mon, 15 Nov 2021 19:17:51 +0700 Subject: [PATCH 5/5] better README --- README.md | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index d6bd8f5..a59a0d7 100644 --- a/README.md +++ b/README.md @@ -24,9 +24,6 @@ plugins: last-modified-at: date-format: '%d-%b-%y' ``` - -You can also set `set-page-date: true` to have the plugin determine the datetime the file was first commited and use that to automatically set the post's default `date`. 
This will use the file's `ctime` if there's no git information. - For sites with lots of documents using `last_modified_at`, there may be render performance improvement via: @@ -84,3 +81,18 @@ To format such a time, you'll need to rely on Liquid's `date` filter: ``` (It's generally [more performant to use the `page.last_modified_at` version](https://github.com/gjtorikian/jekyll-last-modified-at/issues/24#issuecomment-55431108) of this plugin.) + +## `page.date` + +Additionally, you can have this plugin automatically set a default `date` value on every page based on when the file was **first** committed in git. To enable this, set `set-page-date` to `true` in your config yaml: + + ```yml +plugins: + - jekyll-last-modified-at + +last-modified-at: + set-page-date: true +``` + +If a post's date is already set via [the filename](https://jekyllrb.com/docs/posts/#creating-posts) or a page's date is set in its [frontmatter](https://jekyllrb.com/docs/variables/#page-variables), those values will override the value provided by this plugin. If a git date isn't available, `ctime` is used. +