From 4a86e3704c4a29104d4c461234e1948c40f9fa34 Mon Sep 17 00:00:00 2001
From: Boruch Baum
Date: Thu, 2 Sep 2021 16:05:00 -0400
Subject: [PATCH] w3m-filter: new and improved filter for github
+ covers many/most pages of the site
+ prunes out cruft and artifacts associated with javascript page
functions that won't work in emacs-w3m.
---
ChangeLog | 6 ++
w3m-filter.el | 213 ++++++++++++++++++++++++++++++++++++++++++--------
2 files changed, 185 insertions(+), 34 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 6f153852..c171c3bb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2021-09-02 Boruch Baum
+
+ * w3m-filter.el (w3m-filter-github-repo-main-page): Delete obsolete
+ function.
+ (w3m-filter-github, w3m-filter-configuration): Replace with new one.
+
2021-08-17 Katsumi Yamaoka
Add timeout to w3m-download
diff --git a/w3m-filter.el b/w3m-filter.el
index a92847bf..47a7de22 100644
--- a/w3m-filter.el
+++ b/w3m-filter.el
@@ -124,8 +124,8 @@
"A filter for Wikipedia"
"\\`http://.*\\.wikipedia\\.org/" w3m-filter-wikipedia)
(t "filter for github.com repository main page"
- "\\`http[s]?://github\\.com/[^/]+/[^/]+[/]?\\'"
- w3m-filter-github-repo-main-page)
+ "\\`http[s]?://github\\.com"
+ w3m-filter-github)
(t "xkcd filter" "\\`http[s]?://xkcd.com/" w3m-filter-xkcd)
(nil
("Remove inline frames in all pages"
@@ -1062,38 +1062,183 @@ READ MORE:\\([^<]+\\)\\(\\)?\\(
\\)?"
(while (re-search-forward "\n[\n\t ]+" nil t)
(replace-match "")))
-(defun w3m-filter-github-repo-main-page (url)
- "filter distractions for the main page of a github repository."
- (w3m-filter-delete-regions
- url
- "" nil t nil nil nil 1)
- (w3m-filter-delete-regions
- url
- "
" nil t nil nil nil 1)
- (insert "
")
- ;; NOTE: There is inconsistency in some pages. Some have DIV element
- ;; 'repository-topics-container', while others have 'overall-summary'.
- (w3m-filter-delete-regions
- url
- "
" nil t nil nil nil 1)
- (goto-char (point-min))
- (search-forward "
")
- (search-forward "
" nil t)
- (goto-char (match-beginning 0))
- (insert "
")
- (search-forward "" nil t)
- (insert "
")
- )
+(defun w3m-filter-github (url)
+ "filter for github.com."
+ (let (p0 p1)
+ (cond
+ ;; condition: home page
+ ((string-match "github.com/$" url)
+ (goto-char (point-min))
+ ;; improve readability of search form
+ (when (search-forward "
Search GitHub : \\&"))
+ ;; rm other search cruft
+ (w3m-filter-delete-regions url "
")
+ ;; rm broken sign-up
+ (and (search-forward "Email address" nil t)
+ (setq p0 (search-backward "" nil t 2)
+ (delete-region p0 (match-end 0)))
+ ;; rm line numbers in sample code
+ (w3m-filter-delete-regions url "
" nil nil nil nil nil 1)
+ ;; rm duplicate text lines
+ (w3m-filter-delete-regions url ""))
+ ;; condition: repository main page
+ ((string-match "github.com/[^/]+/[^/?&]+$" url)
+ ;; can't perform star or notification w/o JS, so delete
+ (w3m-filter-delete-regions url "" "" t nil nil nil nil 1)
+ (when (search-forward "forks" nil t)
+ (w3m-filter-delete-regions url "" nil nil t p0 p1))
+ ;; rm broken "switching branches/tags" via form
+ (goto-char (setq p1 (point-min)))
+ (and (search-forward "title=\"Switch branches or tags\"" nil t)
+ (search-backward "
" nil t)
+ (search-forward "
" nil t)
+ (delete-region p1 (point)))
+ (goto-char p1)
+ ;; rm 'launching github desktop' etc.
+ (and (search-forward "
" nil t)
+ (setq p1 (match-beginning 0))
+ (search-forward "Git stats" nil t)
+ (delete-region p1 (point)))
+ (goto-char p1)
+ (when (search-forward "Files " nil t)
+ (replace-match ""))
+ (goto-char (point-min))
+ (when (search-forward "Permalink" nil t)
+ (insert "Files
")
+ (setq p1 (point))
+ (and (search-forward "Commit time " nil t)
+ (search-forward " " nil t)
+ (delete-region p1 (point))))
+ ;; delimit end of files with
+ (goto-char (point-min))
+ (when (and (search-forward "" nil t)
+ (setq p1 (match-beginning 0))
+ (search-backward "")))
+ ;; condition: user-account main page
+ ((string-match "github.com/[^/]+$" url)
+ ;; rm second of duplicate account heading
+ (goto-char (point-min))
+ (and (search-forward "" nil t)
+ (search-forward "" nil t 2)
+ (delete-region p0 (match-end 0)))
+ ;; rm duplicate achievements
+ (dolist (elem
+ '(">Achievements"
+ "