-
-
Notifications
You must be signed in to change notification settings - Fork 6
/
filter-and-transform.sh
executable file
·175 lines (166 loc) · 6.59 KB
/
filter-and-transform.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
#!/usr/bin/env bash
#
# This script transitions a lesson repository to use The Carpentries Workbench
# in two steps:
#
# 1. Use git-filter-repo to create a copy of the lesson and rewrite the git
# history without boilerplate, styling, or generated content
# 2. Use `transform-lesson.R` to transform the lesson structure from Jekyll to
# workbench, moving files and rewriting markdown syntax.
#
# Requirements
#
# 1. A lesson repository in a sub-directory of this repository (e.g.
# swcarpentry/r-novice-gapminder, included as a submodule)
# 2. An R script associated with the repository (e.g.
# swcarpentry/r-novice-gapminder.R)
# 3. transform-lesson.R
# 4. dependencies.R
# 5. git-filter-repo (included as a submodule in the directory)
# 6. pat.sh to get github personal access token
# 7. git
#
# Usage
#
# filter-and-transform.sh <out> <post> [paths] [callback]
#
# <out> a JSON file to contain a record of the commits generated by
# transform-lesson.R
# <post> an R script that performs post-transformation cleaning in the format
# of <user>/<repo>.R
# [paths] a file that lists the paths that _should not_ be included in the
# workbench repository in the format specified by git-filter-repo:
# <https://htmlpreview.github.io/?https://github.com/newren/git-filter-repo/blob/docs/html/git-filter-repo.html#_filtering_based_on_many_paths>
# By default, this is filter-list.txt
# [callback] a message callback that is used to filter commit messages.
# (see https://htmlpreview.github.io/?https://github.com/newren/git-filter-repo/blob/docs/html/git-filter-repo.html#CALLBACKS)
# if this is missing it will evaluate the callback in `message-callback.txt`.
# To perform no modification, use `"return message"` in this place
#
# Output
#
# - a transformed lesson in the same path as <out>
# - a json file (<out>) that records the commit hashes and files associated
# with those commit hashes
#
# Example
#
# filter-and-transform.sh \
# sandpaper/carpentries/instructor-training.json \
# carpentries/instructor-training.R
#
# This will create a transformation of carpentries/instructor-training/ in
# sandpaper/carpentries/instructor-training/ and a record of the commits that
# created the file transformations in
# sandpaper/carpentries/instructor-training.json
# For the Makefile, <out> is a JSON file, but the lesson is written to a
# directory of the same name, so the file extension is stripped with parameter
# expansion:
# https://www.gnu.org/software/bash/manual/html_node/Shell-Parameter-Expansion.html
#
# Both <out> and <post> are required. An empty output directory would turn the
# later `cd` into `cd $HOME`, so refuse to continue without them.
if [[ $# -lt 2 || -z "${1%.*}" || -z "${2}" ]]; then
  echo "Usage: $(basename "${0}") <out> <post> [paths] [callback]" >&2
  exit 2
fi
CWD=$(pwd)
OUT=${1%.*} # No file extension
SCRIPT=${2}
FILTER=${3:-${CWD}/filter-list.txt}
# git-filter-repo is run from inside ${OUT}; anchor a relative [paths] argument
# that names a file under ${CWD} before the working directory changes.
if [[ ${FILTER} != /* && -e "${CWD}/${FILTER}" ]]; then
  FILTER="${CWD}/${FILTER}"
fi
REPO="${SCRIPT%.*}" # Repo is script with no file extension
BASE="$(basename "${REPO}")"
GHP="$(./pat.sh)"
# Where a previously built site/ directory is parked while ${OUT} is recreated.
# It sits next to ${OUT} (not inside it) so that it survives the `rm -rf` below.
SITE_STASH="$(dirname "${OUT}")/site-${BASE}"

# CLEANING ---------------------------------------------------------------------
#
# Move out the site/ directory in case it has previously been built (keeping
# the memory alive). Failing to stash it is not fatal, but it is reported.
if [[ -d "${OUT}/site/" ]]; then
  mv "${OUT}/site/" "${SITE_STASH}" \
    || echo "warning: could not stash ${OUT}/site/ in ${SITE_STASH}" >&2
fi
# removing the directory to make a fresh clone for git-filter-repo
# (${OUT:?} aborts instead of expanding to an empty path)
rm -rf "${OUT:?}"
# make a fresh clone of the submodule to the output directory because
# git-filter-repo has a safety measure to avoid overwriting a repository that
# has not been freshly cloned.
# <https://htmlpreview.github.io/?https://github.com/newren/git-filter-repo/blob/docs/html/git-filter-repo.html#FRESHCLONE>
git clone --no-local ".git/modules/${REPO}" "${OUT}" || {
  echo "error: could not clone .git/modules/${REPO} into ${OUT}" >&2
  exit 1
}

# FILTERING --------------------------------------------------------------------
#
# This process will filter out the commits that originated from the styles
# repository and contribute nothing to the lesson content. This includes styling
# files AND boilerplate files like LICENSE.md
PRODUCTION="${PRODUCTION:-}"
# when the callback is not blank, that means that we are likely using
# this in production and will need to reflect that in the output
if [[ -n "${4:-}" ]]; then
  PRODUCTION="true"
fi
# Without an explicit callback, the contents of message-callback.txt are passed
# through `eval echo` so that shell expansions inside that file are resolved.
# The inner substitution is intentionally left unquoted to keep that behaviour.
# NOTE(review): this evaluates the file as shell code; it must stay trusted.
# shellcheck disable=SC2046
CALLBACK=${4:-$(eval echo $(cat "${CWD}/message-callback.txt"))}
echo -e "\033[1mConverting \033[38;5;208m${OUT}\033[0;00m...\033[22m"
# Everything below up to `cd "${CWD}"` rewrites the repository in the current
# directory, so it must never run outside of the fresh clone.
cd "${OUT}" || {
  echo "error: could not enter ${OUT}" >&2
  exit 1
}
git-filter-repo \
  --prune-empty=always \
  --invert-paths \
  --paths-from-file "${FILTER}" \
  --message-callback "${CALLBACK}" \
  2>&1 | tee "${CWD}/${OUT}-filter.log"

# SETTING THE REMOTE -----------------------------------------------------------
# Update our branch and remote
ORIGIN="https://github.com/fishtree-attempt/${BASE}.git"
CURRENT_BRANCH=$(git branch --show-current)
echo -e "\033[1mSetting origin to \033[38;5;208m${ORIGIN}\033[0;00m...\033[22m"
# Test for `origin` itself rather than for any remote: `set-url origin` fails
# when origin does not exist, even if other remotes do.
if git remote get-url origin > /dev/null 2>&1; then
  git remote set-url origin "${ORIGIN}"
else
  git remote add origin "${ORIGIN}"
fi
if [[ "${CURRENT_BRANCH}" != 'main' ]]; then
  echo -e "\033[1mSetting default branch from \033[38;5;208m${CURRENT_BRANCH}\033[0;00m to \033[38;5;208mmain\033[0;00m...\033[22m"
fi
git branch -m main

# Back to our home and move the site back where it belongs
cd "${CWD}" || {
  echo "error: could not return to ${CWD}" >&2
  exit 1
}
if [[ -d "${SITE_STASH}" ]]; then
  mv "${SITE_STASH}" "${OUT}/site" \
    || echo "warning: could not restore ${SITE_STASH} to ${OUT}/site" >&2
fi
echo -e "... \033[1m\033[38;5;208mdone\033[0;00m\033[22m"
# TRANSFORM --------------------------------------------------------------------
#
# Runs the R transformation for the lesson. PROD and GITHUB_PAT are passed to
# Rscript through its environment only. Output (stdout and stderr) is shown on
# the terminal and written to ${CWD}/${OUT}-filter.log.
# NOTE(review): this is the same log file that the filtering step writes and
# `tee` is used without -a, so the earlier contents are replaced.
#
# R Ecology Lesson was not built the same way as other Carpentries lessons, so
# it runs through its own script.
if [[ "${SCRIPT}" == 'datacarpentry/R-ecology-lesson.R' ]]; then
  PROD="${PRODUCTION}" GITHUB_PAT="${GHP}" Rscript "${SCRIPT}" \
    --build \
    --funs functions.R \
    --template template/ \
    --output "${OUT}" \
    "${REPO}" \
    2>&1 | tee "${CWD}/${OUT}-filter.log"
else
  case "${REPO}" in
    'librarycarpentry/lc-shell')
      # replace bash with .language bash in the new repository until
      # https://github.com/LibraryCarpentry/lc-shell/commit/6807b96f674764469047346f435ba74ee44f6617
      # is merged.
      #
      # The edit is made on a temporary copy so the submodule is not modified.
      # Stop if no temporary directory is available: an empty ${tmp} would
      # make the paths below resolve against the filesystem root.
      tmp=$(mktemp -d) || {
        echo "error: could not create a temporary directory" >&2
        exit 1
      }
      # Remove the empty directory so that `cp -r` creates ${tmp} as the copy
      # of the repository instead of nesting the repository inside it.
      rm -r "${tmp}"
      cp -r "${REPO}" "${tmp}"
      # Rewrites the first `.bash` on each line of the markdown files in
      # _episodes/, _extras/, and the top level of the copy.
      sed -i -r -e 's/\.bash/\.language-bash/' "${tmp}"/{_episodes,_extras,}/*.md
      REPO="${tmp}"
      ;;
    'datacarpentry/ecology-workshop' | 'datacarpentry/genomics-workshop' | 'datacarpentry/socialsci-workshop' | 'datacarpentry/geospatial-workshop')
      # Workshop lessons need an episode seed before running
      tmp=$(mktemp -d) || {
        echo "error: could not create a temporary directory" >&2
        exit 1
      }
      # Same copy idiom as above: drop the empty directory, then let `cp -r`
      # recreate it as a copy of the repository.
      rm -r "${tmp}"
      cp -r "${REPO}" "${tmp}"
      REPO="${tmp}"
      mkdir -p "${REPO}/_episodes" "${OUT}/episodes/"
      cp "${CWD}/placeholder-episode.txt" "${REPO}/_episodes/placeholder.md"
      ;;
    *)
      # All other lessons are transformed directly from the submodule.
      :
      ;;
  esac
  PROD="${PRODUCTION}" GITHUB_PAT="${GHP}" Rscript transform-lesson.R \
    --build \
    --fix-liquid \
    --funs functions.R \
    --template template/ \
    --output "${OUT}" \
    "${REPO}" \
    "${SCRIPT}" \
    2>&1 | tee "${CWD}/${OUT}-filter.log"
fi