Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100755 280 lines (245 sloc) 9.155 kB
54e85fa @jgm initial commit
authored
1 #!/usr/bin/perl -w
2 #
f0bf547 @jgm Bug fixes due to Dan Robins
authored
3 # sep-offprint 0.7 - John MacFarlane - January 23, 2007
4 # Incorporated several bug fixes due to Dan Robins:
5 # + fixed script to include supplements, if present
6 # + hyperlinks now marked in blue
7 # + removed unnecessary call to lwp-rget
8 #
e916cb0 @jgm changed version to 0.6
authored
9 # sep-offprint 0.6 - John MacFarlane - August 30, 2006
fc5066a @jgm version bump
authored
10 # sep-offprint 0.5 - John MacFarlane - August 25, 2006
c71db0e @jgm remove header; use local copy; change entities to pictures when needed
authored
11 # sep-offprint 0.4 - John MacFarlane - August 22, 2006
54e85fa @jgm initial commit
authored
12 # sep-offprint 0.3 - John MacFarlane - May 25, 2005
13 #
14 # produces a PDF or postscript "offprint" of a Stanford
15 # Encyclopedia of Philosophy (SEP) article
16 #
17 # Argument is an entry name from SEP, as it appears in the URL.
18 # For example, to get the article on classical logic, which is at
19 # http://plato.stanford.edu/entries/logic-classical/, just type
20 #
21 # perl sep-offprint logic-classical
22 #
f0bf547 @jgm Bug fixes due to Dan Robins
authored
23 # and it will create logic-classical.pdf.
54e85fa @jgm initial commit
authored
24 #
25 # There are many command-line options. For a list, type
26 #
f0bf547 @jgm Bug fixes due to Dan Robins
authored
27 # perl sep-offprint --help
54e85fa @jgm initial commit
authored
28 #
b294df2 @jgm use lwp-rget instead of wget
authored
29 # The programs html2ps and ps2pdf must be in the user's path:
54e85fa @jgm initial commit
authored
30 #
31 # html2ps can be found at http://user.it.uu.se/~jan/html2ps.html.
32 # Download the tarball or zip file and run the "install" script.
33 #
34 # ps2pdf is part of Ghostscript -- many users will have it
35 # already: http://www.cs.wisc.edu/~ghost/doc/AFPL/get851.htm
36 #
b294df2 @jgm use lwp-rget instead of wget
authored
37 # In addition, the LWP package for Perl must be installed.
38 #
54e85fa @jgm initial commit
authored
39 # For more information and updates, see
40 # http://philosophy.berkeley.edu/macfarlane/sep-offprint.html
41
42 use Getopt::Long;
c71db0e @jgm remove header; use local copy; change entities to pictures when needed
authored
43 use File::Temp qw/ tempdir /;
4e7bee9 @jgm use File::Copy instead of cp; other minor improvements
authored
44 use File::Copy;
c71db0e @jgm remove header; use local copy; change entities to pictures when needed
authored
45 use Cwd;
54e85fa @jgm initial commit
authored
46
# printhelp - print the usage message and terminate the script.
#
# Takes no arguments and never returns: the message is raised with die(),
# so it goes to STDERR and the process exits with a non-zero status.  The
# text ends in a newline, which keeps Perl from appending "at ... line N".
#
# The option names and defaults listed here mirror the GetOptions() table
# and the default assignments in the main script: the font option is
# spelled --font, and the alignment value actually written into the
# html2ps configuration by default is "justify".
#
# NOTE(review): the second example passes several entry names, but the
# main script only reads $ARGV[0] -- confirm whether multiple entries are
# meant to be supported.
sub printhelp {
    die <<'END_HELP';
Produces a PDF offprint from a Stanford Encyclopedia of Philosophy article.
(http://plato.stanford.edu/)

Usage:    sep-offprint [options] <entry name>

Examples: sep-offprint russell
          sep-offprint frege russell wittgenstein
          sep-offprint --1up --ps --paper a4 frege
          sep-offprint --1up --font Helvetica frege

Options:

  --1up                 print one page per sheet, portrait orientation
  --2up                 print two pages per sheet, landscape orientation (default)
  --ps                  produce postscript (PS) output
  --pdf                 produce PDF output (default)
  --font <font>         use <font> (Times, Helvetica, Palatino, Courier) (default Times)
  --size <size>         use <size> (10pt, 12pt, 14pt, 16pt) (default 14pt)
  --align <align>       use <align> (left, justify) (default justify)
  --paper <papersize>   specify <papersize> (letter, legal, a4) (default letter)
  --localpath <path>    look for entry in a subdirectory of <path> on local filesystem
  --help                this message
  --version             prints version number
END_HELP
}
73
# ---------------------------------------------------------------------
# Command-line handling.
#
# Option values land in the scalars below; anything left in @ARGV after
# GetOptions() is treated as the entry name.
# ---------------------------------------------------------------------
my ($oneup, $twoup, $ps, $pdf, $fontfamily, $fontsize, $textalign,
    $papersize, $localpath, $help, $version);

# An unknown or malformed option makes GetOptions() return false; show
# the usage text instead of silently carrying on with defaults.
# "fontfamily" is accepted as an alias of "font".
GetOptions(
    '1up|1'             => \$oneup,
    '2up|2'             => \$twoup,
    'ps'                => \$ps,
    'pdf'               => \$pdf,
    'font|fontfamily=s' => \$fontfamily,
    'size=s'            => \$fontsize,
    'align=s'           => \$textalign,
    'paper=s'           => \$papersize,
    'localpath=s'       => \$localpath,
    'help|h'            => \$help,
    'version|v'         => \$version,
) or printhelp();

# --help and --version are answered before an entry name is required, so
# "sep-offprint --version" works without further arguments.
printhelp() if $help;
die "sep-offprint ver. 0.7\n" if $version;
printhelp() if @ARGV < 1;

my $entryname = $ARGV[0];

# remove uppercase and spaces:
$entryname =~ tr/A-Z/a-z/;
$entryname =~ tr/ /-/;

# remove SEP url if specified (http or https):
$entryname =~ s{https?://plato\.stanford\.edu/entries/}{};
# remove /index.html (or any other trailing path) if specified:
$entryname =~ s{/.*}{};

# Fill in defaults for everything the user did not specify.
$pdf        = 1         unless $pdf or $ps;
$twoup      = $oneup ? 0 : 1;   # --1up wins; otherwise two pages per sheet
$fontsize   = "14pt"    unless $fontsize;
$fontfamily = "Times"   unless $fontfamily;
$textalign  = "justify" unless $textalign;
$papersize  = "letter"  unless $papersize;

# The value is written verbatim into a CSS text-align rule, where the
# keyword is "justify"; accept the spelling "justified" as a synonym.
$textalign = "justify" if lc($textalign) eq "justified";
106
c71db0e @jgm remove header; use local copy; change entities to pictures when needed
authored
# Scratch directory for all intermediate files; CLEANUP => 1 asks
# File::Temp to remove it when the script exits.
my $temp = tempdir( CLEANUP => 1 );

# Remember the starting directory: the finished offprint is written
# there after we chdir into the scratch directory.
my $current = getcwd;

# Get all the source files and put them in temp directory $temp.
# $footer is the text shown at the right of each page footer.
my $footer;

if ($localpath) {
    $footer = "$localpath/$entryname/";

    # Copy before chdir so that a relative $localpath still resolves.
    for my $source (glob "$localpath/$entryname/*.*") {
        copy($source, $temp)
            or warn "unable to copy $source to $temp: $!\n";
    }
    chdir $temp or die "unable to chdir to $temp: $!";
}
else {
    $footer = "http://plato.stanford.edu/entries/$entryname/";
    chdir $temp or die "unable to chdir to $temp: $!";

    # List form of system(): the entry name comes from the command line
    # and must not be interpreted by a shell.
    system('lwp-rget', '--quiet',
           "http://plato.stanford.edu/entries/$entryname/index.html") == 0
        or warn "lwp-rget could not fetch entry '$entryname' (status $?)\n";
}
125
54e85fa @jgm initial commit
authored
# Create blank html file to work around html2ps bug.
#
# Without this blank file after notes.html, html2ps will cut off
# the last page of an entry if it occurs in the left column in 2up mode.
#
# The file is written into the current directory and its name is later
# passed to html2ps as the final input document.

my $blank = "blankpage";

open my $blank_fh, '>', $blank or die "unable to open $blank: $!";

print {$blank_fh} <<'EOF';
<html>
<head>
<title>&nbsp;</title>
</head>
<body>
<p>&nbsp;</p>
</body>
</html>
EOF

# Buffered write errors only surface at close, so check it.
close $blank_fh or die "unable to write $blank: $!";
147
b2fde1b @jgm cleanup on sep-offprint
authored
148 # Create a configuration file with appropriate footers
54e85fa @jgm initial commit
authored
149 # and run html2ps and ps2pdf on that argument.
150
# Name of the html2ps configuration file; the epoch time is appended to
# the base name.
my $html2psrc = "html2psrc" . time;

open my $rc_fh, '>', $html2psrc or die "unable to open $html2psrc: $!";

# The heredoc interpolates the user's layout choices.  \@ and \$ are
# escaped so that "@page", "@html2ps", "$T" and "$N" reach the file
# literally instead of being interpolated by Perl.  $twoup drives both
# the two-up and the landscape setting.
print {$rc_fh} <<EOF;
BODY {
  font-size: $fontsize;
  font-family: $fontfamily;
  text-align: $textalign;
}
A:link {
  color: blue;
}
\@page {
  margin-left: 2.5cm;
  margin-right: 2.5cm;
  margin-top: 2.5cm;
  margin-bottom: 2.5cm;
}
\@html2ps {
  option {
    twoup: $twoup;
    landscape: $twoup;
    number: 0;
  }
  paper { type: $papersize }
  header {
    right: "STANFORD ENCYCLOPEDIA OF PHILOSOPHY";
    left: \$T;
  }
  footer {
    left: \$N;
    right: $footer;
  }
}
EOF

# Buffered write errors only surface at close, so check it.
close $rc_fh or die "unable to write $html2psrc: $!";

# name of temporary file to hold postscript output of html2ps
my $pstemp = "pstemp";
54e85fa @jgm initial commit
authored
192
b2fde1b @jgm cleanup on sep-offprint
authored
193 # preprocess html: takes filename as parameter
194 # (1) remove navigation bars, etc.
195 # (2) replace &#9633; entity reference with appropriate image
c71db0e @jgm remove header; use local copy; change entities to pictures when needed
authored
196
# preprocess_html - rewrite one HTML file in place for html2ps.
#
# Argument: the name of the file to process.  For backward compatibility
# the file name is taken from $_ when the sub is called without arguments.
#
# Steps:
#   (1) drop everything between <body> and the first <h1>/<h2>
#       (navigation bars etc.), opening a <div id="content"> instead;
#   (2) turn the "xsmall" span inside the <h1> into an ordinary paragraph;
#   (3) center the <div id="foot"> block;
#   (4) replace numeric character references with named entities, plain
#       ASCII letters, nothing at all (combining accents), or images from
#       plato.stanford.edu/symbols.
#
# Dies if the file cannot be read or written.  Returns nothing.
sub preprocess_html {
    my $file = @_ ? shift : $_;

    # slurp contents of file
    open my $in, '<', $file or die "Couldn't open $file to read: $!";
    my $contents = do { local $/; <$in> };
    close $in;
    $contents = '' unless defined $contents;

    # get rid of header stuff - in index.html, everything between <body>
    # and <h1>; in notes.html, everything between <body> and <h2>
    $contents =~ s/<body>.*?<h(1|2)>/<body><div id="content"><h$1>/gs;
    # make publication date into regular paragraph
    $contents =~ s/<br \/><span class="xsmall">(.*)<\/span><\/h1>/<\/h1><p>$1<\/p>/g;
    # center copyright notice
    $contents =~ s/<div id="foot">(.*?)<\/div>/<center>$1<\/center>/gs;

    # replace unicode character references
    my %replacement_for = (
        '&#133;'  => '&hellip;',
        '&#145;'  => '&lsquo;',
        '&#146;'  => '&rsquo;',
        '&#147;'  => '&ldquo;',
        '&#148;'  => '&rdquo;',
        '&#149;'  => '&bull;',
        '&#150;'  => '&minus;',
        # accented Latin letters: fall back to the unaccented letter
        '&#257;'  => 'a',
        '&#261;'  => 'a',
        '&#263;'  => 'c',
        '&#269;'  => 'c',
        '&#281;'  => 'e',
        '&#299;'  => 'i',
        '&#321;'  => 'L',
        '&#322;'  => 'l',
        '&#324;'  => 'n',
        '&#333;'  => 'o',
        '&#345;'  => 'r',
        '&#346;'  => 'S',
        '&#347;'  => 's',
        '&#351;'  => 's',
        '&#363;'  => 'u',
        '&#365;'  => 'u',
        '&#369;'  => 'u',
        '&#378;'  => 'z',
        '&#380;'  => 'z',
        '&#381;'  => 'Z',
        '&#599;'  => 'u',
        # accent glyph references: simply dropped
        '&#768;'  => '',
        '&#769;'  => '',
        '&#770;'  => '',
        '&#771;'  => '',
        '&#772;'  => '',
        '&#773;'  => '',
        '&#775;'  => '',
        '&#803;'  => '',
        '&#8209;' => '-',
        # symbols replaced by images
        '&#8600;' => '<img alt="southeast-arrow" src="http://plato.stanford.edu/symbols/searrow.gif">',
        '<sup>&#9484;</sup>' => '<img alt="left-corner-quote" src="http://plato.stanford.edu/symbols/l-corner-quote.gif">',
        '<sup>&#9488;</sup>' => '<img alt="right-corner-quote" src="http://plato.stanford.edu/symbols/r-corner-quote.gif">',
        '&#8463;' => '<img alt="hbar" src="http://plato.stanford.edu/symbols/hbar.gif">',
        '&#9633;' => '<img alt="Box" src="http://plato.stanford.edu/symbols/Box.gif">',
    );

    # Sorted keys give a stable order; \Q...\E makes each key match
    # literally rather than as a regex.
    for my $reference (sort keys %replacement_for) {
        my $replacement = $replacement_for{$reference};
        $contents =~ s/\Q$reference\E/$replacement/g;
    }

    # write back to file
    open my $out, '>', $file or die "Couldn't open $file to write: $!";
    print {$out} $contents;
    close $out or die "Couldn't finish writing $file: $!";

    return;
}
267
f0bf547 @jgm Bug fixes due to Dan Robins
authored
268 # preprocess all the html files in the working (i.e., temp) directory
# Preprocess every .htm/.html file in the working (i.e., temp) directory.
# The dot is escaped so that only a real extension matches.
opendir(my $workdir, ".") or die "unable to read working directory: $!";
my @html_files = grep { /\.html?$/ } readdir $workdir;
closedir $workdir;

# The loop aliases $_ to each name and also passes it explicitly, so
# the call works whether preprocess_html reads its argument or $_.
preprocess_html($_) for @html_files;

# Render index.html followed by the blank page into one postscript file.
# List-form system() keeps file names away from the shell.
system('html2ps', '-D', '-U', '-f', $html2psrc, '-W', 'b',
       '-o', $pstemp, 'index.html', $blank) == 0
    or warn "html2ps exited with status $?\n";

# system() returns 0 on success, hence the explicit comparison.
if ($pdf) {
    if (system('ps2pdf', "-sPAPERSIZE=$papersize", $pstemp,
               "$current/$entryname.pdf") == 0) {
        print "Created $entryname.pdf\n";
    }
    else {
        warn "ps2pdf failed (status $?); $entryname.pdf was not created\n";
    }
}

if ($ps) {
    if (copy($pstemp, "$current/$entryname.ps")) {
        print "Created $entryname.ps\n";
    }
    else {
        warn "unable to copy postscript output to $current/$entryname.ps: $!\n";
    }
}
54e85fa @jgm initial commit
authored
278
b2fde1b @jgm cleanup on sep-offprint
authored
279 # note: temporary directory will be deleted automatically on exit
Something went wrong with that request. Please try again.