/
compression.clj
145 lines (124 loc) · 5.65 KB
/
compression.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
(ns me.raynes.fs.compression
"Compression utilities."
(:require [clojure.java.io :as io]
[me.raynes.fs :as fs])
(:import (java.util.zip ZipFile GZIPInputStream)
(org.apache.commons.compress.archivers.tar TarArchiveInputStream
TarArchiveEntry)
(org.apache.commons.compress.compressors bzip2.BZip2CompressorInputStream
xz.XZCompressorInputStream)
(java.io ByteArrayOutputStream File)))
(defn- check-final-path-inside-target-dir!
  "Guards archive extraction against path traversal.

  Resolves both `f` (the file an entry would be written to) and `target-dir`
  to canonical paths and throws unless `f` lies strictly below `target-dir`.

  f          - java.io.File the entry would be expanded to.
  target-dir - java.io.File of the extraction root.
  entry      - the archive entry; only used as data in the thrown exception.

  Returns nil when the path is acceptable. Throws ex-info carrying `:entry`,
  `:entry-final-path` and `:target-dir` otherwise."
  [^File f ^File target-dir entry]
  (let [^String target-path (.getCanonicalPath target-dir)
        ;; The trailing separator keeps a sibling such as `/out-evil` from
        ;; matching the prefix `/out`. A filesystem root already ends with the
        ;; separator; appending a second one would make every entry fail.
        ^String prefix (if (.endsWith target-path File/separator)
                         target-path
                         (str target-path File/separator))]
    (when-not (.startsWith (.getCanonicalPath f) prefix)
      (throw (ex-info "Expanding entry would be created outside target dir"
                      {:entry entry
                       :entry-final-path f
                       :target-dir target-dir})))))
(defn unzip
  "Extracts the zip archive at `source` into `target-dir` and returns
  `target-dir`.

  Directory entries are skipped; parent directories of each extracted file
  are created as needed. Every destination path is validated with
  check-final-path-inside-target-dir! before anything is written, so an entry
  that would land outside `target-dir` aborts the extraction with an
  exception.

  With a single argument, `target-dir` defaults to `(name source)`.
  NOTE(review): for a string `source` that is the source path itself -
  confirm this default is intended."
  ([source]
   (unzip source (name source)))
  ([source target-dir]
   (with-open [archive (ZipFile. (fs/file source))]
     (let [root (fs/file target-dir)]
       (doseq [^java.util.zip.ZipEntry entry (enumeration-seq (.entries archive))
               :when (not (.isDirectory entry))]
         ;; (str entry) is the entry's string form, used as the relative path.
         (let [^File destination (fs/file target-dir (str entry))]
           (check-final-path-inside-target-dir! destination root entry)
           (fs/mkdirs (fs/parent destination))
           (io/copy (.getInputStream archive entry) destination)))))
   target-dir))
(defn- add-zip-entry
  "Writes entries to `zip-output-stream` from a flat sequence of alternating
  entry names and contents: `[name1 content1 name2 content2 ...]`.

  String content is printed through a java.io.PrintStream constructed without
  an explicit charset; any other content is written as a byte array.
  Processing continues while at least two more items remain, so a lone
  leftover item after the last complete pair is ignored."
  [^java.util.zip.ZipOutputStream zip-output-stream entries]
  (loop [[entry-name payload & pending] entries]
    (.putNextEntry zip-output-stream (java.util.zip.ZipEntry. ^String entry-name))
    ;; Strings and byte arrays need different write calls.
    (if-not (string? payload)
      (.write zip-output-stream ^bytes payload)
      (.print (java.io.PrintStream. zip-output-stream true) ^String payload))
    (.closeEntry zip-output-stream)
    (when (next pending)
      (recur pending))))
(defn make-zip-stream
  "Returns an input stream from which a zip archive can be read.

  Arguments are filename/content pairs, e.g.
  `(make-zip-stream [filename1 content1] [filename2 content2])`; the
  arguments are flattened, so a single collection of pairs works as well.
  Content may be a string or a byte array.

  The archive is produced on a background thread (a `future`) that writes
  into a pipe, and the returned stream reads from the other end of that
  pipe, so nothing has to be written to disk. The future is not returned,
  so an exception thrown while writing is not reported to the caller by
  this function."
  [& filename-content-pairs]
  (let [read-end  (java.io.PipedInputStream.)
        write-end (java.io.PipedOutputStream. read-end)]
    ;; Producer: fills the pipe; with-open closes the zip stream when done.
    (future
      (with-open [zip-out (java.util.zip.ZipOutputStream. write-end)]
        (add-zip-entry zip-out (flatten filename-content-pairs))))
    (io/input-stream read-end)))
(defn zip
  "Creates the zip file `filename` from in-memory content.

  The remaining arguments are filename/content pairs, e.g.
  `(zip \"out.zip\" [filename1 content1] [filename2 content2])`.
  Content may be a string or a byte array. The archive is generated by
  make-zip-stream and copied to `filename`."
  [filename & filename-content-pairs]
  ;; Close the read end of the pipe once copying ends, including when the
  ;; copy throws, so the stream from make-zip-stream is never left open.
  (with-open [^java.io.Closeable zip-in (make-zip-stream filename-content-pairs)]
    (io/copy zip-in (fs/file filename))))
(defn- slurp-bytes
  "Reads the whole file at `fpath` into memory and returns its contents as a
  byte array."
  [fpath]
  (with-open [in     (io/input-stream (fs/file fpath))
              buffer (ByteArrayOutputStream.)]
    (io/copy in buffer)
    (.toByteArray buffer)))
(defn make-zip-stream-from-files
  "Like make-zip-stream, but takes a sequential collection of file paths.

  Each path becomes one `[base-name content]` pair, where the name is
  `(fs/base-name path)` and the content is the file's bytes. The pairs are
  built lazily and handed to make-zip-stream as a single argument."
  [fpaths]
  (make-zip-stream
   (for [fpath fpaths]
     [(fs/base-name fpath) (slurp-bytes fpath)])))
(defn zip-files
  "Zips the files listed in `fpaths` into the single zip file `filename`.

  The path list `(fpath1 fpath2 ...)` is converted by
  make-zip-stream-from-files into filename/content pairs
  `([fpath1-basename fpath1-content] [fpath2-basename fpath2-content] ...)`,
  using each file's base name as its name inside the zip and the file's
  bytes as its content."
  [filename fpaths]
  ;; Close the archive stream once copying ends, including when the copy
  ;; throws, so it is never left open.
  (with-open [^java.io.Closeable zip-in (make-zip-stream-from-files fpaths)]
    (io/copy zip-in (fs/file filename))))
(defn- tar-entries
  "Returns a sequence of the entries remaining in the tar stream `tin`, or
  nil when there are none.

  The first entry is read when this function is called; each later entry is
  read from `tin` only when the rest of the sequence is realized."
  [^TarArchiveInputStream tin]
  (when-some [head (.getNextTarEntry tin)]
    (cons head (lazy-seq (tar-entries tin)))))
(defn untar
  "Extracts the tar archive at `source` into the directory `target`.

  Directory entries are skipped; parent directories of each extracted file
  are created as needed. Every destination path is validated with
  check-final-path-inside-target-dir! before anything is written. For
  entries that report `.isFile`, the last three octal digits of the entry's
  mode are applied to the extracted file via fs/chmod.

  With a single argument, `target` defaults to `(name source)`.
  NOTE(review): for a string `source` that is the source path itself -
  confirm this default is intended."
  ([source] (untar source (name source)))
  ([source target]
   (with-open [tin (TarArchiveInputStream. (io/input-stream (fs/file source)))]
     (let [root (fs/file target)]
       (doseq [^TarArchiveEntry entry (tar-entries tin)
               :when (not (.isDirectory entry))]
         (let [^File destination (fs/file target (.getName entry))]
           (check-final-path-inside-target-dir! destination root entry)
           (fs/mkdirs (fs/parent destination))
           ;; The tar stream is copied directly as the current entry's data.
           (io/copy tin destination)
           (when (.isFile entry)
             ;; "%05o" yields at least five digits, so the last three can
             ;; always be sliced off.
             (let [octal (format "%05o" (.getMode entry))
                   perms (subs octal (- (count octal) 3))]
               (fs/chmod perms (.getPath destination))))))))))
(defn gunzip
  "Decompresses the gzip file at `source` into the file `target`.

  With a single argument, `target` defaults to `(name source)`.
  NOTE(review): for a string `source` that is the source path itself -
  confirm this default is intended."
  ([source] (gunzip source (name source)))
  ([source target]
   ;; Both streams are closed when copying ends; binding the raw stream
   ;; separately also closes the file if the gzip header cannot be read.
   (with-open [raw (io/input-stream (fs/file source))
               in  (GZIPInputStream. raw)]
     (io/copy in (fs/file target)))))
(defn bunzip2
  "Decompresses the bzip2 file at `source` into the file `target`.

  With a single argument, `target` defaults to `(name source)`.
  NOTE(review): for a string `source` that is the source path itself -
  confirm this default is intended."
  ([source] (bunzip2 source (name source)))
  ([source target]
   ;; Both streams are closed when copying ends; binding the raw stream
   ;; separately also closes the file if the decompressor cannot be created.
   (with-open [raw (io/input-stream (fs/file source))
               in  (BZip2CompressorInputStream. raw)]
     (io/copy in (fs/file target)))))
(defn unxz
  "Decompresses the xz file at `source` into the file `target`.

  With a single argument, `target` defaults to `(name source)`.
  NOTE(review): for a string `source` that is the source path itself -
  confirm this default is intended."
  ([source] (unxz source (name source)))
  ([source target]
   ;; Both streams are closed when copying ends; binding the raw stream
   ;; separately also closes the file if the decompressor cannot be created.
   (with-open [raw (io/input-stream (fs/file source))
               in  (XZCompressorInputStream. raw)]
     (io/copy in (fs/file target)))))