-
Notifications
You must be signed in to change notification settings - Fork 0
/
unpack.rkt
111 lines (93 loc) · 3.67 KB
/
unpack.rkt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#lang racket
(provide (all-defined-out))
(require
file/unzip
file/glob
(prefix-in meta: "merge-meta.rkt"))
(struct archive (zip-loc temp-dir))
(define/match (extract the-archive)
[((archive zip-loc temp-dir))
(parameterize ([current-directory temp-dir])
(unzip zip-loc))])
;; from https://github.com/racket/racket/blob/master/racket/collects/racket/file.rkt
;; with copy-file's exists-ok? (and a hack for directories)
(define (copy-directory/files src dest
[exists-ok? #f]
#:keep-modify-seconds? [keep-modify-seconds? #f]
#:preserve-links? [preserve-links? #f])
(let loop ([src src] [dest dest])
(cond [(and preserve-links?
(link-exists? src))
(make-file-or-directory-link
(resolve-path src)
dest)]
[(file-exists? src)
(copy-file src dest exists-ok?)
(when keep-modify-seconds?
(file-or-directory-modify-seconds
dest
(file-or-directory-modify-seconds src)))]
[(directory-exists? src)
(if exists-ok?
(with-handlers ([exn:fail:filesystem? (const (void))])
(make-directory dest))
(make-directory dest))
(for-each (lambda (e)
(loop (build-path src e)
(build-path dest e)))
(directory-list src))]
[else (raise-not-a-file-or-directory 'copy-directory/files src)])))
;; from https://github.com/racket/racket/blob/master/racket/collects/racket/file.rkt
;; needed by above
(define (raise-not-a-file-or-directory who path)
(raise
(make-exn:fail:filesystem
(format "~a: encountered path that is neither file nor directory\n path: ~a"
who
path)
(current-continuation-marks))))
(define/match (copy-to-_data the-archive)
[((archive _ temp-dir))
(define channels
(filter (λ (name) (and (not (path-has-extension? name ".json"))
(directory-exists? name)))
(directory-list temp-dir #:build? #t)))
(for-each (λ (channel)
(copy-directory/files channel
(build-path "_data" (file-name-from-path channel))
#t))
channels )])
(define/match (delete the-archive)
[((archive _ temp-dir))
(delete-directory/files temp-dir)])
(define (preproc-json dir)
(for/async ([filepath (glob (build-path dir "**.json"))])
(define contents (file->string filepath))
(define unescaped (regexp-replace* #rx"\\\\/" contents "/"))
(display-to-file unescaped filepath #:exists 'truncate/replace)))
(define (run-main archives-dir)
(displayln "0. Make _data dir if needed")
(unless (directory-exists? "_data")
(make-directory "_data"))
(displayln "1. Gather the list of archives to process")
(define zip-locs
(sort (glob (build-path archives-dir "*.zip"))
path<?))
(displayln "2. Associate each with a temporary directory")
(define archives
(map (λ (zip-loc) (archive zip-loc (make-temporary-file "extract-~a" 'directory)))
zip-locs))
(displayln "3. Extract each archive")
(for-each extract archives)
(displayln "4. Gather all the metadata into _data")
(meta:run-main (map archive-temp-dir archives))
(displayln "5. Copy all the channel files into _data")
(for-each copy-to-_data archives)
(displayln "6. Clean up after ourselves")
(for-each delete archives)
(displayln "7. Pre-process json slashes")
(preproc-json "_data"))
(module+ main
(command-line
#:args (archives-dir)
(run-main archives-dir)))