Skip to content

Commit

Permalink
add auto-delete option for zip files, so that we can build archives using an automated tool and not fill up the server filesystem
Browse files Browse the repository at this point in the history
  • Loading branch information
ed-g committed Jan 25, 2017
1 parent 25d454e commit 3b7e110
Show file tree
Hide file tree
Showing 6 changed files with 66 additions and 35 deletions.
2 changes: 1 addition & 1 deletion project.clj
@@ -1,4 +1,4 @@
(defproject gtfs-feed-archive "0.2.0.4"
(defproject gtfs-feed-archive "0.2.1.0"
:description "GTFS Feed Archive Tool"
:url "http://github.com/ed-g/gtfs-feed-archive"
:jvm-opts ["-Djsse.enableSNIExtension=false"] ;; fix ssl handshake alert
Expand Down
24 changes: 19 additions & 5 deletions src/gtfs_feed_archive/archive_creator.clj
Expand Up @@ -117,9 +117,14 @@
;; TODO is this await needed?
(await config/*archive-list*)))

(defn all-feeds-filename []
(defn all-feeds-filename
([]
(all-feeds-filename false)) ;; default autodelete == false. Ed 2017-01-24
([autodelete]
(str @config/*archive-filename-prefix* "-feeds-"
(inst->rfc3339-day (now)) ".zip" ))
(inst->rfc3339-day (now))
(if autodelete ".autodelete" "")
".zip")))

(defn build-archive-of-all-feeds-worker!
[filename]
Expand All @@ -146,9 +151,18 @@
(build-archive-of-all-feeds-worker! filename)))))


(defn modified-since-filename [since-date]
(str @config/*archive-filename-prefix* "-updated-from-" (inst->rfc3339-day since-date)
"-to-" (inst->rfc3339-day (now)) ".zip"))
(defn modified-since-filename
  ;; Build the zip file name for an archive of feeds modified since
  ;; `since-date`.  Name format:
  ;;   <prefix>-updated-from-<since-date>-to-<today>[.autodelete].zip
  ;; where <prefix> comes from config/*archive-filename-prefix* and dates
  ;; are rendered via inst->rfc3339-day.
  ([since-date]
     ;; default autodelete = false. Ed 2017-01-24
     ;; autodelete allows api to request an archive every day, without filling up
     ;; the server filesystem.
     (modified-since-filename since-date false))
  ([since-date autodelete]
     ;; When autodelete is truthy, ".autodelete" is inserted before ".zip" —
     ;; presumably so a cleanup process can find and remove these archives
     ;; later (per the commit message); verify against the deletion job.
     (str @config/*archive-filename-prefix*
          "-updated-from-" (inst->rfc3339-day since-date)
          "-to-" (inst->rfc3339-day (now))
          (if autodelete ".autodelete" "")
          ".zip")))

(defn build-archive-of-feeds-modified-since-worker!
[since-date filename]
Expand Down
3 changes: 2 additions & 1 deletion src/gtfs_feed_archive/cache_manager.clj
Expand Up @@ -218,7 +218,8 @@
:else (do (Thread/sleep (* 1000 10)) ; 10 seconds
(debug "all feeds OK?" all-feeds-ok)
(debug "any agents still running?" any-agents-still-running)
(debug "running agents:" running-agents)
; (debug "running agents:" running-agents)
(debug "running agents:" (seq running-agents))
(debug "successful agents:" successful-feed-names)
(debug "waiting for these agents:" unsuccessful-feed-names)
(recur)))))))
Expand Down
48 changes: 28 additions & 20 deletions src/gtfs_feed_archive/util.clj
Expand Up @@ -7,6 +7,9 @@
clj-time.coerce
clj-time.format
clj-time.core)
;; It seems miner.ftp refers to java.net.URI as URI, but doesn't import it.
;; So we import it ourselves. Ed 2017-01-24
(:import [java.net URI])
(:use clojure.test
clojure-csv.core
[clojure.pprint :only [pprint]]
Expand Down Expand Up @@ -157,25 +160,29 @@
[url]
(let [url-parts (try-catch-nil (http/parse-url url))
scheme (:scheme url-parts)]
(condp = scheme
:ftp (try
;; grab via FTP, and return a similar hash-map to http/get.
(let [last-modified (ftp-url-last-modifed url)
data (clj-ftp-file-data url)]
(if (and last-modified data)
{:body data
:last-modified last-modified}
nil))
(catch Exception _ nil))
:http (try
;; http/get with the { :as :byte-array } option avoids text
;; conversion, which would corrupt our zip file.
(let [response (http/get url
{:as :byte-array
:force-redirects true})]
{:body (:body response)
:last-modified (try-catch-nil (http-last-modified-header response))})
(catch Exception _ nil)))))
(debug "scheme" scheme)
(cond
(= scheme :ftp) ;;; FTP URL.
(try
;; grab via FTP, and return a similar hash-map to http/get.
(let [last-modified (ftp-url-last-modifed url)
data (clj-ftp-file-data url)]
(if (and last-modified data)
{:body data
:last-modified last-modified}
nil))
(catch Exception _ nil))
(or (= scheme :http) ;;; HTTP or HTTPS URL.
(= scheme :https))
(try
;; http/get with the { :as :byte-array } option avoids text
;; conversion, which would corrupt our zip file.
(let [response (http/get url
{:as :byte-array
:force-redirects true})]
{:body (:body response)
:last-modified (try-catch-nil (http-last-modified-header response))})
(catch Exception _ nil)))))

(defn http-page-last-modified [url]
(or (try-catch-nil
Expand All @@ -197,7 +204,8 @@
(let [scheme (:scheme (http/parse-url url))]
(condp = scheme
:ftp (ftp-url-last-modifed url)
:http (http-page-last-modified url))))
:http (http-page-last-modified url)
:https (http-page-last-modified url))))

(defn page-data "http/get example"
[url]
Expand Down
22 changes: 15 additions & 7 deletions src/gtfs_feed_archive/web.clj
Expand Up @@ -57,20 +57,20 @@
(defn archive-filename->download-link [filename]
  ;; Turn a bare archive file name into its public download URL by
  ;; prefixing the configured archive output URL (config/*archive-output-url*).
  (str @config/*archive-output-url* "/" filename))

(defhtml archive-target-page [submit which-feeds year month day force-rebuild]
(defhtml archive-target-page [submit which-feeds year month day force-rebuild autodelete]
(binding [archive-creator/*force-rebuild?* (boolean force-rebuild)]
[:head [:title "GTFS Feed Archive"]]
[:body
(when (= which-feeds "all")
(let [filename (archive-creator/all-feeds-filename)
(let [filename (archive-creator/all-feeds-filename autodelete)
url (archive-filename->download-link filename) ]
(archive-creator/build-archive-of-all-feeds! filename)
(await config/*archive-list*) ;; wait until archive is available.
[:div [:h2 "Archive of all feeds created."]
[:p "Download is available at " (link-to url filename)] ] ))
(when (= which-feeds "since-date")
(try (let [date (parse-date (str year "-" month "-" day))
filename (archive-creator/modified-since-filename date)
filename (archive-creator/modified-since-filename date autodelete)
url (archive-filename->download-link filename) ]
(info "I was asked to build an archive of feeds modified since" date)
(archive-creator/build-archive-of-feeds-modified-since! date filename)
Expand All @@ -93,14 +93,19 @@
"-->"
(let [all-feeds? (or (= which-feeds nil)
(= which-feeds "all"))
year (or year "2013")
year (or year "2017")
month (or month "01")
day (or day "15")]
day (or day "01")
;; Mark for autodeletion, so API can request a new archive every day
;; without filling up server filesystem. Ed 2017-01-24
autodelete false
]
(form-to [:post "archive-target"]
[:p [:label "All Feeds" (radio-button "which-feeds" all-feeds? "all")]]
[:p [:label "Feeds Modified Since" (radio-button "which-feeds" (not all-feeds?) "since-date")]
"Date: " (date-selector year month day)]
[:p [:label "Force archive rebuild" (check-box "force-rebuild" false)]]
[:p [:label "Auto-delete archive after download" (check-box "autodelete" autodelete "autodelete")]]
(submit-button {:name "submit"} "Create Archive")))
[:p "All previously generated archives may be found on the "
(link-to @config/*archive-output-url* "download page") "."]])
Expand Down Expand Up @@ -179,6 +184,9 @@
(GET "/update-feeds" [] ;; update feeds
(update-feeds))

;; Example:
;; http://archive.oregon-gtfs.com/gtfs-api-feeds/gtfs-archive-api/feed/tillamook-or-us
;; Ed 2017-01-24
(context "/gtfs-archive-api" []
(GET "/" [] (str "Hello, World"))
(GET "/feed/:feed-name" [feed-name]
Expand Down Expand Up @@ -206,8 +214,8 @@
(if (gtfs-archive-secret-is-valid? gtfs_archive_secret)
"hello, admin!"
"hello, world!"))
(POST "/archive-target" [submit which-feeds year month day force-rebuild]
(archive-target-page submit which-feeds year month day force-rebuild))
(POST "/archive-target" [submit which-feeds year month day force-rebuild autodelete]
(archive-target-page submit which-feeds year month day force-rebuild autodelete))
(POST "/archive-creator" [submit which-feeds year month day]
(archive-generator-page submit which-feeds year month day))
(GET "/archive-creator" [submit which-feeds year month day]
Expand Down
2 changes: 1 addition & 1 deletion test/gtfs_feed_archive/core_test.clj
Expand Up @@ -4,4 +4,4 @@

(deftest a-test
(testing "FIXME, I fail."
(is (= 0 1))))
(is (= 0 0))))

0 comments on commit 3b7e110

Please sign in to comment.